From 7d5c96fdc922eff6ff2c13d686b548524793f3c1 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jan=20R=C4=99korajski?= Date: Tue, 6 Jun 2023 21:32:35 +0200 Subject: [PATCH] 2.1.11 with updates from 2.1.12-staging branch to build with kernel 6.3 --- staging.patch | 5955 +++++++++++++++++++++++++++++++++++++++++++++++++ zfs.spec | 10 +- 2 files changed, 5959 insertions(+), 6 deletions(-) create mode 100644 staging.patch diff --git a/staging.patch b/staging.patch new file mode 100644 index 0000000..c6131ed --- /dev/null +++ b/staging.patch @@ -0,0 +1,5955 @@ +diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c +index f424dd771..bcd520de3 100644 +--- a/cmd/zdb/zdb.c ++++ b/cmd/zdb/zdb.c +@@ -3102,13 +3102,22 @@ dump_znode_sa_xattr(sa_handle_t *hdl) + (void) printf("\tSA xattrs: %d bytes, %d entries\n\n", + sa_xattr_size, sa_xattr_entries); + while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) { ++ boolean_t can_print = !dump_opt['P']; + uchar_t *value; + uint_t cnt, idx; + + (void) printf("\t\t%s = ", nvpair_name(elem)); + nvpair_value_byte_array(elem, &value, &cnt); ++ ++ for (idx = 0; idx < cnt; ++idx) { ++ if (!isprint(value[idx])) { ++ can_print = B_FALSE; ++ break; ++ } ++ } ++ + for (idx = 0; idx < cnt; ++idx) { +- if (isprint(value[idx])) ++ if (can_print) + (void) putchar(value[idx]); + else + (void) printf("\\%3.3o", value[idx]); +diff --git a/cmd/zed/agents/zfs_retire.c b/cmd/zed/agents/zfs_retire.c +index b4794e311..29eaee750 100644 +--- a/cmd/zed/agents/zfs_retire.c ++++ b/cmd/zed/agents/zfs_retire.c +@@ -444,14 +444,16 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, + return; + + /* Remove the vdev since device is unplugged */ ++ int remove_status = 0; + if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) { +- int status = zpool_vdev_remove_wanted(zhp, devname); ++ remove_status = zpool_vdev_remove_wanted(zhp, devname); + fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'" +- ", ret:%d", devname, status); ++ ", err:%d", devname, 
libzfs_errno(zhdl)); + } + + /* Replace the vdev with a spare if its not a l2arc */ +- if (!l2arc && (!fmd_prop_get_int32(hdl, "spare_on_remove") || ++ if (!l2arc && !remove_status && ++ (!fmd_prop_get_int32(hdl, "spare_on_remove") || + replace_with_spare(hdl, zhp, vdev) == B_FALSE)) { + /* Could not handle with spare */ + fmd_hdl_debug(hdl, "no spare for '%s'", devname); +diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c +index 2311d4f04..a06af9aec 100644 +--- a/cmd/zpool/zpool_main.c ++++ b/cmd/zpool/zpool_main.c +@@ -392,7 +392,7 @@ get_usage(zpool_help_t idx) + case HELP_REOPEN: + return (gettext("\treopen [-n] \n")); + case HELP_INITIALIZE: +- return (gettext("\tinitialize [-c | -s] [-w] " ++ return (gettext("\tinitialize [-c | -s | -u] [-w] " + "[ ...]\n")); + case HELP_SCRUB: + return (gettext("\tscrub [-s | -p] [-w] ...\n")); +@@ -548,12 +548,13 @@ usage(boolean_t requested) + } + + /* +- * zpool initialize [-c | -s] [-w] [ ...] ++ * zpool initialize [-c | -s | -u] [-w] [ ...] + * Initialize all unused blocks in the specified vdevs, or all vdevs in the pool + * if none specified. + * + * -c Cancel. Ends active initializing. + * -s Suspend. Initializing can then be restarted with no flags. ++ * -u Uninitialize. Clears initialization state. + * -w Wait. Blocks until initializing has completed. 
+ */ +int +@@ -569,12 +570,14 @@ zpool_do_initialize(int argc, char **argv) + struct option long_options[] = { + {"cancel", no_argument, NULL, 'c'}, + {"suspend", no_argument, NULL, 's'}, ++ {"uninit", no_argument, NULL, 'u'}, + {"wait", no_argument, NULL, 'w'}, + {0, 0, 0, 0} + }; + + pool_initialize_func_t cmd_type = POOL_INITIALIZE_START; +- while ((c = getopt_long(argc, argv, "csw", long_options, NULL)) != -1) { ++ while ((c = getopt_long(argc, argv, "csuw", long_options, ++ NULL)) != -1) { + switch (c) { + case 'c': + if (cmd_type != POOL_INITIALIZE_START && +@@ -594,6 +597,15 @@ zpool_do_initialize(int argc, char **argv) + } + cmd_type = POOL_INITIALIZE_SUSPEND; + break; ++ case 'u': ++ if (cmd_type != POOL_INITIALIZE_START && ++ cmd_type != POOL_INITIALIZE_UNINIT) { ++ (void) fprintf(stderr, gettext("-u cannot be " ++ "combined with other options\n")); ++ usage(B_FALSE); ++ } ++ cmd_type = POOL_INITIALIZE_UNINIT; ++ break; + case 'w': + wait = B_TRUE; + break; +@@ -620,8 +632,8 @@ zpool_do_initialize(int argc, char **argv) + } + + if (wait && (cmd_type != POOL_INITIALIZE_START)) { +- (void) fprintf(stderr, gettext("-w cannot be used with -c or " +- "-s\n")); ++ (void) fprintf(stderr, gettext("-w cannot be used with -c, -s " ++ "or -u\n")); + usage(B_FALSE); + } + +@@ -6921,6 +6933,17 @@ zpool_do_online(int argc, char **argv) + return (1); + + for (i = 1; i < argc; i++) { ++ vdev_state_t oldstate; ++ boolean_t avail_spare, l2cache; ++ nvlist_t *tgt = zpool_find_vdev(zhp, argv[i], &avail_spare, ++ &l2cache, NULL); ++ if (tgt == NULL) { ++ ret = 1; ++ continue; ++ } ++ uint_t vsc; ++ oldstate = ((vdev_stat_t *)fnvlist_lookup_uint64_array(tgt, ++ ZPOOL_CONFIG_VDEV_STATS, &vsc))->vs_state; + if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) { + if (newstate != VDEV_STATE_HEALTHY) { + (void) printf(gettext("warning: device '%s' " +@@ -6934,6 +6957,17 @@ zpool_do_online(int argc, char **argv) + (void) printf(gettext("use 'zpool " + "replace' to replace 
devices " + "that are no longer present\n")); ++ if ((flags & ZFS_ONLINE_EXPAND)) { ++ (void) printf(gettext("%s: failed " ++ "to expand usable space on " ++ "unhealthy device '%s'\n"), ++ (oldstate >= VDEV_STATE_DEGRADED ? ++ "error" : "warning"), argv[i]); ++ if (oldstate >= VDEV_STATE_DEGRADED) { ++ ret = 1; ++ break; ++ } ++ } + } + } else { + ret = 1; +@@ -7549,19 +7583,20 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) + + zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf)); + +- assert(ps->pss_func == POOL_SCAN_SCRUB || +- ps->pss_func == POOL_SCAN_RESILVER); ++ int is_resilver = ps->pss_func == POOL_SCAN_RESILVER; ++ int is_scrub = ps->pss_func == POOL_SCAN_SCRUB; ++ assert(is_resilver || is_scrub); + + /* Scan is finished or canceled. */ + if (ps->pss_state == DSS_FINISHED) { + secs_to_dhms(end - start, time_buf); + +- if (ps->pss_func == POOL_SCAN_SCRUB) { ++ if (is_scrub) { + (void) printf(gettext("scrub repaired %s " + "in %s with %llu errors on %s"), processed_buf, + time_buf, (u_longlong_t)ps->pss_errors, + ctime(&end)); +- } else if (ps->pss_func == POOL_SCAN_RESILVER) { ++ } else if (is_resilver) { + (void) printf(gettext("resilvered %s " + "in %s with %llu errors on %s"), processed_buf, + time_buf, (u_longlong_t)ps->pss_errors, +@@ -7569,10 +7604,10 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) + } + return; + } else if (ps->pss_state == DSS_CANCELED) { +- if (ps->pss_func == POOL_SCAN_SCRUB) { ++ if (is_scrub) { + (void) printf(gettext("scrub canceled on %s"), + ctime(&end)); +- } else if (ps->pss_func == POOL_SCAN_RESILVER) { ++ } else if (is_resilver) { + (void) printf(gettext("resilver canceled on %s"), + ctime(&end)); + } +@@ -7582,7 +7617,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) + assert(ps->pss_state == DSS_SCANNING); + + /* Scan is in progress. Resilvers can't be paused. 
*/ +- if (ps->pss_func == POOL_SCAN_SCRUB) { ++ if (is_scrub) { + if (pause == 0) { + (void) printf(gettext("scrub in progress since %s"), + ctime(&start)); +@@ -7592,7 +7627,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) + (void) printf(gettext("\tscrub started on %s"), + ctime(&start)); + } +- } else if (ps->pss_func == POOL_SCAN_RESILVER) { ++ } else if (is_resilver) { + (void) printf(gettext("resilver in progress since %s"), + ctime(&start)); + } +@@ -7634,17 +7669,27 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) + scanned_buf, issued_buf, total_buf); + } + +- if (ps->pss_func == POOL_SCAN_RESILVER) { ++ if (is_resilver) { + (void) printf(gettext("\t%s resilvered, %.2f%% done"), + processed_buf, 100 * fraction_done); +- } else if (ps->pss_func == POOL_SCAN_SCRUB) { ++ } else if (is_scrub) { + (void) printf(gettext("\t%s repaired, %.2f%% done"), + processed_buf, 100 * fraction_done); + } + + if (pause == 0) { ++ /* ++ * Only provide an estimate iff: ++ * 1) the time remaining is valid, and ++ * 2) the issue rate exceeds 10 MB/s, and ++ * 3) it's either: ++ * a) a resilver which has started repairs, or ++ * b) a scrub which has entered the issue phase. ++ */ + if (total_secs_left != UINT64_MAX && +- issue_rate >= 10 * 1024 * 1024) { ++ issue_rate >= 10 * 1024 * 1024 && ++ ((is_resilver && ps->pss_processed > 0) || ++ (is_scrub && issued > 0))) { + (void) printf(gettext(", %s to go\n"), time_buf); + } else { + (void) printf(gettext(", no estimated " +diff --git a/config/always-compiler-options.m4 b/config/always-compiler-options.m4 +index 5046ce0dd..0f66db584 100644 +--- a/config/always-compiler-options.m4 ++++ b/config/always-compiler-options.m4 +@@ -221,3 +221,34 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA], [ + CFLAGS="$saved_flags" + AC_SUBST([NO_IPA_SRA]) + ]) ++ ++dnl # ++dnl # Check if kernel cc supports -fno-ipa-sra option. 
++dnl # ++AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA], [ ++ AC_MSG_CHECKING([whether $KERNEL_CC supports -fno-ipa-sra]) ++ ++ saved_cc="$CC" ++ saved_flags="$CFLAGS" ++ CC="gcc" ++ CFLAGS="$CFLAGS -Werror -fno-ipa-sra" ++ ++ AS_IF([ test -n "$KERNEL_CC" ], [ ++ CC="$KERNEL_CC" ++ ]) ++ AS_IF([ test -n "$KERNEL_LLVM" ], [ ++ CC="clang" ++ ]) ++ ++ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [ ++ KERNEL_NO_IPA_SRA=-fno-ipa-sra ++ AC_MSG_RESULT([yes]) ++ ], [ ++ KERNEL_NO_IPA_SRA= ++ AC_MSG_RESULT([no]) ++ ]) ++ ++ CC="$saved_cc" ++ CFLAGS="$saved_flags" ++ AC_SUBST([KERNEL_NO_IPA_SRA]) ++]) +diff --git a/config/kernel-acl.m4 b/config/kernel-acl.m4 +index 6e92da97d..be08c3c60 100644 +--- a/config/kernel-acl.m4 ++++ b/config/kernel-acl.m4 +@@ -236,7 +236,22 @@ dnl # + dnl # 6.2 API change, + dnl # set_acl() second paramter changed to a struct dentry * + dnl # ++dnl # 6.3 API change, ++dnl # set_acl() first parameter changed to struct mnt_idmap * ++dnl # + AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ ++ ZFS_LINUX_TEST_SRC([inode_operations_set_acl_mnt_idmap_dentry], [ ++ #include ++ ++ int set_acl_fn(struct mnt_idmap *idmap, ++ struct dentry *dent, struct posix_acl *acl, ++ int type) { return 0; } ++ ++ static const struct inode_operations ++ iops __attribute__ ((unused)) = { ++ .set_acl = set_acl_fn, ++ }; ++ ],[]) + ZFS_LINUX_TEST_SRC([inode_operations_set_acl_userns_dentry], [ + #include + +@@ -281,17 +296,24 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL], [ + AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists]) + AC_DEFINE(HAVE_SET_ACL_USERNS, 1, [iops->set_acl() takes 4 args]) + ],[ +- ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns_dentry], [ ++ ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_mnt_idmap_dentry], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists]) +- AC_DEFINE(HAVE_SET_ACL_USERNS_DENTRY_ARG2, 1, +- [iops->set_acl() takes 4 args, arg2 is struct dentry *]) ++ 
AC_DEFINE(HAVE_SET_ACL_IDMAP_DENTRY, 1, ++ [iops->set_acl() takes 4 args, arg1 is struct mnt_idmap *]) + ],[ +- ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [ ++ ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns_dentry], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists, takes 3 args]) ++ AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists]) ++ AC_DEFINE(HAVE_SET_ACL_USERNS_DENTRY_ARG2, 1, ++ [iops->set_acl() takes 4 args, arg2 is struct dentry *]) + ],[ +- ZFS_LINUX_REQUIRE_API([i_op->set_acl()], [3.14]) ++ ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists, takes 3 args]) ++ ],[ ++ ZFS_LINUX_REQUIRE_API([i_op->set_acl()], [3.14]) ++ ]) + ]) + ]) + ]) +diff --git a/config/kernel-cpu_has_feature.m4 b/config/kernel-cpu_has_feature.m4 +new file mode 100644 +index 000000000..608faf0f8 +--- /dev/null ++++ b/config/kernel-cpu_has_feature.m4 +@@ -0,0 +1,29 @@ ++dnl # ++dnl # cpu_has_feature() may referencing GPL-only cpu_feature_keys on powerpc ++dnl # ++ ++dnl # ++dnl # Checking if cpu_has_feature is exported GPL-only ++dnl # ++AC_DEFUN([ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE], [ ++ ZFS_LINUX_TEST_SRC([cpu_has_feature], [ ++ #include ++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) ++ #include ++ #else ++ #include ++ #endif ++ ], [ ++ return cpu_has_feature(CPU_FTR_ALTIVEC) ? 
0 : 1; ++ ], [], [ZFS_META_LICENSE]) ++]) ++AC_DEFUN([ZFS_AC_KERNEL_CPU_HAS_FEATURE], [ ++ AC_MSG_CHECKING([whether cpu_has_feature() is GPL-only]) ++ ZFS_LINUX_TEST_RESULT([cpu_has_feature_license], [ ++ AC_MSG_RESULT(no) ++ ], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_CPU_HAS_FEATURE_GPL_ONLY, 1, ++ [cpu_has_feature() is GPL-only]) ++ ]) ++]) +diff --git a/config/kernel-filemap.m4 b/config/kernel-filemap.m4 +new file mode 100644 +index 000000000..745928168 +--- /dev/null ++++ b/config/kernel-filemap.m4 +@@ -0,0 +1,26 @@ ++dnl # ++dnl # filemap_range_has_page was not available till 4.13 ++dnl # ++AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP], [ ++ ZFS_LINUX_TEST_SRC([filemap_range_has_page], [ ++ #include ++ ],[ ++ struct address_space *mapping = NULL; ++ loff_t lstart = 0; ++ loff_t lend = 0; ++ bool ret __attribute__ ((unused)); ++ ++ ret = filemap_range_has_page(mapping, lstart, lend); ++ ]) ++]) ++ ++AC_DEFUN([ZFS_AC_KERNEL_FILEMAP], [ ++ AC_MSG_CHECKING([whether filemap_range_has_page() is available]) ++ ZFS_LINUX_TEST_RESULT([filemap_range_has_page], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_FILEMAP_RANGE_HAS_PAGE, 1, ++ [filemap_range_has_page() is available]) ++ ],[ ++ AC_MSG_RESULT(no) ++ ]) ++]) +diff --git a/config/kernel-flush_dcache_page.m4 b/config/kernel-flush_dcache_page.m4 +new file mode 100644 +index 000000000..2340c386e +--- /dev/null ++++ b/config/kernel-flush_dcache_page.m4 +@@ -0,0 +1,26 @@ ++dnl # ++dnl # Starting from Linux 5.13, flush_dcache_page() becomes an inline ++dnl # function and may indirectly referencing GPL-only cpu_feature_keys on ++dnl # powerpc ++dnl # ++ ++dnl # ++dnl # Checking if flush_dcache_page is exported GPL-only ++dnl # ++AC_DEFUN([ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE], [ ++ ZFS_LINUX_TEST_SRC([flush_dcache_page], [ ++ #include ++ ], [ ++ flush_dcache_page(0); ++ ], [], [ZFS_META_LICENSE]) ++]) ++AC_DEFUN([ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE], [ ++ AC_MSG_CHECKING([whether flush_dcache_page() is GPL-only]) ++ 
ZFS_LINUX_TEST_RESULT([flush_dcache_page_license], [ ++ AC_MSG_RESULT(no) ++ ], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY, 1, ++ [flush_dcache_page() is GPL-only]) ++ ]) ++]) +diff --git a/config/kernel-generic_fillattr.m4 b/config/kernel-generic_fillattr.m4 +index 0acd5d531..02dee4d4c 100644 +--- a/config/kernel-generic_fillattr.m4 ++++ b/config/kernel-generic_fillattr.m4 +@@ -4,7 +4,10 @@ dnl # + dnl # generic_fillattr in linux/fs.h now requires a struct user_namespace* + dnl # as the first arg, to support idmapped mounts. + dnl # +-AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS], [ ++dnl # 6.3 API ++dnl # generic_fillattr() now takes struct mnt_idmap* as the first argument ++dnl # ++AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [ + ZFS_LINUX_TEST_SRC([generic_fillattr_userns], [ + #include + ],[ +@@ -13,16 +16,32 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS], [ + struct kstat *k = NULL; + generic_fillattr(userns, in, k); + ]) ++ ++ ZFS_LINUX_TEST_SRC([generic_fillattr_mnt_idmap], [ ++ #include ++ ],[ ++ struct mnt_idmap *idmap = NULL; ++ struct inode *in = NULL; ++ struct kstat *k = NULL; ++ generic_fillattr(idmap, in, k); ++ ]) + ]) + +-AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR_USERNS], [ +- AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*]) +- ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [ ++AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR], [ ++ AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*]) ++ ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [ + AC_MSG_RESULT([yes]) +- AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1, +- [generic_fillattr requires struct user_namespace*]) ++ AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1, ++ [generic_fillattr requires struct mnt_idmap*]) + ],[ +- AC_MSG_RESULT([no]) ++ AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*]) ++ ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [ ++ AC_MSG_RESULT([yes]) ++ 
AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1, ++ [generic_fillattr requires struct user_namespace*]) ++ ],[ ++ AC_MSG_RESULT([no]) ++ ]) + ]) + ]) + +diff --git a/config/kernel-inode-create.m4 b/config/kernel-inode-create.m4 +index a6ea11fb6..9e9e43180 100644 +--- a/config/kernel-inode-create.m4 ++++ b/config/kernel-inode-create.m4 +@@ -1,4 +1,22 @@ + AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [ ++ dnl # ++ dnl # 6.3 API change ++ dnl # The first arg is changed to struct mnt_idmap * ++ dnl # ++ ZFS_LINUX_TEST_SRC([create_mnt_idmap], [ ++ #include ++ #include ++ ++ int inode_create(struct mnt_idmap *idmap, ++ struct inode *inode ,struct dentry *dentry, ++ umode_t umode, bool flag) { return 0; } ++ ++ static const struct inode_operations ++ iops __attribute__ ((unused)) = { ++ .create = inode_create, ++ }; ++ ],[]) ++ + dnl # + dnl # 5.12 API change that added the struct user_namespace* arg + dnl # to the front of this function type's arg list. +@@ -35,19 +53,28 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [ + ]) + + AC_DEFUN([ZFS_AC_KERNEL_CREATE], [ +- AC_MSG_CHECKING([whether iops->create() takes struct user_namespace*]) +- ZFS_LINUX_TEST_RESULT([create_userns], [ ++ AC_MSG_CHECKING([whether iops->create() takes struct mnt_idmap*]) ++ ZFS_LINUX_TEST_RESULT([create_mnt_idmap], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_IOPS_CREATE_USERNS, 1, +- [iops->create() takes struct user_namespace*]) ++ AC_DEFINE(HAVE_IOPS_CREATE_IDMAP, 1, ++ [iops->create() takes struct mnt_idmap*]) + ],[ + AC_MSG_RESULT(no) + +- AC_MSG_CHECKING([whether iops->create() passes flags]) +- ZFS_LINUX_TEST_RESULT([create_flags], [ ++ AC_MSG_CHECKING([whether iops->create() takes struct user_namespace*]) ++ ZFS_LINUX_TEST_RESULT([create_userns], [ + AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_IOPS_CREATE_USERNS, 1, ++ [iops->create() takes struct user_namespace*]) + ],[ +- ZFS_LINUX_TEST_ERROR([iops->create()]) ++ AC_MSG_RESULT(no) ++ ++ AC_MSG_CHECKING([whether iops->create() passes flags]) ++ 
ZFS_LINUX_TEST_RESULT([create_flags], [ ++ AC_MSG_RESULT(yes) ++ ],[ ++ ZFS_LINUX_TEST_ERROR([iops->create()]) ++ ]) + ]) + ]) + ]) +diff --git a/config/kernel-inode-getattr.m4 b/config/kernel-inode-getattr.m4 +index f62e82f52..c8bfb0786 100644 +--- a/config/kernel-inode-getattr.m4 ++++ b/config/kernel-inode-getattr.m4 +@@ -1,4 +1,24 @@ + AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ ++ dnl # ++ dnl # Linux 6.3 API ++ dnl # The first arg of getattr I/O operations handler type ++ dnl # is changed to struct mnt_idmap* ++ dnl # ++ ZFS_LINUX_TEST_SRC([inode_operations_getattr_mnt_idmap], [ ++ #include ++ ++ int test_getattr( ++ struct mnt_idmap *idmap, ++ const struct path *p, struct kstat *k, ++ u32 request_mask, unsigned int query_flags) ++ { return 0; } ++ ++ static const struct inode_operations ++ iops __attribute__ ((unused)) = { ++ .getattr = test_getattr, ++ }; ++ ],[]) ++ + dnl # + dnl # Linux 5.12 API + dnl # The getattr I/O operations handler type was extended to require +@@ -55,37 +75,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ + + AC_DEFUN([ZFS_AC_KERNEL_INODE_GETATTR], [ + dnl # +- dnl # Kernel 5.12 test ++ dnl # Kernel 6.3 test + dnl # +- AC_MSG_CHECKING([whether iops->getattr() takes user_namespace]) +- ZFS_LINUX_TEST_RESULT([inode_operations_getattr_userns], [ ++ AC_MSG_CHECKING([whether iops->getattr() takes mnt_idmap]) ++ ZFS_LINUX_TEST_RESULT([inode_operations_getattr_mnt_idmap], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_USERNS_IOPS_GETATTR, 1, +- [iops->getattr() takes struct user_namespace*]) ++ AC_DEFINE(HAVE_IDMAP_IOPS_GETATTR, 1, ++ [iops->getattr() takes struct mnt_idmap*]) + ],[ + AC_MSG_RESULT(no) +- + dnl # +- dnl # Kernel 4.11 test ++ dnl # Kernel 5.12 test + dnl # +- AC_MSG_CHECKING([whether iops->getattr() takes a path]) +- ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [ ++ AC_MSG_CHECKING([whether iops->getattr() takes user_namespace]) ++ ZFS_LINUX_TEST_RESULT([inode_operations_getattr_userns], [ + AC_MSG_RESULT(yes) +- 
AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1, +- [iops->getattr() takes a path]) ++ AC_DEFINE(HAVE_USERNS_IOPS_GETATTR, 1, ++ [iops->getattr() takes struct user_namespace*]) + ],[ + AC_MSG_RESULT(no) + + dnl # +- dnl # Kernel < 4.11 test ++ dnl # Kernel 4.11 test + dnl # +- AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount]) +- ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [ ++ AC_MSG_CHECKING([whether iops->getattr() takes a path]) ++ ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1, +- [iops->getattr() takes a vfsmount]) ++ AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1, ++ [iops->getattr() takes a path]) + ],[ + AC_MSG_RESULT(no) ++ ++ dnl # ++ dnl # Kernel < 4.11 test ++ dnl # ++ AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount]) ++ ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1, ++ [iops->getattr() takes a vfsmount]) ++ ],[ ++ AC_MSG_RESULT(no) ++ ]) + ]) + ]) + ]) +diff --git a/config/kernel-inode-setattr.m4 b/config/kernel-inode-setattr.m4 +new file mode 100644 +index 000000000..45755b4eb +--- /dev/null ++++ b/config/kernel-inode-setattr.m4 +@@ -0,0 +1,87 @@ ++AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SETATTR], [ ++ dnl # ++ dnl # Linux 6.3 API ++ dnl # The first arg of setattr I/O operations handler type ++ dnl # is changed to struct mnt_idmap* ++ dnl # ++ ZFS_LINUX_TEST_SRC([inode_operations_setattr_mnt_idmap], [ ++ #include ++ ++ int test_setattr( ++ struct mnt_idmap *idmap, ++ struct dentry *de, struct iattr *ia) ++ { return 0; } ++ ++ static const struct inode_operations ++ iops __attribute__ ((unused)) = { ++ .setattr = test_setattr, ++ }; ++ ],[]) ++ ++ dnl # ++ dnl # Linux 5.12 API ++ dnl # The setattr I/O operations handler type was extended to require ++ dnl # a struct user_namespace* as its first arg, to support idmapped ++ dnl # mounts. 
++ dnl # ++ ZFS_LINUX_TEST_SRC([inode_operations_setattr_userns], [ ++ #include ++ ++ int test_setattr( ++ struct user_namespace *userns, ++ struct dentry *de, struct iattr *ia) ++ { return 0; } ++ ++ static const struct inode_operations ++ iops __attribute__ ((unused)) = { ++ .setattr = test_setattr, ++ }; ++ ],[]) ++ ++ ZFS_LINUX_TEST_SRC([inode_operations_setattr], [ ++ #include ++ ++ int test_setattr( ++ struct dentry *de, struct iattr *ia) ++ { return 0; } ++ ++ static const struct inode_operations ++ iops __attribute__ ((unused)) = { ++ .setattr = test_setattr, ++ }; ++ ],[]) ++]) ++ ++AC_DEFUN([ZFS_AC_KERNEL_INODE_SETATTR], [ ++ dnl # ++ dnl # Kernel 6.3 test ++ dnl # ++ AC_MSG_CHECKING([whether iops->setattr() takes mnt_idmap]) ++ ZFS_LINUX_TEST_RESULT([inode_operations_setattr_mnt_idmap], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_IDMAP_IOPS_SETATTR, 1, ++ [iops->setattr() takes struct mnt_idmap*]) ++ ],[ ++ AC_MSG_RESULT(no) ++ dnl # ++ dnl # Kernel 5.12 test ++ dnl # ++ AC_MSG_CHECKING([whether iops->setattr() takes user_namespace]) ++ ZFS_LINUX_TEST_RESULT([inode_operations_setattr_userns], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_USERNS_IOPS_SETATTR, 1, ++ [iops->setattr() takes struct user_namespace*]) ++ ],[ ++ AC_MSG_RESULT(no) ++ ++ AC_MSG_CHECKING([whether iops->setattr() exists]) ++ ZFS_LINUX_TEST_RESULT([inode_operations_setattr], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_IOPS_SETATTR, 1, ++ [iops->setattr() exists]) ++ ],[ ++ AC_MSG_RESULT(no) ++ ]) ++ ]) ++ ]) ++]) +diff --git a/config/kernel-is_owner_or_cap.m4 b/config/kernel-is_owner_or_cap.m4 +index a90cf3da6..4e9c002b7 100644 +--- a/config/kernel-is_owner_or_cap.m4 ++++ b/config/kernel-is_owner_or_cap.m4 +@@ -16,12 +16,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE], [ + (void) inode_owner_or_capable(ip); + ]) + +- ZFS_LINUX_TEST_SRC([inode_owner_or_capable_idmapped], [ ++ ZFS_LINUX_TEST_SRC([inode_owner_or_capable_userns], [ + #include + ],[ + struct inode *ip = NULL; + (void) 
inode_owner_or_capable(&init_user_ns, ip); + ]) ++ ++ ZFS_LINUX_TEST_SRC([inode_owner_or_capable_mnt_idmap], [ ++ #include ++ #include ++ ],[ ++ struct inode *ip = NULL; ++ (void) inode_owner_or_capable(&nop_mnt_idmap, ip); ++ ]) + ]) + + AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [ +@@ -35,12 +43,21 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [ + + AC_MSG_CHECKING( + [whether inode_owner_or_capable() takes user_ns]) +- ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_idmapped], [ ++ ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_userns], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED, 1, ++ AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_USERNS, 1, + [inode_owner_or_capable() takes user_ns]) + ],[ +- ZFS_LINUX_TEST_ERROR([capability]) ++ AC_MSG_RESULT(no) ++ AC_MSG_CHECKING( ++ [whether inode_owner_or_capable() takes mnt_idmap]) ++ ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_mnt_idmap], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_IDMAP, 1, ++ [inode_owner_or_capable() takes mnt_idmap]) ++ ], [ ++ ZFS_LINUX_TEST_ERROR([capability]) ++ ]) + ]) + ]) + ]) +diff --git a/config/kernel-mkdir.m4 b/config/kernel-mkdir.m4 +index 6667ed04f..7407a791b 100644 +--- a/config/kernel-mkdir.m4 ++++ b/config/kernel-mkdir.m4 +@@ -2,6 +2,22 @@ dnl # + dnl # Supported mkdir() interfaces checked newest to oldest. 
+ dnl # + AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [ ++ dnl # ++ dnl # 6.3 API change ++ dnl # mkdir() takes struct mnt_idmap * as the first arg ++ dnl # ++ ZFS_LINUX_TEST_SRC([mkdir_mnt_idmap], [ ++ #include ++ ++ int mkdir(struct mnt_idmap *idmap, ++ struct inode *inode, struct dentry *dentry, ++ umode_t umode) { return 0; } ++ static const struct inode_operations ++ iops __attribute__ ((unused)) = { ++ .mkdir = mkdir, ++ }; ++ ],[]) ++ + dnl # + dnl # 5.12 API change + dnl # The struct user_namespace arg was added as the first argument to +@@ -43,25 +59,36 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [ + + AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [ + dnl # +- dnl # 5.12 API change +- dnl # The struct user_namespace arg was added as the first argument to +- dnl # mkdir() of the iops structure. ++ dnl # 6.3 API change ++ dnl # mkdir() takes struct mnt_idmap * as the first arg + dnl # +- AC_MSG_CHECKING([whether iops->mkdir() takes struct user_namespace*]) +- ZFS_LINUX_TEST_RESULT([mkdir_user_namespace], [ ++ AC_MSG_CHECKING([whether iops->mkdir() takes struct mnt_idmap*]) ++ ZFS_LINUX_TEST_RESULT([mkdir_mnt_idmap], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_IOPS_MKDIR_USERNS, 1, +- [iops->mkdir() takes struct user_namespace*]) ++ AC_DEFINE(HAVE_IOPS_MKDIR_IDMAP, 1, ++ [iops->mkdir() takes struct mnt_idmap*]) + ],[ +- AC_MSG_RESULT(no) +- +- AC_MSG_CHECKING([whether iops->mkdir() takes umode_t]) +- ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [ ++ dnl # ++ dnl # 5.12 API change ++ dnl # The struct user_namespace arg was added as the first argument to ++ dnl # mkdir() of the iops structure. 
++ dnl # ++ AC_MSG_CHECKING([whether iops->mkdir() takes struct user_namespace*]) ++ ZFS_LINUX_TEST_RESULT([mkdir_user_namespace], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_MKDIR_UMODE_T, 1, +- [iops->mkdir() takes umode_t]) ++ AC_DEFINE(HAVE_IOPS_MKDIR_USERNS, 1, ++ [iops->mkdir() takes struct user_namespace*]) + ],[ +- ZFS_LINUX_TEST_ERROR([mkdir()]) ++ AC_MSG_RESULT(no) ++ ++ AC_MSG_CHECKING([whether iops->mkdir() takes umode_t]) ++ ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_MKDIR_UMODE_T, 1, ++ [iops->mkdir() takes umode_t]) ++ ],[ ++ ZFS_LINUX_TEST_ERROR([mkdir()]) ++ ]) + ]) + ]) + ]) +diff --git a/config/kernel-mknod.m4 b/config/kernel-mknod.m4 +index ffe451060..1494ec1ae 100644 +--- a/config/kernel-mknod.m4 ++++ b/config/kernel-mknod.m4 +@@ -1,4 +1,22 @@ + AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [ ++ dnl # ++ dnl # 6.3 API change ++ dnl # The first arg is now struct mnt_idmap* ++ dnl # ++ ZFS_LINUX_TEST_SRC([mknod_mnt_idmap], [ ++ #include ++ #include ++ ++ int tmp_mknod(struct mnt_idmap *idmap, ++ struct inode *inode ,struct dentry *dentry, ++ umode_t u, dev_t d) { return 0; } ++ ++ static const struct inode_operations ++ iops __attribute__ ((unused)) = { ++ .mknod = tmp_mknod, ++ }; ++ ],[]) ++ + dnl # + dnl # 5.12 API change that added the struct user_namespace* arg + dnl # to the front of this function type's arg list. 
+@@ -19,12 +37,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [ + ]) + + AC_DEFUN([ZFS_AC_KERNEL_MKNOD], [ +- AC_MSG_CHECKING([whether iops->mknod() takes struct user_namespace*]) +- ZFS_LINUX_TEST_RESULT([mknod_userns], [ ++ AC_MSG_CHECKING([whether iops->mknod() takes struct mnt_idmap*]) ++ ZFS_LINUX_TEST_RESULT([mknod_mnt_idmap], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_IOPS_MKNOD_USERNS, 1, +- [iops->mknod() takes struct user_namespace*]) ++ AC_DEFINE(HAVE_IOPS_MKNOD_IDMAP, 1, ++ [iops->mknod() takes struct mnt_idmap*]) + ],[ + AC_MSG_RESULT(no) ++ AC_MSG_CHECKING([whether iops->mknod() takes struct user_namespace*]) ++ ZFS_LINUX_TEST_RESULT([mknod_userns], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_IOPS_MKNOD_USERNS, 1, ++ [iops->mknod() takes struct user_namespace*]) ++ ],[ ++ AC_MSG_RESULT(no) ++ ]) + ]) + ]) +diff --git a/config/kernel-reclaim_state.m4 b/config/kernel-reclaim_state.m4 +new file mode 100644 +index 000000000..9936b3c10 +--- /dev/null ++++ b/config/kernel-reclaim_state.m4 +@@ -0,0 +1,26 @@ ++AC_DEFUN([ZFS_AC_KERNEL_SRC_RECLAIMED], [ ++ dnl # ++ dnl # 6.4 API change ++ dnl # The reclaimed_slab of struct reclaim_state ++ dnl # is renamed to reclaimed ++ dnl # ++ ZFS_LINUX_TEST_SRC([reclaim_state_reclaimed], [ ++ #include ++ static const struct reclaim_state ++ rs __attribute__ ((unused)) = { ++ .reclaimed = 100, ++ }; ++ ],[]) ++]) ++ ++AC_DEFUN([ZFS_AC_KERNEL_RECLAIMED], [ ++ AC_MSG_CHECKING([whether struct reclaim_state has reclaimed field]) ++ ZFS_LINUX_TEST_RESULT([reclaim_state_reclaimed], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_RECLAIM_STATE_RECLAIMED, 1, ++ [struct reclaim_state has reclaimed]) ++ ],[ ++ AC_MSG_RESULT(no) ++ ]) ++]) ++ +diff --git a/config/kernel-rename.m4 b/config/kernel-rename.m4 +index 302db43f5..b33cd0bfb 100644 +--- a/config/kernel-rename.m4 ++++ b/config/kernel-rename.m4 +@@ -33,24 +33,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ + .rename = rename_fn, + }; + ],[]) ++ ++ dnl # ++ dnl # 6.3 API change - the first arg is 
now struct mnt_idmap* ++ dnl # ++ ZFS_LINUX_TEST_SRC([inode_operations_rename_mnt_idmap], [ ++ #include ++ int rename_fn(struct mnt_idmap *idmap, struct inode *sip, ++ struct dentry *sdp, struct inode *tip, struct dentry *tdp, ++ unsigned int flags) { return 0; } ++ ++ static const struct inode_operations ++ iops __attribute__ ((unused)) = { ++ .rename = rename_fn, ++ }; ++ ],[]) + ]) + + AC_DEFUN([ZFS_AC_KERNEL_RENAME], [ +- AC_MSG_CHECKING([whether iops->rename() takes struct user_namespace*]) +- ZFS_LINUX_TEST_RESULT([inode_operations_rename_userns], [ ++ AC_MSG_CHECKING([whether iops->rename() takes struct mnt_idmap*]) ++ ZFS_LINUX_TEST_RESULT([inode_operations_rename_mnt_idmap], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_IOPS_RENAME_USERNS, 1, +- [iops->rename() takes struct user_namespace*]) ++ AC_DEFINE(HAVE_IOPS_RENAME_IDMAP, 1, ++ [iops->rename() takes struct mnt_idmap*]) + ],[ + AC_MSG_RESULT(no) + +- AC_MSG_CHECKING([whether iop->rename() wants flags]) +- ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [ ++ AC_MSG_CHECKING([whether iops->rename() takes struct user_namespace*]) ++ ZFS_LINUX_TEST_RESULT([inode_operations_rename_userns], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, +- [iops->rename() wants flags]) ++ AC_DEFINE(HAVE_IOPS_RENAME_USERNS, 1, ++ [iops->rename() takes struct user_namespace*]) + ],[ + AC_MSG_RESULT(no) ++ ++ AC_MSG_CHECKING([whether iops->rename() wants flags]) ++ ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, ++ [iops->rename() wants flags]) ++ ],[ ++ AC_MSG_RESULT(no) ++ ]) + ]) + ]) + ]) +diff --git a/config/kernel-setattr-prepare.m4 b/config/kernel-setattr-prepare.m4 +index 24245aa53..e02d6263e 100644 +--- a/config/kernel-setattr-prepare.m4 ++++ b/config/kernel-setattr-prepare.m4 +@@ -27,26 +27,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SETATTR_PREPARE], [ + int error __attribute__ ((unused)) = + setattr_prepare(userns, dentry, attr); + ]) 
++ ++ dnl # ++ dnl # 6.3 API change ++ dnl # The first arg of setattr_prepare() is changed to struct mnt_idmap* ++ dnl # ++ ZFS_LINUX_TEST_SRC([setattr_prepare_mnt_idmap], [ ++ #include ++ ], [ ++ struct dentry *dentry = NULL; ++ struct iattr *attr = NULL; ++ struct mnt_idmap *idmap = NULL; ++ int error __attribute__ ((unused)) = ++ setattr_prepare(idmap, dentry, attr); ++ ]) + ]) + + AC_DEFUN([ZFS_AC_KERNEL_SETATTR_PREPARE], [ +- AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct user_namespace*]) +- ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_userns], ++ AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct mnt_idmap*]) ++ ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_mnt_idmap], + [setattr_prepare], [fs/attr.c], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_SETATTR_PREPARE_USERNS, 1, +- [setattr_prepare() accepts user_namespace]) ++ AC_DEFINE(HAVE_SETATTR_PREPARE_IDMAP, 1, ++ [setattr_prepare() accepts mnt_idmap]) + ], [ +- AC_MSG_RESULT(no) +- +- AC_MSG_CHECKING([whether setattr_prepare() is available, doesn't accept user_namespace]) +- ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare], +- [setattr_prepare], [fs/attr.c], [ ++ AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct user_namespace*]) ++ ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_userns], ++ [setattr_prepare], [fs/attr.c], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_SETATTR_PREPARE_NO_USERNS, 1, +- [setattr_prepare() is available, doesn't accept user_namespace]) ++ AC_DEFINE(HAVE_SETATTR_PREPARE_USERNS, 1, ++ [setattr_prepare() accepts user_namespace]) + ], [ + AC_MSG_RESULT(no) ++ ++ AC_MSG_CHECKING([whether setattr_prepare() is available, doesn't accept user_namespace]) ++ ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare], ++ [setattr_prepare], [fs/attr.c], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_SETATTR_PREPARE_NO_USERNS, 1, ++ [setattr_prepare() is available, doesn't accept user_namespace]) ++ ], [ ++ AC_MSG_RESULT(no) ++ ]) + ]) + ]) + 
]) +diff --git a/config/kernel-symlink.m4 b/config/kernel-symlink.m4 +index d90366d04..a0333ed66 100644 +--- a/config/kernel-symlink.m4 ++++ b/config/kernel-symlink.m4 +@@ -1,4 +1,20 @@ + AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [ ++ dnl # ++ dnl # 6.3 API change that changed the first arg ++ dnl # to struct mnt_idmap* ++ dnl # ++ ZFS_LINUX_TEST_SRC([symlink_mnt_idmap], [ ++ #include ++ #include ++ int tmp_symlink(struct mnt_idmap *idmap, ++ struct inode *inode ,struct dentry *dentry, ++ const char *path) { return 0; } ++ ++ static const struct inode_operations ++ iops __attribute__ ((unused)) = { ++ .symlink = tmp_symlink, ++ }; ++ ],[]) + dnl # + dnl # 5.12 API change that added the struct user_namespace* arg + dnl # to the front of this function type's arg list. +@@ -19,12 +35,19 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [ + ]) + + AC_DEFUN([ZFS_AC_KERNEL_SYMLINK], [ +- AC_MSG_CHECKING([whether iops->symlink() takes struct user_namespace*]) +- ZFS_LINUX_TEST_RESULT([symlink_userns], [ ++ AC_MSG_CHECKING([whether iops->symlink() takes struct mnt_idmap*]) ++ ZFS_LINUX_TEST_RESULT([symlink_mnt_idmap], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_IOPS_SYMLINK_USERNS, 1, +- [iops->symlink() takes struct user_namespace*]) ++ AC_DEFINE(HAVE_IOPS_SYMLINK_IDMAP, 1, ++ [iops->symlink() takes struct mnt_idmap*]) + ],[ +- AC_MSG_RESULT(no) ++ AC_MSG_CHECKING([whether iops->symlink() takes struct user_namespace*]) ++ ZFS_LINUX_TEST_RESULT([symlink_userns], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_IOPS_SYMLINK_USERNS, 1, ++ [iops->symlink() takes struct user_namespace*]) ++ ],[ ++ AC_MSG_RESULT(no) ++ ]) + ]) + ]) +diff --git a/config/kernel-tmpfile.m4 b/config/kernel-tmpfile.m4 +index 0e1deb361..cc18b8f65 100644 +--- a/config/kernel-tmpfile.m4 ++++ b/config/kernel-tmpfile.m4 +@@ -4,6 +4,19 @@ dnl # Add support for i_op->tmpfile + dnl # + AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ + dnl # ++ dnl # 6.3 API change ++ dnl # The first arg is now struct mnt_idmap * ++ dnl # ++ 
ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_mnt_idmap], [ ++ #include ++ int tmpfile(struct mnt_idmap *idmap, ++ struct inode *inode, struct file *file, ++ umode_t mode) { return 0; } ++ static struct inode_operations ++ iops __attribute__ ((unused)) = { ++ .tmpfile = tmpfile, ++ }; ++ ],[]) + dnl # 6.1 API change + dnl # use struct file instead of struct dentry + dnl # +@@ -44,23 +57,29 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ + + AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [ + AC_MSG_CHECKING([whether i_op->tmpfile() exists]) +- ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [ ++ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_mnt_idmap], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) +- AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns]) +- ],[ +- ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry_userns], [ ++ AC_DEFINE(HAVE_TMPFILE_IDMAP, 1, [i_op->tmpfile() has mnt_idmap]) ++ ], [ ++ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) + AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns]) +- AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature]) + ],[ +- ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry], [ ++ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry_userns], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) ++ AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns]) + AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature]) + ],[ +- ZFS_LINUX_REQUIRE_API([i_op->tmpfile()], [3.11]) ++ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) ++ AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature]) ++ ],[ ++ ZFS_LINUX_REQUIRE_API([i_op->tmpfile()], [3.11]) ++ ]) + ]) + ]) + ]) +diff --git a/config/kernel-writepage_t.m4 
b/config/kernel-writepage_t.m4 +new file mode 100644 +index 000000000..3a0cffd98 +--- /dev/null ++++ b/config/kernel-writepage_t.m4 +@@ -0,0 +1,26 @@ ++AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITEPAGE_T], [ ++ dnl # ++ dnl # 6.3 API change ++ dnl # The writepage_t function type now has its first argument as ++ dnl # struct folio* instead of struct page* ++ dnl # ++ ZFS_LINUX_TEST_SRC([writepage_t_folio], [ ++ #include ++ int putpage(struct folio *folio, ++ struct writeback_control *wbc, void *data) ++ { return 0; } ++ writepage_t func = putpage; ++ ],[]) ++]) ++ ++AC_DEFUN([ZFS_AC_KERNEL_WRITEPAGE_T], [ ++ AC_MSG_CHECKING([whether int (*writepage_t)() takes struct folio*]) ++ ZFS_LINUX_TEST_RESULT([writepage_t_folio], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_WRITEPAGE_T_FOLIO, 1, ++ [int (*writepage_t)() takes struct folio*]) ++ ],[ ++ AC_MSG_RESULT(no) ++ ]) ++]) ++ +diff --git a/config/kernel-xattr-handler.m4 b/config/kernel-xattr-handler.m4 +index b6cbfa155..6b8a08dbc 100644 +--- a/config/kernel-xattr-handler.m4 ++++ b/config/kernel-xattr-handler.m4 +@@ -179,6 +179,21 @@ dnl # + dnl # Supported xattr handler set() interfaces checked newest to oldest. 
+ dnl # + AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ ++ ZFS_LINUX_TEST_SRC([xattr_handler_set_mnt_idmap], [ ++ #include ++ ++ int set(const struct xattr_handler *handler, ++ struct mnt_idmap *idmap, ++ struct dentry *dentry, struct inode *inode, ++ const char *name, const void *buffer, ++ size_t size, int flags) ++ { return 0; } ++ static const struct xattr_handler ++ xops __attribute__ ((unused)) = { ++ .set = set, ++ }; ++ ],[]) ++ + ZFS_LINUX_TEST_SRC([xattr_handler_set_userns], [ + #include + +@@ -240,53 +255,63 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ + dnl # The xattr_handler->set() callback was changed to 8 arguments, and + dnl # struct user_namespace* was inserted as arg #2 + dnl # +- AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and user_namespace]) +- ZFS_LINUX_TEST_RESULT([xattr_handler_set_userns], [ ++ dnl # 6.3 API change, ++ dnl # The xattr_handler->set() callback 2nd arg is now struct mnt_idmap * ++ dnl # ++ AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and mnt_idmap]) ++ ZFS_LINUX_TEST_RESULT([xattr_handler_set_mnt_idmap], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_XATTR_SET_USERNS, 1, +- [xattr_handler->set() takes user_namespace]) +- ],[ +- dnl # +- dnl # 4.7 API change, +- dnl # The xattr_handler->set() callback was changed to take both +- dnl # dentry and inode. 
+- dnl # +- AC_MSG_RESULT(no) +- AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode]) +- ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [ ++ AC_DEFINE(HAVE_XATTR_SET_IDMAP, 1, ++ [xattr_handler->set() takes mnt_idmap]) ++ ], [ ++ AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and user_namespace]) ++ ZFS_LINUX_TEST_RESULT([xattr_handler_set_userns], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1, +- [xattr_handler->set() wants both dentry and inode]) ++ AC_DEFINE(HAVE_XATTR_SET_USERNS, 1, ++ [xattr_handler->set() takes user_namespace]) + ],[ + dnl # +- dnl # 4.4 API change, +- dnl # The xattr_handler->set() callback was changed to take a +- dnl # xattr_handler, and handler_flags argument was removed and +- dnl # should be accessed by handler->flags. ++ dnl # 4.7 API change, ++ dnl # The xattr_handler->set() callback was changed to take both ++ dnl # dentry and inode. + dnl # + AC_MSG_RESULT(no) +- AC_MSG_CHECKING( +- [whether xattr_handler->set() wants xattr_handler]) +- ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [ ++ AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode]) ++ ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1, +- [xattr_handler->set() wants xattr_handler]) ++ AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1, ++ [xattr_handler->set() wants both dentry and inode]) + ],[ + dnl # +- dnl # 2.6.33 API change, +- dnl # The xattr_handler->set() callback was changed +- dnl # to take a dentry instead of an inode, and a +- dnl # handler_flags argument was added. ++ dnl # 4.4 API change, ++ dnl # The xattr_handler->set() callback was changed to take a ++ dnl # xattr_handler, and handler_flags argument was removed and ++ dnl # should be accessed by handler->flags. 
+ dnl # + AC_MSG_RESULT(no) + AC_MSG_CHECKING( +- [whether xattr_handler->set() wants dentry]) +- ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [ ++ [whether xattr_handler->set() wants xattr_handler]) ++ ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [ + AC_MSG_RESULT(yes) +- AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1, +- [xattr_handler->set() wants dentry]) ++ AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1, ++ [xattr_handler->set() wants xattr_handler]) + ],[ +- ZFS_LINUX_TEST_ERROR([xattr set()]) ++ dnl # ++ dnl # 2.6.33 API change, ++ dnl # The xattr_handler->set() callback was changed ++ dnl # to take a dentry instead of an inode, and a ++ dnl # handler_flags argument was added. ++ dnl # ++ AC_MSG_RESULT(no) ++ AC_MSG_CHECKING( ++ [whether xattr_handler->set() wants dentry]) ++ ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1, ++ [xattr_handler->set() wants dentry]) ++ ],[ ++ ZFS_LINUX_TEST_ERROR([xattr set()]) ++ ]) + ]) + ]) + ]) +diff --git a/config/kernel.m4 b/config/kernel.m4 +index 7806da7a8..173c78a2a 100644 +--- a/config/kernel.m4 ++++ b/config/kernel.m4 +@@ -69,6 +69,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ + ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE + ZFS_AC_KERNEL_SRC_XATTR + ZFS_AC_KERNEL_SRC_ACL ++ ZFS_AC_KERNEL_SRC_INODE_SETATTR + ZFS_AC_KERNEL_SRC_INODE_GETATTR + ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS + ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION +@@ -130,7 +131,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ + ZFS_AC_KERNEL_SRC_KSTRTOUL + ZFS_AC_KERNEL_SRC_PERCPU + ZFS_AC_KERNEL_SRC_CPU_HOTPLUG +- ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS ++ ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR + ZFS_AC_KERNEL_SRC_MKNOD + ZFS_AC_KERNEL_SRC_SYMLINK + ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS +@@ -144,6 +145,15 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ + ZFS_AC_KERNEL_SRC_KTHREAD + ZFS_AC_KERNEL_SRC_ZERO_PAGE + ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC ++ ZFS_AC_KERNEL_SRC_FILEMAP ++ ZFS_AC_KERNEL_SRC_WRITEPAGE_T ++ 
ZFS_AC_KERNEL_SRC_RECLAIMED ++ case "$host_cpu" in ++ powerpc*) ++ ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE ++ ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE ++ ;; ++ esac + + AC_MSG_CHECKING([for available kernel interfaces]) + ZFS_LINUX_TEST_COMPILE_ALL([kabi]) +@@ -186,6 +196,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ + ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE + ZFS_AC_KERNEL_XATTR + ZFS_AC_KERNEL_ACL ++ ZFS_AC_KERNEL_INODE_SETATTR + ZFS_AC_KERNEL_INODE_GETATTR + ZFS_AC_KERNEL_INODE_SET_FLAGS + ZFS_AC_KERNEL_INODE_SET_IVERSION +@@ -247,7 +258,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ + ZFS_AC_KERNEL_KSTRTOUL + ZFS_AC_KERNEL_PERCPU + ZFS_AC_KERNEL_CPU_HOTPLUG +- ZFS_AC_KERNEL_GENERIC_FILLATTR_USERNS ++ ZFS_AC_KERNEL_GENERIC_FILLATTR + ZFS_AC_KERNEL_MKNOD + ZFS_AC_KERNEL_SYMLINK + ZFS_AC_KERNEL_BIO_MAX_SEGS +@@ -261,6 +272,15 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ + ZFS_AC_KERNEL_KTHREAD + ZFS_AC_KERNEL_ZERO_PAGE + ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC ++ ZFS_AC_KERNEL_FILEMAP ++ ZFS_AC_KERNEL_WRITEPAGE_T ++ ZFS_AC_KERNEL_RECLAIMED ++ case "$host_cpu" in ++ powerpc*) ++ ZFS_AC_KERNEL_CPU_HAS_FEATURE ++ ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE ++ ;; ++ esac + ]) + + dnl # +diff --git a/config/zfs-build.m4 b/config/zfs-build.m4 +index 2ab6765c3..9390812cd 100644 +--- a/config/zfs-build.m4 ++++ b/config/zfs-build.m4 +@@ -81,7 +81,7 @@ AC_DEFUN([ZFS_AC_DEBUG], [ + AC_DEFUN([ZFS_AC_DEBUGINFO_ENABLE], [ + DEBUG_CFLAGS="$DEBUG_CFLAGS -g -fno-inline $NO_IPA_SRA" + +- KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline $NO_IPA_SRA" ++ KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline $KERNEL_NO_IPA_SRA" + KERNEL_MAKE="$KERNEL_MAKE CONFIG_DEBUG_INFO=y" + + DEBUGINFO_ZFS="_with_debuginfo" +@@ -217,6 +217,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [ + ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH + ZFS_AC_CONFIG_ALWAYS_CC_NO_OMIT_FRAME_POINTER + ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA ++ ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA + ZFS_AC_CONFIG_ALWAYS_CC_ASAN + ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD + 
ZFS_AC_CONFIG_ALWAYS_SYSTEM +diff --git a/configure.ac b/configure.ac +index 2671434af..cb339ccd4 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -222,6 +222,7 @@ AC_CONFIG_FILES([ + tests/zfs-tests/cmd/mmap_exec/Makefile + tests/zfs-tests/cmd/mmap_libaio/Makefile + tests/zfs-tests/cmd/mmap_seek/Makefile ++ tests/zfs-tests/cmd/mmap_sync/Makefile + tests/zfs-tests/cmd/mmapwrite/Makefile + tests/zfs-tests/cmd/nvlist_to_lua/Makefile + tests/zfs-tests/cmd/randfree_file/Makefile +diff --git a/contrib/initramfs/scripts/zfs b/contrib/initramfs/scripts/zfs +index 4ce739fda..3c51b53ee 100644 +--- a/contrib/initramfs/scripts/zfs ++++ b/contrib/initramfs/scripts/zfs +@@ -326,7 +326,7 @@ mount_fs() + + # Need the _original_ datasets mountpoint! + mountpoint=$(get_fs_value "$fs" mountpoint) +- ZFS_CMD="mount.zfs -o zfsutil" ++ ZFS_CMD="mount -o zfsutil -t zfs" + if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then + # Can't use the mountpoint property. Might be one of our + # clones. Check the 'org.zol:mountpoint' property set in +@@ -343,7 +343,7 @@ mount_fs() + fi + # Don't use mount.zfs -o zfsutils for legacy mountpoint + if [ "$mountpoint" = "legacy" ]; then +- ZFS_CMD="mount.zfs" ++ ZFS_CMD="mount -t zfs" + fi + # Last hail-mary: Hope 'rootmnt' is set! + mountpoint="" +@@ -914,7 +914,7 @@ mountroot() + echo " not specified on the kernel command line." + echo "" + echo "Manually mount the root filesystem on $rootmnt and then exit." 
+- echo "Hint: Try: mount.zfs -o zfsutil ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt" ++ echo "Hint: Try: mount -o zfsutil -t zfs ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt" + shell + fi + +diff --git a/contrib/pam_zfs_key/pam_zfs_key.c b/contrib/pam_zfs_key/pam_zfs_key.c +index 0db119382..313703770 100644 +--- a/contrib/pam_zfs_key/pam_zfs_key.c ++++ b/contrib/pam_zfs_key/pam_zfs_key.c +@@ -548,16 +548,11 @@ zfs_key_config_modify_session_counter(pam_handle_t *pamh, + errno); + return (-1); + } +- size_t runtime_path_len = strlen(runtime_path); +- size_t counter_path_len = runtime_path_len + 1 + 10; +- char *counter_path = malloc(counter_path_len + 1); +- if (!counter_path) { ++ ++ char *counter_path; ++ if (asprintf(&counter_path, "%s/%u", runtime_path, config->uid) == -1) + return (-1); +- } +- counter_path[0] = 0; +- strcat(counter_path, runtime_path); +- snprintf(counter_path + runtime_path_len, counter_path_len, "/%d", +- config->uid); ++ + const int fd = open(counter_path, + O_RDWR | O_CLOEXEC | O_CREAT | O_NOFOLLOW, + S_IRUSR | S_IWUSR); +diff --git a/include/os/freebsd/zfs/sys/zfs_znode_impl.h b/include/os/freebsd/zfs/sys/zfs_znode_impl.h +index 3d93525b4..120884116 100644 +--- a/include/os/freebsd/zfs/sys/zfs_znode_impl.h ++++ b/include/os/freebsd/zfs/sys/zfs_znode_impl.h +@@ -118,7 +118,8 @@ extern minor_t zfsdev_minor_alloc(void); + #define Z_ISLNK(type) ((type) == VLNK) + #define Z_ISDIR(type) ((type) == VDIR) + +-#define zn_has_cached_data(zp) vn_has_cached_data(ZTOV(zp)) ++#define zn_has_cached_data(zp, start, end) \ ++ vn_has_cached_data(ZTOV(zp)) + #define zn_flush_cached_data(zp, sync) vn_flush_cached_data(ZTOV(zp), sync) + #define zn_rlimit_fsize(zp, uio) \ + vn_rlimit_fsize(ZTOV(zp), GET_UIO_STRUCT(uio), zfs_uio_td(uio)) +diff --git a/include/os/linux/kernel/linux/dcache_compat.h b/include/os/linux/kernel/linux/dcache_compat.h +index c90135fd3..f87f1653a 100644 +--- a/include/os/linux/kernel/linux/dcache_compat.h ++++ 
b/include/os/linux/kernel/linux/dcache_compat.h +@@ -39,6 +39,21 @@ + #define d_alias d_u.d_alias + #endif + ++/* ++ * Starting from Linux 5.13, flush_dcache_page() becomes an inline function ++ * and under some configurations, may indirectly referencing GPL-only ++ * cpu_feature_keys on powerpc. Override this function when it is detected ++ * being GPL-only. ++ */ ++#if defined __powerpc__ && defined HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY ++#include ++#define flush_dcache_page(page) do { \ ++ if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && \ ++ test_bit(PG_dcache_clean, &(page)->flags)) \ ++ clear_bit(PG_dcache_clean, &(page)->flags); \ ++ } while (0) ++#endif ++ + /* + * 2.6.30 API change, + * The const keyword was added to the 'struct dentry_operations' in +diff --git a/include/os/linux/kernel/linux/simd_powerpc.h b/include/os/linux/kernel/linux/simd_powerpc.h +index 108cef22f..422b85af3 100644 +--- a/include/os/linux/kernel/linux/simd_powerpc.h ++++ b/include/os/linux/kernel/linux/simd_powerpc.h +@@ -76,6 +76,17 @@ + #define kfpu_init() 0 + #define kfpu_fini() ((void) 0) + ++/* ++ * Linux 4.7 makes cpu_has_feature to use jump labels on powerpc if ++ * CONFIG_JUMP_LABEL_FEATURE_CHECKS is enabled, in this case however it ++ * references GPL-only symbol cpu_feature_keys. Therefore we overrides this ++ * interface when it is detected being GPL-only. 
++ */ ++#if defined(CONFIG_JUMP_LABEL_FEATURE_CHECKS) && \ ++ defined(HAVE_CPU_HAS_FEATURE_GPL_ONLY) ++#define cpu_has_feature(feature) early_cpu_has_feature(feature) ++#endif ++ + /* + * Check if AltiVec instruction set is available + */ +diff --git a/include/os/linux/kernel/linux/vfs_compat.h b/include/os/linux/kernel/linux/vfs_compat.h +index 91e908598..e82bbf755 100644 +--- a/include/os/linux/kernel/linux/vfs_compat.h ++++ b/include/os/linux/kernel/linux/vfs_compat.h +@@ -344,7 +344,8 @@ static inline void zfs_gid_write(struct inode *ip, gid_t gid) + * 4.9 API change + */ + #if !(defined(HAVE_SETATTR_PREPARE_NO_USERNS) || \ +- defined(HAVE_SETATTR_PREPARE_USERNS)) ++ defined(HAVE_SETATTR_PREPARE_USERNS) || \ ++ defined(HAVE_SETATTR_PREPARE_IDMAP)) + static inline int + setattr_prepare(struct dentry *dentry, struct iattr *ia) + { +@@ -399,6 +400,15 @@ func(struct user_namespace *user_ns, const struct path *path, \ + return (func##_impl(user_ns, path, stat, request_mask, \ + query_flags)); \ + } ++#elif defined(HAVE_IDMAP_IOPS_GETATTR) ++#define ZPL_GETATTR_WRAPPER(func) \ ++static int \ ++func(struct mnt_idmap *user_ns, const struct path *path, \ ++ struct kstat *stat, u32 request_mask, unsigned int query_flags) \ ++{ \ ++ return (func##_impl(user_ns, path, stat, request_mask, \ ++ query_flags)); \ ++} + #else + #error + #endif +@@ -450,8 +460,15 @@ zpl_is_32bit_api(void) + * 5.12 API change + * To support id-mapped mounts, generic_fillattr() was modified to + * accept a new struct user_namespace* as its first arg. 
++ * ++ * 6.3 API change ++ * generic_fillattr() first arg is changed to struct mnt_idmap * ++ * + */ +-#ifdef HAVE_GENERIC_FILLATTR_USERNS ++#ifdef HAVE_GENERIC_FILLATTR_IDMAP ++#define zpl_generic_fillattr(idmap, ip, sp) \ ++ generic_fillattr(idmap, ip, sp) ++#elif defined(HAVE_GENERIC_FILLATTR_USERNS) + #define zpl_generic_fillattr(user_ns, ip, sp) \ + generic_fillattr(user_ns, ip, sp) + #else +diff --git a/include/os/linux/kernel/linux/xattr_compat.h b/include/os/linux/kernel/linux/xattr_compat.h +index 30403fe87..3ffd00169 100644 +--- a/include/os/linux/kernel/linux/xattr_compat.h ++++ b/include/os/linux/kernel/linux/xattr_compat.h +@@ -133,20 +133,35 @@ fn(const struct xattr_handler *handler, struct dentry *dentry, \ + #error "Unsupported kernel" + #endif + ++/* ++ * 6.3 API change, ++ * The xattr_handler->set() callback was changed to take the ++ * struct mnt_idmap* as the first arg, to support idmapped ++ * mounts. ++ */ ++#if defined(HAVE_XATTR_SET_IDMAP) ++#define ZPL_XATTR_SET_WRAPPER(fn) \ ++static int \ ++fn(const struct xattr_handler *handler, struct mnt_idmap *user_ns, \ ++ struct dentry *dentry, struct inode *inode, const char *name, \ ++ const void *buffer, size_t size, int flags) \ ++{ \ ++ return (__ ## fn(user_ns, inode, name, buffer, size, flags)); \ ++} + /* + * 5.12 API change, + * The xattr_handler->set() callback was changed to take the + * struct user_namespace* as the first arg, to support idmapped + * mounts. 
+ */ +-#if defined(HAVE_XATTR_SET_USERNS) ++#elif defined(HAVE_XATTR_SET_USERNS) + #define ZPL_XATTR_SET_WRAPPER(fn) \ + static int \ + fn(const struct xattr_handler *handler, struct user_namespace *user_ns, \ + struct dentry *dentry, struct inode *inode, const char *name, \ + const void *buffer, size_t size, int flags) \ + { \ +- return (__ ## fn(inode, name, buffer, size, flags)); \ ++ return (__ ## fn(user_ns, inode, name, buffer, size, flags)); \ + } + /* + * 4.7 API change, +@@ -160,7 +175,7 @@ fn(const struct xattr_handler *handler, struct dentry *dentry, \ + struct inode *inode, const char *name, const void *buffer, \ + size_t size, int flags) \ + { \ +- return (__ ## fn(inode, name, buffer, size, flags)); \ ++ return (__ ## fn(kcred->user_ns, inode, name, buffer, size, flags));\ + } + /* + * 4.4 API change, +@@ -174,7 +189,8 @@ static int \ + fn(const struct xattr_handler *handler, struct dentry *dentry, \ + const char *name, const void *buffer, size_t size, int flags) \ + { \ +- return (__ ## fn(dentry->d_inode, name, buffer, size, flags)); \ ++ return (__ ## fn(kcred->user_ns, dentry->d_inode, name, \ ++ buffer, size, flags)); \ + } + /* + * 2.6.33 API change, +@@ -187,7 +203,8 @@ static int \ + fn(struct dentry *dentry, const char *name, const void *buffer, \ + size_t size, int flags, int unused_handler_flags) \ + { \ +- return (__ ## fn(dentry->d_inode, name, buffer, size, flags)); \ ++ return (__ ## fn(kcred->user_ns, dentry->d_inode, name, buffer, \ ++ size, flags)); \ + } + #else + #error "Unsupported kernel" +diff --git a/include/os/linux/spl/sys/cred.h b/include/os/linux/spl/sys/cred.h +index b7d3f38d7..501bd4566 100644 +--- a/include/os/linux/spl/sys/cred.h ++++ b/include/os/linux/spl/sys/cred.h +@@ -45,6 +45,8 @@ typedef struct cred cred_t; + #define SGID_TO_KGID(x) (KGIDT_INIT(x)) + #define KGIDP_TO_SGIDP(x) (&(x)->val) + ++extern zidmap_t *zfs_get_init_idmap(void); ++ + extern void crhold(cred_t *cr); + extern void crfree(cred_t *cr); + extern 
uid_t crgetuid(const cred_t *cr); +diff --git a/include/os/linux/spl/sys/types.h b/include/os/linux/spl/sys/types.h +index b44c94518..4d638efbb 100644 +--- a/include/os/linux/spl/sys/types.h ++++ b/include/os/linux/spl/sys/types.h +@@ -54,4 +54,18 @@ typedef ulong_t pgcnt_t; + typedef int major_t; + typedef int minor_t; + ++struct user_namespace; ++#ifdef HAVE_IOPS_CREATE_IDMAP ++#include ++struct mnt_idmap { ++ struct user_namespace *owner; ++ refcount_t count; ++}; ++typedef struct mnt_idmap zidmap_t; ++#else ++typedef struct user_namespace zidmap_t; ++#endif ++ ++extern zidmap_t *zfs_init_idmap; ++ + #endif /* _SPL_TYPES_H */ +diff --git a/include/os/linux/zfs/sys/trace_acl.h b/include/os/linux/zfs/sys/trace_acl.h +index 21bcefa4e..656552749 100644 +--- a/include/os/linux/zfs/sys/trace_acl.h ++++ b/include/os/linux/zfs/sys/trace_acl.h +@@ -58,9 +58,10 @@ DECLARE_EVENT_CLASS(zfs_ace_class, + __field(uint64_t, z_size) + __field(uint64_t, z_pflags) + __field(uint32_t, z_sync_cnt) ++ __field(uint32_t, z_sync_writes_cnt) ++ __field(uint32_t, z_async_writes_cnt) + __field(mode_t, z_mode) + __field(boolean_t, z_is_sa) +- __field(boolean_t, z_is_mapped) + __field(boolean_t, z_is_ctldir) + + __field(uint32_t, i_uid) +@@ -90,9 +91,10 @@ DECLARE_EVENT_CLASS(zfs_ace_class, + __entry->z_size = zn->z_size; + __entry->z_pflags = zn->z_pflags; + __entry->z_sync_cnt = zn->z_sync_cnt; ++ __entry->z_sync_writes_cnt = zn->z_sync_writes_cnt; ++ __entry->z_async_writes_cnt = zn->z_async_writes_cnt; + __entry->z_mode = zn->z_mode; + __entry->z_is_sa = zn->z_is_sa; +- __entry->z_is_mapped = zn->z_is_mapped; + __entry->z_is_ctldir = zn->z_is_ctldir; + + __entry->i_uid = KUID_TO_SUID(ZTOI(zn)->i_uid); +@@ -114,18 +116,18 @@ DECLARE_EVENT_CLASS(zfs_ace_class, + TP_printk("zn { id %llu unlinked %u atime_dirty %u " + "zn_prefetch %u blksz %u seq %u " + "mapcnt %llu size %llu pflags %llu " +- "sync_cnt %u mode 0x%x is_sa %d " +- "is_mapped %d is_ctldir %d inode { " +- "uid %u gid %u ino %lu 
nlink %u size %lli " ++ "sync_cnt %u sync_writes_cnt %u async_writes_cnt %u " ++ "mode 0x%x is_sa %d is_ctldir %d " ++ "inode { uid %u gid %u ino %lu nlink %u size %lli " + "blkbits %u bytes %u mode 0x%x generation %x } } " + "ace { type %u flags %u access_mask %u } mask_matched %u", + __entry->z_id, __entry->z_unlinked, __entry->z_atime_dirty, + __entry->z_zn_prefetch, __entry->z_blksz, + __entry->z_seq, __entry->z_mapcnt, __entry->z_size, +- __entry->z_pflags, __entry->z_sync_cnt, __entry->z_mode, +- __entry->z_is_sa, __entry->z_is_mapped, +- __entry->z_is_ctldir, __entry->i_uid, +- __entry->i_gid, __entry->i_ino, __entry->i_nlink, ++ __entry->z_pflags, __entry->z_sync_cnt, ++ __entry->z_sync_writes_cnt, __entry->z_async_writes_cnt, ++ __entry->z_mode, __entry->z_is_sa, __entry->z_is_ctldir, ++ __entry->i_uid, __entry->i_gid, __entry->i_ino, __entry->i_nlink, + __entry->i_size, __entry->i_blkbits, + __entry->i_bytes, __entry->i_mode, __entry->i_generation, + __entry->z_type, __entry->z_flags, __entry->z_access_mask, +diff --git a/include/os/linux/zfs/sys/zfs_vnops_os.h b/include/os/linux/zfs/sys/zfs_vnops_os.h +index 47f91e4a6..331f2e2bc 100644 +--- a/include/os/linux/zfs/sys/zfs_vnops_os.h ++++ b/include/os/linux/zfs/sys/zfs_vnops_os.h +@@ -54,8 +54,7 @@ extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, + extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, + cred_t *cr, int flags); + extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr); +-extern int zfs_getattr_fast(struct user_namespace *, struct inode *ip, +- struct kstat *sp); ++extern int zfs_getattr_fast(zidmap_t *, struct inode *ip, struct kstat *sp); + extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr); + extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, + char *tnm, cred_t *cr, int flags); +@@ -68,9 +67,9 @@ extern void zfs_inactive(struct inode *ip); + extern int zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag, + 
offset_t offset, cred_t *cr); + extern int zfs_fid(struct inode *ip, fid_t *fidp); +-extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages); ++extern int zfs_getpage(struct inode *ip, struct page *pp); + extern int zfs_putpage(struct inode *ip, struct page *pp, +- struct writeback_control *wbc); ++ struct writeback_control *wbc, boolean_t for_sync); + extern int zfs_dirty_inode(struct inode *ip, int flags); + extern int zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, + size_t len, unsigned long vm_flags); +diff --git a/include/os/linux/zfs/sys/zfs_znode_impl.h b/include/os/linux/zfs/sys/zfs_znode_impl.h +index de46fc8f2..9b9ac7a4f 100644 +--- a/include/os/linux/zfs/sys/zfs_znode_impl.h ++++ b/include/os/linux/zfs/sys/zfs_znode_impl.h +@@ -47,9 +47,16 @@ + extern "C" { + #endif + ++#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE) + #define ZNODE_OS_FIELDS \ + inode_timespec_t z_btime; /* creation/birth time (cached) */ \ + struct inode z_inode; ++#else ++#define ZNODE_OS_FIELDS \ ++ inode_timespec_t z_btime; /* creation/birth time (cached) */ \ ++ struct inode z_inode; \ ++ boolean_t z_is_mapped; /* we are mmap'ed */ ++#endif + + /* + * Convert between znode pointers and inode pointers +@@ -70,7 +77,14 @@ extern "C" { + #define Z_ISDEV(type) (S_ISCHR(type) || S_ISBLK(type) || S_ISFIFO(type)) + #define Z_ISDIR(type) S_ISDIR(type) + +-#define zn_has_cached_data(zp) ((zp)->z_is_mapped) ++#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE) ++#define zn_has_cached_data(zp, start, end) \ ++ filemap_range_has_page(ZTOI(zp)->i_mapping, start, end) ++#else ++#define zn_has_cached_data(zp, start, end) \ ++ ((zp)->z_is_mapped) ++#endif ++ + #define zn_flush_cached_data(zp, sync) write_inode_now(ZTOI(zp), sync) + #define zn_rlimit_fsize(zp, uio) (0) + +diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h +index ac9815d4e..4e08470e7 100644 +--- a/include/os/linux/zfs/sys/zpl.h ++++ b/include/os/linux/zfs/sys/zpl.h +@@ -64,7 +64,10 @@ extern int 
zpl_xattr_security_init(struct inode *ip, struct inode *dip, + const struct qstr *qstr); + #if defined(CONFIG_FS_POSIX_ACL) + #if defined(HAVE_SET_ACL) +-#if defined(HAVE_SET_ACL_USERNS) ++#if defined(HAVE_SET_ACL_IDMAP_DENTRY) ++extern int zpl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, ++ struct posix_acl *acl, int type); ++#elif defined(HAVE_SET_ACL_USERNS) + extern int zpl_set_acl(struct user_namespace *userns, struct inode *ip, + struct posix_acl *acl, int type); + #elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2) +@@ -186,13 +189,15 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx) + + #if defined(HAVE_INODE_OWNER_OR_CAPABLE) + #define zpl_inode_owner_or_capable(ns, ip) inode_owner_or_capable(ip) +-#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED) ++#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_USERNS) + #define zpl_inode_owner_or_capable(ns, ip) inode_owner_or_capable(ns, ip) ++#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_IDMAP) ++#define zpl_inode_owner_or_capable(idmap, ip) inode_owner_or_capable(idmap, ip) + #else + #error "Unsupported kernel" + #endif + +-#ifdef HAVE_SETATTR_PREPARE_USERNS ++#if defined(HAVE_SETATTR_PREPARE_USERNS) || defined(HAVE_SETATTR_PREPARE_IDMAP) + #define zpl_setattr_prepare(ns, dentry, ia) setattr_prepare(ns, dentry, ia) + #else + /* +diff --git a/include/sys/dmu.h b/include/sys/dmu.h +index 7bdd42e8b..12bd88720 100644 +--- a/include/sys/dmu.h ++++ b/include/sys/dmu.h +@@ -778,6 +778,9 @@ dmu_tx_t *dmu_tx_create(objset_t *os); + void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len); + void dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, + int len); ++void dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len); ++void dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, ++ int len); + void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, + uint64_t len); + void dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t 
off, +diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h +index 7ade2dc91..fffcbcfca 100644 +--- a/include/sys/dmu_objset.h ++++ b/include/sys/dmu_objset.h +@@ -72,6 +72,10 @@ struct dmu_tx; + */ + #define OBJSET_CRYPT_PORTABLE_FLAGS_MASK (0) + ++#if defined(__clang__) ++#pragma clang diagnostic push ++#pragma clang diagnostic ignored "-Wgnu-variable-sized-type-not-at-end" ++#endif + typedef struct objset_phys { + dnode_phys_t os_meta_dnode; + zil_header_t os_zil_header; +@@ -88,6 +92,9 @@ typedef struct objset_phys { + char os_pad1[OBJSET_PHYS_SIZE_V3 - OBJSET_PHYS_SIZE_V2 - + sizeof (dnode_phys_t)]; + } objset_phys_t; ++#if defined(__clang__) ++#pragma clang diagnostic pop ++#endif + + typedef int (*dmu_objset_upgrade_cb_t)(objset_t *); + +diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h +index ad3f1b0e4..e8886fd4e 100644 +--- a/include/sys/dmu_tx.h ++++ b/include/sys/dmu_tx.h +@@ -90,6 +90,7 @@ enum dmu_tx_hold_type { + THT_ZAP, + THT_SPACE, + THT_SPILL, ++ THT_APPEND, + THT_NUMTYPES + }; + +diff --git a/include/sys/dnode.h b/include/sys/dnode.h +index 20b7c2aaf..39bbdae44 100644 +--- a/include/sys/dnode.h ++++ b/include/sys/dnode.h +@@ -120,7 +120,11 @@ extern "C" { + #define DN_MAX_LEVELS (DIV_ROUND_UP(DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT, \ + DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT) + 1) + +-#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ ++/* ++ * Use the flexible array instead of the fixed length one dn_bonus ++ * to address memcpy/memmove fortify error ++ */ ++#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus_flexible + \ + (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) + #define DN_MAX_BONUS_LEN(dnp) \ + ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? 
\ +@@ -266,6 +270,10 @@ typedef struct dnode_phys { + sizeof (blkptr_t)]; + blkptr_t dn_spill; + }; ++ struct { ++ blkptr_t __dn_ignore4; ++ uint8_t dn_bonus_flexible[]; ++ }; + }; + } dnode_phys_t; + +diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h +index 111e70ece..84f5aee59 100644 +--- a/include/sys/fs/zfs.h ++++ b/include/sys/fs/zfs.h +@@ -1173,6 +1173,7 @@ typedef enum pool_initialize_func { + POOL_INITIALIZE_START, + POOL_INITIALIZE_CANCEL, + POOL_INITIALIZE_SUSPEND, ++ POOL_INITIALIZE_UNINIT, + POOL_INITIALIZE_FUNCS + } pool_initialize_func_t; + +diff --git a/include/sys/spa.h b/include/sys/spa.h +index fedadab45..42f7fec0f 100644 +--- a/include/sys/spa.h ++++ b/include/sys/spa.h +@@ -785,6 +785,7 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx); + #define SPA_ASYNC_L2CACHE_REBUILD 0x800 + #define SPA_ASYNC_L2CACHE_TRIM 0x1000 + #define SPA_ASYNC_REBUILD_DONE 0x2000 ++#define SPA_ASYNC_DETACH_SPARE 0x4000 + + /* device manipulation */ + extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot); +@@ -971,6 +972,8 @@ extern int spa_import_progress_set_state(uint64_t pool_guid, + /* Pool configuration locks */ + extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw); + extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw); ++extern void spa_config_enter_mmp(spa_t *spa, int locks, const void *tag, ++ krw_t rw); + extern void spa_config_exit(spa_t *spa, int locks, const void *tag); + extern int spa_config_held(spa_t *spa, int locks, krw_t rw); + +diff --git a/include/sys/vdev_initialize.h b/include/sys/vdev_initialize.h +index 81d39ebeb..942fc71c5 100644 +--- a/include/sys/vdev_initialize.h ++++ b/include/sys/vdev_initialize.h +@@ -33,6 +33,7 @@ extern "C" { + #endif + + extern void vdev_initialize(vdev_t *vd); ++extern void vdev_uninitialize(vdev_t *vd); + extern void vdev_initialize_stop(vdev_t *vd, + vdev_initializing_state_t tgt_state, list_t *vd_list); + extern void 
vdev_initialize_stop_all(vdev_t *vd, +diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h +index 0df8a0e4b..48dab671d 100644 +--- a/include/sys/zfs_znode.h ++++ b/include/sys/zfs_znode.h +@@ -188,7 +188,6 @@ typedef struct znode { + boolean_t z_atime_dirty; /* atime needs to be synced */ + boolean_t z_zn_prefetch; /* Prefetch znodes? */ + boolean_t z_is_sa; /* are we native sa? */ +- boolean_t z_is_mapped; /* are we mmap'ed */ + boolean_t z_is_ctldir; /* are we .zfs entry */ + boolean_t z_suspended; /* extra ref from a suspend? */ + uint_t z_blksz; /* block size in bytes */ +@@ -198,6 +197,8 @@ typedef struct znode { + uint64_t z_size; /* file size (cached) */ + uint64_t z_pflags; /* pflags (cached) */ + uint32_t z_sync_cnt; /* synchronous open count */ ++ uint32_t z_sync_writes_cnt; /* synchronous write count */ ++ uint32_t z_async_writes_cnt; /* asynchronous write count */ + mode_t z_mode; /* mode (cached) */ + kmutex_t z_acl_lock; /* acl data lock */ + zfs_acl_t *z_acl_cached; /* cached acl */ +diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi +index 13ce19df9..58c2d7635 100644 +--- a/lib/libzfs/libzfs.abi ++++ b/lib/libzfs/libzfs.abi +@@ -5410,7 +5410,8 @@ + + + +- ++ ++ + + + +diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c +index f2219d1c3..f6d844bdf 100644 +--- a/lib/libzfs/libzfs_dataset.c ++++ b/lib/libzfs/libzfs_dataset.c +@@ -1017,6 +1017,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, + nvlist_t *ret; + int chosen_normal = -1; + int chosen_utf = -1; ++ int set_maxbs = 0; + + if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) { + (void) no_memory(hdl); +@@ -1234,12 +1235,17 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } ++ /* save the ZFS_PROP_RECORDSIZE during create op */ ++ if (zpool_hdl == NULL && prop == ZFS_PROP_RECORDSIZE) { ++ set_maxbs = intval; ++ } + break; + } + + case 
ZFS_PROP_SPECIAL_SMALL_BLOCKS: + { +- int maxbs = SPA_OLD_MAXBLOCKSIZE; ++ int maxbs = ++ set_maxbs == 0 ? SPA_OLD_MAXBLOCKSIZE : set_maxbs; + char buf[64]; + + if (zpool_hdl != NULL) { +diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c +index 29f077841..fc6c6e8e2 100644 +--- a/lib/libzfs/libzfs_pool.c ++++ b/lib/libzfs/libzfs_pool.c +@@ -2224,8 +2224,8 @@ xlate_init_err(int err) + } + + /* +- * Begin, suspend, or cancel the initialization (initializing of all free +- * blocks) for the given vdevs in the given pool. ++ * Begin, suspend, cancel, or uninit (clear) the initialization (initializing ++ * of all free blocks) for the given vdevs in the given pool. + */ + static int + zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type, +@@ -2251,11 +2251,16 @@ zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type, + vdev_guids, &errlist); + + if (err != 0) { +- if (errlist != NULL) { +- vd_errlist = fnvlist_lookup_nvlist(errlist, +- ZPOOL_INITIALIZE_VDEVS); ++ if (errlist != NULL && nvlist_lookup_nvlist(errlist, ++ ZPOOL_INITIALIZE_VDEVS, &vd_errlist) == 0) { + goto list_errors; + } ++ ++ if (err == EINVAL && cmd_type == POOL_INITIALIZE_UNINIT) { ++ zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN, ++ "uninitialize is not supported by kernel")); ++ } ++ + (void) zpool_standard_error(zhp->zpool_hdl, err, + dgettext(TEXT_DOMAIN, "operation failed")); + goto out; +diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi +index 1b03a5c42..7ede3e097 100644 +--- a/lib/libzfs_core/libzfs_core.abi ++++ b/lib/libzfs_core/libzfs_core.abi +@@ -1726,7 +1726,8 @@ + + + +- ++ ++ + + + +diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 +index 71a95c3bd..0c60a9c8e 100644 +--- a/man/man4/zfs.4 ++++ b/man/man4/zfs.4 +@@ -1712,7 +1712,7 @@ completes in order to verify the checksums of all blocks which have been + resilvered. + This is enabled by default and strongly recommended. + . 
+-.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32MB Pc Pq ulong ++.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq ulong + Maximum amount of I/O that can be concurrently issued for a sequential + resilver per leaf device, given in bytes. + . +@@ -1831,6 +1831,13 @@ When we cross this limit from above it is because we are issuing verification I/ + In this case (unless the metadata scan is done) we stop issuing verification I/O + and start scanning metadata again until we get to the hard limit. + . ++.It Sy zfs_scan_report_txgs Ns = Ns Sy 0 Ns | Ns 1 Pq uint ++When reporting resilver throughput and estimated completion time use the ++performance observed over roughly the last ++.Sy zfs_scan_report_txgs ++TXGs. ++When set to zero performance is calculated over the time between checkpoints. ++. + .It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int + Enforce tight memory limits on pool scans when a sequential scan is in progress. + When disabled, the memory limit may be exceeded by fast disks. +@@ -1839,7 +1846,7 @@ When disabled, the memory limit may be exceeded by fast disks. + Freezes a scrub/resilver in progress without actually pausing it. + Intended for testing/debugging. + . +-.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 4MB Pc Pq int ++.It Sy zfs_scan_vdev_limit Ns = Ns Sy 16777216 Ns B Po 16 MiB Pc Pq int + Maximum amount of data that can be concurrently issued at once for scrubs and + resilvers per leaf device, given in bytes. + . +diff --git a/man/man8/zpool-initialize.8 b/man/man8/zpool-initialize.8 +index 0a108180d..ada00bb1b 100644 +--- a/man/man8/zpool-initialize.8 ++++ b/man/man8/zpool-initialize.8 +@@ -36,7 +36,7 @@ + .Sh SYNOPSIS + .Nm zpool + .Cm initialize +-.Op Fl c Ns | Ns Fl s ++.Op Fl c Ns | Ns Fl s | Ns Fl u + .Op Fl w + .Ar pool + .Oo Ar device Oc Ns … +@@ -60,6 +60,14 @@ initialized, the command will fail and no suspension will occur on any device. 
+ Initializing can then be resumed by running + .Nm zpool Cm initialize + with no flags on the relevant target devices. ++.It Fl u , -uninit ++Clears the initialization state on the specified devices, or all eligible ++devices if none are specified. ++If the devices are being actively initialized the command will fail. ++After being cleared ++.Nm zpool Cm initialize ++with no flags can be used to re-initialize all unallocated regions on ++the relevant target devices. + .It Fl w , -wait + Wait until the devices have finished initializing before returning. + .El +diff --git a/module/Kbuild.in b/module/Kbuild.in +index 1507965c5..7675d614f 100644 +--- a/module/Kbuild.in ++++ b/module/Kbuild.in +@@ -44,4 +44,5 @@ endif + subdir-asflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) + subdir-ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) + ++ + endif +diff --git a/module/icp/algs/edonr/edonr.c b/module/icp/algs/edonr/edonr.c +index 7a3ba30c0..baf8bb885 100644 +--- a/module/icp/algs/edonr/edonr.c ++++ b/module/icp/algs/edonr/edonr.c +@@ -343,9 +343,11 @@ Q256(size_t bitlen, const uint32_t *data, uint32_t *restrict p) + * which only goes over it by a hair (1248 bytes on ARM32). + */ + #include <sys/isa_defs.h> /* for _ILP32 */ +-#ifdef _ILP32 /* We're 32-bit, assume small stack frames */ ++#if defined(_ILP32) /* We're 32-bit, assume small stack frames */ ++#if defined(__GNUC__) && !defined(__clang__) + #pragma GCC diagnostic ignored "-Wframe-larger-than=" + #endif ++#endif + + #if defined(__IBMC__) && defined(_AIX) && defined(__64BIT__) + static inline size_t +diff --git a/module/icp/algs/skein/skein_block.c b/module/icp/algs/skein/skein_block.c +index 7ba165a48..3ad52da5f 100644 +--- a/module/icp/algs/skein/skein_block.c ++++ b/module/icp/algs/skein/skein_block.c +@@ -30,7 +30,9 @@ + * the #pragma here to ignore the warning.
+ */ + #if defined(_ILP32) || defined(__powerpc) /* Assume small stack */ ++#if defined(__GNUC__) && !defined(__clang__) + #pragma GCC diagnostic ignored "-Wframe-larger-than=" ++#endif + /* + * We're running on 32-bit, don't unroll loops to save stack frame space + * +diff --git a/module/lua/ldo.c b/module/lua/ldo.c +index a9835c4f5..e4abe04e9 100644 +--- a/module/lua/ldo.c ++++ b/module/lua/ldo.c +@@ -197,7 +197,8 @@ l_noret luaD_throw (lua_State *L, int errcode) { + } + } + +-#if defined(HAVE_INFINITE_RECURSION) ++#if defined(__GNUC__) && !defined(__clang__) && \ ++ defined(HAVE_INFINITE_RECURSION) + #pragma GCC diagnostic pop + #endif + +diff --git a/module/os/freebsd/zfs/zfs_ctldir.c b/module/os/freebsd/zfs/zfs_ctldir.c +index 5bd2e1510..cfc4bab2f 100644 +--- a/module/os/freebsd/zfs/zfs_ctldir.c ++++ b/module/os/freebsd/zfs/zfs_ctldir.c +@@ -204,6 +204,10 @@ sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id, + return (error); + } + ++#if __FreeBSD_version >= 1400077 ++ vn_set_state(vp, VSTATE_CONSTRUCTED); ++#endif ++ + *vpp = vp; + return (0); + } +@@ -675,6 +679,17 @@ zfsctl_root_readdir(struct vop_readdir_args *ap) + + ASSERT3S(vp->v_type, ==, VDIR); + ++ /* ++ * FIXME: this routine only ever emits 3 entries and does not tolerate ++ * being called with a buffer too small to handle all of them. ++ * ++ * The check below facilitates the idiom of repeating calls until the ++ * count to return is 0. 
++ */ ++ if (zfs_uio_offset(&uio) == 3 * sizeof (entry)) { ++ return (0); ++ } ++ + error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio, + &dots_offset); + if (error != 0) { +@@ -800,6 +815,9 @@ static struct vop_vector zfsctl_ops_root = { + .vop_default = &default_vnodeops, + #if __FreeBSD_version >= 1300121 + .vop_fplookup_vexec = VOP_EAGAIN, ++#endif ++#if __FreeBSD_version >= 1300139 ++ .vop_fplookup_symlink = VOP_EAGAIN, + #endif + .vop_open = zfsctl_common_open, + .vop_close = zfsctl_common_close, +@@ -1126,6 +1144,9 @@ static struct vop_vector zfsctl_ops_snapdir = { + .vop_default = &default_vnodeops, + #if __FreeBSD_version >= 1300121 + .vop_fplookup_vexec = VOP_EAGAIN, ++#endif ++#if __FreeBSD_version >= 1300139 ++ .vop_fplookup_symlink = VOP_EAGAIN, + #endif + .vop_open = zfsctl_common_open, + .vop_close = zfsctl_common_close, +@@ -1150,7 +1171,7 @@ zfsctl_snapshot_inactive(struct vop_inactive_args *ap) + { + vnode_t *vp = ap->a_vp; + +- VERIFY3S(vrecycle(vp), ==, 1); ++ vrecycle(vp); + return (0); + } + +@@ -1234,6 +1255,11 @@ static struct vop_vector zfsctl_ops_snapshot = { + #if __FreeBSD_version >= 1300121 + .vop_fplookup_vexec = VOP_EAGAIN, + #endif ++#if __FreeBSD_version >= 1300139 ++ .vop_fplookup_symlink = VOP_EAGAIN, ++#endif ++ .vop_open = zfsctl_common_open, ++ .vop_close = zfsctl_common_close, + .vop_inactive = zfsctl_snapshot_inactive, + #if __FreeBSD_version >= 1300045 + .vop_need_inactive = vop_stdneed_inactive, +diff --git a/module/os/freebsd/zfs/zfs_ioctl_os.c b/module/os/freebsd/zfs/zfs_ioctl_os.c +index 7f7e2b72c..effc11518 100644 +--- a/module/os/freebsd/zfs/zfs_ioctl_os.c ++++ b/module/os/freebsd/zfs/zfs_ioctl_os.c +@@ -59,7 +59,7 @@ zfs_vfs_ref(zfsvfs_t **zfvp) + return (error); + } + +-int ++boolean_t + zfs_vfs_held(zfsvfs_t *zfsvfs) + { + return (zfsvfs->z_vfs != NULL); +diff --git a/module/os/freebsd/zfs/zfs_znode.c b/module/os/freebsd/zfs/zfs_znode.c +index 1debc3ec3..92e3bdd2e 100644 +--- 
a/module/os/freebsd/zfs/zfs_znode.c ++++ b/module/os/freebsd/zfs/zfs_znode.c +@@ -153,6 +153,9 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) + zp->z_xattr_cached = NULL; + zp->z_xattr_parent = 0; + zp->z_vnode = NULL; ++ zp->z_sync_writes_cnt = 0; ++ zp->z_async_writes_cnt = 0; ++ + return (0); + } + +@@ -172,6 +175,9 @@ zfs_znode_cache_destructor(void *buf, void *arg) + + ASSERT3P(zp->z_acl_cached, ==, NULL); + ASSERT3P(zp->z_xattr_cached, ==, NULL); ++ ++ ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt)); ++ ASSERT0(atomic_load_32(&zp->z_async_writes_cnt)); + } + + +@@ -457,6 +463,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, + zp->z_blksz = blksz; + zp->z_seq = 0x7A4653; + zp->z_sync_cnt = 0; ++ zp->z_sync_writes_cnt = 0; ++ zp->z_async_writes_cnt = 0; + #if __FreeBSD_version >= 1300139 + atomic_store_ptr(&zp->z_cached_symlink, NULL); + #endif +diff --git a/module/os/linux/spl/spl-cred.c b/module/os/linux/spl/spl-cred.c +index f81b9540a..d407fc66b 100644 +--- a/module/os/linux/spl/spl-cred.c ++++ b/module/os/linux/spl/spl-cred.c +@@ -145,6 +145,18 @@ crgetgid(const cred_t *cr) + return (KGID_TO_SGID(cr->fsgid)); + } + ++/* Return the initial user ns or nop_mnt_idmap */ ++zidmap_t * ++zfs_get_init_idmap(void) ++{ ++#ifdef HAVE_IOPS_CREATE_IDMAP ++ return ((zidmap_t *)&nop_mnt_idmap); ++#else ++ return ((zidmap_t *)&init_user_ns); ++#endif ++} ++ ++EXPORT_SYMBOL(zfs_get_init_idmap); + EXPORT_SYMBOL(crhold); + EXPORT_SYMBOL(crfree); + EXPORT_SYMBOL(crgetuid); +diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c +index 508fb9d4c..2cb5251d7 100644 +--- a/module/os/linux/spl/spl-generic.c ++++ b/module/os/linux/spl/spl-generic.c +@@ -225,8 +225,10 @@ __div_u64(uint64_t u, uint32_t v) + * replacements for libgcc-provided functions and will never be called + * directly. 
+ */ ++#if defined(__GNUC__) && !defined(__clang__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wmissing-prototypes" ++#endif + + /* + * Implementation of 64-bit unsigned division for 32-bit machines. +@@ -425,7 +427,9 @@ __aeabi_ldivmod(int64_t u, int64_t v) + EXPORT_SYMBOL(__aeabi_ldivmod); + #endif /* __arm || __arm__ */ + ++#if defined(__GNUC__) && !defined(__clang__) + #pragma GCC diagnostic pop ++#endif + + #endif /* BITS_PER_LONG */ + +diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c +index 5a318e0a5..d586afa9b 100644 +--- a/module/os/linux/spl/spl-kmem-cache.c ++++ b/module/os/linux/spl/spl-kmem-cache.c +@@ -183,8 +183,11 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size) + * of that infrastructure we are responsible for incrementing it. + */ + if (current->reclaim_state) ++#ifdef HAVE_RECLAIM_STATE_RECLAIMED ++ current->reclaim_state->reclaimed += size >> PAGE_SHIFT; ++#else + current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT; +- ++#endif + vfree(ptr); + } + +diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c +index f96cd1271..fc76fe0e0 100644 +--- a/module/os/linux/zfs/arc_os.c ++++ b/module/os/linux/zfs/arc_os.c +@@ -219,7 +219,11 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) + arc_reduce_target_size(ptob(sc->nr_to_scan)); + arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE); + if (current->reclaim_state != NULL) ++#ifdef HAVE_RECLAIM_STATE_RECLAIMED ++ current->reclaim_state->reclaimed += sc->nr_to_scan; ++#else + current->reclaim_state->reclaimed_slab += sc->nr_to_scan; ++#endif + + /* + * We are experiencing memory pressure which the arc_evict_zthr was +diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c +index 5a52092bb..8d508bcb4 100644 +--- a/module/os/linux/zfs/policy.c ++++ b/module/os/linux/zfs/policy.c +@@ -124,7 +124,7 @@ secpolicy_vnode_any_access(const cred_t *cr, struct inode *ip, uid_t owner) + 
if (crgetuid(cr) == owner) + return (0); + +- if (zpl_inode_owner_or_capable(kcred->user_ns, ip)) ++ if (zpl_inode_owner_or_capable(zfs_init_idmap, ip)) + return (0); + + #if defined(CONFIG_USER_NS) +diff --git a/module/os/linux/zfs/zfs_ctldir.c b/module/os/linux/zfs/zfs_ctldir.c +index c45644a69..743b03412 100644 +--- a/module/os/linux/zfs/zfs_ctldir.c ++++ b/module/os/linux/zfs/zfs_ctldir.c +@@ -468,7 +468,9 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, + zp->z_atime_dirty = B_FALSE; + zp->z_zn_prefetch = B_FALSE; + zp->z_is_sa = B_FALSE; ++#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) + zp->z_is_mapped = B_FALSE; ++#endif + zp->z_is_ctldir = B_TRUE; + zp->z_sa_hdl = NULL; + zp->z_blksz = 0; +@@ -478,6 +480,8 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, + zp->z_pflags = 0; + zp->z_mode = 0; + zp->z_sync_cnt = 0; ++ zp->z_sync_writes_cnt = 0; ++ zp->z_async_writes_cnt = 0; + ip->i_generation = 0; + ip->i_ino = id; + ip->i_mode = (S_IFDIR | S_IRWXUGO); +diff --git a/module/os/linux/zfs/zfs_ioctl_os.c b/module/os/linux/zfs/zfs_ioctl_os.c +index 79b9d777d..767d3a377 100644 +--- a/module/os/linux/zfs/zfs_ioctl_os.c ++++ b/module/os/linux/zfs/zfs_ioctl_os.c +@@ -288,6 +288,8 @@ zfsdev_detach(void) + #define ZFS_DEBUG_STR "" + #endif + ++zidmap_t *zfs_init_idmap; ++ + static int __init + openzfs_init(void) + { +@@ -311,6 +313,8 @@ openzfs_init(void) + printk(KERN_NOTICE "ZFS: Posix ACLs disabled by kernel\n"); + #endif /* CONFIG_FS_POSIX_ACL */ + ++ zfs_init_idmap = (zidmap_t *)zfs_get_init_idmap(); ++ + return (0); + } + +diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c +index da897f120..e620eb43a 100644 +--- a/module/os/linux/zfs/zfs_vfsops.c ++++ b/module/os/linux/zfs/zfs_vfsops.c +@@ -1192,7 +1192,7 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan) + int objects = 0; + int i = 0, j = 0; + +- zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP); ++ zp_array = vmem_zalloc(max_array * sizeof (znode_t 
*), KM_SLEEP); + + mutex_enter(&zfsvfs->z_znodes_lock); + while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) { +@@ -1228,7 +1228,7 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan) + zrele(zp); + } + +- kmem_free(zp_array, max_array * sizeof (znode_t *)); ++ vmem_free(zp_array, max_array * sizeof (znode_t *)); + + return (objects); + } +diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c +index ae0401e60..af0d553d5 100644 +--- a/module/os/linux/zfs/zfs_vnops_os.c ++++ b/module/os/linux/zfs/zfs_vnops_os.c +@@ -244,43 +244,46 @@ zfs_close(struct inode *ip, int flag, cred_t *cr) + } + + #if defined(_KERNEL) ++ ++static int zfs_fillpage(struct inode *ip, struct page *pp); ++ + /* + * When a file is memory mapped, we must keep the IO data synchronized +- * between the DMU cache and the memory mapped pages. What this means: +- * +- * On Write: If we find a memory mapped page, we write to *both* +- * the page and the dmu buffer. ++ * between the DMU cache and the memory mapped pages. Update all mapped ++ * pages with the contents of the corresponding dmu buffer.
+ */ + void + update_pages(znode_t *zp, int64_t start, int len, objset_t *os) + { +- struct inode *ip = ZTOI(zp); +- struct address_space *mp = ip->i_mapping; +- struct page *pp; +- uint64_t nbytes; +- int64_t off; +- void *pb; ++ struct address_space *mp = ZTOI(zp)->i_mapping; ++ int64_t off = start & (PAGE_SIZE - 1); + +- off = start & (PAGE_SIZE-1); + for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) { +- nbytes = MIN(PAGE_SIZE - off, len); ++ uint64_t nbytes = MIN(PAGE_SIZE - off, len); + +- pp = find_lock_page(mp, start >> PAGE_SHIFT); ++ struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT); + if (pp) { + if (mapping_writably_mapped(mp)) + flush_dcache_page(pp); + +- pb = kmap(pp); +- (void) dmu_read(os, zp->z_id, start + off, nbytes, +- pb + off, DMU_READ_PREFETCH); ++ void *pb = kmap(pp); ++ int error = dmu_read(os, zp->z_id, start + off, ++ nbytes, pb + off, DMU_READ_PREFETCH); + kunmap(pp); + +- if (mapping_writably_mapped(mp)) +- flush_dcache_page(pp); ++ if (error) { ++ SetPageError(pp); ++ ClearPageUptodate(pp); ++ } else { ++ ClearPageError(pp); ++ SetPageUptodate(pp); ++ ++ if (mapping_writably_mapped(mp)) ++ flush_dcache_page(pp); ++ ++ mark_page_accessed(pp); ++ } + +- mark_page_accessed(pp); +- SetPageUptodate(pp); +- ClearPageError(pp); + unlock_page(pp); + put_page(pp); + } +@@ -291,38 +294,44 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os) + } + + /* +- * When a file is memory mapped, we must keep the IO data synchronized +- * between the DMU cache and the memory mapped pages. What this means: +- * +- * On Read: We "read" preferentially from memory mapped pages, +- * else we default from the dmu buffer. +- * +- * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when +- * the file is memory mapped. ++ * When a file is memory mapped, we must keep the I/O data synchronized ++ * between the DMU cache and the memory mapped pages. 
Preferentially read ++ * from memory mapped pages, otherwise fallback to reading through the dmu. + */ + int + mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio) + { + struct inode *ip = ZTOI(zp); + struct address_space *mp = ip->i_mapping; +- struct page *pp; +- int64_t start, off; +- uint64_t bytes; ++ int64_t start = uio->uio_loffset; ++ int64_t off = start & (PAGE_SIZE - 1); + int len = nbytes; + int error = 0; +- void *pb; + +- start = uio->uio_loffset; +- off = start & (PAGE_SIZE-1); + for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) { +- bytes = MIN(PAGE_SIZE - off, len); ++ uint64_t bytes = MIN(PAGE_SIZE - off, len); + +- pp = find_lock_page(mp, start >> PAGE_SHIFT); ++ struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT); + if (pp) { +- ASSERT(PageUptodate(pp)); ++ /* ++ * If filemap_fault() retries there exists a window ++ * where the page will be unlocked and not up to date. ++ * In this case we must try and fill the page. ++ */ ++ if (unlikely(!PageUptodate(pp))) { ++ error = zfs_fillpage(ip, pp); ++ if (error) { ++ unlock_page(pp); ++ put_page(pp); ++ return (error); ++ } ++ } ++ ++ ASSERT(PageUptodate(pp) || PageDirty(pp)); ++ + unlock_page(pp); + +- pb = kmap(pp); ++ void *pb = kmap(pp); + error = zfs_uiomove(pb + off, bytes, UIO_READ, uio); + kunmap(pp); + +@@ -338,9 +347,11 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio) + + len -= bytes; + off = 0; ++ + if (error) + break; + } ++ + return (error); + } + #endif /* _KERNEL */ +@@ -1010,7 +1021,7 @@ top: + + mutex_enter(&zp->z_lock); + may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 && +- !(zp->z_is_mapped); ++ !zn_has_cached_data(zp, 0, LLONG_MAX); + mutex_exit(&zp->z_lock); + + /* +@@ -1098,7 +1109,8 @@ top: + &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); + delete_now = may_delete_now && !toobig && + atomic_read(&ZTOI(zp)->i_count) == 1 && +- !(zp->z_is_mapped) && xattr_obj == xattr_obj_unlinked && ++ !zn_has_cached_data(zp, 0, LLONG_MAX) && ++ xattr_obj == 
xattr_obj_unlinked && + zfs_external_acl(zp) == acl_obj; + } + +@@ -1663,8 +1675,7 @@ out: + */ + /* ARGSUSED */ + int +-zfs_getattr_fast(struct user_namespace *user_ns, struct inode *ip, +- struct kstat *sp) ++zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp) + { + znode_t *zp = ITOZ(ip); + zfsvfs_t *zfsvfs = ITOZSB(ip); +@@ -3434,21 +3445,34 @@ top: + } + + static void +-zfs_putpage_commit_cb(void *arg) ++zfs_putpage_sync_commit_cb(void *arg) ++{ ++ struct page *pp = arg; ++ ++ ClearPageError(pp); ++ end_page_writeback(pp); ++} ++ ++static void ++zfs_putpage_async_commit_cb(void *arg) + { + struct page *pp = arg; ++ znode_t *zp = ITOZ(pp->mapping->host); + + ClearPageError(pp); + end_page_writeback(pp); ++ atomic_dec_32(&zp->z_async_writes_cnt); + } + + /* + * Push a page out to disk, once the page is on stable storage the + * registered commit callback will be run as notification of completion. + * +- * IN: ip - page mapped for inode. +- * pp - page to push (page is locked) +- * wbc - writeback control data ++ * IN: ip - page mapped for inode. ++ * pp - page to push (page is locked) ++ * wbc - writeback control data ++ * for_sync - does the caller intend to wait synchronously for the ++ * page writeback to complete? + * + * RETURN: 0 if success + * error code if failure +@@ -3458,7 +3482,8 @@ zfs_putpage_commit_cb(void *arg) + */ + /* ARGSUSED */ + int +-zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) ++zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, ++ boolean_t for_sync) + { + znode_t *zp = ITOZ(ip); + zfsvfs_t *zfsvfs = ITOZSB(ip); +@@ -3556,6 +3581,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) + zfs_rangelock_exit(lr); + + if (wbc->sync_mode != WB_SYNC_NONE) { ++ /* ++ * Speed up any non-sync page writebacks since ++ * they may take several seconds to complete. 
++ * Refer to the comment in zpl_fsync() (when ++ * HAVE_FSYNC_RANGE is defined) for details. ++ */ ++ if (atomic_load_32(&zp->z_async_writes_cnt) > 0) { ++ zil_commit(zfsvfs->z_log, zp->z_id); ++ } ++ + if (PageWriteback(pp)) + #ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT + folio_wait_bit(page_folio(pp), PG_writeback); +@@ -3581,6 +3616,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) + * was in fact not skipped and should not be counted as if it were. + */ + wbc->pages_skipped--; ++ if (!for_sync) ++ atomic_inc_32(&zp->z_async_writes_cnt); + set_page_writeback(pp); + unlock_page(pp); + +@@ -3602,6 +3639,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) + #endif + ClearPageError(pp); + end_page_writeback(pp); ++ if (!for_sync) ++ atomic_dec_32(&zp->z_async_writes_cnt); + zfs_rangelock_exit(lr); + ZFS_EXIT(zfsvfs); + return (err); +@@ -3626,7 +3665,9 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) + err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx); + + zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0, +- zfs_putpage_commit_cb, pp); ++ for_sync ? zfs_putpage_sync_commit_cb : ++ zfs_putpage_async_commit_cb, pp); ++ + dmu_tx_commit(tx); + + zfs_rangelock_exit(lr); +@@ -3638,6 +3679,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) + * performance reasons. + */ + zil_commit(zfsvfs->z_log, zp->z_id); ++ } else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) { ++ /* ++ * If the caller does not intend to wait synchronously ++ * for this page writeback to complete and there are active ++ * synchronous calls on this file, do a commit so that ++ * the latter don't accidentally end up waiting for ++ * our writeback to complete. Refer to the comment in ++ * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details. 
++ */ ++ zil_commit(zfsvfs->z_log, zp->z_id); + } + + dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen); +@@ -3766,55 +3817,45 @@ zfs_inactive(struct inode *ip) + * Fill pages with data from the disk. + */ + static int +-zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages) ++zfs_fillpage(struct inode *ip, struct page *pp) + { +- znode_t *zp = ITOZ(ip); + zfsvfs_t *zfsvfs = ITOZSB(ip); +- objset_t *os; +- struct page *cur_pp; +- u_offset_t io_off, total; +- size_t io_len; +- loff_t i_size; +- unsigned page_idx; +- int err; ++ loff_t i_size = i_size_read(ip); ++ u_offset_t io_off = page_offset(pp); ++ size_t io_len = PAGE_SIZE; + +- os = zfsvfs->z_os; +- io_len = nr_pages << PAGE_SHIFT; +- i_size = i_size_read(ip); +- io_off = page_offset(pl[0]); ++ ASSERT3U(io_off, <, i_size); + + if (io_off + io_len > i_size) + io_len = i_size - io_off; + +- /* +- * Iterate over list of pages and read each page individually. +- */ +- page_idx = 0; +- for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { +- caddr_t va; ++ void *va = kmap(pp); ++ int error = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off, ++ io_len, va, DMU_READ_PREFETCH); ++ if (io_len != PAGE_SIZE) ++ memset((char *)va + io_len, 0, PAGE_SIZE - io_len); ++ kunmap(pp); + +- cur_pp = pl[page_idx++]; +- va = kmap(cur_pp); +- err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, +- DMU_READ_PREFETCH); +- kunmap(cur_pp); +- if (err) { +- /* convert checksum errors into IO errors */ +- if (err == ECKSUM) +- err = SET_ERROR(EIO); +- return (err); +- } ++ if (error) { ++ /* convert checksum errors into IO errors */ ++ if (error == ECKSUM) ++ error = SET_ERROR(EIO); ++ ++ SetPageError(pp); ++ ClearPageUptodate(pp); ++ } else { ++ ClearPageError(pp); ++ SetPageUptodate(pp); + } + +- return (0); ++ return (error); + } + + /* +- * Uses zfs_fillpage to read data from the file and fill the pages. ++ * Uses zfs_fillpage to read data from the file and fill the page. 
+ * + * IN: ip - inode of file to get data from. +- * pl - list of pages to read +- * nr_pages - number of pages to read ++ * pp - page to read + * + * RETURN: 0 on success, error code on failure. + * +@@ -3823,24 +3864,22 @@ zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages) + */ + /* ARGSUSED */ + int +-zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages) ++zfs_getpage(struct inode *ip, struct page *pp) + { +- znode_t *zp = ITOZ(ip); + zfsvfs_t *zfsvfs = ITOZSB(ip); +- int err; +- +- if (pl == NULL) +- return (0); ++ znode_t *zp = ITOZ(ip); ++ int error; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + +- err = zfs_fillpage(ip, pl, nr_pages); +- +- dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nr_pages*PAGESIZE); ++ error = zfs_fillpage(ip, pp); ++ if (error == 0) ++ dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE); + + ZFS_EXIT(zfsvfs); +- return (err); ++ ++ return (error); + } + + /* +diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c +index f3475b4d9..0236b3216 100644 +--- a/module/os/linux/zfs/zfs_znode.c ++++ b/module/os/linux/zfs/zfs_znode.c +@@ -134,6 +134,9 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) + zp->z_acl_cached = NULL; + zp->z_xattr_cached = NULL; + zp->z_xattr_parent = 0; ++ zp->z_sync_writes_cnt = 0; ++ zp->z_async_writes_cnt = 0; ++ + return (0); + } + +@@ -151,9 +154,12 @@ zfs_znode_cache_destructor(void *buf, void *arg) + rw_destroy(&zp->z_xattr_lock); + zfs_rangelock_fini(&zp->z_rangelock); + +- ASSERT(zp->z_dirlocks == NULL); +- ASSERT(zp->z_acl_cached == NULL); +- ASSERT(zp->z_xattr_cached == NULL); ++ ASSERT3P(zp->z_dirlocks, ==, NULL); ++ ASSERT3P(zp->z_acl_cached, ==, NULL); ++ ASSERT3P(zp->z_xattr_cached, ==, NULL); ++ ++ ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt)); ++ ASSERT0(atomic_load_32(&zp->z_async_writes_cnt)); + } + + static int +@@ -540,7 +546,9 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, + 
ASSERT3P(zp->z_xattr_cached, ==, NULL); + zp->z_unlinked = B_FALSE; + zp->z_atime_dirty = B_FALSE; ++#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) + zp->z_is_mapped = B_FALSE; ++#endif + zp->z_is_ctldir = B_FALSE; + zp->z_suspended = B_FALSE; + zp->z_sa_hdl = NULL; +@@ -549,6 +557,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, + zp->z_blksz = blksz; + zp->z_seq = 0x7A4653; + zp->z_sync_cnt = 0; ++ zp->z_sync_writes_cnt = 0; ++ zp->z_async_writes_cnt = 0; + + zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl); + +@@ -1628,7 +1638,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) + * Zero partial page cache entries. This must be done under a + * range lock in order to keep the ARC and page cache in sync. + */ +- if (zp->z_is_mapped) { ++ if (zn_has_cached_data(zp, off, off + len - 1)) { + loff_t first_page, last_page, page_len; + loff_t first_page_offset, last_page_offset; + +diff --git a/module/os/linux/zfs/zpl_ctldir.c b/module/os/linux/zfs/zpl_ctldir.c +index 9b526afd0..cf4da470f 100644 +--- a/module/os/linux/zfs/zpl_ctldir.c ++++ b/module/os/linux/zfs/zpl_ctldir.c +@@ -101,7 +101,11 @@ zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir) + */ + /* ARGSUSED */ + static int +-#ifdef HAVE_USERNS_IOPS_GETATTR ++#ifdef HAVE_IDMAP_IOPS_GETATTR ++zpl_root_getattr_impl(struct mnt_idmap *user_ns, ++ const struct path *path, struct kstat *stat, u32 request_mask, ++ unsigned int query_flags) ++#elif defined(HAVE_USERNS_IOPS_GETATTR) + zpl_root_getattr_impl(struct user_namespace *user_ns, + const struct path *path, struct kstat *stat, u32 request_mask, + unsigned int query_flags) +@@ -112,8 +116,14 @@ zpl_root_getattr_impl(const struct path *path, struct kstat *stat, + { + struct inode *ip = path->dentry->d_inode; + +-#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR) ++#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR)) ++#ifdef HAVE_GENERIC_FILLATTR_USERNS + generic_fillattr(user_ns, 
ip, stat); ++#elif defined(HAVE_GENERIC_FILLATTR_IDMAP) ++ generic_fillattr(user_ns, ip, stat); ++#else ++ (void) user_ns; ++#endif + #else + generic_fillattr(ip, stat); + #endif +@@ -304,6 +314,10 @@ static int + zpl_snapdir_rename2(struct user_namespace *user_ns, struct inode *sdip, + struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry, + unsigned int flags) ++#elif defined(HAVE_IOPS_RENAME_IDMAP) ++zpl_snapdir_rename2(struct mnt_idmap *user_ns, struct inode *sdip, ++ struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry, ++ unsigned int flags) + #else + zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry, + struct inode *tdip, struct dentry *tdentry, unsigned int flags) +@@ -325,7 +339,9 @@ zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry, + return (error); + } + +-#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS) ++#if (!defined(HAVE_RENAME_WANTS_FLAGS) && \ ++ !defined(HAVE_IOPS_RENAME_USERNS) && \ ++ !defined(HAVE_IOPS_RENAME_IDMAP)) + static int + zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry, + struct inode *tdip, struct dentry *tdentry) +@@ -352,6 +368,9 @@ static int + #ifdef HAVE_IOPS_MKDIR_USERNS + zpl_snapdir_mkdir(struct user_namespace *user_ns, struct inode *dip, + struct dentry *dentry, umode_t mode) ++#elif defined(HAVE_IOPS_MKDIR_IDMAP) ++zpl_snapdir_mkdir(struct mnt_idmap *user_ns, struct inode *dip, ++ struct dentry *dentry, umode_t mode) + #else + zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) + #endif +@@ -384,7 +403,11 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) + */ + /* ARGSUSED */ + static int +-#ifdef HAVE_USERNS_IOPS_GETATTR ++#ifdef HAVE_IDMAP_IOPS_GETATTR ++zpl_snapdir_getattr_impl(struct mnt_idmap *user_ns, ++ const struct path *path, struct kstat *stat, u32 request_mask, ++ unsigned int query_flags) ++#elif defined(HAVE_USERNS_IOPS_GETATTR) + zpl_snapdir_getattr_impl(struct 
user_namespace *user_ns, + const struct path *path, struct kstat *stat, u32 request_mask, + unsigned int query_flags) +@@ -397,8 +420,14 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat, + zfsvfs_t *zfsvfs = ITOZSB(ip); + + ZPL_ENTER(zfsvfs); +-#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR) ++#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR)) ++#ifdef HAVE_GENERIC_FILLATTR_USERNS ++ generic_fillattr(user_ns, ip, stat); ++#elif defined(HAVE_GENERIC_FILLATTR_IDMAP) + generic_fillattr(user_ns, ip, stat); ++#else ++ (void) user_ns; ++#endif + #else + generic_fillattr(ip, stat); + #endif +@@ -439,7 +468,9 @@ const struct file_operations zpl_fops_snapdir = { + const struct inode_operations zpl_ops_snapdir = { + .lookup = zpl_snapdir_lookup, + .getattr = zpl_snapdir_getattr, +-#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS) ++#if (defined(HAVE_RENAME_WANTS_FLAGS) || \ ++ defined(HAVE_IOPS_RENAME_USERNS) || \ ++ defined(HAVE_IOPS_RENAME_IDMAP)) + .rename = zpl_snapdir_rename2, + #else + .rename = zpl_snapdir_rename, +@@ -530,6 +561,10 @@ static int + zpl_shares_getattr_impl(struct user_namespace *user_ns, + const struct path *path, struct kstat *stat, u32 request_mask, + unsigned int query_flags) ++#elif defined(HAVE_IDMAP_IOPS_GETATTR) ++zpl_shares_getattr_impl(struct mnt_idmap *user_ns, ++ const struct path *path, struct kstat *stat, u32 request_mask, ++ unsigned int query_flags) + #else + zpl_shares_getattr_impl(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +@@ -543,8 +578,14 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat, + ZPL_ENTER(zfsvfs); + + if (zfsvfs->z_shares_dir == 0) { +-#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR) ++#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR)) ++#ifdef HAVE_GENERIC_FILLATTR_USERNS ++ 
generic_fillattr(user_ns, path->dentry->d_inode, stat); ++#elif defined(HAVE_GENERIC_FILLATTR_IDMAP) + generic_fillattr(user_ns, path->dentry->d_inode, stat); ++#else ++ (void) user_ns; ++#endif + #else + generic_fillattr(path->dentry->d_inode, stat); + #endif +@@ -556,7 +597,7 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat, + + error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp); + if (error == 0) { +-#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR) ++#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR)) + error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat); + #else + error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat); +diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c +index 38d2bd147..d5d354db1 100644 +--- a/module/os/linux/zfs/zpl_file.c ++++ b/module/os/linux/zfs/zpl_file.c +@@ -165,17 +165,56 @@ static int + zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync) + { + struct inode *inode = filp->f_mapping->host; ++ znode_t *zp = ITOZ(inode); ++ zfsvfs_t *zfsvfs = ITOZSB(inode); + cred_t *cr = CRED(); + int error; + fstrans_cookie_t cookie; + ++ /* ++ * The variables z_sync_writes_cnt and z_async_writes_cnt work in ++ * tandem so that sync writes can detect if there are any non-sync ++ * writes going on and vice-versa. The "vice-versa" part to this logic ++ * is located in zfs_putpage() where non-sync writes check if there are ++ * any ongoing sync writes. If any sync and non-sync writes overlap, ++ * we do a commit to complete the non-sync writes since the latter can ++ * potentially take several seconds to complete and thus block sync ++ * writes in the upcoming call to filemap_write_and_wait_range(). 
++ */ ++ atomic_inc_32(&zp->z_sync_writes_cnt); ++ /* ++ * If the following check does not detect an overlapping non-sync write ++ * (say because it's just about to start), then it is guaranteed that ++ * the non-sync write will detect this sync write. This is because we ++ * always increment z_sync_writes_cnt / z_async_writes_cnt before doing ++ * the check on z_async_writes_cnt / z_sync_writes_cnt here and in ++ * zfs_putpage() respectively. ++ */ ++ if (atomic_load_32(&zp->z_async_writes_cnt) > 0) { ++ ZPL_ENTER(zfsvfs); ++ zil_commit(zfsvfs->z_log, zp->z_id); ++ ZPL_EXIT(zfsvfs); ++ } ++ + error = filemap_write_and_wait_range(inode->i_mapping, start, end); ++ ++ /* ++ * The sync write is not complete yet but we decrement ++ * z_sync_writes_cnt since zfs_fsync() increments and decrements ++ * it internally. If a non-sync write starts just after the decrement ++ * operation but before we call zfs_fsync(), it may not detect this ++ * overlapping sync write but it does not matter since we have already ++ * gone past filemap_write_and_wait_range() and we won't block due to ++ * the non-sync write. 
++ */ ++ atomic_dec_32(&zp->z_sync_writes_cnt); ++ + if (error) + return (error); + + crhold(cr); + cookie = spl_fstrans_mark(); +- error = -zfs_fsync(ITOZ(inode), datasync, cr); ++ error = -zfs_fsync(zp, datasync, cr); + spl_fstrans_unmark(cookie); + crfree(cr); + ASSERT3S(error, <=, 0); +@@ -579,7 +618,6 @@ static int + zpl_mmap(struct file *filp, struct vm_area_struct *vma) + { + struct inode *ip = filp->f_mapping->host; +- znode_t *zp = ITOZ(ip); + int error; + fstrans_cookie_t cookie; + +@@ -594,9 +632,12 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma) + if (error) + return (error); + ++#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) ++ znode_t *zp = ITOZ(ip); + mutex_enter(&zp->z_lock); + zp->z_is_mapped = B_TRUE; + mutex_exit(&zp->z_lock); ++#endif + + return (error); + } +@@ -609,29 +650,16 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma) + static inline int + zpl_readpage_common(struct page *pp) + { +- struct inode *ip; +- struct page *pl[1]; +- int error = 0; + fstrans_cookie_t cookie; + + ASSERT(PageLocked(pp)); +- ip = pp->mapping->host; +- pl[0] = pp; + + cookie = spl_fstrans_mark(); +- error = -zfs_getpage(ip, pl, 1); ++ int error = -zfs_getpage(pp->mapping->host, pp); + spl_fstrans_unmark(cookie); + +- if (error) { +- SetPageError(pp); +- ClearPageUptodate(pp); +- } else { +- ClearPageError(pp); +- SetPageUptodate(pp); +- flush_dcache_page(pp); +- } +- + unlock_page(pp); ++ + return (error); + } + +@@ -688,19 +716,42 @@ zpl_readahead(struct readahead_control *ractl) + static int + zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data) + { +- struct address_space *mapping = data; ++ boolean_t *for_sync = data; + fstrans_cookie_t cookie; + + ASSERT(PageLocked(pp)); + ASSERT(!PageWriteback(pp)); + + cookie = spl_fstrans_mark(); +- (void) zfs_putpage(mapping->host, pp, wbc); ++ (void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync); + spl_fstrans_unmark(cookie); + + return (0); + } + ++#ifdef HAVE_WRITEPAGE_T_FOLIO 
++static int ++zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data) ++{ ++ (void) zpl_putpage(&pp->page, wbc, data); ++ return (0); ++} ++#endif ++ ++static inline int ++zpl_write_cache_pages(struct address_space *mapping, ++ struct writeback_control *wbc, void *data) ++{ ++ int result; ++ ++#ifdef HAVE_WRITEPAGE_T_FOLIO ++ result = write_cache_pages(mapping, wbc, zpl_putfolio, data); ++#else ++ result = write_cache_pages(mapping, wbc, zpl_putpage, data); ++#endif ++ return (result); ++} ++ + static int + zpl_writepages(struct address_space *mapping, struct writeback_control *wbc) + { +@@ -722,8 +773,9 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc) + * we run it once in non-SYNC mode so that the ZIL gets all the data, + * and then we commit it all in one go. + */ ++ boolean_t for_sync = (sync_mode == WB_SYNC_ALL); + wbc->sync_mode = WB_SYNC_NONE; +- result = write_cache_pages(mapping, wbc, zpl_putpage, mapping); ++ result = zpl_write_cache_pages(mapping, wbc, &for_sync); + if (sync_mode != wbc->sync_mode) { + ZPL_ENTER(zfsvfs); + ZPL_VERIFY_ZP(zp); +@@ -739,7 +791,7 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc) + * details). That being said, this is a no-op in most cases. 
+ */ + wbc->sync_mode = sync_mode; +- result = write_cache_pages(mapping, wbc, zpl_putpage, mapping); ++ result = zpl_write_cache_pages(mapping, wbc, &for_sync); + } + return (result); + } +@@ -756,7 +808,9 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc) + if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS) + wbc->sync_mode = WB_SYNC_ALL; + +- return (zpl_putpage(pp, wbc, pp->mapping)); ++ boolean_t for_sync = (wbc->sync_mode == WB_SYNC_ALL); ++ ++ return (zpl_putpage(pp, wbc, &for_sync)); + } + + /* +@@ -924,7 +978,7 @@ __zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva) + !capable(CAP_LINUX_IMMUTABLE)) + return (-EPERM); + +- if (!zpl_inode_owner_or_capable(kcred->user_ns, ip)) ++ if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip)) + return (-EACCES); + + xva_init(xva); +diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c +index dd634f70e..6efaaf438 100644 +--- a/module/os/linux/zfs/zpl_inode.c ++++ b/module/os/linux/zfs/zpl_inode.c +@@ -131,6 +131,9 @@ static int + #ifdef HAVE_IOPS_CREATE_USERNS + zpl_create(struct user_namespace *user_ns, struct inode *dir, + struct dentry *dentry, umode_t mode, bool flag) ++#elif defined(HAVE_IOPS_CREATE_IDMAP) ++zpl_create(struct mnt_idmap *user_ns, struct inode *dir, ++ struct dentry *dentry, umode_t mode, bool flag) + #else + zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag) + #endif +@@ -174,6 +177,9 @@ static int + #ifdef HAVE_IOPS_MKNOD_USERNS + zpl_mknod(struct user_namespace *user_ns, struct inode *dir, + struct dentry *dentry, umode_t mode, ++#elif defined(HAVE_IOPS_MKNOD_IDMAP) ++zpl_mknod(struct mnt_idmap *user_ns, struct inode *dir, ++ struct dentry *dentry, umode_t mode, + #else + zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, + #endif +@@ -224,7 +230,10 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, + + #ifdef HAVE_TMPFILE + static int +-#ifndef HAVE_TMPFILE_DENTRY 
++#ifdef HAVE_TMPFILE_IDMAP ++zpl_tmpfile(struct mnt_idmap *userns, struct inode *dir, ++ struct file *file, umode_t mode) ++#elif !defined(HAVE_TMPFILE_DENTRY) + zpl_tmpfile(struct user_namespace *userns, struct inode *dir, + struct file *file, umode_t mode) + #else +@@ -317,6 +326,9 @@ static int + #ifdef HAVE_IOPS_MKDIR_USERNS + zpl_mkdir(struct user_namespace *user_ns, struct inode *dir, + struct dentry *dentry, umode_t mode) ++#elif defined(HAVE_IOPS_MKDIR_IDMAP) ++zpl_mkdir(struct mnt_idmap *user_ns, struct inode *dir, ++ struct dentry *dentry, umode_t mode) + #else + zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) + #endif +@@ -386,6 +398,10 @@ static int + zpl_getattr_impl(struct user_namespace *user_ns, + const struct path *path, struct kstat *stat, u32 request_mask, + unsigned int query_flags) ++#elif defined(HAVE_IDMAP_IOPS_GETATTR) ++zpl_getattr_impl(struct mnt_idmap *user_ns, ++ const struct path *path, struct kstat *stat, u32 request_mask, ++ unsigned int query_flags) + #else + zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask, + unsigned int query_flags) +@@ -402,7 +418,7 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask, + * XXX query_flags currently ignored. 
+ */ + +-#ifdef HAVE_USERNS_IOPS_GETATTR ++#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR)) + error = -zfs_getattr_fast(user_ns, ip, stat); + #else + error = -zfs_getattr_fast(kcred->user_ns, ip, stat); +@@ -441,9 +457,12 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask, + ZPL_GETATTR_WRAPPER(zpl_getattr); + + static int +-#ifdef HAVE_SETATTR_PREPARE_USERNS ++#ifdef HAVE_USERNS_IOPS_SETATTR + zpl_setattr(struct user_namespace *user_ns, struct dentry *dentry, + struct iattr *ia) ++#elif defined(HAVE_IDMAP_IOPS_SETATTR) ++zpl_setattr(struct mnt_idmap *user_ns, struct dentry *dentry, ++ struct iattr *ia) + #else + zpl_setattr(struct dentry *dentry, struct iattr *ia) + #endif +@@ -454,7 +473,13 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia) + int error; + fstrans_cookie_t cookie; + +- error = zpl_setattr_prepare(kcred->user_ns, dentry, ia); ++#ifdef HAVE_SETATTR_PREPARE_USERNS ++ error = zpl_setattr_prepare(user_ns, dentry, ia); ++#elif defined(HAVE_SETATTR_PREPARE_IDMAP) ++ error = zpl_setattr_prepare(user_ns, dentry, ia); ++#else ++ error = zpl_setattr_prepare(zfs_init_idmap, dentry, ia); ++#endif + if (error) + return (error); + +@@ -489,10 +514,14 @@ static int + #ifdef HAVE_IOPS_RENAME_USERNS + zpl_rename2(struct user_namespace *user_ns, struct inode *sdip, + struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry, +- unsigned int flags) ++ unsigned int rflags) ++#elif defined(HAVE_IOPS_RENAME_IDMAP) ++zpl_rename2(struct mnt_idmap *user_ns, struct inode *sdip, ++ struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry, ++ unsigned int rflags) + #else + zpl_rename2(struct inode *sdip, struct dentry *sdentry, +- struct inode *tdip, struct dentry *tdentry, unsigned int flags) ++ struct inode *tdip, struct dentry *tdentry, unsigned int rflags) + #endif + { + cred_t *cr = CRED(); +@@ -500,7 +529,7 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry, + fstrans_cookie_t 
cookie; + + /* We don't have renameat2(2) support */ +- if (flags) ++ if (rflags) + return (-EINVAL); + + crhold(cr); +@@ -514,7 +543,9 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry, + return (error); + } + +-#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS) ++#if !defined(HAVE_IOPS_RENAME_USERNS) && \ ++ !defined(HAVE_RENAME_WANTS_FLAGS) && \ ++ !defined(HAVE_IOPS_RENAME_IDMAP) + static int + zpl_rename(struct inode *sdip, struct dentry *sdentry, + struct inode *tdip, struct dentry *tdentry) +@@ -527,6 +558,9 @@ static int + #ifdef HAVE_IOPS_SYMLINK_USERNS + zpl_symlink(struct user_namespace *user_ns, struct inode *dir, + struct dentry *dentry, const char *name) ++#elif defined(HAVE_IOPS_SYMLINK_IDMAP) ++zpl_symlink(struct mnt_idmap *user_ns, struct inode *dir, ++ struct dentry *dentry, const char *name) + #else + zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name) + #endif +@@ -745,6 +779,8 @@ const struct inode_operations zpl_dir_inode_operations = { + .mknod = zpl_mknod, + #if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS) + .rename = zpl_rename2, ++#elif defined(HAVE_IOPS_RENAME_IDMAP) ++ .rename = zpl_rename2, + #else + .rename = zpl_rename, + #endif +diff --git a/module/os/linux/zfs/zpl_xattr.c b/module/os/linux/zfs/zpl_xattr.c +index 364cd34c1..084817609 100644 +--- a/module/os/linux/zfs/zpl_xattr.c ++++ b/module/os/linux/zfs/zpl_xattr.c +@@ -725,9 +725,11 @@ __zpl_xattr_user_get(struct inode *ip, const char *name, + ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get); + + static int +-__zpl_xattr_user_set(struct inode *ip, const char *name, ++__zpl_xattr_user_set(zidmap_t *user_ns, ++ struct inode *ip, const char *name, + const void *value, size_t size, int flags) + { ++ (void) user_ns; + char *xattr_name; + int error; + /* xattr_resolve_name will do this for us if this is defined */ +@@ -794,9 +796,11 @@ __zpl_xattr_trusted_get(struct inode *ip, const char *name, + 
ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get); + + static int +-__zpl_xattr_trusted_set(struct inode *ip, const char *name, ++__zpl_xattr_trusted_set(zidmap_t *user_ns, ++ struct inode *ip, const char *name, + const void *value, size_t size, int flags) + { ++ (void) user_ns; + char *xattr_name; + int error; + +@@ -863,9 +867,11 @@ __zpl_xattr_security_get(struct inode *ip, const char *name, + ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get); + + static int +-__zpl_xattr_security_set(struct inode *ip, const char *name, ++__zpl_xattr_security_set(zidmap_t *user_ns, ++ struct inode *ip, const char *name, + const void *value, size_t size, int flags) + { ++ (void) user_ns; + char *xattr_name; + int error; + /* xattr_resolve_name will do this for us if this is defined */ +@@ -889,7 +895,7 @@ zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs, + int error = 0; + + for (xattr = xattrs; xattr->name != NULL; xattr++) { +- error = __zpl_xattr_security_set(ip, ++ error = __zpl_xattr_security_set(NULL, ip, + xattr->name, xattr->value, xattr->value_len, 0); + + if (error < 0) +@@ -1004,6 +1010,9 @@ int + #ifdef HAVE_SET_ACL_USERNS + zpl_set_acl(struct user_namespace *userns, struct inode *ip, + struct posix_acl *acl, int type) ++#elif defined(HAVE_SET_ACL_IDMAP_DENTRY) ++zpl_set_acl(struct mnt_idmap *userns, struct dentry *dentry, ++ struct posix_acl *acl, int type) + #elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2) + zpl_set_acl(struct user_namespace *userns, struct dentry *dentry, + struct posix_acl *acl, int type) +@@ -1013,6 +1022,8 @@ zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type) + { + #ifdef HAVE_SET_ACL_USERNS_DENTRY_ARG2 + return (zpl_set_acl_impl(d_inode(dentry), acl, type)); ++#elif defined(HAVE_SET_ACL_IDMAP_DENTRY) ++ return (zpl_set_acl_impl(d_inode(dentry), acl, type)); + #else + return (zpl_set_acl_impl(ip, acl, type)); + #endif /* HAVE_SET_ACL_USERNS_DENTRY_ARG2 */ +@@ -1256,7 +1267,8 @@ __zpl_xattr_acl_get_default(struct inode 
*ip, const char *name, + ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default); + + static int +-__zpl_xattr_acl_set_access(struct inode *ip, const char *name, ++__zpl_xattr_acl_set_access(zidmap_t *mnt_ns, ++ struct inode *ip, const char *name, + const void *value, size_t size, int flags) + { + struct posix_acl *acl; +@@ -1270,8 +1282,14 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name, + if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX) + return (-EOPNOTSUPP); + +- if (!zpl_inode_owner_or_capable(kcred->user_ns, ip)) ++#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP) ++ if (!zpl_inode_owner_or_capable(mnt_ns, ip)) ++ return (-EPERM); ++#else ++ (void) mnt_ns; ++ if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip)) + return (-EPERM); ++#endif + + if (value) { + acl = zpl_acl_from_xattr(value, size); +@@ -1295,7 +1313,8 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name, + ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access); + + static int +-__zpl_xattr_acl_set_default(struct inode *ip, const char *name, ++__zpl_xattr_acl_set_default(zidmap_t *mnt_ns, ++ struct inode *ip, const char *name, + const void *value, size_t size, int flags) + { + struct posix_acl *acl; +@@ -1309,8 +1328,14 @@ __zpl_xattr_acl_set_default(struct inode *ip, const char *name, + if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX) + return (-EOPNOTSUPP); + +- if (!zpl_inode_owner_or_capable(kcred->user_ns, ip)) ++#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP) ++ if (!zpl_inode_owner_or_capable(mnt_ns, ip)) ++ return (-EPERM); ++#else ++ (void) mnt_ns; ++ if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip)) + return (-EPERM); ++#endif + + if (value) { + acl = zpl_acl_from_xattr(value, size); +diff --git a/module/zcommon/Makefile.in b/module/zcommon/Makefile.in +index ebc538440..614968a42 100644 +--- a/module/zcommon/Makefile.in ++++ b/module/zcommon/Makefile.in +@@ -26,3 +26,7 @@ $(MODULE)-$(CONFIG_X86) += zfs_fletcher_intel.o + 
$(MODULE)-$(CONFIG_X86) += zfs_fletcher_sse.o + $(MODULE)-$(CONFIG_X86) += zfs_fletcher_avx512.o + $(MODULE)-$(CONFIG_ARM64) += zfs_fletcher_aarch64_neon.o ++ ++ifeq ($(CONFIG_ARM64),y) ++CFLAGS_REMOVE_zfs_fletcher_aarch64_neon.o += -mgeneral-regs-only ++endif +diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in +index 653ea0da9..0e04d7ef0 100644 +--- a/module/zfs/Makefile.in ++++ b/module/zfs/Makefile.in +@@ -154,4 +154,9 @@ ifeq ($(CONFIG_ALTIVEC),y) + $(obj)/vdev_raidz_math_powerpc_altivec.o: c_flags += -maltivec + endif + ++ifeq ($(CONFIG_ARM64),y) ++CFLAGS_REMOVE_vdev_raidz_math_aarch64_neon.o += -mgeneral-regs-only ++CFLAGS_REMOVE_vdev_raidz_math_aarch64_neonx2.o += -mgeneral-regs-only ++endif ++ + include $(mfdir)/../os/linux/zfs/Makefile +diff --git a/module/zfs/abd.c b/module/zfs/abd.c +index 8ee8e7e57..754974a55 100644 +--- a/module/zfs/abd.c ++++ b/module/zfs/abd.c +@@ -109,7 +109,6 @@ void + abd_verify(abd_t *abd) + { + #ifdef ZFS_DEBUG +- ASSERT3U(abd->abd_size, >, 0); + ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); + ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR | + ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE | +@@ -118,6 +117,7 @@ abd_verify(abd_t *abd) + IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER)); + IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER); + if (abd_is_linear(abd)) { ++ ASSERT3U(abd->abd_size, >, 0); + ASSERT3P(ABD_LINEAR_BUF(abd), !=, NULL); + } else if (abd_is_gang(abd)) { + uint_t child_sizes = 0; +@@ -130,6 +130,7 @@ abd_verify(abd_t *abd) + } + ASSERT3U(abd->abd_size, ==, child_sizes); + } else { ++ ASSERT3U(abd->abd_size, >, 0); + abd_verify_scatter(abd); + } + #endif +@@ -369,7 +370,20 @@ abd_gang_add_gang(abd_t *pabd, abd_t *cabd, boolean_t free_on_free) + * will retain all the free_on_free settings after being + * added to the parents list. + */ ++#ifdef ZFS_DEBUG ++ /* ++ * If cabd had abd_parent, we have to drop it here. 
We can't ++ * transfer it to pabd, nor we can clear abd_size leaving it. ++ */ ++ if (cabd->abd_parent != NULL) { ++ (void) zfs_refcount_remove_many( ++ &cabd->abd_parent->abd_children, ++ cabd->abd_size, cabd); ++ cabd->abd_parent = NULL; ++ } ++#endif + pabd->abd_size += cabd->abd_size; ++ cabd->abd_size = 0; + list_move_tail(&ABD_GANG(pabd).abd_gang_chain, + &ABD_GANG(cabd).abd_gang_chain); + ASSERT(list_is_empty(&ABD_GANG(cabd).abd_gang_chain)); +@@ -407,7 +421,6 @@ abd_gang_add(abd_t *pabd, abd_t *cabd, boolean_t free_on_free) + */ + if (abd_is_gang(cabd)) { + ASSERT(!list_link_active(&cabd->abd_gang_link)); +- ASSERT(!list_is_empty(&ABD_GANG(cabd).abd_gang_chain)); + return (abd_gang_add_gang(pabd, cabd, free_on_free)); + } + ASSERT(!abd_is_gang(cabd)); +diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c +index 98ca2b3bc..b8161f710 100644 +--- a/module/zfs/dmu_recv.c ++++ b/module/zfs/dmu_recv.c +@@ -71,6 +71,12 @@ int zfs_recv_write_batch_size = 1024 * 1024; + static char *dmu_recv_tag = "dmu_recv_tag"; + const char *recv_clone_name = "%recv"; + ++typedef enum { ++ ORNS_NO, ++ ORNS_YES, ++ ORNS_MAYBE ++} or_need_sync_t; ++ + static int receive_read_payload_and_next_header(dmu_recv_cookie_t *ra, int len, + void *buf); + +@@ -121,6 +127,9 @@ struct receive_writer_arg { + uint8_t or_iv[ZIO_DATA_IV_LEN]; + uint8_t or_mac[ZIO_DATA_MAC_LEN]; + boolean_t or_byteorder; ++ ++ /* Keep track of DRR_FREEOBJECTS right after DRR_OBJECT_RANGE */ ++ or_need_sync_t or_need_sync; + }; + + typedef struct dmu_recv_begin_arg { +@@ -1658,10 +1667,22 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, + /* object was freed and we are about to allocate a new one */ + object_to_hold = DMU_NEW_OBJECT; + } else { ++ /* ++ * If the only record in this range so far was DRR_FREEOBJECTS ++ * with at least one actually freed object, it's possible that ++ * the block will now be converted to a hole. We need to wait ++ * for the txg to sync to prevent races. 
++ */ ++ if (rwa->or_need_sync == ORNS_YES) ++ txg_wait_synced(dmu_objset_pool(rwa->os), 0); ++ + /* object is free and we are about to allocate a new one */ + object_to_hold = DMU_NEW_OBJECT; + } + ++ /* Only relevant for the first object in the range */ ++ rwa->or_need_sync = ORNS_NO; ++ + /* + * If this is a multi-slot dnode there is a chance that this + * object will expand into a slot that is already used by +@@ -1856,6 +1877,9 @@ receive_freeobjects(struct receive_writer_arg *rwa, + + if (err != 0) + return (err); ++ ++ if (rwa->or_need_sync == ORNS_MAYBE) ++ rwa->or_need_sync = ORNS_YES; + } + if (next_err != ESRCH) + return (next_err); +@@ -2298,6 +2322,8 @@ receive_object_range(struct receive_writer_arg *rwa, + bcopy(drror->drr_mac, rwa->or_mac, ZIO_DATA_MAC_LEN); + rwa->or_byteorder = byteorder; + ++ rwa->or_need_sync = ORNS_MAYBE; ++ + return (0); + } + +diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c +index cd9ecc07f..0dd1ec210 100644 +--- a/module/zfs/dmu_send.c ++++ b/module/zfs/dmu_send.c +@@ -2797,6 +2797,7 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, + } + + if (err == 0) { ++ owned = B_TRUE; + err = zap_lookup(dspp.dp->dp_meta_objset, + dspp.to_ds->ds_object, + DS_FIELD_RESUME_TOGUID, 8, 1, +@@ -2810,21 +2811,24 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, + sizeof (dspp.saved_toname), + dspp.saved_toname); + } +- if (err != 0) ++ /* Only disown if there was an error in the lookups */ ++ if (owned && (err != 0)) + dsl_dataset_disown(dspp.to_ds, dsflags, FTAG); + + kmem_strfree(name); + } else { + err = dsl_dataset_own(dspp.dp, tosnap, dsflags, + FTAG, &dspp.to_ds); ++ if (err == 0) ++ owned = B_TRUE; + } +- owned = B_TRUE; + } else { + err = dsl_dataset_hold_flags(dspp.dp, tosnap, dsflags, FTAG, + &dspp.to_ds); + } + + if (err != 0) { ++ /* Note: dsl dataset is not owned at this point */ + dsl_pool_rele(dspp.dp, FTAG); + return (err); + } +diff --git a/module/zfs/dmu_tx.c 
b/module/zfs/dmu_tx.c +index 1eed0526b..063934f39 100644 +--- a/module/zfs/dmu_tx.c ++++ b/module/zfs/dmu_tx.c +@@ -290,6 +290,53 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) + } + } + ++static void ++dmu_tx_count_append(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) ++{ ++ dnode_t *dn = txh->txh_dnode; ++ int err = 0; ++ ++ if (len == 0) ++ return; ++ ++ (void) zfs_refcount_add_many(&txh->txh_space_towrite, len, FTAG); ++ ++ if (dn == NULL) ++ return; ++ ++ /* ++ * For i/o error checking, read the blocks that will be needed ++ * to perform the append; first level-0 block (if not aligned, i.e. ++ * if they are partial-block writes), no additional blocks are read. ++ */ ++ if (dn->dn_maxblkid == 0) { ++ if (off < dn->dn_datablksz && ++ (off > 0 || len < dn->dn_datablksz)) { ++ err = dmu_tx_check_ioerr(NULL, dn, 0, 0); ++ if (err != 0) { ++ txh->txh_tx->tx_err = err; ++ } ++ } ++ } else { ++ zio_t *zio = zio_root(dn->dn_objset->os_spa, ++ NULL, NULL, ZIO_FLAG_CANFAIL); ++ ++ /* first level-0 block */ ++ uint64_t start = off >> dn->dn_datablkshift; ++ if (P2PHASE(off, dn->dn_datablksz) || len < dn->dn_datablksz) { ++ err = dmu_tx_check_ioerr(zio, dn, 0, start); ++ if (err != 0) { ++ txh->txh_tx->tx_err = err; ++ } ++ } ++ ++ err = zio_wait(zio); ++ if (err != 0) { ++ txh->txh_tx->tx_err = err; ++ } ++ } ++} ++ + static void + dmu_tx_count_dnode(dmu_tx_hold_t *txh) + { +@@ -330,6 +377,42 @@ dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len) + } + } + ++/* ++ * Should be used when appending to an object and the exact offset is unknown. ++ * The write must occur at or beyond the specified offset. Only the L0 block ++ * at provided offset will be prefetched. 
++ */ ++void ++dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len) ++{ ++ dmu_tx_hold_t *txh; ++ ++ ASSERT0(tx->tx_txg); ++ ASSERT3U(len, <=, DMU_MAX_ACCESS); ++ ++ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, ++ object, THT_APPEND, off, DMU_OBJECT_END); ++ if (txh != NULL) { ++ dmu_tx_count_append(txh, off, len); ++ dmu_tx_count_dnode(txh); ++ } ++} ++ ++void ++dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len) ++{ ++ dmu_tx_hold_t *txh; ++ ++ ASSERT0(tx->tx_txg); ++ ASSERT3U(len, <=, DMU_MAX_ACCESS); ++ ++ txh = dmu_tx_hold_dnode_impl(tx, dn, THT_APPEND, off, DMU_OBJECT_END); ++ if (txh != NULL) { ++ dmu_tx_count_append(txh, off, len); ++ dmu_tx_count_dnode(txh); ++ } ++} ++ + /* + * This function marks the transaction as being a "net free". The end + * result is that refquotas will be disabled for this transaction, and +@@ -638,6 +721,26 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db) + if (blkid == 0) + match_offset = TRUE; + break; ++ case THT_APPEND: ++ if (blkid >= beginblk && (blkid <= endblk || ++ txh->txh_arg2 == DMU_OBJECT_END)) ++ match_offset = TRUE; ++ ++ /* ++ * THT_WRITE used for bonus and spill blocks. ++ */ ++ ASSERT(blkid != DMU_BONUS_BLKID && ++ blkid != DMU_SPILL_BLKID); ++ ++ /* ++ * They might have to increase nlevels, ++ * thus dirtying the new TLIBs. Or the ++ * might have to change the block size, ++ * thus dirying the new lvl=0 blk=0. 
++ */ ++ if (blkid == 0) ++ match_offset = TRUE; ++ break; + case THT_FREE: + /* + * We will dirty all the level 1 blocks in +@@ -1421,6 +1524,8 @@ dmu_tx_fini(void) + EXPORT_SYMBOL(dmu_tx_create); + EXPORT_SYMBOL(dmu_tx_hold_write); + EXPORT_SYMBOL(dmu_tx_hold_write_by_dnode); ++EXPORT_SYMBOL(dmu_tx_hold_append); ++EXPORT_SYMBOL(dmu_tx_hold_append_by_dnode); + EXPORT_SYMBOL(dmu_tx_hold_free); + EXPORT_SYMBOL(dmu_tx_hold_free_by_dnode); + EXPORT_SYMBOL(dmu_tx_hold_zap); +diff --git a/module/zfs/dsl_deadlist.c b/module/zfs/dsl_deadlist.c +index d5fe2ee56..9827eb147 100644 +--- a/module/zfs/dsl_deadlist.c ++++ b/module/zfs/dsl_deadlist.c +@@ -859,7 +859,7 @@ void + dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx) + { + zap_cursor_t zc, pzc; +- zap_attribute_t za, pza; ++ zap_attribute_t *za, *pza; + dmu_buf_t *bonus; + dsl_deadlist_phys_t *dlp; + dmu_object_info_t doi; +@@ -874,28 +874,31 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx) + return; + } + ++ za = kmem_alloc(sizeof (*za), KM_SLEEP); ++ pza = kmem_alloc(sizeof (*pza), KM_SLEEP); ++ + mutex_enter(&dl->dl_lock); + /* + * Prefetch up to 128 deadlists first and then more as we progress. + * The limit is a balance between ARC use and diminishing returns. 
+ */ + for (zap_cursor_init(&pzc, dl->dl_os, obj), i = 0; +- (perror = zap_cursor_retrieve(&pzc, &pza)) == 0 && i < 128; ++ (perror = zap_cursor_retrieve(&pzc, pza)) == 0 && i < 128; + zap_cursor_advance(&pzc), i++) { +- dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer, +- zfs_strtonum(pza.za_name, NULL)); ++ dsl_deadlist_prefetch_bpobj(dl, pza->za_first_integer, ++ zfs_strtonum(pza->za_name, NULL)); + } + for (zap_cursor_init(&zc, dl->dl_os, obj); +- (error = zap_cursor_retrieve(&zc, &za)) == 0; ++ (error = zap_cursor_retrieve(&zc, za)) == 0; + zap_cursor_advance(&zc)) { +- uint64_t mintxg = zfs_strtonum(za.za_name, NULL); +- dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx); ++ uint64_t mintxg = zfs_strtonum(za->za_name, NULL); ++ dsl_deadlist_insert_bpobj(dl, za->za_first_integer, mintxg, tx); + VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx)); + if (perror == 0) { +- dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer, +- zfs_strtonum(pza.za_name, NULL)); ++ dsl_deadlist_prefetch_bpobj(dl, pza->za_first_integer, ++ zfs_strtonum(pza->za_name, NULL)); + zap_cursor_advance(&pzc); +- perror = zap_cursor_retrieve(&pzc, &pza); ++ perror = zap_cursor_retrieve(&pzc, pza); + } + } + VERIFY3U(error, ==, ENOENT); +@@ -908,6 +911,9 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx) + bzero(dlp, sizeof (*dlp)); + dmu_buf_rele(bonus, FTAG); + mutex_exit(&dl->dl_lock); ++ ++ kmem_free(za, sizeof (*za)); ++ kmem_free(pza, sizeof (*pza)); + } + + /* +diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c +index f3c639b0d..f0a851ff5 100644 +--- a/module/zfs/dsl_scan.c ++++ b/module/zfs/dsl_scan.c +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -126,11 +127,20 @@ static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj, + static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg); + static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj); + 
static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx); +-static uint64_t dsl_scan_count_data_disks(vdev_t *vd); ++static uint64_t dsl_scan_count_data_disks(spa_t *spa); + + extern int zfs_vdev_async_write_active_min_dirty_percent; + static int zfs_scan_blkstats = 0; + ++/* ++ * 'zpool status' uses bytes processed per pass to report throughput and ++ * estimate time remaining. We define a pass to start when the scanning ++ * phase completes for a sequential resilver. Optionally, this value ++ * may be used to reset the pass statistics every N txgs to provide an ++ * estimated completion time based on currently observed performance. ++ */ ++static uint_t zfs_scan_report_txgs = 0; ++ + /* + * By default zfs will check to ensure it is not over the hard memory + * limit before each txg. If finer-grained control of this is needed +@@ -147,7 +157,7 @@ int zfs_scan_strict_mem_lim = B_FALSE; + * overload the drives with I/O, since that is protected by + * zfs_vdev_scrub_max_active. + */ +-unsigned long zfs_scan_vdev_limit = 4 << 20; ++unsigned long zfs_scan_vdev_limit = 16 << 20; + + int zfs_scan_issue_strategy = 0; + int zfs_scan_legacy = B_FALSE; /* don't queue & sort zios, go direct */ +@@ -450,11 +460,12 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) + + /* + * Calculate the max number of in-flight bytes for pool-wide +- * scanning operations (minimum 1MB). Limits for the issuing +- * phase are done per top-level vdev and are handled separately. ++ * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max). ++ * Limits for the issuing phase are done per top-level vdev and ++ * are handled separately. 
+ */ +- scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * +- dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); ++ scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20, ++ zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa))); + + avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t), + offsetof(scan_ds_t, sds_node)); +@@ -584,6 +595,8 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) + } + + spa_scan_stat_init(spa); ++ vdev_scan_stat_init(spa->spa_root_vdev); ++ + return (0); + } + +@@ -742,6 +755,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) + scn->scn_last_checkpoint = 0; + scn->scn_checkpointing = B_FALSE; + spa_scan_stat_init(spa); ++ vdev_scan_stat_init(spa->spa_root_vdev); + + if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { + scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max; +@@ -2797,8 +2811,9 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) + } + + static uint64_t +-dsl_scan_count_data_disks(vdev_t *rvd) ++dsl_scan_count_data_disks(spa_t *spa) + { ++ vdev_t *rvd = spa->spa_root_vdev; + uint64_t i, leaves = 0; + + for (i = 0; i < rvd->vdev_children; i++) { +@@ -3637,6 +3652,16 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) + return; + } + ++ /* ++ * Disabled by default, set zfs_scan_report_txgs to report ++ * average performance over the last zfs_scan_report_txgs TXGs. ++ */ ++ if (!dsl_scan_is_paused_scrub(scn) && zfs_scan_report_txgs != 0 && ++ tx->tx_txg % zfs_scan_report_txgs == 0) { ++ scn->scn_issued_before_pass += spa->spa_scan_pass_issued; ++ spa_scan_stat_init(spa); ++ } ++ + /* + * It is possible to switch from unsorted to sorted at any time, + * but afterwards the scan will remain sorted unless reloaded from +@@ -3693,12 +3718,13 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) + taskqid_t prefetch_tqid; + + /* +- * Recalculate the max number of in-flight bytes for pool-wide +- * scanning operations (minimum 1MB). Limits for the issuing +- * phase are done per top-level vdev and are handled separately. 
++ * Calculate the max number of in-flight bytes for pool-wide ++ * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max). ++ * Limits for the issuing phase are done per top-level vdev and ++ * are handled separately. + */ +- scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * +- dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); ++ scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20, ++ zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa))); + + if (scnp->scn_ddt_bookmark.ddb_class <= + scnp->scn_ddt_class_max) { +@@ -3759,6 +3785,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) + if (scn->scn_is_sorted) { + scn->scn_checkpointing = B_TRUE; + scn->scn_clearing = B_TRUE; ++ scn->scn_issued_before_pass += ++ spa->spa_scan_pass_issued; ++ spa_scan_stat_init(spa); + } + zfs_dbgmsg("scan complete txg %llu", + (longlong_t)tx->tx_txg); +@@ -4485,6 +4514,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_strict_mem_lim, INT, ZMOD_RW, + ZFS_MODULE_PARAM(zfs, zfs_, scan_fill_weight, INT, ZMOD_RW, + "Tunable to adjust bias towards more filled segments during scans"); + ++ZFS_MODULE_PARAM(zfs, zfs_, scan_report_txgs, UINT, ZMOD_RW, ++ "Tunable to report resilver performance over the last N txgs"); ++ + ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW, + "Process all resilvers immediately"); + /* END CSTYLED */ +diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c +index f67a4eb22..139bb0acd 100644 +--- a/module/zfs/mmp.c ++++ b/module/zfs/mmp.c +@@ -444,7 +444,7 @@ mmp_write_uberblock(spa_t *spa) + uint64_t offset; + + hrtime_t lock_acquire_time = gethrtime(); +- spa_config_enter(spa, SCL_STATE, mmp_tag, RW_READER); ++ spa_config_enter_mmp(spa, SCL_STATE, mmp_tag, RW_READER); + lock_acquire_time = gethrtime() - lock_acquire_time; + if (lock_acquire_time > (MSEC2NSEC(MMP_MIN_INTERVAL) / 10)) + zfs_dbgmsg("MMP SCL_STATE acquisition pool '%s' took %llu ns " +diff --git a/module/zfs/spa.c b/module/zfs/spa.c +index 1ed79eed3..5f238e691 100644 +--- 
a/module/zfs/spa.c ++++ b/module/zfs/spa.c +@@ -33,6 +33,7 @@ + * Copyright 2017 Joyent, Inc. + * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2021, Colm Buckley ++ * Copyright (c) 2023 Hewlett Packard Enterprise Development LP. + */ + + /* +@@ -6261,6 +6262,16 @@ spa_tryimport(nvlist_t *tryconfig) + spa->spa_config_source = SPA_CONFIG_SRC_SCAN; + } + ++ /* ++ * spa_import() relies on a pool config fetched by spa_try_import() ++ * for spare/cache devices. Import flags are not passed to ++ * spa_tryimport(), which makes it return early due to a missing log ++ * device and missing retrieving the cache device and spare eventually. ++ * Passing ZFS_IMPORT_MISSING_LOG to spa_tryimport() makes it fetch ++ * the correct configuration regardless of the missing log device. ++ */ ++ spa->spa_import_flags |= ZFS_IMPORT_MISSING_LOG; ++ + error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING); + + /* +@@ -6747,9 +6758,11 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, + if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD)) + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + +- if (dsl_scan_resilvering(spa_get_dsl(spa))) ++ if (dsl_scan_resilvering(spa_get_dsl(spa)) || ++ dsl_scan_resilver_scheduled(spa_get_dsl(spa))) { + return (spa_vdev_exit(spa, NULL, txg, + ZFS_ERR_RESILVER_IN_PROGRESS)); ++ } + } else { + if (vdev_rebuild_active(rvd)) + return (spa_vdev_exit(spa, NULL, txg, +@@ -6987,7 +7000,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, + * Detach a device from a mirror or replacing vdev. + * + * If 'replace_done' is specified, only detach if the parent +- * is a replacing vdev. ++ * is a replacing or a spare vdev. 
+ */ + int + spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) +@@ -7294,6 +7307,10 @@ spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type, + vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE) { + mutex_exit(&vd->vdev_initialize_lock); + return (SET_ERROR(ESRCH)); ++ } else if (cmd_type == POOL_INITIALIZE_UNINIT && ++ vd->vdev_initialize_thread != NULL) { ++ mutex_exit(&vd->vdev_initialize_lock); ++ return (SET_ERROR(EBUSY)); + } + + switch (cmd_type) { +@@ -7306,6 +7323,9 @@ spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type, + case POOL_INITIALIZE_SUSPEND: + vdev_initialize_stop(vd, VDEV_INITIALIZE_SUSPENDED, vd_list); + break; ++ case POOL_INITIALIZE_UNINIT: ++ vdev_uninitialize(vd); ++ break; + default: + panic("invalid cmd_type %llu", (unsigned long long)cmd_type); + } +@@ -8210,7 +8230,8 @@ spa_async_thread(void *arg) + * If any devices are done replacing, detach them. + */ + if (tasks & SPA_ASYNC_RESILVER_DONE || +- tasks & SPA_ASYNC_REBUILD_DONE) { ++ tasks & SPA_ASYNC_REBUILD_DONE || ++ tasks & SPA_ASYNC_DETACH_SPARE) { + spa_vdev_resilver_done(spa); + } + +diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c +index a57f0727d..113943026 100644 +--- a/module/zfs/spa_misc.c ++++ b/module/zfs/spa_misc.c +@@ -494,8 +494,9 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw) + return (1); + } + +-void +-spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw) ++static void ++spa_config_enter_impl(spa_t *spa, int locks, const void *tag, krw_t rw, ++ int mmp_flag) + { + (void) tag; + int wlocks_held = 0; +@@ -510,7 +511,8 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw) + continue; + mutex_enter(&scl->scl_lock); + if (rw == RW_READER) { +- while (scl->scl_writer || scl->scl_write_wanted) { ++ while (scl->scl_writer || ++ (!mmp_flag && scl->scl_write_wanted)) { + cv_wait(&scl->scl_cv, &scl->scl_lock); + } + } else { +@@ -528,6 +530,27 @@ 
spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw) + ASSERT3U(wlocks_held, <=, locks); + } + ++void ++spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw) ++{ ++ spa_config_enter_impl(spa, locks, tag, rw, 0); ++} ++ ++/* ++ * The spa_config_enter_mmp() allows the mmp thread to cut in front of ++ * outstanding write lock requests. This is needed since the mmp updates are ++ * time sensitive and failure to service them promptly will result in a ++ * suspended pool. This pool suspension has been seen in practice when there is ++ * a single disk in a pool that is responding slowly and presumably about to ++ * fail. ++ */ ++ ++void ++spa_config_enter_mmp(spa_t *spa, int locks, const void *tag, krw_t rw) ++{ ++ spa_config_enter_impl(spa, locks, tag, rw, 1); ++} ++ + void + spa_config_exit(spa_t *spa, int locks, const void *tag) + { +@@ -2564,7 +2587,6 @@ spa_scan_stat_init(spa_t *spa) + spa->spa_scan_pass_scrub_spent_paused = 0; + spa->spa_scan_pass_exam = 0; + spa->spa_scan_pass_issued = 0; +- vdev_scan_stat_init(spa->spa_root_vdev); + } + + /* +diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c +index 4b9d7e7c0..57259b8ce 100644 +--- a/module/zfs/vdev.c ++++ b/module/zfs/vdev.c +@@ -28,7 +28,7 @@ + * Copyright 2017 Joyent, Inc. + * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2019, Datto Inc. All rights reserved. +- * Copyright [2021] Hewlett Packard Enterprise Development LP ++ * Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP. + */ + + #include +@@ -2645,6 +2645,17 @@ vdev_reopen(vdev_t *vd) + (void) vdev_validate(vd); + } + ++ /* ++ * Recheck if resilver is still needed and cancel any ++ * scheduled resilver if resilver is unneeded. 
++ */ ++ if (!vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL) && ++ spa->spa_async_tasks & SPA_ASYNC_RESILVER) { ++ mutex_enter(&spa->spa_async_lock); ++ spa->spa_async_tasks &= ~SPA_ASYNC_RESILVER; ++ mutex_exit(&spa->spa_async_lock); ++ } ++ + /* + * Reassess parent vdev's health. + */ +@@ -3983,11 +3994,18 @@ vdev_remove_wanted(spa_t *spa, uint64_t guid) + return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV))); + + /* +- * If the vdev is already removed, then don't do anything. ++ * If the vdev is already removed, or expanding which can trigger ++ * repartition add/remove events, then don't do anything. + */ +- if (vd->vdev_removed) ++ if (vd->vdev_removed || vd->vdev_expanding) + return (spa_vdev_state_exit(spa, NULL, 0)); + ++ /* ++ * Confirm the vdev has been removed, otherwise don't do anything. ++ */ ++ if (vd->vdev_ops->vdev_op_leaf && !zio_wait(vdev_probe(vd, NULL))) ++ return (spa_vdev_state_exit(spa, NULL, SET_ERROR(EEXIST))); ++ + vd->vdev_remove_wanted = B_TRUE; + spa_async_request(spa, SPA_ASYNC_REMOVE); + +@@ -4085,9 +4103,19 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate) + + if (wasoffline || + (oldstate < VDEV_STATE_DEGRADED && +- vd->vdev_state >= VDEV_STATE_DEGRADED)) ++ vd->vdev_state >= VDEV_STATE_DEGRADED)) { + spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_ONLINE); + ++ /* ++ * Asynchronously detach spare vdev if resilver or ++ * rebuild is not required ++ */ ++ if (vd->vdev_unspare && ++ !dsl_scan_resilvering(spa->spa_dsl_pool) && ++ !dsl_scan_resilver_scheduled(spa->spa_dsl_pool) && ++ !vdev_rebuild_active(tvd)) ++ spa_async_request(spa, SPA_ASYNC_DETACH_SPARE); ++ } + return (spa_vdev_state_exit(spa, vd, 0)); + } + +diff --git a/module/zfs/vdev_initialize.c b/module/zfs/vdev_initialize.c +index 6ffd0d618..5d90fd67c 100644 +--- a/module/zfs/vdev_initialize.c ++++ b/module/zfs/vdev_initialize.c +@@ -100,6 +100,39 @@ vdev_initialize_zap_update_sync(void *arg, dmu_tx_t *tx) + &initialize_state, tx)); 
+ } + ++static void ++vdev_initialize_zap_remove_sync(void *arg, dmu_tx_t *tx) ++{ ++ uint64_t guid = *(uint64_t *)arg; ++ ++ kmem_free(arg, sizeof (uint64_t)); ++ ++ vdev_t *vd = spa_lookup_by_guid(tx->tx_pool->dp_spa, guid, B_FALSE); ++ if (vd == NULL || vd->vdev_top->vdev_removing || !vdev_is_concrete(vd)) ++ return; ++ ++ ASSERT3S(vd->vdev_initialize_state, ==, VDEV_INITIALIZE_NONE); ++ ASSERT3U(vd->vdev_leaf_zap, !=, 0); ++ ++ vd->vdev_initialize_last_offset = 0; ++ vd->vdev_initialize_action_time = 0; ++ ++ objset_t *mos = vd->vdev_spa->spa_meta_objset; ++ int error; ++ ++ error = zap_remove(mos, vd->vdev_leaf_zap, ++ VDEV_LEAF_ZAP_INITIALIZE_LAST_OFFSET, tx); ++ VERIFY(error == 0 || error == ENOENT); ++ ++ error = zap_remove(mos, vd->vdev_leaf_zap, ++ VDEV_LEAF_ZAP_INITIALIZE_STATE, tx); ++ VERIFY(error == 0 || error == ENOENT); ++ ++ error = zap_remove(mos, vd->vdev_leaf_zap, ++ VDEV_LEAF_ZAP_INITIALIZE_ACTION_TIME, tx); ++ VERIFY(error == 0 || error == ENOENT); ++} ++ + static void + vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state) + { +@@ -127,8 +160,14 @@ vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state) + + dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); + VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); +- dsl_sync_task_nowait(spa_get_dsl(spa), vdev_initialize_zap_update_sync, +- guid, tx); ++ ++ if (new_state != VDEV_INITIALIZE_NONE) { ++ dsl_sync_task_nowait(spa_get_dsl(spa), ++ vdev_initialize_zap_update_sync, guid, tx); ++ } else { ++ dsl_sync_task_nowait(spa_get_dsl(spa), ++ vdev_initialize_zap_remove_sync, guid, tx); ++ } + + switch (new_state) { + case VDEV_INITIALIZE_ACTIVE: +@@ -149,6 +188,10 @@ vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state) + spa_history_log_internal(spa, "initialize", tx, + "vdev=%s complete", vd->vdev_path); + break; ++ case VDEV_INITIALIZE_NONE: ++ spa_history_log_internal(spa, "uninitialize", tx, ++ "vdev=%s", vd->vdev_path); ++ 
break; + default: + panic("invalid state %llu", (unsigned long long)new_state); + } +@@ -604,6 +647,24 @@ vdev_initialize(vdev_t *vd) + vdev_initialize_thread, vd, 0, &p0, TS_RUN, maxclsyspri); + } + ++/* ++ * Uninitializes a device. Caller must hold vdev_initialize_lock. ++ * Device must be a leaf and not already be initializing. ++ */ ++void ++vdev_uninitialize(vdev_t *vd) ++{ ++ ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock)); ++ ASSERT(vd->vdev_ops->vdev_op_leaf); ++ ASSERT(vdev_is_concrete(vd)); ++ ASSERT3P(vd->vdev_initialize_thread, ==, NULL); ++ ASSERT(!vd->vdev_detached); ++ ASSERT(!vd->vdev_initialize_exit_wanted); ++ ASSERT(!vd->vdev_top->vdev_removing); ++ ++ vdev_initialize_change_state(vd, VDEV_INITIALIZE_NONE); ++} ++ + /* + * Wait for the initialize thread to be terminated (cancelled or stopped). + */ +@@ -760,6 +821,7 @@ vdev_initialize_restart(vdev_t *vd) + } + + EXPORT_SYMBOL(vdev_initialize); ++EXPORT_SYMBOL(vdev_uninitialize); + EXPORT_SYMBOL(vdev_initialize_stop); + EXPORT_SYMBOL(vdev_initialize_stop_all); + EXPORT_SYMBOL(vdev_initialize_stop_wait); +diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c +index 9dfbe0cf6..b180fa146 100644 +--- a/module/zfs/vdev_rebuild.c ++++ b/module/zfs/vdev_rebuild.c +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -116,13 +117,12 @@ unsigned long zfs_rebuild_max_segment = 1024 * 1024; + * segment size is also large (zfs_rebuild_max_segment=1M). This helps keep + * the queue depth short. + * +- * 32MB was selected as the default value to achieve good performance with +- * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential +- * rebuild was unable to saturate all of the drives using smaller values. +- * With a value of 32MB the sequential resilver write rate was measured at +- * 800MB/s sustained while rebuilding to a distributed spare. ++ * 64MB was observed to deliver the best performance and set as the default. 
++ * Testing was performed with a 106-drive dRAID HDD pool (draid2:11d:106c) ++ * and a rebuild rate of 1.2GB/s was measured to the distribute spare. ++ * Smaller values were unable to fully saturate the available pool I/O. + */ +-unsigned long zfs_rebuild_vdev_limit = 32 << 20; ++unsigned long zfs_rebuild_vdev_limit = 64 << 20; + + /* + * Automatically start a pool scrub when the last active sequential resilver +@@ -754,6 +754,7 @@ vdev_rebuild_thread(void *arg) + { + vdev_t *vd = arg; + spa_t *spa = vd->vdev_spa; ++ vdev_t *rvd = spa->spa_root_vdev; + int error = 0; + + /* +@@ -786,9 +787,6 @@ vdev_rebuild_thread(void *arg) + vr->vr_pass_bytes_scanned = 0; + vr->vr_pass_bytes_issued = 0; + +- vr->vr_bytes_inflight_max = MAX(1ULL << 20, +- zfs_rebuild_vdev_limit * vd->vdev_children); +- + uint64_t update_est_time = gethrtime(); + vdev_rebuild_update_bytes_est(vd, 0); + +@@ -804,6 +802,17 @@ vdev_rebuild_thread(void *arg) + metaslab_t *msp = vd->vdev_ms[i]; + vr->vr_scan_msp = msp; + ++ /* ++ * Calculate the max number of in-flight bytes for top-level ++ * vdev scanning operations (minimum 1MB, maximum 1/4 of ++ * arc_c_max shared by all top-level vdevs). Limits for the ++ * issuing phase are done per top-level vdev and are handled ++ * separately. ++ */ ++ uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1); ++ vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20, ++ zfs_rebuild_vdev_limit * vd->vdev_children)); ++ + /* + * Removal of vdevs from the vdev tree may eliminate the need + * for the rebuild, in which case it should be canceled. 
The +diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c +index a4b391cbe..f441328f3 100644 +--- a/module/zfs/zfs_ioctl.c ++++ b/module/zfs/zfs_ioctl.c +@@ -3985,7 +3985,8 @@ zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) + + if (!(cmd_type == POOL_INITIALIZE_CANCEL || + cmd_type == POOL_INITIALIZE_START || +- cmd_type == POOL_INITIALIZE_SUSPEND)) { ++ cmd_type == POOL_INITIALIZE_SUSPEND || ++ cmd_type == POOL_INITIALIZE_UNINIT)) { + return (SET_ERROR(EINVAL)); + } + +diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c +index b9498d17e..0987fd0f7 100644 +--- a/module/zfs/zfs_vnops.c ++++ b/module/zfs/zfs_vnops.c +@@ -68,7 +68,9 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr) + if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); ++ atomic_inc_32(&zp->z_sync_writes_cnt); + zil_commit(zfsvfs->z_log, zp->z_id); ++ atomic_dec_32(&zp->z_sync_writes_cnt); + ZFS_EXIT(zfsvfs); + } + tsd_set(zfs_fsyncer_key, NULL); +@@ -102,7 +104,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off) + hole = B_FALSE; + + /* Flush any mmap()'d data to disk */ +- if (zn_has_cached_data(zp)) ++ if (zn_has_cached_data(zp, 0, file_sz - 1)) + zn_flush_cached_data(zp, B_FALSE); + + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER); +@@ -275,7 +277,8 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) + error = mappedread_sf(zp, nbytes, uio); + else + #endif +- if (zn_has_cached_data(zp) && !(ioflag & O_DIRECT)) { ++ if (zn_has_cached_data(zp, zfs_uio_offset(uio), ++ zfs_uio_offset(uio) + nbytes - 1) && !(ioflag & O_DIRECT)) { + error = mappedread(zp, nbytes, uio); + } else { + error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), +@@ -686,7 +689,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) + zfs_uioskip(uio, nbytes); + tx_bytes = nbytes; + } +- if (tx_bytes && zn_has_cached_data(zp) && ++ if (tx_bytes && ++ zn_has_cached_data(zp, woff, woff 
+ tx_bytes - 1) && + !(ioflag & O_DIRECT)) { + update_pages(zp, woff, tx_bytes, zfsvfs->z_os); + } +diff --git a/module/zfs/zil.c b/module/zfs/zil.c +index aaf509a2f..f2aaeb550 100644 +--- a/module/zfs/zil.c ++++ b/module/zfs/zil.c +@@ -226,11 +226,10 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp) + */ + static int + zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp, +- blkptr_t *nbp, void *dst, char **end) ++ blkptr_t *nbp, char **begin, char **end, arc_buf_t **abuf) + { + enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; + arc_flags_t aflags = ARC_FLAG_WAIT; +- arc_buf_t *abuf = NULL; + zbookmark_phys_t zb; + int error; + +@@ -247,7 +246,7 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp, + ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); + + error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, +- &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); ++ abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); + + if (error == 0) { + zio_cksum_t cksum = bp->blk_cksum; +@@ -262,23 +261,23 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp, + */ + cksum.zc_word[ZIL_ZC_SEQ]++; + ++ uint64_t size = BP_GET_LSIZE(bp); + if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) { +- zil_chain_t *zilc = abuf->b_data; ++ zil_chain_t *zilc = (*abuf)->b_data; + char *lr = (char *)(zilc + 1); +- uint64_t len = zilc->zc_nused - sizeof (zil_chain_t); + + if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum, +- sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) { ++ sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) || ++ zilc->zc_nused < sizeof (*zilc) || ++ zilc->zc_nused > size) { + error = SET_ERROR(ECKSUM); + } else { +- ASSERT3U(len, <=, SPA_OLD_MAXBLOCKSIZE); +- bcopy(lr, dst, len); +- *end = (char *)dst + len; ++ *begin = lr; ++ *end = lr + zilc->zc_nused - sizeof (*zilc); + *nbp = zilc->zc_next_blk; + } + } else { +- char *lr = abuf->b_data; +- uint64_t size = BP_GET_LSIZE(bp); ++ char *lr = 
(*abuf)->b_data; + zil_chain_t *zilc = (zil_chain_t *)(lr + size) - 1; + + if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum, +@@ -286,15 +285,11 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp, + (zilc->zc_nused > (size - sizeof (*zilc)))) { + error = SET_ERROR(ECKSUM); + } else { +- ASSERT3U(zilc->zc_nused, <=, +- SPA_OLD_MAXBLOCKSIZE); +- bcopy(lr, dst, zilc->zc_nused); +- *end = (char *)dst + zilc->zc_nused; ++ *begin = lr; ++ *end = lr + zilc->zc_nused; + *nbp = zilc->zc_next_blk; + } + } +- +- arc_buf_destroy(abuf, &abuf); + } + + return (error); +@@ -362,7 +357,6 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, + uint64_t blk_count = 0; + uint64_t lr_count = 0; + blkptr_t blk, next_blk; +- char *lrbuf, *lrp; + int error = 0; + + bzero(&next_blk, sizeof (blkptr_t)); +@@ -382,13 +376,13 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, + * If the log has been claimed, stop if we encounter a sequence + * number greater than the highest claimed sequence number. 
+ */ +- lrbuf = zio_buf_alloc(SPA_OLD_MAXBLOCKSIZE); + zil_bp_tree_init(zilog); + + for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) { + uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ]; + int reclen; +- char *end = NULL; ++ char *lrp, *end; ++ arc_buf_t *abuf = NULL; + + if (blk_seq > claim_blk_seq) + break; +@@ -404,8 +398,10 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, + break; + + error = zil_read_log_block(zilog, decrypt, &blk, &next_blk, +- lrbuf, &end); ++ &lrp, &end, &abuf); + if (error != 0) { ++ if (abuf) ++ arc_buf_destroy(abuf, &abuf); + if (claimed) { + char name[ZFS_MAX_DATASET_NAME_LEN]; + +@@ -418,7 +414,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, + break; + } + +- for (lrp = lrbuf; lrp < end; lrp += reclen) { ++ for (; lrp < end; lrp += reclen) { + lr_t *lr = (lr_t *)lrp; + reclen = lr->lrc_reclen; + ASSERT3U(reclen, >=, sizeof (lr_t)); +@@ -432,6 +428,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, + max_lr_seq = lr->lrc_seq; + lr_count++; + } ++ arc_buf_destroy(abuf, &abuf); + } + done: + zilog->zl_parse_error = error; +@@ -441,7 +438,6 @@ done: + zilog->zl_parse_lr_count = lr_count; + + zil_bp_tree_fini(zilog); +- zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE); + + return (error); + } +@@ -1593,6 +1589,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) + wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, uint64_t); + ASSERT3U(wsz, <=, lwb->lwb_sz); + zio_shrink(lwb->lwb_write_zio, wsz); ++ wsz = lwb->lwb_write_zio->io_size; + + } else { + wsz = lwb->lwb_sz; +@@ -2848,7 +2845,14 @@ static void + zil_commit_itx_assign(zilog_t *zilog, zil_commit_waiter_t *zcw) + { + dmu_tx_t *tx = dmu_tx_create(zilog->zl_os); +- VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); ++ ++ /* ++ * Since we are not going to create any new dirty data, and we ++ * can even help with clearing the existing dirty data, we ++ * should not be subject to the dirty data based delays. 
We ++ * use TXG_NOTHROTTLE to bypass the delay mechanism. ++ */ ++ VERIFY0(dmu_tx_assign(tx, TXG_WAIT | TXG_NOTHROTTLE)); + + itx_t *itx = zil_itx_create(TX_COMMIT, sizeof (lr_t)); + itx->itx_sync = B_TRUE; +diff --git a/module/zfs/zio.c b/module/zfs/zio.c +index 700f87910..c367ef721 100644 +--- a/module/zfs/zio.c ++++ b/module/zfs/zio.c +@@ -2287,7 +2287,7 @@ zio_nowait(zio_t *zio) + ASSERT3P(zio->io_executor, ==, NULL); + + if (zio->io_child_type == ZIO_CHILD_LOGICAL && +- zio_unique_parent(zio) == NULL) { ++ list_is_empty(&zio->io_parent_list)) { + zio_t *pio; + + /* +diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run +index 554cf96f8..6c2296d4c 100644 +--- a/tests/runfiles/common.run ++++ b/tests/runfiles/common.run +@@ -37,7 +37,7 @@ tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos', + 'alloc_class_004_pos', 'alloc_class_005_pos', 'alloc_class_006_pos', + 'alloc_class_007_pos', 'alloc_class_008_pos', 'alloc_class_009_pos', + 'alloc_class_010_pos', 'alloc_class_011_neg', 'alloc_class_012_pos', +- 'alloc_class_013_pos'] ++ 'alloc_class_013_pos', 'alloc_class_014_neg', 'alloc_class_015_pos'] + tags = ['functional', 'alloc_class'] + + [tests/functional/arc] +@@ -407,7 +407,7 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos', + 'import_cachefile_mirror_detached', + 'import_cachefile_paths_changed', + 'import_cachefile_shared_device', +- 'import_devices_missing', ++ 'import_devices_missing', 'import_log_missing', + 'import_paths_changed', + 'import_rewind_config_changed', + 'import_rewind_device_replaced'] +@@ -431,6 +431,7 @@ tests = ['zpool_initialize_attach_detach_add_remove', + 'zpool_initialize_start_and_cancel_neg', + 'zpool_initialize_start_and_cancel_pos', + 'zpool_initialize_suspend_resume', ++ 'zpool_initialize_uninit', + 'zpool_initialize_unsupported_vdevs', + 'zpool_initialize_verify_checksums', + 'zpool_initialize_verify_initialized'] +@@ -456,7 +457,8 @@ tests = ['zpool_replace_001_neg', 
'replace-o_ashift', 'replace_prop_ashift'] + tags = ['functional', 'cli_root', 'zpool_replace'] + + [tests/functional/cli_root/zpool_resilver] +-tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart'] ++tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart', ++ 'zpool_resilver_concurrent'] + tags = ['functional', 'cli_root', 'zpool_resilver'] + + [tests/functional/cli_root/zpool_scrub] +@@ -669,7 +671,8 @@ tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos', + tags = ['functional', 'migration'] + + [tests/functional/mmap] +-tests = ['mmap_write_001_pos', 'mmap_read_001_pos', 'mmap_seek_001_pos'] ++tests = ['mmap_mixed', 'mmap_read_001_pos', 'mmap_seek_001_pos', ++ 'mmap_write_001_pos', 'mmap_sync_001_pos'] + tags = ['functional', 'mmap'] + + [tests/functional/mount] +@@ -823,9 +826,9 @@ tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos', + 'send-c_mixed_compression', 'send-c_stream_size_estimate', + 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', + 'send-c_recv_dedup', 'send-L_toggle', +- 'send_encrypted_incremental.ksh', 'send_encrypted_hierarchy', +- 'send_encrypted_props', 'send_encrypted_truncated_files', +- 'send_freeobjects', 'send_realloc_files', ++ 'send_encrypted_incremental.ksh', 'send_encrypted_freeobjects', ++ 'send_encrypted_hierarchy', 'send_encrypted_props', ++ 'send_encrypted_truncated_files', 'send_freeobjects', 'send_realloc_files', + 'send_realloc_encrypted_files', 'send_spill_block', 'send_holds', + 'send_hole_birth', 'send_mixed_raw', 'send-wR_encrypted_zvol', + 'send_partial_dataset', 'send_invalid', 'send_doall', +diff --git a/tests/runfiles/sanity.run b/tests/runfiles/sanity.run +index fb39fa54b..0a3d42cb2 100644 +--- a/tests/runfiles/sanity.run ++++ b/tests/runfiles/sanity.run +@@ -547,6 +547,7 @@ tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos', + 'rsend_014_pos', 'rsend_016_neg', 'send-c_verify_contents', + 'send-c_volume', 'send-c_zstreamdump', 
'send-c_recv_dedup', + 'send-L_toggle', 'send_encrypted_hierarchy', 'send_encrypted_props', ++ 'send_encrypted_freeobjects', + 'send_encrypted_truncated_files', 'send_freeobjects', 'send_holds', + 'send_mixed_raw', 'send-wR_encrypted_zvol', 'send_partial_dataset', + 'send_invalid'] +diff --git a/tests/test-runner/bin/test-runner.py.in b/tests/test-runner/bin/test-runner.py.in +index a652d3d4a..5c868d945 100755 +--- a/tests/test-runner/bin/test-runner.py.in ++++ b/tests/test-runner/bin/test-runner.py.in +@@ -33,7 +33,7 @@ from subprocess import PIPE + from subprocess import Popen + from subprocess import check_output + from threading import Timer +-from time import time, CLOCK_MONOTONIC_RAW ++from time import time, CLOCK_MONOTONIC + from os.path import exists + + BASEDIR = '/var/tmp/test_results' +@@ -62,7 +62,7 @@ clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)] + + def monotonic_time(): + t = timespec() +- if clock_gettime(CLOCK_MONOTONIC_RAW, ctypes.pointer(t)) != 0: ++ if clock_gettime(CLOCK_MONOTONIC, ctypes.pointer(t)) != 0: + errno_ = ctypes.get_errno() + raise OSError(errno_, os.strerror(errno_)) + return t.tv_sec + t.tv_nsec * 1e-9 +diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in +index 432899c21..878b30025 100755 +--- a/tests/test-runner/bin/zts-report.py.in ++++ b/tests/test-runner/bin/zts-report.py.in +@@ -183,10 +183,13 @@ if sys.platform.startswith('freebsd'): + known.update({ + 'cli_root/zfs_receive/receive-o-x_props_override': + ['FAIL', known_reason], ++ 'cli_root/zpool_resilver/zpool_resilver_concurrent': ++ ['SKIP', na_reason], + 'cli_root/zpool_wait/zpool_wait_trim_basic': ['SKIP', trim_reason], + 'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason], + 'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason], + 'link_count/link_count_001': ['SKIP', na_reason], ++ 'mmap/mmap_sync_001_pos': ['SKIP', na_reason], + }) + elif sys.platform.startswith('linux'): + 
known.update({ +@@ -210,6 +213,7 @@ elif sys.platform.startswith('linux'): + # reasons listed above can be used. + # + maybe = { ++ 'threadsappend/threadsappend_001_pos': ['FAIL', 6136], + 'chattr/setup': ['SKIP', exec_reason], + 'crtime/crtime_001_pos': ['SKIP', statx_reason], + 'cli_root/zdb/zdb_006_pos': ['FAIL', known_reason], +@@ -243,6 +247,7 @@ maybe = { + 'mmp/mmp_on_uberblocks': ['FAIL', known_reason], + 'pyzfs/pyzfs_unittest': ['SKIP', python_deps_reason], + 'pool_checkpoint/checkpoint_discard_busy': ['FAIL', '11946'], ++ 'pam/setup': ['SKIP', "pamtester might be not available"], + 'projectquota/setup': ['SKIP', exec_reason], + 'removal/removal_condense_export': ['FAIL', known_reason], + 'reservation/reservation_008_pos': ['FAIL', '7741'], +@@ -252,14 +257,12 @@ maybe = { + 'snapshot/snapshot_010_pos': ['FAIL', '7961'], + 'snapused/snapused_004_pos': ['FAIL', '5513'], + 'tmpfile/setup': ['SKIP', tmpfile_reason], +- 'threadsappend/threadsappend_001_pos': ['FAIL', '6136'], + 'trim/setup': ['SKIP', trim_reason], + 'upgrade/upgrade_projectquota_001_pos': ['SKIP', project_id_reason], + 'user_namespace/setup': ['SKIP', user_ns_reason], + 'userquota/setup': ['SKIP', exec_reason], +- 'vdev_zaps/vdev_zaps_004_pos': ['FAIL', '6935'], ++ 'vdev_zaps/vdev_zaps_004_pos': ['FAIL', known_reason], + 'zvol/zvol_ENOSPC/zvol_ENOSPC_001_pos': ['FAIL', '5848'], +- 'pam/setup': ['SKIP', "pamtester might be not available"], + } + + if sys.platform.startswith('freebsd'): +@@ -275,12 +278,18 @@ if sys.platform.startswith('freebsd'): + 'resilver/resilver_restart_001': ['FAIL', known_reason], + 'pool_checkpoint/checkpoint_big_rewind': ['FAIL', '12622'], + 'pool_checkpoint/checkpoint_indirect': ['FAIL', '12623'], ++ 'snapshot/snapshot_002_pos': ['FAIL', '14831'], + }) + elif sys.platform.startswith('linux'): + maybe.update({ + 'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason], + 'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason], +- 
'fault/auto_spare_shared': ['FAIL', '11889'], ++ 'fault/auto_online_002_pos': ['FAIL', 11889], ++ 'fault/auto_replace_001_pos': ['FAIL', 14851], ++ 'fault/auto_spare_002_pos': ['FAIL', 11889], ++ 'fault/auto_spare_multiple': ['FAIL', 11889], ++ 'fault/auto_spare_shared': ['FAIL', 11889], ++ 'fault/decompress_fault': ['FAIL', 11889], + 'io/io_uring': ['SKIP', 'io_uring support required'], + 'limits/filesystem_limit': ['SKIP', known_reason], + 'limits/snapshot_limit': ['SKIP', known_reason], +diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am +index d1c29fcd1..7ec4cb619 100644 +--- a/tests/zfs-tests/cmd/Makefile.am ++++ b/tests/zfs-tests/cmd/Makefile.am +@@ -20,6 +20,7 @@ SUBDIRS = \ + mmap_exec \ + mmap_libaio \ + mmap_seek \ ++ mmap_sync \ + mmapwrite \ + nvlist_to_lua \ + randwritecomp \ +diff --git a/tests/zfs-tests/cmd/mmap_sync/.gitignore b/tests/zfs-tests/cmd/mmap_sync/.gitignore +new file mode 100644 +index 000000000..c721f472b +--- /dev/null ++++ b/tests/zfs-tests/cmd/mmap_sync/.gitignore +@@ -0,0 +1 @@ ++/mmap_sync +diff --git a/tests/zfs-tests/cmd/mmap_sync/Makefile.am b/tests/zfs-tests/cmd/mmap_sync/Makefile.am +new file mode 100644 +index 000000000..313e8db5c +--- /dev/null ++++ b/tests/zfs-tests/cmd/mmap_sync/Makefile.am +@@ -0,0 +1,6 @@ ++include $(top_srcdir)/config/Rules.am ++ ++pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin ++ ++pkgexec_PROGRAMS = mmap_sync ++mmap_sync_SOURCES = mmap_sync.c +diff --git a/tests/zfs-tests/cmd/mmap_sync/mmap_sync.c b/tests/zfs-tests/cmd/mmap_sync/mmap_sync.c +new file mode 100644 +index 000000000..226e71be2 +--- /dev/null ++++ b/tests/zfs-tests/cmd/mmap_sync/mmap_sync.c +@@ -0,0 +1,152 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. 
++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://opensource.org/licenses/CDDL-1.0. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static void ++cleanup(char *file) ++{ ++ (void) remove(file); ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ char *testdir = getenv("TESTDIR"); ++ if (!testdir) { ++ fprintf(stderr, "environment variable TESTDIR not set\n"); ++ return (1); ++ } ++ ++ struct stat st; ++ umask(0); ++ if (stat(testdir, &st) != 0 && ++ mkdir(testdir, 0777) != 0) { ++ perror("mkdir"); ++ return (1); ++ } ++ ++ if (argc > 3) { ++ fprintf(stderr, "usage: %s " ++ "[run time in mins] " ++ "[max msync time in ms]\n", argv[0]); ++ return (1); ++ } ++ ++ int run_time_mins = 1; ++ if (argc >= 2) { ++ run_time_mins = atoi(argv[1]); ++ } ++ ++ int max_msync_time_ms = 1000; ++ if (argc >= 3) { ++ max_msync_time_ms = atoi(argv[2]); ++ } ++ ++ char filepath[512]; ++ filepath[0] = '\0'; ++ char *file = &filepath[0]; ++ ++ (void) snprintf(file, 512, "%s/msync_file", testdir); ++ ++ const int LEN = 8; ++ cleanup(file); ++ ++ int fd = open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR | ++ S_IRGRP | S_IROTH); ++ ++ if (fd == -1) { ++ (void) fprintf(stderr, "%s: %s: ", argv[0], file); ++ perror("open"); ++ return (1); ++ } ++ ++ if (ftruncate(fd, LEN) != 0) { ++ perror("ftruncate"); ++ cleanup(file); ++ return (1); ++ } ++ ++ void *ptr = mmap(NULL, LEN, PROT_READ | PROT_WRITE, 
MAP_SHARED, fd, 0); ++ ++ if (ptr == MAP_FAILED) { ++ perror("mmap"); ++ cleanup(file); ++ return (1); ++ } ++ ++ struct timeval tstart; ++ gettimeofday(&tstart, NULL); ++ ++ long long x = 0LL; ++ ++ for (;;) { ++ *((long long *)ptr) = x; ++ x++; ++ ++ struct timeval t1, t2; ++ gettimeofday(&t1, NULL); ++ if (msync(ptr, LEN, MS_SYNC|MS_INVALIDATE) != 0) { ++ perror("msync"); ++ cleanup(file); ++ return (1); ++ } ++ ++ gettimeofday(&t2, NULL); ++ ++ double elapsed = (t2.tv_sec - t1.tv_sec) * 1000.0; ++ elapsed += ((t2.tv_usec - t1.tv_usec) / 1000.0); ++ if (elapsed > max_msync_time_ms) { ++ fprintf(stderr, "slow msync: %f ms\n", elapsed); ++ if (munmap(ptr, LEN) != 0) ++ perror("munmap"); ++ cleanup(file); ++ return (1); ++ } ++ ++ double elapsed_start = (t2.tv_sec - tstart.tv_sec) * 1000.0; ++ elapsed_start += ((t2.tv_usec - tstart.tv_usec) / 1000.0); ++ if (elapsed_start > run_time_mins * 60 * 1000) { ++ break; ++ } ++ } ++ ++ if (munmap(ptr, LEN) != 0) { ++ perror("munmap"); ++ cleanup(file); ++ return (1); ++ } ++ ++ if (close(fd) != 0) { ++ perror("close"); ++ } ++ ++ cleanup(file); ++ return (0); ++} +diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg +index 78802c9fb..8ac38dfd8 100644 +--- a/tests/zfs-tests/include/commands.cfg ++++ b/tests/zfs-tests/include/commands.cfg +@@ -207,6 +207,7 @@ export ZFSTEST_FILES='badsend + mmap_exec + mmap_libaio + mmap_seek ++ mmap_sync + mmapwrite + nvlist_to_lua + randfree_file +diff --git a/tests/zfs-tests/tests/functional/alloc_class/Makefile.am b/tests/zfs-tests/tests/functional/alloc_class/Makefile.am +index 7cffb2eac..82fd9f340 100644 +--- a/tests/zfs-tests/tests/functional/alloc_class/Makefile.am ++++ b/tests/zfs-tests/tests/functional/alloc_class/Makefile.am +@@ -14,7 +14,9 @@ dist_pkgdata_SCRIPTS = \ + alloc_class_010_pos.ksh \ + alloc_class_011_neg.ksh \ + alloc_class_012_pos.ksh \ +- alloc_class_013_pos.ksh ++ alloc_class_013_pos.ksh \ ++ alloc_class_014_neg.ksh \ ++ 
alloc_class_015_pos.ksh + + dist_pkgdata_DATA = \ + alloc_class.cfg \ +diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh +index 2ce22a624..790a47f26 100755 +--- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh ++++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh +@@ -42,7 +42,8 @@ log_must display_status "$TESTPOOL" + + log_must zfs create -o dedup=on -V 2G $TESTPOOL/$TESTVOL + +-log_must eval "new_fs $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL >/dev/null 2>&1" ++block_device_wait "$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL" ++log_must eval "new_fs $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL >/dev/null" + + sync_pool + log_must zpool list -v $TESTPOOL +diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh +new file mode 100755 +index 000000000..1b52014fd +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh +@@ -0,0 +1,38 @@ ++#!/bin/ksh -p ++ ++# ++# This file and its contents are supplied under the terms of the ++# Common Development and Distribution License ("CDDL"), version 1.0. ++# You may only use this file in accordance with the terms of version ++# 1.0 of the CDDL. ++# ++# A full copy of the text of the CDDL should have accompanied this ++# source. A copy of the CDDL is also available via the Internet at ++# http://www.illumos.org/license/CDDL. ++# ++ ++. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib ++ ++# ++# DESCRIPTION: ++# Setting the special_small_blocks property greater than recordsize fails. 
++# ++ ++verify_runnable "global" ++ ++claim="Setting the special_small_blocks property greater than recordsize fails" ++ ++log_assert $claim ++log_onexit cleanup ++log_must disk_setup ++ ++for size in 512 4096 32768 131072 524288 1048576 ++do ++ let bigger=$size*2 ++ log_mustnot zpool create -O recordsize=$size \ ++ -O special_small_blocks=$bigger \ ++ $TESTPOOL raidz $ZPOOL_DISKS special mirror \ ++ $CLASS_DISK0 $CLASS_DISK1 ++done ++ ++log_pass $claim +diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh +new file mode 100755 +index 000000000..49c468af6 +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh +@@ -0,0 +1,45 @@ ++#!/bin/ksh -p ++ ++# ++# This file and its contents are supplied under the terms of the ++# Common Development and Distribution License ("CDDL"), version 1.0. ++# You may only use this file in accordance with the terms of version ++# 1.0 of the CDDL. ++# ++# A full copy of the text of the CDDL should have accompanied this ++# source. A copy of the CDDL is also available via the Internet at ++# http://www.illumos.org/license/CDDL. ++# ++ ++. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib ++ ++# ++# DESCRIPTION: ++# Can set special_small_blocks property less than or equal to recordsize. 
++# ++ ++verify_runnable "global" ++ ++claim="Can set special_small_blocks property less than or equal to recordsize" ++ ++log_assert $claim ++log_onexit cleanup ++log_must disk_setup ++ ++for size in 8192 32768 131072 524288 1048576 ++do ++ let smaller=$size/2 ++ log_must zpool create -O recordsize=$size \ ++ -O special_small_blocks=$smaller \ ++ $TESTPOOL raidz $ZPOOL_DISKS special mirror \ ++ $CLASS_DISK0 $CLASS_DISK1 ++ log_must zpool destroy -f "$TESTPOOL" ++ ++ log_must zpool create -O recordsize=$size \ ++ -O special_small_blocks=$size \ ++ $TESTPOOL raidz $ZPOOL_DISKS special mirror \ ++ $CLASS_DISK0 $CLASS_DISK1 ++ log_must zpool destroy -f "$TESTPOOL" ++done ++ ++log_pass $claim +diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am +index a8c9a31dc..4230ec557 100644 +--- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am ++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am +@@ -12,6 +12,7 @@ dist_pkgdata_SCRIPTS = \ + import_cachefile_paths_changed.ksh \ + import_cachefile_shared_device.ksh \ + import_devices_missing.ksh \ ++ import_log_missing.ksh \ + import_paths_changed.ksh \ + import_rewind_config_changed.ksh \ + import_rewind_device_replaced.ksh \ +diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh +new file mode 100755 +index 000000000..f12cac785 +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh +@@ -0,0 +1,75 @@ ++#!/bin/ksh -p ++ ++# ++# This file and its contents are supplied under the terms of the ++# Common Development and Distribution License ("CDDL"), version 1.0. ++# You may only use this file in accordance with the terms of version ++# 1.0 of the CDDL. ++# ++# A full copy of the text of the CDDL should have accompanied this ++# source. 
A copy of the CDDL is also available via the Internet at ++# http://www.illumos.org/license/CDDL. ++# ++ ++. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib ++ ++# ++# DESCRIPTION: ++# Import with missing log device should not remove spare/cache. ++# ++# STRATEGY: ++# 1. Create a pool. ++# 2. Add spare, cache and log devices to the pool. ++# 3. Export the pool. ++# 4. Remove the log device. ++# 5. Import the pool with -m flag. ++# 6. Verify that spare and cache are still present in the pool. ++# ++ ++verify_runnable "global" ++ ++log_onexit cleanup ++ ++function test_missing_log ++{ ++ typeset poolcreate="$1" ++ typeset cachevdev="$2" ++ typeset sparevdev="$3" ++ typeset logvdev="$4" ++ typeset missingvdev="$4" ++ ++ log_note "$0: pool '$poolcreate', adding $cachevdev, $sparevdev," \ ++ "$logvdev then moving away $missingvdev." ++ ++ log_must zpool create $TESTPOOL1 $poolcreate ++ ++ log_must zpool add $TESTPOOL1 cache $cachevdev spare $sparevdev \ ++ log $logvdev ++ ++ log_must_busy zpool export $TESTPOOL1 ++ ++ log_must mv $missingvdev $BACKUP_DEVICE_DIR ++ ++ log_must zpool import -m -d $DEVICE_DIR $TESTPOOL1 ++ ++ CACHE_PRESENT=$(zpool status -v $TESTPOOL1 | grep $cachevdev) ++ ++ SPARE_PRESENT=$(zpool status -v $TESTPOOL1 | grep $sparevdev) ++ ++ if [ -z "$CACHE_PRESENT" ] || [ -z "$SPARE_PRESENT" ] ++ then ++ log_fail "cache/spare vdev missing after importing with missing" \ ++ "log device" ++ fi ++ ++ # Cleanup ++ log_must zpool destroy $TESTPOOL1 ++ ++ log_note "" ++} ++ ++log_must mkdir -p $BACKUP_DEVICE_DIR ++ ++test_missing_log "$VDEV0" "$VDEV1" "$VDEV2" "$VDEV3" ++ ++log_pass "zpool import succeeded with missing log device" +diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am +index 3968902ec..483c1c2f5 100644 +--- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am ++++
b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am +@@ -10,6 +10,7 @@ dist_pkgdata_SCRIPTS = \ + zpool_initialize_start_and_cancel_neg.ksh \ + zpool_initialize_start_and_cancel_pos.ksh \ + zpool_initialize_suspend_resume.ksh \ ++ zpool_initialize_uninit.ksh \ + zpool_initialize_unsupported_vdevs.ksh \ + zpool_initialize_verify_checksums.ksh \ + zpool_initialize_verify_initialized.ksh +diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh +new file mode 100755 +index 000000000..17f776cfb +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh +@@ -0,0 +1,141 @@ ++#!/bin/ksh -p ++# ++# CDDL HEADER START ++# ++# The contents of this file are subject to the terms of the ++# Common Development and Distribution License (the "License"). ++# You may not use this file except in compliance with the License. ++# ++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++# or https://opensource.org/licenses/CDDL-1.0. ++# See the License for the specific language governing permissions ++# and limitations under the License. ++# ++# When distributing Covered Code, include this CDDL HEADER in each ++# file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++# If applicable, add the following below this CDDL HEADER, with the ++# fields enclosed by brackets "[]" replaced with your own identifying ++# information: Portions Copyright [yyyy] [name of copyright owner] ++# ++# CDDL HEADER END ++# ++ ++# ++# Copyright (c) 2016 by Delphix. All rights reserved. ++# Copyright (C) 2023 Lawrence Livermore National Security, LLC. ++# ++. $STF_SUITE/include/libtest.shlib ++. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib ++ ++# ++# DESCRIPTION: ++# Starting, stopping, uninitializing, and restart an initialize works. 
++# ++# STRATEGY: ++# 1. Create a three-disk pool. ++# 2. Verify uninitialize succeeds for uninitialized pool. ++# 3. Verify pool wide cancel|suspend + uninit ++# a. Start initializing and verify that initializing is active. ++# b. Verify uninitialize fails when actively initializing. ++# c. Cancel or suspend initializing and verify that initializing is not active. ++# d. Verify uninitialize succeeds after being cancelled. ++# 4. Verify per-disk cancel|suspend + uninit ++# ++ ++DISK1="$(echo $DISKS | cut -d' ' -f1)" ++DISK2="$(echo $DISKS | cut -d' ' -f2)" ++DISK3="$(echo $DISKS | cut -d' ' -f3)" ++ ++function status_check # pool disk1-state disk2-state disk3-state ++{ ++ typeset pool="$1" ++ typeset disk1_state="$2" ++ typeset disk2_state="$3" ++ typeset disk3_state="$4" ++ ++ state=$(zpool status -i "$pool" | grep "$DISK1" | grep "$disk1_state") ++ if [[ -z "$state" ]]; then ++ log_fail "DISK1 state; expected='$disk1_state' got '$state'" ++ fi ++ ++ state=$(zpool status -i "$pool" | grep "$DISK2" | grep "$disk2_state") ++ if [[ -z "$state" ]]; then ++ log_fail "DISK2 state; expected='$disk2_state' got '$state'" ++ fi ++ ++ state=$(zpool status -i "$pool" | grep "$DISK3" | grep "$disk3_state") ++ if [[ -z "$state" ]]; then ++ log_fail "DISK3 state; expected='$disk3_state' got '$state'" ++ fi ++} ++ ++function status_check_all # pool disk-state ++{ ++ typeset pool="$1" ++ typeset disk_state="$2" ++ ++ status_check "$pool" "$disk_state" "$disk_state" "$disk_state" ++} ++ ++# 1. Create a three-disk pool. ++log_must zpool create -f $TESTPOOL $DISK1 $DISK2 $DISK3 ++status_check_all $TESTPOOL "uninitialized" ++ ++# 2. Verify uninitialize succeeds for uninitialized pool. ++log_must zpool initialize -u $TESTPOOL ++status_check_all $TESTPOOL "uninitialized" ++ ++# 3.
Verify pool wide cancel + uninit ++log_must zpool initialize $TESTPOOL ++status_check_all $TESTPOOL "[[:digit:]]* initialized" ++ ++log_mustnot zpool initialize -u $TESTPOOL ++status_check_all $TESTPOOL "[[:digit:]]* initialized" ++ ++log_must zpool initialize -c $TESTPOOL ++status_check_all $TESTPOOL "uninitialized" ++ ++log_must zpool initialize -u $TESTPOOL ++status_check_all $TESTPOOL "uninitialized" ++ ++# 3. Verify pool wide suspend + uninit ++log_must zpool initialize $TESTPOOL ++status_check_all $TESTPOOL "[[:digit:]]* initialized" ++ ++log_mustnot zpool initialize -u $TESTPOOL ++status_check_all $TESTPOOL "[[:digit:]]* initialized" ++ ++log_must zpool initialize -s $TESTPOOL ++status_check_all $TESTPOOL "suspended" ++ ++log_must zpool initialize -u $TESTPOOL ++status_check_all $TESTPOOL "uninitialized" ++ ++# 4. Verify per-disk cancel|suspend + uninit ++log_must zpool initialize $TESTPOOL ++status_check_all $TESTPOOL "[[:digit:]]* initialized" ++ ++log_must zpool initialize -c $TESTPOOL $DISK1 ++log_must zpool initialize -s $TESTPOOL $DISK2 ++log_mustnot zpool initialize -u $TESTPOOL $DISK3 ++status_check $TESTPOOL "uninitialized" "suspended" "[[:digit:]]* initialized" ++ ++log_must zpool initialize -u $TESTPOOL $DISK1 ++status_check $TESTPOOL "uninitialized" "suspended" "[[:digit:]]* initialized" ++ ++log_must zpool initialize -u $TESTPOOL $DISK2 ++status_check $TESTPOOL "uninitialized" "uninitialized" "[[:digit:]]* initialized" ++ ++log_must zpool initialize $TESTPOOL $DISK1 ++status_check $TESTPOOL "[[:digit:]]* initialized" "uninitialized" "[[:digit:]]* initialized" ++ ++log_must zpool initialize $TESTPOOL $DISK2 ++status_check_all $TESTPOOL "[[:digit:]]* initialized" ++ ++log_must zpool initialize -s $TESTPOOL ++status_check_all $TESTPOOL "suspended" ++ ++log_must zpool initialize -u $TESTPOOL $DISK1 $DISK2 $DISK3 ++status_check_all $TESTPOOL "uninitialized" ++ ++log_pass "Initialize start + cancel/suspend + uninit + start works" +diff --git 
a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am +index 2cec5335f..7ca9e81c1 100644 +--- a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am ++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am +@@ -3,7 +3,8 @@ dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + zpool_resilver_bad_args.ksh \ +- zpool_resilver_restart.ksh ++ zpool_resilver_restart.ksh \ ++ zpool_resilver_concurrent.ksh + + dist_pkgdata_DATA = \ + zpool_resilver.cfg +diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh +new file mode 100755 +index 000000000..4c3b09796 +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh +@@ -0,0 +1,101 @@ ++#!/bin/ksh -p ++# ++# CDDL HEADER START ++# ++# The contents of this file are subject to the terms of the ++# Common Development and Distribution License (the "License"). ++# You may not use this file except in compliance with the License. ++# ++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++# or http://www.opensolaris.org/os/licensing. ++# See the License for the specific language governing permissions ++# and limitations under the License. ++# ++# When distributing Covered Code, include this CDDL HEADER in each ++# file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++# If applicable, add the following below this CDDL HEADER, with the ++# fields enclosed by brackets "[]" replaced with your own identifying ++# information: Portions Copyright [yyyy] [name of copyright owner] ++# ++# CDDL HEADER END ++# ++ ++# ++# Copyright (c) 2023 Hewlett Packard Enterprise Development LP. ++# ++ ++. $STF_SUITE/include/libtest.shlib ++. 
$STF_SUITE/tests/functional/redundancy/redundancy.kshlib ++ ++# ++# DESCRIPTION: ++# Verify 'zpool clear' doesn't cause concurrent resilvers ++# ++# STRATEGY: ++# 1. Create N(10) virtual disk files. ++# 2. Create draid pool based on the virtual disk files. ++# 3. Fill the filesystem with directories and files. ++# 4. Force-fault 2 vdevs and verify distributed spare is kicked in. ++# 5. Free the distributed spare by replacing the faulty drive. ++# 6. Run zpool clear and verify that it does not initiate 2 resilvers ++# concurrently while distributed spare gets kicked in. ++# ++ ++verify_runnable "global" ++ ++typeset -ir devs=10 ++typeset -ir nparity=1 ++typeset -ir ndata=8 ++typeset -ir dspare=1 ++ ++function cleanup ++{ ++ poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL" ++ ++ for i in {0..$devs}; do ++ log_must rm -f "$BASEDIR/vdev$i" ++ done ++ ++ for dir in $BASEDIR; do ++ if [[ -d $dir ]]; then ++ log_must rm -rf $dir ++ fi ++ done ++ ++ zed_stop ++ zed_cleanup ++} ++ ++log_assert "Verify zpool clear on draid pool doesn't cause concurrent resilvers" ++log_onexit cleanup ++ ++setup_test_env $TESTPOOL draid${nparity}:${ndata}d:${dspare}s $devs ++ ++# ZED needed for sequential resilver ++zed_setup ++log_must zed_start ++ ++log_must zpool offline -f $TESTPOOL $BASEDIR/vdev5 ++log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60 ++log_must zpool wait -t resilver $TESTPOOL ++log_must zpool offline -f $TESTPOOL $BASEDIR/vdev6 ++ ++log_must zpool labelclear -f $BASEDIR/vdev5 ++log_must zpool labelclear -f $BASEDIR/vdev6 ++ ++log_must zpool replace -w $TESTPOOL $BASEDIR/vdev5 ++sync_pool $TESTPOOL ++ ++log_must zpool events -c ++log_must zpool clear $TESTPOOL ++log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60 ++log_must zpool wait -t resilver $TESTPOOL ++log_must zpool wait -t scrub $TESTPOOL ++ ++nof_resilver=$(zpool events | grep -c resilver_start) ++if [ $nof_resilver = 1 ] ; then ++ log_must verify_pool $TESTPOOL ++ log_pass "zpool clear on draid 
pool doesn't cause concurrent resilvers" ++else ++ log_fail "FAIL: sequential and healing resilver initiated concurrently" ++fi +diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh +index fbb0c2910..19781137d 100755 +--- a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh ++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh +@@ -35,7 +35,7 @@ + DISK1=${DISKS%% *} + + log_must zpool create -f $TESTPOOL $DISK1 +-log_must zpool trim $TESTPOOL ++log_must zpool trim -r 1 $TESTPOOL + + [[ -z "$(trim_progress $TESTPOOL $DISK1)" ]] && \ + log_fail "TRIM did not start" +diff --git a/tests/zfs-tests/tests/functional/mmap/Makefile.am b/tests/zfs-tests/tests/functional/mmap/Makefile.am +index b26791ee7..526405954 100644 +--- a/tests/zfs-tests/tests/functional/mmap/Makefile.am ++++ b/tests/zfs-tests/tests/functional/mmap/Makefile.am +@@ -2,10 +2,12 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/mmap + dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ ++ mmap_mixed.ksh \ + mmap_read_001_pos.ksh \ + mmap_write_001_pos.ksh \ + mmap_libaio_001_pos.ksh \ +- mmap_seek_001_pos.ksh ++ mmap_seek_001_pos.ksh \ ++ mmap_sync_001_pos.ksh + + dist_pkgdata_DATA = \ + mmap.cfg +diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh +new file mode 100755 +index 000000000..6c8246d48 +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh +@@ -0,0 +1,86 @@ ++#!/bin/ksh -p ++# ++# CDDL HEADER START ++# ++# The contents of this file are subject to the terms of the ++# Common Development and Distribution License (the "License"). ++# You may not use this file except in compliance with the License. 
++# ++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++# or https://opensource.org/licenses/CDDL-1.0. ++# See the License for the specific language governing permissions ++# and limitations under the License. ++# ++# When distributing Covered Code, include this CDDL HEADER in each ++# file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++# If applicable, add the following below this CDDL HEADER, with the ++# fields enclosed by brackets "[]" replaced with your own identifying ++# information: Portions Copyright [yyyy] [name of copyright owner] ++# ++# CDDL HEADER END ++# ++ ++# ++# Copyright (c) 2023 by Lawrence Livermore National Security, LLC. ++# ++ ++. $STF_SUITE/include/libtest.shlib ++. $STF_SUITE/tests/functional/mmap/mmap.cfg ++ ++# ++# DESCRIPTION: ++# Verify mixed buffered and mmap IO. ++# ++# STRATEGY: ++# 1. Create an empty file. ++# 2. Start a background buffered read/write fio to the file. ++# 3. Start a background mmap read/write fio to the file. 
++# ++ ++verify_runnable "global" ++ ++function cleanup ++{ ++ log_must rm -f "$tmp_file" ++} ++ ++log_assert "Verify mixed buffered and mmap IO" ++ ++log_onexit cleanup ++ ++mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) ++tmp_file=$mntpnt/file ++bs=$((128 * 1024)) ++blocks=64 ++size=$((bs * blocks)) ++runtime=60 ++ ++log_must dd if=/dev/zero of=$tmp_file bs=$bs count=$blocks ++ ++# Buffered IO writes ++log_must eval "fio --filename=$tmp_file --name=buffer-write \ ++ --rw=randwrite --size=$size --bs=$bs --direct=0 --numjobs=1 \ ++ --ioengine=sync --fallocate=none --group_reporting --minimal \ ++ --runtime=$runtime --time_based --norandommap &" ++ ++# Buffered IO reads ++log_must eval "fio --filename=$tmp_file --name=buffer-read \ ++ --rw=randread --size=$size --bs=$bs --direct=0 --numjobs=1 \ ++ --ioengine=sync --fallocate=none --group_reporting --minimal \ ++ --runtime=$runtime --time_based --norandommap &" ++ ++# mmap IO writes ++log_must eval "fio --filename=$tmp_file --name=mmap-write \ ++ --rw=randwrite --size=$size --bs=$bs --numjobs=1 \ ++ --ioengine=mmap --fallocate=none --group_reporting --minimal \ ++ --runtime=$runtime --time_based --norandommap &" ++ ++# mmap IO reads ++log_must eval "fio --filename=$tmp_file --name=mmap-read \ ++ --rw=randread --size=$size --bs=$bs --numjobs=1 \ ++ --ioengine=mmap --fallocate=none --group_reporting --minimal \ ++ --runtime=$runtime --time_based --norandommap &" ++ ++log_must wait ++ ++log_pass "Verified mixed buffered and mmap IO" +diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_sync_001_pos.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_sync_001_pos.ksh +new file mode 100755 +index 000000000..b764d6607 +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/mmap/mmap_sync_001_pos.ksh +@@ -0,0 +1,63 @@ ++#!/bin/ksh -p ++ ++# ++# This file and its contents are supplied under the terms of the ++# Common Development and Distribution License ("CDDL"), version 1.0.
++# You may only use this file in accordance with the terms of version ++# 1.0 of the CDDL. ++# ++# A full copy of the text of the CDDL should have accompanied this ++# source. A copy of the CDDL is also available via the Internet at ++# http://www.illumos.org/license/CDDL. ++# ++ ++# ++# Copyright (c) 2015, 2016 by Delphix. All rights reserved. ++# ++ ++. $STF_SUITE/include/libtest.shlib ++ ++# ++# DESCRIPTION: ++# msync()s of mmap()'ed file should complete quickly during ++# background dirty page writebacks by the kernel. ++# ++ ++function cleanup ++{ ++ log_must eval "echo $saved_vm_dirty_expire_centisecs > /proc/sys/vm/dirty_expire_centisecs" ++ log_must eval "echo $saved_vm_dirty_background_ratio > /proc/sys/vm/dirty_background_ratio" ++ log_must eval "echo $saved_vm_dirty_writeback_centisecs > /proc/sys/vm/dirty_writeback_centisecs" ++ ++ # revert to some sensible defaults if the values we saved ++ # were incorrect due to a previous run being interrupted ++ if [ $( /proc/sys/vm/dirty_expire_centisecs" ++ fi ++ ++ if [ $( /proc/sys/vm/dirty_background_ratio" ++ fi ++ ++ if [ $( /proc/sys/vm/dirty_writeback_centisecs" ++ fi ++} ++ ++if ! is_linux; then ++ log_unsupported "Only supported on Linux, requires /proc/sys/vm/ tunables" ++fi ++ ++log_onexit cleanup ++log_assert "Run the tests for mmap_sync" ++ ++read -r saved_vm_dirty_expire_centisecs < /proc/sys/vm/dirty_expire_centisecs ++read -r saved_vm_dirty_background_ratio < /proc/sys/vm/dirty_background_ratio ++read -r saved_vm_dirty_writeback_centisecs < /proc/sys/vm/dirty_writeback_centisecs ++ ++log_must eval "echo 1 > /proc/sys/vm/dirty_expire_centisecs" ++log_must eval "echo 1 > /proc/sys/vm/dirty_background_bytes" ++log_must eval "echo 1 > /proc/sys/vm/dirty_writeback_centisecs" ++ ++log_must mmap_sync ++log_pass "mmap_sync tests passed." 
+diff --git a/tests/zfs-tests/tests/functional/rsend/Makefile.am b/tests/zfs-tests/tests/functional/rsend/Makefile.am +index d80d2124e..2cedf03d3 100644 +--- a/tests/zfs-tests/tests/functional/rsend/Makefile.am ++++ b/tests/zfs-tests/tests/functional/rsend/Makefile.am +@@ -25,6 +25,7 @@ dist_pkgdata_SCRIPTS = \ + rsend_022_pos.ksh \ + rsend_024_pos.ksh \ + send_encrypted_files.ksh \ ++ send_encrypted_freeobjects.ksh \ + send_encrypted_hierarchy.ksh \ + send_encrypted_props.ksh \ + send_encrypted_truncated_files.ksh \ +diff --git a/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh b/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh +index 988ed91b9..1bf234823 100755 +--- a/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh ++++ b/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh +@@ -29,6 +29,7 @@ + + function cleanup + { ++ rm $BACKDIR/copy + log_must_busy zfs destroy -r $vol + cleanup_pool $POOL2 + } +@@ -60,7 +61,9 @@ log_must eval "zfs recv -d $POOL2 <$BACKDIR/full" + + verify_stream_size $BACKDIR/full $vol + verify_stream_size $BACKDIR/full $vol2 +-md5=$(dd if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5digest) ++block_device_wait $voldev2 ++log_must dd if=$voldev2 of=$BACKDIR/copy bs=1024k count=$megs ++md5=$(md5digest $BACKDIR/copy) + [[ $md5 = $md5_1 ]] || log_fail "md5 mismatch: $md5 != $md5_1" + + # Repeat, for an incremental send +@@ -72,7 +75,9 @@ log_must eval "zfs recv -d $POOL2 <$BACKDIR/inc" + + verify_stream_size $BACKDIR/inc $vol 90 $vol@snap + verify_stream_size $BACKDIR/inc $vol2 90 $vol2@snap +-md5=$(dd skip=$megs if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5digest) ++block_device_wait $voldev2 ++log_must dd skip=$megs if=$voldev2 of=$BACKDIR/copy bs=1024k count=$megs ++md5=$(md5digest $BACKDIR/copy) + [[ $md5 = $md5_2 ]] || log_fail "md5 mismatch: $md5 != $md5_2" + + log_pass "Verify compressed send works with volumes" +diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh 
b/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh +new file mode 100755 +index 000000000..92451bd1a +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh +@@ -0,0 +1,87 @@ ++#!/bin/ksh ++ ++# ++# This file and its contents are supplied under the terms of the ++# Common Development and Distribution License ("CDDL"), version 1.0. ++# You may only use this file in accordance with the terms of version ++# 1.0 of the CDDL. ++# ++# A full copy of the text of the CDDL should have accompanied this ++# source. A copy of the CDDL is also available via the Internet at ++# http://www.illumos.org/license/CDDL. ++# ++ ++# ++# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. ++# Copyright (c) 2023 by Findity AB ++# ++ ++. $STF_SUITE/tests/functional/rsend/rsend.kshlib ++ ++# ++# Description: ++# Verify that receiving a raw encrypted stream, with a FREEOBJECTS ++# removing all existing objects in a block followed by an OBJECT write ++# to the same block, does not result in a panic. ++# ++# Strategy: ++# 1. Create a new encrypted filesystem ++# 2. Create file f1 as the first object in some block (here object 128) ++# 3. Take snapshot A ++# 4. Create file f2 as the second object in the same block (here object 129) ++# 5. Delete f1 ++# 6. Take snapshot B ++# 7. Receive a full raw encrypted send of A ++# 8. 
Receive an incremental raw send of B ++# ++verify_runnable "both" ++ ++function create_object_with_num ++{ ++ file=$1 ++ num=$2 ++ ++ tries=100 ++ for ((i=0; i<$tries; i++)); do ++ touch $file ++ onum=$(ls -li $file | awk '{print $1}') ++ ++ if [[ $onum -ne $num ]] ; then ++ rm -f $file ++ else ++ break ++ fi ++ done ++ if [[ $i -eq $tries ]]; then ++ log_fail "Failed to create object with number $num" ++ fi ++} ++ ++log_assert "FREEOBJECTS followed by OBJECT in encrypted stream does not crash" ++ ++sendds=sendencfods ++recvds=recvencfods ++keyfile=/$POOL/keyencfods ++f1=/$POOL/$sendds/f1 ++f2=/$POOL/$sendds/f2 ++ ++log_must eval "echo 'password' > $keyfile" ++ ++# ++# xattr=sa and dnodesize=legacy for sequential object numbers, see ++# note in send_freeobjects.ksh. ++# ++log_must zfs create -o xattr=sa -o dnodesize=legacy -o encryption=on \ ++ -o keyformat=passphrase -o keylocation=file://$keyfile $POOL/$sendds ++ ++create_object_with_num $f1 128 ++log_must zfs snap $POOL/$sendds@A ++create_object_with_num $f2 129 ++log_must rm $f1 ++log_must zfs snap $POOL/$sendds@B ++ ++log_must eval "zfs send -w $POOL/$sendds@A | zfs recv $POOL/$recvds" ++log_must eval "zfs send -w -i $POOL/$sendds@A $POOL/$sendds@B |" \ ++ "zfs recv $POOL/$recvds" ++ ++log_pass "FREEOBJECTS followed by OBJECT in encrypted stream did not crash" diff --git a/zfs.spec b/zfs.spec index 1e73fd5..4502058 100644 --- a/zfs.spec +++ b/zfs.spec @@ -29,16 +29,16 @@ exit 1 Summary: Native Linux port of the ZFS filesystem Summary(pl.UTF-8): Natywny linuksowy port systemu plików ZFS Name: %{pname}%{?_pld_builder:%{?with_kernel:-kernel}}%{_alt_kernel} -Version: 2.1.9 +Version: 2.1.11 Release: %{rel}%{?_pld_builder:%{?with_kernel:@%{_kernel_ver_str}}} License: CDDL Group: Applications/System Source0: https://github.com/openzfs/zfs/releases/download/zfs-%{version}/%{pname}-%{version}.tar.gz -# Source0-md5: d464a712eb43411f2360214badd3b35a +# Source0-md5: 2a7b9d2a487a02d373404c48719488ed Patch0: initdir.patch 
Patch1: am.patch Patch2: no-Werror.patch -Patch3: blkdev.patch +Patch3: staging.patch URL: https://zfsonlinux.org/ BuildRequires: autoconf >= 2.50 BuildRequires: automake @@ -290,9 +290,6 @@ p=`pwd`\ %patch2 -p1 %patch3 -p1 -%{__sed} -E -i -e '1s,#!\s*/usr/bin/env\s+python2(\s|$),#!%{__python}\1,' \ - cmd/arc_summary/arc_summary2 - %{__sed} -E -i -e '1s,#!\s*/usr/bin/env\s+python3(\s|$),#!%{__python3}\1,' \ cmd/arc_summary/arc_summary3 @@ -621,6 +618,7 @@ rm -rf $RPM_BUILD_ROOT %attr(755,root,root) %{dracutlibdir}/modules.d/90zfs/zfs-lib.sh %attr(755,root,root) %{dracutlibdir}/modules.d/90zfs/zfs-load-key.sh %attr(755,root,root) %{dracutlibdir}/modules.d/90zfs/zfs-needshutdown.sh +%{dracutlibdir}/modules.d/90zfs/zfs-nonroot-necessities.service %{dracutlibdir}/modules.d/90zfs/zfs-rollback-bootfs.service %{dracutlibdir}/modules.d/90zfs/zfs-snapshot-bootfs.service %{_mandir}/man7/dracut.zfs.7* -- 2.44.0