--- /dev/null
+diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
+index f424dd771..bcd520de3 100644
+--- a/cmd/zdb/zdb.c
++++ b/cmd/zdb/zdb.c
+@@ -3102,13 +3102,22 @@ dump_znode_sa_xattr(sa_handle_t *hdl)
+ (void) printf("\tSA xattrs: %d bytes, %d entries\n\n",
+ sa_xattr_size, sa_xattr_entries);
+ while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) {
++ boolean_t can_print = !dump_opt['P'];
+ uchar_t *value;
+ uint_t cnt, idx;
+
+ (void) printf("\t\t%s = ", nvpair_name(elem));
+ nvpair_value_byte_array(elem, &value, &cnt);
++
++ for (idx = 0; idx < cnt; ++idx) {
++ if (!isprint(value[idx])) {
++ can_print = B_FALSE;
++ break;
++ }
++ }
++
+ for (idx = 0; idx < cnt; ++idx) {
+- if (isprint(value[idx]))
++ if (can_print)
+ (void) putchar(value[idx]);
+ else
+ (void) printf("\\%3.3o", value[idx]);
+diff --git a/cmd/zed/agents/zfs_retire.c b/cmd/zed/agents/zfs_retire.c
+index b4794e311..29eaee750 100644
+--- a/cmd/zed/agents/zfs_retire.c
++++ b/cmd/zed/agents/zfs_retire.c
+@@ -444,14 +444,16 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
+ return;
+
+ /* Remove the vdev since device is unplugged */
++ int remove_status = 0;
+ if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) {
+- int status = zpool_vdev_remove_wanted(zhp, devname);
++ remove_status = zpool_vdev_remove_wanted(zhp, devname);
+ fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'"
+- ", ret:%d", devname, status);
++ ", err:%d", devname, libzfs_errno(zhdl));
+ }
+
+ /* Replace the vdev with a spare if its not a l2arc */
+- if (!l2arc && (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
++ if (!l2arc && !remove_status &&
++ (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
+ replace_with_spare(hdl, zhp, vdev) == B_FALSE)) {
+ /* Could not handle with spare */
+ fmd_hdl_debug(hdl, "no spare for '%s'", devname);
+diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
+index 2311d4f04..a06af9aec 100644
+--- a/cmd/zpool/zpool_main.c
++++ b/cmd/zpool/zpool_main.c
+@@ -392,7 +392,7 @@ get_usage(zpool_help_t idx)
+ case HELP_REOPEN:
+ return (gettext("\treopen [-n] <pool>\n"));
+ case HELP_INITIALIZE:
+- return (gettext("\tinitialize [-c | -s] [-w] <pool> "
++ return (gettext("\tinitialize [-c | -s | -u] [-w] <pool> "
+ "[<device> ...]\n"));
+ case HELP_SCRUB:
+ return (gettext("\tscrub [-s | -p] [-w] <pool> ...\n"));
+@@ -548,12 +548,13 @@ usage(boolean_t requested)
+ }
+
+ /*
+- * zpool initialize [-c | -s] [-w] <pool> [<vdev> ...]
++ * zpool initialize [-c | -s | -u] [-w] <pool> [<vdev> ...]
+ * Initialize all unused blocks in the specified vdevs, or all vdevs in the pool
+ * if none specified.
+ *
+ * -c Cancel. Ends active initializing.
+ * -s Suspend. Initializing can then be restarted with no flags.
++ * -u Uninitialize. Clears initialization state.
+ * -w Wait. Blocks until initializing has completed.
+ */
+ int
+@@ -569,12 +570,14 @@ zpool_do_initialize(int argc, char **argv)
+ struct option long_options[] = {
+ {"cancel", no_argument, NULL, 'c'},
+ {"suspend", no_argument, NULL, 's'},
++ {"uninit", no_argument, NULL, 'u'},
+ {"wait", no_argument, NULL, 'w'},
+ {0, 0, 0, 0}
+ };
+
+ pool_initialize_func_t cmd_type = POOL_INITIALIZE_START;
+- while ((c = getopt_long(argc, argv, "csw", long_options, NULL)) != -1) {
++ while ((c = getopt_long(argc, argv, "csuw", long_options,
++ NULL)) != -1) {
+ switch (c) {
+ case 'c':
+ if (cmd_type != POOL_INITIALIZE_START &&
+@@ -594,6 +597,15 @@ zpool_do_initialize(int argc, char **argv)
+ }
+ cmd_type = POOL_INITIALIZE_SUSPEND;
+ break;
++ case 'u':
++ if (cmd_type != POOL_INITIALIZE_START &&
++ cmd_type != POOL_INITIALIZE_UNINIT) {
++ (void) fprintf(stderr, gettext("-u cannot be "
++ "combined with other options\n"));
++ usage(B_FALSE);
++ }
++ cmd_type = POOL_INITIALIZE_UNINIT;
++ break;
+ case 'w':
+ wait = B_TRUE;
+ break;
+@@ -620,8 +632,8 @@ zpool_do_initialize(int argc, char **argv)
+ }
+
+ if (wait && (cmd_type != POOL_INITIALIZE_START)) {
+- (void) fprintf(stderr, gettext("-w cannot be used with -c or "
+- "-s\n"));
++ (void) fprintf(stderr, gettext("-w cannot be used with -c, -s "
++ "or -u\n"));
+ usage(B_FALSE);
+ }
+
+@@ -6921,6 +6933,17 @@ zpool_do_online(int argc, char **argv)
+ return (1);
+
+ for (i = 1; i < argc; i++) {
++ vdev_state_t oldstate;
++ boolean_t avail_spare, l2cache;
++ nvlist_t *tgt = zpool_find_vdev(zhp, argv[i], &avail_spare,
++ &l2cache, NULL);
++ if (tgt == NULL) {
++ ret = 1;
++ continue;
++ }
++ uint_t vsc;
++ oldstate = ((vdev_stat_t *)fnvlist_lookup_uint64_array(tgt,
++ ZPOOL_CONFIG_VDEV_STATS, &vsc))->vs_state;
+ if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) {
+ if (newstate != VDEV_STATE_HEALTHY) {
+ (void) printf(gettext("warning: device '%s' "
+@@ -6934,6 +6957,17 @@ zpool_do_online(int argc, char **argv)
+ (void) printf(gettext("use 'zpool "
+ "replace' to replace devices "
+ "that are no longer present\n"));
++ if ((flags & ZFS_ONLINE_EXPAND)) {
++ (void) printf(gettext("%s: failed "
++ "to expand usable space on "
++ "unhealthy device '%s'\n"),
++ (oldstate >= VDEV_STATE_DEGRADED ?
++ "error" : "warning"), argv[i]);
++ if (oldstate >= VDEV_STATE_DEGRADED) {
++ ret = 1;
++ break;
++ }
++ }
+ }
+ } else {
+ ret = 1;
+@@ -7549,19 +7583,20 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
+
+ zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf));
+
+- assert(ps->pss_func == POOL_SCAN_SCRUB ||
+- ps->pss_func == POOL_SCAN_RESILVER);
++ int is_resilver = ps->pss_func == POOL_SCAN_RESILVER;
++ int is_scrub = ps->pss_func == POOL_SCAN_SCRUB;
++ assert(is_resilver || is_scrub);
+
+ /* Scan is finished or canceled. */
+ if (ps->pss_state == DSS_FINISHED) {
+ secs_to_dhms(end - start, time_buf);
+
+- if (ps->pss_func == POOL_SCAN_SCRUB) {
++ if (is_scrub) {
+ (void) printf(gettext("scrub repaired %s "
+ "in %s with %llu errors on %s"), processed_buf,
+ time_buf, (u_longlong_t)ps->pss_errors,
+ ctime(&end));
+- } else if (ps->pss_func == POOL_SCAN_RESILVER) {
++ } else if (is_resilver) {
+ (void) printf(gettext("resilvered %s "
+ "in %s with %llu errors on %s"), processed_buf,
+ time_buf, (u_longlong_t)ps->pss_errors,
+@@ -7569,10 +7604,10 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
+ }
+ return;
+ } else if (ps->pss_state == DSS_CANCELED) {
+- if (ps->pss_func == POOL_SCAN_SCRUB) {
++ if (is_scrub) {
+ (void) printf(gettext("scrub canceled on %s"),
+ ctime(&end));
+- } else if (ps->pss_func == POOL_SCAN_RESILVER) {
++ } else if (is_resilver) {
+ (void) printf(gettext("resilver canceled on %s"),
+ ctime(&end));
+ }
+@@ -7582,7 +7617,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
+ assert(ps->pss_state == DSS_SCANNING);
+
+ /* Scan is in progress. Resilvers can't be paused. */
+- if (ps->pss_func == POOL_SCAN_SCRUB) {
++ if (is_scrub) {
+ if (pause == 0) {
+ (void) printf(gettext("scrub in progress since %s"),
+ ctime(&start));
+@@ -7592,7 +7627,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
+ (void) printf(gettext("\tscrub started on %s"),
+ ctime(&start));
+ }
+- } else if (ps->pss_func == POOL_SCAN_RESILVER) {
++ } else if (is_resilver) {
+ (void) printf(gettext("resilver in progress since %s"),
+ ctime(&start));
+ }
+@@ -7634,17 +7669,27 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
+ scanned_buf, issued_buf, total_buf);
+ }
+
+- if (ps->pss_func == POOL_SCAN_RESILVER) {
++ if (is_resilver) {
+ (void) printf(gettext("\t%s resilvered, %.2f%% done"),
+ processed_buf, 100 * fraction_done);
+- } else if (ps->pss_func == POOL_SCAN_SCRUB) {
++ } else if (is_scrub) {
+ (void) printf(gettext("\t%s repaired, %.2f%% done"),
+ processed_buf, 100 * fraction_done);
+ }
+
+ if (pause == 0) {
++ /*
++ * Only provide an estimate iff:
++ * 1) the time remaining is valid, and
++ * 2) the issue rate exceeds 10 MB/s, and
++ * 3) it's either:
++ * a) a resilver which has started repairs, or
++ * b) a scrub which has entered the issue phase.
++ */
+ if (total_secs_left != UINT64_MAX &&
+- issue_rate >= 10 * 1024 * 1024) {
++ issue_rate >= 10 * 1024 * 1024 &&
++ ((is_resilver && ps->pss_processed > 0) ||
++ (is_scrub && issued > 0))) {
+ (void) printf(gettext(", %s to go\n"), time_buf);
+ } else {
+ (void) printf(gettext(", no estimated "
+diff --git a/config/always-compiler-options.m4 b/config/always-compiler-options.m4
+index 5046ce0dd..0f66db584 100644
+--- a/config/always-compiler-options.m4
++++ b/config/always-compiler-options.m4
+@@ -221,3 +221,34 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA], [
+ CFLAGS="$saved_flags"
+ AC_SUBST([NO_IPA_SRA])
+ ])
++
++dnl #
++dnl # Check if kernel cc supports -fno-ipa-sra option.
++dnl #
++AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA], [
++ AC_MSG_CHECKING([whether $KERNEL_CC supports -fno-ipa-sra])
++
++ saved_cc="$CC"
++ saved_flags="$CFLAGS"
++ CC="gcc"
++ CFLAGS="$CFLAGS -Werror -fno-ipa-sra"
++
++ AS_IF([ test -n "$KERNEL_CC" ], [
++ CC="$KERNEL_CC"
++ ])
++ AS_IF([ test -n "$KERNEL_LLVM" ], [
++ CC="clang"
++ ])
++
++ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
++ KERNEL_NO_IPA_SRA=-fno-ipa-sra
++ AC_MSG_RESULT([yes])
++ ], [
++ KERNEL_NO_IPA_SRA=
++ AC_MSG_RESULT([no])
++ ])
++
++ CC="$saved_cc"
++ CFLAGS="$saved_flags"
++ AC_SUBST([KERNEL_NO_IPA_SRA])
++])
+diff --git a/config/kernel-acl.m4 b/config/kernel-acl.m4
+index 6e92da97d..be08c3c60 100644
+--- a/config/kernel-acl.m4
++++ b/config/kernel-acl.m4
+@@ -236,7 +236,22 @@ dnl #
+ dnl # 6.2 API change,
+ dnl # set_acl() second paramter changed to a struct dentry *
+ dnl #
++dnl # 6.3 API change,
++dnl # set_acl() first parameter changed to struct mnt_idmap *
++dnl #
+ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [
++ ZFS_LINUX_TEST_SRC([inode_operations_set_acl_mnt_idmap_dentry], [
++ #include <linux/fs.h>
++
++ int set_acl_fn(struct mnt_idmap *idmap,
++ struct dentry *dent, struct posix_acl *acl,
++ int type) { return 0; }
++
++ static const struct inode_operations
++ iops __attribute__ ((unused)) = {
++ .set_acl = set_acl_fn,
++ };
++ ],[])
+ ZFS_LINUX_TEST_SRC([inode_operations_set_acl_userns_dentry], [
+ #include <linux/fs.h>
+
+@@ -281,17 +296,24 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL], [
+ AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists])
+ AC_DEFINE(HAVE_SET_ACL_USERNS, 1, [iops->set_acl() takes 4 args])
+ ],[
+- ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns_dentry], [
++ ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_mnt_idmap_dentry], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists])
+- AC_DEFINE(HAVE_SET_ACL_USERNS_DENTRY_ARG2, 1,
+- [iops->set_acl() takes 4 args, arg2 is struct dentry *])
++ AC_DEFINE(HAVE_SET_ACL_IDMAP_DENTRY, 1,
++ [iops->set_acl() takes 4 args, arg1 is struct mnt_idmap *])
+ ],[
+- ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [
++ ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns_dentry], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists, takes 3 args])
++ AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists])
++ AC_DEFINE(HAVE_SET_ACL_USERNS_DENTRY_ARG2, 1,
++ [iops->set_acl() takes 4 args, arg2 is struct dentry *])
+ ],[
+- ZFS_LINUX_REQUIRE_API([i_op->set_acl()], [3.14])
++ ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists, takes 3 args])
++ ],[
++ ZFS_LINUX_REQUIRE_API([i_op->set_acl()], [3.14])
++ ])
+ ])
+ ])
+ ])
+diff --git a/config/kernel-cpu_has_feature.m4 b/config/kernel-cpu_has_feature.m4
+new file mode 100644
+index 000000000..608faf0f8
+--- /dev/null
++++ b/config/kernel-cpu_has_feature.m4
+@@ -0,0 +1,29 @@
++dnl #
++dnl # cpu_has_feature() may reference GPL-only cpu_feature_keys on powerpc
++dnl #
++
++dnl #
++dnl # Checking if cpu_has_feature is exported GPL-only
++dnl #
++AC_DEFUN([ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE], [
++ ZFS_LINUX_TEST_SRC([cpu_has_feature], [
++ #include <linux/version.h>
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
++ #include <asm/cpu_has_feature.h>
++ #else
++ #include <asm/cputable.h>
++ #endif
++ ], [
++ return cpu_has_feature(CPU_FTR_ALTIVEC) ? 0 : 1;
++ ], [], [ZFS_META_LICENSE])
++])
++AC_DEFUN([ZFS_AC_KERNEL_CPU_HAS_FEATURE], [
++ AC_MSG_CHECKING([whether cpu_has_feature() is GPL-only])
++ ZFS_LINUX_TEST_RESULT([cpu_has_feature_license], [
++ AC_MSG_RESULT(no)
++ ], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_CPU_HAS_FEATURE_GPL_ONLY, 1,
++ [cpu_has_feature() is GPL-only])
++ ])
++])
+diff --git a/config/kernel-filemap.m4 b/config/kernel-filemap.m4
+new file mode 100644
+index 000000000..745928168
+--- /dev/null
++++ b/config/kernel-filemap.m4
+@@ -0,0 +1,26 @@
++dnl #
++dnl # filemap_range_has_page was not available till 4.13
++dnl #
++AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP], [
++ ZFS_LINUX_TEST_SRC([filemap_range_has_page], [
++ #include <linux/fs.h>
++ ],[
++ struct address_space *mapping = NULL;
++ loff_t lstart = 0;
++ loff_t lend = 0;
++ bool ret __attribute__ ((unused));
++
++ ret = filemap_range_has_page(mapping, lstart, lend);
++ ])
++])
++
++AC_DEFUN([ZFS_AC_KERNEL_FILEMAP], [
++ AC_MSG_CHECKING([whether filemap_range_has_page() is available])
++ ZFS_LINUX_TEST_RESULT([filemap_range_has_page], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_FILEMAP_RANGE_HAS_PAGE, 1,
++ [filemap_range_has_page() is available])
++ ],[
++ AC_MSG_RESULT(no)
++ ])
++])
+diff --git a/config/kernel-flush_dcache_page.m4 b/config/kernel-flush_dcache_page.m4
+new file mode 100644
+index 000000000..2340c386e
+--- /dev/null
++++ b/config/kernel-flush_dcache_page.m4
+@@ -0,0 +1,26 @@
++dnl #
++dnl # Starting from Linux 5.13, flush_dcache_page() becomes an inline
++dnl # function and may indirectly reference GPL-only cpu_feature_keys on
++dnl # powerpc
++dnl #
++
++dnl #
++dnl # Checking if flush_dcache_page is exported GPL-only
++dnl #
++AC_DEFUN([ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE], [
++ ZFS_LINUX_TEST_SRC([flush_dcache_page], [
++ #include <asm/cacheflush.h>
++ ], [
++ flush_dcache_page(0);
++ ], [], [ZFS_META_LICENSE])
++])
++AC_DEFUN([ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE], [
++ AC_MSG_CHECKING([whether flush_dcache_page() is GPL-only])
++ ZFS_LINUX_TEST_RESULT([flush_dcache_page_license], [
++ AC_MSG_RESULT(no)
++ ], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY, 1,
++ [flush_dcache_page() is GPL-only])
++ ])
++])
+diff --git a/config/kernel-generic_fillattr.m4 b/config/kernel-generic_fillattr.m4
+index 0acd5d531..02dee4d4c 100644
+--- a/config/kernel-generic_fillattr.m4
++++ b/config/kernel-generic_fillattr.m4
+@@ -4,7 +4,10 @@ dnl #
+ dnl # generic_fillattr in linux/fs.h now requires a struct user_namespace*
+ dnl # as the first arg, to support idmapped mounts.
+ dnl #
+-AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS], [
++dnl # 6.3 API
++dnl # generic_fillattr() now takes struct mnt_idmap* as the first argument
++dnl #
++AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [
+ ZFS_LINUX_TEST_SRC([generic_fillattr_userns], [
+ #include <linux/fs.h>
+ ],[
+@@ -13,16 +16,32 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS], [
+ struct kstat *k = NULL;
+ generic_fillattr(userns, in, k);
+ ])
++
++ ZFS_LINUX_TEST_SRC([generic_fillattr_mnt_idmap], [
++ #include <linux/fs.h>
++ ],[
++ struct mnt_idmap *idmap = NULL;
++ struct inode *in = NULL;
++ struct kstat *k = NULL;
++ generic_fillattr(idmap, in, k);
++ ])
+ ])
+
+-AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR_USERNS], [
+- AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
+- ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
++AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR], [
++ AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*])
++ ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [
+ AC_MSG_RESULT([yes])
+- AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1,
+- [generic_fillattr requires struct user_namespace*])
++ AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1,
++ [generic_fillattr requires struct mnt_idmap*])
+ ],[
+- AC_MSG_RESULT([no])
++ AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
++ ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1,
++ [generic_fillattr requires struct user_namespace*])
++ ],[
++ AC_MSG_RESULT([no])
++ ])
+ ])
+ ])
+
+diff --git a/config/kernel-inode-create.m4 b/config/kernel-inode-create.m4
+index a6ea11fb6..9e9e43180 100644
+--- a/config/kernel-inode-create.m4
++++ b/config/kernel-inode-create.m4
+@@ -1,4 +1,22 @@
+ AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [
++ dnl #
++ dnl # 6.3 API change
++ dnl # The first arg is changed to struct mnt_idmap *
++ dnl #
++ ZFS_LINUX_TEST_SRC([create_mnt_idmap], [
++ #include <linux/fs.h>
++ #include <linux/sched.h>
++
++ int inode_create(struct mnt_idmap *idmap,
++ struct inode *inode ,struct dentry *dentry,
++ umode_t umode, bool flag) { return 0; }
++
++ static const struct inode_operations
++ iops __attribute__ ((unused)) = {
++ .create = inode_create,
++ };
++ ],[])
++
+ dnl #
+ dnl # 5.12 API change that added the struct user_namespace* arg
+ dnl # to the front of this function type's arg list.
+@@ -35,19 +53,28 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [
+ ])
+
+ AC_DEFUN([ZFS_AC_KERNEL_CREATE], [
+- AC_MSG_CHECKING([whether iops->create() takes struct user_namespace*])
+- ZFS_LINUX_TEST_RESULT([create_userns], [
++ AC_MSG_CHECKING([whether iops->create() takes struct mnt_idmap*])
++ ZFS_LINUX_TEST_RESULT([create_mnt_idmap], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_IOPS_CREATE_USERNS, 1,
+- [iops->create() takes struct user_namespace*])
++ AC_DEFINE(HAVE_IOPS_CREATE_IDMAP, 1,
++ [iops->create() takes struct mnt_idmap*])
+ ],[
+ AC_MSG_RESULT(no)
+
+- AC_MSG_CHECKING([whether iops->create() passes flags])
+- ZFS_LINUX_TEST_RESULT([create_flags], [
++ AC_MSG_CHECKING([whether iops->create() takes struct user_namespace*])
++ ZFS_LINUX_TEST_RESULT([create_userns], [
+ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_IOPS_CREATE_USERNS, 1,
++ [iops->create() takes struct user_namespace*])
+ ],[
+- ZFS_LINUX_TEST_ERROR([iops->create()])
++ AC_MSG_RESULT(no)
++
++ AC_MSG_CHECKING([whether iops->create() passes flags])
++ ZFS_LINUX_TEST_RESULT([create_flags], [
++ AC_MSG_RESULT(yes)
++ ],[
++ ZFS_LINUX_TEST_ERROR([iops->create()])
++ ])
+ ])
+ ])
+ ])
+diff --git a/config/kernel-inode-getattr.m4 b/config/kernel-inode-getattr.m4
+index f62e82f52..c8bfb0786 100644
+--- a/config/kernel-inode-getattr.m4
++++ b/config/kernel-inode-getattr.m4
+@@ -1,4 +1,24 @@
+ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [
++ dnl #
++ dnl # Linux 6.3 API
++ dnl # The first arg of getattr I/O operations handler type
++ dnl # is changed to struct mnt_idmap*
++ dnl #
++ ZFS_LINUX_TEST_SRC([inode_operations_getattr_mnt_idmap], [
++ #include <linux/fs.h>
++
++ int test_getattr(
++ struct mnt_idmap *idmap,
++ const struct path *p, struct kstat *k,
++ u32 request_mask, unsigned int query_flags)
++ { return 0; }
++
++ static const struct inode_operations
++ iops __attribute__ ((unused)) = {
++ .getattr = test_getattr,
++ };
++ ],[])
++
+ dnl #
+ dnl # Linux 5.12 API
+ dnl # The getattr I/O operations handler type was extended to require
+@@ -55,37 +75,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [
+
+ AC_DEFUN([ZFS_AC_KERNEL_INODE_GETATTR], [
+ dnl #
+- dnl # Kernel 5.12 test
++ dnl # Kernel 6.3 test
+ dnl #
+- AC_MSG_CHECKING([whether iops->getattr() takes user_namespace])
+- ZFS_LINUX_TEST_RESULT([inode_operations_getattr_userns], [
++ AC_MSG_CHECKING([whether iops->getattr() takes mnt_idmap])
++ ZFS_LINUX_TEST_RESULT([inode_operations_getattr_mnt_idmap], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_USERNS_IOPS_GETATTR, 1,
+- [iops->getattr() takes struct user_namespace*])
++ AC_DEFINE(HAVE_IDMAP_IOPS_GETATTR, 1,
++ [iops->getattr() takes struct mnt_idmap*])
+ ],[
+ AC_MSG_RESULT(no)
+-
+ dnl #
+- dnl # Kernel 4.11 test
++ dnl # Kernel 5.12 test
+ dnl #
+- AC_MSG_CHECKING([whether iops->getattr() takes a path])
+- ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [
++ AC_MSG_CHECKING([whether iops->getattr() takes user_namespace])
++ ZFS_LINUX_TEST_RESULT([inode_operations_getattr_userns], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1,
+- [iops->getattr() takes a path])
++ AC_DEFINE(HAVE_USERNS_IOPS_GETATTR, 1,
++ [iops->getattr() takes struct user_namespace*])
+ ],[
+ AC_MSG_RESULT(no)
+
+ dnl #
+- dnl # Kernel < 4.11 test
++ dnl # Kernel 4.11 test
+ dnl #
+- AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount])
+- ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [
++ AC_MSG_CHECKING([whether iops->getattr() takes a path])
++ ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1,
+- [iops->getattr() takes a vfsmount])
++ AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1,
++ [iops->getattr() takes a path])
+ ],[
+ AC_MSG_RESULT(no)
++
++ dnl #
++ dnl # Kernel < 4.11 test
++ dnl #
++ AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount])
++ ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1,
++ [iops->getattr() takes a vfsmount])
++ ],[
++ AC_MSG_RESULT(no)
++ ])
+ ])
+ ])
+ ])
+diff --git a/config/kernel-inode-setattr.m4 b/config/kernel-inode-setattr.m4
+new file mode 100644
+index 000000000..45755b4eb
+--- /dev/null
++++ b/config/kernel-inode-setattr.m4
+@@ -0,0 +1,87 @@
++AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SETATTR], [
++ dnl #
++ dnl # Linux 6.3 API
++ dnl # The first arg of setattr I/O operations handler type
++ dnl # is changed to struct mnt_idmap*
++ dnl #
++ ZFS_LINUX_TEST_SRC([inode_operations_setattr_mnt_idmap], [
++ #include <linux/fs.h>
++
++ int test_setattr(
++ struct mnt_idmap *idmap,
++ struct dentry *de, struct iattr *ia)
++ { return 0; }
++
++ static const struct inode_operations
++ iops __attribute__ ((unused)) = {
++ .setattr = test_setattr,
++ };
++ ],[])
++
++ dnl #
++ dnl # Linux 5.12 API
++ dnl # The setattr I/O operations handler type was extended to require
++ dnl # a struct user_namespace* as its first arg, to support idmapped
++ dnl # mounts.
++ dnl #
++ ZFS_LINUX_TEST_SRC([inode_operations_setattr_userns], [
++ #include <linux/fs.h>
++
++ int test_setattr(
++ struct user_namespace *userns,
++ struct dentry *de, struct iattr *ia)
++ { return 0; }
++
++ static const struct inode_operations
++ iops __attribute__ ((unused)) = {
++ .setattr = test_setattr,
++ };
++ ],[])
++
++ ZFS_LINUX_TEST_SRC([inode_operations_setattr], [
++ #include <linux/fs.h>
++
++ int test_setattr(
++ struct dentry *de, struct iattr *ia)
++ { return 0; }
++
++ static const struct inode_operations
++ iops __attribute__ ((unused)) = {
++ .setattr = test_setattr,
++ };
++ ],[])
++])
++
++AC_DEFUN([ZFS_AC_KERNEL_INODE_SETATTR], [
++ dnl #
++ dnl # Kernel 6.3 test
++ dnl #
++ AC_MSG_CHECKING([whether iops->setattr() takes mnt_idmap])
++ ZFS_LINUX_TEST_RESULT([inode_operations_setattr_mnt_idmap], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_IDMAP_IOPS_SETATTR, 1,
++ [iops->setattr() takes struct mnt_idmap*])
++ ],[
++ AC_MSG_RESULT(no)
++ dnl #
++ dnl # Kernel 5.12 test
++ dnl #
++ AC_MSG_CHECKING([whether iops->setattr() takes user_namespace])
++ ZFS_LINUX_TEST_RESULT([inode_operations_setattr_userns], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_USERNS_IOPS_SETATTR, 1,
++ [iops->setattr() takes struct user_namespace*])
++ ],[
++ AC_MSG_RESULT(no)
++
++ AC_MSG_CHECKING([whether iops->setattr() exists])
++ ZFS_LINUX_TEST_RESULT([inode_operations_setattr], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_IOPS_SETATTR, 1,
++ [iops->setattr() exists])
++ ],[
++ AC_MSG_RESULT(no)
++ ])
++ ])
++ ])
++])
+diff --git a/config/kernel-is_owner_or_cap.m4 b/config/kernel-is_owner_or_cap.m4
+index a90cf3da6..4e9c002b7 100644
+--- a/config/kernel-is_owner_or_cap.m4
++++ b/config/kernel-is_owner_or_cap.m4
+@@ -16,12 +16,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE], [
+ (void) inode_owner_or_capable(ip);
+ ])
+
+- ZFS_LINUX_TEST_SRC([inode_owner_or_capable_idmapped], [
++ ZFS_LINUX_TEST_SRC([inode_owner_or_capable_userns], [
+ #include <linux/fs.h>
+ ],[
+ struct inode *ip = NULL;
+ (void) inode_owner_or_capable(&init_user_ns, ip);
+ ])
++
++ ZFS_LINUX_TEST_SRC([inode_owner_or_capable_mnt_idmap], [
++ #include <linux/fs.h>
++ #include <linux/mnt_idmapping.h>
++ ],[
++ struct inode *ip = NULL;
++ (void) inode_owner_or_capable(&nop_mnt_idmap, ip);
++ ])
+ ])
+
+ AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [
+@@ -35,12 +43,21 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [
+
+ AC_MSG_CHECKING(
+ [whether inode_owner_or_capable() takes user_ns])
+- ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_idmapped], [
++ ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_userns], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED, 1,
++ AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_USERNS, 1,
+ [inode_owner_or_capable() takes user_ns])
+ ],[
+- ZFS_LINUX_TEST_ERROR([capability])
++ AC_MSG_RESULT(no)
++ AC_MSG_CHECKING(
++ [whether inode_owner_or_capable() takes mnt_idmap])
++ ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_mnt_idmap], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_IDMAP, 1,
++ [inode_owner_or_capable() takes mnt_idmap])
++ ], [
++ ZFS_LINUX_TEST_ERROR([capability])
++ ])
+ ])
+ ])
+ ])
+diff --git a/config/kernel-mkdir.m4 b/config/kernel-mkdir.m4
+index 6667ed04f..7407a791b 100644
+--- a/config/kernel-mkdir.m4
++++ b/config/kernel-mkdir.m4
+@@ -2,6 +2,22 @@ dnl #
+ dnl # Supported mkdir() interfaces checked newest to oldest.
+ dnl #
+ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [
++ dnl #
++ dnl # 6.3 API change
++ dnl # mkdir() takes struct mnt_idmap * as the first arg
++ dnl #
++ ZFS_LINUX_TEST_SRC([mkdir_mnt_idmap], [
++ #include <linux/fs.h>
++
++ int mkdir(struct mnt_idmap *idmap,
++ struct inode *inode, struct dentry *dentry,
++ umode_t umode) { return 0; }
++ static const struct inode_operations
++ iops __attribute__ ((unused)) = {
++ .mkdir = mkdir,
++ };
++ ],[])
++
+ dnl #
+ dnl # 5.12 API change
+ dnl # The struct user_namespace arg was added as the first argument to
+@@ -43,25 +59,36 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [
+
+ AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [
+ dnl #
+- dnl # 5.12 API change
+- dnl # The struct user_namespace arg was added as the first argument to
+- dnl # mkdir() of the iops structure.
++ dnl # 6.3 API change
++ dnl # mkdir() takes struct mnt_idmap * as the first arg
+ dnl #
+- AC_MSG_CHECKING([whether iops->mkdir() takes struct user_namespace*])
+- ZFS_LINUX_TEST_RESULT([mkdir_user_namespace], [
++ AC_MSG_CHECKING([whether iops->mkdir() takes struct mnt_idmap*])
++ ZFS_LINUX_TEST_RESULT([mkdir_mnt_idmap], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_IOPS_MKDIR_USERNS, 1,
+- [iops->mkdir() takes struct user_namespace*])
++ AC_DEFINE(HAVE_IOPS_MKDIR_IDMAP, 1,
++ [iops->mkdir() takes struct mnt_idmap*])
+ ],[
+- AC_MSG_RESULT(no)
+-
+- AC_MSG_CHECKING([whether iops->mkdir() takes umode_t])
+- ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [
++ dnl #
++ dnl # 5.12 API change
++ dnl # The struct user_namespace arg was added as the first argument to
++ dnl # mkdir() of the iops structure.
++ dnl #
++ AC_MSG_CHECKING([whether iops->mkdir() takes struct user_namespace*])
++ ZFS_LINUX_TEST_RESULT([mkdir_user_namespace], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_MKDIR_UMODE_T, 1,
+- [iops->mkdir() takes umode_t])
++ AC_DEFINE(HAVE_IOPS_MKDIR_USERNS, 1,
++ [iops->mkdir() takes struct user_namespace*])
+ ],[
+- ZFS_LINUX_TEST_ERROR([mkdir()])
++ AC_MSG_RESULT(no)
++
++ AC_MSG_CHECKING([whether iops->mkdir() takes umode_t])
++ ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_MKDIR_UMODE_T, 1,
++ [iops->mkdir() takes umode_t])
++ ],[
++ ZFS_LINUX_TEST_ERROR([mkdir()])
++ ])
+ ])
+ ])
+ ])
+diff --git a/config/kernel-mknod.m4 b/config/kernel-mknod.m4
+index ffe451060..1494ec1ae 100644
+--- a/config/kernel-mknod.m4
++++ b/config/kernel-mknod.m4
+@@ -1,4 +1,22 @@
+ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [
++ dnl #
++ dnl # 6.3 API change
++ dnl # The first arg is now struct mnt_idmap*
++ dnl #
++ ZFS_LINUX_TEST_SRC([mknod_mnt_idmap], [
++ #include <linux/fs.h>
++ #include <linux/sched.h>
++
++ int tmp_mknod(struct mnt_idmap *idmap,
++ struct inode *inode ,struct dentry *dentry,
++ umode_t u, dev_t d) { return 0; }
++
++ static const struct inode_operations
++ iops __attribute__ ((unused)) = {
++ .mknod = tmp_mknod,
++ };
++ ],[])
++
+ dnl #
+ dnl # 5.12 API change that added the struct user_namespace* arg
+ dnl # to the front of this function type's arg list.
+@@ -19,12 +37,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [
+ ])
+
+ AC_DEFUN([ZFS_AC_KERNEL_MKNOD], [
+- AC_MSG_CHECKING([whether iops->mknod() takes struct user_namespace*])
+- ZFS_LINUX_TEST_RESULT([mknod_userns], [
++ AC_MSG_CHECKING([whether iops->mknod() takes struct mnt_idmap*])
++ ZFS_LINUX_TEST_RESULT([mknod_mnt_idmap], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_IOPS_MKNOD_USERNS, 1,
+- [iops->mknod() takes struct user_namespace*])
++ AC_DEFINE(HAVE_IOPS_MKNOD_IDMAP, 1,
++ [iops->mknod() takes struct mnt_idmap*])
+ ],[
+ AC_MSG_RESULT(no)
++ AC_MSG_CHECKING([whether iops->mknod() takes struct user_namespace*])
++ ZFS_LINUX_TEST_RESULT([mknod_userns], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_IOPS_MKNOD_USERNS, 1,
++ [iops->mknod() takes struct user_namespace*])
++ ],[
++ AC_MSG_RESULT(no)
++ ])
+ ])
+ ])
+diff --git a/config/kernel-reclaim_state.m4 b/config/kernel-reclaim_state.m4
+new file mode 100644
+index 000000000..9936b3c10
+--- /dev/null
++++ b/config/kernel-reclaim_state.m4
+@@ -0,0 +1,26 @@
++AC_DEFUN([ZFS_AC_KERNEL_SRC_RECLAIMED], [
++ dnl #
++ dnl # 6.4 API change
++ dnl # The reclaimed_slab of struct reclaim_state
++ dnl # is renamed to reclaimed
++ dnl #
++ ZFS_LINUX_TEST_SRC([reclaim_state_reclaimed], [
++ #include <linux/swap.h>
++ static const struct reclaim_state
++ rs __attribute__ ((unused)) = {
++ .reclaimed = 100,
++ };
++ ],[])
++])
++
++AC_DEFUN([ZFS_AC_KERNEL_RECLAIMED], [
++ AC_MSG_CHECKING([whether struct reclaim_state has reclaimed field])
++ ZFS_LINUX_TEST_RESULT([reclaim_state_reclaimed], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_RECLAIM_STATE_RECLAIMED, 1,
++ [struct reclaim_state has reclaimed])
++ ],[
++ AC_MSG_RESULT(no)
++ ])
++])
++
+diff --git a/config/kernel-rename.m4 b/config/kernel-rename.m4
+index 302db43f5..b33cd0bfb 100644
+--- a/config/kernel-rename.m4
++++ b/config/kernel-rename.m4
+@@ -33,24 +33,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [
+ .rename = rename_fn,
+ };
+ ],[])
++
++ dnl #
++ dnl # 6.3 API change - the first arg is now struct mnt_idmap*
++ dnl #
++ ZFS_LINUX_TEST_SRC([inode_operations_rename_mnt_idmap], [
++ #include <linux/fs.h>
++ int rename_fn(struct mnt_idmap *idmap, struct inode *sip,
++ struct dentry *sdp, struct inode *tip, struct dentry *tdp,
++ unsigned int flags) { return 0; }
++
++ static const struct inode_operations
++ iops __attribute__ ((unused)) = {
++ .rename = rename_fn,
++ };
++ ],[])
+ ])
+
+ AC_DEFUN([ZFS_AC_KERNEL_RENAME], [
+- AC_MSG_CHECKING([whether iops->rename() takes struct user_namespace*])
+- ZFS_LINUX_TEST_RESULT([inode_operations_rename_userns], [
++ AC_MSG_CHECKING([whether iops->rename() takes struct mnt_idmap*])
++ ZFS_LINUX_TEST_RESULT([inode_operations_rename_mnt_idmap], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_IOPS_RENAME_USERNS, 1,
+- [iops->rename() takes struct user_namespace*])
++ AC_DEFINE(HAVE_IOPS_RENAME_IDMAP, 1,
++ [iops->rename() takes struct mnt_idmap*])
+ ],[
+ AC_MSG_RESULT(no)
+
+- AC_MSG_CHECKING([whether iop->rename() wants flags])
+- ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [
++ AC_MSG_CHECKING([whether iops->rename() takes struct user_namespace*])
++ ZFS_LINUX_TEST_RESULT([inode_operations_rename_userns], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
+- [iops->rename() wants flags])
++ AC_DEFINE(HAVE_IOPS_RENAME_USERNS, 1,
++ [iops->rename() takes struct user_namespace*])
+ ],[
+ AC_MSG_RESULT(no)
++
++ AC_MSG_CHECKING([whether iops->rename() wants flags])
++ ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
++ [iops->rename() wants flags])
++ ],[
++ AC_MSG_RESULT(no)
++ ])
+ ])
+ ])
+ ])
+diff --git a/config/kernel-setattr-prepare.m4 b/config/kernel-setattr-prepare.m4
+index 24245aa53..e02d6263e 100644
+--- a/config/kernel-setattr-prepare.m4
++++ b/config/kernel-setattr-prepare.m4
+@@ -27,26 +27,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SETATTR_PREPARE], [
+ int error __attribute__ ((unused)) =
+ setattr_prepare(userns, dentry, attr);
+ ])
++
++ dnl #
++ dnl # 6.3 API change
++ dnl # The first arg of setattr_prepare() is changed to struct mnt_idmap*
++ dnl #
++ ZFS_LINUX_TEST_SRC([setattr_prepare_mnt_idmap], [
++ #include <linux/fs.h>
++ ], [
++ struct dentry *dentry = NULL;
++ struct iattr *attr = NULL;
++ struct mnt_idmap *idmap = NULL;
++ int error __attribute__ ((unused)) =
++ setattr_prepare(idmap, dentry, attr);
++ ])
+ ])
+
+ AC_DEFUN([ZFS_AC_KERNEL_SETATTR_PREPARE], [
+- AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct user_namespace*])
+- ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_userns],
++ AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct mnt_idmap*])
++ ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_mnt_idmap],
+ [setattr_prepare], [fs/attr.c], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_SETATTR_PREPARE_USERNS, 1,
+- [setattr_prepare() accepts user_namespace])
++ AC_DEFINE(HAVE_SETATTR_PREPARE_IDMAP, 1,
++ [setattr_prepare() accepts mnt_idmap])
+ ], [
+- AC_MSG_RESULT(no)
+-
+- AC_MSG_CHECKING([whether setattr_prepare() is available, doesn't accept user_namespace])
+- ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare],
+- [setattr_prepare], [fs/attr.c], [
++ AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct user_namespace*])
++ ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_userns],
++ [setattr_prepare], [fs/attr.c], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_SETATTR_PREPARE_NO_USERNS, 1,
+- [setattr_prepare() is available, doesn't accept user_namespace])
++ AC_DEFINE(HAVE_SETATTR_PREPARE_USERNS, 1,
++ [setattr_prepare() accepts user_namespace])
+ ], [
+ AC_MSG_RESULT(no)
++
++ AC_MSG_CHECKING([whether setattr_prepare() is available, doesn't accept user_namespace])
++ ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare],
++ [setattr_prepare], [fs/attr.c], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_SETATTR_PREPARE_NO_USERNS, 1,
++ [setattr_prepare() is available, doesn't accept user_namespace])
++ ], [
++ AC_MSG_RESULT(no)
++ ])
+ ])
+ ])
+ ])
+diff --git a/config/kernel-symlink.m4 b/config/kernel-symlink.m4
+index d90366d04..a0333ed66 100644
+--- a/config/kernel-symlink.m4
++++ b/config/kernel-symlink.m4
+@@ -1,4 +1,20 @@
+ AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [
++ dnl #
++ dnl # 6.3 API change that changed the first arg
++ dnl # to struct mnt_idmap*
++ dnl #
++ ZFS_LINUX_TEST_SRC([symlink_mnt_idmap], [
++ #include <linux/fs.h>
++ #include <linux/sched.h>
++ int tmp_symlink(struct mnt_idmap *idmap,
++ struct inode *inode ,struct dentry *dentry,
++ const char *path) { return 0; }
++
++ static const struct inode_operations
++ iops __attribute__ ((unused)) = {
++ .symlink = tmp_symlink,
++ };
++ ],[])
+ dnl #
+ dnl # 5.12 API change that added the struct user_namespace* arg
+ dnl # to the front of this function type's arg list.
+@@ -19,12 +35,19 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [
+ ])
+
+ AC_DEFUN([ZFS_AC_KERNEL_SYMLINK], [
+- AC_MSG_CHECKING([whether iops->symlink() takes struct user_namespace*])
+- ZFS_LINUX_TEST_RESULT([symlink_userns], [
++ AC_MSG_CHECKING([whether iops->symlink() takes struct mnt_idmap*])
++ ZFS_LINUX_TEST_RESULT([symlink_mnt_idmap], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_IOPS_SYMLINK_USERNS, 1,
+- [iops->symlink() takes struct user_namespace*])
++ AC_DEFINE(HAVE_IOPS_SYMLINK_IDMAP, 1,
++ [iops->symlink() takes struct mnt_idmap*])
+ ],[
+- AC_MSG_RESULT(no)
++ AC_MSG_CHECKING([whether iops->symlink() takes struct user_namespace*])
++ ZFS_LINUX_TEST_RESULT([symlink_userns], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_IOPS_SYMLINK_USERNS, 1,
++ [iops->symlink() takes struct user_namespace*])
++ ],[
++ AC_MSG_RESULT(no)
++ ])
+ ])
+ ])
+diff --git a/config/kernel-tmpfile.m4 b/config/kernel-tmpfile.m4
+index 0e1deb361..cc18b8f65 100644
+--- a/config/kernel-tmpfile.m4
++++ b/config/kernel-tmpfile.m4
+@@ -4,6 +4,19 @@ dnl # Add support for i_op->tmpfile
+ dnl #
+ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [
+ dnl #
++ dnl # 6.3 API change
++ dnl # The first arg is now struct mnt_idmap *
++ dnl #
++ ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_mnt_idmap], [
++ #include <linux/fs.h>
++ int tmpfile(struct mnt_idmap *idmap,
++ struct inode *inode, struct file *file,
++ umode_t mode) { return 0; }
++ static struct inode_operations
++ iops __attribute__ ((unused)) = {
++ .tmpfile = tmpfile,
++ };
++ ],[])
+ dnl # 6.1 API change
+ dnl # use struct file instead of struct dentry
+ dnl #
+@@ -44,23 +57,29 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [
+
+ AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [
+ AC_MSG_CHECKING([whether i_op->tmpfile() exists])
+- ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [
++ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_mnt_idmap], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
+- AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns])
+- ],[
+- ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry_userns], [
++ AC_DEFINE(HAVE_TMPFILE_IDMAP, 1, [i_op->tmpfile() has mnt_idmap])
++ ], [
++ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
+ AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns])
+- AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature])
+ ],[
+- ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry], [
++ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry_userns], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
++ AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns])
+ AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature])
+ ],[
+- ZFS_LINUX_REQUIRE_API([i_op->tmpfile()], [3.11])
++ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
++ AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature])
++ ],[
++ ZFS_LINUX_REQUIRE_API([i_op->tmpfile()], [3.11])
++ ])
+ ])
+ ])
+ ])
+diff --git a/config/kernel-writepage_t.m4 b/config/kernel-writepage_t.m4
+new file mode 100644
+index 000000000..3a0cffd98
+--- /dev/null
++++ b/config/kernel-writepage_t.m4
+@@ -0,0 +1,26 @@
++AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITEPAGE_T], [
++ dnl #
++ dnl # 6.3 API change
++ dnl # The writepage_t function type now has its first argument as
++ dnl # struct folio* instead of struct page*
++ dnl #
++ ZFS_LINUX_TEST_SRC([writepage_t_folio], [
++ #include <linux/writeback.h>
++ int putpage(struct folio *folio,
++ struct writeback_control *wbc, void *data)
++ { return 0; }
++ writepage_t func = putpage;
++ ],[])
++])
++
++AC_DEFUN([ZFS_AC_KERNEL_WRITEPAGE_T], [
++ AC_MSG_CHECKING([whether int (*writepage_t)() takes struct folio*])
++ ZFS_LINUX_TEST_RESULT([writepage_t_folio], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_WRITEPAGE_T_FOLIO, 1,
++ [int (*writepage_t)() takes struct folio*])
++ ],[
++ AC_MSG_RESULT(no)
++ ])
++])
++
+diff --git a/config/kernel-xattr-handler.m4 b/config/kernel-xattr-handler.m4
+index b6cbfa155..6b8a08dbc 100644
+--- a/config/kernel-xattr-handler.m4
++++ b/config/kernel-xattr-handler.m4
+@@ -179,6 +179,21 @@ dnl #
+ dnl # Supported xattr handler set() interfaces checked newest to oldest.
+ dnl #
+ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [
++ ZFS_LINUX_TEST_SRC([xattr_handler_set_mnt_idmap], [
++ #include <linux/xattr.h>
++
++ int set(const struct xattr_handler *handler,
++ struct mnt_idmap *idmap,
++ struct dentry *dentry, struct inode *inode,
++ const char *name, const void *buffer,
++ size_t size, int flags)
++ { return 0; }
++ static const struct xattr_handler
++ xops __attribute__ ((unused)) = {
++ .set = set,
++ };
++ ],[])
++
+ ZFS_LINUX_TEST_SRC([xattr_handler_set_userns], [
+ #include <linux/xattr.h>
+
+@@ -240,53 +255,63 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [
+ dnl # The xattr_handler->set() callback was changed to 8 arguments, and
+ dnl # struct user_namespace* was inserted as arg #2
+ dnl #
+- AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and user_namespace])
+- ZFS_LINUX_TEST_RESULT([xattr_handler_set_userns], [
++ dnl # 6.3 API change,
++ dnl # The xattr_handler->set() callback 2nd arg is now struct mnt_idmap *
++ dnl #
++ AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and mnt_idmap])
++ ZFS_LINUX_TEST_RESULT([xattr_handler_set_mnt_idmap], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_XATTR_SET_USERNS, 1,
+- [xattr_handler->set() takes user_namespace])
+- ],[
+- dnl #
+- dnl # 4.7 API change,
+- dnl # The xattr_handler->set() callback was changed to take both
+- dnl # dentry and inode.
+- dnl #
+- AC_MSG_RESULT(no)
+- AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode])
+- ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [
++ AC_DEFINE(HAVE_XATTR_SET_IDMAP, 1,
++ [xattr_handler->set() takes mnt_idmap])
++ ], [
++ AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and user_namespace])
++ ZFS_LINUX_TEST_RESULT([xattr_handler_set_userns], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1,
+- [xattr_handler->set() wants both dentry and inode])
++ AC_DEFINE(HAVE_XATTR_SET_USERNS, 1,
++ [xattr_handler->set() takes user_namespace])
+ ],[
+ dnl #
+- dnl # 4.4 API change,
+- dnl # The xattr_handler->set() callback was changed to take a
+- dnl # xattr_handler, and handler_flags argument was removed and
+- dnl # should be accessed by handler->flags.
++ dnl # 4.7 API change,
++ dnl # The xattr_handler->set() callback was changed to take both
++ dnl # dentry and inode.
+ dnl #
+ AC_MSG_RESULT(no)
+- AC_MSG_CHECKING(
+- [whether xattr_handler->set() wants xattr_handler])
+- ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [
++ AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode])
++ ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1,
+- [xattr_handler->set() wants xattr_handler])
++ AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1,
++ [xattr_handler->set() wants both dentry and inode])
+ ],[
+ dnl #
+- dnl # 2.6.33 API change,
+- dnl # The xattr_handler->set() callback was changed
+- dnl # to take a dentry instead of an inode, and a
+- dnl # handler_flags argument was added.
++ dnl # 4.4 API change,
++ dnl # The xattr_handler->set() callback was changed to take a
++ dnl # xattr_handler, and handler_flags argument was removed and
++ dnl # should be accessed by handler->flags.
+ dnl #
+ AC_MSG_RESULT(no)
+ AC_MSG_CHECKING(
+- [whether xattr_handler->set() wants dentry])
+- ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [
++ [whether xattr_handler->set() wants xattr_handler])
++ ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [
+ AC_MSG_RESULT(yes)
+- AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1,
+- [xattr_handler->set() wants dentry])
++ AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1,
++ [xattr_handler->set() wants xattr_handler])
+ ],[
+- ZFS_LINUX_TEST_ERROR([xattr set()])
++ dnl #
++ dnl # 2.6.33 API change,
++ dnl # The xattr_handler->set() callback was changed
++ dnl # to take a dentry instead of an inode, and a
++ dnl # handler_flags argument was added.
++ dnl #
++ AC_MSG_RESULT(no)
++ AC_MSG_CHECKING(
++ [whether xattr_handler->set() wants dentry])
++ ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1,
++ [xattr_handler->set() wants dentry])
++ ],[
++ ZFS_LINUX_TEST_ERROR([xattr set()])
++ ])
+ ])
+ ])
+ ])
+diff --git a/config/kernel.m4 b/config/kernel.m4
+index 7806da7a8..173c78a2a 100644
+--- a/config/kernel.m4
++++ b/config/kernel.m4
+@@ -69,6 +69,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
+ ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE
+ ZFS_AC_KERNEL_SRC_XATTR
+ ZFS_AC_KERNEL_SRC_ACL
++ ZFS_AC_KERNEL_SRC_INODE_SETATTR
+ ZFS_AC_KERNEL_SRC_INODE_GETATTR
+ ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS
+ ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION
+@@ -130,7 +131,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
+ ZFS_AC_KERNEL_SRC_KSTRTOUL
+ ZFS_AC_KERNEL_SRC_PERCPU
+ ZFS_AC_KERNEL_SRC_CPU_HOTPLUG
+- ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS
++ ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR
+ ZFS_AC_KERNEL_SRC_MKNOD
+ ZFS_AC_KERNEL_SRC_SYMLINK
+ ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS
+@@ -144,6 +145,15 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
+ ZFS_AC_KERNEL_SRC_KTHREAD
+ ZFS_AC_KERNEL_SRC_ZERO_PAGE
+ ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC
++ ZFS_AC_KERNEL_SRC_FILEMAP
++ ZFS_AC_KERNEL_SRC_WRITEPAGE_T
++ ZFS_AC_KERNEL_SRC_RECLAIMED
++ case "$host_cpu" in
++ powerpc*)
++ ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
++ ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE
++ ;;
++ esac
+
+ AC_MSG_CHECKING([for available kernel interfaces])
+ ZFS_LINUX_TEST_COMPILE_ALL([kabi])
+@@ -186,6 +196,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
+ ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE
+ ZFS_AC_KERNEL_XATTR
+ ZFS_AC_KERNEL_ACL
++ ZFS_AC_KERNEL_INODE_SETATTR
+ ZFS_AC_KERNEL_INODE_GETATTR
+ ZFS_AC_KERNEL_INODE_SET_FLAGS
+ ZFS_AC_KERNEL_INODE_SET_IVERSION
+@@ -247,7 +258,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
+ ZFS_AC_KERNEL_KSTRTOUL
+ ZFS_AC_KERNEL_PERCPU
+ ZFS_AC_KERNEL_CPU_HOTPLUG
+- ZFS_AC_KERNEL_GENERIC_FILLATTR_USERNS
++ ZFS_AC_KERNEL_GENERIC_FILLATTR
+ ZFS_AC_KERNEL_MKNOD
+ ZFS_AC_KERNEL_SYMLINK
+ ZFS_AC_KERNEL_BIO_MAX_SEGS
+@@ -261,6 +272,15 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
+ ZFS_AC_KERNEL_KTHREAD
+ ZFS_AC_KERNEL_ZERO_PAGE
+ ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
++ ZFS_AC_KERNEL_FILEMAP
++ ZFS_AC_KERNEL_WRITEPAGE_T
++ ZFS_AC_KERNEL_RECLAIMED
++ case "$host_cpu" in
++ powerpc*)
++ ZFS_AC_KERNEL_CPU_HAS_FEATURE
++ ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE
++ ;;
++ esac
+ ])
+
+ dnl #
+diff --git a/config/zfs-build.m4 b/config/zfs-build.m4
+index 2ab6765c3..9390812cd 100644
+--- a/config/zfs-build.m4
++++ b/config/zfs-build.m4
+@@ -81,7 +81,7 @@ AC_DEFUN([ZFS_AC_DEBUG], [
+ AC_DEFUN([ZFS_AC_DEBUGINFO_ENABLE], [
+ DEBUG_CFLAGS="$DEBUG_CFLAGS -g -fno-inline $NO_IPA_SRA"
+
+- KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline $NO_IPA_SRA"
++ KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline $KERNEL_NO_IPA_SRA"
+ KERNEL_MAKE="$KERNEL_MAKE CONFIG_DEBUG_INFO=y"
+
+ DEBUGINFO_ZFS="_with_debuginfo"
+@@ -217,6 +217,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [
+ ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH
+ ZFS_AC_CONFIG_ALWAYS_CC_NO_OMIT_FRAME_POINTER
+ ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA
++ ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA
+ ZFS_AC_CONFIG_ALWAYS_CC_ASAN
+ ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD
+ ZFS_AC_CONFIG_ALWAYS_SYSTEM
+diff --git a/configure.ac b/configure.ac
+index 2671434af..cb339ccd4 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -222,6 +222,7 @@ AC_CONFIG_FILES([
+ tests/zfs-tests/cmd/mmap_exec/Makefile
+ tests/zfs-tests/cmd/mmap_libaio/Makefile
+ tests/zfs-tests/cmd/mmap_seek/Makefile
++ tests/zfs-tests/cmd/mmap_sync/Makefile
+ tests/zfs-tests/cmd/mmapwrite/Makefile
+ tests/zfs-tests/cmd/nvlist_to_lua/Makefile
+ tests/zfs-tests/cmd/randfree_file/Makefile
+diff --git a/contrib/initramfs/scripts/zfs b/contrib/initramfs/scripts/zfs
+index 4ce739fda..3c51b53ee 100644
+--- a/contrib/initramfs/scripts/zfs
++++ b/contrib/initramfs/scripts/zfs
+@@ -326,7 +326,7 @@ mount_fs()
+
+ # Need the _original_ datasets mountpoint!
+ mountpoint=$(get_fs_value "$fs" mountpoint)
+- ZFS_CMD="mount.zfs -o zfsutil"
++ ZFS_CMD="mount -o zfsutil -t zfs"
+ if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then
+ # Can't use the mountpoint property. Might be one of our
+ # clones. Check the 'org.zol:mountpoint' property set in
+@@ -343,7 +343,7 @@ mount_fs()
+ fi
+ # Don't use mount.zfs -o zfsutils for legacy mountpoint
+ if [ "$mountpoint" = "legacy" ]; then
+- ZFS_CMD="mount.zfs"
++ ZFS_CMD="mount -t zfs"
+ fi
+ # Last hail-mary: Hope 'rootmnt' is set!
+ mountpoint=""
+@@ -914,7 +914,7 @@ mountroot()
+ echo " not specified on the kernel command line."
+ echo ""
+ echo "Manually mount the root filesystem on $rootmnt and then exit."
+- echo "Hint: Try: mount.zfs -o zfsutil ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
++ echo "Hint: Try: mount -o zfsutil -t zfs ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
+ shell
+ fi
+
+diff --git a/contrib/pam_zfs_key/pam_zfs_key.c b/contrib/pam_zfs_key/pam_zfs_key.c
+index 0db119382..313703770 100644
+--- a/contrib/pam_zfs_key/pam_zfs_key.c
++++ b/contrib/pam_zfs_key/pam_zfs_key.c
+@@ -548,16 +548,11 @@ zfs_key_config_modify_session_counter(pam_handle_t *pamh,
+ errno);
+ return (-1);
+ }
+- size_t runtime_path_len = strlen(runtime_path);
+- size_t counter_path_len = runtime_path_len + 1 + 10;
+- char *counter_path = malloc(counter_path_len + 1);
+- if (!counter_path) {
++
++ char *counter_path;
++ if (asprintf(&counter_path, "%s/%u", runtime_path, config->uid) == -1)
+ return (-1);
+- }
+- counter_path[0] = 0;
+- strcat(counter_path, runtime_path);
+- snprintf(counter_path + runtime_path_len, counter_path_len, "/%d",
+- config->uid);
++
+ const int fd = open(counter_path,
+ O_RDWR | O_CLOEXEC | O_CREAT | O_NOFOLLOW,
+ S_IRUSR | S_IWUSR);
+diff --git a/include/os/freebsd/zfs/sys/zfs_znode_impl.h b/include/os/freebsd/zfs/sys/zfs_znode_impl.h
+index 3d93525b4..120884116 100644
+--- a/include/os/freebsd/zfs/sys/zfs_znode_impl.h
++++ b/include/os/freebsd/zfs/sys/zfs_znode_impl.h
+@@ -118,7 +118,8 @@ extern minor_t zfsdev_minor_alloc(void);
+ #define Z_ISLNK(type) ((type) == VLNK)
+ #define Z_ISDIR(type) ((type) == VDIR)
+
+-#define zn_has_cached_data(zp) vn_has_cached_data(ZTOV(zp))
++#define zn_has_cached_data(zp, start, end) \
++ vn_has_cached_data(ZTOV(zp))
+ #define zn_flush_cached_data(zp, sync) vn_flush_cached_data(ZTOV(zp), sync)
+ #define zn_rlimit_fsize(zp, uio) \
+ vn_rlimit_fsize(ZTOV(zp), GET_UIO_STRUCT(uio), zfs_uio_td(uio))
+diff --git a/include/os/linux/kernel/linux/dcache_compat.h b/include/os/linux/kernel/linux/dcache_compat.h
+index c90135fd3..f87f1653a 100644
+--- a/include/os/linux/kernel/linux/dcache_compat.h
++++ b/include/os/linux/kernel/linux/dcache_compat.h
+@@ -39,6 +39,21 @@
+ #define d_alias d_u.d_alias
+ #endif
+
++/*
++ * Starting from Linux 5.13, flush_dcache_page() is an inline function
++ * and, under some configurations, may indirectly reference the GPL-only
++ * cpu_feature_keys on powerpc. Override this function when it is detected
++ * as being GPL-only.
++ */
++#if defined __powerpc__ && defined HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY
++#include <linux/simd_powerpc.h>
++#define flush_dcache_page(page) do { \
++ if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && \
++ test_bit(PG_dcache_clean, &(page)->flags)) \
++ clear_bit(PG_dcache_clean, &(page)->flags); \
++ } while (0)
++#endif
++
+ /*
+ * 2.6.30 API change,
+ * The const keyword was added to the 'struct dentry_operations' in
+diff --git a/include/os/linux/kernel/linux/simd_powerpc.h b/include/os/linux/kernel/linux/simd_powerpc.h
+index 108cef22f..422b85af3 100644
+--- a/include/os/linux/kernel/linux/simd_powerpc.h
++++ b/include/os/linux/kernel/linux/simd_powerpc.h
+@@ -76,6 +76,17 @@
+ #define kfpu_init() 0
+ #define kfpu_fini() ((void) 0)
+
++/*
++ * Linux 4.7 makes cpu_has_feature() use jump labels on powerpc if
++ * CONFIG_JUMP_LABEL_FEATURE_CHECKS is enabled; in that case, however, it
++ * references the GPL-only symbol cpu_feature_keys. Therefore we override
++ * this interface when it is detected as being GPL-only.
++ */
++#if defined(CONFIG_JUMP_LABEL_FEATURE_CHECKS) && \
++ defined(HAVE_CPU_HAS_FEATURE_GPL_ONLY)
++#define cpu_has_feature(feature) early_cpu_has_feature(feature)
++#endif
++
+ /*
+ * Check if AltiVec instruction set is available
+ */
+diff --git a/include/os/linux/kernel/linux/vfs_compat.h b/include/os/linux/kernel/linux/vfs_compat.h
+index 91e908598..e82bbf755 100644
+--- a/include/os/linux/kernel/linux/vfs_compat.h
++++ b/include/os/linux/kernel/linux/vfs_compat.h
+@@ -344,7 +344,8 @@ static inline void zfs_gid_write(struct inode *ip, gid_t gid)
+ * 4.9 API change
+ */
+ #if !(defined(HAVE_SETATTR_PREPARE_NO_USERNS) || \
+- defined(HAVE_SETATTR_PREPARE_USERNS))
++ defined(HAVE_SETATTR_PREPARE_USERNS) || \
++ defined(HAVE_SETATTR_PREPARE_IDMAP))
+ static inline int
+ setattr_prepare(struct dentry *dentry, struct iattr *ia)
+ {
+@@ -399,6 +400,15 @@ func(struct user_namespace *user_ns, const struct path *path, \
+ return (func##_impl(user_ns, path, stat, request_mask, \
+ query_flags)); \
+ }
++#elif defined(HAVE_IDMAP_IOPS_GETATTR)
++#define ZPL_GETATTR_WRAPPER(func) \
++static int \
++func(struct mnt_idmap *user_ns, const struct path *path, \
++ struct kstat *stat, u32 request_mask, unsigned int query_flags) \
++{ \
++ return (func##_impl(user_ns, path, stat, request_mask, \
++ query_flags)); \
++}
+ #else
+ #error
+ #endif
+@@ -450,8 +460,15 @@ zpl_is_32bit_api(void)
+ * 5.12 API change
+ * To support id-mapped mounts, generic_fillattr() was modified to
+ * accept a new struct user_namespace* as its first arg.
++ *
++ * 6.3 API change
++ * generic_fillattr() first arg is changed to struct mnt_idmap *
++ *
+ */
+-#ifdef HAVE_GENERIC_FILLATTR_USERNS
++#ifdef HAVE_GENERIC_FILLATTR_IDMAP
++#define zpl_generic_fillattr(idmap, ip, sp) \
++ generic_fillattr(idmap, ip, sp)
++#elif defined(HAVE_GENERIC_FILLATTR_USERNS)
+ #define zpl_generic_fillattr(user_ns, ip, sp) \
+ generic_fillattr(user_ns, ip, sp)
+ #else
+diff --git a/include/os/linux/kernel/linux/xattr_compat.h b/include/os/linux/kernel/linux/xattr_compat.h
+index 30403fe87..3ffd00169 100644
+--- a/include/os/linux/kernel/linux/xattr_compat.h
++++ b/include/os/linux/kernel/linux/xattr_compat.h
+@@ -133,20 +133,35 @@ fn(const struct xattr_handler *handler, struct dentry *dentry, \
+ #error "Unsupported kernel"
+ #endif
+
++/*
++ * 6.3 API change,
++ * The xattr_handler->set() callback was changed to take a
++ * struct mnt_idmap* as the second arg (after the handler), to
++ * support idmapped mounts.
++ */
++#if defined(HAVE_XATTR_SET_IDMAP)
++#define ZPL_XATTR_SET_WRAPPER(fn) \
++static int \
++fn(const struct xattr_handler *handler, struct mnt_idmap *user_ns, \
++ struct dentry *dentry, struct inode *inode, const char *name, \
++ const void *buffer, size_t size, int flags) \
++{ \
++ return (__ ## fn(user_ns, inode, name, buffer, size, flags)); \
++}
+ /*
+ * 5.12 API change,
+ * The xattr_handler->set() callback was changed to take the
+ * struct user_namespace* as the first arg, to support idmapped
+ * mounts.
+ */
+-#if defined(HAVE_XATTR_SET_USERNS)
++#elif defined(HAVE_XATTR_SET_USERNS)
+ #define ZPL_XATTR_SET_WRAPPER(fn) \
+ static int \
+ fn(const struct xattr_handler *handler, struct user_namespace *user_ns, \
+ struct dentry *dentry, struct inode *inode, const char *name, \
+ const void *buffer, size_t size, int flags) \
+ { \
+- return (__ ## fn(inode, name, buffer, size, flags)); \
++ return (__ ## fn(user_ns, inode, name, buffer, size, flags)); \
+ }
+ /*
+ * 4.7 API change,
+@@ -160,7 +175,7 @@ fn(const struct xattr_handler *handler, struct dentry *dentry, \
+ struct inode *inode, const char *name, const void *buffer, \
+ size_t size, int flags) \
+ { \
+- return (__ ## fn(inode, name, buffer, size, flags)); \
++ return (__ ## fn(kcred->user_ns, inode, name, buffer, size, flags));\
+ }
+ /*
+ * 4.4 API change,
+@@ -174,7 +189,8 @@ static int \
+ fn(const struct xattr_handler *handler, struct dentry *dentry, \
+ const char *name, const void *buffer, size_t size, int flags) \
+ { \
+- return (__ ## fn(dentry->d_inode, name, buffer, size, flags)); \
++ return (__ ## fn(kcred->user_ns, dentry->d_inode, name, \
++ buffer, size, flags)); \
+ }
+ /*
+ * 2.6.33 API change,
+@@ -187,7 +203,8 @@ static int \
+ fn(struct dentry *dentry, const char *name, const void *buffer, \
+ size_t size, int flags, int unused_handler_flags) \
+ { \
+- return (__ ## fn(dentry->d_inode, name, buffer, size, flags)); \
++ return (__ ## fn(kcred->user_ns, dentry->d_inode, name, buffer, \
++ size, flags)); \
+ }
+ #else
+ #error "Unsupported kernel"
+diff --git a/include/os/linux/spl/sys/cred.h b/include/os/linux/spl/sys/cred.h
+index b7d3f38d7..501bd4566 100644
+--- a/include/os/linux/spl/sys/cred.h
++++ b/include/os/linux/spl/sys/cred.h
+@@ -45,6 +45,8 @@ typedef struct cred cred_t;
+ #define SGID_TO_KGID(x) (KGIDT_INIT(x))
+ #define KGIDP_TO_SGIDP(x) (&(x)->val)
+
++extern zidmap_t *zfs_get_init_idmap(void);
++
+ extern void crhold(cred_t *cr);
+ extern void crfree(cred_t *cr);
+ extern uid_t crgetuid(const cred_t *cr);
+diff --git a/include/os/linux/spl/sys/types.h b/include/os/linux/spl/sys/types.h
+index b44c94518..4d638efbb 100644
+--- a/include/os/linux/spl/sys/types.h
++++ b/include/os/linux/spl/sys/types.h
+@@ -54,4 +54,18 @@ typedef ulong_t pgcnt_t;
+ typedef int major_t;
+ typedef int minor_t;
+
++struct user_namespace;
++#ifdef HAVE_IOPS_CREATE_IDMAP
++#include <linux/refcount.h>
++struct mnt_idmap {
++ struct user_namespace *owner;
++ refcount_t count;
++};
++typedef struct mnt_idmap zidmap_t;
++#else
++typedef struct user_namespace zidmap_t;
++#endif
++
++extern zidmap_t *zfs_init_idmap;
++
+ #endif /* _SPL_TYPES_H */
+diff --git a/include/os/linux/zfs/sys/trace_acl.h b/include/os/linux/zfs/sys/trace_acl.h
+index 21bcefa4e..656552749 100644
+--- a/include/os/linux/zfs/sys/trace_acl.h
++++ b/include/os/linux/zfs/sys/trace_acl.h
+@@ -58,9 +58,10 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
+ __field(uint64_t, z_size)
+ __field(uint64_t, z_pflags)
+ __field(uint32_t, z_sync_cnt)
++ __field(uint32_t, z_sync_writes_cnt)
++ __field(uint32_t, z_async_writes_cnt)
+ __field(mode_t, z_mode)
+ __field(boolean_t, z_is_sa)
+- __field(boolean_t, z_is_mapped)
+ __field(boolean_t, z_is_ctldir)
+
+ __field(uint32_t, i_uid)
+@@ -90,9 +91,10 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
+ __entry->z_size = zn->z_size;
+ __entry->z_pflags = zn->z_pflags;
+ __entry->z_sync_cnt = zn->z_sync_cnt;
++ __entry->z_sync_writes_cnt = zn->z_sync_writes_cnt;
++ __entry->z_async_writes_cnt = zn->z_async_writes_cnt;
+ __entry->z_mode = zn->z_mode;
+ __entry->z_is_sa = zn->z_is_sa;
+- __entry->z_is_mapped = zn->z_is_mapped;
+ __entry->z_is_ctldir = zn->z_is_ctldir;
+
+ __entry->i_uid = KUID_TO_SUID(ZTOI(zn)->i_uid);
+@@ -114,18 +116,18 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
+ TP_printk("zn { id %llu unlinked %u atime_dirty %u "
+ "zn_prefetch %u blksz %u seq %u "
+ "mapcnt %llu size %llu pflags %llu "
+- "sync_cnt %u mode 0x%x is_sa %d "
+- "is_mapped %d is_ctldir %d inode { "
+- "uid %u gid %u ino %lu nlink %u size %lli "
++ "sync_cnt %u sync_writes_cnt %u async_writes_cnt %u "
++ "mode 0x%x is_sa %d is_ctldir %d "
++ "inode { uid %u gid %u ino %lu nlink %u size %lli "
+ "blkbits %u bytes %u mode 0x%x generation %x } } "
+ "ace { type %u flags %u access_mask %u } mask_matched %u",
+ __entry->z_id, __entry->z_unlinked, __entry->z_atime_dirty,
+ __entry->z_zn_prefetch, __entry->z_blksz,
+ __entry->z_seq, __entry->z_mapcnt, __entry->z_size,
+- __entry->z_pflags, __entry->z_sync_cnt, __entry->z_mode,
+- __entry->z_is_sa, __entry->z_is_mapped,
+- __entry->z_is_ctldir, __entry->i_uid,
+- __entry->i_gid, __entry->i_ino, __entry->i_nlink,
++ __entry->z_pflags, __entry->z_sync_cnt,
++ __entry->z_sync_writes_cnt, __entry->z_async_writes_cnt,
++ __entry->z_mode, __entry->z_is_sa, __entry->z_is_ctldir,
++ __entry->i_uid, __entry->i_gid, __entry->i_ino, __entry->i_nlink,
+ __entry->i_size, __entry->i_blkbits,
+ __entry->i_bytes, __entry->i_mode, __entry->i_generation,
+ __entry->z_type, __entry->z_flags, __entry->z_access_mask,
+diff --git a/include/os/linux/zfs/sys/zfs_vnops_os.h b/include/os/linux/zfs/sys/zfs_vnops_os.h
+index 47f91e4a6..331f2e2bc 100644
+--- a/include/os/linux/zfs/sys/zfs_vnops_os.h
++++ b/include/os/linux/zfs/sys/zfs_vnops_os.h
+@@ -54,8 +54,7 @@ extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap,
+ extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd,
+ cred_t *cr, int flags);
+ extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr);
+-extern int zfs_getattr_fast(struct user_namespace *, struct inode *ip,
+- struct kstat *sp);
++extern int zfs_getattr_fast(zidmap_t *, struct inode *ip, struct kstat *sp);
+ extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr);
+ extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,
+ char *tnm, cred_t *cr, int flags);
+@@ -68,9 +67,9 @@ extern void zfs_inactive(struct inode *ip);
+ extern int zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
+ offset_t offset, cred_t *cr);
+ extern int zfs_fid(struct inode *ip, fid_t *fidp);
+-extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages);
++extern int zfs_getpage(struct inode *ip, struct page *pp);
+ extern int zfs_putpage(struct inode *ip, struct page *pp,
+- struct writeback_control *wbc);
++ struct writeback_control *wbc, boolean_t for_sync);
+ extern int zfs_dirty_inode(struct inode *ip, int flags);
+ extern int zfs_map(struct inode *ip, offset_t off, caddr_t *addrp,
+ size_t len, unsigned long vm_flags);
+diff --git a/include/os/linux/zfs/sys/zfs_znode_impl.h b/include/os/linux/zfs/sys/zfs_znode_impl.h
+index de46fc8f2..9b9ac7a4f 100644
+--- a/include/os/linux/zfs/sys/zfs_znode_impl.h
++++ b/include/os/linux/zfs/sys/zfs_znode_impl.h
+@@ -47,9 +47,16 @@
+ extern "C" {
+ #endif
+
++#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+ #define ZNODE_OS_FIELDS \
+ inode_timespec_t z_btime; /* creation/birth time (cached) */ \
+ struct inode z_inode;
++#else
++#define ZNODE_OS_FIELDS \
++ inode_timespec_t z_btime; /* creation/birth time (cached) */ \
++ struct inode z_inode; \
++ boolean_t z_is_mapped; /* we are mmap'ed */
++#endif
+
+ /*
+ * Convert between znode pointers and inode pointers
+@@ -70,7 +77,14 @@ extern "C" {
+ #define Z_ISDEV(type) (S_ISCHR(type) || S_ISBLK(type) || S_ISFIFO(type))
+ #define Z_ISDIR(type) S_ISDIR(type)
+
+-#define zn_has_cached_data(zp) ((zp)->z_is_mapped)
++#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
++#define zn_has_cached_data(zp, start, end) \
++ filemap_range_has_page(ZTOI(zp)->i_mapping, start, end)
++#else
++#define zn_has_cached_data(zp, start, end) \
++ ((zp)->z_is_mapped)
++#endif
++
+ #define zn_flush_cached_data(zp, sync) write_inode_now(ZTOI(zp), sync)
+ #define zn_rlimit_fsize(zp, uio) (0)
+
+diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h
+index ac9815d4e..4e08470e7 100644
+--- a/include/os/linux/zfs/sys/zpl.h
++++ b/include/os/linux/zfs/sys/zpl.h
+@@ -64,7 +64,10 @@ extern int zpl_xattr_security_init(struct inode *ip, struct inode *dip,
+ const struct qstr *qstr);
+ #if defined(CONFIG_FS_POSIX_ACL)
+ #if defined(HAVE_SET_ACL)
+-#if defined(HAVE_SET_ACL_USERNS)
++#if defined(HAVE_SET_ACL_IDMAP_DENTRY)
++extern int zpl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
++ struct posix_acl *acl, int type);
++#elif defined(HAVE_SET_ACL_USERNS)
+ extern int zpl_set_acl(struct user_namespace *userns, struct inode *ip,
+ struct posix_acl *acl, int type);
+ #elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
+@@ -186,13 +189,15 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
+
+ #if defined(HAVE_INODE_OWNER_OR_CAPABLE)
+ #define zpl_inode_owner_or_capable(ns, ip) inode_owner_or_capable(ip)
+-#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED)
++#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_USERNS)
+ #define zpl_inode_owner_or_capable(ns, ip) inode_owner_or_capable(ns, ip)
++#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_IDMAP)
++#define zpl_inode_owner_or_capable(idmap, ip) inode_owner_or_capable(idmap, ip)
+ #else
+ #error "Unsupported kernel"
+ #endif
+
+-#ifdef HAVE_SETATTR_PREPARE_USERNS
++#if defined(HAVE_SETATTR_PREPARE_USERNS) || defined(HAVE_SETATTR_PREPARE_IDMAP)
+ #define zpl_setattr_prepare(ns, dentry, ia) setattr_prepare(ns, dentry, ia)
+ #else
+ /*
+diff --git a/include/sys/dmu.h b/include/sys/dmu.h
+index 7bdd42e8b..12bd88720 100644
+--- a/include/sys/dmu.h
++++ b/include/sys/dmu.h
+@@ -778,6 +778,9 @@ dmu_tx_t *dmu_tx_create(objset_t *os);
+ void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
+ void dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
+ int len);
++void dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
++void dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
++ int len);
+ void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
+ uint64_t len);
+ void dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
+diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h
+index 7ade2dc91..fffcbcfca 100644
+--- a/include/sys/dmu_objset.h
++++ b/include/sys/dmu_objset.h
+@@ -72,6 +72,10 @@ struct dmu_tx;
+ */
+ #define OBJSET_CRYPT_PORTABLE_FLAGS_MASK (0)
+
++#if defined(__clang__)
++#pragma clang diagnostic push
++#pragma clang diagnostic ignored "-Wgnu-variable-sized-type-not-at-end"
++#endif
+ typedef struct objset_phys {
+ dnode_phys_t os_meta_dnode;
+ zil_header_t os_zil_header;
+@@ -88,6 +92,9 @@ typedef struct objset_phys {
+ char os_pad1[OBJSET_PHYS_SIZE_V3 - OBJSET_PHYS_SIZE_V2 -
+ sizeof (dnode_phys_t)];
+ } objset_phys_t;
++#if defined(__clang__)
++#pragma clang diagnostic pop
++#endif
+
+ typedef int (*dmu_objset_upgrade_cb_t)(objset_t *);
+
+diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h
+index ad3f1b0e4..e8886fd4e 100644
+--- a/include/sys/dmu_tx.h
++++ b/include/sys/dmu_tx.h
+@@ -90,6 +90,7 @@ enum dmu_tx_hold_type {
+ THT_ZAP,
+ THT_SPACE,
+ THT_SPILL,
++ THT_APPEND,
+ THT_NUMTYPES
+ };
+
+diff --git a/include/sys/dnode.h b/include/sys/dnode.h
+index 20b7c2aaf..39bbdae44 100644
+--- a/include/sys/dnode.h
++++ b/include/sys/dnode.h
+@@ -120,7 +120,11 @@ extern "C" {
+ #define DN_MAX_LEVELS (DIV_ROUND_UP(DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT, \
+ DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT) + 1)
+
+-#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
++/*
++ * Use the flexible array instead of the fixed length one dn_bonus
++ * to address memcpy/memmove fortify error
++ */
++#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus_flexible + \
+ (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
+ #define DN_MAX_BONUS_LEN(dnp) \
+ ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \
+@@ -266,6 +270,10 @@ typedef struct dnode_phys {
+ sizeof (blkptr_t)];
+ blkptr_t dn_spill;
+ };
++ struct {
++ blkptr_t __dn_ignore4;
++ uint8_t dn_bonus_flexible[];
++ };
+ };
+ } dnode_phys_t;
+
+diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
+index 111e70ece..84f5aee59 100644
+--- a/include/sys/fs/zfs.h
++++ b/include/sys/fs/zfs.h
+@@ -1173,6 +1173,7 @@ typedef enum pool_initialize_func {
+ POOL_INITIALIZE_START,
+ POOL_INITIALIZE_CANCEL,
+ POOL_INITIALIZE_SUSPEND,
++ POOL_INITIALIZE_UNINIT,
+ POOL_INITIALIZE_FUNCS
+ } pool_initialize_func_t;
+
+diff --git a/include/sys/spa.h b/include/sys/spa.h
+index fedadab45..42f7fec0f 100644
+--- a/include/sys/spa.h
++++ b/include/sys/spa.h
+@@ -785,6 +785,7 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
+ #define SPA_ASYNC_L2CACHE_REBUILD 0x800
+ #define SPA_ASYNC_L2CACHE_TRIM 0x1000
+ #define SPA_ASYNC_REBUILD_DONE 0x2000
++#define SPA_ASYNC_DETACH_SPARE 0x4000
+
+ /* device manipulation */
+ extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
+@@ -971,6 +972,8 @@ extern int spa_import_progress_set_state(uint64_t pool_guid,
+ /* Pool configuration locks */
+ extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
+ extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw);
++extern void spa_config_enter_mmp(spa_t *spa, int locks, const void *tag,
++ krw_t rw);
+ extern void spa_config_exit(spa_t *spa, int locks, const void *tag);
+ extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
+
+diff --git a/include/sys/vdev_initialize.h b/include/sys/vdev_initialize.h
+index 81d39ebeb..942fc71c5 100644
+--- a/include/sys/vdev_initialize.h
++++ b/include/sys/vdev_initialize.h
+@@ -33,6 +33,7 @@ extern "C" {
+ #endif
+
+ extern void vdev_initialize(vdev_t *vd);
++extern void vdev_uninitialize(vdev_t *vd);
+ extern void vdev_initialize_stop(vdev_t *vd,
+ vdev_initializing_state_t tgt_state, list_t *vd_list);
+ extern void vdev_initialize_stop_all(vdev_t *vd,
+diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h
+index 0df8a0e4b..48dab671d 100644
+--- a/include/sys/zfs_znode.h
++++ b/include/sys/zfs_znode.h
+@@ -188,7 +188,6 @@ typedef struct znode {
+ boolean_t z_atime_dirty; /* atime needs to be synced */
+ boolean_t z_zn_prefetch; /* Prefetch znodes? */
+ boolean_t z_is_sa; /* are we native sa? */
+- boolean_t z_is_mapped; /* are we mmap'ed */
+ boolean_t z_is_ctldir; /* are we .zfs entry */
+ boolean_t z_suspended; /* extra ref from a suspend? */
+ uint_t z_blksz; /* block size in bytes */
+@@ -198,6 +197,8 @@ typedef struct znode {
+ uint64_t z_size; /* file size (cached) */
+ uint64_t z_pflags; /* pflags (cached) */
+ uint32_t z_sync_cnt; /* synchronous open count */
++ uint32_t z_sync_writes_cnt; /* synchronous write count */
++ uint32_t z_async_writes_cnt; /* asynchronous write count */
+ mode_t z_mode; /* mode (cached) */
+ kmutex_t z_acl_lock; /* acl data lock */
+ zfs_acl_t *z_acl_cached; /* cached acl */
+diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi
+index 13ce19df9..58c2d7635 100644
+--- a/lib/libzfs/libzfs.abi
++++ b/lib/libzfs/libzfs.abi
+@@ -5410,7 +5410,8 @@
+ <enumerator name='POOL_INITIALIZE_START' value='0'/>
+ <enumerator name='POOL_INITIALIZE_CANCEL' value='1'/>
+ <enumerator name='POOL_INITIALIZE_SUSPEND' value='2'/>
+- <enumerator name='POOL_INITIALIZE_FUNCS' value='3'/>
++ <enumerator name='POOL_INITIALIZE_UNINIT' value='3'/>
++ <enumerator name='POOL_INITIALIZE_FUNCS' value='4'/>
+ </enum-decl>
+ <typedef-decl name='pool_initialize_func_t' type-id='5c246ad4' id='7063e1ab'/>
+ <enum-decl name='pool_trim_func' id='54ed608a'>
+diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
+index f2219d1c3..f6d844bdf 100644
+--- a/lib/libzfs/libzfs_dataset.c
++++ b/lib/libzfs/libzfs_dataset.c
+@@ -1017,6 +1017,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
+ nvlist_t *ret;
+ int chosen_normal = -1;
+ int chosen_utf = -1;
++ int set_maxbs = 0;
+
+ if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) {
+ (void) no_memory(hdl);
+@@ -1234,12 +1235,17 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
+ (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+ goto error;
+ }
++ /* save the ZFS_PROP_RECORDSIZE during create op */
++ if (zpool_hdl == NULL && prop == ZFS_PROP_RECORDSIZE) {
++ set_maxbs = intval;
++ }
+ break;
+ }
+
+ case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
+ {
+- int maxbs = SPA_OLD_MAXBLOCKSIZE;
++ int maxbs =
++ set_maxbs == 0 ? SPA_OLD_MAXBLOCKSIZE : set_maxbs;
+ char buf[64];
+
+ if (zpool_hdl != NULL) {
+diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
+index 29f077841..fc6c6e8e2 100644
+--- a/lib/libzfs/libzfs_pool.c
++++ b/lib/libzfs/libzfs_pool.c
+@@ -2224,8 +2224,8 @@ xlate_init_err(int err)
+ }
+
+ /*
+- * Begin, suspend, or cancel the initialization (initializing of all free
+- * blocks) for the given vdevs in the given pool.
++ * Begin, suspend, cancel, or uninit (clear) the initialization (initializing
++ * of all free blocks) for the given vdevs in the given pool.
+ */
+ static int
+ zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
+@@ -2251,11 +2251,16 @@ zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
+ vdev_guids, &errlist);
+
+ if (err != 0) {
+- if (errlist != NULL) {
+- vd_errlist = fnvlist_lookup_nvlist(errlist,
+- ZPOOL_INITIALIZE_VDEVS);
++ if (errlist != NULL && nvlist_lookup_nvlist(errlist,
++ ZPOOL_INITIALIZE_VDEVS, &vd_errlist) == 0) {
+ goto list_errors;
+ }
++
++ if (err == EINVAL && cmd_type == POOL_INITIALIZE_UNINIT) {
++ zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
++ "uninitialize is not supported by kernel"));
++ }
++
+ (void) zpool_standard_error(zhp->zpool_hdl, err,
+ dgettext(TEXT_DOMAIN, "operation failed"));
+ goto out;
+diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi
+index 1b03a5c42..7ede3e097 100644
+--- a/lib/libzfs_core/libzfs_core.abi
++++ b/lib/libzfs_core/libzfs_core.abi
+@@ -1726,7 +1726,8 @@
+ <enumerator name='POOL_INITIALIZE_START' value='0'/>
+ <enumerator name='POOL_INITIALIZE_CANCEL' value='1'/>
+ <enumerator name='POOL_INITIALIZE_SUSPEND' value='2'/>
+- <enumerator name='POOL_INITIALIZE_FUNCS' value='3'/>
++ <enumerator name='POOL_INITIALIZE_UNINIT' value='3'/>
++ <enumerator name='POOL_INITIALIZE_FUNCS' value='4'/>
+ </enum-decl>
+ <typedef-decl name='pool_initialize_func_t' type-id='5c246ad4' id='7063e1ab'/>
+ <enum-decl name='pool_trim_func' id='54ed608a'>
+diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
+index 71a95c3bd..0c60a9c8e 100644
+--- a/man/man4/zfs.4
++++ b/man/man4/zfs.4
+@@ -1712,7 +1712,7 @@ completes in order to verify the checksums of all blocks which have been
+ resilvered.
+ This is enabled by default and strongly recommended.
+ .
+-.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32MB Pc Pq ulong
++.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq ulong
+ Maximum amount of I/O that can be concurrently issued for a sequential
+ resilver per leaf device, given in bytes.
+ .
+@@ -1831,6 +1831,13 @@ When we cross this limit from above it is because we are issuing verification I/
+ In this case (unless the metadata scan is done) we stop issuing verification I/O
+ and start scanning metadata again until we get to the hard limit.
+ .
++.It Sy zfs_scan_report_txgs Ns = Ns Sy 0 Ns | Ns 1 Pq uint
++When reporting resilver throughput and estimated completion time use the
++performance observed over roughly the last
++.Sy zfs_scan_report_txgs
++TXGs.
++When set to zero performance is calculated over the time between checkpoints.
++.
+ .It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int
+ Enforce tight memory limits on pool scans when a sequential scan is in progress.
+ When disabled, the memory limit may be exceeded by fast disks.
+@@ -1839,7 +1846,7 @@ When disabled, the memory limit may be exceeded by fast disks.
+ Freezes a scrub/resilver in progress without actually pausing it.
+ Intended for testing/debugging.
+ .
+-.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 4MB Pc Pq int
++.It Sy zfs_scan_vdev_limit Ns = Ns Sy 16777216 Ns B Po 16 MiB Pc Pq int
+ Maximum amount of data that can be concurrently issued at once for scrubs and
+ resilvers per leaf device, given in bytes.
+ .
+diff --git a/man/man8/zpool-initialize.8 b/man/man8/zpool-initialize.8
+index 0a108180d..ada00bb1b 100644
+--- a/man/man8/zpool-initialize.8
++++ b/man/man8/zpool-initialize.8
+@@ -36,7 +36,7 @@
+ .Sh SYNOPSIS
+ .Nm zpool
+ .Cm initialize
+-.Op Fl c Ns | Ns Fl s
++.Op Fl c Ns | Ns Fl s Ns | Ns Fl u
+ .Op Fl w
+ .Ar pool
+ .Oo Ar device Oc Ns …
+@@ -60,6 +60,14 @@ initialized, the command will fail and no suspension will occur on any device.
+ Initializing can then be resumed by running
+ .Nm zpool Cm initialize
+ with no flags on the relevant target devices.
++.It Fl u , -uninit
++Clears the initialization state on the specified devices, or all eligible
++devices if none are specified.
++If the devices are being actively initialized the command will fail.
++After being cleared
++.Nm zpool Cm initialize
++with no flags can be used to re-initialize all unallocated regions on
++the relevant target devices.
+ .It Fl w , -wait
+ Wait until the devices have finished initializing before returning.
+ .El
+diff --git a/module/Kbuild.in b/module/Kbuild.in
+index 1507965c5..7675d614f 100644
+--- a/module/Kbuild.in
++++ b/module/Kbuild.in
+@@ -44,4 +44,5 @@ endif
+ subdir-asflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
+ subdir-ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
+
++
+ endif
+diff --git a/module/icp/algs/edonr/edonr.c b/module/icp/algs/edonr/edonr.c
+index 7a3ba30c0..baf8bb885 100644
+--- a/module/icp/algs/edonr/edonr.c
++++ b/module/icp/algs/edonr/edonr.c
+@@ -343,9 +343,11 @@ Q256(size_t bitlen, const uint32_t *data, uint32_t *restrict p)
+ * which only goes over it by a hair (1248 bytes on ARM32).
+ */
+ #include <sys/isa_defs.h> /* for _ILP32 */
+-#ifdef _ILP32 /* We're 32-bit, assume small stack frames */
++#if defined(_ILP32) /* We're 32-bit, assume small stack frames */
++#if defined(__GNUC__) && !defined(__clang__)
+ #pragma GCC diagnostic ignored "-Wframe-larger-than="
+ #endif
++#endif
+
+ #if defined(__IBMC__) && defined(_AIX) && defined(__64BIT__)
+ static inline size_t
+diff --git a/module/icp/algs/skein/skein_block.c b/module/icp/algs/skein/skein_block.c
+index 7ba165a48..3ad52da5f 100644
+--- a/module/icp/algs/skein/skein_block.c
++++ b/module/icp/algs/skein/skein_block.c
+@@ -30,7 +30,9 @@
+ * the #pragma here to ignore the warning.
+ */
+ #if defined(_ILP32) || defined(__powerpc) /* Assume small stack */
++#if defined(__GNUC__) && !defined(__clang__)
+ #pragma GCC diagnostic ignored "-Wframe-larger-than="
++#endif
+ /*
+ * We're running on 32-bit, don't unroll loops to save stack frame space
+ *
+diff --git a/module/lua/ldo.c b/module/lua/ldo.c
+index a9835c4f5..e4abe04e9 100644
+--- a/module/lua/ldo.c
++++ b/module/lua/ldo.c
+@@ -197,7 +197,8 @@ l_noret luaD_throw (lua_State *L, int errcode) {
+ }
+ }
+
+-#if defined(HAVE_INFINITE_RECURSION)
++#if defined(__GNUC__) && !defined(__clang__) && \
++ defined(HAVE_INFINITE_RECURSION)
+ #pragma GCC diagnostic pop
+ #endif
+
+diff --git a/module/os/freebsd/zfs/zfs_ctldir.c b/module/os/freebsd/zfs/zfs_ctldir.c
+index 5bd2e1510..cfc4bab2f 100644
+--- a/module/os/freebsd/zfs/zfs_ctldir.c
++++ b/module/os/freebsd/zfs/zfs_ctldir.c
+@@ -204,6 +204,10 @@ sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id,
+ return (error);
+ }
+
++#if __FreeBSD_version >= 1400077
++ vn_set_state(vp, VSTATE_CONSTRUCTED);
++#endif
++
+ *vpp = vp;
+ return (0);
+ }
+@@ -675,6 +679,17 @@ zfsctl_root_readdir(struct vop_readdir_args *ap)
+
+ ASSERT3S(vp->v_type, ==, VDIR);
+
++ /*
++ * FIXME: this routine only ever emits 3 entries and does not tolerate
++ * being called with a buffer too small to handle all of them.
++ *
++ * The check below facilitates the idiom of repeating calls until the
++ * count to return is 0.
++ */
++ if (zfs_uio_offset(&uio) == 3 * sizeof (entry)) {
++ return (0);
++ }
++
+ error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio,
+ &dots_offset);
+ if (error != 0) {
+@@ -800,6 +815,9 @@ static struct vop_vector zfsctl_ops_root = {
+ .vop_default = &default_vnodeops,
+ #if __FreeBSD_version >= 1300121
+ .vop_fplookup_vexec = VOP_EAGAIN,
++#endif
++#if __FreeBSD_version >= 1300139
++ .vop_fplookup_symlink = VOP_EAGAIN,
+ #endif
+ .vop_open = zfsctl_common_open,
+ .vop_close = zfsctl_common_close,
+@@ -1126,6 +1144,9 @@ static struct vop_vector zfsctl_ops_snapdir = {
+ .vop_default = &default_vnodeops,
+ #if __FreeBSD_version >= 1300121
+ .vop_fplookup_vexec = VOP_EAGAIN,
++#endif
++#if __FreeBSD_version >= 1300139
++ .vop_fplookup_symlink = VOP_EAGAIN,
+ #endif
+ .vop_open = zfsctl_common_open,
+ .vop_close = zfsctl_common_close,
+@@ -1150,7 +1171,7 @@ zfsctl_snapshot_inactive(struct vop_inactive_args *ap)
+ {
+ vnode_t *vp = ap->a_vp;
+
+- VERIFY3S(vrecycle(vp), ==, 1);
++ vrecycle(vp);
+ return (0);
+ }
+
+@@ -1234,6 +1255,11 @@ static struct vop_vector zfsctl_ops_snapshot = {
+ #if __FreeBSD_version >= 1300121
+ .vop_fplookup_vexec = VOP_EAGAIN,
+ #endif
++#if __FreeBSD_version >= 1300139
++ .vop_fplookup_symlink = VOP_EAGAIN,
++#endif
++ .vop_open = zfsctl_common_open,
++ .vop_close = zfsctl_common_close,
+ .vop_inactive = zfsctl_snapshot_inactive,
+ #if __FreeBSD_version >= 1300045
+ .vop_need_inactive = vop_stdneed_inactive,
+diff --git a/module/os/freebsd/zfs/zfs_ioctl_os.c b/module/os/freebsd/zfs/zfs_ioctl_os.c
+index 7f7e2b72c..effc11518 100644
+--- a/module/os/freebsd/zfs/zfs_ioctl_os.c
++++ b/module/os/freebsd/zfs/zfs_ioctl_os.c
+@@ -59,7 +59,7 @@ zfs_vfs_ref(zfsvfs_t **zfvp)
+ return (error);
+ }
+
+-int
++boolean_t
+ zfs_vfs_held(zfsvfs_t *zfsvfs)
+ {
+ return (zfsvfs->z_vfs != NULL);
+diff --git a/module/os/freebsd/zfs/zfs_znode.c b/module/os/freebsd/zfs/zfs_znode.c
+index 1debc3ec3..92e3bdd2e 100644
+--- a/module/os/freebsd/zfs/zfs_znode.c
++++ b/module/os/freebsd/zfs/zfs_znode.c
+@@ -153,6 +153,9 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
+ zp->z_xattr_cached = NULL;
+ zp->z_xattr_parent = 0;
+ zp->z_vnode = NULL;
++ zp->z_sync_writes_cnt = 0;
++ zp->z_async_writes_cnt = 0;
++
+ return (0);
+ }
+
+@@ -172,6 +175,9 @@ zfs_znode_cache_destructor(void *buf, void *arg)
+
+ ASSERT3P(zp->z_acl_cached, ==, NULL);
+ ASSERT3P(zp->z_xattr_cached, ==, NULL);
++
++ ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
++ ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
+ }
+
+
+@@ -457,6 +463,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
+ zp->z_blksz = blksz;
+ zp->z_seq = 0x7A4653;
+ zp->z_sync_cnt = 0;
++ zp->z_sync_writes_cnt = 0;
++ zp->z_async_writes_cnt = 0;
+ #if __FreeBSD_version >= 1300139
+ atomic_store_ptr(&zp->z_cached_symlink, NULL);
+ #endif
+diff --git a/module/os/linux/spl/spl-cred.c b/module/os/linux/spl/spl-cred.c
+index f81b9540a..d407fc66b 100644
+--- a/module/os/linux/spl/spl-cred.c
++++ b/module/os/linux/spl/spl-cred.c
+@@ -145,6 +145,18 @@ crgetgid(const cred_t *cr)
+ return (KGID_TO_SGID(cr->fsgid));
+ }
+
++/* Return the initial user ns or nop_mnt_idmap */
++zidmap_t *
++zfs_get_init_idmap(void)
++{
++#ifdef HAVE_IOPS_CREATE_IDMAP
++ return ((zidmap_t *)&nop_mnt_idmap);
++#else
++ return ((zidmap_t *)&init_user_ns);
++#endif
++}
++
++EXPORT_SYMBOL(zfs_get_init_idmap);
+ EXPORT_SYMBOL(crhold);
+ EXPORT_SYMBOL(crfree);
+ EXPORT_SYMBOL(crgetuid);
+diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c
+index 508fb9d4c..2cb5251d7 100644
+--- a/module/os/linux/spl/spl-generic.c
++++ b/module/os/linux/spl/spl-generic.c
+@@ -225,8 +225,10 @@ __div_u64(uint64_t u, uint32_t v)
+ * replacements for libgcc-provided functions and will never be called
+ * directly.
+ */
++#if defined(__GNUC__) && !defined(__clang__)
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wmissing-prototypes"
++#endif
+
+ /*
+ * Implementation of 64-bit unsigned division for 32-bit machines.
+@@ -425,7 +427,9 @@ __aeabi_ldivmod(int64_t u, int64_t v)
+ EXPORT_SYMBOL(__aeabi_ldivmod);
+ #endif /* __arm || __arm__ */
+
++#if defined(__GNUC__) && !defined(__clang__)
+ #pragma GCC diagnostic pop
++#endif
+
+ #endif /* BITS_PER_LONG */
+
+diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c
+index 5a318e0a5..d586afa9b 100644
+--- a/module/os/linux/spl/spl-kmem-cache.c
++++ b/module/os/linux/spl/spl-kmem-cache.c
+@@ -183,8 +183,11 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
+ * of that infrastructure we are responsible for incrementing it.
+ */
+ if (current->reclaim_state)
++#ifdef HAVE_RECLAIM_STATE_RECLAIMED
++ current->reclaim_state->reclaimed += size >> PAGE_SHIFT;
++#else
+ current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT;
+-
++#endif
+ vfree(ptr);
+ }
+
+diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c
+index f96cd1271..fc76fe0e0 100644
+--- a/module/os/linux/zfs/arc_os.c
++++ b/module/os/linux/zfs/arc_os.c
+@@ -219,7 +219,11 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+ arc_reduce_target_size(ptob(sc->nr_to_scan));
+ arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE);
+ if (current->reclaim_state != NULL)
++#ifdef HAVE_RECLAIM_STATE_RECLAIMED
++ current->reclaim_state->reclaimed += sc->nr_to_scan;
++#else
+ current->reclaim_state->reclaimed_slab += sc->nr_to_scan;
++#endif
+
+ /*
+ * We are experiencing memory pressure which the arc_evict_zthr was
+diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c
+index 5a52092bb..8d508bcb4 100644
+--- a/module/os/linux/zfs/policy.c
++++ b/module/os/linux/zfs/policy.c
+@@ -124,7 +124,7 @@ secpolicy_vnode_any_access(const cred_t *cr, struct inode *ip, uid_t owner)
+ if (crgetuid(cr) == owner)
+ return (0);
+
+- if (zpl_inode_owner_or_capable(kcred->user_ns, ip))
++ if (zpl_inode_owner_or_capable(zfs_init_idmap, ip))
+ return (0);
+
+ #if defined(CONFIG_USER_NS)
+diff --git a/module/os/linux/zfs/zfs_ctldir.c b/module/os/linux/zfs/zfs_ctldir.c
+index c45644a69..743b03412 100644
+--- a/module/os/linux/zfs/zfs_ctldir.c
++++ b/module/os/linux/zfs/zfs_ctldir.c
+@@ -468,7 +468,9 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
+ zp->z_atime_dirty = B_FALSE;
+ zp->z_zn_prefetch = B_FALSE;
+ zp->z_is_sa = B_FALSE;
++#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+ zp->z_is_mapped = B_FALSE;
++#endif
+ zp->z_is_ctldir = B_TRUE;
+ zp->z_sa_hdl = NULL;
+ zp->z_blksz = 0;
+@@ -478,6 +480,8 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
+ zp->z_pflags = 0;
+ zp->z_mode = 0;
+ zp->z_sync_cnt = 0;
++ zp->z_sync_writes_cnt = 0;
++ zp->z_async_writes_cnt = 0;
+ ip->i_generation = 0;
+ ip->i_ino = id;
+ ip->i_mode = (S_IFDIR | S_IRWXUGO);
+diff --git a/module/os/linux/zfs/zfs_ioctl_os.c b/module/os/linux/zfs/zfs_ioctl_os.c
+index 79b9d777d..767d3a377 100644
+--- a/module/os/linux/zfs/zfs_ioctl_os.c
++++ b/module/os/linux/zfs/zfs_ioctl_os.c
+@@ -288,6 +288,8 @@ zfsdev_detach(void)
+ #define ZFS_DEBUG_STR ""
+ #endif
+
++zidmap_t *zfs_init_idmap;
++
+ static int __init
+ openzfs_init(void)
+ {
+@@ -311,6 +313,8 @@ openzfs_init(void)
+ printk(KERN_NOTICE "ZFS: Posix ACLs disabled by kernel\n");
+ #endif /* CONFIG_FS_POSIX_ACL */
+
++ zfs_init_idmap = (zidmap_t *)zfs_get_init_idmap();
++
+ return (0);
+ }
+
+diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c
+index da897f120..e620eb43a 100644
+--- a/module/os/linux/zfs/zfs_vfsops.c
++++ b/module/os/linux/zfs/zfs_vfsops.c
+@@ -1192,7 +1192,7 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
+ int objects = 0;
+ int i = 0, j = 0;
+
+- zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
++ zp_array = vmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
+
+ mutex_enter(&zfsvfs->z_znodes_lock);
+ while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) {
+@@ -1228,7 +1228,7 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
+ zrele(zp);
+ }
+
+- kmem_free(zp_array, max_array * sizeof (znode_t *));
++ vmem_free(zp_array, max_array * sizeof (znode_t *));
+
+ return (objects);
+ }
+diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
+index ae0401e60..af0d553d5 100644
+--- a/module/os/linux/zfs/zfs_vnops_os.c
++++ b/module/os/linux/zfs/zfs_vnops_os.c
+@@ -244,43 +244,46 @@ zfs_close(struct inode *ip, int flag, cred_t *cr)
+ }
+
+ #if defined(_KERNEL)
++
++static int zfs_fillpage(struct inode *ip, struct page *pp);
++
+ /*
+ * When a file is memory mapped, we must keep the IO data synchronized
+- * between the DMU cache and the memory mapped pages. What this means:
+- *
+- * On Write: If we find a memory mapped page, we write to *both*
+- * the page and the dmu buffer.
++ * between the DMU cache and the memory mapped pages. Update all mapped
++ * pages with the contents of the corresponding dmu buffer.
+ */
+ void
+ update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
+ {
+- struct inode *ip = ZTOI(zp);
+- struct address_space *mp = ip->i_mapping;
+- struct page *pp;
+- uint64_t nbytes;
+- int64_t off;
+- void *pb;
++ struct address_space *mp = ZTOI(zp)->i_mapping;
++ int64_t off = start & (PAGE_SIZE - 1);
+
+- off = start & (PAGE_SIZE-1);
+ for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
+- nbytes = MIN(PAGE_SIZE - off, len);
++ uint64_t nbytes = MIN(PAGE_SIZE - off, len);
+
+- pp = find_lock_page(mp, start >> PAGE_SHIFT);
++ struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
+ if (pp) {
+ if (mapping_writably_mapped(mp))
+ flush_dcache_page(pp);
+
+- pb = kmap(pp);
+- (void) dmu_read(os, zp->z_id, start + off, nbytes,
+- pb + off, DMU_READ_PREFETCH);
++ void *pb = kmap(pp);
++ int error = dmu_read(os, zp->z_id, start + off,
++ nbytes, pb + off, DMU_READ_PREFETCH);
+ kunmap(pp);
+
+- if (mapping_writably_mapped(mp))
+- flush_dcache_page(pp);
++ if (error) {
++ SetPageError(pp);
++ ClearPageUptodate(pp);
++ } else {
++ ClearPageError(pp);
++ SetPageUptodate(pp);
++
++ if (mapping_writably_mapped(mp))
++ flush_dcache_page(pp);
++
++ mark_page_accessed(pp);
++ }
+
+- mark_page_accessed(pp);
+- SetPageUptodate(pp);
+- ClearPageError(pp);
+ unlock_page(pp);
+ put_page(pp);
+ }
+@@ -291,38 +294,44 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
+ }
+
+ /*
+- * When a file is memory mapped, we must keep the IO data synchronized
+- * between the DMU cache and the memory mapped pages. What this means:
+- *
+- * On Read: We "read" preferentially from memory mapped pages,
+- * else we default from the dmu buffer.
+- *
+- * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
+- * the file is memory mapped.
++ * When a file is memory mapped, we must keep the I/O data synchronized
++ * between the DMU cache and the memory mapped pages. Preferentially read
++ * from memory mapped pages, otherwise fall back to reading through the dmu.
+ */
+ int
+ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
+ {
+ struct inode *ip = ZTOI(zp);
+ struct address_space *mp = ip->i_mapping;
+- struct page *pp;
+- int64_t start, off;
+- uint64_t bytes;
++ int64_t start = uio->uio_loffset;
++ int64_t off = start & (PAGE_SIZE - 1);
+ int len = nbytes;
+ int error = 0;
+- void *pb;
+
+- start = uio->uio_loffset;
+- off = start & (PAGE_SIZE-1);
+ for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
+- bytes = MIN(PAGE_SIZE - off, len);
++ uint64_t bytes = MIN(PAGE_SIZE - off, len);
+
+- pp = find_lock_page(mp, start >> PAGE_SHIFT);
++ struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
+ if (pp) {
+- ASSERT(PageUptodate(pp));
++ /*
++ * If filemap_fault() retries there exists a window
++ * where the page will be unlocked and not up to date.
++ * In this case we must try and fill the page.
++ */
++ if (unlikely(!PageUptodate(pp))) {
++ error = zfs_fillpage(ip, pp);
++ if (error) {
++ unlock_page(pp);
++ put_page(pp);
++ return (error);
++ }
++ }
++
++ ASSERT(PageUptodate(pp) || PageDirty(pp));
++
+ unlock_page(pp);
+
+- pb = kmap(pp);
++ void *pb = kmap(pp);
+ error = zfs_uiomove(pb + off, bytes, UIO_READ, uio);
+ kunmap(pp);
+
+@@ -338,9 +347,11 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
+
+ len -= bytes;
+ off = 0;
++
+ if (error)
+ break;
+ }
++
+ return (error);
+ }
+ #endif /* _KERNEL */
+@@ -1010,7 +1021,7 @@ top:
+
+ mutex_enter(&zp->z_lock);
+ may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 &&
+- !(zp->z_is_mapped);
++ !zn_has_cached_data(zp, 0, LLONG_MAX);
+ mutex_exit(&zp->z_lock);
+
+ /*
+@@ -1098,7 +1109,8 @@ top:
+ &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
+ delete_now = may_delete_now && !toobig &&
+ atomic_read(&ZTOI(zp)->i_count) == 1 &&
+- !(zp->z_is_mapped) && xattr_obj == xattr_obj_unlinked &&
++ !zn_has_cached_data(zp, 0, LLONG_MAX) &&
++ xattr_obj == xattr_obj_unlinked &&
+ zfs_external_acl(zp) == acl_obj;
+ }
+
+@@ -1663,8 +1675,7 @@ out:
+ */
+ /* ARGSUSED */
+ int
+-zfs_getattr_fast(struct user_namespace *user_ns, struct inode *ip,
+- struct kstat *sp)
++zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
+ {
+ znode_t *zp = ITOZ(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
+@@ -3434,21 +3445,34 @@ top:
+ }
+
+ static void
+-zfs_putpage_commit_cb(void *arg)
++zfs_putpage_sync_commit_cb(void *arg)
++{
++ struct page *pp = arg;
++
++ ClearPageError(pp);
++ end_page_writeback(pp);
++}
++
++static void
++zfs_putpage_async_commit_cb(void *arg)
+ {
+ struct page *pp = arg;
++ znode_t *zp = ITOZ(pp->mapping->host);
+
+ ClearPageError(pp);
+ end_page_writeback(pp);
++ atomic_dec_32(&zp->z_async_writes_cnt);
+ }
+
+ /*
+ * Push a page out to disk, once the page is on stable storage the
+ * registered commit callback will be run as notification of completion.
+ *
+- * IN: ip - page mapped for inode.
+- * pp - page to push (page is locked)
+- * wbc - writeback control data
++ * IN: ip - page mapped for inode.
++ * pp - page to push (page is locked)
++ * wbc - writeback control data
++ * for_sync - does the caller intend to wait synchronously for the
++ * page writeback to complete?
+ *
+ * RETURN: 0 if success
+ * error code if failure
+@@ -3458,7 +3482,8 @@ zfs_putpage_commit_cb(void *arg)
+ */
+ /* ARGSUSED */
+ int
+-zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
++zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
++ boolean_t for_sync)
+ {
+ znode_t *zp = ITOZ(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
+@@ -3556,6 +3581,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
+ zfs_rangelock_exit(lr);
+
+ if (wbc->sync_mode != WB_SYNC_NONE) {
++ /*
++ * Speed up any non-sync page writebacks since
++ * they may take several seconds to complete.
++ * Refer to the comment in zpl_fsync() (when
++ * HAVE_FSYNC_RANGE is defined) for details.
++ */
++ if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
++ zil_commit(zfsvfs->z_log, zp->z_id);
++ }
++
+ if (PageWriteback(pp))
+ #ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT
+ folio_wait_bit(page_folio(pp), PG_writeback);
+@@ -3581,6 +3616,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
+ * was in fact not skipped and should not be counted as if it were.
+ */
+ wbc->pages_skipped--;
++ if (!for_sync)
++ atomic_inc_32(&zp->z_async_writes_cnt);
+ set_page_writeback(pp);
+ unlock_page(pp);
+
+@@ -3602,6 +3639,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
+ #endif
+ ClearPageError(pp);
+ end_page_writeback(pp);
++ if (!for_sync)
++ atomic_dec_32(&zp->z_async_writes_cnt);
+ zfs_rangelock_exit(lr);
+ ZFS_EXIT(zfsvfs);
+ return (err);
+@@ -3626,7 +3665,9 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
+ err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
+
+ zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
+- zfs_putpage_commit_cb, pp);
++ for_sync ? zfs_putpage_sync_commit_cb :
++ zfs_putpage_async_commit_cb, pp);
++
+ dmu_tx_commit(tx);
+
+ zfs_rangelock_exit(lr);
+@@ -3638,6 +3679,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
+ * performance reasons.
+ */
+ zil_commit(zfsvfs->z_log, zp->z_id);
++ } else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
++ /*
++ * If the caller does not intend to wait synchronously
++ * for this page writeback to complete and there are active
++ * synchronous calls on this file, do a commit so that
++ * the latter don't accidentally end up waiting for
++ * our writeback to complete. Refer to the comment in
++ * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
++ */
++ zil_commit(zfsvfs->z_log, zp->z_id);
+ }
+
+ dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
+@@ -3766,55 +3817,45 @@ zfs_inactive(struct inode *ip)
+ * Fill pages with data from the disk.
+ */
+ static int
+-zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
++zfs_fillpage(struct inode *ip, struct page *pp)
+ {
+- znode_t *zp = ITOZ(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
+- objset_t *os;
+- struct page *cur_pp;
+- u_offset_t io_off, total;
+- size_t io_len;
+- loff_t i_size;
+- unsigned page_idx;
+- int err;
++ loff_t i_size = i_size_read(ip);
++ u_offset_t io_off = page_offset(pp);
++ size_t io_len = PAGE_SIZE;
+
+- os = zfsvfs->z_os;
+- io_len = nr_pages << PAGE_SHIFT;
+- i_size = i_size_read(ip);
+- io_off = page_offset(pl[0]);
++ ASSERT3U(io_off, <, i_size);
+
+ if (io_off + io_len > i_size)
+ io_len = i_size - io_off;
+
+- /*
+- * Iterate over list of pages and read each page individually.
+- */
+- page_idx = 0;
+- for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
+- caddr_t va;
++ void *va = kmap(pp);
++ int error = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off,
++ io_len, va, DMU_READ_PREFETCH);
++ if (io_len != PAGE_SIZE)
++ memset((char *)va + io_len, 0, PAGE_SIZE - io_len);
++ kunmap(pp);
+
+- cur_pp = pl[page_idx++];
+- va = kmap(cur_pp);
+- err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
+- DMU_READ_PREFETCH);
+- kunmap(cur_pp);
+- if (err) {
+- /* convert checksum errors into IO errors */
+- if (err == ECKSUM)
+- err = SET_ERROR(EIO);
+- return (err);
+- }
++ if (error) {
++ /* convert checksum errors into IO errors */
++ if (error == ECKSUM)
++ error = SET_ERROR(EIO);
++
++ SetPageError(pp);
++ ClearPageUptodate(pp);
++ } else {
++ ClearPageError(pp);
++ SetPageUptodate(pp);
+ }
+
+- return (0);
++ return (error);
+ }
+
+ /*
+- * Uses zfs_fillpage to read data from the file and fill the pages.
++ * Uses zfs_fillpage to read data from the file and fill the page.
+ *
+ * IN: ip - inode of file to get data from.
+- * pl - list of pages to read
+- * nr_pages - number of pages to read
++ * pp - page to read
+ *
+ * RETURN: 0 on success, error code on failure.
+ *
+@@ -3823,24 +3864,22 @@ zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
+ */
+ /* ARGSUSED */
+ int
+-zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
++zfs_getpage(struct inode *ip, struct page *pp)
+ {
+- znode_t *zp = ITOZ(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
+- int err;
+-
+- if (pl == NULL)
+- return (0);
++ znode_t *zp = ITOZ(ip);
++ int error;
+
+ ZFS_ENTER(zfsvfs);
+ ZFS_VERIFY_ZP(zp);
+
+- err = zfs_fillpage(ip, pl, nr_pages);
+-
+- dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nr_pages*PAGESIZE);
++ error = zfs_fillpage(ip, pp);
++ if (error == 0)
++ dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE);
+
+ ZFS_EXIT(zfsvfs);
+- return (err);
++
++ return (error);
+ }
+
+ /*
+diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c
+index f3475b4d9..0236b3216 100644
+--- a/module/os/linux/zfs/zfs_znode.c
++++ b/module/os/linux/zfs/zfs_znode.c
+@@ -134,6 +134,9 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
+ zp->z_acl_cached = NULL;
+ zp->z_xattr_cached = NULL;
+ zp->z_xattr_parent = 0;
++ zp->z_sync_writes_cnt = 0;
++ zp->z_async_writes_cnt = 0;
++
+ return (0);
+ }
+
+@@ -151,9 +154,12 @@ zfs_znode_cache_destructor(void *buf, void *arg)
+ rw_destroy(&zp->z_xattr_lock);
+ zfs_rangelock_fini(&zp->z_rangelock);
+
+- ASSERT(zp->z_dirlocks == NULL);
+- ASSERT(zp->z_acl_cached == NULL);
+- ASSERT(zp->z_xattr_cached == NULL);
++ ASSERT3P(zp->z_dirlocks, ==, NULL);
++ ASSERT3P(zp->z_acl_cached, ==, NULL);
++ ASSERT3P(zp->z_xattr_cached, ==, NULL);
++
++ ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
++ ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
+ }
+
+ static int
+@@ -540,7 +546,9 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
+ ASSERT3P(zp->z_xattr_cached, ==, NULL);
+ zp->z_unlinked = B_FALSE;
+ zp->z_atime_dirty = B_FALSE;
++#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+ zp->z_is_mapped = B_FALSE;
++#endif
+ zp->z_is_ctldir = B_FALSE;
+ zp->z_suspended = B_FALSE;
+ zp->z_sa_hdl = NULL;
+@@ -549,6 +557,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
+ zp->z_blksz = blksz;
+ zp->z_seq = 0x7A4653;
+ zp->z_sync_cnt = 0;
++ zp->z_sync_writes_cnt = 0;
++ zp->z_async_writes_cnt = 0;
+
+ zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
+
+@@ -1628,7 +1638,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
+ * Zero partial page cache entries. This must be done under a
+ * range lock in order to keep the ARC and page cache in sync.
+ */
+- if (zp->z_is_mapped) {
++ if (zn_has_cached_data(zp, off, off + len - 1)) {
+ loff_t first_page, last_page, page_len;
+ loff_t first_page_offset, last_page_offset;
+
+diff --git a/module/os/linux/zfs/zpl_ctldir.c b/module/os/linux/zfs/zpl_ctldir.c
+index 9b526afd0..cf4da470f 100644
+--- a/module/os/linux/zfs/zpl_ctldir.c
++++ b/module/os/linux/zfs/zpl_ctldir.c
+@@ -101,7 +101,11 @@ zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
+ */
+ /* ARGSUSED */
+ static int
+-#ifdef HAVE_USERNS_IOPS_GETATTR
++#ifdef HAVE_IDMAP_IOPS_GETATTR
++zpl_root_getattr_impl(struct mnt_idmap *user_ns,
++ const struct path *path, struct kstat *stat, u32 request_mask,
++ unsigned int query_flags)
++#elif defined(HAVE_USERNS_IOPS_GETATTR)
+ zpl_root_getattr_impl(struct user_namespace *user_ns,
+ const struct path *path, struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+@@ -112,8 +116,14 @@ zpl_root_getattr_impl(const struct path *path, struct kstat *stat,
+ {
+ struct inode *ip = path->dentry->d_inode;
+
+-#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
++#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
++#ifdef HAVE_GENERIC_FILLATTR_USERNS
+ generic_fillattr(user_ns, ip, stat);
++#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
++ generic_fillattr(user_ns, ip, stat);
++#else
++ (void) user_ns;
++#endif
+ #else
+ generic_fillattr(ip, stat);
+ #endif
+@@ -304,6 +314,10 @@ static int
+ zpl_snapdir_rename2(struct user_namespace *user_ns, struct inode *sdip,
+ struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
+ unsigned int flags)
++#elif defined(HAVE_IOPS_RENAME_IDMAP)
++zpl_snapdir_rename2(struct mnt_idmap *user_ns, struct inode *sdip,
++ struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
++ unsigned int flags)
+ #else
+ zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
+ struct inode *tdip, struct dentry *tdentry, unsigned int flags)
+@@ -325,7 +339,9 @@ zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
+ return (error);
+ }
+
+-#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS)
++#if (!defined(HAVE_RENAME_WANTS_FLAGS) && \
++ !defined(HAVE_IOPS_RENAME_USERNS) && \
++ !defined(HAVE_IOPS_RENAME_IDMAP))
+ static int
+ zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
+ struct inode *tdip, struct dentry *tdentry)
+@@ -352,6 +368,9 @@ static int
+ #ifdef HAVE_IOPS_MKDIR_USERNS
+ zpl_snapdir_mkdir(struct user_namespace *user_ns, struct inode *dip,
+ struct dentry *dentry, umode_t mode)
++#elif defined(HAVE_IOPS_MKDIR_IDMAP)
++zpl_snapdir_mkdir(struct mnt_idmap *user_ns, struct inode *dip,
++ struct dentry *dentry, umode_t mode)
+ #else
+ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
+ #endif
+@@ -384,7 +403,11 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
+ */
+ /* ARGSUSED */
+ static int
+-#ifdef HAVE_USERNS_IOPS_GETATTR
++#ifdef HAVE_IDMAP_IOPS_GETATTR
++zpl_snapdir_getattr_impl(struct mnt_idmap *user_ns,
++ const struct path *path, struct kstat *stat, u32 request_mask,
++ unsigned int query_flags)
++#elif defined(HAVE_USERNS_IOPS_GETATTR)
+ zpl_snapdir_getattr_impl(struct user_namespace *user_ns,
+ const struct path *path, struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+@@ -397,8 +420,14 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
+
+ ZPL_ENTER(zfsvfs);
+-#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
++#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
++#ifdef HAVE_GENERIC_FILLATTR_USERNS
++ generic_fillattr(user_ns, ip, stat);
++#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
+ generic_fillattr(user_ns, ip, stat);
++#else
++ (void) user_ns;
++#endif
+ #else
+ generic_fillattr(ip, stat);
+ #endif
+@@ -439,7 +468,9 @@ const struct file_operations zpl_fops_snapdir = {
+ const struct inode_operations zpl_ops_snapdir = {
+ .lookup = zpl_snapdir_lookup,
+ .getattr = zpl_snapdir_getattr,
+-#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
++#if (defined(HAVE_RENAME_WANTS_FLAGS) || \
++ defined(HAVE_IOPS_RENAME_USERNS) || \
++ defined(HAVE_IOPS_RENAME_IDMAP))
+ .rename = zpl_snapdir_rename2,
+ #else
+ .rename = zpl_snapdir_rename,
+@@ -530,6 +561,10 @@ static int
+ zpl_shares_getattr_impl(struct user_namespace *user_ns,
+ const struct path *path, struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
++#elif defined(HAVE_IDMAP_IOPS_GETATTR)
++zpl_shares_getattr_impl(struct mnt_idmap *user_ns,
++ const struct path *path, struct kstat *stat, u32 request_mask,
++ unsigned int query_flags)
+ #else
+ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
+@@ -543,8 +578,14 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
+ ZPL_ENTER(zfsvfs);
+
+ if (zfsvfs->z_shares_dir == 0) {
+-#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
++#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
++#ifdef HAVE_GENERIC_FILLATTR_USERNS
++ generic_fillattr(user_ns, path->dentry->d_inode, stat);
++#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
+ generic_fillattr(user_ns, path->dentry->d_inode, stat);
++#else
++ (void) user_ns;
++#endif
+ #else
+ generic_fillattr(path->dentry->d_inode, stat);
+ #endif
+@@ -556,7 +597,7 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
+
+ error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
+ if (error == 0) {
+-#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
++#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
+ error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat);
+ #else
+ error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat);
+diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
+index 38d2bd147..d5d354db1 100644
+--- a/module/os/linux/zfs/zpl_file.c
++++ b/module/os/linux/zfs/zpl_file.c
+@@ -165,17 +165,56 @@ static int
+ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
+ {
+ struct inode *inode = filp->f_mapping->host;
++ znode_t *zp = ITOZ(inode);
++ zfsvfs_t *zfsvfs = ITOZSB(inode);
+ cred_t *cr = CRED();
+ int error;
+ fstrans_cookie_t cookie;
+
++ /*
++ * The variables z_sync_writes_cnt and z_async_writes_cnt work in
++ * tandem so that sync writes can detect if there are any non-sync
++ * writes going on and vice-versa. The "vice-versa" part to this logic
++ * is located in zfs_putpage() where non-sync writes check if there are
++ * any ongoing sync writes. If any sync and non-sync writes overlap,
++ * we do a commit to complete the non-sync writes since the latter can
++ * potentially take several seconds to complete and thus block sync
++ * writes in the upcoming call to filemap_write_and_wait_range().
++ */
++ atomic_inc_32(&zp->z_sync_writes_cnt);
++ /*
++ * If the following check does not detect an overlapping non-sync write
++ * (say because it's just about to start), then it is guaranteed that
++ * the non-sync write will detect this sync write. This is because we
++ * always increment z_sync_writes_cnt / z_async_writes_cnt before doing
++ * the check on z_async_writes_cnt / z_sync_writes_cnt here and in
++ * zfs_putpage() respectively.
++ */
++ if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
++ ZPL_ENTER(zfsvfs);
++ zil_commit(zfsvfs->z_log, zp->z_id);
++ ZPL_EXIT(zfsvfs);
++ }
++
+ error = filemap_write_and_wait_range(inode->i_mapping, start, end);
++
++ /*
++ * The sync write is not complete yet but we decrement
++ * z_sync_writes_cnt since zfs_fsync() increments and decrements
++ * it internally. If a non-sync write starts just after the decrement
++ * operation but before we call zfs_fsync(), it may not detect this
++ * overlapping sync write but it does not matter since we have already
++ * gone past filemap_write_and_wait_range() and we won't block due to
++ * the non-sync write.
++ */
++ atomic_dec_32(&zp->z_sync_writes_cnt);
++
+ if (error)
+ return (error);
+
+ crhold(cr);
+ cookie = spl_fstrans_mark();
+- error = -zfs_fsync(ITOZ(inode), datasync, cr);
++ error = -zfs_fsync(zp, datasync, cr);
+ spl_fstrans_unmark(cookie);
+ crfree(cr);
+ ASSERT3S(error, <=, 0);
+@@ -579,7 +618,6 @@ static int
+ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
+ {
+ struct inode *ip = filp->f_mapping->host;
+- znode_t *zp = ITOZ(ip);
+ int error;
+ fstrans_cookie_t cookie;
+
+@@ -594,9 +632,12 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
+ if (error)
+ return (error);
+
++#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
++ znode_t *zp = ITOZ(ip);
+ mutex_enter(&zp->z_lock);
+ zp->z_is_mapped = B_TRUE;
+ mutex_exit(&zp->z_lock);
++#endif
+
+ return (error);
+ }
+@@ -609,29 +650,16 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
+ static inline int
+ zpl_readpage_common(struct page *pp)
+ {
+- struct inode *ip;
+- struct page *pl[1];
+- int error = 0;
+ fstrans_cookie_t cookie;
+
+ ASSERT(PageLocked(pp));
+- ip = pp->mapping->host;
+- pl[0] = pp;
+
+ cookie = spl_fstrans_mark();
+- error = -zfs_getpage(ip, pl, 1);
++ int error = -zfs_getpage(pp->mapping->host, pp);
+ spl_fstrans_unmark(cookie);
+
+- if (error) {
+- SetPageError(pp);
+- ClearPageUptodate(pp);
+- } else {
+- ClearPageError(pp);
+- SetPageUptodate(pp);
+- flush_dcache_page(pp);
+- }
+-
+ unlock_page(pp);
++
+ return (error);
+ }
+
+@@ -688,19 +716,42 @@ zpl_readahead(struct readahead_control *ractl)
+ static int
+ zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
+ {
+- struct address_space *mapping = data;
++ boolean_t *for_sync = data;
+ fstrans_cookie_t cookie;
+
+ ASSERT(PageLocked(pp));
+ ASSERT(!PageWriteback(pp));
+
+ cookie = spl_fstrans_mark();
+- (void) zfs_putpage(mapping->host, pp, wbc);
++ (void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
+ spl_fstrans_unmark(cookie);
+
+ return (0);
+ }
+
++#ifdef HAVE_WRITEPAGE_T_FOLIO
++static int
++zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data)
++{
++ (void) zpl_putpage(&pp->page, wbc, data);
++ return (0);
++}
++#endif
++
++static inline int
++zpl_write_cache_pages(struct address_space *mapping,
++ struct writeback_control *wbc, void *data)
++{
++ int result;
++
++#ifdef HAVE_WRITEPAGE_T_FOLIO
++ result = write_cache_pages(mapping, wbc, zpl_putfolio, data);
++#else
++ result = write_cache_pages(mapping, wbc, zpl_putpage, data);
++#endif
++ return (result);
++}
++
+ static int
+ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
+ {
+@@ -722,8 +773,9 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
+ * we run it once in non-SYNC mode so that the ZIL gets all the data,
+ * and then we commit it all in one go.
+ */
++ boolean_t for_sync = (sync_mode == WB_SYNC_ALL);
+ wbc->sync_mode = WB_SYNC_NONE;
+- result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
++ result = zpl_write_cache_pages(mapping, wbc, &for_sync);
+ if (sync_mode != wbc->sync_mode) {
+ ZPL_ENTER(zfsvfs);
+ ZPL_VERIFY_ZP(zp);
+@@ -739,7 +791,7 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
+ * details). That being said, this is a no-op in most cases.
+ */
+ wbc->sync_mode = sync_mode;
+- result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
++ result = zpl_write_cache_pages(mapping, wbc, &for_sync);
+ }
+ return (result);
+ }
+@@ -756,7 +808,9 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc)
+ if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ wbc->sync_mode = WB_SYNC_ALL;
+
+- return (zpl_putpage(pp, wbc, pp->mapping));
++ boolean_t for_sync = (wbc->sync_mode == WB_SYNC_ALL);
++
++ return (zpl_putpage(pp, wbc, &for_sync));
+ }
+
+ /*
+@@ -924,7 +978,7 @@ __zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
+ !capable(CAP_LINUX_IMMUTABLE))
+ return (-EPERM);
+
+- if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
++ if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
+ return (-EACCES);
+
+ xva_init(xva);
+diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c
+index dd634f70e..6efaaf438 100644
+--- a/module/os/linux/zfs/zpl_inode.c
++++ b/module/os/linux/zfs/zpl_inode.c
+@@ -131,6 +131,9 @@ static int
+ #ifdef HAVE_IOPS_CREATE_USERNS
+ zpl_create(struct user_namespace *user_ns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool flag)
++#elif defined(HAVE_IOPS_CREATE_IDMAP)
++zpl_create(struct mnt_idmap *user_ns, struct inode *dir,
++ struct dentry *dentry, umode_t mode, bool flag)
+ #else
+ zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)
+ #endif
+@@ -174,6 +177,9 @@ static int
+ #ifdef HAVE_IOPS_MKNOD_USERNS
+ zpl_mknod(struct user_namespace *user_ns, struct inode *dir,
+ struct dentry *dentry, umode_t mode,
++#elif defined(HAVE_IOPS_MKNOD_IDMAP)
++zpl_mknod(struct mnt_idmap *user_ns, struct inode *dir,
++ struct dentry *dentry, umode_t mode,
+ #else
+ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+ #endif
+@@ -224,7 +230,10 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+
+ #ifdef HAVE_TMPFILE
+ static int
+-#ifndef HAVE_TMPFILE_DENTRY
++#ifdef HAVE_TMPFILE_IDMAP
++zpl_tmpfile(struct mnt_idmap *userns, struct inode *dir,
++ struct file *file, umode_t mode)
++#elif !defined(HAVE_TMPFILE_DENTRY)
+ zpl_tmpfile(struct user_namespace *userns, struct inode *dir,
+ struct file *file, umode_t mode)
+ #else
+@@ -317,6 +326,9 @@ static int
+ #ifdef HAVE_IOPS_MKDIR_USERNS
+ zpl_mkdir(struct user_namespace *user_ns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
++#elif defined(HAVE_IOPS_MKDIR_IDMAP)
++zpl_mkdir(struct mnt_idmap *user_ns, struct inode *dir,
++ struct dentry *dentry, umode_t mode)
+ #else
+ zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+ #endif
+@@ -386,6 +398,10 @@ static int
+ zpl_getattr_impl(struct user_namespace *user_ns,
+ const struct path *path, struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
++#elif defined(HAVE_IDMAP_IOPS_GETATTR)
++zpl_getattr_impl(struct mnt_idmap *user_ns,
++ const struct path *path, struct kstat *stat, u32 request_mask,
++ unsigned int query_flags)
+ #else
+ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+@@ -402,7 +418,7 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
+ * XXX query_flags currently ignored.
+ */
+
+-#ifdef HAVE_USERNS_IOPS_GETATTR
++#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
+ error = -zfs_getattr_fast(user_ns, ip, stat);
+ #else
+ error = -zfs_getattr_fast(kcred->user_ns, ip, stat);
+@@ -441,9 +457,12 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
+ ZPL_GETATTR_WRAPPER(zpl_getattr);
+
+ static int
+-#ifdef HAVE_SETATTR_PREPARE_USERNS
++#ifdef HAVE_USERNS_IOPS_SETATTR
+ zpl_setattr(struct user_namespace *user_ns, struct dentry *dentry,
+ struct iattr *ia)
++#elif defined(HAVE_IDMAP_IOPS_SETATTR)
++zpl_setattr(struct mnt_idmap *user_ns, struct dentry *dentry,
++ struct iattr *ia)
+ #else
+ zpl_setattr(struct dentry *dentry, struct iattr *ia)
+ #endif
+@@ -454,7 +473,13 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
+ int error;
+ fstrans_cookie_t cookie;
+
+- error = zpl_setattr_prepare(kcred->user_ns, dentry, ia);
++#ifdef HAVE_SETATTR_PREPARE_USERNS
++ error = zpl_setattr_prepare(user_ns, dentry, ia);
++#elif defined(HAVE_SETATTR_PREPARE_IDMAP)
++ error = zpl_setattr_prepare(user_ns, dentry, ia);
++#else
++ error = zpl_setattr_prepare(zfs_init_idmap, dentry, ia);
++#endif
+ if (error)
+ return (error);
+
+@@ -489,10 +514,14 @@ static int
+ #ifdef HAVE_IOPS_RENAME_USERNS
+ zpl_rename2(struct user_namespace *user_ns, struct inode *sdip,
+ struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
+- unsigned int flags)
++ unsigned int rflags)
++#elif defined(HAVE_IOPS_RENAME_IDMAP)
++zpl_rename2(struct mnt_idmap *user_ns, struct inode *sdip,
++ struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
++ unsigned int rflags)
+ #else
+ zpl_rename2(struct inode *sdip, struct dentry *sdentry,
+- struct inode *tdip, struct dentry *tdentry, unsigned int flags)
++ struct inode *tdip, struct dentry *tdentry, unsigned int rflags)
+ #endif
+ {
+ cred_t *cr = CRED();
+@@ -500,7 +529,7 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry,
+ fstrans_cookie_t cookie;
+
+ /* We don't have renameat2(2) support */
+- if (flags)
++ if (rflags)
+ return (-EINVAL);
+
+ crhold(cr);
+@@ -514,7 +543,9 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry,
+ return (error);
+ }
+
+-#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS)
++#if !defined(HAVE_IOPS_RENAME_USERNS) && \
++ !defined(HAVE_RENAME_WANTS_FLAGS) && \
++ !defined(HAVE_IOPS_RENAME_IDMAP)
+ static int
+ zpl_rename(struct inode *sdip, struct dentry *sdentry,
+ struct inode *tdip, struct dentry *tdentry)
+@@ -527,6 +558,9 @@ static int
+ #ifdef HAVE_IOPS_SYMLINK_USERNS
+ zpl_symlink(struct user_namespace *user_ns, struct inode *dir,
+ struct dentry *dentry, const char *name)
++#elif defined(HAVE_IOPS_SYMLINK_IDMAP)
++zpl_symlink(struct mnt_idmap *user_ns, struct inode *dir,
++ struct dentry *dentry, const char *name)
+ #else
+ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
+ #endif
+@@ -745,6 +779,8 @@ const struct inode_operations zpl_dir_inode_operations = {
+ .mknod = zpl_mknod,
+ #if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
+ .rename = zpl_rename2,
++#elif defined(HAVE_IOPS_RENAME_IDMAP)
++ .rename = zpl_rename2,
+ #else
+ .rename = zpl_rename,
+ #endif
+diff --git a/module/os/linux/zfs/zpl_xattr.c b/module/os/linux/zfs/zpl_xattr.c
+index 364cd34c1..084817609 100644
+--- a/module/os/linux/zfs/zpl_xattr.c
++++ b/module/os/linux/zfs/zpl_xattr.c
+@@ -725,9 +725,11 @@ __zpl_xattr_user_get(struct inode *ip, const char *name,
+ ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
+
+ static int
+-__zpl_xattr_user_set(struct inode *ip, const char *name,
++__zpl_xattr_user_set(zidmap_t *user_ns,
++ struct inode *ip, const char *name,
+ const void *value, size_t size, int flags)
+ {
++ (void) user_ns;
+ char *xattr_name;
+ int error;
+ /* xattr_resolve_name will do this for us if this is defined */
+@@ -794,9 +796,11 @@ __zpl_xattr_trusted_get(struct inode *ip, const char *name,
+ ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
+
+ static int
+-__zpl_xattr_trusted_set(struct inode *ip, const char *name,
++__zpl_xattr_trusted_set(zidmap_t *user_ns,
++ struct inode *ip, const char *name,
+ const void *value, size_t size, int flags)
+ {
++ (void) user_ns;
+ char *xattr_name;
+ int error;
+
+@@ -863,9 +867,11 @@ __zpl_xattr_security_get(struct inode *ip, const char *name,
+ ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
+
+ static int
+-__zpl_xattr_security_set(struct inode *ip, const char *name,
++__zpl_xattr_security_set(zidmap_t *user_ns,
++ struct inode *ip, const char *name,
+ const void *value, size_t size, int flags)
+ {
++ (void) user_ns;
+ char *xattr_name;
+ int error;
+ /* xattr_resolve_name will do this for us if this is defined */
+@@ -889,7 +895,7 @@ zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs,
+ int error = 0;
+
+ for (xattr = xattrs; xattr->name != NULL; xattr++) {
+- error = __zpl_xattr_security_set(ip,
++ error = __zpl_xattr_security_set(NULL, ip,
+ xattr->name, xattr->value, xattr->value_len, 0);
+
+ if (error < 0)
+@@ -1004,6 +1010,9 @@ int
+ #ifdef HAVE_SET_ACL_USERNS
+ zpl_set_acl(struct user_namespace *userns, struct inode *ip,
+ struct posix_acl *acl, int type)
++#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
++zpl_set_acl(struct mnt_idmap *userns, struct dentry *dentry,
++ struct posix_acl *acl, int type)
+ #elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
+ zpl_set_acl(struct user_namespace *userns, struct dentry *dentry,
+ struct posix_acl *acl, int type)
+@@ -1013,6 +1022,8 @@ zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)
+ {
+ #ifdef HAVE_SET_ACL_USERNS_DENTRY_ARG2
+ return (zpl_set_acl_impl(d_inode(dentry), acl, type));
++#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
++ return (zpl_set_acl_impl(d_inode(dentry), acl, type));
+ #else
+ return (zpl_set_acl_impl(ip, acl, type));
+ #endif /* HAVE_SET_ACL_USERNS_DENTRY_ARG2 */
+@@ -1256,7 +1267,8 @@ __zpl_xattr_acl_get_default(struct inode *ip, const char *name,
+ ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);
+
+ static int
+-__zpl_xattr_acl_set_access(struct inode *ip, const char *name,
++__zpl_xattr_acl_set_access(zidmap_t *mnt_ns,
++ struct inode *ip, const char *name,
+ const void *value, size_t size, int flags)
+ {
+ struct posix_acl *acl;
+@@ -1270,8 +1282,14 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
+ if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
+ return (-EOPNOTSUPP);
+
+- if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
++#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
++ if (!zpl_inode_owner_or_capable(mnt_ns, ip))
++ return (-EPERM);
++#else
++ (void) mnt_ns;
++ if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
+ return (-EPERM);
++#endif
+
+ if (value) {
+ acl = zpl_acl_from_xattr(value, size);
+@@ -1295,7 +1313,8 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
+ ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);
+
+ static int
+-__zpl_xattr_acl_set_default(struct inode *ip, const char *name,
++__zpl_xattr_acl_set_default(zidmap_t *mnt_ns,
++ struct inode *ip, const char *name,
+ const void *value, size_t size, int flags)
+ {
+ struct posix_acl *acl;
+@@ -1309,8 +1328,14 @@ __zpl_xattr_acl_set_default(struct inode *ip, const char *name,
+ if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
+ return (-EOPNOTSUPP);
+
+- if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
++#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
++ if (!zpl_inode_owner_or_capable(mnt_ns, ip))
++ return (-EPERM);
++#else
++ (void) mnt_ns;
++ if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
+ return (-EPERM);
++#endif
+
+ if (value) {
+ acl = zpl_acl_from_xattr(value, size);
+diff --git a/module/zcommon/Makefile.in b/module/zcommon/Makefile.in
+index ebc538440..614968a42 100644
+--- a/module/zcommon/Makefile.in
++++ b/module/zcommon/Makefile.in
+@@ -26,3 +26,7 @@ $(MODULE)-$(CONFIG_X86) += zfs_fletcher_intel.o
+ $(MODULE)-$(CONFIG_X86) += zfs_fletcher_sse.o
+ $(MODULE)-$(CONFIG_X86) += zfs_fletcher_avx512.o
+ $(MODULE)-$(CONFIG_ARM64) += zfs_fletcher_aarch64_neon.o
++
++ifeq ($(CONFIG_ARM64),y)
++CFLAGS_REMOVE_zfs_fletcher_aarch64_neon.o += -mgeneral-regs-only
++endif
+diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in
+index 653ea0da9..0e04d7ef0 100644
+--- a/module/zfs/Makefile.in
++++ b/module/zfs/Makefile.in
+@@ -154,4 +154,9 @@ ifeq ($(CONFIG_ALTIVEC),y)
+ $(obj)/vdev_raidz_math_powerpc_altivec.o: c_flags += -maltivec
+ endif
+
++ifeq ($(CONFIG_ARM64),y)
++CFLAGS_REMOVE_vdev_raidz_math_aarch64_neon.o += -mgeneral-regs-only
++CFLAGS_REMOVE_vdev_raidz_math_aarch64_neonx2.o += -mgeneral-regs-only
++endif
++
+ include $(mfdir)/../os/linux/zfs/Makefile
+diff --git a/module/zfs/abd.c b/module/zfs/abd.c
+index 8ee8e7e57..754974a55 100644
+--- a/module/zfs/abd.c
++++ b/module/zfs/abd.c
+@@ -109,7 +109,6 @@ void
+ abd_verify(abd_t *abd)
+ {
+ #ifdef ZFS_DEBUG
+- ASSERT3U(abd->abd_size, >, 0);
+ ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE);
+ ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR |
+ ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE |
+@@ -118,6 +117,7 @@ abd_verify(abd_t *abd)
+ IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER));
+ IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER);
+ if (abd_is_linear(abd)) {
++ ASSERT3U(abd->abd_size, >, 0);
+ ASSERT3P(ABD_LINEAR_BUF(abd), !=, NULL);
+ } else if (abd_is_gang(abd)) {
+ uint_t child_sizes = 0;
+@@ -130,6 +130,7 @@ abd_verify(abd_t *abd)
+ }
+ ASSERT3U(abd->abd_size, ==, child_sizes);
+ } else {
++ ASSERT3U(abd->abd_size, >, 0);
+ abd_verify_scatter(abd);
+ }
+ #endif
+@@ -369,7 +370,20 @@ abd_gang_add_gang(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
+ * will retain all the free_on_free settings after being
+ * added to the parents list.
+ */
++#ifdef ZFS_DEBUG
++ /*
++ * If cabd had abd_parent, we have to drop it here. We can't
++ * transfer it to pabd, nor can we clear abd_size while keeping it.
++ */
++ if (cabd->abd_parent != NULL) {
++ (void) zfs_refcount_remove_many(
++ &cabd->abd_parent->abd_children,
++ cabd->abd_size, cabd);
++ cabd->abd_parent = NULL;
++ }
++#endif
+ pabd->abd_size += cabd->abd_size;
++ cabd->abd_size = 0;
+ list_move_tail(&ABD_GANG(pabd).abd_gang_chain,
+ &ABD_GANG(cabd).abd_gang_chain);
+ ASSERT(list_is_empty(&ABD_GANG(cabd).abd_gang_chain));
+@@ -407,7 +421,6 @@ abd_gang_add(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
+ */
+ if (abd_is_gang(cabd)) {
+ ASSERT(!list_link_active(&cabd->abd_gang_link));
+- ASSERT(!list_is_empty(&ABD_GANG(cabd).abd_gang_chain));
+ return (abd_gang_add_gang(pabd, cabd, free_on_free));
+ }
+ ASSERT(!abd_is_gang(cabd));
+diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c
+index 98ca2b3bc..b8161f710 100644
+--- a/module/zfs/dmu_recv.c
++++ b/module/zfs/dmu_recv.c
+@@ -71,6 +71,12 @@ int zfs_recv_write_batch_size = 1024 * 1024;
+ static char *dmu_recv_tag = "dmu_recv_tag";
+ const char *recv_clone_name = "%recv";
+
++typedef enum {
++ ORNS_NO,
++ ORNS_YES,
++ ORNS_MAYBE
++} or_need_sync_t;
++
+ static int receive_read_payload_and_next_header(dmu_recv_cookie_t *ra, int len,
+ void *buf);
+
+@@ -121,6 +127,9 @@ struct receive_writer_arg {
+ uint8_t or_iv[ZIO_DATA_IV_LEN];
+ uint8_t or_mac[ZIO_DATA_MAC_LEN];
+ boolean_t or_byteorder;
++
++ /* Keep track of DRR_FREEOBJECTS right after DRR_OBJECT_RANGE */
++ or_need_sync_t or_need_sync;
+ };
+
+ typedef struct dmu_recv_begin_arg {
+@@ -1658,10 +1667,22 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
+ /* object was freed and we are about to allocate a new one */
+ object_to_hold = DMU_NEW_OBJECT;
+ } else {
++ /*
++ * If the only record in this range so far was DRR_FREEOBJECTS
++ * with at least one actually freed object, it's possible that
++ * the block will now be converted to a hole. We need to wait
++ * for the txg to sync to prevent races.
++ */
++ if (rwa->or_need_sync == ORNS_YES)
++ txg_wait_synced(dmu_objset_pool(rwa->os), 0);
++
+ /* object is free and we are about to allocate a new one */
+ object_to_hold = DMU_NEW_OBJECT;
+ }
+
++ /* Only relevant for the first object in the range */
++ rwa->or_need_sync = ORNS_NO;
++
+ /*
+ * If this is a multi-slot dnode there is a chance that this
+ * object will expand into a slot that is already used by
+@@ -1856,6 +1877,9 @@ receive_freeobjects(struct receive_writer_arg *rwa,
+
+ if (err != 0)
+ return (err);
++
++ if (rwa->or_need_sync == ORNS_MAYBE)
++ rwa->or_need_sync = ORNS_YES;
+ }
+ if (next_err != ESRCH)
+ return (next_err);
+@@ -2298,6 +2322,8 @@ receive_object_range(struct receive_writer_arg *rwa,
+ bcopy(drror->drr_mac, rwa->or_mac, ZIO_DATA_MAC_LEN);
+ rwa->or_byteorder = byteorder;
+
++ rwa->or_need_sync = ORNS_MAYBE;
++
+ return (0);
+ }
+
+diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
+index cd9ecc07f..0dd1ec210 100644
+--- a/module/zfs/dmu_send.c
++++ b/module/zfs/dmu_send.c
+@@ -2797,6 +2797,7 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
+ }
+
+ if (err == 0) {
++ owned = B_TRUE;
+ err = zap_lookup(dspp.dp->dp_meta_objset,
+ dspp.to_ds->ds_object,
+ DS_FIELD_RESUME_TOGUID, 8, 1,
+@@ -2810,21 +2811,24 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
+ sizeof (dspp.saved_toname),
+ dspp.saved_toname);
+ }
+- if (err != 0)
++ /* Only disown if there was an error in the lookups */
++ if (owned && (err != 0))
+ dsl_dataset_disown(dspp.to_ds, dsflags, FTAG);
+
+ kmem_strfree(name);
+ } else {
+ err = dsl_dataset_own(dspp.dp, tosnap, dsflags,
+ FTAG, &dspp.to_ds);
++ if (err == 0)
++ owned = B_TRUE;
+ }
+- owned = B_TRUE;
+ } else {
+ err = dsl_dataset_hold_flags(dspp.dp, tosnap, dsflags, FTAG,
+ &dspp.to_ds);
+ }
+
+ if (err != 0) {
++ /* Note: dsl dataset is not owned at this point */
+ dsl_pool_rele(dspp.dp, FTAG);
+ return (err);
+ }
+diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
+index 1eed0526b..063934f39 100644
+--- a/module/zfs/dmu_tx.c
++++ b/module/zfs/dmu_tx.c
+@@ -290,6 +290,53 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
+ }
+ }
+
++static void
++dmu_tx_count_append(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
++{
++ dnode_t *dn = txh->txh_dnode;
++ int err = 0;
++
++ if (len == 0)
++ return;
++
++ (void) zfs_refcount_add_many(&txh->txh_space_towrite, len, FTAG);
++
++ if (dn == NULL)
++ return;
++
++ /*
++ * For i/o error checking, read the first level-0 block the append
++ * will touch, but only when it is a partial-block write (i.e. not
++ * block-aligned); no additional blocks are read.
++ */
++ if (dn->dn_maxblkid == 0) {
++ if (off < dn->dn_datablksz &&
++ (off > 0 || len < dn->dn_datablksz)) {
++ err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
++ if (err != 0) {
++ txh->txh_tx->tx_err = err;
++ }
++ }
++ } else {
++ zio_t *zio = zio_root(dn->dn_objset->os_spa,
++ NULL, NULL, ZIO_FLAG_CANFAIL);
++
++ /* first level-0 block */
++ uint64_t start = off >> dn->dn_datablkshift;
++ if (P2PHASE(off, dn->dn_datablksz) || len < dn->dn_datablksz) {
++ err = dmu_tx_check_ioerr(zio, dn, 0, start);
++ if (err != 0) {
++ txh->txh_tx->tx_err = err;
++ }
++ }
++
++ err = zio_wait(zio);
++ if (err != 0) {
++ txh->txh_tx->tx_err = err;
++ }
++ }
++}
++
+ static void
+ dmu_tx_count_dnode(dmu_tx_hold_t *txh)
+ {
+@@ -330,6 +377,42 @@ dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
+ }
+ }
+
++/*
++ * Should be used when appending to an object and the exact offset is unknown.
++ * The write must occur at or beyond the specified offset. Only the L0 block
++ * at the provided offset will be prefetched.
++ */
++void
++dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
++{
++ dmu_tx_hold_t *txh;
++
++ ASSERT0(tx->tx_txg);
++ ASSERT3U(len, <=, DMU_MAX_ACCESS);
++
++ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
++ object, THT_APPEND, off, DMU_OBJECT_END);
++ if (txh != NULL) {
++ dmu_tx_count_append(txh, off, len);
++ dmu_tx_count_dnode(txh);
++ }
++}
++
++void
++dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
++{
++ dmu_tx_hold_t *txh;
++
++ ASSERT0(tx->tx_txg);
++ ASSERT3U(len, <=, DMU_MAX_ACCESS);
++
++ txh = dmu_tx_hold_dnode_impl(tx, dn, THT_APPEND, off, DMU_OBJECT_END);
++ if (txh != NULL) {
++ dmu_tx_count_append(txh, off, len);
++ dmu_tx_count_dnode(txh);
++ }
++}
++
+ /*
+ * This function marks the transaction as being a "net free". The end
+ * result is that refquotas will be disabled for this transaction, and
+@@ -638,6 +721,26 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
+ if (blkid == 0)
+ match_offset = TRUE;
+ break;
++ case THT_APPEND:
++ if (blkid >= beginblk && (blkid <= endblk ||
++ txh->txh_arg2 == DMU_OBJECT_END))
++ match_offset = TRUE;
++
++ /*
++ * THT_WRITE used for bonus and spill blocks.
++ */
++ ASSERT(blkid != DMU_BONUS_BLKID &&
++ blkid != DMU_SPILL_BLKID);
++
++ /*
++ * They might have to increase nlevels,
++ * thus dirtying the new TLIBs. Or they
++ * might have to change the block size,
++ * thus dirtying the new lvl=0 blk=0.
++ */
++ if (blkid == 0)
++ match_offset = TRUE;
++ break;
+ case THT_FREE:
+ /*
+ * We will dirty all the level 1 blocks in
+@@ -1421,6 +1524,8 @@ dmu_tx_fini(void)
+ EXPORT_SYMBOL(dmu_tx_create);
+ EXPORT_SYMBOL(dmu_tx_hold_write);
+ EXPORT_SYMBOL(dmu_tx_hold_write_by_dnode);
++EXPORT_SYMBOL(dmu_tx_hold_append);
++EXPORT_SYMBOL(dmu_tx_hold_append_by_dnode);
+ EXPORT_SYMBOL(dmu_tx_hold_free);
+ EXPORT_SYMBOL(dmu_tx_hold_free_by_dnode);
+ EXPORT_SYMBOL(dmu_tx_hold_zap);
+diff --git a/module/zfs/dsl_deadlist.c b/module/zfs/dsl_deadlist.c
+index d5fe2ee56..9827eb147 100644
+--- a/module/zfs/dsl_deadlist.c
++++ b/module/zfs/dsl_deadlist.c
+@@ -859,7 +859,7 @@ void
+ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
+ {
+ zap_cursor_t zc, pzc;
+- zap_attribute_t za, pza;
++ zap_attribute_t *za, *pza;
+ dmu_buf_t *bonus;
+ dsl_deadlist_phys_t *dlp;
+ dmu_object_info_t doi;
+@@ -874,28 +874,31 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
+ return;
+ }
+
++ za = kmem_alloc(sizeof (*za), KM_SLEEP);
++ pza = kmem_alloc(sizeof (*pza), KM_SLEEP);
++
+ mutex_enter(&dl->dl_lock);
+ /*
+ * Prefetch up to 128 deadlists first and then more as we progress.
+ * The limit is a balance between ARC use and diminishing returns.
+ */
+ for (zap_cursor_init(&pzc, dl->dl_os, obj), i = 0;
+- (perror = zap_cursor_retrieve(&pzc, &pza)) == 0 && i < 128;
++ (perror = zap_cursor_retrieve(&pzc, pza)) == 0 && i < 128;
+ zap_cursor_advance(&pzc), i++) {
+- dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer,
+- zfs_strtonum(pza.za_name, NULL));
++ dsl_deadlist_prefetch_bpobj(dl, pza->za_first_integer,
++ zfs_strtonum(pza->za_name, NULL));
+ }
+ for (zap_cursor_init(&zc, dl->dl_os, obj);
+- (error = zap_cursor_retrieve(&zc, &za)) == 0;
++ (error = zap_cursor_retrieve(&zc, za)) == 0;
+ zap_cursor_advance(&zc)) {
+- uint64_t mintxg = zfs_strtonum(za.za_name, NULL);
+- dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx);
++ uint64_t mintxg = zfs_strtonum(za->za_name, NULL);
++ dsl_deadlist_insert_bpobj(dl, za->za_first_integer, mintxg, tx);
+ VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx));
+ if (perror == 0) {
+- dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer,
+- zfs_strtonum(pza.za_name, NULL));
++ dsl_deadlist_prefetch_bpobj(dl, pza->za_first_integer,
++ zfs_strtonum(pza->za_name, NULL));
+ zap_cursor_advance(&pzc);
+- perror = zap_cursor_retrieve(&pzc, &pza);
++ perror = zap_cursor_retrieve(&pzc, pza);
+ }
+ }
+ VERIFY3U(error, ==, ENOENT);
+@@ -908,6 +911,9 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
+ bzero(dlp, sizeof (*dlp));
+ dmu_buf_rele(bonus, FTAG);
+ mutex_exit(&dl->dl_lock);
++
++ kmem_free(za, sizeof (*za));
++ kmem_free(pza, sizeof (*pza));
+ }
+
+ /*
+diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
+index f3c639b0d..f0a851ff5 100644
+--- a/module/zfs/dsl_scan.c
++++ b/module/zfs/dsl_scan.c
+@@ -37,6 +37,7 @@
+ #include <sys/dmu_tx.h>
+ #include <sys/dmu_objset.h>
+ #include <sys/arc.h>
++#include <sys/arc_impl.h>
+ #include <sys/zap.h>
+ #include <sys/zio.h>
+ #include <sys/zfs_context.h>
+@@ -126,11 +127,20 @@ static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj,
+ static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg);
+ static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj);
+ static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx);
+-static uint64_t dsl_scan_count_data_disks(vdev_t *vd);
++static uint64_t dsl_scan_count_data_disks(spa_t *spa);
+
+ extern int zfs_vdev_async_write_active_min_dirty_percent;
+ static int zfs_scan_blkstats = 0;
+
++/*
++ * 'zpool status' uses bytes processed per pass to report throughput and
++ * estimate time remaining. We define a pass to start when the scanning
++ * phase completes for a sequential resilver. Optionally, this value
++ * may be used to reset the pass statistics every N txgs to provide an
++ * estimated completion time based on currently observed performance.
++ */
++static uint_t zfs_scan_report_txgs = 0;
++
+ /*
+ * By default zfs will check to ensure it is not over the hard memory
+ * limit before each txg. If finer-grained control of this is needed
+@@ -147,7 +157,7 @@ int zfs_scan_strict_mem_lim = B_FALSE;
+ * overload the drives with I/O, since that is protected by
+ * zfs_vdev_scrub_max_active.
+ */
+-unsigned long zfs_scan_vdev_limit = 4 << 20;
++unsigned long zfs_scan_vdev_limit = 16 << 20;
+
+ int zfs_scan_issue_strategy = 0;
+ int zfs_scan_legacy = B_FALSE; /* don't queue & sort zios, go direct */
+@@ -450,11 +460,12 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
+
+ /*
+ * Calculate the max number of in-flight bytes for pool-wide
+- * scanning operations (minimum 1MB). Limits for the issuing
+- * phase are done per top-level vdev and are handled separately.
++ * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max).
++ * Limits for the issuing phase are done per top-level vdev and
++ * are handled separately.
+ */
+- scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit *
+- dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20);
++ scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20,
++ zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa)));
+
+ avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t),
+ offsetof(scan_ds_t, sds_node));
+@@ -584,6 +595,8 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
+ }
+
+ spa_scan_stat_init(spa);
++ vdev_scan_stat_init(spa->spa_root_vdev);
++
+ return (0);
+ }
+
+@@ -742,6 +755,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
+ scn->scn_last_checkpoint = 0;
+ scn->scn_checkpointing = B_FALSE;
+ spa_scan_stat_init(spa);
++ vdev_scan_stat_init(spa->spa_root_vdev);
+
+ if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
+ scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;
+@@ -2797,8 +2811,9 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
+ }
+
+ static uint64_t
+-dsl_scan_count_data_disks(vdev_t *rvd)
++dsl_scan_count_data_disks(spa_t *spa)
+ {
++ vdev_t *rvd = spa->spa_root_vdev;
+ uint64_t i, leaves = 0;
+
+ for (i = 0; i < rvd->vdev_children; i++) {
+@@ -3637,6 +3652,16 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
+ return;
+ }
+
++ /*
++ * Disabled by default, set zfs_scan_report_txgs to report
++ * average performance over the last zfs_scan_report_txgs TXGs.
++ */
++ if (!dsl_scan_is_paused_scrub(scn) && zfs_scan_report_txgs != 0 &&
++ tx->tx_txg % zfs_scan_report_txgs == 0) {
++ scn->scn_issued_before_pass += spa->spa_scan_pass_issued;
++ spa_scan_stat_init(spa);
++ }
++
+ /*
+ * It is possible to switch from unsorted to sorted at any time,
+ * but afterwards the scan will remain sorted unless reloaded from
+@@ -3693,12 +3718,13 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
+ taskqid_t prefetch_tqid;
+
+ /*
+- * Recalculate the max number of in-flight bytes for pool-wide
+- * scanning operations (minimum 1MB). Limits for the issuing
+- * phase are done per top-level vdev and are handled separately.
++ * Calculate the max number of in-flight bytes for pool-wide
++ * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max).
++ * Limits for the issuing phase are done per top-level vdev and
++ * are handled separately.
+ */
+- scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit *
+- dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20);
++ scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20,
++ zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa)));
+
+ if (scnp->scn_ddt_bookmark.ddb_class <=
+ scnp->scn_ddt_class_max) {
+@@ -3759,6 +3785,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
+ if (scn->scn_is_sorted) {
+ scn->scn_checkpointing = B_TRUE;
+ scn->scn_clearing = B_TRUE;
++ scn->scn_issued_before_pass +=
++ spa->spa_scan_pass_issued;
++ spa_scan_stat_init(spa);
+ }
+ zfs_dbgmsg("scan complete txg %llu",
+ (longlong_t)tx->tx_txg);
+@@ -4485,6 +4514,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_strict_mem_lim, INT, ZMOD_RW,
+ ZFS_MODULE_PARAM(zfs, zfs_, scan_fill_weight, INT, ZMOD_RW,
+ "Tunable to adjust bias towards more filled segments during scans");
+
++ZFS_MODULE_PARAM(zfs, zfs_, scan_report_txgs, UINT, ZMOD_RW,
++ "Tunable to report resilver performance over the last N txgs");
++
+ ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW,
+ "Process all resilvers immediately");
+ /* END CSTYLED */
+diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c
+index f67a4eb22..139bb0acd 100644
+--- a/module/zfs/mmp.c
++++ b/module/zfs/mmp.c
+@@ -444,7 +444,7 @@ mmp_write_uberblock(spa_t *spa)
+ uint64_t offset;
+
+ hrtime_t lock_acquire_time = gethrtime();
+- spa_config_enter(spa, SCL_STATE, mmp_tag, RW_READER);
++ spa_config_enter_mmp(spa, SCL_STATE, mmp_tag, RW_READER);
+ lock_acquire_time = gethrtime() - lock_acquire_time;
+ if (lock_acquire_time > (MSEC2NSEC(MMP_MIN_INTERVAL) / 10))
+ zfs_dbgmsg("MMP SCL_STATE acquisition pool '%s' took %llu ns "
+diff --git a/module/zfs/spa.c b/module/zfs/spa.c
+index 1ed79eed3..5f238e691 100644
+--- a/module/zfs/spa.c
++++ b/module/zfs/spa.c
+@@ -33,6 +33,7 @@
+ * Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
++ * Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
+ */
+
+ /*
+@@ -6261,6 +6262,16 @@ spa_tryimport(nvlist_t *tryconfig)
+ spa->spa_config_source = SPA_CONFIG_SRC_SCAN;
+ }
+
++ /*
++ * spa_import() relies on a pool config fetched by spa_tryimport()
++ * for spare/cache devices. Import flags are not passed to
++ * spa_tryimport(), which makes it return early due to a missing log
++ * device, so the cache and spare devices are never retrieved.
++ * Passing ZFS_IMPORT_MISSING_LOG to spa_tryimport() makes it fetch
++ * the correct configuration regardless of the missing log device.
++ */
++ spa->spa_import_flags |= ZFS_IMPORT_MISSING_LOG;
++
+ error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING);
+
+ /*
+@@ -6747,9 +6758,11 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
+ if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD))
+ return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+
+- if (dsl_scan_resilvering(spa_get_dsl(spa)))
++ if (dsl_scan_resilvering(spa_get_dsl(spa)) ||
++ dsl_scan_resilver_scheduled(spa_get_dsl(spa))) {
+ return (spa_vdev_exit(spa, NULL, txg,
+ ZFS_ERR_RESILVER_IN_PROGRESS));
++ }
+ } else {
+ if (vdev_rebuild_active(rvd))
+ return (spa_vdev_exit(spa, NULL, txg,
+@@ -6987,7 +7000,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
+ * Detach a device from a mirror or replacing vdev.
+ *
+ * If 'replace_done' is specified, only detach if the parent
+- * is a replacing vdev.
++ * is a replacing or a spare vdev.
+ */
+ int
+ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
+@@ -7294,6 +7307,10 @@ spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type,
+ vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE) {
+ mutex_exit(&vd->vdev_initialize_lock);
+ return (SET_ERROR(ESRCH));
++ } else if (cmd_type == POOL_INITIALIZE_UNINIT &&
++ vd->vdev_initialize_thread != NULL) {
++ mutex_exit(&vd->vdev_initialize_lock);
++ return (SET_ERROR(EBUSY));
+ }
+
+ switch (cmd_type) {
+@@ -7306,6 +7323,9 @@ spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type,
+ case POOL_INITIALIZE_SUSPEND:
+ vdev_initialize_stop(vd, VDEV_INITIALIZE_SUSPENDED, vd_list);
+ break;
++ case POOL_INITIALIZE_UNINIT:
++ vdev_uninitialize(vd);
++ break;
+ default:
+ panic("invalid cmd_type %llu", (unsigned long long)cmd_type);
+ }
+@@ -8210,7 +8230,8 @@ spa_async_thread(void *arg)
+ * If any devices are done replacing, detach them.
+ */
+ if (tasks & SPA_ASYNC_RESILVER_DONE ||
+- tasks & SPA_ASYNC_REBUILD_DONE) {
++ tasks & SPA_ASYNC_REBUILD_DONE ||
++ tasks & SPA_ASYNC_DETACH_SPARE) {
+ spa_vdev_resilver_done(spa);
+ }
+
+diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
+index a57f0727d..113943026 100644
+--- a/module/zfs/spa_misc.c
++++ b/module/zfs/spa_misc.c
+@@ -494,8 +494,9 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
+ return (1);
+ }
+
+-void
+-spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
++static void
++spa_config_enter_impl(spa_t *spa, int locks, const void *tag, krw_t rw,
++ int mmp_flag)
+ {
+ (void) tag;
+ int wlocks_held = 0;
+@@ -510,7 +511,8 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
+ continue;
+ mutex_enter(&scl->scl_lock);
+ if (rw == RW_READER) {
+- while (scl->scl_writer || scl->scl_write_wanted) {
++ while (scl->scl_writer ||
++ (!mmp_flag && scl->scl_write_wanted)) {
+ cv_wait(&scl->scl_cv, &scl->scl_lock);
+ }
+ } else {
+@@ -528,6 +530,27 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
+ ASSERT3U(wlocks_held, <=, locks);
+ }
+
++void
++spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
++{
++ spa_config_enter_impl(spa, locks, tag, rw, 0);
++}
++
++/*
++ * The spa_config_enter_mmp() allows the mmp thread to cut in front of
++ * outstanding write lock requests. This is needed since the mmp updates are
++ * time sensitive and failure to service them promptly will result in a
++ * suspended pool. This pool suspension has been seen in practice when there is
++ * a single disk in a pool that is responding slowly and presumably about to
++ * fail.
++ */
++
++void
++spa_config_enter_mmp(spa_t *spa, int locks, const void *tag, krw_t rw)
++{
++ spa_config_enter_impl(spa, locks, tag, rw, 1);
++}
++
+ void
+ spa_config_exit(spa_t *spa, int locks, const void *tag)
+ {
+@@ -2564,7 +2587,6 @@ spa_scan_stat_init(spa_t *spa)
+ spa->spa_scan_pass_scrub_spent_paused = 0;
+ spa->spa_scan_pass_exam = 0;
+ spa->spa_scan_pass_issued = 0;
+- vdev_scan_stat_init(spa->spa_root_vdev);
+ }
+
+ /*
+diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
+index 4b9d7e7c0..57259b8ce 100644
+--- a/module/zfs/vdev.c
++++ b/module/zfs/vdev.c
+@@ -28,7 +28,7 @@
+ * Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Datto Inc. All rights reserved.
+- * Copyright [2021] Hewlett Packard Enterprise Development LP
++ * Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
+ */
+
+ #include <sys/zfs_context.h>
+@@ -2645,6 +2645,17 @@ vdev_reopen(vdev_t *vd)
+ (void) vdev_validate(vd);
+ }
+
++ /*
++ * Recheck if resilver is still needed and cancel any
++ * scheduled resilver if resilver is unneeded.
++ */
++ if (!vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL) &&
++ spa->spa_async_tasks & SPA_ASYNC_RESILVER) {
++ mutex_enter(&spa->spa_async_lock);
++ spa->spa_async_tasks &= ~SPA_ASYNC_RESILVER;
++ mutex_exit(&spa->spa_async_lock);
++ }
++
+ /*
+ * Reassess parent vdev's health.
+ */
+@@ -3983,11 +3994,18 @@ vdev_remove_wanted(spa_t *spa, uint64_t guid)
+ return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));
+
+ /*
+- * If the vdev is already removed, then don't do anything.
++ * If the vdev is already removed, or expanding which can trigger
++ * repartition add/remove events, then don't do anything.
+ */
+- if (vd->vdev_removed)
++ if (vd->vdev_removed || vd->vdev_expanding)
+ return (spa_vdev_state_exit(spa, NULL, 0));
+
++ /*
++ * Confirm the vdev has been removed, otherwise don't do anything.
++ */
++ if (vd->vdev_ops->vdev_op_leaf && !zio_wait(vdev_probe(vd, NULL)))
++ return (spa_vdev_state_exit(spa, NULL, SET_ERROR(EEXIST)));
++
+ vd->vdev_remove_wanted = B_TRUE;
+ spa_async_request(spa, SPA_ASYNC_REMOVE);
+
+@@ -4085,9 +4103,19 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
+
+ if (wasoffline ||
+ (oldstate < VDEV_STATE_DEGRADED &&
+- vd->vdev_state >= VDEV_STATE_DEGRADED))
++ vd->vdev_state >= VDEV_STATE_DEGRADED)) {
+ spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_ONLINE);
+
++ /*
++ * Asynchronously detach spare vdev if resilver or
++ * rebuild is not required
++ */
++ if (vd->vdev_unspare &&
++ !dsl_scan_resilvering(spa->spa_dsl_pool) &&
++ !dsl_scan_resilver_scheduled(spa->spa_dsl_pool) &&
++ !vdev_rebuild_active(tvd))
++ spa_async_request(spa, SPA_ASYNC_DETACH_SPARE);
++ }
+ return (spa_vdev_state_exit(spa, vd, 0));
+ }
+
+diff --git a/module/zfs/vdev_initialize.c b/module/zfs/vdev_initialize.c
+index 6ffd0d618..5d90fd67c 100644
+--- a/module/zfs/vdev_initialize.c
++++ b/module/zfs/vdev_initialize.c
+@@ -100,6 +100,39 @@ vdev_initialize_zap_update_sync(void *arg, dmu_tx_t *tx)
+ &initialize_state, tx));
+ }
+
++static void
++vdev_initialize_zap_remove_sync(void *arg, dmu_tx_t *tx)
++{
++ uint64_t guid = *(uint64_t *)arg;
++
++ kmem_free(arg, sizeof (uint64_t));
++
++ vdev_t *vd = spa_lookup_by_guid(tx->tx_pool->dp_spa, guid, B_FALSE);
++ if (vd == NULL || vd->vdev_top->vdev_removing || !vdev_is_concrete(vd))
++ return;
++
++ ASSERT3S(vd->vdev_initialize_state, ==, VDEV_INITIALIZE_NONE);
++ ASSERT3U(vd->vdev_leaf_zap, !=, 0);
++
++ vd->vdev_initialize_last_offset = 0;
++ vd->vdev_initialize_action_time = 0;
++
++ objset_t *mos = vd->vdev_spa->spa_meta_objset;
++ int error;
++
++ error = zap_remove(mos, vd->vdev_leaf_zap,
++ VDEV_LEAF_ZAP_INITIALIZE_LAST_OFFSET, tx);
++ VERIFY(error == 0 || error == ENOENT);
++
++ error = zap_remove(mos, vd->vdev_leaf_zap,
++ VDEV_LEAF_ZAP_INITIALIZE_STATE, tx);
++ VERIFY(error == 0 || error == ENOENT);
++
++ error = zap_remove(mos, vd->vdev_leaf_zap,
++ VDEV_LEAF_ZAP_INITIALIZE_ACTION_TIME, tx);
++ VERIFY(error == 0 || error == ENOENT);
++}
++
+ static void
+ vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state)
+ {
+@@ -127,8 +160,14 @@ vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state)
+
+ dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
+ VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
+- dsl_sync_task_nowait(spa_get_dsl(spa), vdev_initialize_zap_update_sync,
+- guid, tx);
++
++ if (new_state != VDEV_INITIALIZE_NONE) {
++ dsl_sync_task_nowait(spa_get_dsl(spa),
++ vdev_initialize_zap_update_sync, guid, tx);
++ } else {
++ dsl_sync_task_nowait(spa_get_dsl(spa),
++ vdev_initialize_zap_remove_sync, guid, tx);
++ }
+
+ switch (new_state) {
+ case VDEV_INITIALIZE_ACTIVE:
+@@ -149,6 +188,10 @@ vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state)
+ spa_history_log_internal(spa, "initialize", tx,
+ "vdev=%s complete", vd->vdev_path);
+ break;
++ case VDEV_INITIALIZE_NONE:
++ spa_history_log_internal(spa, "uninitialize", tx,
++ "vdev=%s", vd->vdev_path);
++ break;
+ default:
+ panic("invalid state %llu", (unsigned long long)new_state);
+ }
+@@ -604,6 +647,24 @@ vdev_initialize(vdev_t *vd)
+ vdev_initialize_thread, vd, 0, &p0, TS_RUN, maxclsyspri);
+ }
+
++/*
++ * Uninitializes a device. Caller must hold vdev_initialize_lock.
++ * Device must be a leaf and not already be initializing.
++ */
++void
++vdev_uninitialize(vdev_t *vd)
++{
++ ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
++ ASSERT(vd->vdev_ops->vdev_op_leaf);
++ ASSERT(vdev_is_concrete(vd));
++ ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
++ ASSERT(!vd->vdev_detached);
++ ASSERT(!vd->vdev_initialize_exit_wanted);
++ ASSERT(!vd->vdev_top->vdev_removing);
++
++ vdev_initialize_change_state(vd, VDEV_INITIALIZE_NONE);
++}
++
+ /*
+ * Wait for the initialize thread to be terminated (cancelled or stopped).
+ */
+@@ -760,6 +821,7 @@ vdev_initialize_restart(vdev_t *vd)
+ }
+
+ EXPORT_SYMBOL(vdev_initialize);
++EXPORT_SYMBOL(vdev_uninitialize);
+ EXPORT_SYMBOL(vdev_initialize_stop);
+ EXPORT_SYMBOL(vdev_initialize_stop_all);
+ EXPORT_SYMBOL(vdev_initialize_stop_wait);
+diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c
+index 9dfbe0cf6..b180fa146 100644
+--- a/module/zfs/vdev_rebuild.c
++++ b/module/zfs/vdev_rebuild.c
+@@ -34,6 +34,7 @@
+ #include <sys/zio.h>
+ #include <sys/dmu_tx.h>
+ #include <sys/arc.h>
++#include <sys/arc_impl.h>
+ #include <sys/zap.h>
+
+ /*
+@@ -116,13 +117,12 @@ unsigned long zfs_rebuild_max_segment = 1024 * 1024;
+ * segment size is also large (zfs_rebuild_max_segment=1M). This helps keep
+ * the queue depth short.
+ *
+- * 32MB was selected as the default value to achieve good performance with
+- * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential
+- * rebuild was unable to saturate all of the drives using smaller values.
+- * With a value of 32MB the sequential resilver write rate was measured at
+- * 800MB/s sustained while rebuilding to a distributed spare.
++ * 64MB was observed to deliver the best performance and set as the default.
++ * Testing was performed with a 106-drive dRAID HDD pool (draid2:11d:106c)
++ * and a rebuild rate of 1.2GB/s was measured to the distributed spare.
++ * Smaller values were unable to fully saturate the available pool I/O.
+ */
+-unsigned long zfs_rebuild_vdev_limit = 32 << 20;
++unsigned long zfs_rebuild_vdev_limit = 64 << 20;
+
+ /*
+ * Automatically start a pool scrub when the last active sequential resilver
+@@ -754,6 +754,7 @@ vdev_rebuild_thread(void *arg)
+ {
+ vdev_t *vd = arg;
+ spa_t *spa = vd->vdev_spa;
++ vdev_t *rvd = spa->spa_root_vdev;
+ int error = 0;
+
+ /*
+@@ -786,9 +787,6 @@ vdev_rebuild_thread(void *arg)
+ vr->vr_pass_bytes_scanned = 0;
+ vr->vr_pass_bytes_issued = 0;
+
+- vr->vr_bytes_inflight_max = MAX(1ULL << 20,
+- zfs_rebuild_vdev_limit * vd->vdev_children);
+-
+ uint64_t update_est_time = gethrtime();
+ vdev_rebuild_update_bytes_est(vd, 0);
+
+@@ -804,6 +802,17 @@ vdev_rebuild_thread(void *arg)
+ metaslab_t *msp = vd->vdev_ms[i];
+ vr->vr_scan_msp = msp;
+
++ /*
++ * Calculate the max number of in-flight bytes for top-level
++ * vdev scanning operations (minimum 1MB, maximum 1/4 of
++ * arc_c_max shared by all top-level vdevs). Limits for the
++ * issuing phase are done per top-level vdev and are handled
++ * separately.
++ */
++ uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1);
++ vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20,
++ zfs_rebuild_vdev_limit * vd->vdev_children));
++
+ /*
+ * Removal of vdevs from the vdev tree may eliminate the need
+ * for the rebuild, in which case it should be canceled. The
+diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
+index a4b391cbe..f441328f3 100644
+--- a/module/zfs/zfs_ioctl.c
++++ b/module/zfs/zfs_ioctl.c
+@@ -3985,7 +3985,8 @@ zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
+
+ if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
+ cmd_type == POOL_INITIALIZE_START ||
+- cmd_type == POOL_INITIALIZE_SUSPEND)) {
++ cmd_type == POOL_INITIALIZE_SUSPEND ||
++ cmd_type == POOL_INITIALIZE_UNINIT)) {
+ return (SET_ERROR(EINVAL));
+ }
+
+diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
+index b9498d17e..0987fd0f7 100644
+--- a/module/zfs/zfs_vnops.c
++++ b/module/zfs/zfs_vnops.c
+@@ -68,7 +68,9 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
+ if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
+ ZFS_ENTER(zfsvfs);
+ ZFS_VERIFY_ZP(zp);
++ atomic_inc_32(&zp->z_sync_writes_cnt);
+ zil_commit(zfsvfs->z_log, zp->z_id);
++ atomic_dec_32(&zp->z_sync_writes_cnt);
+ ZFS_EXIT(zfsvfs);
+ }
+ tsd_set(zfs_fsyncer_key, NULL);
+@@ -102,7 +104,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
+ hole = B_FALSE;
+
+ /* Flush any mmap()'d data to disk */
+- if (zn_has_cached_data(zp))
++ if (zn_has_cached_data(zp, 0, file_sz - 1))
+ zn_flush_cached_data(zp, B_FALSE);
+
+ lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER);
+@@ -275,7 +277,8 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
+ error = mappedread_sf(zp, nbytes, uio);
+ else
+ #endif
+- if (zn_has_cached_data(zp) && !(ioflag & O_DIRECT)) {
++ if (zn_has_cached_data(zp, zfs_uio_offset(uio),
++ zfs_uio_offset(uio) + nbytes - 1) && !(ioflag & O_DIRECT)) {
+ error = mappedread(zp, nbytes, uio);
+ } else {
+ error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
+@@ -686,7 +689,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
+ zfs_uioskip(uio, nbytes);
+ tx_bytes = nbytes;
+ }
+- if (tx_bytes && zn_has_cached_data(zp) &&
++ if (tx_bytes &&
++ zn_has_cached_data(zp, woff, woff + tx_bytes - 1) &&
+ !(ioflag & O_DIRECT)) {
+ update_pages(zp, woff, tx_bytes, zfsvfs->z_os);
+ }
+diff --git a/module/zfs/zil.c b/module/zfs/zil.c
+index aaf509a2f..f2aaeb550 100644
+--- a/module/zfs/zil.c
++++ b/module/zfs/zil.c
+@@ -226,11 +226,10 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp)
+ */
+ static int
+ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
+- blkptr_t *nbp, void *dst, char **end)
++ blkptr_t *nbp, char **begin, char **end, arc_buf_t **abuf)
+ {
+ enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
+ arc_flags_t aflags = ARC_FLAG_WAIT;
+- arc_buf_t *abuf = NULL;
+ zbookmark_phys_t zb;
+ int error;
+
+@@ -247,7 +246,7 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
+ ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
+
+ error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func,
+- &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
++ abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
+
+ if (error == 0) {
+ zio_cksum_t cksum = bp->blk_cksum;
+@@ -262,23 +261,23 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
+ */
+ cksum.zc_word[ZIL_ZC_SEQ]++;
+
++ uint64_t size = BP_GET_LSIZE(bp);
+ if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
+- zil_chain_t *zilc = abuf->b_data;
++ zil_chain_t *zilc = (*abuf)->b_data;
+ char *lr = (char *)(zilc + 1);
+- uint64_t len = zilc->zc_nused - sizeof (zil_chain_t);
+
+ if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
+- sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) {
++ sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) ||
++ zilc->zc_nused < sizeof (*zilc) ||
++ zilc->zc_nused > size) {
+ error = SET_ERROR(ECKSUM);
+ } else {
+- ASSERT3U(len, <=, SPA_OLD_MAXBLOCKSIZE);
+- bcopy(lr, dst, len);
+- *end = (char *)dst + len;
++ *begin = lr;
++ *end = lr + zilc->zc_nused - sizeof (*zilc);
+ *nbp = zilc->zc_next_blk;
+ }
+ } else {
+- char *lr = abuf->b_data;
+- uint64_t size = BP_GET_LSIZE(bp);
++ char *lr = (*abuf)->b_data;
+ zil_chain_t *zilc = (zil_chain_t *)(lr + size) - 1;
+
+ if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
+@@ -286,15 +285,11 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
+ (zilc->zc_nused > (size - sizeof (*zilc)))) {
+ error = SET_ERROR(ECKSUM);
+ } else {
+- ASSERT3U(zilc->zc_nused, <=,
+- SPA_OLD_MAXBLOCKSIZE);
+- bcopy(lr, dst, zilc->zc_nused);
+- *end = (char *)dst + zilc->zc_nused;
++ *begin = lr;
++ *end = lr + zilc->zc_nused;
+ *nbp = zilc->zc_next_blk;
+ }
+ }
+-
+- arc_buf_destroy(abuf, &abuf);
+ }
+
+ return (error);
+@@ -362,7 +357,6 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
+ uint64_t blk_count = 0;
+ uint64_t lr_count = 0;
+ blkptr_t blk, next_blk;
+- char *lrbuf, *lrp;
+ int error = 0;
+
+ bzero(&next_blk, sizeof (blkptr_t));
+@@ -382,13 +376,13 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
+ * If the log has been claimed, stop if we encounter a sequence
+ * number greater than the highest claimed sequence number.
+ */
+- lrbuf = zio_buf_alloc(SPA_OLD_MAXBLOCKSIZE);
+ zil_bp_tree_init(zilog);
+
+ for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) {
+ uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ];
+ int reclen;
+- char *end = NULL;
++ char *lrp, *end;
++ arc_buf_t *abuf = NULL;
+
+ if (blk_seq > claim_blk_seq)
+ break;
+@@ -404,8 +398,10 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
+ break;
+
+ error = zil_read_log_block(zilog, decrypt, &blk, &next_blk,
+- lrbuf, &end);
++ &lrp, &end, &abuf);
+ if (error != 0) {
++ if (abuf)
++ arc_buf_destroy(abuf, &abuf);
+ if (claimed) {
+ char name[ZFS_MAX_DATASET_NAME_LEN];
+
+@@ -418,7 +414,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
+ break;
+ }
+
+- for (lrp = lrbuf; lrp < end; lrp += reclen) {
++ for (; lrp < end; lrp += reclen) {
+ lr_t *lr = (lr_t *)lrp;
+ reclen = lr->lrc_reclen;
+ ASSERT3U(reclen, >=, sizeof (lr_t));
+@@ -432,6 +428,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
+ max_lr_seq = lr->lrc_seq;
+ lr_count++;
+ }
++ arc_buf_destroy(abuf, &abuf);
+ }
+ done:
+ zilog->zl_parse_error = error;
+@@ -441,7 +438,6 @@ done:
+ zilog->zl_parse_lr_count = lr_count;
+
+ zil_bp_tree_fini(zilog);
+- zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);
+
+ return (error);
+ }
+@@ -1593,6 +1589,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
+ wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, uint64_t);
+ ASSERT3U(wsz, <=, lwb->lwb_sz);
+ zio_shrink(lwb->lwb_write_zio, wsz);
++ wsz = lwb->lwb_write_zio->io_size;
+
+ } else {
+ wsz = lwb->lwb_sz;
+@@ -2848,7 +2845,14 @@ static void
+ zil_commit_itx_assign(zilog_t *zilog, zil_commit_waiter_t *zcw)
+ {
+ dmu_tx_t *tx = dmu_tx_create(zilog->zl_os);
+- VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
++
++ /*
++ * Since we are not going to create any new dirty data, and we
++ * can even help with clearing the existing dirty data, we
++ * should not be subject to the dirty data based delays. We
++ * use TXG_NOTHROTTLE to bypass the delay mechanism.
++ */
++ VERIFY0(dmu_tx_assign(tx, TXG_WAIT | TXG_NOTHROTTLE));
+
+ itx_t *itx = zil_itx_create(TX_COMMIT, sizeof (lr_t));
+ itx->itx_sync = B_TRUE;
+diff --git a/module/zfs/zio.c b/module/zfs/zio.c
+index 700f87910..c367ef721 100644
+--- a/module/zfs/zio.c
++++ b/module/zfs/zio.c
+@@ -2287,7 +2287,7 @@ zio_nowait(zio_t *zio)
+ ASSERT3P(zio->io_executor, ==, NULL);
+
+ if (zio->io_child_type == ZIO_CHILD_LOGICAL &&
+- zio_unique_parent(zio) == NULL) {
++ list_is_empty(&zio->io_parent_list)) {
+ zio_t *pio;
+
+ /*
+diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
+index 554cf96f8..6c2296d4c 100644
+--- a/tests/runfiles/common.run
++++ b/tests/runfiles/common.run
+@@ -37,7 +37,7 @@ tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos',
+ 'alloc_class_004_pos', 'alloc_class_005_pos', 'alloc_class_006_pos',
+ 'alloc_class_007_pos', 'alloc_class_008_pos', 'alloc_class_009_pos',
+ 'alloc_class_010_pos', 'alloc_class_011_neg', 'alloc_class_012_pos',
+- 'alloc_class_013_pos']
++ 'alloc_class_013_pos', 'alloc_class_014_neg', 'alloc_class_015_pos']
+ tags = ['functional', 'alloc_class']
+
+ [tests/functional/arc]
+@@ -407,7 +407,7 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
+ 'import_cachefile_mirror_detached',
+ 'import_cachefile_paths_changed',
+ 'import_cachefile_shared_device',
+- 'import_devices_missing',
++ 'import_devices_missing', 'import_log_missing',
+ 'import_paths_changed',
+ 'import_rewind_config_changed',
+ 'import_rewind_device_replaced']
+@@ -431,6 +431,7 @@ tests = ['zpool_initialize_attach_detach_add_remove',
+ 'zpool_initialize_start_and_cancel_neg',
+ 'zpool_initialize_start_and_cancel_pos',
+ 'zpool_initialize_suspend_resume',
++ 'zpool_initialize_uninit',
+ 'zpool_initialize_unsupported_vdevs',
+ 'zpool_initialize_verify_checksums',
+ 'zpool_initialize_verify_initialized']
+@@ -456,7 +457,8 @@ tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift']
+ tags = ['functional', 'cli_root', 'zpool_replace']
+
+ [tests/functional/cli_root/zpool_resilver]
+-tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart']
++tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart',
++ 'zpool_resilver_concurrent']
+ tags = ['functional', 'cli_root', 'zpool_resilver']
+
+ [tests/functional/cli_root/zpool_scrub]
+@@ -669,7 +671,8 @@ tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos',
+ tags = ['functional', 'migration']
+
+ [tests/functional/mmap]
+-tests = ['mmap_write_001_pos', 'mmap_read_001_pos', 'mmap_seek_001_pos']
++tests = ['mmap_mixed', 'mmap_read_001_pos', 'mmap_seek_001_pos',
++ 'mmap_write_001_pos', 'mmap_sync_001_pos']
+ tags = ['functional', 'mmap']
+
+ [tests/functional/mount]
+@@ -823,9 +826,9 @@ tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos',
+ 'send-c_mixed_compression', 'send-c_stream_size_estimate',
+ 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
+ 'send-c_recv_dedup', 'send-L_toggle',
+- 'send_encrypted_incremental.ksh', 'send_encrypted_hierarchy',
+- 'send_encrypted_props', 'send_encrypted_truncated_files',
+- 'send_freeobjects', 'send_realloc_files',
++ 'send_encrypted_incremental.ksh', 'send_encrypted_freeobjects',
++ 'send_encrypted_hierarchy', 'send_encrypted_props',
++ 'send_encrypted_truncated_files', 'send_freeobjects', 'send_realloc_files',
+ 'send_realloc_encrypted_files', 'send_spill_block', 'send_holds',
+ 'send_hole_birth', 'send_mixed_raw', 'send-wR_encrypted_zvol',
+ 'send_partial_dataset', 'send_invalid', 'send_doall',
+diff --git a/tests/runfiles/sanity.run b/tests/runfiles/sanity.run
+index fb39fa54b..0a3d42cb2 100644
+--- a/tests/runfiles/sanity.run
++++ b/tests/runfiles/sanity.run
+@@ -547,6 +547,7 @@ tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos',
+ 'rsend_014_pos', 'rsend_016_neg', 'send-c_verify_contents',
+ 'send-c_volume', 'send-c_zstreamdump', 'send-c_recv_dedup',
+ 'send-L_toggle', 'send_encrypted_hierarchy', 'send_encrypted_props',
++ 'send_encrypted_freeobjects',
+ 'send_encrypted_truncated_files', 'send_freeobjects', 'send_holds',
+ 'send_mixed_raw', 'send-wR_encrypted_zvol', 'send_partial_dataset',
+ 'send_invalid']
+diff --git a/tests/test-runner/bin/test-runner.py.in b/tests/test-runner/bin/test-runner.py.in
+index a652d3d4a..5c868d945 100755
+--- a/tests/test-runner/bin/test-runner.py.in
++++ b/tests/test-runner/bin/test-runner.py.in
+@@ -33,7 +33,7 @@ from subprocess import PIPE
+ from subprocess import Popen
+ from subprocess import check_output
+ from threading import Timer
+-from time import time, CLOCK_MONOTONIC_RAW
++from time import time, CLOCK_MONOTONIC
+ from os.path import exists
+
+ BASEDIR = '/var/tmp/test_results'
+@@ -62,7 +62,7 @@ clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)]
+
+ def monotonic_time():
+ t = timespec()
+- if clock_gettime(CLOCK_MONOTONIC_RAW, ctypes.pointer(t)) != 0:
++ if clock_gettime(CLOCK_MONOTONIC, ctypes.pointer(t)) != 0:
+ errno_ = ctypes.get_errno()
+ raise OSError(errno_, os.strerror(errno_))
+ return t.tv_sec + t.tv_nsec * 1e-9
+diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in
+index 432899c21..878b30025 100755
+--- a/tests/test-runner/bin/zts-report.py.in
++++ b/tests/test-runner/bin/zts-report.py.in
+@@ -183,10 +183,13 @@ if sys.platform.startswith('freebsd'):
+ known.update({
+ 'cli_root/zfs_receive/receive-o-x_props_override':
+ ['FAIL', known_reason],
++ 'cli_root/zpool_resilver/zpool_resilver_concurrent':
++ ['SKIP', na_reason],
+ 'cli_root/zpool_wait/zpool_wait_trim_basic': ['SKIP', trim_reason],
+ 'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
+ 'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
+ 'link_count/link_count_001': ['SKIP', na_reason],
++ 'mmap/mmap_sync_001_pos': ['SKIP', na_reason],
+ })
+ elif sys.platform.startswith('linux'):
+ known.update({
+@@ -210,6 +213,7 @@ elif sys.platform.startswith('linux'):
+ # reasons listed above can be used.
+ #
+ maybe = {
++ 'threadsappend/threadsappend_001_pos': ['FAIL', 6136],
+ 'chattr/setup': ['SKIP', exec_reason],
+ 'crtime/crtime_001_pos': ['SKIP', statx_reason],
+ 'cli_root/zdb/zdb_006_pos': ['FAIL', known_reason],
+@@ -243,6 +247,7 @@ maybe = {
+ 'mmp/mmp_on_uberblocks': ['FAIL', known_reason],
+ 'pyzfs/pyzfs_unittest': ['SKIP', python_deps_reason],
+ 'pool_checkpoint/checkpoint_discard_busy': ['FAIL', '11946'],
++ 'pam/setup': ['SKIP', "pamtester might be not available"],
+ 'projectquota/setup': ['SKIP', exec_reason],
+ 'removal/removal_condense_export': ['FAIL', known_reason],
+ 'reservation/reservation_008_pos': ['FAIL', '7741'],
+@@ -252,14 +257,12 @@ maybe = {
+ 'snapshot/snapshot_010_pos': ['FAIL', '7961'],
+ 'snapused/snapused_004_pos': ['FAIL', '5513'],
+ 'tmpfile/setup': ['SKIP', tmpfile_reason],
+- 'threadsappend/threadsappend_001_pos': ['FAIL', '6136'],
+ 'trim/setup': ['SKIP', trim_reason],
+ 'upgrade/upgrade_projectquota_001_pos': ['SKIP', project_id_reason],
+ 'user_namespace/setup': ['SKIP', user_ns_reason],
+ 'userquota/setup': ['SKIP', exec_reason],
+- 'vdev_zaps/vdev_zaps_004_pos': ['FAIL', '6935'],
++ 'vdev_zaps/vdev_zaps_004_pos': ['FAIL', known_reason],
+ 'zvol/zvol_ENOSPC/zvol_ENOSPC_001_pos': ['FAIL', '5848'],
+- 'pam/setup': ['SKIP', "pamtester might be not available"],
+ }
+
+ if sys.platform.startswith('freebsd'):
+@@ -275,12 +278,18 @@ if sys.platform.startswith('freebsd'):
+ 'resilver/resilver_restart_001': ['FAIL', known_reason],
+ 'pool_checkpoint/checkpoint_big_rewind': ['FAIL', '12622'],
+ 'pool_checkpoint/checkpoint_indirect': ['FAIL', '12623'],
++ 'snapshot/snapshot_002_pos': ['FAIL', '14831'],
+ })
+ elif sys.platform.startswith('linux'):
+ maybe.update({
+ 'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason],
+ 'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason],
+- 'fault/auto_spare_shared': ['FAIL', '11889'],
++ 'fault/auto_online_002_pos': ['FAIL', 11889],
++ 'fault/auto_replace_001_pos': ['FAIL', 14851],
++ 'fault/auto_spare_002_pos': ['FAIL', 11889],
++ 'fault/auto_spare_multiple': ['FAIL', 11889],
++ 'fault/auto_spare_shared': ['FAIL', 11889],
++ 'fault/decompress_fault': ['FAIL', 11889],
+ 'io/io_uring': ['SKIP', 'io_uring support required'],
+ 'limits/filesystem_limit': ['SKIP', known_reason],
+ 'limits/snapshot_limit': ['SKIP', known_reason],
+diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am
+index d1c29fcd1..7ec4cb619 100644
+--- a/tests/zfs-tests/cmd/Makefile.am
++++ b/tests/zfs-tests/cmd/Makefile.am
+@@ -20,6 +20,7 @@ SUBDIRS = \
+ mmap_exec \
+ mmap_libaio \
+ mmap_seek \
++ mmap_sync \
+ mmapwrite \
+ nvlist_to_lua \
+ randwritecomp \
+diff --git a/tests/zfs-tests/cmd/mmap_sync/.gitignore b/tests/zfs-tests/cmd/mmap_sync/.gitignore
+new file mode 100644
+index 000000000..c721f472b
+--- /dev/null
++++ b/tests/zfs-tests/cmd/mmap_sync/.gitignore
+@@ -0,0 +1 @@
++/mmap_sync
+diff --git a/tests/zfs-tests/cmd/mmap_sync/Makefile.am b/tests/zfs-tests/cmd/mmap_sync/Makefile.am
+new file mode 100644
+index 000000000..313e8db5c
+--- /dev/null
++++ b/tests/zfs-tests/cmd/mmap_sync/Makefile.am
+@@ -0,0 +1,6 @@
++include $(top_srcdir)/config/Rules.am
++
++pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
++
++pkgexec_PROGRAMS = mmap_sync
++mmap_sync_SOURCES = mmap_sync.c
+diff --git a/tests/zfs-tests/cmd/mmap_sync/mmap_sync.c b/tests/zfs-tests/cmd/mmap_sync/mmap_sync.c
+new file mode 100644
+index 000000000..226e71be2
+--- /dev/null
++++ b/tests/zfs-tests/cmd/mmap_sync/mmap_sync.c
+@@ -0,0 +1,152 @@
++/*
++ * CDDL HEADER START
++ *
++ * The contents of this file are subject to the terms of the
++ * Common Development and Distribution License (the "License").
++ * You may not use this file except in compliance with the License.
++ *
++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++ * or http://opensource.org/licenses/CDDL-1.0.
++ * See the License for the specific language governing permissions
++ * and limitations under the License.
++ *
++ * When distributing Covered Code, include this CDDL HEADER in each
++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++ * If applicable, add the following below this CDDL HEADER, with the
++ * fields enclosed by brackets "[]" replaced with your own identifying
++ * information: Portions Copyright [yyyy] [name of copyright owner]
++ *
++ * CDDL HEADER END
++ */
++
++#include <stdlib.h>
++#include <stdio.h>
++#include <string.h>
++#include <sys/mman.h>
++#include <sys/stat.h>
++#include <sys/time.h>
++#include <fcntl.h>
++#include <unistd.h>
++#include <time.h>
++
++static void
++cleanup(char *file)
++{
++ (void) remove(file); /* best effort: result is ignored */
++}
++/* Time each msync() of a tiny shared mapping; exit 1 if one is too slow. */
++int
++main(int argc, char *argv[])
++{
++ char *testdir = getenv("TESTDIR");
++ if (!testdir) {
++ fprintf(stderr, "environment variable TESTDIR not set\n");
++ return (1);
++ }
++
++ struct stat st;
++ umask(0); /* so mkdir() below is not restricted by the umask */
++ if (stat(testdir, &st) != 0 &&
++ mkdir(testdir, 0777) != 0) {
++ perror("mkdir");
++ return (1);
++ }
++
++ if (argc > 3) {
++ fprintf(stderr, "usage: %s "
++ "[run time in mins] "
++ "[max msync time in ms]\n", argv[0]);
++ return (1);
++ }
++
++ int run_time_mins = 1; /* default; argv[1] overrides */
++ if (argc >= 2) {
++ run_time_mins = atoi(argv[1]);
++ }
++
++ int max_msync_time_ms = 1000; /* default; argv[2] overrides */
++ if (argc >= 3) {
++ max_msync_time_ms = atoi(argv[2]);
++ }
++
++ char filepath[512];
++ filepath[0] = '\0';
++ char *file = &filepath[0];
++
++ (void) snprintf(file, 512, "%s/msync_file", testdir);
++
++ const int LEN = 8; /* bytes truncated to and mapped below */
++ cleanup(file); /* remove any leftover file before creating it */
++
++ int fd = open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR |
++ S_IRGRP | S_IROTH);
++
++ if (fd == -1) {
++ (void) fprintf(stderr, "%s: %s: ", argv[0], file);
++ perror("open");
++ return (1);
++ }
++
++ if (ftruncate(fd, LEN) != 0) {
++ perror("ftruncate");
++ cleanup(file);
++ return (1);
++ }
++
++ void *ptr = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
++
++ if (ptr == MAP_FAILED) {
++ perror("mmap");
++ cleanup(file);
++ return (1);
++ }
++
++ struct timeval tstart;
++ gettimeofday(&tstart, NULL);
++
++ long long x = 0LL;
++
++ for (;;) {
++ *((long long *)ptr) = x; /* dirty the mapping on every pass */
++ x++;
++
++ struct timeval t1, t2;
++ gettimeofday(&t1, NULL);
++ if (msync(ptr, LEN, MS_SYNC|MS_INVALIDATE) != 0) {
++ perror("msync");
++ cleanup(file);
++ return (1);
++ }
++
++ gettimeofday(&t2, NULL);
++
++ double elapsed = (t2.tv_sec - t1.tv_sec) * 1000.0; /* ms */
++ elapsed += ((t2.tv_usec - t1.tv_usec) / 1000.0);
++ if (elapsed > max_msync_time_ms) {
++ fprintf(stderr, "slow msync: %f ms\n", elapsed);
++ if (munmap(ptr, LEN) != 0)
++ perror("munmap");
++ cleanup(file);
++ return (1);
++ }
++
++ double elapsed_start = (t2.tv_sec - tstart.tv_sec) * 1000.0;
++ elapsed_start += ((t2.tv_usec - tstart.tv_usec) / 1000.0);
++ if (elapsed_start > run_time_mins * 60 * 1000) { /* ran long enough */
++ break;
++ }
++ }
++
++ if (munmap(ptr, LEN) != 0) {
++ perror("munmap");
++ cleanup(file);
++ return (1);
++ }
++
++ if (close(fd) != 0) {
++ perror("close"); /* reported only; exit status stays 0 */
++ }
++
++ cleanup(file);
++ return (0);
++}
+diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg
+index 78802c9fb..8ac38dfd8 100644
+--- a/tests/zfs-tests/include/commands.cfg
++++ b/tests/zfs-tests/include/commands.cfg
+@@ -207,6 +207,7 @@ export ZFSTEST_FILES='badsend
+ mmap_exec
+ mmap_libaio
+ mmap_seek
++ mmap_sync
+ mmapwrite
+ nvlist_to_lua
+ randfree_file
+diff --git a/tests/zfs-tests/tests/functional/alloc_class/Makefile.am b/tests/zfs-tests/tests/functional/alloc_class/Makefile.am
+index 7cffb2eac..82fd9f340 100644
+--- a/tests/zfs-tests/tests/functional/alloc_class/Makefile.am
++++ b/tests/zfs-tests/tests/functional/alloc_class/Makefile.am
+@@ -14,7 +14,9 @@ dist_pkgdata_SCRIPTS = \
+ alloc_class_010_pos.ksh \
+ alloc_class_011_neg.ksh \
+ alloc_class_012_pos.ksh \
+- alloc_class_013_pos.ksh
++ alloc_class_013_pos.ksh \
++ alloc_class_014_neg.ksh \
++ alloc_class_015_pos.ksh
+
+ dist_pkgdata_DATA = \
+ alloc_class.cfg \
+diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh
+index 2ce22a624..790a47f26 100755
+--- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh
++++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh
+@@ -42,7 +42,8 @@ log_must display_status "$TESTPOOL"
+
+ log_must zfs create -o dedup=on -V 2G $TESTPOOL/$TESTVOL
+
+-log_must eval "new_fs $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL >/dev/null 2>&1"
++block_device_wait "$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL"
++log_must eval "new_fs $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL >/dev/null"
+
+ sync_pool
+ log_must zpool list -v $TESTPOOL
+diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh
+new file mode 100755
+index 000000000..1b52014fd
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh
+@@ -0,0 +1,38 @@
++#!/bin/ksh -p
++
++#
++# This file and its contents are supplied under the terms of the
++# Common Development and Distribution License ("CDDL"), version 1.0.
++# You may only use this file in accordance with the terms of version
++# 1.0 of the CDDL.
++#
++# A full copy of the text of the CDDL should have accompanied this
++# source. A copy of the CDDL is also available via the Internet at
++# http://www.illumos.org/license/CDDL.
++#
++
++. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib
++
++#
++# DESCRIPTION:
++# Setting the special_small_blocks property greater than recordsize fails.
++#
++
++verify_runnable "global"
++
++claim="Setting the special_small_blocks property greater than recordsize fails"
++
++log_assert $claim
++log_onexit cleanup
++log_must disk_setup
++
++for size in 512 4096 32768 131072 524288 1048576
++do
++ let bigger=$size*2
++ log_mustnot zpool create -O recordsize=$size \
++ -O special_small_blocks=$bigger \
++ $TESTPOOL raidz $ZPOOL_DISKS special mirror \
++ $CLASS_DISK0 $CLASS_DISK1
++done
++
++log_pass $claim
+diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh
+new file mode 100755
+index 000000000..49c468af6
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh
+@@ -0,0 +1,45 @@
++#!/bin/ksh -p
++
++#
++# This file and its contents are supplied under the terms of the
++# Common Development and Distribution License ("CDDL"), version 1.0.
++# You may only use this file in accordance with the terms of version
++# 1.0 of the CDDL.
++#
++# A full copy of the text of the CDDL should have accompanied this
++# source. A copy of the CDDL is also available via the Internet at
++# http://www.illumos.org/license/CDDL.
++#
++
++. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib
++
++#
++# DESCRIPTION:
++# Can set special_small_blocks property less than or equal to recordsize.
++#
++
++verify_runnable "global"
++
++claim="Can set special_small_blocks property less than or equal to recordsize"
++
++log_assert $claim
++log_onexit cleanup
++log_must disk_setup
++
++for size in 8192 32768 131072 524288 1048576
++do
++ let smaller=$size/2
++ log_must zpool create -O recordsize=$size \
++ -O special_small_blocks=$smaller \
++ $TESTPOOL raidz $ZPOOL_DISKS special mirror \
++ $CLASS_DISK0 $CLASS_DISK1
++ log_must zpool destroy -f "$TESTPOOL"
++
++ log_must zpool create -O recordsize=$size \
++ -O special_small_blocks=$size \
++ $TESTPOOL raidz $ZPOOL_DISKS special mirror \
++ $CLASS_DISK0 $CLASS_DISK1
++ log_must zpool destroy -f "$TESTPOOL"
++done
++
++log_pass $claim
+diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am
+index a8c9a31dc..4230ec557 100644
+--- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am
++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am
+@@ -12,6 +12,7 @@ dist_pkgdata_SCRIPTS = \
+ import_cachefile_paths_changed.ksh \
+ import_cachefile_shared_device.ksh \
+ import_devices_missing.ksh \
++ import_log_missing.ksh \
+ import_paths_changed.ksh \
+ import_rewind_config_changed.ksh \
+ import_rewind_device_replaced.ksh \
+diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh
+new file mode 100755
+index 000000000..f12cac785
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh
+@@ -0,0 +1,75 @@
++#!/bin/ksh -p
++
++#
++# This file and its contents are supplied under the terms of the
++# Common Development and Distribution License ("CDDL"), version 1.0.
++# You may only use this file in accordance with the terms of version
++# 1.0 of the CDDL.
++#
++# A full copy of the text of the CDDL should have accompanied this
++# source. A copy of the CDDL is also available via the Internet at
++# http://www.illumos.org/license/CDDL.
++#
++
++. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
++
++#
++# DESCRIPTION:
++# Import with missing log device should not remove spare/cache.
++#
++# STRATEGY:
++# 1. Create a pool.
++# 2. Add spare, cache and log devices to the pool.
++# 3. Export the pool.
++# 4. Remove the log device.
++# 5. Import the pool with -m flag.
++# 6. Verify that spare and cache are still present in the pool.
++#
++
++verify_runnable "global"
++
++log_onexit cleanup
++
++function test_missing_log # poolcreate cachevdev sparevdev logvdev
++{
++ typeset poolcreate="$1" # vdev spec passed to 'zpool create'
++ typeset cachevdev="$2" # device added as cache
++ typeset sparevdev="$3" # device added as spare
++ typeset logvdev="$4" # device added as log
++ typeset missingvdev="$4" # same device as logvdev; moved away below
++
++ log_note "$0: pool '$poolcreate', adding $cachevdev, $sparevdev," \
++ "$logvdev then moving away $missingvdev."
++
++ log_must zpool create $TESTPOOL1 $poolcreate
++
++ log_must zpool add $TESTPOOL1 cache $cachevdev spare $sparevdev \
++ log $logvdev
++
++ log_must_busy zpool export $TESTPOOL1
++
++ log_must mv $missingvdev $BACKUP_DEVICE_DIR
++
++ log_must zpool import -m -d $DEVICE_DIR $TESTPOOL1
++
++ CACHE_PRESENT=$(zpool status -v $TESTPOOL1 | grep $cachevdev)
++
++ SPARE_PRESENT=$(zpool status -v $TESTPOOL1 | grep $sparevdev)
++ # grep prints nothing when the vdev is absent from the status output.
++ if [ -z "$CACHE_PRESENT" ] || [ -z "$SPARE_PRESENT" ]
++ then
++ log_fail "cache/spare vdev missing after importing with missing" \
++ "log device"
++ fi
++
++ # Cleanup
++ log_must zpool destroy $TESTPOOL1
++
++ log_note ""
++}
++
++log_must mkdir -p $BACKUP_DEVICE_DIR
++
++test_missing_log "$VDEV0" "$VDEV1" "$VDEV2" "$VDEV3"
++
++log_pass "zpool import succeeded with missing log device"
+diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am
+index 3968902ec..483c1c2f5 100644
+--- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am
++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am
+@@ -10,6 +10,7 @@ dist_pkgdata_SCRIPTS = \
+ zpool_initialize_start_and_cancel_neg.ksh \
+ zpool_initialize_start_and_cancel_pos.ksh \
+ zpool_initialize_suspend_resume.ksh \
++ zpool_initialize_uninit.ksh \
+ zpool_initialize_unsupported_vdevs.ksh \
+ zpool_initialize_verify_checksums.ksh \
+ zpool_initialize_verify_initialized.ksh
+diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh
+new file mode 100755
+index 000000000..17f776cfb
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh
+@@ -0,0 +1,141 @@
++#!/bin/ksh -p
++#
++# CDDL HEADER START
++#
++# The contents of this file are subject to the terms of the
++# Common Development and Distribution License (the "License").
++# You may not use this file except in compliance with the License.
++#
++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++# or https://opensource.org/licenses/CDDL-1.0.
++# See the License for the specific language governing permissions
++# and limitations under the License.
++#
++# When distributing Covered Code, include this CDDL HEADER in each
++# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++# If applicable, add the following below this CDDL HEADER, with the
++# fields enclosed by brackets "[]" replaced with your own identifying
++# information: Portions Copyright [yyyy] [name of copyright owner]
++#
++# CDDL HEADER END
++#
++
++#
++# Copyright (c) 2016 by Delphix. All rights reserved.
++# Copyright (C) 2023 Lawrence Livermore National Security, LLC.
++#
++. $STF_SUITE/include/libtest.shlib
++. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
++
++#
++# DESCRIPTION:
++# Starting, stopping, uninitializing, and restarting an initialize works.
++#
++# STRATEGY:
++# 1. Create a three-disk pool.
++# 2. Verify uninitialize succeeds for uninitialized pool.
++# 3. Verify pool wide cancel|suspend + uninit
++# a. Start initializing and verify that initializing is active.
++# b. Verify uninitialize fails when actively initializing.
++# c. Cancel or suspend initializing and verify that initializing is not active.
++# d. Verify uninitialize succeeds after being cancelled.
++# 4. Verify per-disk cancel|suspend + uninit
++#
++
++DISK1="$(echo $DISKS | cut -d' ' -f1)"
++DISK2="$(echo $DISKS | cut -d' ' -f2)"
++DISK3="$(echo $DISKS | cut -d' ' -f3)"
++
++function status_check # pool disk1-state disk2-state disk3-state
++{
++ typeset pool="$1"
++ typeset disk1_state="$2"
++ typeset disk2_state="$3"
++ typeset disk3_state="$4"
++ # Each *_state is a grep pattern matched against that disk's status line.
++ state=$(zpool status -i "$pool" | grep "$DISK1" | grep "$disk1_state")
++ if [[ -z "$state" ]]; then # no line matched both greps
++ log_fail "DISK1 state; expected='$disk1_state' got '$state'"
++ fi
++
++ state=$(zpool status -i "$pool" | grep "$DISK2" | grep "$disk2_state")
++ if [[ -z "$state" ]]; then
++ log_fail "DISK2 state; expected='$disk2_state' got '$state'"
++ fi
++
++ state=$(zpool status -i "$pool" | grep "$DISK3" | grep "$disk3_state")
++ if [[ -z "$state" ]]; then
++ log_fail "DISK3 state; expected='$disk3_state' got '$state'"
++ fi
++}
++
++function status_check_all # pool disk-state (same pattern for all 3 disks)
++{
++ typeset pool="$1"
++ typeset disk_state="$2"
++
++ status_check "$pool" "$disk_state" "$disk_state" "$disk_state"
++}
++
++# 1. Create a three-disk pool.
++log_must zpool create -f $TESTPOOL $DISK1 $DISK2 $DISK3
++status_check_all $TESTPOOL "uninitialized"
++
++# 2. Verify uninitialize succeeds for uninitialized pool.
++log_must zpool initialize -u $TESTPOOL
++status_check_all $TESTPOOL "uninitialized"
++
++# 3. Verify pool wide cancel + uninit
++log_must zpool initialize $TESTPOOL
++status_check_all $TESTPOOL "[[:digit:]]* initialized"
++
++log_mustnot zpool initialize -u $TESTPOOL
++status_check_all $TESTPOOL "[[:digit:]]* initialized"
++
++log_must zpool initialize -c $TESTPOOL
++status_check_all $TESTPOOL "uninitialized"
++
++log_must zpool initialize -u $TESTPOOL
++status_check_all $TESTPOOL "uninitialized"
++
++# 3. Verify pool wide suspend + uninit
++log_must zpool initialize $TESTPOOL
++status_check_all $TESTPOOL "[[:digit:]]* initialized"
++
++log_mustnot zpool initialize -u $TESTPOOL
++status_check_all $TESTPOOL "[[:digit:]]* initialized"
++
++log_must zpool initialize -s $TESTPOOL
++status_check_all $TESTPOOL "suspended"
++
++log_must zpool initialize -u $TESTPOOL
++status_check_all $TESTPOOL "uninitialized"
++
++# 4. Verify per-disk cancel|suspend + uninit
++log_must zpool initialize $TESTPOOL
++status_check_all $TESTPOOL "[[:digit:]]* initialized"
++
++log_must zpool initialize -c $TESTPOOL $DISK1
++log_must zpool initialize -s $TESTPOOL $DISK2
++log_mustnot zpool initialize -u $TESTPOOL $DISK3
++status_check $TESTPOOL "uninitialized" "suspended" "[[:digit:]]* initialized"
++
++log_must zpool initialize -u $TESTPOOL $DISK1
++status_check $TESTPOOL "uninitialized" "suspended" "[[:digit:]]* initialized"
++
++log_must zpool initialize -u $TESTPOOL $DISK2
++status_check $TESTPOOL "uninitialized" "uninitialized" "[[:digit:]]* initialized"
++
++log_must zpool initialize $TESTPOOL $DISK1
++status_check $TESTPOOL "[[:digit:]]* initialized" "uninitialized" "[[:digit:]]* initialized"
++
++log_must zpool initialize $TESTPOOL $DISK2
++status_check_all $TESTPOOL "[[:digit:]]* initialized"
++
++log_must zpool initialize -s $TESTPOOL
++status_check_all $TESTPOOL "suspended"
++
++log_must zpool initialize -u $TESTPOOL $DISK1 $DISK2 $DISK3
++status_check_all $TESTPOOL "uninitialized"
++
++log_pass "Initialize start + cancel/suspend + uninit + start works"
+diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am
+index 2cec5335f..7ca9e81c1 100644
+--- a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am
++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am
+@@ -3,7 +3,8 @@ dist_pkgdata_SCRIPTS = \
+ setup.ksh \
+ cleanup.ksh \
+ zpool_resilver_bad_args.ksh \
+- zpool_resilver_restart.ksh
++ zpool_resilver_restart.ksh \
++ zpool_resilver_concurrent.ksh
+
+ dist_pkgdata_DATA = \
+ zpool_resilver.cfg
+diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh
+new file mode 100755
+index 000000000..4c3b09796
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh
+@@ -0,0 +1,101 @@
++#!/bin/ksh -p
++#
++# CDDL HEADER START
++#
++# The contents of this file are subject to the terms of the
++# Common Development and Distribution License (the "License").
++# You may not use this file except in compliance with the License.
++#
++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++# or http://www.opensolaris.org/os/licensing.
++# See the License for the specific language governing permissions
++# and limitations under the License.
++#
++# When distributing Covered Code, include this CDDL HEADER in each
++# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++# If applicable, add the following below this CDDL HEADER, with the
++# fields enclosed by brackets "[]" replaced with your own identifying
++# information: Portions Copyright [yyyy] [name of copyright owner]
++#
++# CDDL HEADER END
++#
++
++#
++# Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
++#
++
++. $STF_SUITE/include/libtest.shlib
++. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
++
++#
++# DESCRIPTION:
++# Verify 'zpool clear' doesn't cause concurrent resilvers
++#
++# STRATEGY:
++# 1. Create N(10) virtual disk files.
++# 2. Create draid pool based on the virtual disk files.
++# 3. Fill the filesystem with directories and files.
++# 4. Force-fault 2 vdevs and verify distributed spare is kicked in.
++# 5. Free the distributed spare by replacing the faulty drive.
++# 6. Run zpool clear and verify that it does not initiate 2 resilvers
++# concurrently while distributed spare gets kicked in.
++#
++
++verify_runnable "global"
++
++typeset -ir devs=10
++typeset -ir nparity=1
++typeset -ir ndata=8
++typeset -ir dspare=1
++
++function cleanup # destroy the pool, remove vdev files and $BASEDIR, stop ZED
++{
++ poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
++
++ for i in {0..$devs}; do # NOTE(review): range looks inclusive of $devs
++ log_must rm -f "$BASEDIR/vdev$i"
++ done
++
++ for dir in $BASEDIR; do
++ if [[ -d $dir ]]; then
++ log_must rm -rf $dir
++ fi
++ done
++
++ zed_stop
++ zed_cleanup
++}
++
++log_assert "Verify zpool clear on draid pool doesn't cause concurrent resilvers"
++log_onexit cleanup
++
++setup_test_env $TESTPOOL draid${nparity}:${ndata}d:${dspare}s $devs
++
++# ZED needed for sequential resilver
++zed_setup
++log_must zed_start
++
++log_must zpool offline -f $TESTPOOL $BASEDIR/vdev5
++log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60
++log_must zpool wait -t resilver $TESTPOOL
++log_must zpool offline -f $TESTPOOL $BASEDIR/vdev6
++
++log_must zpool labelclear -f $BASEDIR/vdev5
++log_must zpool labelclear -f $BASEDIR/vdev6
++
++log_must zpool replace -w $TESTPOOL $BASEDIR/vdev5
++sync_pool $TESTPOOL
++
++log_must zpool events -c
++log_must zpool clear $TESTPOOL
++log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60
++log_must zpool wait -t resilver $TESTPOOL
++log_must zpool wait -t scrub $TESTPOOL
++
++nof_resilver=$(zpool events | grep -c resilver_start)
++if [ $nof_resilver = 1 ] ; then
++ log_must verify_pool $TESTPOOL
++ log_pass "zpool clear on draid pool doesn't cause concurrent resilvers"
++else
++ log_fail "FAIL: sequential and healing resilver initiated concurrently"
++fi
+diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh
+index fbb0c2910..19781137d 100755
+--- a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh
++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh
+@@ -35,7 +35,7 @@
+ DISK1=${DISKS%% *}
+
+ log_must zpool create -f $TESTPOOL $DISK1
+-log_must zpool trim $TESTPOOL
++log_must zpool trim -r 1 $TESTPOOL
+
+ [[ -z "$(trim_progress $TESTPOOL $DISK1)" ]] && \
+ log_fail "TRIM did not start"
+diff --git a/tests/zfs-tests/tests/functional/mmap/Makefile.am b/tests/zfs-tests/tests/functional/mmap/Makefile.am
+index b26791ee7..526405954 100644
+--- a/tests/zfs-tests/tests/functional/mmap/Makefile.am
++++ b/tests/zfs-tests/tests/functional/mmap/Makefile.am
+@@ -2,10 +2,12 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/mmap
+ dist_pkgdata_SCRIPTS = \
+ setup.ksh \
+ cleanup.ksh \
++ mmap_mixed.ksh \
+ mmap_read_001_pos.ksh \
+ mmap_write_001_pos.ksh \
+ mmap_libaio_001_pos.ksh \
+- mmap_seek_001_pos.ksh
++ mmap_seek_001_pos.ksh \
++ mmap_sync_001_pos.ksh
+
+ dist_pkgdata_DATA = \
+ mmap.cfg
+diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh
+new file mode 100755
+index 000000000..6c8246d48
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh
+@@ -0,0 +1,86 @@
++#!/bin/ksh -p
++#
++# CDDL HEADER START
++#
++# The contents of this file are subject to the terms of the
++# Common Development and Distribution License (the "License").
++# You may not use this file except in compliance with the License.
++#
++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++# or https://opensource.org/licenses/CDDL-1.0.
++# See the License for the specific language governing permissions
++# and limitations under the License.
++#
++# When distributing Covered Code, include this CDDL HEADER in each
++# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++# If applicable, add the following below this CDDL HEADER, with the
++# fields enclosed by brackets "[]" replaced with your own identifying
++# information: Portions Copyright [yyyy] [name of copyright owner]
++#
++# CDDL HEADER END
++#
++
++#
++# Copyright (c) 2023 by Lawrence Livermore National Security, LLC.
++#
++
++. $STF_SUITE/include/libtest.shlib
++. $STF_SUITE/tests/functional/mmap/mmap.cfg
++
++#
++# DESCRIPTION:
++# Verify mixed buffered and mmap IO.
++#
++# STRATEGY:
++# 1. Create a zero-filled file (64 blocks of 128 KiB).
++# 2. Start a background buffered read/write fio to the file.
++# 3. Start a background mmap read/write fio to the file.
++#
++
++verify_runnable "global"
++
++function cleanup
++{
++ log_must rm -f "$tmp_file" # data file shared by all fio jobs
++}
++
++log_assert "Verify mixed buffered and mmap IO"
++
++log_onexit cleanup
++
++mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
++tmp_file=$mntpnt/file
++bs=$((128 * 1024))
++blocks=64
++size=$((bs * blocks))
++runtime=60
++# Write $size bytes of zeros so every fio job works on a fully sized file.
++log_must dd if=/dev/zero of=$tmp_file bs=$bs count=$blocks
++
++# Buffered IO writes
++log_must eval "fio --filename=$tmp_file --name=buffer-write \
++ --rw=randwrite --size=$size --bs=$bs --direct=0 --numjobs=1 \
++ --ioengine=sync --fallocate=none --group_reporting --minimal \
++ --runtime=$runtime --time_based --norandommap &"
++
++# Buffered IO reads
++log_must eval "fio --filename=$tmp_file --name=buffer-read \
++ --rw=randread --size=$size --bs=$bs --direct=0 --numjobs=1 \
++ --ioengine=sync --fallocate=none --group_reporting --minimal \
++ --runtime=$runtime --time_based --norandommap &"
++
++# mmap IO writes
++log_must eval "fio --filename=$tmp_file --name=mmap-write \
++ --rw=randwrite --size=$size --bs=$bs --numjobs=1 \
++ --ioengine=mmap --fallocate=none --group_reporting --minimal \
++ --runtime=$runtime --time_based --norandommap &"
++
++# mmap IO reads
++log_must eval "fio --filename=$tmp_file --name=mmap-read \
++ --rw=randread --size=$size --bs=$bs --numjobs=1 \
++ --ioengine=mmap --fallocate=none --group_reporting --minimal \
++ --runtime=$runtime --time_based --norandommap &"
++# Block until all four background fio jobs have exited.
++log_must wait
++
++log_pass "Verified mixed buffered and mmap IO"
+diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_sync_001_pos.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_sync_001_pos.ksh
+new file mode 100755
+index 000000000..b764d6607
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/mmap/mmap_sync_001_pos.ksh
+@@ -0,0 +1,63 @@
++#!/bin/ksh -p
++
++#
++# This file and its contents are supplied under the terms of the
++# Common Development and Distribution License ("CDDL"), version 1.0.
++# You may only use this file in accordance with the terms of version
++# 1.0 of the CDDL.
++#
++# A full copy of the text of the CDDL should have accompanied this
++# source. A copy of the CDDL is also available via the Internet at
++# http://www.illumos.org/license/CDDL.
++#
++
++#
++# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
++#
++
++. $STF_SUITE/include/libtest.shlib
++
++#
++# DESCRIPTION:
++# msync()s of mmap()'ed file should complete quickly during
++# background dirty page writebacks by the kernel.
++#
++
++function cleanup
++{
++ log_must eval "echo $saved_vm_dirty_expire_centisecs > /proc/sys/vm/dirty_expire_centisecs"
++ log_must eval "echo $saved_vm_dirty_background_ratio > /proc/sys/vm/dirty_background_ratio"
++ log_must eval "echo $saved_vm_dirty_writeback_centisecs > /proc/sys/vm/dirty_writeback_centisecs"
++ # The saved values were written back above. If a tunable still reads
++ # 1 (or 0 for dirty_background_ratio), assume the saved value was bogus,
++ # e.g. from an interrupted earlier run, and apply a fixed default.
++ if [ $(</proc/sys/vm/dirty_expire_centisecs) -eq 1 ]; then
++ log_must eval "echo 3000 > /proc/sys/vm/dirty_expire_centisecs"
++ fi
++
++ if [ $(</proc/sys/vm/dirty_background_ratio) -eq 0 ]; then
++ log_must eval "echo 10 > /proc/sys/vm/dirty_background_ratio"
++ fi
++
++ if [ $(</proc/sys/vm/dirty_writeback_centisecs) -eq 1 ]; then
++ log_must eval "echo 500 > /proc/sys/vm/dirty_writeback_centisecs"
++ fi
++}
++
++if ! is_linux; then
++ log_unsupported "Only supported on Linux, requires /proc/sys/vm/ tunables"
++fi
++
++log_onexit cleanup
++log_assert "Run the tests for mmap_sync"
++# Save the current writeback tunables so cleanup can restore them.
++read -r saved_vm_dirty_expire_centisecs < /proc/sys/vm/dirty_expire_centisecs
++read -r saved_vm_dirty_background_ratio < /proc/sys/vm/dirty_background_ratio
++read -r saved_vm_dirty_writeback_centisecs < /proc/sys/vm/dirty_writeback_centisecs
++# NOTE(review): sets dirty_background_bytes; only _ratio is saved - confirm.
++log_must eval "echo 1 > /proc/sys/vm/dirty_expire_centisecs"
++log_must eval "echo 1 > /proc/sys/vm/dirty_background_bytes"
++log_must eval "echo 1 > /proc/sys/vm/dirty_writeback_centisecs"
++
++log_must mmap_sync
++log_pass "mmap_sync tests passed."
+diff --git a/tests/zfs-tests/tests/functional/rsend/Makefile.am b/tests/zfs-tests/tests/functional/rsend/Makefile.am
+index d80d2124e..2cedf03d3 100644
+--- a/tests/zfs-tests/tests/functional/rsend/Makefile.am
++++ b/tests/zfs-tests/tests/functional/rsend/Makefile.am
+@@ -25,6 +25,7 @@ dist_pkgdata_SCRIPTS = \
+ rsend_022_pos.ksh \
+ rsend_024_pos.ksh \
+ send_encrypted_files.ksh \
++ send_encrypted_freeobjects.ksh \
+ send_encrypted_hierarchy.ksh \
+ send_encrypted_props.ksh \
+ send_encrypted_truncated_files.ksh \
+diff --git a/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh b/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh
+index 988ed91b9..1bf234823 100755
+--- a/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh
++++ b/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh
+@@ -29,6 +29,7 @@
+
+ function cleanup
+ {
++ rm -f $BACKDIR/copy # absent if the test failed before the first dd
+ log_must_busy zfs destroy -r $vol
+ cleanup_pool $POOL2
+ }
+@@ -60,7 +61,9 @@ log_must eval "zfs recv -d $POOL2 <$BACKDIR/full"
+
+ verify_stream_size $BACKDIR/full $vol
+ verify_stream_size $BACKDIR/full $vol2
+-md5=$(dd if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5digest)
++block_device_wait $voldev2
++log_must dd if=$voldev2 of=$BACKDIR/copy bs=1024k count=$megs
++md5=$(md5digest $BACKDIR/copy)
+ [[ $md5 = $md5_1 ]] || log_fail "md5 mismatch: $md5 != $md5_1"
+
+ # Repeat, for an incremental send
+@@ -72,7 +75,9 @@ log_must eval "zfs recv -d $POOL2 <$BACKDIR/inc"
+
+ verify_stream_size $BACKDIR/inc $vol 90 $vol@snap
+ verify_stream_size $BACKDIR/inc $vol2 90 $vol2@snap
+-md5=$(dd skip=$megs if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5digest)
++block_device_wait $voldev2
++log_must dd skip=$megs if=$voldev2 of=$BACKDIR/copy bs=1024k count=$megs
++md5=$(md5digest $BACKDIR/copy)
+ [[ $md5 = $md5_2 ]] || log_fail "md5 mismatch: $md5 != $md5_2"
+
+ log_pass "Verify compressed send works with volumes"
+diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh
+new file mode 100755
+index 000000000..92451bd1a
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh
+@@ -0,0 +1,87 @@
++#!/bin/ksh
++
++#
++# This file and its contents are supplied under the terms of the
++# Common Development and Distribution License ("CDDL"), version 1.0.
++# You may only use this file in accordance with the terms of version
++# 1.0 of the CDDL.
++#
++# A full copy of the text of the CDDL should have accompanied this
++# source. A copy of the CDDL is also available via the Internet at
++# http://www.illumos.org/license/CDDL.
++#
++
++#
++# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
++# Copyright (c) 2023 by Findity AB
++#
++
++. $STF_SUITE/tests/functional/rsend/rsend.kshlib
++
++#
++# Description:
++# Verify that receiving a raw encrypted stream, with a FREEOBJECTS
++# removing all existing objects in a block followed by an OBJECT write
++# to the same block, does not result in a panic.
++#
++# Strategy:
++# 1. Create a new encrypted filesystem
++# 2. Create file f1 as the first object in some block (here object 128)
++# 3. Take snapshot A
++# 4. Create file f2 as the second object in the same block (here object 129)
++# 5. Delete f1
++# 6. Take snapshot B
++# 7. Receive a full raw encrypted send of A
++# 8. Receive an incremental raw send of B
++#
++verify_runnable "both"
++
++function create_object_with_num
++{
++ typeset file=$1
++ typeset num=$2
++ # Re-create $file until ls -i reports inode $num; fail after $tries tries.
++ typeset tries=100 i onum
++ for ((i=0; i<$tries; i++)); do
++ touch $file
++ onum=$(ls -li $file | awk '{print $1}')
++
++ if [[ $onum -ne $num ]] ; then
++ rm -f $file
++ else
++ break
++ fi
++ done
++ if [[ $i -eq $tries ]]; then
++ log_fail "Failed to create object with number $num"
++ fi
++}
++
++log_assert "FREEOBJECTS followed by OBJECT in encrypted stream does not crash"
++
++sendds=sendencfods
++recvds=recvencfods
++keyfile=/$POOL/keyencfods
++f1=/$POOL/$sendds/f1
++f2=/$POOL/$sendds/f2
++# Passphrase file used as the keylocation of the encrypted send dataset.
++log_must eval "echo 'password' > $keyfile"
++
++#
++# xattr=sa and dnodesize=legacy for sequential object numbers, see
++# note in send_freeobjects.ksh.
++#
++log_must zfs create -o xattr=sa -o dnodesize=legacy -o encryption=on \
++ -o keyformat=passphrase -o keylocation=file://$keyfile $POOL/$sendds
++# Steps 2-6: @A holds f1 (object 128); @B adds f2 (129) and drops f1.
++create_object_with_num $f1 128
++log_must zfs snap $POOL/$sendds@A
++create_object_with_num $f2 129
++log_must rm $f1
++log_must zfs snap $POOL/$sendds@B
++# Steps 7-8: raw full receive of @A, then raw incremental receive of @B.
++log_must eval "zfs send -w $POOL/$sendds@A | zfs recv $POOL/$recvds"
++log_must eval "zfs send -w -i $POOL/$sendds@A $POOL/$sendds@B |" \
++ "zfs recv $POOL/$recvds"
++
++log_pass "FREEOBJECTS followed by OBJECT in encrypted stream did not crash"