aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorAlexander Motin <mav@FreeBSD.org>2018-07-31 18:49:07 +0000
committerAlexander Motin <mav@FreeBSD.org>2018-07-31 18:49:07 +0000
commit59ef839e67afaa3edf118b90605c9d59de1c6fe3 (patch)
treeb03fbee8807ac696fc51ed5826ae9e483b878bfc /lib
parent86b3990c6e51d072a690a39f33e1b5b4d258aff2 (diff)
downloadsrc-59ef839e67afaa3edf118b90605c9d59de1c6fe3.tar.gz
src-59ef839e67afaa3edf118b90605c9d59de1c6fe3.zip
9102 zfs should be able to initialize storage devices
The first access to a disk block can incur a performance penalty on some platforms (e.g. AWS's EBS, VMware VMDKs). Therefore it is recommended that volumes be "thick provisioned", where supported by the platform (VMware). Thick provisioning is time consuming and often is ignored. If the thick provision step is omitted, customers will see suboptimal performance until we have written to all parts of the LUN. ZFS should be able to initialize any unused storage to remove any first-write penalty that exists. illumos/illumos-gate@094e47e980b0796b94b1b8f51f462a64d246e516 Reviewed by: John Wren Kennedy <john.kennedy@delphix.com> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com> Reviewed by: Prakash Surya <prakash.surya@delphix.com> Approved by: Richard Lowe <richlowe@richlowe.net>
Notes
Notes: svn path=/vendor-sys/illumos/dist/; revision=336991
Diffstat (limited to 'lib')
-rw-r--r--lib/libzfs/common/libzfs.h5
-rw-r--r--lib/libzfs/common/libzfs_pool.c94
-rw-r--r--lib/libzfs/common/libzfs_util.c7
-rw-r--r--lib/libzfs_core/common/libzfs_core.c37
-rw-r--r--lib/libzfs_core/common/libzfs_core.h4
5 files changed, 147 insertions, 0 deletions
diff --git a/lib/libzfs/common/libzfs.h b/lib/libzfs/common/libzfs.h
index 1ebaffe9d154..9dc2b02e147f 100644
--- a/lib/libzfs/common/libzfs.h
+++ b/lib/libzfs/common/libzfs.h
@@ -136,6 +136,9 @@ typedef enum zfs_error {
EZFS_NO_CHECKPOINT, /* pool has no checkpoint */
EZFS_DEVRM_IN_PROGRESS, /* a device is currently being removed */
EZFS_VDEV_TOO_BIG, /* a device is too big to be used */
+ EZFS_TOOMANY, /* argument list too long */
+ EZFS_INITIALIZING, /* currently initializing */
+ EZFS_NO_INITIALIZE, /* no active initialize */
EZFS_UNKNOWN
} zfs_error_t;
@@ -260,6 +263,8 @@ typedef struct splitflags {
* Functions to manipulate pool and vdev state
*/
extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
+extern int zpool_initialize(zpool_handle_t *, pool_initialize_func_t,
+ nvlist_t *);
extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *);
extern int zpool_reguid(zpool_handle_t *);
extern int zpool_reopen(zpool_handle_t *);
diff --git a/lib/libzfs/common/libzfs_pool.c b/lib/libzfs/common/libzfs_pool.c
index 4a6fa0f6be97..deb425ad4f33 100644
--- a/lib/libzfs/common/libzfs_pool.c
+++ b/lib/libzfs/common/libzfs_pool.c
@@ -1969,6 +1969,100 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
}
}
+/*
+ * Map an errno reported for a single vdev by the initialize ioctl onto the
+ * matching libzfs error code.  Values with no dedicated mapping are handed
+ * back unchanged.
+ */
+static int
+xlate_init_err(int err)
+{
+	int zerr;
+
+	switch (err) {
+	case ENODEV:
+		zerr = EZFS_NODEVICE;
+		break;
+	case EINVAL:
+	case EROFS:
+		zerr = EZFS_BADDEV;
+		break;
+	case EBUSY:
+		zerr = EZFS_INITIALIZING;
+		break;
+	case ESRCH:
+		zerr = EZFS_NO_INITIALIZE;
+		break;
+	default:
+		zerr = err;
+		break;
+	}
+
+	return (zerr);
+}
+
+/*
+ * Begin, suspend, or cancel initialization (writing to all free blocks) of
+ * the given vdevs in the given pool.
+ *
+ * vds is an nvlist whose pair names are the names of the vdevs to act on;
+ * the pair values are not examined.  Each name is resolved to a vdev GUID
+ * before the request is issued.  If a name cannot be resolved, or refers to
+ * a spare or cache device, nothing is issued and the result of zfs_error()
+ * for EZFS_NODEVICE, EZFS_ISSPARE or EZFS_ISL2CACHE is returned.
+ *
+ * Returns 0 on success.  If the request fails and per-vdev errors were
+ * returned, each one is reported through zfs_error_fmt() and -1 is returned;
+ * otherwise the result of zpool_standard_error() for the overall error is
+ * returned.
+ */
+int
+zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
+    nvlist_t *vds)
+{
+	char msg[1024];
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	/* Start from NULL so the checks below never read garbage. */
+	nvlist_t *errlist = NULL;
+
+	/* translate vdev names to guids */
+	nvlist_t *vdev_guids = fnvlist_alloc();
+	nvlist_t *guids_to_paths = fnvlist_alloc();
+	boolean_t spare, cache;
+	nvlist_t *tgt;
+	nvpair_t *elem;
+
+	for (elem = nvlist_next_nvpair(vds, NULL); elem != NULL;
+	    elem = nvlist_next_nvpair(vds, elem)) {
+		char *vd_path = nvpair_name(elem);
+		tgt = zpool_find_vdev(zhp, vd_path, &spare, &cache, NULL);
+
+		if ((tgt == NULL) || cache || spare) {
+			(void) snprintf(msg, sizeof (msg),
+			    dgettext(TEXT_DOMAIN, "cannot initialize '%s'"),
+			    vd_path);
+			int err = (tgt == NULL) ? EZFS_NODEVICE :
+			    (spare ? EZFS_ISSPARE : EZFS_ISL2CACHE);
+			fnvlist_free(vdev_guids);
+			fnvlist_free(guids_to_paths);
+			return (zfs_error(hdl, err, msg));
+		}
+
+		uint64_t guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
+		fnvlist_add_uint64(vdev_guids, vd_path, guid);
+
+		/*
+		 * Remember which name the caller used for this GUID so that
+		 * per-vdev errors, which are keyed by the stringified GUID,
+		 * can be reported against it.  The cast keeps the argument
+		 * in step with "%llu" regardless of how uint64_t is defined.
+		 */
+		(void) snprintf(msg, sizeof (msg), "%llu",
+		    (unsigned long long)guid);
+		fnvlist_add_string(guids_to_paths, msg, vd_path);
+	}
+
+	int err = lzc_initialize(zhp->zpool_name, cmd_type, vdev_guids,
+	    &errlist);
+	fnvlist_free(vdev_guids);
+
+	if (err == 0) {
+		fnvlist_free(guids_to_paths);
+		fnvlist_free(errlist);
+		return (0);
+	}
+
+	/*
+	 * A failure may or may not carry per-vdev detail.  Use the
+	 * non-aborting lookup so that an error list without the vdevs entry
+	 * falls through to the pool-level error instead of asserting.
+	 */
+	nvlist_t *vd_errlist = NULL;
+	if (errlist != NULL && nvlist_lookup_nvlist(errlist,
+	    ZPOOL_INITIALIZE_VDEVS, &vd_errlist) != 0) {
+		vd_errlist = NULL;
+	}
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "operation failed"));
+
+	for (elem = nvlist_next_nvpair(vd_errlist, NULL); elem != NULL;
+	    elem = nvlist_next_nvpair(vd_errlist, elem)) {
+		int vd_error = xlate_init_err((int)fnvpair_value_int64(elem));
+		char *path = fnvlist_lookup_string(guids_to_paths,
+		    nvpair_name(elem));
+		(void) zfs_error_fmt(hdl, vd_error,
+		    dgettext(TEXT_DOMAIN, "cannot initialize '%s'"), path);
+	}
+
+	fnvlist_free(guids_to_paths);
+
+	/*
+	 * vd_errlist points into errlist, so note whether per-vdev errors
+	 * were reported before releasing the parent list.
+	 */
+	boolean_t reported = (vd_errlist != NULL);
+	fnvlist_free(errlist);
+	if (reported)
+		return (-1);
+
+	return (zpool_standard_error(hdl, err, msg));
+}
+
/*
* This provides a very minimal check whether a given string is likely a
* c#t#d# style string. Users of this are expected to do their own
diff --git a/lib/libzfs/common/libzfs_util.c b/lib/libzfs/common/libzfs_util.c
index 3cf83eee9eb0..aa74189cc81d 100644
--- a/lib/libzfs/common/libzfs_util.c
+++ b/lib/libzfs/common/libzfs_util.c
@@ -249,6 +249,13 @@ libzfs_error_description(libzfs_handle_t *hdl)
return (dgettext(TEXT_DOMAIN, "device removal in progress"));
case EZFS_VDEV_TOO_BIG:
return (dgettext(TEXT_DOMAIN, "device exceeds supported size"));
+ case EZFS_TOOMANY:
+ return (dgettext(TEXT_DOMAIN, "argument list too long"));
+ case EZFS_INITIALIZING:
+ return (dgettext(TEXT_DOMAIN, "currently initializing"));
+ case EZFS_NO_INITIALIZE:
+ return (dgettext(TEXT_DOMAIN, "there is no active "
+ "initialization"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
diff --git a/lib/libzfs_core/common/libzfs_core.c b/lib/libzfs_core/common/libzfs_core.c
index d09304fbbba5..ac25820c610b 100644
--- a/lib/libzfs_core/common/libzfs_core.c
+++ b/lib/libzfs_core/common/libzfs_core.c
@@ -1038,3 +1038,40 @@ lzc_channel_program_nosync(const char *pool, const char *program,
return (lzc_channel_program_impl(pool, program, B_FALSE, timeout,
memlimit, argnvl, outnvl));
}
+
+/*
+ * Change the initializing state of a set of vdevs in a pool.
+ *
+ * vdevs is a list of (<key>, guid) pairs in which each guid is a uint64 vdev
+ * GUID; the key is ignored.
+ *
+ * When an error relates to the vdev arguments, per-vdev errors are returned
+ * in an nvlist under the key "vdevs".  Each entry is a (guid, errno) pair
+ * where guid is stringified with PRIu64 and errno is an int64_t holding one
+ * of:
+ *	- ENODEV if the device was not found
+ *	- EINVAL if the device is not a leaf or is not concrete (e.g. missing)
+ *	- EROFS if the device is not writeable
+ *	- EBUSY start requested but the device is already being initialized
+ *	- ESRCH cancel/suspend requested but device is not being initialized
+ *
+ * If the errlist is empty, then the return value will be:
+ *	- EINVAL if one or more arguments was invalid
+ *	- Other spa_open failures
+ *	- 0 if the operation succeeded
+ */
+int
+lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type,
+    nvlist_t *vdevs, nvlist_t **errlist)
+{
+	nvlist_t *request = fnvlist_alloc();
+	int rc;
+
+	/* Bundle the command and its target vdevs into one argument list. */
+	fnvlist_add_uint64(request, ZPOOL_INITIALIZE_COMMAND,
+	    (uint64_t)cmd_type);
+	fnvlist_add_nvlist(request, ZPOOL_INITIALIZE_VDEVS, vdevs);
+
+	rc = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, request, errlist);
+	fnvlist_free(request);
+
+	return (rc);
+}
diff --git a/lib/libzfs_core/common/libzfs_core.h b/lib/libzfs_core/common/libzfs_core.h
index 8c6743f503f0..d4a9a49cc1e7 100644
--- a/lib/libzfs_core/common/libzfs_core.h
+++ b/lib/libzfs_core/common/libzfs_core.h
@@ -31,6 +31,8 @@
#include <libnvpair.h>
#include <sys/param.h>
#include <sys/types.h>
+#include <sys/fs/zfs.h>
+
#ifdef __cplusplus
extern "C" {
@@ -56,6 +58,8 @@ int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **);
int lzc_bookmark(nvlist_t *, nvlist_t **);
int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **);
int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **);
+int lzc_initialize(const char *, pool_initialize_func_t, nvlist_t *,
+ nvlist_t **);
int lzc_snaprange_space(const char *, const char *, uint64_t *);