aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorAlexander Motin <mav@FreeBSD.org>2018-02-18 01:21:52 +0000
committerAlexander Motin <mav@FreeBSD.org>2018-02-18 01:21:52 +0000
commit79a23a69442d5f2ba114e37737bd6e9341ce2cab (patch)
treec97c3bdcbc6362db3937c810d6b3605133d147da /lib
parent6fd4145b3146bb393622c3796056109542cf953f (diff)
downloadsrc-79a23a69442d5f2ba114e37737bd6e9341ce2cab.tar.gz
src-79a23a69442d5f2ba114e37737bd6e9341ce2cab.zip
7614 zfs device evacuation/removal
illumos/illumos-gate@5cabbc6b49070407fb9610cfe73d4c0e0dea3e77 https://www.illumos.org/issues/7614: This project allows top-level vdevs to be removed from the storage pool with “zpool remove”, reducing the total amount of storage in the pool. This operation copies all allocated regions of the device to be removed onto other devices, recording the mapping from old to new location. After the removal is complete, read and free operations to the removed (now “indirect”) vdev must be remapped and performed at the new location on disk. The indirect mapping table is kept in memory whenever the pool is loaded, so there is minimal performance overhead when doing operations on the indirect vdev. The size of the in-memory mapping table will be reduced when its entries become “obsolete” because they are no longer used by any block pointers in the pool. An entry becomes obsolete when all the blocks that use it are freed. An entry can also become obsolete when all the snapshots that reference it are deleted, and the block pointers that reference it have been “remapped” in all filesystems/zvols (and clones). Whenever an indirect block is written, all the block pointers in it will be “remapped” to their new (concrete) locations if possible. This process can be accelerated by using the “zfs remap” command to proactively rewrite all indirect blocks that reference indirect (removed) vdevs. Note that when a device is removed, we do not verify the checksum of the data that is copied. This makes the process much faster, but if it were used on redundant vdevs (i.e. mirror or raidz vdevs), it would be possible to copy the wrong data, when we have the correct data on e.g. the other side of the mirror. Therefore, mirror and raidz devices can not be removed. Reviewed by: Alex Reece <alex@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: John Kennedy <john.kennedy@delphix.com> Reviewed by: Prakash Surya <prakash.surya@delphix.com> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Richard Laager <rlaager@wiktel.com> Reviewed by: Tim Chase <tim@chase2k.com> Approved by: Garrett D'Amore <garrett@damore.org> Author: Prashanth Sreenivasa <pks@delphix.com>
Notes
Notes: svn path=/vendor-sys/illumos/dist/; revision=329502
Diffstat (limited to 'lib')
-rw-r--r--lib/libzfs/common/libzfs.h6
-rw-r--r--lib/libzfs/common/libzfs_dataset.c18
-rw-r--r--lib/libzfs/common/libzfs_pool.c110
-rw-r--r--lib/libzfs/common/libzfs_util.c7
-rw-r--r--lib/libzfs_core/common/libzfs_core.c10
-rw-r--r--lib/libzfs_core/common/libzfs_core.h1
6 files changed, 132 insertions, 20 deletions
diff --git a/lib/libzfs/common/libzfs.h b/lib/libzfs/common/libzfs.h
index d62c4489da5a..b653f19a761b 100644
--- a/lib/libzfs/common/libzfs.h
+++ b/lib/libzfs/common/libzfs.h
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
@@ -130,6 +130,7 @@ typedef enum zfs_error {
EZFS_DIFFDATA, /* bad zfs diff data */
EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_SCRUB_PAUSED, /* scrub currently paused */
+ EZFS_NO_PENDING, /* cannot cancel, no operation is pending */
EZFS_UNKNOWN
} zfs_error_t;
@@ -265,6 +266,8 @@ extern int zpool_vdev_attach(zpool_handle_t *, const char *,
const char *, nvlist_t *, int);
extern int zpool_vdev_detach(zpool_handle_t *, const char *);
extern int zpool_vdev_remove(zpool_handle_t *, const char *);
+extern int zpool_vdev_remove_cancel(zpool_handle_t *);
+extern int zpool_vdev_indirect_size(zpool_handle_t *, const char *, uint64_t *);
extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *,
splitflags_t);
@@ -798,6 +801,7 @@ extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *);
extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *);
extern int zfs_get_hole_count(const char *, uint64_t *, uint64_t *);
+extern int zfs_remap_indirects(libzfs_handle_t *hdl, const char *);
/* Allow consumers to initialize libshare externally for optimal performance */
extern int zfs_init_libshare_arg(libzfs_handle_t *, int, void *);
diff --git a/lib/libzfs/common/libzfs_dataset.c b/lib/libzfs/common/libzfs_dataset.c
index eb26b711314b..0182afcbc44a 100644
--- a/lib/libzfs/common/libzfs_dataset.c
+++ b/lib/libzfs/common/libzfs_dataset.c
@@ -3780,6 +3780,24 @@ zfs_snapshot_cb(zfs_handle_t *zhp, void *arg)
return (rv);
}
+int
+zfs_remap_indirects(libzfs_handle_t *hdl, const char *fs)
+{
+ int err;
+ char errbuf[1024];
+
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot remap filesystem '%s' "), fs);
+
+ err = lzc_remap(fs);
+
+ if (err != 0) {
+ (void) zfs_standard_error(hdl, err, errbuf);
+ }
+
+ return (err);
+}
+
/*
* Creates snapshots. The keys in the snaps nvlist are the snapshots to be
* created.
diff --git a/lib/libzfs/common/libzfs_pool.c b/lib/libzfs/common/libzfs_pool.c
index d47f8704ed0a..4f636b6b0d83 100644
--- a/lib/libzfs/common/libzfs_pool.c
+++ b/lib/libzfs/common/libzfs_pool.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright 2016 Nexenta Systems, Inc.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
@@ -1333,6 +1333,13 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
(void) zfs_error(hdl, EZFS_BADDEV, msg);
break;
+ case EINVAL:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "invalid config; a pool with removing/removed "
+ "vdevs does not support adding raidz vdevs"));
+ (void) zfs_error(hdl, EZFS_BADDEV, msg);
+ break;
+
case EOVERFLOW:
/*
* This occurrs when one of the devices is below
@@ -2656,7 +2663,7 @@ zpool_vdev_attach(zpool_handle_t *zhp,
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
- &islog)) == 0)
+ &islog)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
if (avail_spare)
@@ -2755,7 +2762,8 @@ zpool_vdev_attach(zpool_handle_t *zhp,
break;
case EBUSY:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, "
+ "or pool has removing/removed vdevs"),
new_disk);
(void) zfs_error(hdl, EZFS_BADDEV, msg);
break;
@@ -2809,7 +2817,7 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
- NULL)) == 0)
+ NULL)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
if (avail_spare)
@@ -3098,8 +3106,7 @@ out:
}
/*
- * Remove the given device. Currently, this is supported only for hot spares
- * and level 2 cache devices.
+ * Remove the given device.
*/
int
zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
@@ -3116,26 +3123,61 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
- &islog)) == 0)
- return (zfs_error(hdl, EZFS_NODEVICE, msg));
- /*
- * XXX - this should just go away.
- */
- if (!avail_spare && !l2cache && !islog) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "only inactive hot spares, cache, top-level, "
- "or log devices can be removed"));
+ &islog)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
- }
version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
if (islog && version < SPA_VERSION_HOLES) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "pool must be upgrade to support log removal"));
+ "pool must be upgraded to support log removal"));
return (zfs_error(hdl, EZFS_BADVERSION, msg));
}
- verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
+ if (!islog && !avail_spare && !l2cache && zpool_is_bootable(zhp)) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "root pool can not have removed devices, "
+ "because GRUB does not understand them"));
+ return (zfs_error(hdl, EINVAL, msg));
+ }
+
+ zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
+
+ if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
+ return (0);
+
+ switch (errno) {
+
+ case EINVAL:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "invalid config; all top-level vdevs must "
+ "have the same sector size and not be raidz."));
+ (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
+ break;
+
+ case EBUSY:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "Pool busy; removal may already be in progress"));
+ (void) zfs_error(hdl, EZFS_BUSY, msg);
+ break;
+
+ default:
+ (void) zpool_standard_error(hdl, errno, msg);
+ }
+ return (-1);
+}
+
+int
+zpool_vdev_remove_cancel(zpool_handle_t *zhp)
+{
+ zfs_cmd_t zc = { 0 };
+ char msg[1024];
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+ (void) snprintf(msg, sizeof (msg),
+ dgettext(TEXT_DOMAIN, "cannot cancel removal"));
+
+ (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+ zc.zc_cookie = 1;
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
return (0);
@@ -3143,6 +3185,36 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
return (zpool_standard_error(hdl, errno, msg));
}
+int
+zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
+ uint64_t *sizep)
+{
+ char msg[1024];
+ nvlist_t *tgt;
+ boolean_t avail_spare, l2cache, islog;
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+ (void) snprintf(msg, sizeof (msg),
+ dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"),
+ path);
+
+ if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
+ &islog)) == NULL)
+ return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+ if (avail_spare || l2cache || islog) {
+ *sizep = 0;
+ return (0);
+ }
+
+ if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) != 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "indirect size not available"));
+ return (zfs_error(hdl, EINVAL, msg));
+ }
+ return (0);
+}
+
/*
* Clear the errors for the pool, or the particular device if specified.
*/
@@ -3170,7 +3242,7 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if (path) {
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
- &l2cache, NULL)) == 0)
+ &l2cache, NULL)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
/*
diff --git a/lib/libzfs/common/libzfs_util.c b/lib/libzfs/common/libzfs_util.c
index aba044add5e0..afa991d5a17e 100644
--- a/lib/libzfs/common/libzfs_util.c
+++ b/lib/libzfs/common/libzfs_util.c
@@ -235,6 +235,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
return (dgettext(TEXT_DOMAIN, "invalid diff data"));
case EZFS_POOLREADONLY:
return (dgettext(TEXT_DOMAIN, "pool is read-only"));
+ case EZFS_NO_PENDING:
+ return (dgettext(TEXT_DOMAIN, "operation is not "
+ "in progress"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
@@ -480,6 +483,10 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case EROFS:
zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap);
break;
+ /* There is no pending operation to cancel */
+ case ENOTACTIVE:
+ zfs_verror(hdl, EZFS_NO_PENDING, fmt, ap);
+ break;
default:
zfs_error_aux(hdl, strerror(error));
diff --git a/lib/libzfs_core/common/libzfs_core.c b/lib/libzfs_core/common/libzfs_core.c
index c910b677afef..7f3162168c06 100644
--- a/lib/libzfs_core/common/libzfs_core.c
+++ b/lib/libzfs_core/common/libzfs_core.c
@@ -239,6 +239,16 @@ lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
return (0);
}
+int
+lzc_remap(const char *fsname)
+{
+ int error;
+ nvlist_t *args = fnvlist_alloc();
+ error = lzc_ioctl(ZFS_IOC_REMAP, fsname, args, NULL);
+ nvlist_free(args);
+ return (error);
+}
+
/*
* Creates snapshots.
*
diff --git a/lib/libzfs_core/common/libzfs_core.h b/lib/libzfs_core/common/libzfs_core.h
index c21dbe109ade..c702660f00aa 100644
--- a/lib/libzfs_core/common/libzfs_core.h
+++ b/lib/libzfs_core/common/libzfs_core.h
@@ -47,6 +47,7 @@ enum lzc_dataset_type {
LZC_DATSET_TYPE_ZVOL
};
+int lzc_remap(const char *fsname);
int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **);
int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *);
int lzc_clone(const char *, const char *, nvlist_t *);