aboutsummaryrefslogtreecommitdiffstats
path: root/uts/common/fs/zfs/zap.c
diff options
context:
space:
mode:
Diffstat (limited to 'uts/common/fs/zfs/zap.c')
-rw-r--r--uts/common/fs/zfs/zap.c46
1 files changed, 45 insertions, 1 deletions
diff --git a/uts/common/fs/zfs/zap.c b/uts/common/fs/zfs/zap.c
index 7a1994f603c1..de8e2120776f 100644
--- a/uts/common/fs/zfs/zap.c
+++ b/uts/common/fs/zfs/zap.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -49,6 +49,36 @@
#include <sys/zap_impl.h>
#include <sys/zap_leaf.h>
+/*
+ * If zap_iterate_prefetch is set, we will prefetch the entire ZAP object
+ * (all leaf blocks) when we start iterating over it.
+ *
+ * For zap_cursor_init(), the callers all intend to iterate through all the
+ * entries. There are a few cases where an error (typically i/o error) could
+ * cause it to bail out early.
+ *
+ * For zap_cursor_init_serialized(), there are callers that do the iteration
+ * outside of ZFS. Typically they would iterate over everything, but we
+ * don't have control of that. E.g. zfs_ioc_snapshot_list_next(),
+ * zcp_snapshots_iter(), and other iterators over things in the MOS - these
+ * are called by /sbin/zfs and channel programs. The other example is
+ * zfs_readdir() which iterates over directory entries for the getdents()
+ * syscall. /sbin/ls iterates to the end (unless it receives a signal), but
+ * userland doesn't have to.
+ *
+ * Given that the ZAP entries aren't returned in a specific order, the only
+ * legitimate use cases for partial iteration would be:
+ *
+ * 1. Pagination: e.g. you only want to display 100 entries at a time, so you
+ * get the first 100 and then wait for the user to hit "next page", which
+ * they may never do).
+ *
+ * 2. You want to know if there are more than X entries, without relying on
+ * the zfs-specific implementation of the directory's st_size (which is
+ * the number of entries).
+ */
+boolean_t zap_iterate_prefetch = B_TRUE;
+
int fzap_default_block_shift = 14; /* 16k blocksize */
extern inline zap_phys_t *zap_f_phys(zap_t *zap);
@@ -1169,6 +1199,20 @@ fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
/* retrieve the next entry at or after zc_hash/zc_cd */
/* if no entry, return ENOENT */
+ /*
+ * If we are reading from the beginning, we're almost
+ * certain to iterate over the entire ZAP object. If there are
+ * multiple leaf blocks (freeblk > 2), prefetch the whole
+ * object, so that we read the leaf blocks concurrently.
+ * (Unless noprefetch was requested via zap_cursor_init_noprefetch()).
+ */
+ if (zc->zc_hash == 0 && zap_iterate_prefetch &&
+ zc->zc_prefetch && zap_f_phys(zap)->zap_freeblk > 2) {
+ dmu_prefetch(zc->zc_objset, zc->zc_zapobj, 0, 0,
+ zap_f_phys(zap)->zap_freeblk << FZAP_BLOCK_SHIFT(zap),
+ ZIO_PRIORITY_ASYNC_READ);
+ }
+
if (zc->zc_leaf &&
(ZAP_HASH_IDX(zc->zc_hash,
zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix_len) !=