aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Matuska <mm@FreeBSD.org>2012-07-18 08:12:04 +0000
committerMartin Matuska <mm@FreeBSD.org>2012-07-18 08:12:04 +0000
commitaf56e8c4b416d774961b41eee1eb349d657ebb8c (patch)
treee332d1e6089905f45302dedddb9967a87ade136a
parent93a00b0821525e25814cd720fafd04d600811c28 (diff)
downloadsrc-vendor/opensolaris.tar.gz
src-vendor/opensolaris.zip
Update vendor-sys/opensolaris to last OpenSolaris state (13149:b23a4dab3d50)vendor/opensolaris/20100818vendor/opensolaris
Add ZFS bits to vendor-sys/opensolaris Obtained from: https://hg.openindiana.org/upstream/oracle/onnv-gate
Notes
Notes: svn path=/vendor-sys/opensolaris/dist/; revision=238567 svn path=/vendor-sys/opensolaris/20100818/; revision=238568; tag=vendor/opensolaris/20100818
-rw-r--r--common/acl/acl_common.c1755
-rw-r--r--common/acl/acl_common.h59
-rw-r--r--common/atomic/amd64/atomic.s573
-rw-r--r--common/atomic/i386/atomic.s720
-rw-r--r--common/atomic/sparc/atomic.s801
-rw-r--r--common/list/list.c251
-rw-r--r--common/nvpair/nvpair.c3297
-rw-r--r--common/nvpair/nvpair_alloc_fixed.c120
-rw-r--r--common/unicode/u8_textprep.c2132
-rw-r--r--common/zfs/zfs_comutil.c202
-rw-r--r--common/zfs/zfs_comutil.h46
-rw-r--r--common/zfs/zfs_deleg.c237
-rw-r--r--common/zfs/zfs_deleg.h85
-rw-r--r--common/zfs/zfs_fletcher.c246
-rw-r--r--common/zfs/zfs_fletcher.h53
-rw-r--r--common/zfs/zfs_namecheck.c345
-rw-r--r--common/zfs/zfs_namecheck.h58
-rw-r--r--common/zfs/zfs_prop.c595
-rw-r--r--common/zfs/zfs_prop.h129
-rw-r--r--common/zfs/zpool_prop.c202
-rw-r--r--common/zfs/zprop_common.c426
-rw-r--r--uts/common/Makefile.files2007
-rw-r--r--uts/common/dtrace/dtrace.c204
-rw-r--r--uts/common/dtrace/fasttrap.c25
-rw-r--r--uts/common/dtrace/lockstat.c10
-rw-r--r--uts/common/dtrace/profile.c9
-rw-r--r--uts/common/dtrace/sdt_subr.c311
-rw-r--r--uts/common/dtrace/systrace.c11
-rw-r--r--uts/common/fs/gfs.c1178
-rw-r--r--uts/common/fs/vnode.c4536
-rw-r--r--uts/common/fs/zfs/arc.c4658
-rw-r--r--uts/common/fs/zfs/bplist.c69
-rw-r--r--uts/common/fs/zfs/bpobj.c495
-rw-r--r--uts/common/fs/zfs/dbuf.c2707
-rw-r--r--uts/common/fs/zfs/ddt.c1146
-rw-r--r--uts/common/fs/zfs/ddt_zap.c157
-rw-r--r--uts/common/fs/zfs/dmu.c1764
-rw-r--r--uts/common/fs/zfs/dmu_diff.c221
-rw-r--r--uts/common/fs/zfs/dmu_object.c196
-rw-r--r--uts/common/fs/zfs/dmu_objset.c1789
-rw-r--r--uts/common/fs/zfs/dmu_send.c1606
-rw-r--r--uts/common/fs/zfs/dmu_traverse.c482
-rw-r--r--uts/common/fs/zfs/dmu_tx.c1382
-rw-r--r--uts/common/fs/zfs/dmu_zfetch.c724
-rw-r--r--uts/common/fs/zfs/dnode.c1993
-rw-r--r--uts/common/fs/zfs/dnode_sync.c693
-rw-r--r--uts/common/fs/zfs/dsl_dataset.c4030
-rw-r--r--uts/common/fs/zfs/dsl_deadlist.c474
-rw-r--r--uts/common/fs/zfs/dsl_deleg.c746
-rw-r--r--uts/common/fs/zfs/dsl_dir.c1416
-rw-r--r--uts/common/fs/zfs/dsl_pool.c848
-rw-r--r--uts/common/fs/zfs/dsl_prop.c1153
-rw-r--r--uts/common/fs/zfs/dsl_scan.c1766
-rw-r--r--uts/common/fs/zfs/dsl_synctask.c240
-rw-r--r--uts/common/fs/zfs/gzip.c69
-rw-r--r--uts/common/fs/zfs/lzjb.c123
-rw-r--r--uts/common/fs/zfs/metaslab.c1604
-rw-r--r--uts/common/fs/zfs/refcount.c223
-rw-r--r--uts/common/fs/zfs/rrwlock.c264
-rw-r--r--uts/common/fs/zfs/sa.c1970
-rw-r--r--uts/common/fs/zfs/sha256.c50
-rw-r--r--uts/common/fs/zfs/spa.c5882
-rw-r--r--uts/common/fs/zfs/spa_config.c487
-rw-r--r--uts/common/fs/zfs/spa_errlog.c403
-rw-r--r--uts/common/fs/zfs/spa_history.c502
-rw-r--r--uts/common/fs/zfs/spa_misc.c1672
-rw-r--r--uts/common/fs/zfs/space_map.c616
-rw-r--r--uts/common/fs/zfs/sys/arc.h142
-rw-r--r--uts/common/fs/zfs/sys/bplist.h57
-rw-r--r--uts/common/fs/zfs/sys/bpobj.h91
-rw-r--r--uts/common/fs/zfs/sys/dbuf.h375
-rw-r--r--uts/common/fs/zfs/sys/ddt.h246
-rw-r--r--uts/common/fs/zfs/sys/dmu.h740
-rw-r--r--uts/common/fs/zfs/sys/dmu_impl.h272
-rw-r--r--uts/common/fs/zfs/sys/dmu_objset.h183
-rw-r--r--uts/common/fs/zfs/sys/dmu_traverse.h64
-rw-r--r--uts/common/fs/zfs/sys/dmu_tx.h148
-rw-r--r--uts/common/fs/zfs/sys/dmu_zfetch.h76
-rw-r--r--uts/common/fs/zfs/sys/dnode.h329
-rw-r--r--uts/common/fs/zfs/sys/dsl_dataset.h283
-rw-r--r--uts/common/fs/zfs/sys/dsl_deadlist.h87
-rw-r--r--uts/common/fs/zfs/sys/dsl_deleg.h78
-rw-r--r--uts/common/fs/zfs/sys/dsl_dir.h167
-rw-r--r--uts/common/fs/zfs/sys/dsl_pool.h151
-rw-r--r--uts/common/fs/zfs/sys/dsl_prop.h119
-rw-r--r--uts/common/fs/zfs/sys/dsl_scan.h108
-rw-r--r--uts/common/fs/zfs/sys/dsl_synctask.h79
-rw-r--r--uts/common/fs/zfs/sys/metaslab.h80
-rw-r--r--uts/common/fs/zfs/sys/metaslab_impl.h89
-rw-r--r--uts/common/fs/zfs/sys/refcount.h107
-rw-r--r--uts/common/fs/zfs/sys/rrwlock.h80
-rw-r--r--uts/common/fs/zfs/sys/sa.h170
-rw-r--r--uts/common/fs/zfs/sys/sa_impl.h287
-rw-r--r--uts/common/fs/zfs/sys/spa.h706
-rw-r--r--uts/common/fs/zfs/sys/spa_boot.h42
-rw-r--r--uts/common/fs/zfs/sys/spa_impl.h235
-rw-r--r--uts/common/fs/zfs/sys/space_map.h179
-rw-r--r--uts/common/fs/zfs/sys/txg.h131
-rw-r--r--uts/common/fs/zfs/sys/txg_impl.h75
-rw-r--r--uts/common/fs/zfs/sys/uberblock.h46
-rw-r--r--uts/common/fs/zfs/sys/uberblock_impl.h63
-rw-r--r--uts/common/fs/zfs/sys/unique.h59
-rw-r--r--uts/common/fs/zfs/sys/vdev.h161
-rw-r--r--uts/common/fs/zfs/sys/vdev_disk.h56
-rw-r--r--uts/common/fs/zfs/sys/vdev_file.h46
-rw-r--r--uts/common/fs/zfs/sys/vdev_impl.h322
-rw-r--r--uts/common/fs/zfs/sys/zap.h482
-rw-r--r--uts/common/fs/zfs/sys/zap_impl.h228
-rw-r--r--uts/common/fs/zfs/sys/zap_leaf.h245
-rw-r--r--uts/common/fs/zfs/sys/zfs_acl.h245
-rw-r--r--uts/common/fs/zfs/sys/zfs_context.h73
-rw-r--r--uts/common/fs/zfs/sys/zfs_ctldir.h73
-rw-r--r--uts/common/fs/zfs/sys/zfs_debug.h82
-rw-r--r--uts/common/fs/zfs/sys/zfs_dir.h74
-rw-r--r--uts/common/fs/zfs/sys/zfs_fuid.h131
-rw-r--r--uts/common/fs/zfs/sys/zfs_ioctl.h349
-rw-r--r--uts/common/fs/zfs/sys/zfs_onexit.h66
-rw-r--r--uts/common/fs/zfs/sys/zfs_rlock.h89
-rw-r--r--uts/common/fs/zfs/sys/zfs_sa.h143
-rw-r--r--uts/common/fs/zfs/sys/zfs_stat.h56
-rw-r--r--uts/common/fs/zfs/sys/zfs_vfsops.h159
-rw-r--r--uts/common/fs/zfs/sys/zfs_znode.h361
-rw-r--r--uts/common/fs/zfs/sys/zil.h428
-rw-r--r--uts/common/fs/zfs/sys/zil_impl.h147
-rw-r--r--uts/common/fs/zfs/sys/zio.h559
-rw-r--r--uts/common/fs/zfs/sys/zio_checksum.h75
-rw-r--r--uts/common/fs/zfs/sys/zio_compress.h84
-rw-r--r--uts/common/fs/zfs/sys/zio_impl.h175
-rw-r--r--uts/common/fs/zfs/sys/zrlock.h66
-rw-r--r--uts/common/fs/zfs/sys/zvol.h76
-rw-r--r--uts/common/fs/zfs/txg.c724
-rw-r--r--uts/common/fs/zfs/uberblock.c61
-rw-r--r--uts/common/fs/zfs/unique.c116
-rw-r--r--uts/common/fs/zfs/vdev.c3130
-rw-r--r--uts/common/fs/zfs/vdev_cache.c416
-rw-r--r--uts/common/fs/zfs/vdev_disk.c610
-rw-r--r--uts/common/fs/zfs/vdev_file.c217
-rw-r--r--uts/common/fs/zfs/vdev_label.c1216
-rw-r--r--uts/common/fs/zfs/vdev_mirror.c485
-rw-r--r--uts/common/fs/zfs/vdev_missing.c100
-rw-r--r--uts/common/fs/zfs/vdev_queue.c406
-rw-r--r--uts/common/fs/zfs/vdev_raidz.c2146
-rw-r--r--uts/common/fs/zfs/vdev_root.c116
-rw-r--r--uts/common/fs/zfs/zap.c1354
-rw-r--r--uts/common/fs/zfs/zap_leaf.c872
-rw-r--r--uts/common/fs/zfs/zap_micro.c1455
-rw-r--r--uts/common/fs/zfs/zfs.conf28
-rw-r--r--uts/common/fs/zfs/zfs_acl.c2748
-rw-r--r--uts/common/fs/zfs/zfs_byteswap.c199
-rw-r--r--uts/common/fs/zfs/zfs_ctldir.c1349
-rw-r--r--uts/common/fs/zfs/zfs_debug.c95
-rw-r--r--uts/common/fs/zfs/zfs_dir.c1089
-rw-r--r--uts/common/fs/zfs/zfs_fm.c863
-rw-r--r--uts/common/fs/zfs/zfs_fuid.c756
-rw-r--r--uts/common/fs/zfs/zfs_ioctl.c5122
-rw-r--r--uts/common/fs/zfs/zfs_log.c676
-rw-r--r--uts/common/fs/zfs/zfs_onexit.c246
-rw-r--r--uts/common/fs/zfs/zfs_replay.c931
-rw-r--r--uts/common/fs/zfs/zfs_rlock.c602
-rw-r--r--uts/common/fs/zfs/zfs_sa.c334
-rw-r--r--uts/common/fs/zfs/zfs_vfsops.c2303
-rw-r--r--uts/common/fs/zfs/zfs_vnops.c5243
-rw-r--r--uts/common/fs/zfs/zfs_znode.c2121
-rw-r--r--uts/common/fs/zfs/zil.c1992
-rw-r--r--uts/common/fs/zfs/zio.c2952
-rw-r--r--uts/common/fs/zfs/zio_checksum.c274
-rw-r--r--uts/common/fs/zfs/zio_compress.c132
-rw-r--r--uts/common/fs/zfs/zio_inject.c515
-rw-r--r--uts/common/fs/zfs/zle.c86
-rw-r--r--uts/common/fs/zfs/zrlock.c194
-rw-r--r--uts/common/fs/zfs/zvol.c1894
-rw-r--r--uts/common/os/callb.c410
-rw-r--r--uts/common/os/fm.c1386
-rw-r--r--uts/common/os/nvpair_alloc_system.c64
-rw-r--r--uts/common/sys/acl.h302
-rw-r--r--uts/common/sys/acl_impl.h61
-rw-r--r--uts/common/sys/avl.h309
-rw-r--r--uts/common/sys/avl_impl.h164
-rw-r--r--uts/common/sys/bitmap.h194
-rw-r--r--uts/common/sys/callb.h213
-rw-r--r--uts/common/sys/ccompile.h127
-rw-r--r--uts/common/sys/compress.h46
-rw-r--r--uts/common/sys/cpupart.h27
-rw-r--r--uts/common/sys/cpuvar.h116
-rw-r--r--uts/common/sys/cred.h193
-rw-r--r--uts/common/sys/debug.h146
-rw-r--r--uts/common/sys/dtrace.h25
-rw-r--r--uts/common/sys/errorq.h83
-rw-r--r--uts/common/sys/extdirent.h77
-rw-r--r--uts/common/sys/feature_tests.h396
-rw-r--r--uts/common/sys/fm/fs/zfs.h96
-rw-r--r--uts/common/sys/fm/protocol.h371
-rw-r--r--uts/common/sys/fm/util.h103
-rw-r--r--uts/common/sys/fs/zfs.h912
-rw-r--r--uts/common/sys/fs/zut.h93
-rw-r--r--uts/common/sys/gfs.h173
-rw-r--r--uts/common/sys/idmap.h97
-rw-r--r--uts/common/sys/isa_defs.h487
-rw-r--r--uts/common/sys/list.h67
-rw-r--r--uts/common/sys/list_impl.h53
-rw-r--r--uts/common/sys/note.h56
-rw-r--r--uts/common/sys/nvpair.h281
-rw-r--r--uts/common/sys/nvpair_impl.h73
-rw-r--r--uts/common/sys/processor.h149
-rw-r--r--uts/common/sys/procset.h160
-rw-r--r--uts/common/sys/synch.h162
-rw-r--r--uts/common/sys/sysevent.h283
-rw-r--r--uts/common/sys/sysevent/dev.h256
-rw-r--r--uts/common/sys/sysevent/eventdefs.h275
-rw-r--r--uts/common/sys/sysmacros.h378
-rw-r--r--uts/common/sys/taskq.h92
-rw-r--r--uts/common/sys/u8_textprep.h113
-rw-r--r--uts/common/sys/u8_textprep_data.h35376
-rw-r--r--uts/common/sys/vnode.h1431
-rw-r--r--uts/common/sys/zmod.h68
-rw-r--r--uts/common/zmod/adler32.c149
-rw-r--r--uts/common/zmod/crc32.c428
-rw-r--r--uts/common/zmod/crc32.h443
-rw-r--r--uts/common/zmod/deflate.c1742
-rw-r--r--uts/common/zmod/deflate.h331
-rw-r--r--uts/common/zmod/inffast.c320
-rw-r--r--uts/common/zmod/inffast.h13
-rw-r--r--uts/common/zmod/inffixed.h96
-rw-r--r--uts/common/zmod/inflate.c1395
-rw-r--r--uts/common/zmod/inflate.h117
-rw-r--r--uts/common/zmod/inftrees.c331
-rw-r--r--uts/common/zmod/inftrees.h57
-rw-r--r--uts/common/zmod/trees.c1219
-rw-r--r--uts/common/zmod/zconf.h117
-rw-r--r--uts/common/zmod/zlib.h1359
-rw-r--r--uts/common/zmod/zmod.c113
-rw-r--r--uts/common/zmod/zmod_subr.c85
-rw-r--r--uts/common/zmod/zutil.c324
-rw-r--r--uts/common/zmod/zutil.h274
234 files changed, 177353 insertions, 110 deletions
diff --git a/common/acl/acl_common.c b/common/acl/acl_common.c
new file mode 100644
index 000000000000..eafc47d10f2d
--- /dev/null
+++ b/common/acl/acl_common.c
@@ -0,0 +1,1755 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/avl.h>
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <acl/acl_common.h>
+#else
+#include <errno.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <grp.h>
+#include <pwd.h>
+#include <acl_common.h>
+#define ASSERT assert
+#endif
+
+#define ACE_POSIX_SUPPORTED_BITS (ACE_READ_DATA | \
+ ACE_WRITE_DATA | ACE_APPEND_DATA | ACE_EXECUTE | \
+ ACE_READ_ATTRIBUTES | ACE_READ_ACL | ACE_WRITE_ACL)
+
+
+#define ACL_SYNCHRONIZE_SET_DENY 0x0000001
+#define ACL_SYNCHRONIZE_SET_ALLOW 0x0000002
+#define ACL_SYNCHRONIZE_ERR_DENY 0x0000004
+#define ACL_SYNCHRONIZE_ERR_ALLOW 0x0000008
+
+#define ACL_WRITE_OWNER_SET_DENY 0x0000010
+#define ACL_WRITE_OWNER_SET_ALLOW 0x0000020
+#define ACL_WRITE_OWNER_ERR_DENY 0x0000040
+#define ACL_WRITE_OWNER_ERR_ALLOW 0x0000080
+
+#define ACL_DELETE_SET_DENY 0x0000100
+#define ACL_DELETE_SET_ALLOW 0x0000200
+#define ACL_DELETE_ERR_DENY 0x0000400
+#define ACL_DELETE_ERR_ALLOW 0x0000800
+
+#define ACL_WRITE_ATTRS_OWNER_SET_DENY 0x0001000
+#define ACL_WRITE_ATTRS_OWNER_SET_ALLOW 0x0002000
+#define ACL_WRITE_ATTRS_OWNER_ERR_DENY 0x0004000
+#define ACL_WRITE_ATTRS_OWNER_ERR_ALLOW 0x0008000
+
+#define ACL_WRITE_ATTRS_WRITER_SET_DENY 0x0010000
+#define ACL_WRITE_ATTRS_WRITER_SET_ALLOW 0x0020000
+#define ACL_WRITE_ATTRS_WRITER_ERR_DENY 0x0040000
+#define ACL_WRITE_ATTRS_WRITER_ERR_ALLOW 0x0080000
+
+#define ACL_WRITE_NAMED_WRITER_SET_DENY 0x0100000
+#define ACL_WRITE_NAMED_WRITER_SET_ALLOW 0x0200000
+#define ACL_WRITE_NAMED_WRITER_ERR_DENY 0x0400000
+#define ACL_WRITE_NAMED_WRITER_ERR_ALLOW 0x0800000
+
+#define ACL_READ_NAMED_READER_SET_DENY 0x1000000
+#define ACL_READ_NAMED_READER_SET_ALLOW 0x2000000
+#define ACL_READ_NAMED_READER_ERR_DENY 0x4000000
+#define ACL_READ_NAMED_READER_ERR_ALLOW 0x8000000
+
+
+#define ACE_VALID_MASK_BITS (\
+ ACE_READ_DATA | \
+ ACE_LIST_DIRECTORY | \
+ ACE_WRITE_DATA | \
+ ACE_ADD_FILE | \
+ ACE_APPEND_DATA | \
+ ACE_ADD_SUBDIRECTORY | \
+ ACE_READ_NAMED_ATTRS | \
+ ACE_WRITE_NAMED_ATTRS | \
+ ACE_EXECUTE | \
+ ACE_DELETE_CHILD | \
+ ACE_READ_ATTRIBUTES | \
+ ACE_WRITE_ATTRIBUTES | \
+ ACE_DELETE | \
+ ACE_READ_ACL | \
+ ACE_WRITE_ACL | \
+ ACE_WRITE_OWNER | \
+ ACE_SYNCHRONIZE)
+
+#define ACE_MASK_UNDEFINED 0x80000000
+
+#define ACE_VALID_FLAG_BITS (ACE_FILE_INHERIT_ACE | \
+ ACE_DIRECTORY_INHERIT_ACE | \
+ ACE_NO_PROPAGATE_INHERIT_ACE | ACE_INHERIT_ONLY_ACE | \
+ ACE_SUCCESSFUL_ACCESS_ACE_FLAG | ACE_FAILED_ACCESS_ACE_FLAG | \
+ ACE_IDENTIFIER_GROUP | ACE_OWNER | ACE_GROUP | ACE_EVERYONE)
+
+/*
+ * ACL conversion helpers
+ */
+
+typedef enum {
+ ace_unused,
+ ace_user_obj,
+ ace_user,
+ ace_group, /* includes GROUP and GROUP_OBJ */
+ ace_other_obj
+} ace_to_aent_state_t;
+
+typedef struct acevals {
+ uid_t key;
+ avl_node_t avl;
+ uint32_t mask;
+ uint32_t allowed;
+ uint32_t denied;
+ int aent_type;
+} acevals_t;
+
+typedef struct ace_list {
+ acevals_t user_obj;
+ avl_tree_t user;
+ int numusers;
+ acevals_t group_obj;
+ avl_tree_t group;
+ int numgroups;
+ acevals_t other_obj;
+ uint32_t acl_mask;
+ int hasmask;
+ int dfacl_flag;
+ ace_to_aent_state_t state;
+ int seen; /* bitmask of all aclent_t a_type values seen */
+} ace_list_t;
+
+/*
+ * Generic shellsort, from K&R (1st ed, p 58.), somewhat modified.
+ * v = Ptr to array/vector of objs
+ * n = # objs in the array
+ * s = size of each obj (must be multiples of a word size)
+ * f = ptr to function to compare two objs
+ * returns (-1 = less than, 0 = equal, 1 = greater than
+ */
+void
+ksort(caddr_t v, int n, int s, int (*f)())
+{
+ int g, i, j, ii;
+ unsigned int *p1, *p2;
+ unsigned int tmp;
+
+ /* No work to do */
+ if (v == NULL || n <= 1)
+ return;
+
+ /* Sanity check on arguments */
+ ASSERT(((uintptr_t)v & 0x3) == 0 && (s & 0x3) == 0);
+ ASSERT(s > 0);
+ for (g = n / 2; g > 0; g /= 2) {
+ for (i = g; i < n; i++) {
+ for (j = i - g; j >= 0 &&
+ (*f)(v + j * s, v + (j + g) * s) == 1;
+ j -= g) {
+ p1 = (void *)(v + j * s);
+ p2 = (void *)(v + (j + g) * s);
+ for (ii = 0; ii < s / 4; ii++) {
+ tmp = *p1;
+ *p1++ = *p2;
+ *p2++ = tmp;
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Compare two acls, all fields. Returns:
+ * -1 (less than)
+ * 0 (equal)
+ * +1 (greater than)
+ */
+int
+cmp2acls(void *a, void *b)
+{
+ aclent_t *x = (aclent_t *)a;
+ aclent_t *y = (aclent_t *)b;
+
+ /* Compare types */
+ if (x->a_type < y->a_type)
+ return (-1);
+ if (x->a_type > y->a_type)
+ return (1);
+ /* Equal types; compare id's */
+ if (x->a_id < y->a_id)
+ return (-1);
+ if (x->a_id > y->a_id)
+ return (1);
+ /* Equal ids; compare perms */
+ if (x->a_perm < y->a_perm)
+ return (-1);
+ if (x->a_perm > y->a_perm)
+ return (1);
+ /* Totally equal */
+ return (0);
+}
+
+/*ARGSUSED*/
+static void *
+cacl_realloc(void *ptr, size_t size, size_t new_size)
+{
+#if defined(_KERNEL)
+ void *tmp;
+
+ tmp = kmem_alloc(new_size, KM_SLEEP);
+ (void) memcpy(tmp, ptr, (size < new_size) ? size : new_size);
+ kmem_free(ptr, size);
+ return (tmp);
+#else
+ return (realloc(ptr, new_size));
+#endif
+}
+
+static int
+cacl_malloc(void **ptr, size_t size)
+{
+#if defined(_KERNEL)
+ *ptr = kmem_zalloc(size, KM_SLEEP);
+ return (0);
+#else
+ *ptr = calloc(1, size);
+ if (*ptr == NULL)
+ return (errno);
+
+ return (0);
+#endif
+}
+
+/*ARGSUSED*/
+static void
+cacl_free(void *ptr, size_t size)
+{
+#if defined(_KERNEL)
+ kmem_free(ptr, size);
+#else
+ free(ptr);
+#endif
+}
+
+acl_t *
+acl_alloc(enum acl_type type)
+{
+ acl_t *aclp;
+
+ if (cacl_malloc((void **)&aclp, sizeof (acl_t)) != 0)
+ return (NULL);
+
+ aclp->acl_aclp = NULL;
+ aclp->acl_cnt = 0;
+
+ switch (type) {
+ case ACE_T:
+ aclp->acl_type = ACE_T;
+ aclp->acl_entry_size = sizeof (ace_t);
+ break;
+ case ACLENT_T:
+ aclp->acl_type = ACLENT_T;
+ aclp->acl_entry_size = sizeof (aclent_t);
+ break;
+ default:
+ acl_free(aclp);
+ aclp = NULL;
+ }
+ return (aclp);
+}
+
+/*
+ * Free acl_t structure
+ */
+void
+acl_free(acl_t *aclp)
+{
+ int acl_size;
+
+ if (aclp == NULL)
+ return;
+
+ if (aclp->acl_aclp) {
+ acl_size = aclp->acl_cnt * aclp->acl_entry_size;
+ cacl_free(aclp->acl_aclp, acl_size);
+ }
+
+ cacl_free(aclp, sizeof (acl_t));
+}
+
+static uint32_t
+access_mask_set(int haswriteperm, int hasreadperm, int isowner, int isallow)
+{
+ uint32_t access_mask = 0;
+ int acl_produce;
+ int synchronize_set = 0, write_owner_set = 0;
+ int delete_set = 0, write_attrs_set = 0;
+ int read_named_set = 0, write_named_set = 0;
+
+ acl_produce = (ACL_SYNCHRONIZE_SET_ALLOW |
+ ACL_WRITE_ATTRS_OWNER_SET_ALLOW |
+ ACL_WRITE_ATTRS_WRITER_SET_DENY);
+
+ if (isallow) {
+ synchronize_set = ACL_SYNCHRONIZE_SET_ALLOW;
+ write_owner_set = ACL_WRITE_OWNER_SET_ALLOW;
+ delete_set = ACL_DELETE_SET_ALLOW;
+ if (hasreadperm)
+ read_named_set = ACL_READ_NAMED_READER_SET_ALLOW;
+ if (haswriteperm)
+ write_named_set = ACL_WRITE_NAMED_WRITER_SET_ALLOW;
+ if (isowner)
+ write_attrs_set = ACL_WRITE_ATTRS_OWNER_SET_ALLOW;
+ else if (haswriteperm)
+ write_attrs_set = ACL_WRITE_ATTRS_WRITER_SET_ALLOW;
+ } else {
+
+ synchronize_set = ACL_SYNCHRONIZE_SET_DENY;
+ write_owner_set = ACL_WRITE_OWNER_SET_DENY;
+ delete_set = ACL_DELETE_SET_DENY;
+ if (hasreadperm)
+ read_named_set = ACL_READ_NAMED_READER_SET_DENY;
+ if (haswriteperm)
+ write_named_set = ACL_WRITE_NAMED_WRITER_SET_DENY;
+ if (isowner)
+ write_attrs_set = ACL_WRITE_ATTRS_OWNER_SET_DENY;
+ else if (haswriteperm)
+ write_attrs_set = ACL_WRITE_ATTRS_WRITER_SET_DENY;
+ else
+ /*
+ * If the entity is not the owner and does not
+ * have write permissions ACE_WRITE_ATTRIBUTES will
+ * always go in the DENY ACE.
+ */
+ access_mask |= ACE_WRITE_ATTRIBUTES;
+ }
+
+ if (acl_produce & synchronize_set)
+ access_mask |= ACE_SYNCHRONIZE;
+ if (acl_produce & write_owner_set)
+ access_mask |= ACE_WRITE_OWNER;
+ if (acl_produce & delete_set)
+ access_mask |= ACE_DELETE;
+ if (acl_produce & write_attrs_set)
+ access_mask |= ACE_WRITE_ATTRIBUTES;
+ if (acl_produce & read_named_set)
+ access_mask |= ACE_READ_NAMED_ATTRS;
+ if (acl_produce & write_named_set)
+ access_mask |= ACE_WRITE_NAMED_ATTRS;
+
+ return (access_mask);
+}
+
+/*
+ * Given an mode_t, convert it into an access_mask as used
+ * by nfsace, assuming aclent_t -> nfsace semantics.
+ */
+static uint32_t
+mode_to_ace_access(mode_t mode, int isdir, int isowner, int isallow)
+{
+ uint32_t access = 0;
+ int haswriteperm = 0;
+ int hasreadperm = 0;
+
+ if (isallow) {
+ haswriteperm = (mode & S_IWOTH);
+ hasreadperm = (mode & S_IROTH);
+ } else {
+ haswriteperm = !(mode & S_IWOTH);
+ hasreadperm = !(mode & S_IROTH);
+ }
+
+ /*
+ * The following call takes care of correctly setting the following
+ * mask bits in the access_mask:
+ * ACE_SYNCHRONIZE, ACE_WRITE_OWNER, ACE_DELETE,
+ * ACE_WRITE_ATTRIBUTES, ACE_WRITE_NAMED_ATTRS, ACE_READ_NAMED_ATTRS
+ */
+ access = access_mask_set(haswriteperm, hasreadperm, isowner, isallow);
+
+ if (isallow) {
+ access |= ACE_READ_ACL | ACE_READ_ATTRIBUTES;
+ if (isowner)
+ access |= ACE_WRITE_ACL;
+ } else {
+ if (! isowner)
+ access |= ACE_WRITE_ACL;
+ }
+
+ /* read */
+ if (mode & S_IROTH) {
+ access |= ACE_READ_DATA;
+ }
+ /* write */
+ if (mode & S_IWOTH) {
+ access |= ACE_WRITE_DATA |
+ ACE_APPEND_DATA;
+ if (isdir)
+ access |= ACE_DELETE_CHILD;
+ }
+ /* exec */
+ if (mode & 01) {
+ access |= ACE_EXECUTE;
+ }
+
+ return (access);
+}
+
+/*
+ * Given an nfsace (presumably an ALLOW entry), make a
+ * corresponding DENY entry at the address given.
+ */
+static void
+ace_make_deny(ace_t *allow, ace_t *deny, int isdir, int isowner)
+{
+ (void) memcpy(deny, allow, sizeof (ace_t));
+
+ deny->a_who = allow->a_who;
+
+ deny->a_type = ACE_ACCESS_DENIED_ACE_TYPE;
+ deny->a_access_mask ^= ACE_POSIX_SUPPORTED_BITS;
+ if (isdir)
+ deny->a_access_mask ^= ACE_DELETE_CHILD;
+
+ deny->a_access_mask &= ~(ACE_SYNCHRONIZE | ACE_WRITE_OWNER |
+ ACE_DELETE | ACE_WRITE_ATTRIBUTES | ACE_READ_NAMED_ATTRS |
+ ACE_WRITE_NAMED_ATTRS);
+ deny->a_access_mask |= access_mask_set((allow->a_access_mask &
+ ACE_WRITE_DATA), (allow->a_access_mask & ACE_READ_DATA), isowner,
+ B_FALSE);
+}
+/*
+ * Make an initial pass over an array of aclent_t's. Gather
+ * information such as an ACL_MASK (if any), number of users,
+ * number of groups, and whether the array needs to be sorted.
+ */
+static int
+ln_aent_preprocess(aclent_t *aclent, int n,
+ int *hasmask, mode_t *mask,
+ int *numuser, int *numgroup, int *needsort)
+{
+ int error = 0;
+ int i;
+ int curtype = 0;
+
+ *hasmask = 0;
+ *mask = 07;
+ *needsort = 0;
+ *numuser = 0;
+ *numgroup = 0;
+
+ for (i = 0; i < n; i++) {
+ if (aclent[i].a_type < curtype)
+ *needsort = 1;
+ else if (aclent[i].a_type > curtype)
+ curtype = aclent[i].a_type;
+ if (aclent[i].a_type & USER)
+ (*numuser)++;
+ if (aclent[i].a_type & (GROUP | GROUP_OBJ))
+ (*numgroup)++;
+ if (aclent[i].a_type & CLASS_OBJ) {
+ if (*hasmask) {
+ error = EINVAL;
+ goto out;
+ } else {
+ *hasmask = 1;
+ *mask = aclent[i].a_perm;
+ }
+ }
+ }
+
+ if ((! *hasmask) && (*numuser + *numgroup > 1)) {
+ error = EINVAL;
+ goto out;
+ }
+
+out:
+ return (error);
+}
+
+/*
+ * Convert an array of aclent_t into an array of nfsace entries,
+ * following POSIX draft -> nfsv4 conversion semantics as outlined in
+ * the IETF draft.
+ */
+static int
+ln_aent_to_ace(aclent_t *aclent, int n, ace_t **acepp, int *rescount, int isdir)
+{
+ int error = 0;
+ mode_t mask;
+ int numuser, numgroup, needsort;
+ int resultsize = 0;
+ int i, groupi = 0, skip;
+ ace_t *acep, *result = NULL;
+ int hasmask;
+
+ error = ln_aent_preprocess(aclent, n, &hasmask, &mask,
+ &numuser, &numgroup, &needsort);
+ if (error != 0)
+ goto out;
+
+ /* allow + deny for each aclent */
+ resultsize = n * 2;
+ if (hasmask) {
+ /*
+ * stick extra deny on the group_obj and on each
+ * user|group for the mask (the group_obj was added
+ * into the count for numgroup)
+ */
+ resultsize += numuser + numgroup;
+ /* ... and don't count the mask itself */
+ resultsize -= 2;
+ }
+
+ /* sort the source if necessary */
+ if (needsort)
+ ksort((caddr_t)aclent, n, sizeof (aclent_t), cmp2acls);
+
+ if (cacl_malloc((void **)&result, resultsize * sizeof (ace_t)) != 0)
+ goto out;
+
+ acep = result;
+
+ for (i = 0; i < n; i++) {
+ /*
+ * don't process CLASS_OBJ (mask); mask was grabbed in
+ * ln_aent_preprocess()
+ */
+ if (aclent[i].a_type & CLASS_OBJ)
+ continue;
+
+ /* If we need an ACL_MASK emulator, prepend it now */
+ if ((hasmask) &&
+ (aclent[i].a_type & (USER | GROUP | GROUP_OBJ))) {
+ acep->a_type = ACE_ACCESS_DENIED_ACE_TYPE;
+ acep->a_flags = 0;
+ if (aclent[i].a_type & GROUP_OBJ) {
+ acep->a_who = (uid_t)-1;
+ acep->a_flags |=
+ (ACE_IDENTIFIER_GROUP|ACE_GROUP);
+ } else if (aclent[i].a_type & USER) {
+ acep->a_who = aclent[i].a_id;
+ } else {
+ acep->a_who = aclent[i].a_id;
+ acep->a_flags |= ACE_IDENTIFIER_GROUP;
+ }
+ if (aclent[i].a_type & ACL_DEFAULT) {
+ acep->a_flags |= ACE_INHERIT_ONLY_ACE |
+ ACE_FILE_INHERIT_ACE |
+ ACE_DIRECTORY_INHERIT_ACE;
+ }
+ /*
+ * Set the access mask for the prepended deny
+ * ace. To do this, we invert the mask (found
+ * in ln_aent_preprocess()) then convert it to an
+ * DENY ace access_mask.
+ */
+ acep->a_access_mask = mode_to_ace_access((mask ^ 07),
+ isdir, 0, 0);
+ acep += 1;
+ }
+
+ /* handle a_perm -> access_mask */
+ acep->a_access_mask = mode_to_ace_access(aclent[i].a_perm,
+ isdir, aclent[i].a_type & USER_OBJ, 1);
+
+ /* emulate a default aclent */
+ if (aclent[i].a_type & ACL_DEFAULT) {
+ acep->a_flags |= ACE_INHERIT_ONLY_ACE |
+ ACE_FILE_INHERIT_ACE |
+ ACE_DIRECTORY_INHERIT_ACE;
+ }
+
+ /*
+ * handle a_perm and a_id
+ *
+ * this must be done last, since it involves the
+ * corresponding deny aces, which are handled
+ * differently for each different a_type.
+ */
+ if (aclent[i].a_type & USER_OBJ) {
+ acep->a_who = (uid_t)-1;
+ acep->a_flags |= ACE_OWNER;
+ ace_make_deny(acep, acep + 1, isdir, B_TRUE);
+ acep += 2;
+ } else if (aclent[i].a_type & USER) {
+ acep->a_who = aclent[i].a_id;
+ ace_make_deny(acep, acep + 1, isdir, B_FALSE);
+ acep += 2;
+ } else if (aclent[i].a_type & (GROUP_OBJ | GROUP)) {
+ if (aclent[i].a_type & GROUP_OBJ) {
+ acep->a_who = (uid_t)-1;
+ acep->a_flags |= ACE_GROUP;
+ } else {
+ acep->a_who = aclent[i].a_id;
+ }
+ acep->a_flags |= ACE_IDENTIFIER_GROUP;
+ /*
+ * Set the corresponding deny for the group ace.
+ *
+ * The deny aces go after all of the groups, unlike
+ * everything else, where they immediately follow
+ * the allow ace.
+ *
+ * We calculate "skip", the number of slots to
+ * skip ahead for the deny ace, here.
+ *
+ * The pattern is:
+ * MD1 A1 MD2 A2 MD3 A3 D1 D2 D3
+ * thus, skip is
+ * (2 * numgroup) - 1 - groupi
+ * (2 * numgroup) to account for MD + A
+ * - 1 to account for the fact that we're on the
+ * access (A), not the mask (MD)
+ * - groupi to account for the fact that we have
+ * passed up groupi number of MD's.
+ */
+ skip = (2 * numgroup) - 1 - groupi;
+ ace_make_deny(acep, acep + skip, isdir, B_FALSE);
+ /*
+ * If we just did the last group, skip acep past
+ * all of the denies; else, just move ahead one.
+ */
+ if (++groupi >= numgroup)
+ acep += numgroup + 1;
+ else
+ acep += 1;
+ } else if (aclent[i].a_type & OTHER_OBJ) {
+ acep->a_who = (uid_t)-1;
+ acep->a_flags |= ACE_EVERYONE;
+ ace_make_deny(acep, acep + 1, isdir, B_FALSE);
+ acep += 2;
+ } else {
+ error = EINVAL;
+ goto out;
+ }
+ }
+
+ *acepp = result;
+ *rescount = resultsize;
+
+out:
+ if (error != 0) {
+ if ((result != NULL) && (resultsize > 0)) {
+ cacl_free(result, resultsize * sizeof (ace_t));
+ }
+ }
+
+ return (error);
+}
+
+static int
+convert_aent_to_ace(aclent_t *aclentp, int aclcnt, int isdir,
+ ace_t **retacep, int *retacecnt)
+{
+ ace_t *acep;
+ ace_t *dfacep;
+ int acecnt = 0;
+ int dfacecnt = 0;
+ int dfaclstart = 0;
+ int dfaclcnt = 0;
+ aclent_t *aclp;
+ int i;
+ int error;
+ int acesz, dfacesz;
+
+ ksort((caddr_t)aclentp, aclcnt, sizeof (aclent_t), cmp2acls);
+
+ for (i = 0, aclp = aclentp; i < aclcnt; aclp++, i++) {
+ if (aclp->a_type & ACL_DEFAULT)
+ break;
+ }
+
+ if (i < aclcnt) {
+ dfaclstart = i;
+ dfaclcnt = aclcnt - i;
+ }
+
+ if (dfaclcnt && isdir == 0) {
+ return (EINVAL);
+ }
+
+ error = ln_aent_to_ace(aclentp, i, &acep, &acecnt, isdir);
+ if (error)
+ return (error);
+
+ if (dfaclcnt) {
+ error = ln_aent_to_ace(&aclentp[dfaclstart], dfaclcnt,
+ &dfacep, &dfacecnt, isdir);
+ if (error) {
+ if (acep) {
+ cacl_free(acep, acecnt * sizeof (ace_t));
+ }
+ return (error);
+ }
+ }
+
+ if (dfacecnt != 0) {
+ acesz = sizeof (ace_t) * acecnt;
+ dfacesz = sizeof (ace_t) * dfacecnt;
+ acep = cacl_realloc(acep, acesz, acesz + dfacesz);
+ if (acep == NULL)
+ return (ENOMEM);
+ if (dfaclcnt) {
+ (void) memcpy(acep + acecnt, dfacep, dfacesz);
+ }
+ }
+ if (dfaclcnt)
+ cacl_free(dfacep, dfacecnt * sizeof (ace_t));
+
+ *retacecnt = acecnt + dfacecnt;
+ *retacep = acep;
+ return (0);
+}
+
+static int
+ace_mask_to_mode(uint32_t mask, o_mode_t *modep, int isdir)
+{
+ int error = 0;
+ o_mode_t mode = 0;
+ uint32_t bits, wantbits;
+
+ /* read */
+ if (mask & ACE_READ_DATA)
+ mode |= S_IROTH;
+
+ /* write */
+ wantbits = (ACE_WRITE_DATA | ACE_APPEND_DATA);
+ if (isdir)
+ wantbits |= ACE_DELETE_CHILD;
+ bits = mask & wantbits;
+ if (bits != 0) {
+ if (bits != wantbits) {
+ error = ENOTSUP;
+ goto out;
+ }
+ mode |= S_IWOTH;
+ }
+
+ /* exec */
+ if (mask & ACE_EXECUTE) {
+ mode |= S_IXOTH;
+ }
+
+ *modep = mode;
+
+out:
+ return (error);
+}
+
+static void
+acevals_init(acevals_t *vals, uid_t key)
+{
+ bzero(vals, sizeof (*vals));
+ vals->allowed = ACE_MASK_UNDEFINED;
+ vals->denied = ACE_MASK_UNDEFINED;
+ vals->mask = ACE_MASK_UNDEFINED;
+ vals->key = key;
+}
+
+static void
+ace_list_init(ace_list_t *al, int dfacl_flag)
+{
+ acevals_init(&al->user_obj, NULL);
+ acevals_init(&al->group_obj, NULL);
+ acevals_init(&al->other_obj, NULL);
+ al->numusers = 0;
+ al->numgroups = 0;
+ al->acl_mask = 0;
+ al->hasmask = 0;
+ al->state = ace_unused;
+ al->seen = 0;
+ al->dfacl_flag = dfacl_flag;
+}
+
+/*
+ * Find or create an acevals holder for a given id and avl tree.
+ *
+ * Note that only one thread will ever touch these avl trees, so
+ * there is no need for locking.
+ */
+static acevals_t *
+acevals_find(ace_t *ace, avl_tree_t *avl, int *num)
+{
+ acevals_t key, *rc;
+ avl_index_t where;
+
+ key.key = ace->a_who;
+ rc = avl_find(avl, &key, &where);
+ if (rc != NULL)
+ return (rc);
+
+ /* this memory is freed by ln_ace_to_aent()->ace_list_free() */
+ if (cacl_malloc((void **)&rc, sizeof (acevals_t)) != 0)
+ return (NULL);
+
+ acevals_init(rc, ace->a_who);
+ avl_insert(avl, rc, where);
+ (*num)++;
+
+ return (rc);
+}
+
+/*
+ * Check one access-mask bit of an ACE against the ACL_*_SET/ERR
+ * translation rules, to decide whether the ACE can be represented in a
+ * POSIX-draft (aclent_t) ACL.  Returns 0 when the bit's setting is
+ * representable, ENOTSUP when it prevents translation, and EINVAL for
+ * a mask_bit this function does not know about.
+ */
+static int
+access_mask_check(ace_t *acep, int mask_bit, int isowner)
+{
+	int set_deny, err_deny;
+	int set_allow, err_allow;
+	int acl_consume;
+	int haswriteperm, hasreadperm;
+
+	/* a DENY of read/write implies the absence of that permission */
+	if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) {
+		haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 0 : 1;
+		hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 0 : 1;
+	} else {
+		haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 1 : 0;
+		hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 1 : 0;
+	}
+
+	/* the subset of SET/ERR conditions actually enforced below */
+	acl_consume = (ACL_SYNCHRONIZE_ERR_DENY |
+	    ACL_DELETE_ERR_DENY |
+	    ACL_WRITE_OWNER_ERR_DENY |
+	    ACL_WRITE_OWNER_ERR_ALLOW |
+	    ACL_WRITE_ATTRS_OWNER_SET_ALLOW |
+	    ACL_WRITE_ATTRS_OWNER_ERR_DENY |
+	    ACL_WRITE_ATTRS_WRITER_SET_DENY |
+	    ACL_WRITE_ATTRS_WRITER_ERR_ALLOW |
+	    ACL_WRITE_NAMED_WRITER_ERR_DENY |
+	    ACL_READ_NAMED_READER_ERR_DENY);
+
+	/* pick the SET/ERR rule set that applies to this mask bit */
+	if (mask_bit == ACE_SYNCHRONIZE) {
+		set_deny = ACL_SYNCHRONIZE_SET_DENY;
+		err_deny = ACL_SYNCHRONIZE_ERR_DENY;
+		set_allow = ACL_SYNCHRONIZE_SET_ALLOW;
+		err_allow = ACL_SYNCHRONIZE_ERR_ALLOW;
+	} else if (mask_bit == ACE_WRITE_OWNER) {
+		set_deny = ACL_WRITE_OWNER_SET_DENY;
+		err_deny = ACL_WRITE_OWNER_ERR_DENY;
+		set_allow = ACL_WRITE_OWNER_SET_ALLOW;
+		err_allow = ACL_WRITE_OWNER_ERR_ALLOW;
+	} else if (mask_bit == ACE_DELETE) {
+		set_deny = ACL_DELETE_SET_DENY;
+		err_deny = ACL_DELETE_ERR_DENY;
+		set_allow = ACL_DELETE_SET_ALLOW;
+		err_allow = ACL_DELETE_ERR_ALLOW;
+	} else if (mask_bit == ACE_WRITE_ATTRIBUTES) {
+		/* write-attrs rules differ for owner, writers, and others */
+		if (isowner) {
+			set_deny = ACL_WRITE_ATTRS_OWNER_SET_DENY;
+			err_deny = ACL_WRITE_ATTRS_OWNER_ERR_DENY;
+			set_allow = ACL_WRITE_ATTRS_OWNER_SET_ALLOW;
+			err_allow = ACL_WRITE_ATTRS_OWNER_ERR_ALLOW;
+		} else if (haswriteperm) {
+			set_deny = ACL_WRITE_ATTRS_WRITER_SET_DENY;
+			err_deny = ACL_WRITE_ATTRS_WRITER_ERR_DENY;
+			set_allow = ACL_WRITE_ATTRS_WRITER_SET_ALLOW;
+			err_allow = ACL_WRITE_ATTRS_WRITER_ERR_ALLOW;
+		} else {
+			/* a non-owner non-writer may not be allowed the bit */
+			if ((acep->a_access_mask & mask_bit) &&
+			    (acep->a_type & ACE_ACCESS_ALLOWED_ACE_TYPE)) {
+				return (ENOTSUP);
+			}
+			return (0);
+		}
+	} else if (mask_bit == ACE_READ_NAMED_ATTRS) {
+		if (!hasreadperm)
+			return (0);
+
+		set_deny = ACL_READ_NAMED_READER_SET_DENY;
+		err_deny = ACL_READ_NAMED_READER_ERR_DENY;
+		set_allow = ACL_READ_NAMED_READER_SET_ALLOW;
+		err_allow = ACL_READ_NAMED_READER_ERR_ALLOW;
+	} else if (mask_bit == ACE_WRITE_NAMED_ATTRS) {
+		if (!haswriteperm)
+			return (0);
+
+		set_deny = ACL_WRITE_NAMED_WRITER_SET_DENY;
+		err_deny = ACL_WRITE_NAMED_WRITER_ERR_DENY;
+		set_allow = ACL_WRITE_NAMED_WRITER_SET_ALLOW;
+		err_allow = ACL_WRITE_NAMED_WRITER_ERR_ALLOW;
+	} else {
+		return (EINVAL);
+	}
+
+	/* apply the selected rule: SET_* => bit must be on, ERR_* => off */
+	if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) {
+		if (acl_consume & set_deny) {
+			if (!(acep->a_access_mask & mask_bit)) {
+				return (ENOTSUP);
+			}
+		} else if (acl_consume & err_deny) {
+			if (acep->a_access_mask & mask_bit) {
+				return (ENOTSUP);
+			}
+		}
+	} else {
+		/* ACE_ACCESS_ALLOWED_ACE_TYPE */
+		if (acl_consume & set_allow) {
+			if (!(acep->a_access_mask & mask_bit)) {
+				return (ENOTSUP);
+			}
+		} else if (acl_consume & err_allow) {
+			if (acep->a_access_mask & mask_bit) {
+				return (ENOTSUP);
+			}
+		}
+	}
+	return (0);
+}
+
+/*
+ * Validate that a single ACE is of a form that can be translated to a
+ * POSIX-draft aclent_t entry: only plain ALLOW/DENY types, no audit or
+ * no-propagate flags, only valid flag/mask bits, and an access mask
+ * that satisfies access_mask_check() for each constrained bit.
+ * Returns 0 if translatable, ENOTSUP or EINVAL otherwise.
+ */
+static int
+ace_to_aent_legal(ace_t *acep)
+{
+	int error = 0;
+	int isowner;
+
+	/* only ALLOW or DENY */
+	if ((acep->a_type != ACE_ACCESS_ALLOWED_ACE_TYPE) &&
+	    (acep->a_type != ACE_ACCESS_DENIED_ACE_TYPE)) {
+		error = ENOTSUP;
+		goto out;
+	}
+
+	/* check for invalid flags */
+	if (acep->a_flags & ~(ACE_VALID_FLAG_BITS)) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/* some flags are illegal */
+	if (acep->a_flags & (ACE_SUCCESSFUL_ACCESS_ACE_FLAG |
+	    ACE_FAILED_ACCESS_ACE_FLAG |
+	    ACE_NO_PROPAGATE_INHERIT_ACE)) {
+		error = ENOTSUP;
+		goto out;
+	}
+
+	/* check for invalid masks */
+	if (acep->a_access_mask & ~(ACE_VALID_MASK_BITS)) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/* owner@ entries get owner-specific rules below */
+	if ((acep->a_flags & ACE_OWNER)) {
+		isowner = 1;
+	} else {
+		isowner = 0;
+	}
+
+	/* each constrained mask bit must individually be representable */
+	error = access_mask_check(acep, ACE_SYNCHRONIZE, isowner);
+	if (error)
+		goto out;
+
+	error = access_mask_check(acep, ACE_WRITE_OWNER, isowner);
+	if (error)
+		goto out;
+
+	error = access_mask_check(acep, ACE_DELETE, isowner);
+	if (error)
+		goto out;
+
+	error = access_mask_check(acep, ACE_WRITE_ATTRIBUTES, isowner);
+	if (error)
+		goto out;
+
+	error = access_mask_check(acep, ACE_READ_NAMED_ATTRS, isowner);
+	if (error)
+		goto out;
+
+	error = access_mask_check(acep, ACE_WRITE_NAMED_ATTRS, isowner);
+	if (error)
+		goto out;
+
+	/* more detailed checking of masks */
+	if (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) {
+		/* ALLOW must grant read-attrs, and WRITE/APPEND must agree */
+		if (! (acep->a_access_mask & ACE_READ_ATTRIBUTES)) {
+			error = ENOTSUP;
+			goto out;
+		}
+		if ((acep->a_access_mask & ACE_WRITE_DATA) &&
+		    (! (acep->a_access_mask & ACE_APPEND_DATA))) {
+			error = ENOTSUP;
+			goto out;
+		}
+		if ((! (acep->a_access_mask & ACE_WRITE_DATA)) &&
+		    (acep->a_access_mask & ACE_APPEND_DATA)) {
+			error = ENOTSUP;
+			goto out;
+		}
+	}
+
+	/* ACL enforcement */
+	if ((acep->a_access_mask & ACE_READ_ACL) &&
+	    (acep->a_type != ACE_ACCESS_ALLOWED_ACE_TYPE)) {
+		error = ENOTSUP;
+		goto out;
+	}
+	/* write_acl may only be allowed to owner@, never denied to it */
+	if (acep->a_access_mask & ACE_WRITE_ACL) {
+		if ((acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) &&
+		    (isowner)) {
+			error = ENOTSUP;
+			goto out;
+		}
+		if ((acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) &&
+		    (! isowner)) {
+			error = ENOTSUP;
+			goto out;
+		}
+	}
+
+out:
+	return (error);
+}
+
+/*
+ * Convert the access mask of an ALLOW ace to rwx mode bits via
+ * ace_mask_to_mode(), after insisting that ACE_READ_ACL and
+ * ACE_READ_ATTRIBUTES are both present (a translatable ALLOW entry
+ * must carry both).  Returns 0 or ENOTSUP.
+ */
+static int
+ace_allow_to_mode(uint32_t mask, o_mode_t *modep, int isdir)
+{
+	/* ACE_READ_ACL and ACE_READ_ATTRIBUTES must both be set */
+	if ((mask & (ACE_READ_ACL | ACE_READ_ATTRIBUTES)) !=
+	    (ACE_READ_ACL | ACE_READ_ATTRIBUTES)) {
+		return (ENOTSUP);
+	}
+
+	return (ace_mask_to_mode(mask, modep, isdir));
+}
+
+/*
+ * Convert one collated acevals_t (the allow/deny masks gathered for a
+ * single user/group/object) into an aclent_t in *dest.  The allow and
+ * deny masks must be exact complements over the supported bits, and if
+ * the list carries an emulated ACL_MASK it must agree with this
+ * entry's mask.  Returns 0, ENOTSUP, or EINVAL.
+ */
+static int
+acevals_to_aent(acevals_t *vals, aclent_t *dest, ace_list_t *list,
+    uid_t owner, gid_t group, int isdir)
+{
+	int error;
+	uint32_t flips = ACE_POSIX_SUPPORTED_BITS;
+
+	if (isdir)
+		flips |= ACE_DELETE_CHILD;
+	/* the ALLOW mask must be the exact complement of the DENY mask */
+	if (vals->allowed != (vals->denied ^ flips)) {
+		error = ENOTSUP;
+		goto out;
+	}
+	if ((list->hasmask) && (list->acl_mask != vals->mask) &&
+	    (vals->aent_type & (USER | GROUP | GROUP_OBJ))) {
+		error = ENOTSUP;
+		goto out;
+	}
+	error = ace_allow_to_mode(vals->allowed, &dest->a_perm, isdir);
+	if (error != 0)
+		goto out;
+	dest->a_type = vals->aent_type;
+	/* USER/GROUP entries carry their own id; the *_OBJ entries don't */
+	if (dest->a_type & (USER | GROUP)) {
+		dest->a_id = vals->key;
+	} else if (dest->a_type & USER_OBJ) {
+		dest->a_id = owner;
+	} else if (dest->a_type & GROUP_OBJ) {
+		dest->a_id = group;
+	} else if (dest->a_type & OTHER_OBJ) {
+		dest->a_id = 0;
+	} else {
+		error = EINVAL;
+		goto out;
+	}
+
+out:
+	return (error);
+}
+
+
+/*
+ * Produce the aclent_t array for one collated ace_list_t, in canonical
+ * order: USER_OBJ, USER..., GROUP_OBJ, GROUP..., CLASS_OBJ (unless the
+ * list is a default ACL with no explicit mask), OTHER_OBJ.  On success
+ * *aclentp and *aclcnt receive the cacl_malloc'd array and its length;
+ * on error any partial result is freed here.
+ */
+static int
+ace_list_to_aent(ace_list_t *list, aclent_t **aclentp, int *aclcnt,
+    uid_t owner, gid_t group, int isdir)
+{
+	int error = 0;
+	aclent_t *aent, *result = NULL;
+	acevals_t *vals;
+	int resultcount;
+
+	/* a valid POSIX-draft ACL requires all three *_OBJ entries */
+	if ((list->seen & (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) !=
+	    (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) {
+		error = ENOTSUP;
+		goto out;
+	}
+	/* named users/groups are only representable with an ACL_MASK */
+	if ((! list->hasmask) && (list->numusers + list->numgroups > 0)) {
+		error = ENOTSUP;
+		goto out;
+	}
+
+	resultcount = 3 + list->numusers + list->numgroups;
+	/*
+	 * This must be the same condition as below, when we add the CLASS_OBJ
+	 * (aka ACL mask)
+	 */
+	if ((list->hasmask) || (! list->dfacl_flag))
+		resultcount += 1;
+
+	if (cacl_malloc((void **)&result,
+	    resultcount * sizeof (aclent_t)) != 0) {
+		error = ENOMEM;
+		goto out;
+	}
+	aent = result;
+
+	/* USER_OBJ */
+	if (!(list->user_obj.aent_type & USER_OBJ)) {
+		error = EINVAL;
+		goto out;
+	}
+
+	error = acevals_to_aent(&list->user_obj, aent, list, owner, group,
+	    isdir);
+
+	if (error != 0)
+		goto out;
+	++aent;
+	/* USER */
+	vals = NULL;
+	for (vals = avl_first(&list->user); vals != NULL;
+	    vals = AVL_NEXT(&list->user, vals)) {
+		if (!(vals->aent_type & USER)) {
+			error = EINVAL;
+			goto out;
+		}
+		error = acevals_to_aent(vals, aent, list, owner, group,
+		    isdir);
+		if (error != 0)
+			goto out;
+		++aent;
+	}
+	/* GROUP_OBJ */
+	if (!(list->group_obj.aent_type & GROUP_OBJ)) {
+		error = EINVAL;
+		goto out;
+	}
+	error = acevals_to_aent(&list->group_obj, aent, list, owner, group,
+	    isdir);
+	if (error != 0)
+		goto out;
+	++aent;
+	/* GROUP */
+	vals = NULL;
+	for (vals = avl_first(&list->group); vals != NULL;
+	    vals = AVL_NEXT(&list->group, vals)) {
+		if (!(vals->aent_type & GROUP)) {
+			error = EINVAL;
+			goto out;
+		}
+		error = acevals_to_aent(vals, aent, list, owner, group,
+		    isdir);
+		if (error != 0)
+			goto out;
+		++aent;
+	}
+	/*
+	 * CLASS_OBJ (aka ACL_MASK)
+	 *
+	 * An ACL_MASK is not fabricated if the ACL is a default ACL.
+	 * This is to follow UFS's behavior.
+	 */
+	if ((list->hasmask) || (! list->dfacl_flag)) {
+		if (list->hasmask) {
+			uint32_t flips = ACE_POSIX_SUPPORTED_BITS;
+			if (isdir)
+				flips |= ACE_DELETE_CHILD;
+			/* the stored mask is a DENY mask; flip it to ALLOW */
+			error = ace_mask_to_mode(list->acl_mask ^ flips,
+			    &aent->a_perm, isdir);
+			if (error != 0)
+				goto out;
+		} else {
+			/* fabricate the ACL_MASK from the group permissions */
+			error = ace_mask_to_mode(list->group_obj.allowed,
+			    &aent->a_perm, isdir);
+			if (error != 0)
+				goto out;
+		}
+		aent->a_id = 0;
+		aent->a_type = CLASS_OBJ | list->dfacl_flag;
+		++aent;
+	}
+	/* OTHER_OBJ */
+	if (!(list->other_obj.aent_type & OTHER_OBJ)) {
+		error = EINVAL;
+		goto out;
+	}
+	error = acevals_to_aent(&list->other_obj, aent, list, owner, group,
+	    isdir);
+	if (error != 0)
+		goto out;
+	++aent;
+
+	*aclentp = result;
+	*aclcnt = resultcount;
+
+out:
+	if (error != 0) {
+		if (result != NULL)
+			cacl_free(result, resultcount * sizeof (aclent_t));
+	}
+
+	return (error);
+}
+
+
+/*
+ * free all data associated with an ace_list
+ */
+/*
+ * free all data associated with an ace_list: every acevals_t node in
+ * both AVL trees, the trees themselves, and the container.  Safe to
+ * call with al == NULL.
+ */
+static void
+ace_list_free(ace_list_t *al)
+{
+	acevals_t *node;
+	void *cookie;
+
+	/* empty both AVL trees, freeing each node as it is removed */
+	cookie = NULL;
+	while ((node = avl_destroy_nodes(&al->user, &cookie)) != NULL)
+		cacl_free(node, sizeof (acevals_t));
+	cookie = NULL;
+	while ((node = avl_destroy_nodes(&al->group, &cookie)) != NULL)
+		cacl_free(node, sizeof (acevals_t));
+
+	avl_destroy(&al->user);
+	avl_destroy(&al->group);
+
+	/* free the container itself */
+	cacl_free(al, sizeof (ace_list_t));
+}
+
+/*
+ * AVL comparator for acevals_t nodes: orders by the numeric id (key),
+ * returning -1/0/1 as required by avl_create().
+ */
+static int
+acevals_compare(const void *va, const void *vb)
+{
+	const acevals_t *a = va, *b = vb;
+
+	if (a->key == b->key)
+		return (0);
+
+	if (a->key > b->key)
+		return (1);
+
+	else
+		return (-1);
+}
+
+/*
+ * Convert a list of ace_t entries to equivalent regular and default
+ * aclent_t lists. Return error (ENOTSUP) when conversion is not possible.
+ *
+ * On success, *aclentp/*aclcnt receive the regular ACL and
+ * *dfaclentp/*dfaclcnt the default (inherit) ACL; a list that received
+ * no entries is left NULL/0.  NOTE(review): the loop clears several
+ * bits in ace[i].a_access_mask, so the caller's array is modified in
+ * place even on failure.
+ */
+static int
+ln_ace_to_aent(ace_t *ace, int n, uid_t owner, gid_t group,
+    aclent_t **aclentp, int *aclcnt, aclent_t **dfaclentp, int *dfaclcnt,
+    int isdir)
+{
+	int error = 0;
+	ace_t *acep;
+	uint32_t bits;
+	int i;
+	ace_list_t *normacl = NULL, *dfacl = NULL, *acl;
+	acevals_t *vals;
+
+	*aclentp = NULL;
+	*aclcnt = 0;
+	*dfaclentp = NULL;
+	*dfaclcnt = 0;
+
+	/* we need at least user_obj, group_obj, and other_obj */
+	if (n < 6) {
+		error = ENOTSUP;
+		goto out;
+	}
+	if (ace == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/* set up the collation list for the regular (non-default) ACL */
+	error = cacl_malloc((void **)&normacl, sizeof (ace_list_t));
+	if (error != 0)
+		goto out;
+
+	avl_create(&normacl->user, acevals_compare, sizeof (acevals_t),
+	    offsetof(acevals_t, avl));
+	avl_create(&normacl->group, acevals_compare, sizeof (acevals_t),
+	    offsetof(acevals_t, avl));
+
+	ace_list_init(normacl, 0);
+
+	/* and one for the default (inherit-only) ACL */
+	error = cacl_malloc((void **)&dfacl, sizeof (ace_list_t));
+	if (error != 0)
+		goto out;
+
+	avl_create(&dfacl->user, acevals_compare, sizeof (acevals_t),
+	    offsetof(acevals_t, avl));
+	avl_create(&dfacl->group, acevals_compare, sizeof (acevals_t),
+	    offsetof(acevals_t, avl));
+	ace_list_init(dfacl, ACL_DEFAULT);
+
+	/* process every ace_t... */
+	for (i = 0; i < n; i++) {
+		acep = &ace[i];
+
+		/* rule out certain cases quickly */
+		error = ace_to_aent_legal(acep);
+		if (error != 0)
+			goto out;
+
+		/*
+		 * Turn off these bits in order to not have to worry about
+		 * them when doing the checks for compliments.
+		 */
+		acep->a_access_mask &= ~(ACE_WRITE_OWNER | ACE_DELETE |
+		    ACE_SYNCHRONIZE | ACE_WRITE_ATTRIBUTES |
+		    ACE_READ_NAMED_ATTRS | ACE_WRITE_NAMED_ATTRS);
+
+		/* see if this should be a regular or default acl */
+		bits = acep->a_flags &
+		    (ACE_INHERIT_ONLY_ACE |
+		    ACE_FILE_INHERIT_ACE |
+		    ACE_DIRECTORY_INHERIT_ACE);
+		if (bits != 0) {
+			/* all or nothing on these inherit bits */
+			if (bits != (ACE_INHERIT_ONLY_ACE |
+			    ACE_FILE_INHERIT_ACE |
+			    ACE_DIRECTORY_INHERIT_ACE)) {
+				error = ENOTSUP;
+				goto out;
+			}
+			acl = dfacl;
+		} else {
+			acl = normacl;
+		}
+
+		/*
+		 * The state field enforces canonical entry order:
+		 * owner@, then users, then groups, then everyone@.
+		 */
+		if ((acep->a_flags & ACE_OWNER)) {
+			if (acl->state > ace_user_obj) {
+				error = ENOTSUP;
+				goto out;
+			}
+			acl->state = ace_user_obj;
+			acl->seen |= USER_OBJ;
+			vals = &acl->user_obj;
+			vals->aent_type = USER_OBJ | acl->dfacl_flag;
+		} else if ((acep->a_flags & ACE_EVERYONE)) {
+			acl->state = ace_other_obj;
+			acl->seen |= OTHER_OBJ;
+			vals = &acl->other_obj;
+			vals->aent_type = OTHER_OBJ | acl->dfacl_flag;
+		} else if (acep->a_flags & ACE_IDENTIFIER_GROUP) {
+			if (acl->state > ace_group) {
+				error = ENOTSUP;
+				goto out;
+			}
+			if ((acep->a_flags & ACE_GROUP)) {
+				acl->seen |= GROUP_OBJ;
+				vals = &acl->group_obj;
+				vals->aent_type = GROUP_OBJ | acl->dfacl_flag;
+			} else {
+				acl->seen |= GROUP;
+				vals = acevals_find(acep, &acl->group,
+				    &acl->numgroups);
+				if (vals == NULL) {
+					error = ENOMEM;
+					goto out;
+				}
+				vals->aent_type = GROUP | acl->dfacl_flag;
+			}
+			acl->state = ace_group;
+		} else {
+			if (acl->state > ace_user) {
+				error = ENOTSUP;
+				goto out;
+			}
+			acl->state = ace_user;
+			acl->seen |= USER;
+			vals = acevals_find(acep, &acl->user,
+			    &acl->numusers);
+			if (vals == NULL) {
+				error = ENOMEM;
+				goto out;
+			}
+			vals->aent_type = USER | acl->dfacl_flag;
+		}
+
+		if (!(acl->state > ace_unused)) {
+			error = EINVAL;
+			goto out;
+		}
+
+		if (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) {
+			/* no more than one allowed per aclent_t */
+			if (vals->allowed != ACE_MASK_UNDEFINED) {
+				error = ENOTSUP;
+				goto out;
+			}
+			vals->allowed = acep->a_access_mask;
+		} else {
+			/*
+			 * it's a DENY; if there was a previous DENY, it
+			 * must have been an ACL_MASK.
+			 */
+			if (vals->denied != ACE_MASK_UNDEFINED) {
+				/* ACL_MASK is for USER and GROUP only */
+				if ((acl->state != ace_user) &&
+				    (acl->state != ace_group)) {
+					error = ENOTSUP;
+					goto out;
+				}
+
+				if (! acl->hasmask) {
+					acl->hasmask = 1;
+					acl->acl_mask = vals->denied;
+				/* check for mismatched ACL_MASK emulations */
+				} else if (acl->acl_mask != vals->denied) {
+					error = ENOTSUP;
+					goto out;
+				}
+				vals->mask = vals->denied;
+			}
+			vals->denied = acep->a_access_mask;
+		}
+	}
+
+	/* done collating; produce the aclent_t lists */
+	if (normacl->state != ace_unused) {
+		error = ace_list_to_aent(normacl, aclentp, aclcnt,
+		    owner, group, isdir);
+		if (error != 0) {
+			goto out;
+		}
+	}
+	if (dfacl->state != ace_unused) {
+		error = ace_list_to_aent(dfacl, dfaclentp, dfaclcnt,
+		    owner, group, isdir);
+		if (error != 0) {
+			goto out;
+		}
+	}
+
+out:
+	if (normacl != NULL)
+		ace_list_free(normacl);
+	if (dfacl != NULL)
+		ace_list_free(dfacl);
+
+	return (error);
+}
+
+/*
+ * Translate an ace_t array into a single aclent_t array, concatenating
+ * the regular and default lists produced by ln_ace_to_aent().
+ *
+ * NOTE: dfaclsz is only assigned inside the dfaclcnt != 0 branch; that
+ * is safe because ln_ace_to_aent() leaves dfaclentp NULL whenever
+ * dfaclcnt is 0, so the final cacl_free(dfaclentp, dfaclsz) is only
+ * reached with dfaclsz initialized.
+ */
+static int
+convert_ace_to_aent(ace_t *acebufp, int acecnt, int isdir,
+    uid_t owner, gid_t group, aclent_t **retaclentp, int *retaclcnt)
+{
+	int error = 0;
+	aclent_t *aclentp, *dfaclentp;
+	int aclcnt, dfaclcnt;
+	int aclsz, dfaclsz;
+
+	error = ln_ace_to_aent(acebufp, acecnt, owner, group,
+	    &aclentp, &aclcnt, &dfaclentp, &dfaclcnt, isdir);
+
+	if (error)
+		return (error);
+
+
+	if (dfaclcnt != 0) {
+		/*
+		 * Slap aclentp and dfaclentp into a single array.
+		 */
+		aclsz = sizeof (aclent_t) * aclcnt;
+		dfaclsz = sizeof (aclent_t) * dfaclcnt;
+		aclentp = cacl_realloc(aclentp, aclsz, aclsz + dfaclsz);
+		if (aclentp != NULL) {
+			(void) memcpy(aclentp + aclcnt, dfaclentp, dfaclsz);
+		} else {
+			error = ENOMEM;
+		}
+	}
+
+	if (aclentp) {
+		*retaclentp = aclentp;
+		*retaclcnt = aclcnt + dfaclcnt;
+	}
+
+	if (dfaclentp)
+		cacl_free(dfaclentp, dfaclsz);
+
+	return (error);
+}
+
+
+/*
+ * Translate aclp in place to the flavor named by target_flavor
+ * (_ACL_ACE_ENABLED for ace_t, _ACL_ACLENT_ENABLED for aclent_t).
+ * A no-op when the ACL already has the requested flavor.  On success
+ * the old entry array is freed and replaced, and acl_type /
+ * acl_entry_size are updated to match; returns 0.  On failure the
+ * kernel build returns an errno value, the userland build returns -1
+ * with errno set.
+ */
+int
+acl_translate(acl_t *aclp, int target_flavor, int isdir, uid_t owner,
+    gid_t group)
+{
+	int aclcnt;
+	void *acldata;
+	int error;
+
+	/*
+	 * See if we need to translate
+	 */
+	if ((target_flavor == _ACL_ACE_ENABLED && aclp->acl_type == ACE_T) ||
+	    (target_flavor == _ACL_ACLENT_ENABLED &&
+	    aclp->acl_type == ACLENT_T))
+		return (0);
+
+	if (target_flavor == -1) {
+		error = EINVAL;
+		goto out;
+	}
+
+	if (target_flavor ==  _ACL_ACE_ENABLED &&
+	    aclp->acl_type == ACLENT_T) {
+		error = convert_aent_to_ace(aclp->acl_aclp,
+		    aclp->acl_cnt, isdir, (ace_t **)&acldata, &aclcnt);
+		if (error)
+			goto out;
+
+	} else if (target_flavor == _ACL_ACLENT_ENABLED &&
+	    aclp->acl_type == ACE_T) {
+		error = convert_ace_to_aent(aclp->acl_aclp, aclp->acl_cnt,
+		    isdir, owner, group, (aclent_t **)&acldata, &aclcnt);
+		if (error)
+			goto out;
+	} else {
+		error = ENOTSUP;
+		goto out;
+	}
+
+	/*
+	 * replace old acl with newly translated acl
+	 */
+	cacl_free(aclp->acl_aclp, aclp->acl_cnt * aclp->acl_entry_size);
+	aclp->acl_aclp = acldata;
+	aclp->acl_cnt = aclcnt;
+	if (target_flavor == _ACL_ACE_ENABLED) {
+		aclp->acl_type = ACE_T;
+		aclp->acl_entry_size = sizeof (ace_t);
+	} else {
+		aclp->acl_type = ACLENT_T;
+		aclp->acl_entry_size = sizeof (aclent_t);
+	}
+	return (0);
+
+out:
+
+#if !defined(_KERNEL)
+	errno = error;
+	return (-1);
+#else
+	return (error);
+#endif
+}
+
+/*
+ * Fill in acl[0][index] with the given who/mask/type/flags and
+ * post-increment index.  acl is an ace_t **, so acl[0] is the array.
+ */
+#define	SET_ACE(acl, index, who, mask, type, flags) { \
+	acl[0][index].a_who = (uint32_t)who; \
+	acl[0][index].a_type = type; \
+	acl[0][index].a_flags = flags; \
+	acl[0][index++].a_access_mask = mask; \
+}
+
+/*
+ * Compute the access masks making up a trivial ACE-style ACL for mode:
+ * allow0 is an optional extra owner@ ALLOW, deny1/deny2 are the owner@
+ * and group@ DENY masks needed when a weaker class holds a mode bit
+ * the stronger class lacks, and owner/group/everyone are the three
+ * unconditional ALLOW masks.  allow0/deny1/deny2 may come back 0, in
+ * which case the corresponding ace is omitted by the caller.
+ */
+void
+acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1,
+    uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone)
+{
+	*deny1 = *deny2 = *allow0 = *group = 0;
+
+	/* deny owner@ any bit the owner lacks but group or other holds */
+	if (!(mode & S_IRUSR) && (mode & (S_IRGRP|S_IROTH)))
+		*deny1 |= ACE_READ_DATA;
+	if (!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH)))
+		*deny1 |= ACE_WRITE_DATA;
+	if (!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH)))
+		*deny1 |= ACE_EXECUTE;
+
+	/* deny group@ any bit the group lacks but other holds */
+	if (!(mode & S_IRGRP) && (mode & S_IROTH))
+		*deny2 = ACE_READ_DATA;
+	if (!(mode & S_IWGRP) && (mode & S_IWOTH))
+		*deny2 |= ACE_WRITE_DATA;
+	if (!(mode & S_IXGRP) && (mode & S_IXOTH))
+		*deny2 |= ACE_EXECUTE;
+
+	/* allow owner@ early any bit that deny2 would otherwise remove */
+	if ((mode & S_IRUSR) && (!(mode & S_IRGRP) && (mode & S_IROTH)))
+		*allow0 |= ACE_READ_DATA;
+	if ((mode & S_IWUSR) && (!(mode & S_IWGRP) && (mode & S_IWOTH)))
+		*allow0 |= ACE_WRITE_DATA;
+	if ((mode & S_IXUSR) && (!(mode & S_IXGRP) && (mode & S_IXOTH)))
+		*allow0 |= ACE_EXECUTE;
+
+	/* owner@ always controls attrs/owner/ACL in a trivial ACL */
+	*owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL|
+	    ACE_WRITE_NAMED_ATTRS|ACE_READ_ACL|ACE_READ_ATTRIBUTES|
+	    ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE;
+	if (mode & S_IRUSR)
+		*owner |= ACE_READ_DATA;
+	if (mode & S_IWUSR)
+		*owner |= ACE_WRITE_DATA|ACE_APPEND_DATA;
+	if (mode & S_IXUSR)
+		*owner |= ACE_EXECUTE;
+
+	*group = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS|
+	    ACE_SYNCHRONIZE;
+	if (mode & S_IRGRP)
+		*group |= ACE_READ_DATA;
+	if (mode & S_IWGRP)
+		*group |= ACE_WRITE_DATA|ACE_APPEND_DATA;
+	if (mode & S_IXGRP)
+		*group |= ACE_EXECUTE;
+
+	*everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS|
+	    ACE_SYNCHRONIZE;
+	if (mode & S_IROTH)
+		*everyone |= ACE_READ_DATA;
+	if (mode & S_IWOTH)
+		*everyone |= ACE_WRITE_DATA|ACE_APPEND_DATA;
+	if (mode & S_IXOTH)
+		*everyone |= ACE_EXECUTE;
+}
+
+/*
+ * Build a trivial ACL (3 to 6 aces) representing mode.  On success
+ * *acl points at a cacl_malloc'd ace_t array and *count holds its
+ * length; the caller is responsible for freeing it.  Returns 0 or an
+ * error from cacl_malloc().
+ */
+int
+acl_trivial_create(mode_t mode, ace_t **acl, int *count)
+{
+	uint32_t	deny1, deny2;
+	uint32_t	allow0;
+	uint32_t	owner, group, everyone;
+	int		index = 0;
+	int		error;
+
+	/* owner@, group@ and everyone@ allows are always present */
+	*count = 3;
+	acl_trivial_access_masks(mode, &allow0, &deny1, &deny2, &owner, &group,
+	    &everyone);
+
+	/* the three conditional entries are only emitted when non-empty */
+	if (allow0)
+		(*count)++;
+	if (deny1)
+		(*count)++;
+	if (deny2)
+		(*count)++;
+
+	if ((error = cacl_malloc((void **)acl, *count * sizeof (ace_t))) != 0)
+		return (error);
+
+	if (allow0) {
+		SET_ACE(acl, index, -1, allow0, ACE_ACCESS_ALLOWED_ACE_TYPE,
+		    ACE_OWNER);
+	}
+	if (deny1) {
+		SET_ACE(acl, index, -1, deny1, ACE_ACCESS_DENIED_ACE_TYPE,
+		    ACE_OWNER);
+	}
+	if (deny2) {
+		SET_ACE(acl, index, -1, deny2, ACE_ACCESS_DENIED_ACE_TYPE,
+		    ACE_GROUP|ACE_IDENTIFIER_GROUP);
+	}
+
+	SET_ACE(acl, index, -1, owner, ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_OWNER);
+	SET_ACE(acl, index, -1, group, ACE_ACCESS_ALLOWED_ACE_TYPE,
+	    ACE_IDENTIFIER_GROUP|ACE_GROUP);
+	SET_ACE(acl, index, -1, everyone, ACE_ACCESS_ALLOWED_ACE_TYPE,
+	    ACE_EVERYONE);
+
+	return (0);
+}
+
+/*
+ * ace_trivial:
+ * determine whether an ace_t acl is trivial
+ *
+ * Trivialness implies that the acl is composed of only
+ * owner, group, everyone entries.  ACL can't
+ * have read_acl denied, and write_owner/write_acl/write_attributes
+ * can only be owner@ entry.
+ *
+ * Walks the entries via the supplied iterator; returns 0 when the ACL
+ * is trivial, 1 as soon as any entry violates a triviality rule.
+ */
+int
+ace_trivial_common(void *acep, int aclcnt,
+    uint64_t (*walk)(void *, uint64_t, int aclcnt,
+    uint16_t *, uint16_t *, uint32_t *))
+{
+	uint16_t flags;
+	uint32_t mask;
+	uint16_t type;
+	uint64_t cookie = 0;
+
+	/* walk() returns 0 once the entries are exhausted */
+	while (cookie = walk(acep, cookie, aclcnt, &flags, &type, &mask)) {
+		/* only owner@, group@, everyone@ identities are trivial */
+		switch (flags & ACE_TYPE_FLAGS) {
+		case ACE_OWNER:
+		case ACE_GROUP|ACE_IDENTIFIER_GROUP:
+		case ACE_EVERYONE:
+			break;
+		default:
+			return (1);
+
+		}
+
+		/* any inheritance makes the ACL non-trivial */
+		if (flags & (ACE_FILE_INHERIT_ACE|
+		    ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE|
+		    ACE_INHERIT_ONLY_ACE))
+			return (1);
+
+		/*
+		 * Special check for some special bits
+		 *
+		 * Don't allow anybody to deny reading basic
+		 * attributes or a files ACL.
+		 */
+		if ((mask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) &&
+		    (type == ACE_ACCESS_DENIED_ACE_TYPE))
+			return (1);
+
+		/*
+		 * Delete permissions are never set by default
+		 */
+		if (mask & (ACE_DELETE|ACE_DELETE_CHILD))
+			return (1);
+		/*
+		 * only allow owner@ to have
+		 * write_acl/write_owner/write_attributes/write_xattr/
+		 */
+		if (type == ACE_ACCESS_ALLOWED_ACE_TYPE &&
+		    (!(flags & ACE_OWNER) && (mask &
+		    (ACE_WRITE_OWNER|ACE_WRITE_ACL| ACE_WRITE_ATTRIBUTES|
+		    ACE_WRITE_NAMED_ATTRS))))
+			return (1);
+
+	}
+	return (0);
+}
+
+/*
+ * Iterator callback for ace_trivial_common() over a plain ace_t array:
+ * cookie is the index of the next entry; copies out its flags, type
+ * and mask, and returns the advanced cookie, or 0 when past the end.
+ */
+uint64_t
+ace_walk(void *datap, uint64_t cookie, int aclcnt, uint16_t *flags,
+    uint16_t *type, uint32_t *mask)
+{
+	ace_t *acep = datap;
+
+	if (cookie >= aclcnt)
+		return (0);
+
+	*flags = acep[cookie].a_flags;
+	*type = acep[cookie].a_type;
+	*mask = acep[cookie++].a_access_mask;
+
+	return (cookie);
+}
+
+/*
+ * Convenience wrapper: test a plain ace_t array for triviality using
+ * ace_walk(); see ace_trivial_common() for the rules and return value.
+ */
+int
+ace_trivial(ace_t *acep, int aclcnt)
+{
+	return (ace_trivial_common(acep, aclcnt, ace_walk));
+}
diff --git a/common/acl/acl_common.h b/common/acl/acl_common.h
new file mode 100644
index 000000000000..f76cbd3b450f
--- /dev/null
+++ b/common/acl/acl_common.h
@@ -0,0 +1,59 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _ACL_COMMON_H
+#define _ACL_COMMON_H
+
+#include <sys/types.h>
+#include <sys/acl.h>
+#include <sys/stat.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern ace_t trivial_acl[6];
+
+extern int acltrivial(const char *);
+extern void adjust_ace_pair(ace_t *pair, mode_t mode);
+extern void adjust_ace_pair_common(void *, size_t, size_t, mode_t);
+extern int ace_trivial(ace_t *acep, int aclcnt);
+extern int ace_trivial_common(void *, int,
+ uint64_t (*walk)(void *, uint64_t, int aclcnt, uint16_t *, uint16_t *,
+ uint32_t *mask));
+extern acl_t *acl_alloc(acl_type_t);
+extern void acl_free(acl_t *aclp);
+extern int acl_translate(acl_t *aclp, int target_flavor,
+ int isdir, uid_t owner, gid_t group);
+void ksort(caddr_t v, int n, int s, int (*f)());
+int cmp2acls(void *a, void *b);
+int acl_trivial_create(mode_t mode, ace_t **acl, int *count);
+void acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1,
+ uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ACL_COMMON_H */
diff --git a/common/atomic/amd64/atomic.s b/common/atomic/amd64/atomic.s
new file mode 100644
index 000000000000..4b0d66e4db20
--- /dev/null
+++ b/common/atomic/amd64/atomic.s
@@ -0,0 +1,573 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+ .file "atomic.s"
+
+#include <sys/asm_linkage.h>
+
+#if defined(_KERNEL)
+ /*
+ * Legacy kernel interfaces; they will go away (eventually).
+ */
+ ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
+ ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
+ ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
+ ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
+ ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
+ ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
+ ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
+#endif
+
+ ENTRY(atomic_inc_8)
+ ALTENTRY(atomic_inc_uchar)
+ lock
+ incb (%rdi)
+ ret
+ SET_SIZE(atomic_inc_uchar)
+ SET_SIZE(atomic_inc_8)
+
+ ENTRY(atomic_inc_16)
+ ALTENTRY(atomic_inc_ushort)
+ lock
+ incw (%rdi)
+ ret
+ SET_SIZE(atomic_inc_ushort)
+ SET_SIZE(atomic_inc_16)
+
+ ENTRY(atomic_inc_32)
+ ALTENTRY(atomic_inc_uint)
+ lock
+ incl (%rdi)
+ ret
+ SET_SIZE(atomic_inc_uint)
+ SET_SIZE(atomic_inc_32)
+
+ ENTRY(atomic_inc_64)
+ ALTENTRY(atomic_inc_ulong)
+ lock
+ incq (%rdi)
+ ret
+ SET_SIZE(atomic_inc_ulong)
+ SET_SIZE(atomic_inc_64)
+
+ ENTRY(atomic_inc_8_nv)
+ ALTENTRY(atomic_inc_uchar_nv)
+ xorl %eax, %eax / clear upper bits of %eax return register
+ incb %al / %al = 1
+ lock
+ xaddb %al, (%rdi) / %al = old value, (%rdi) = new value
+ incb %al / return new value
+ ret
+ SET_SIZE(atomic_inc_uchar_nv)
+ SET_SIZE(atomic_inc_8_nv)
+
+ ENTRY(atomic_inc_16_nv)
+ ALTENTRY(atomic_inc_ushort_nv)
+ xorl %eax, %eax / clear upper bits of %eax return register
+ incw %ax / %ax = 1
+ lock
+ xaddw %ax, (%rdi) / %ax = old value, (%rdi) = new value
+ incw %ax / return new value
+ ret
+ SET_SIZE(atomic_inc_ushort_nv)
+ SET_SIZE(atomic_inc_16_nv)
+
+ ENTRY(atomic_inc_32_nv)
+ ALTENTRY(atomic_inc_uint_nv)
+ xorl %eax, %eax / %eax = 0
+ incl %eax / %eax = 1
+ lock
+ xaddl %eax, (%rdi) / %eax = old value, (%rdi) = new value
+ incl %eax / return new value
+ ret
+ SET_SIZE(atomic_inc_uint_nv)
+ SET_SIZE(atomic_inc_32_nv)
+
+ ENTRY(atomic_inc_64_nv)
+ ALTENTRY(atomic_inc_ulong_nv)
+ xorq %rax, %rax / %rax = 0
+ incq %rax / %rax = 1
+ lock
+ xaddq %rax, (%rdi) / %rax = old value, (%rdi) = new value
+ incq %rax / return new value
+ ret
+ SET_SIZE(atomic_inc_ulong_nv)
+ SET_SIZE(atomic_inc_64_nv)
+
+ ENTRY(atomic_dec_8)
+ ALTENTRY(atomic_dec_uchar)
+ lock
+ decb (%rdi)
+ ret
+ SET_SIZE(atomic_dec_uchar)
+ SET_SIZE(atomic_dec_8)
+
+ ENTRY(atomic_dec_16)
+ ALTENTRY(atomic_dec_ushort)
+ lock
+ decw (%rdi)
+ ret
+ SET_SIZE(atomic_dec_ushort)
+ SET_SIZE(atomic_dec_16)
+
+ ENTRY(atomic_dec_32)
+ ALTENTRY(atomic_dec_uint)
+ lock
+ decl (%rdi)
+ ret
+ SET_SIZE(atomic_dec_uint)
+ SET_SIZE(atomic_dec_32)
+
+ ENTRY(atomic_dec_64)
+ ALTENTRY(atomic_dec_ulong)
+ lock
+ decq (%rdi)
+ ret
+ SET_SIZE(atomic_dec_ulong)
+ SET_SIZE(atomic_dec_64)
+
+ ENTRY(atomic_dec_8_nv)
+ ALTENTRY(atomic_dec_uchar_nv)
+ xorl %eax, %eax / clear upper bits of %eax return register
+ decb %al / %al = -1
+ lock
+ xaddb %al, (%rdi) / %al = old value, (%rdi) = new value
+ decb %al / return new value
+ ret
+ SET_SIZE(atomic_dec_uchar_nv)
+ SET_SIZE(atomic_dec_8_nv)
+
+ ENTRY(atomic_dec_16_nv)
+ ALTENTRY(atomic_dec_ushort_nv)
+ xorl %eax, %eax / clear upper bits of %eax return register
+ decw %ax / %ax = -1
+ lock
+ xaddw %ax, (%rdi) / %ax = old value, (%rdi) = new value
+ decw %ax / return new value
+ ret
+ SET_SIZE(atomic_dec_ushort_nv)
+ SET_SIZE(atomic_dec_16_nv)
+
+ ENTRY(atomic_dec_32_nv)
+ ALTENTRY(atomic_dec_uint_nv)
+ xorl %eax, %eax / %eax = 0
+ decl %eax / %eax = -1
+ lock
+ xaddl %eax, (%rdi) / %eax = old value, (%rdi) = new value
+ decl %eax / return new value
+ ret
+ SET_SIZE(atomic_dec_uint_nv)
+ SET_SIZE(atomic_dec_32_nv)
+
+ ENTRY(atomic_dec_64_nv)
+ ALTENTRY(atomic_dec_ulong_nv)
+ xorq %rax, %rax / %rax = 0
+ decq %rax / %rax = -1
+ lock
+ xaddq %rax, (%rdi) / %rax = old value, (%rdi) = new value
+ decq %rax / return new value
+ ret
+ SET_SIZE(atomic_dec_ulong_nv)
+ SET_SIZE(atomic_dec_64_nv)
+
+ ENTRY(atomic_add_8)
+ ALTENTRY(atomic_add_char)
+ lock
+ addb %sil, (%rdi)
+ ret
+ SET_SIZE(atomic_add_char)
+ SET_SIZE(atomic_add_8)
+
+ ENTRY(atomic_add_16)
+ ALTENTRY(atomic_add_short)
+ lock
+ addw %si, (%rdi)
+ ret
+ SET_SIZE(atomic_add_short)
+ SET_SIZE(atomic_add_16)
+
+ ENTRY(atomic_add_32)
+ ALTENTRY(atomic_add_int)
+ lock
+ addl %esi, (%rdi)
+ ret
+ SET_SIZE(atomic_add_int)
+ SET_SIZE(atomic_add_32)
+
+ ENTRY(atomic_add_64)
+ ALTENTRY(atomic_add_ptr)
+ ALTENTRY(atomic_add_long)
+ lock
+ addq %rsi, (%rdi)
+ ret
+ SET_SIZE(atomic_add_long)
+ SET_SIZE(atomic_add_ptr)
+ SET_SIZE(atomic_add_64)
+
+ ENTRY(atomic_or_8)
+ ALTENTRY(atomic_or_uchar)
+ lock
+ orb %sil, (%rdi)
+ ret
+ SET_SIZE(atomic_or_uchar)
+ SET_SIZE(atomic_or_8)
+
+ ENTRY(atomic_or_16)
+ ALTENTRY(atomic_or_ushort)
+ lock
+ orw %si, (%rdi)
+ ret
+ SET_SIZE(atomic_or_ushort)
+ SET_SIZE(atomic_or_16)
+
+ ENTRY(atomic_or_32)
+ ALTENTRY(atomic_or_uint)
+ lock
+ orl %esi, (%rdi)
+ ret
+ SET_SIZE(atomic_or_uint)
+ SET_SIZE(atomic_or_32)
+
+ ENTRY(atomic_or_64)
+ ALTENTRY(atomic_or_ulong)
+ lock
+ orq %rsi, (%rdi)
+ ret
+ SET_SIZE(atomic_or_ulong)
+ SET_SIZE(atomic_or_64)
+
+ ENTRY(atomic_and_8)
+ ALTENTRY(atomic_and_uchar)
+ lock
+ andb %sil, (%rdi)
+ ret
+ SET_SIZE(atomic_and_uchar)
+ SET_SIZE(atomic_and_8)
+
+ ENTRY(atomic_and_16)
+ ALTENTRY(atomic_and_ushort)
+ lock
+ andw %si, (%rdi)
+ ret
+ SET_SIZE(atomic_and_ushort)
+ SET_SIZE(atomic_and_16)
+
+ ENTRY(atomic_and_32)
+ ALTENTRY(atomic_and_uint)
+ lock
+ andl %esi, (%rdi)
+ ret
+ SET_SIZE(atomic_and_uint)
+ SET_SIZE(atomic_and_32)
+
+ ENTRY(atomic_and_64)
+ ALTENTRY(atomic_and_ulong)
+ lock
+ andq %rsi, (%rdi)
+ ret
+ SET_SIZE(atomic_and_ulong)
+ SET_SIZE(atomic_and_64)
+
+ ENTRY(atomic_add_8_nv)
+ ALTENTRY(atomic_add_char_nv)
+ movzbl %sil, %eax / %al = delta addend, clear upper bits
+ lock
+ xaddb %sil, (%rdi) / %sil = old value, (%rdi) = sum
+ addb %sil, %al / new value = original value + delta
+ ret
+ SET_SIZE(atomic_add_char_nv)
+ SET_SIZE(atomic_add_8_nv)
+
+ ENTRY(atomic_add_16_nv)
+ ALTENTRY(atomic_add_short_nv)
+ movzwl %si, %eax / %ax = delta addend, clean upper bits
+ lock
+ xaddw %si, (%rdi) / %si = old value, (%rdi) = sum
+ addw %si, %ax / new value = original value + delta
+ ret
+ SET_SIZE(atomic_add_short_nv)
+ SET_SIZE(atomic_add_16_nv)
+
+ ENTRY(atomic_add_32_nv)
+ ALTENTRY(atomic_add_int_nv)
+ mov %esi, %eax / %eax = delta addend
+ lock
+ xaddl %esi, (%rdi) / %esi = old value, (%rdi) = sum
+ add %esi, %eax / new value = original value + delta
+ ret
+ SET_SIZE(atomic_add_int_nv)
+ SET_SIZE(atomic_add_32_nv)
+
+ ENTRY(atomic_add_64_nv)
+ ALTENTRY(atomic_add_ptr_nv)
+ ALTENTRY(atomic_add_long_nv)
+ mov %rsi, %rax / %rax = delta addend
+ lock
+ xaddq %rsi, (%rdi) / %rsi = old value, (%rdi) = sum
+ addq %rsi, %rax / new value = original value + delta
+ ret
+ SET_SIZE(atomic_add_long_nv)
+ SET_SIZE(atomic_add_ptr_nv)
+ SET_SIZE(atomic_add_64_nv)
+
+ ENTRY(atomic_and_8_nv)
+ ALTENTRY(atomic_and_uchar_nv)
+ movb (%rdi), %al / %al = old value
+1:
+ movb %sil, %cl
+ andb %al, %cl / %cl = new value
+ lock
+ cmpxchgb %cl, (%rdi) / try to stick it in
+ jne 1b
+ movzbl %cl, %eax / return new value
+ ret
+ SET_SIZE(atomic_and_uchar_nv)
+ SET_SIZE(atomic_and_8_nv)
+
+ ENTRY(atomic_and_16_nv)
+ ALTENTRY(atomic_and_ushort_nv)
+ movw (%rdi), %ax / %ax = old value
+1:
+ movw %si, %cx
+ andw %ax, %cx / %cx = new value
+ lock
+ cmpxchgw %cx, (%rdi) / try to stick it in
+ jne 1b
+ movzwl %cx, %eax / return new value
+ ret
+ SET_SIZE(atomic_and_ushort_nv)
+ SET_SIZE(atomic_and_16_nv)
+
+ ENTRY(atomic_and_32_nv)
+ ALTENTRY(atomic_and_uint_nv)
+ movl (%rdi), %eax
+1:
+ movl %esi, %ecx
+ andl %eax, %ecx
+ lock
+ cmpxchgl %ecx, (%rdi)
+ jne 1b
+ movl %ecx, %eax
+ ret
+ SET_SIZE(atomic_and_uint_nv)
+ SET_SIZE(atomic_and_32_nv)
+
+ ENTRY(atomic_and_64_nv)
+ ALTENTRY(atomic_and_ulong_nv)
+ movq (%rdi), %rax
+1:
+ movq %rsi, %rcx
+ andq %rax, %rcx
+ lock
+ cmpxchgq %rcx, (%rdi)
+ jne 1b
+ movq %rcx, %rax
+ ret
+ SET_SIZE(atomic_and_ulong_nv)
+ SET_SIZE(atomic_and_64_nv)
+
+ ENTRY(atomic_or_8_nv)
+ ALTENTRY(atomic_or_uchar_nv)
+ movb (%rdi), %al / %al = old value
+1:
+ movb %sil, %cl
+ orb %al, %cl / %cl = new value
+ lock
+ cmpxchgb %cl, (%rdi) / try to stick it in
+ jne 1b
+ movzbl %cl, %eax / return new value
+ ret
+ SET_SIZE(atomic_or_uchar_nv)
+ SET_SIZE(atomic_or_8_nv)
+
+ ENTRY(atomic_or_16_nv)
+ ALTENTRY(atomic_or_ushort_nv)
+ movw (%rdi), %ax / %ax = old value
+1:
+ movw %si, %cx
+ orw %ax, %cx / %cx = new value
+ lock
+ cmpxchgw %cx, (%rdi) / try to stick it in
+ jne 1b
+ movzwl %cx, %eax / return new value
+ ret
+ SET_SIZE(atomic_or_ushort_nv)
+ SET_SIZE(atomic_or_16_nv)
+
+ ENTRY(atomic_or_32_nv)
+ ALTENTRY(atomic_or_uint_nv)
+ movl (%rdi), %eax
+1:
+ movl %esi, %ecx
+ orl %eax, %ecx
+ lock
+ cmpxchgl %ecx, (%rdi)
+ jne 1b
+ movl %ecx, %eax
+ ret
+ SET_SIZE(atomic_or_uint_nv)
+ SET_SIZE(atomic_or_32_nv)
+
+ ENTRY(atomic_or_64_nv)
+ ALTENTRY(atomic_or_ulong_nv)
+ movq (%rdi), %rax
+1:
+ movq %rsi, %rcx
+ orq %rax, %rcx
+ lock
+ cmpxchgq %rcx, (%rdi)
+ jne 1b
+ movq %rcx, %rax
+ ret
+ SET_SIZE(atomic_or_ulong_nv)
+ SET_SIZE(atomic_or_64_nv)
+
+ ENTRY(atomic_cas_8)
+ ALTENTRY(atomic_cas_uchar)
+ movzbl %sil, %eax
+ lock
+ cmpxchgb %dl, (%rdi)
+ ret
+ SET_SIZE(atomic_cas_uchar)
+ SET_SIZE(atomic_cas_8)
+
+ ENTRY(atomic_cas_16)
+ ALTENTRY(atomic_cas_ushort)
+ movzwl %si, %eax
+ lock
+ cmpxchgw %dx, (%rdi)
+ ret
+ SET_SIZE(atomic_cas_ushort)
+ SET_SIZE(atomic_cas_16)
+
+ ENTRY(atomic_cas_32)
+ ALTENTRY(atomic_cas_uint)
+ movl %esi, %eax
+ lock
+ cmpxchgl %edx, (%rdi)
+ ret
+ SET_SIZE(atomic_cas_uint)
+ SET_SIZE(atomic_cas_32)
+
+ ENTRY(atomic_cas_64)
+ ALTENTRY(atomic_cas_ulong)
+ ALTENTRY(atomic_cas_ptr)
+ movq %rsi, %rax
+ lock
+ cmpxchgq %rdx, (%rdi)
+ ret
+ SET_SIZE(atomic_cas_ptr)
+ SET_SIZE(atomic_cas_ulong)
+ SET_SIZE(atomic_cas_64)
+
+ ENTRY(atomic_swap_8)
+ ALTENTRY(atomic_swap_uchar)
+ movzbl %sil, %eax
+ lock
+ xchgb %al, (%rdi)
+ ret
+ SET_SIZE(atomic_swap_uchar)
+ SET_SIZE(atomic_swap_8)
+
+ ENTRY(atomic_swap_16)
+ ALTENTRY(atomic_swap_ushort)
+ movzwl %si, %eax
+ lock
+ xchgw %ax, (%rdi)
+ ret
+ SET_SIZE(atomic_swap_ushort)
+ SET_SIZE(atomic_swap_16)
+
+ ENTRY(atomic_swap_32)
+ ALTENTRY(atomic_swap_uint)
+ movl %esi, %eax
+ lock
+ xchgl %eax, (%rdi)
+ ret
+ SET_SIZE(atomic_swap_uint)
+ SET_SIZE(atomic_swap_32)
+
+ ENTRY(atomic_swap_64)
+ ALTENTRY(atomic_swap_ulong)
+ ALTENTRY(atomic_swap_ptr)
+ movq %rsi, %rax
+ lock
+ xchgq %rax, (%rdi)
+ ret
+ SET_SIZE(atomic_swap_ptr)
+ SET_SIZE(atomic_swap_ulong)
+ SET_SIZE(atomic_swap_64)
+
+ ENTRY(atomic_set_long_excl)
+ xorl %eax, %eax
+ lock
+ btsq %rsi, (%rdi)
+ jnc 1f
+ decl %eax / return -1
+1:
+ ret
+ SET_SIZE(atomic_set_long_excl)
+
+ ENTRY(atomic_clear_long_excl)
+ xorl %eax, %eax
+ lock
+ btrq %rsi, (%rdi)
+ jc 1f
+ decl %eax / return -1
+1:
+ ret
+ SET_SIZE(atomic_clear_long_excl)
+
+#if !defined(_KERNEL)
+
+ /*
+ * NOTE: membar_enter, and membar_exit are identical routines.
+ * We define them separately, instead of using an ALTENTRY
+ * definitions to alias them together, so that DTrace and
+ * debuggers will see a unique address for them, allowing
+ * more accurate tracing.
+ */
+
+ ENTRY(membar_enter)
+ mfence
+ ret
+ SET_SIZE(membar_enter)
+
+ ENTRY(membar_exit)
+ mfence
+ ret
+ SET_SIZE(membar_exit)
+
+ ENTRY(membar_producer)
+ sfence
+ ret
+ SET_SIZE(membar_producer)
+
+ ENTRY(membar_consumer)
+ lfence
+ ret
+ SET_SIZE(membar_consumer)
+
+#endif /* !_KERNEL */
diff --git a/common/atomic/i386/atomic.s b/common/atomic/i386/atomic.s
new file mode 100644
index 000000000000..4fa525ba20af
--- /dev/null
+++ b/common/atomic/i386/atomic.s
@@ -0,0 +1,720 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .file "atomic.s"
+
+#include <sys/asm_linkage.h>
+
+#if defined(_KERNEL)
+ /*
+ * Legacy kernel interfaces; they will go away (eventually).
+ */
+ ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
+ ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
+ ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
+ ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
+ ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
+ ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
+ ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
+#endif
+
+ ENTRY(atomic_inc_8)
+ ALTENTRY(atomic_inc_uchar)
+ movl 4(%esp), %eax / %eax = target address
+ lock
+ incb (%eax) / atomic byte increment
+ ret
+ SET_SIZE(atomic_inc_uchar)
+ SET_SIZE(atomic_inc_8)
+
+ ENTRY(atomic_inc_16)
+ ALTENTRY(atomic_inc_ushort)
+ movl 4(%esp), %eax / %eax = target address
+ lock
+ incw (%eax) / atomic word increment
+ ret
+ SET_SIZE(atomic_inc_ushort)
+ SET_SIZE(atomic_inc_16)
+
+ ENTRY(atomic_inc_32)
+ ALTENTRY(atomic_inc_uint)
+ ALTENTRY(atomic_inc_ulong)
+ movl 4(%esp), %eax / %eax = target address
+ lock
+ incl (%eax) / atomic longword increment (uint == ulong on ILP32)
+ ret
+ SET_SIZE(atomic_inc_ulong)
+ SET_SIZE(atomic_inc_uint)
+ SET_SIZE(atomic_inc_32)
+
+ ENTRY(atomic_inc_8_nv) / atomic byte increment, returns new value
+ ALTENTRY(atomic_inc_uchar_nv)
+ movl 4(%esp), %edx / %edx = target address
+ xorl %eax, %eax / clear upper bits of %eax
+ incb %al / %al = 1
+ lock
+ xaddb %al, (%edx) / %al = old value, inc (%edx)
+ incb %al / return new value
+ ret
+ SET_SIZE(atomic_inc_uchar_nv)
+ SET_SIZE(atomic_inc_8_nv)
+
+ ENTRY(atomic_inc_16_nv) / atomic word increment, returns new value
+ ALTENTRY(atomic_inc_ushort_nv)
+ movl 4(%esp), %edx / %edx = target address
+ xorl %eax, %eax / clear upper bits of %eax
+ incw %ax / %ax = 1
+ lock
+ xaddw %ax, (%edx) / %ax = old value, inc (%edx)
+ incw %ax / return new value
+ ret
+ SET_SIZE(atomic_inc_ushort_nv)
+ SET_SIZE(atomic_inc_16_nv)
+
+ ENTRY(atomic_inc_32_nv) / atomic longword increment, returns new value
+ ALTENTRY(atomic_inc_uint_nv)
+ ALTENTRY(atomic_inc_ulong_nv)
+ movl 4(%esp), %edx / %edx = target address
+ xorl %eax, %eax / %eax = 0
+ incl %eax / %eax = 1
+ lock
+ xaddl %eax, (%edx) / %eax = old value, inc (%edx)
+ incl %eax / return new value
+ ret
+ SET_SIZE(atomic_inc_ulong_nv)
+ SET_SIZE(atomic_inc_uint_nv)
+ SET_SIZE(atomic_inc_32_nv)
+
+ /*
+ * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
+ * separated, you need to also edit the libc i386 platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_inc_64_nv.
+ */
+ ENTRY(atomic_inc_64) / 64-bit increment via cmpxchg8b loop, returns new value
+ ALTENTRY(atomic_inc_64_nv)
+ pushl %edi / callee-saved scratch
+ pushl %ebx / cmpxchg8b needs %ebx, which is callee-saved
+ movl 12(%esp), %edi / %edi = target address
+ movl (%edi), %eax
+ movl 4(%edi), %edx / %edx:%eax = old value
+1: / retry loop
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ incl %ebx / %ecx:%ebx = 1
+ addl %eax, %ebx
+ adcl %edx, %ecx / add in the carry from inc
+ lock
+ cmpxchg8b (%edi) / try to stick it in
+ jne 1b / lost the race: %edx:%eax = current value, retry
+ movl %ebx, %eax
+ movl %ecx, %edx / return new value
+ popl %ebx
+ popl %edi
+ ret
+ SET_SIZE(atomic_inc_64_nv)
+ SET_SIZE(atomic_inc_64)
+
+ ENTRY(atomic_dec_8)
+ ALTENTRY(atomic_dec_uchar)
+ movl 4(%esp), %eax / %eax = target address
+ lock
+ decb (%eax) / atomic byte decrement
+ ret
+ SET_SIZE(atomic_dec_uchar)
+ SET_SIZE(atomic_dec_8)
+
+ ENTRY(atomic_dec_16)
+ ALTENTRY(atomic_dec_ushort)
+ movl 4(%esp), %eax / %eax = target address
+ lock
+ decw (%eax) / atomic word decrement
+ ret
+ SET_SIZE(atomic_dec_ushort)
+ SET_SIZE(atomic_dec_16)
+
+ ENTRY(atomic_dec_32)
+ ALTENTRY(atomic_dec_uint)
+ ALTENTRY(atomic_dec_ulong)
+ movl 4(%esp), %eax / %eax = target address
+ lock
+ decl (%eax) / atomic longword decrement (uint == ulong on ILP32)
+ ret
+ SET_SIZE(atomic_dec_ulong)
+ SET_SIZE(atomic_dec_uint)
+ SET_SIZE(atomic_dec_32)
+
+ ENTRY(atomic_dec_8_nv) / atomic byte decrement, returns new value
+ ALTENTRY(atomic_dec_uchar_nv)
+ movl 4(%esp), %edx / %edx = target address
+ xorl %eax, %eax / zero upper bits of %eax
+ decb %al / %al = -1
+ lock
+ xaddb %al, (%edx) / %al = old value, dec (%edx)
+ decb %al / return new value
+ ret
+ SET_SIZE(atomic_dec_uchar_nv)
+ SET_SIZE(atomic_dec_8_nv)
+
+ ENTRY(atomic_dec_16_nv) / atomic word decrement, returns new value
+ ALTENTRY(atomic_dec_ushort_nv)
+ movl 4(%esp), %edx / %edx = target address
+ xorl %eax, %eax / zero upper bits of %eax
+ decw %ax / %ax = -1
+ lock
+ xaddw %ax, (%edx) / %ax = old value, dec (%edx)
+ decw %ax / return new value
+ ret
+ SET_SIZE(atomic_dec_ushort_nv)
+ SET_SIZE(atomic_dec_16_nv)
+
+ ENTRY(atomic_dec_32_nv) / atomic longword decrement, returns new value
+ ALTENTRY(atomic_dec_uint_nv)
+ ALTENTRY(atomic_dec_ulong_nv)
+ movl 4(%esp), %edx / %edx = target address
+ xorl %eax, %eax / %eax = 0
+ decl %eax / %eax = -1
+ lock
+ xaddl %eax, (%edx) / %eax = old value, dec (%edx)
+ decl %eax / return new value
+ ret
+ SET_SIZE(atomic_dec_ulong_nv)
+ SET_SIZE(atomic_dec_uint_nv)
+ SET_SIZE(atomic_dec_32_nv)
+
+ /*
+ * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
+ * separated, it is important to edit the libc i386 platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_dec_64_nv.
+ */
+ ENTRY(atomic_dec_64) / 64-bit decrement via cmpxchg8b loop, returns new value
+ ALTENTRY(atomic_dec_64_nv)
+ pushl %edi / callee-saved scratch
+ pushl %ebx / cmpxchg8b needs %ebx, which is callee-saved
+ movl 12(%esp), %edi / %edi = target address
+ movl (%edi), %eax
+ movl 4(%edi), %edx / %edx:%eax = old value
+1: / retry loop
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ not %ecx
+ not %ebx / %ecx:%ebx = -1
+ addl %eax, %ebx
+ adcl %edx, %ecx / propagate carry: adding -1 == subtracting 1
+ lock
+ cmpxchg8b (%edi) / try to stick it in
+ jne 1b / lost the race: %edx:%eax = current value, retry
+ movl %ebx, %eax
+ movl %ecx, %edx / return new value
+ popl %ebx
+ popl %edi
+ ret
+ SET_SIZE(atomic_dec_64_nv)
+ SET_SIZE(atomic_dec_64)
+
+ ENTRY(atomic_add_8)
+ ALTENTRY(atomic_add_char)
+ movl 4(%esp), %eax / %eax = target address
+ movl 8(%esp), %ecx / %cl = delta
+ lock
+ addb %cl, (%eax) / atomic byte add
+ ret
+ SET_SIZE(atomic_add_char)
+ SET_SIZE(atomic_add_8)
+
+ ENTRY(atomic_add_16)
+ ALTENTRY(atomic_add_short)
+ movl 4(%esp), %eax / %eax = target address
+ movl 8(%esp), %ecx / %cx = delta
+ lock
+ addw %cx, (%eax) / atomic word add
+ ret
+ SET_SIZE(atomic_add_short)
+ SET_SIZE(atomic_add_16)
+
+ ENTRY(atomic_add_32)
+ ALTENTRY(atomic_add_int)
+ ALTENTRY(atomic_add_ptr)
+ ALTENTRY(atomic_add_long)
+ movl 4(%esp), %eax / %eax = target address
+ movl 8(%esp), %ecx / %ecx = delta
+ lock
+ addl %ecx, (%eax) / atomic add (int == long == ptr on ILP32)
+ ret
+ SET_SIZE(atomic_add_long)
+ SET_SIZE(atomic_add_ptr)
+ SET_SIZE(atomic_add_int)
+ SET_SIZE(atomic_add_32)
+
+ ENTRY(atomic_or_8)
+ ALTENTRY(atomic_or_uchar)
+ movl 4(%esp), %eax / %eax = target address
+ movb 8(%esp), %cl / %cl = bits to set
+ lock
+ orb %cl, (%eax) / atomic byte OR
+ ret
+ SET_SIZE(atomic_or_uchar)
+ SET_SIZE(atomic_or_8)
+
+ ENTRY(atomic_or_16)
+ ALTENTRY(atomic_or_ushort)
+ movl 4(%esp), %eax / %eax = target address
+ movw 8(%esp), %cx / %cx = bits to set
+ lock
+ orw %cx, (%eax) / atomic word OR
+ ret
+ SET_SIZE(atomic_or_ushort)
+ SET_SIZE(atomic_or_16)
+
+ ENTRY(atomic_or_32)
+ ALTENTRY(atomic_or_uint)
+ ALTENTRY(atomic_or_ulong)
+ movl 4(%esp), %eax / %eax = target address
+ movl 8(%esp), %ecx / %ecx = bits to set
+ lock
+ orl %ecx, (%eax) / atomic longword OR (uint == ulong on ILP32)
+ ret
+ SET_SIZE(atomic_or_ulong)
+ SET_SIZE(atomic_or_uint)
+ SET_SIZE(atomic_or_32)
+
+ ENTRY(atomic_and_8)
+ ALTENTRY(atomic_and_uchar)
+ movl 4(%esp), %eax / %eax = target address
+ movb 8(%esp), %cl / %cl = mask of bits to keep
+ lock
+ andb %cl, (%eax) / atomic byte AND
+ ret
+ SET_SIZE(atomic_and_uchar)
+ SET_SIZE(atomic_and_8)
+
+ ENTRY(atomic_and_16)
+ ALTENTRY(atomic_and_ushort)
+ movl 4(%esp), %eax / %eax = target address
+ movw 8(%esp), %cx / %cx = mask of bits to keep
+ lock
+ andw %cx, (%eax) / atomic word AND
+ ret
+ SET_SIZE(atomic_and_ushort)
+ SET_SIZE(atomic_and_16)
+
+ ENTRY(atomic_and_32)
+ ALTENTRY(atomic_and_uint)
+ ALTENTRY(atomic_and_ulong)
+ movl 4(%esp), %eax / %eax = target address
+ movl 8(%esp), %ecx / %ecx = mask of bits to keep
+ lock
+ andl %ecx, (%eax) / atomic longword AND (uint == ulong on ILP32)
+ ret
+ SET_SIZE(atomic_and_ulong)
+ SET_SIZE(atomic_and_uint)
+ SET_SIZE(atomic_and_32)
+
+ ENTRY(atomic_add_8_nv) / atomic byte add, returns new value
+ ALTENTRY(atomic_add_char_nv)
+ movl 4(%esp), %edx / %edx = target address
+ movb 8(%esp), %cl / %cl = delta
+ movzbl %cl, %eax / %al = delta, zero extended
+ lock
+ xaddb %cl, (%edx) / %cl = old value, (%edx) = sum
+ addb %cl, %al / return old value plus delta
+ ret
+ SET_SIZE(atomic_add_char_nv)
+ SET_SIZE(atomic_add_8_nv)
+
+ ENTRY(atomic_add_16_nv) / atomic word add, returns new value
+ ALTENTRY(atomic_add_short_nv)
+ movl 4(%esp), %edx / %edx = target address
+ movw 8(%esp), %cx / %cx = delta
+ movzwl %cx, %eax / %ax = delta, zero extended
+ lock
+ xaddw %cx, (%edx) / %cx = old value, (%edx) = sum
+ addw %cx, %ax / return old value plus delta
+ ret
+ SET_SIZE(atomic_add_short_nv)
+ SET_SIZE(atomic_add_16_nv)
+
+ ENTRY(atomic_add_32_nv) / atomic longword add, returns new value
+ ALTENTRY(atomic_add_int_nv)
+ ALTENTRY(atomic_add_ptr_nv)
+ ALTENTRY(atomic_add_long_nv)
+ movl 4(%esp), %edx / %edx = target address
+ movl 8(%esp), %eax / %eax = delta
+ movl %eax, %ecx / %ecx = delta
+ lock
+ xaddl %eax, (%edx) / %eax = old value, (%edx) = sum
+ addl %ecx, %eax / return old value plus delta
+ ret
+ SET_SIZE(atomic_add_long_nv)
+ SET_SIZE(atomic_add_ptr_nv)
+ SET_SIZE(atomic_add_int_nv)
+ SET_SIZE(atomic_add_32_nv)
+
+ /*
+ * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
+ * separated, it is important to edit the libc i386 platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_add_64_nv.
+ */
+ ENTRY(atomic_add_64) / 64-bit add via cmpxchg8b loop, returns new value
+ ALTENTRY(atomic_add_64_nv)
+ pushl %edi / callee-saved scratch
+ pushl %ebx / cmpxchg8b needs %ebx, which is callee-saved
+ movl 12(%esp), %edi / %edi = target address
+ movl (%edi), %eax
+ movl 4(%edi), %edx / %edx:%eax = old value
+1: / retry loop (delta reloaded each pass)
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx / %ecx:%ebx = delta
+ addl %eax, %ebx
+ adcl %edx, %ecx / %ecx:%ebx = new value
+ lock
+ cmpxchg8b (%edi) / try to stick it in
+ jne 1b / lost the race: %edx:%eax = current value, retry
+ movl %ebx, %eax
+ movl %ecx, %edx / return new value
+ popl %ebx
+ popl %edi
+ ret
+ SET_SIZE(atomic_add_64_nv)
+ SET_SIZE(atomic_add_64)
+
+ ENTRY(atomic_or_8_nv) / atomic byte OR, returns new value
+ ALTENTRY(atomic_or_uchar_nv)
+ movl 4(%esp), %edx / %edx = target address
+ movb (%edx), %al / %al = old value
+1: / CAS retry loop
+ movl 8(%esp), %ecx / %ecx = delta
+ orb %al, %cl / %cl = new value
+ lock
+ cmpxchgb %cl, (%edx) / try to stick it in
+ jne 1b / failed: cmpxchg loaded current value into %al
+ movzbl %cl, %eax / return new value
+ ret
+ SET_SIZE(atomic_or_uchar_nv)
+ SET_SIZE(atomic_or_8_nv)
+
+ ENTRY(atomic_or_16_nv) / atomic word OR, returns new value
+ ALTENTRY(atomic_or_ushort_nv)
+ movl 4(%esp), %edx / %edx = target address
+ movw (%edx), %ax / %ax = old value
+1: / CAS retry loop
+ movl 8(%esp), %ecx / %ecx = delta
+ orw %ax, %cx / %cx = new value
+ lock
+ cmpxchgw %cx, (%edx) / try to stick it in
+ jne 1b / failed: cmpxchg loaded current value into %ax
+ movzwl %cx, %eax / return new value
+ ret
+ SET_SIZE(atomic_or_ushort_nv)
+ SET_SIZE(atomic_or_16_nv)
+
+ ENTRY(atomic_or_32_nv) / atomic longword OR, returns new value
+ ALTENTRY(atomic_or_uint_nv)
+ ALTENTRY(atomic_or_ulong_nv)
+ movl 4(%esp), %edx / %edx = target address
+ movl (%edx), %eax / %eax = old value
+1: / CAS retry loop
+ movl 8(%esp), %ecx / %ecx = delta
+ orl %eax, %ecx / %ecx = new value
+ lock
+ cmpxchgl %ecx, (%edx) / try to stick it in
+ jne 1b / failed: cmpxchg loaded current value into %eax
+ movl %ecx, %eax / return new value
+ ret
+ SET_SIZE(atomic_or_ulong_nv)
+ SET_SIZE(atomic_or_uint_nv)
+ SET_SIZE(atomic_or_32_nv)
+
+ /*
+ * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
+ * separated, it is important to edit the libc i386 platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_or_64_nv.
+ */
+ ENTRY(atomic_or_64) / 64-bit OR via cmpxchg8b loop, returns new value
+ ALTENTRY(atomic_or_64_nv)
+ pushl %edi / callee-saved scratch
+ pushl %ebx / cmpxchg8b needs %ebx, which is callee-saved
+ movl 12(%esp), %edi / %edi = target address
+ movl (%edi), %eax
+ movl 4(%edi), %edx / %edx:%eax = old value
+1: / retry loop (delta reloaded each pass)
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx / %ecx:%ebx = delta
+ orl %eax, %ebx
+ orl %edx, %ecx / %ecx:%ebx = new value
+ lock
+ cmpxchg8b (%edi) / try to stick it in
+ jne 1b / lost the race: %edx:%eax = current value, retry
+ movl %ebx, %eax
+ movl %ecx, %edx / return new value
+ popl %ebx
+ popl %edi
+ ret
+ SET_SIZE(atomic_or_64_nv)
+ SET_SIZE(atomic_or_64)
+
+ ENTRY(atomic_and_8_nv) / atomic byte AND, returns new value
+ ALTENTRY(atomic_and_uchar_nv)
+ movl 4(%esp), %edx / %edx = target address
+ movb (%edx), %al / %al = old value
+1: / CAS retry loop
+ movl 8(%esp), %ecx / %ecx = delta
+ andb %al, %cl / %cl = new value
+ lock
+ cmpxchgb %cl, (%edx) / try to stick it in
+ jne 1b / failed: cmpxchg loaded current value into %al
+ movzbl %cl, %eax / return new value
+ ret
+ SET_SIZE(atomic_and_uchar_nv)
+ SET_SIZE(atomic_and_8_nv)
+
+ ENTRY(atomic_and_16_nv) / atomic word AND, returns new value
+ ALTENTRY(atomic_and_ushort_nv)
+ movl 4(%esp), %edx / %edx = target address
+ movw (%edx), %ax / %ax = old value
+1: / CAS retry loop
+ movl 8(%esp), %ecx / %ecx = delta
+ andw %ax, %cx / %cx = new value
+ lock
+ cmpxchgw %cx, (%edx) / try to stick it in
+ jne 1b / failed: cmpxchg loaded current value into %ax
+ movzwl %cx, %eax / return new value
+ ret
+ SET_SIZE(atomic_and_ushort_nv)
+ SET_SIZE(atomic_and_16_nv)
+
+ ENTRY(atomic_and_32_nv) / atomic longword AND, returns new value
+ ALTENTRY(atomic_and_uint_nv)
+ ALTENTRY(atomic_and_ulong_nv)
+ movl 4(%esp), %edx / %edx = target address
+ movl (%edx), %eax / %eax = old value
+1: / CAS retry loop
+ movl 8(%esp), %ecx / %ecx = delta
+ andl %eax, %ecx / %ecx = new value
+ lock
+ cmpxchgl %ecx, (%edx) / try to stick it in
+ jne 1b / failed: cmpxchg loaded current value into %eax
+ movl %ecx, %eax / return new value
+ ret
+ SET_SIZE(atomic_and_ulong_nv)
+ SET_SIZE(atomic_and_uint_nv)
+ SET_SIZE(atomic_and_32_nv)
+
+ /*
+ * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
+ * separated, it is important to edit the libc i386 platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_and_64_nv.
+ */
+ ENTRY(atomic_and_64) / 64-bit AND via cmpxchg8b loop, returns new value
+ ALTENTRY(atomic_and_64_nv)
+ pushl %edi / callee-saved scratch
+ pushl %ebx / cmpxchg8b needs %ebx, which is callee-saved
+ movl 12(%esp), %edi / %edi = target address
+ movl (%edi), %eax
+ movl 4(%edi), %edx / %edx:%eax = old value
+1: / retry loop (delta reloaded each pass)
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx / %ecx:%ebx = delta
+ andl %eax, %ebx
+ andl %edx, %ecx / %ecx:%ebx = new value
+ lock
+ cmpxchg8b (%edi) / try to stick it in
+ jne 1b / lost the race: %edx:%eax = current value, retry
+ movl %ebx, %eax
+ movl %ecx, %edx / return new value
+ popl %ebx
+ popl %edi
+ ret
+ SET_SIZE(atomic_and_64_nv)
+ SET_SIZE(atomic_and_64)
+
+ ENTRY(atomic_cas_8)
+ ALTENTRY(atomic_cas_uchar)
+ movl 4(%esp), %edx / %edx = target address
+ movzbl 8(%esp), %eax / %al = expected old value
+ movb 12(%esp), %cl / %cl = new value
+ lock
+ cmpxchgb %cl, (%edx) / if (*%edx == %al) *%edx = %cl; %al = old value
+ ret / return the previous value
+ SET_SIZE(atomic_cas_uchar)
+ SET_SIZE(atomic_cas_8)
+
+ ENTRY(atomic_cas_16)
+ ALTENTRY(atomic_cas_ushort)
+ movl 4(%esp), %edx / %edx = target address
+ movzwl 8(%esp), %eax / %ax = expected old value
+ movw 12(%esp), %cx / %cx = new value
+ lock
+ cmpxchgw %cx, (%edx) / if (*%edx == %ax) *%edx = %cx; %ax = old value
+ ret / return the previous value
+ SET_SIZE(atomic_cas_ushort)
+ SET_SIZE(atomic_cas_16)
+
+ ENTRY(atomic_cas_32)
+ ALTENTRY(atomic_cas_uint)
+ ALTENTRY(atomic_cas_ulong)
+ ALTENTRY(atomic_cas_ptr)
+ movl 4(%esp), %edx / %edx = target address
+ movl 8(%esp), %eax / %eax = expected old value
+ movl 12(%esp), %ecx / %ecx = new value
+ lock
+ cmpxchgl %ecx, (%edx) / if (*%edx == %eax) *%edx = %ecx; %eax = old value
+ ret / return the previous value
+ SET_SIZE(atomic_cas_ptr)
+ SET_SIZE(atomic_cas_ulong)
+ SET_SIZE(atomic_cas_uint)
+ SET_SIZE(atomic_cas_32)
+
+ ENTRY(atomic_cas_64)
+ pushl %ebx / cmpxchg8b needs %ebx, which is callee-saved
+ pushl %esi / callee-saved scratch
+ movl 12(%esp), %esi / %esi = target address
+ movl 16(%esp), %eax
+ movl 20(%esp), %edx / %edx:%eax = expected old value
+ movl 24(%esp), %ebx
+ movl 28(%esp), %ecx / %ecx:%ebx = new value
+ lock
+ cmpxchg8b (%esi) / %edx:%eax = previous value
+ popl %esi
+ popl %ebx
+ ret / return old value in %edx:%eax
+ SET_SIZE(atomic_cas_64)
+
+ ENTRY(atomic_swap_8)
+ ALTENTRY(atomic_swap_uchar)
+ movl 4(%esp), %edx / %edx = target address
+ movzbl 8(%esp), %eax / %al = new value
+ lock / xchg with a memory operand is implicitly locked
+ xchgb %al, (%edx) / %al = previous value
+ ret / return the old byte
+ SET_SIZE(atomic_swap_uchar)
+ SET_SIZE(atomic_swap_8)
+
+ ENTRY(atomic_swap_16)
+ ALTENTRY(atomic_swap_ushort)
+ movl 4(%esp), %edx / %edx = target address
+ movzwl 8(%esp), %eax / %ax = new value
+ lock / xchg with a memory operand is implicitly locked
+ xchgw %ax, (%edx) / %ax = previous value
+ ret / return the old word
+ SET_SIZE(atomic_swap_ushort)
+ SET_SIZE(atomic_swap_16)
+
+ ENTRY(atomic_swap_32)
+ ALTENTRY(atomic_swap_uint)
+ ALTENTRY(atomic_swap_ptr)
+ ALTENTRY(atomic_swap_ulong)
+ movl 4(%esp), %edx / %edx = target address
+ movl 8(%esp), %eax / %eax = new value
+ lock / xchg with a memory operand is implicitly locked
+ xchgl %eax, (%edx) / %eax = previous value
+ ret / return the old longword
+ SET_SIZE(atomic_swap_ulong)
+ SET_SIZE(atomic_swap_ptr)
+ SET_SIZE(atomic_swap_uint)
+ SET_SIZE(atomic_swap_32)
+
+ ENTRY(atomic_swap_64) / 64-bit swap emulated with a cmpxchg8b loop
+ pushl %esi / callee-saved scratch
+ pushl %ebx / cmpxchg8b needs %ebx, which is callee-saved
+ movl 12(%esp), %esi / %esi = target address
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx / %ecx:%ebx = new value
+ movl (%esi), %eax
+ movl 4(%esi), %edx / %edx:%eax = old value
+1: / retry until the exchange succeeds atomically
+ lock
+ cmpxchg8b (%esi)
+ jne 1b / failed: %edx:%eax reloaded with current value
+ popl %ebx
+ popl %esi
+ ret / return old value in %edx:%eax
+ SET_SIZE(atomic_swap_64)
+
+ ENTRY(atomic_set_long_excl)
+ movl 4(%esp), %edx / %edx = target address
+ movl 8(%esp), %ecx / %ecx = bit id
+ xorl %eax, %eax / assume success: return 0
+ lock
+ btsl %ecx, (%edx) / set the bit; CF = its previous state
+ jnc 1f / bit was clear: caller now owns it
+ decl %eax / return -1
+1: / %eax = 0 on success, -1 if bit was already set
+ ret
+ SET_SIZE(atomic_set_long_excl)
+
+ ENTRY(atomic_clear_long_excl)
+ movl 4(%esp), %edx / %edx = target address
+ movl 8(%esp), %ecx / %ecx = bit id
+ xorl %eax, %eax / assume success: return 0
+ lock
+ btrl %ecx, (%edx) / clear the bit; CF = its previous state
+ jc 1f / bit was set: clear succeeded
+ decl %eax / return -1
+1: / %eax = 0 on success, -1 if bit was already clear
+ ret
+ SET_SIZE(atomic_clear_long_excl)
+
+#if !defined(_KERNEL)
+
+ /*
+ * NOTE: membar_enter, membar_exit, membar_producer, and
+ * membar_consumer are all identical routines. We define them
+ * separately, instead of using ALTENTRY definitions to alias them
+ * together, so that DTrace and debuggers will see a unique address
+ * for them, allowing more accurate tracing.
+ */
+
+
+ ENTRY(membar_enter)
+ lock / a locked RMW is a full memory barrier on x86
+ xorl $0, (%esp) / no-op store to the stack; presumably used instead of mfence for pre-SSE2 CPUs - confirm
+ ret
+ SET_SIZE(membar_enter)
+
+ ENTRY(membar_exit)
+ lock / a locked RMW is a full memory barrier on x86
+ xorl $0, (%esp) / no-op store to the stack
+ ret
+ SET_SIZE(membar_exit)
+
+ ENTRY(membar_producer)
+ lock / a locked RMW is a full memory barrier on x86
+ xorl $0, (%esp) / no-op store to the stack
+ ret
+ SET_SIZE(membar_producer)
+
+ ENTRY(membar_consumer)
+ lock / a locked RMW is a full memory barrier on x86
+ xorl $0, (%esp) / no-op store to the stack
+ ret
+ SET_SIZE(membar_consumer)
+
+#endif /* !_KERNEL */
diff --git a/common/atomic/sparc/atomic.s b/common/atomic/sparc/atomic.s
new file mode 100644
index 000000000000..8aa240efa297
--- /dev/null
+++ b/common/atomic/sparc/atomic.s
@@ -0,0 +1,801 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .file "atomic.s"
+
+#include <sys/asm_linkage.h>
+
+#if defined(_KERNEL)
+ /*
+ * Legacy kernel interfaces; they will go away (eventually).
+ */
+ ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
+ ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
+ ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
+ ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
+ ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
+ ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
+ ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
+ ANSI_PRAGMA_WEAK2(swapl,atomic_swap_32,function)
+#endif
+
+ /*
+ * NOTE: If atomic_inc_8 and atomic_inc_8_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_inc_8_nv.
+ */
+ ENTRY(atomic_inc_8)
+ ALTENTRY(atomic_inc_8_nv)
+ ALTENTRY(atomic_inc_uchar)
+ ALTENTRY(atomic_inc_uchar_nv)
+ ba add_8 ! common byte add path, returns new value
+ add %g0, 1, %o1 ! delay slot (always executed): delta %o1 = 1
+ SET_SIZE(atomic_inc_uchar_nv)
+ SET_SIZE(atomic_inc_uchar)
+ SET_SIZE(atomic_inc_8_nv)
+ SET_SIZE(atomic_inc_8)
+
+ /*
+ * NOTE: If atomic_dec_8 and atomic_dec_8_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_dec_8_nv.
+ */
+ ENTRY(atomic_dec_8)
+ ALTENTRY(atomic_dec_8_nv)
+ ALTENTRY(atomic_dec_uchar)
+ ALTENTRY(atomic_dec_uchar_nv)
+ ba add_8 ! common byte add path, returns new value
+ sub %g0, 1, %o1 ! delay slot (always executed): delta %o1 = -1
+ SET_SIZE(atomic_dec_uchar_nv)
+ SET_SIZE(atomic_dec_uchar)
+ SET_SIZE(atomic_dec_8_nv)
+ SET_SIZE(atomic_dec_8)
+
+ /*
+ * NOTE: If atomic_add_8 and atomic_add_8_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_add_8_nv.
+ */
+ ENTRY(atomic_add_8)
+ ALTENTRY(atomic_add_8_nv)
+ ALTENTRY(atomic_add_char)
+ ALTENTRY(atomic_add_char_nv)
+add_8:
+ and %o0, 0x3, %o4 ! %o4 = byte offset, left-to-right
+ xor %o4, 0x3, %g1 ! %g1 = byte offset, right-to-left
+ sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left
+ set 0xff, %o3 ! %o3 = mask
+ sll %o3, %g1, %o3 ! %o3 = shifted to bit offset
+ sll %o1, %g1, %o1 ! %o1 = shifted to bit offset
+ and %o1, %o3, %o1 ! %o1 = single byte value
+ andn %o0, 0x3, %o0 ! %o0 = word address
+ ld [%o0], %o2 ! read old value
+1: ! CAS retry loop
+ add %o2, %o1, %o5 ! add value to the old value
+ and %o5, %o3, %o5 ! clear other bits
+ andn %o2, %o3, %o4 ! clear target bits
+ or %o4, %o5, %o5 ! insert the new value
+ cas [%o0], %o2, %o5 ! %o5 = word actually in memory
+ cmp %o2, %o5 ! did the word change underneath us?
+ bne,a,pn %icc, 1b ! yes: retry; annulled slot runs only if taken
+ mov %o5, %o2 ! %o2 = old value
+ add %o2, %o1, %o5 ! recompute the new byte from the winning old value
+ and %o5, %o3, %o5 ! isolate the byte
+ retl
+ srl %o5, %g1, %o0 ! %o0 = new value
+ SET_SIZE(atomic_add_char_nv)
+ SET_SIZE(atomic_add_char)
+ SET_SIZE(atomic_add_8_nv)
+ SET_SIZE(atomic_add_8)
+
+ /*
+ * NOTE: If atomic_inc_16 and atomic_inc_16_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_inc_16_nv.
+ */
+ ENTRY(atomic_inc_16)
+ ALTENTRY(atomic_inc_16_nv)
+ ALTENTRY(atomic_inc_ushort)
+ ALTENTRY(atomic_inc_ushort_nv)
+ ba add_16 ! common halfword add path, returns new value
+ add %g0, 1, %o1 ! delay slot (always executed): delta %o1 = 1
+ SET_SIZE(atomic_inc_ushort_nv)
+ SET_SIZE(atomic_inc_ushort)
+ SET_SIZE(atomic_inc_16_nv)
+ SET_SIZE(atomic_inc_16)
+
+ /*
+ * NOTE: If atomic_dec_16 and atomic_dec_16_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_dec_16_nv.
+ */
+ ENTRY(atomic_dec_16)
+ ALTENTRY(atomic_dec_16_nv)
+ ALTENTRY(atomic_dec_ushort)
+ ALTENTRY(atomic_dec_ushort_nv)
+ ba add_16 ! common halfword add path, returns new value
+ sub %g0, 1, %o1 ! delay slot (always executed): delta %o1 = -1
+ SET_SIZE(atomic_dec_ushort_nv)
+ SET_SIZE(atomic_dec_ushort)
+ SET_SIZE(atomic_dec_16_nv)
+ SET_SIZE(atomic_dec_16)
+
+ /*
+ * NOTE: If atomic_add_16 and atomic_add_16_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_add_16_nv.
+ */
+ ENTRY(atomic_add_16)
+ ALTENTRY(atomic_add_16_nv)
+ ALTENTRY(atomic_add_short)
+ ALTENTRY(atomic_add_short_nv)
+add_16:
+ and %o0, 0x2, %o4 ! %o4 = byte offset, left-to-right
+ xor %o4, 0x2, %g1 ! %g1 = byte offset, right-to-left
+ sll %o4, 3, %o4 ! %o4 = bit offset, left-to-right
+ sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left
+ sethi %hi(0xffff0000), %o3 ! %o3 = mask
+ srl %o3, %o4, %o3 ! %o3 = shifted to bit offset
+ sll %o1, %g1, %o1 ! %o1 = shifted to bit offset
+ and %o1, %o3, %o1 ! %o1 = single short value
+ andn %o0, 0x2, %o0 ! %o0 = word address
+ ! if low-order bit is 1, we will properly get an alignment fault here
+ ld [%o0], %o2 ! read old value
+1: ! CAS retry loop
+ add %o1, %o2, %o5 ! add value to the old value
+ and %o5, %o3, %o5 ! clear other bits
+ andn %o2, %o3, %o4 ! clear target bits
+ or %o4, %o5, %o5 ! insert the new value
+ cas [%o0], %o2, %o5 ! %o5 = word actually in memory
+ cmp %o2, %o5 ! did the word change underneath us?
+ bne,a,pn %icc, 1b ! yes: retry; annulled slot runs only if taken
+ mov %o5, %o2 ! %o2 = old value
+ add %o1, %o2, %o5 ! recompute the new short from the winning old value
+ and %o5, %o3, %o5 ! isolate the short
+ retl
+ srl %o5, %g1, %o0 ! %o0 = new value
+ SET_SIZE(atomic_add_short_nv)
+ SET_SIZE(atomic_add_short)
+ SET_SIZE(atomic_add_16_nv)
+ SET_SIZE(atomic_add_16)
+
+ /*
+ * NOTE: If atomic_inc_32 and atomic_inc_32_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_inc_32_nv.
+ */
+ ENTRY(atomic_inc_32)
+ ALTENTRY(atomic_inc_32_nv)
+ ALTENTRY(atomic_inc_uint)
+ ALTENTRY(atomic_inc_uint_nv)
+ ALTENTRY(atomic_inc_ulong)
+ ALTENTRY(atomic_inc_ulong_nv)
+ ba add_32 ! common word add path, returns new value
+ add %g0, 1, %o1 ! delay slot (always executed): delta %o1 = 1
+ SET_SIZE(atomic_inc_ulong_nv)
+ SET_SIZE(atomic_inc_ulong)
+ SET_SIZE(atomic_inc_uint_nv)
+ SET_SIZE(atomic_inc_uint)
+ SET_SIZE(atomic_inc_32_nv)
+ SET_SIZE(atomic_inc_32)
+
+ /*
+ * NOTE: If atomic_dec_32 and atomic_dec_32_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_dec_32_nv.
+ */
+ ENTRY(atomic_dec_32)
+ ALTENTRY(atomic_dec_32_nv)
+ ALTENTRY(atomic_dec_uint)
+ ALTENTRY(atomic_dec_uint_nv)
+ ALTENTRY(atomic_dec_ulong)
+ ALTENTRY(atomic_dec_ulong_nv)
+ ba add_32 ! common word add path, returns new value
+ sub %g0, 1, %o1 ! delay slot (always executed): delta %o1 = -1
+ SET_SIZE(atomic_dec_ulong_nv)
+ SET_SIZE(atomic_dec_ulong)
+ SET_SIZE(atomic_dec_uint_nv)
+ SET_SIZE(atomic_dec_uint)
+ SET_SIZE(atomic_dec_32_nv)
+ SET_SIZE(atomic_dec_32)
+
+ /*
+ * NOTE: If atomic_add_32 and atomic_add_32_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_add_32_nv.
+ */
+ ENTRY(atomic_add_32)
+ ALTENTRY(atomic_add_32_nv)
+ ALTENTRY(atomic_add_int)
+ ALTENTRY(atomic_add_int_nv)
+ ALTENTRY(atomic_add_ptr)
+ ALTENTRY(atomic_add_ptr_nv)
+ ALTENTRY(atomic_add_long)
+ ALTENTRY(atomic_add_long_nv)
+add_32:
+ ld [%o0], %o2 ! %o2 = old value
+1: ! CAS retry loop
+ add %o2, %o1, %o3 ! %o3 = proposed new value
+ cas [%o0], %o2, %o3 ! %o3 = word actually in memory
+ cmp %o2, %o3 ! did the word change underneath us?
+ bne,a,pn %icc, 1b ! yes: retry; annulled slot runs only if taken
+ mov %o3, %o2 ! %o2 = old value
+ retl
+ add %o2, %o1, %o0 ! return new value
+ SET_SIZE(atomic_add_long_nv)
+ SET_SIZE(atomic_add_long)
+ SET_SIZE(atomic_add_ptr_nv)
+ SET_SIZE(atomic_add_ptr)
+ SET_SIZE(atomic_add_int_nv)
+ SET_SIZE(atomic_add_int)
+ SET_SIZE(atomic_add_32_nv)
+ SET_SIZE(atomic_add_32)
+
+ /*
+ * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_inc_64_nv.
+ */
+ ENTRY(atomic_inc_64)
+ ALTENTRY(atomic_inc_64_nv)
+ ba add_64 ! common 64-bit add path, returns new value
+ add %g0, 1, %o1 ! delay slot (always executed): delta %o1 = 1
+ SET_SIZE(atomic_inc_64_nv)
+ SET_SIZE(atomic_inc_64)
+
+ /*
+ * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_dec_64_nv.
+ */
+ ENTRY(atomic_dec_64)
+ ALTENTRY(atomic_dec_64_nv)
+ ba add_64 ! common 64-bit add path, returns new value
+ sub %g0, 1, %o1 ! delay slot (always executed): delta %o1 = -1
+ SET_SIZE(atomic_dec_64_nv)
+ SET_SIZE(atomic_dec_64)
+
+ /*
+ * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_add_64_nv.
+ */
+ ENTRY(atomic_add_64) / NOTE(review): 64-bit arg appears split across %o1/%o2 (32-bit ABI) - confirm
+ ALTENTRY(atomic_add_64_nv)
+ sllx %o1, 32, %o1 ! upper 32 in %o1, lower in %o2
+ srl %o2, 0, %o2 ! zero-extend the low half
+ add %o1, %o2, %o1 ! convert 2 32-bit args into 1 64-bit
+add_64:
+ ldx [%o0], %o2 ! %o2 = old value
+1: ! CASX retry loop
+ add %o2, %o1, %o3 ! %o3 = proposed new value
+ casx [%o0], %o2, %o3 ! %o3 = doubleword actually in memory
+ cmp %o2, %o3 ! did it change underneath us?
+ bne,a,pn %xcc, 1b ! yes: retry; annulled slot runs only if taken
+ mov %o3, %o2 ! %o2 = old value
+ add %o2, %o1, %o1 ! return lower 32-bits in %o1
+ retl
+ srlx %o1, 32, %o0 ! return upper 32-bits in %o0
+ SET_SIZE(atomic_add_64_nv)
+ SET_SIZE(atomic_add_64)
+
+ /*
+ * NOTE: If atomic_or_8 and atomic_or_8_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_or_8_nv.
+ */
+ ENTRY(atomic_or_8)
+ ALTENTRY(atomic_or_8_nv)
+ ALTENTRY(atomic_or_uchar)
+ ALTENTRY(atomic_or_uchar_nv)
+ and %o0, 0x3, %o4 ! %o4 = byte offset, left-to-right
+ xor %o4, 0x3, %g1 ! %g1 = byte offset, right-to-left
+ sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left
+ set 0xff, %o3 ! %o3 = mask
+ sll %o3, %g1, %o3 ! %o3 = shifted to bit offset
+ sll %o1, %g1, %o1 ! %o1 = shifted to bit offset
+ and %o1, %o3, %o1 ! %o1 = single byte value
+ andn %o0, 0x3, %o0 ! %o0 = word address
+ ld [%o0], %o2 ! read old value
+1: ! CAS retry loop
+ or %o2, %o1, %o5 ! or in the new value
+ cas [%o0], %o2, %o5 ! %o5 = word actually in memory
+ cmp %o2, %o5 ! did the word change underneath us?
+ bne,a,pn %icc, 1b ! yes: retry; annulled slot runs only if taken
+ mov %o5, %o2 ! %o2 = old value
+ or %o2, %o1, %o5 ! recompute the new byte
+ and %o5, %o3, %o5 ! isolate the byte
+ retl
+ srl %o5, %g1, %o0 ! %o0 = new value
+ SET_SIZE(atomic_or_uchar_nv)
+ SET_SIZE(atomic_or_uchar)
+ SET_SIZE(atomic_or_8_nv)
+ SET_SIZE(atomic_or_8)
+
+ /*
+ * NOTE: If atomic_or_16 and atomic_or_16_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_or_16_nv.
+ */
+ ENTRY(atomic_or_16)
+ ALTENTRY(atomic_or_16_nv)
+ ALTENTRY(atomic_or_ushort)
+ ALTENTRY(atomic_or_ushort_nv)
+ and %o0, 0x2, %o4 ! %o4 = byte offset, left-to-right
+ xor %o4, 0x2, %g1 ! %g1 = byte offset, right-to-left
+ sll %o4, 3, %o4 ! %o4 = bit offset, left-to-right
+ sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left
+ sethi %hi(0xffff0000), %o3 ! %o3 = mask
+ srl %o3, %o4, %o3 ! %o3 = shifted to bit offset
+ sll %o1, %g1, %o1 ! %o1 = shifted to bit offset
+ and %o1, %o3, %o1 ! %o1 = single short value
+ andn %o0, 0x2, %o0 ! %o0 = word address
+ ! if low-order bit is 1, we will properly get an alignment fault here
+ ld [%o0], %o2 ! read old value
+1: ! CAS retry loop
+ or %o2, %o1, %o5 ! or in the new value
+ cas [%o0], %o2, %o5 ! %o5 = word actually in memory
+ cmp %o2, %o5 ! did the word change underneath us?
+ bne,a,pn %icc, 1b ! yes: retry; annulled slot runs only if taken
+ mov %o5, %o2 ! %o2 = old value
+ or %o2, %o1, %o5 ! or in the new value
+ and %o5, %o3, %o5 ! isolate the short
+ retl
+ srl %o5, %g1, %o0 ! %o0 = new value
+ SET_SIZE(atomic_or_ushort_nv)
+ SET_SIZE(atomic_or_ushort)
+ SET_SIZE(atomic_or_16_nv)
+ SET_SIZE(atomic_or_16)
+
+ /*
+ * NOTE: If atomic_or_32 and atomic_or_32_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_or_32_nv.
+ */
+ ENTRY(atomic_or_32)
+ ALTENTRY(atomic_or_32_nv)
+ ALTENTRY(atomic_or_uint)
+ ALTENTRY(atomic_or_uint_nv)
+ ALTENTRY(atomic_or_ulong)
+ ALTENTRY(atomic_or_ulong_nv)
+ ld [%o0], %o2 ! %o2 = old value
+1: ! CAS retry loop
+ or %o2, %o1, %o3 ! %o3 = proposed new value
+ cas [%o0], %o2, %o3 ! %o3 = word actually in memory
+ cmp %o2, %o3 ! did the word change underneath us?
+ bne,a,pn %icc, 1b ! yes: retry; annulled slot runs only if taken
+ mov %o3, %o2 ! %o2 = old value
+ retl
+ or %o2, %o1, %o0 ! return new value
+ SET_SIZE(atomic_or_ulong_nv)
+ SET_SIZE(atomic_or_ulong)
+ SET_SIZE(atomic_or_uint_nv)
+ SET_SIZE(atomic_or_uint)
+ SET_SIZE(atomic_or_32_nv)
+ SET_SIZE(atomic_or_32)
+
+ /*
+ * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_or_64_nv.
+ */
+ ENTRY(atomic_or_64) / NOTE(review): 64-bit arg appears split across %o1/%o2 (32-bit ABI) - confirm
+ ALTENTRY(atomic_or_64_nv)
+ sllx %o1, 32, %o1 ! upper 32 in %o1, lower in %o2
+ srl %o2, 0, %o2 ! zero-extend the low half
+ add %o1, %o2, %o1 ! convert 2 32-bit args into 1 64-bit
+ ldx [%o0], %o2 ! %o2 = old value
+1: ! CASX retry loop
+ or %o2, %o1, %o3 ! %o3 = proposed new value
+ casx [%o0], %o2, %o3 ! %o3 = doubleword actually in memory
+ cmp %o2, %o3 ! did it change underneath us?
+ bne,a,pn %xcc, 1b ! yes: retry; annulled slot runs only if taken
+ mov %o3, %o2 ! %o2 = old value
+ or %o2, %o1, %o1 ! return lower 32-bits in %o1
+ retl
+ srlx %o1, 32, %o0 ! return upper 32-bits in %o0
+ SET_SIZE(atomic_or_64_nv)
+ SET_SIZE(atomic_or_64)
+
+ /*
+ * NOTE: If atomic_and_8 and atomic_and_8_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_and_8_nv.
+ */
+ ENTRY(atomic_and_8)
+ ALTENTRY(atomic_and_8_nv)
+ ALTENTRY(atomic_and_uchar)
+ ALTENTRY(atomic_and_uchar_nv)
+ and %o0, 0x3, %o4 ! %o4 = byte offset, left-to-right
+ xor %o4, 0x3, %g1 ! %g1 = byte offset, right-to-left
+ sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left
+ set 0xff, %o3 ! %o3 = mask
+ sll %o3, %g1, %o3 ! %o3 = shifted to bit offset
+ sll %o1, %g1, %o1 ! %o1 = shifted to bit offset
+ orn %o1, %o3, %o1 ! all ones in other bytes
+ andn %o0, 0x3, %o0 ! %o0 = word address
+ ld [%o0], %o2 ! read old value
+1: ! CAS retry loop
+ and %o2, %o1, %o5 ! and in the new value
+ cas [%o0], %o2, %o5 ! %o5 = word actually in memory
+ cmp %o2, %o5 ! did the word change underneath us?
+ bne,a,pn %icc, 1b ! yes: retry; annulled slot runs only if taken
+ mov %o5, %o2 ! %o2 = old value
+ and %o2, %o1, %o5 ! recompute the new byte
+ and %o5, %o3, %o5 ! isolate the byte
+ retl
+ srl %o5, %g1, %o0 ! %o0 = new value
+ SET_SIZE(atomic_and_uchar_nv)
+ SET_SIZE(atomic_and_uchar)
+ SET_SIZE(atomic_and_8_nv)
+ SET_SIZE(atomic_and_8)
+
+ /*
+ * NOTE: If atomic_and_16 and atomic_and_16_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_and_16_nv.
+ */
+ ENTRY(atomic_and_16)
+ ALTENTRY(atomic_and_16_nv)
+ ALTENTRY(atomic_and_ushort)
+ ALTENTRY(atomic_and_ushort_nv)
+ and %o0, 0x2, %o4 ! %o4 = byte offset, left-to-right
+ xor %o4, 0x2, %g1 ! %g1 = byte offset, right-to-left
+ sll %o4, 3, %o4 ! %o4 = bit offset, left-to-right
+ sll %g1, 3, %g1 ! %g1 = bit offset, right-to-left
+ sethi %hi(0xffff0000), %o3 ! %o3 = mask
+ srl %o3, %o4, %o3 ! %o3 = shifted to bit offset
+ sll %o1, %g1, %o1 ! %o1 = shifted to bit offset
+ orn %o1, %o3, %o1 ! all ones in the other half
+ andn %o0, 0x2, %o0 ! %o0 = word address
+ ! if low-order bit is 1, we will properly get an alignment fault here
+ ld [%o0], %o2 ! read old value
+1: ! CAS retry loop
+ and %o2, %o1, %o5 ! and in the new value
+ cas [%o0], %o2, %o5 ! %o5 = word actually in memory
+ cmp %o2, %o5 ! did the word change underneath us?
+ bne,a,pn %icc, 1b ! yes: retry; annulled slot runs only if taken
+ mov %o5, %o2 ! %o2 = old value
+ and %o2, %o1, %o5 ! recompute the new short
+ and %o5, %o3, %o5 ! isolate the short
+ retl
+ srl %o5, %g1, %o0 ! %o0 = new value
+ SET_SIZE(atomic_and_ushort_nv)
+ SET_SIZE(atomic_and_ushort)
+ SET_SIZE(atomic_and_16_nv)
+ SET_SIZE(atomic_and_16)
+
+ /*
+ * NOTE: If atomic_and_32 and atomic_and_32_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_and_32_nv.
+ */
+ ENTRY(atomic_and_32)
+ ALTENTRY(atomic_and_32_nv)
+ ALTENTRY(atomic_and_uint)
+ ALTENTRY(atomic_and_uint_nv)
+ ALTENTRY(atomic_and_ulong)
+ ALTENTRY(atomic_and_ulong_nv)
+ ld [%o0], %o2
+1:
+ and %o2, %o1, %o3
+ cas [%o0], %o2, %o3
+ cmp %o2, %o3
+ bne,a,pn %icc, 1b
+ mov %o3, %o2
+ retl
+ and %o2, %o1, %o0 ! return new value
+ SET_SIZE(atomic_and_ulong_nv)
+ SET_SIZE(atomic_and_ulong)
+ SET_SIZE(atomic_and_uint_nv)
+ SET_SIZE(atomic_and_uint)
+ SET_SIZE(atomic_and_32_nv)
+ SET_SIZE(atomic_and_32)
+
+ /*
+ * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
+ * separated, you need to also edit the libc sparc platform
+ * specific mapfile and remove the NODYNSORT attribute
+ * from atomic_and_64_nv.
+ */
+ ENTRY(atomic_and_64)
+ ALTENTRY(atomic_and_64_nv)
+ sllx %o1, 32, %o1 ! upper 32 in %o1, lower in %o2
+ srl %o2, 0, %o2
+ add %o1, %o2, %o1 ! convert 2 32-bit args into 1 64-bit
+ ldx [%o0], %o2
+1:
+ and %o2, %o1, %o3
+ casx [%o0], %o2, %o3
+ cmp %o2, %o3
+ bne,a,pn %xcc, 1b
+ mov %o3, %o2
+ and %o2, %o1, %o1 ! return lower 32-bits in %o1
+ retl
+ srlx %o1, 32, %o0 ! return upper 32-bits in %o0
+ SET_SIZE(atomic_and_64_nv)
+ SET_SIZE(atomic_and_64)
+
<no value>	/*
+	 * 8-bit compare-and-swap on the byte at [%o0]: if it equals %o1,
+	 * store %o2; always returns the old byte value in %o0.  Operates
+	 * on the containing aligned word; the inner loop retries only
+	 * when the *other* bytes of the word changed underneath us.
+	 */
+	ENTRY(atomic_cas_8)
+	ALTENTRY(atomic_cas_uchar)
+	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	set	0xff, %o3		! %o3 = mask
+	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	and	%o1, %o3, %o1		! %o1 = single byte value
+	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
+	and	%o2, %o3, %o2		! %o2 = single byte value
+	andn	%o0, 0x3, %o0		! %o0 = word address
+	ld	[%o0], %o4		! read old value
+1:
+	andn	%o4, %o3, %o4		! clear target bits
+	or	%o4, %o2, %o5		! insert the new value
+	or	%o4, %o1, %o4		! insert the comparison value
+	cas	[%o0], %o4, %o5
+	cmp	%o4, %o5		! did we succeed?
+	be,pt	%icc, 2f
+	  and	%o5, %o3, %o4		! isolate the old value
+	cmp	%o1, %o4		! should we have succeeded?
+	be,a,pt	%icc, 1b		! yes, try again
+	  mov	%o5, %o4		! %o4 = old value
+2:
+	retl
+	srl	%o4, %g1, %o0		! %o0 = old value
+	SET_SIZE(atomic_cas_uchar)
+	SET_SIZE(atomic_cas_8)
+
+	/*
+	 * As atomic_cas_8, but for a naturally-aligned 16-bit halfword.
+	 */
+	ENTRY(atomic_cas_16)
+	ALTENTRY(atomic_cas_ushort)
+	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
+	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	sethi	%hi(0xffff0000), %o3	! %o3 = mask
+	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	and	%o1, %o3, %o1		! %o1 = single short value
+	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
+	and	%o2, %o3, %o2		! %o2 = single short value
+	andn	%o0, 0x2, %o0		! %o0 = word address
+	! if low-order bit is 1, we will properly get an alignment fault here
+	ld	[%o0], %o4		! read old value
+1:
+	andn	%o4, %o3, %o4		! clear target bits
+	or	%o4, %o2, %o5		! insert the new value
+	or	%o4, %o1, %o4		! insert the comparison value
+	cas	[%o0], %o4, %o5
+	cmp	%o4, %o5		! did we succeed?
+	be,pt	%icc, 2f
+	  and	%o5, %o3, %o4		! isolate the old value
+	cmp	%o1, %o4		! should we have succeeded?
+	be,a,pt	%icc, 1b		! yes, try again
+	  mov	%o5, %o4		! %o4 = old value
+2:
+	retl
+	srl	%o4, %g1, %o0		! %o0 = old value
+	SET_SIZE(atomic_cas_ushort)
+	SET_SIZE(atomic_cas_16)
+
+	/*
+	 * 32-bit compare-and-swap: maps directly onto a single cas
+	 * instruction; returns the old value in %o0.
+	 */
+	ENTRY(atomic_cas_32)
+	ALTENTRY(atomic_cas_uint)
+	ALTENTRY(atomic_cas_ptr)
+	ALTENTRY(atomic_cas_ulong)
+	cas	[%o0], %o1, %o2
+	retl
+	mov	%o2, %o0
+	SET_SIZE(atomic_cas_ulong)
+	SET_SIZE(atomic_cas_ptr)
+	SET_SIZE(atomic_cas_uint)
+	SET_SIZE(atomic_cas_32)
+
+	/*
+	 * 64-bit compare-and-swap via casx.  The 32-bit ABI passes the
+	 * comparison value in %o1:%o2 and the new value in %o3:%o4; the
+	 * old value is returned split across %o0 (upper) and %o1 (lower).
+	 */
+	ENTRY(atomic_cas_64)
+	sllx	%o1, 32, %o1		! cmp's upper 32 in %o1, lower in %o2
+	srl	%o2, 0, %o2		! convert 2 32-bit args into 1 64-bit
+	add	%o1, %o2, %o1
+	sllx	%o3, 32, %o2		! newval upper 32 in %o3, lower in %o4
+	srl	%o4, 0, %o4		! setup %o2 to have newval
+	add	%o2, %o4, %o2
+	casx	[%o0], %o1, %o2
+	srl	%o2, 0, %o1		! return lower 32-bits in %o1
+	retl
+	srlx	%o2, 32, %o0		! return upper 32-bits in %o0
+	SET_SIZE(atomic_cas_64)
+
+	/*
+	 * Atomically swap the byte at [%o0] with the low 8 bits of %o1,
+	 * via a cas loop on the containing aligned word; returns the old
+	 * byte value in %o0.
+	 */
+	ENTRY(atomic_swap_8)
+	ALTENTRY(atomic_swap_uchar)
+	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	set	0xff, %o3		! %o3 = mask
+	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	and	%o1, %o3, %o1		! %o1 = single byte value
+	andn	%o0, 0x3, %o0		! %o0 = word address
+	ld	[%o0], %o2		! read old value
+1:
+	andn	%o2, %o3, %o5		! clear target bits
+	or	%o5, %o1, %o5		! insert the new value
+	cas	[%o0], %o2, %o5
+	cmp	%o2, %o5
+	bne,a,pn %icc, 1b
+	  mov	%o5, %o2		! %o2 = old value
+	and	%o5, %o3, %o5
+	retl
+	srl	%o5, %g1, %o0		! %o0 = old value
+	SET_SIZE(atomic_swap_uchar)
+	SET_SIZE(atomic_swap_8)
+
+	/*
+	 * As atomic_swap_8, but for a naturally-aligned 16-bit halfword.
+	 */
+	ENTRY(atomic_swap_16)
+	ALTENTRY(atomic_swap_ushort)
+	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
+	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
+	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
+	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
+	sethi	%hi(0xffff0000), %o3	! %o3 = mask
+	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
+	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
+	and	%o1, %o3, %o1		! %o1 = single short value
+	andn	%o0, 0x2, %o0		! %o0 = word address
+	! if low-order bit is 1, we will properly get an alignment fault here
+	ld	[%o0], %o2		! read old value
+1:
+	andn	%o2, %o3, %o5		! clear target bits
+	or	%o5, %o1, %o5		! insert the new value
+	cas	[%o0], %o2, %o5
+	cmp	%o2, %o5
+	bne,a,pn %icc, 1b
+	  mov	%o5, %o2		! %o2 = old value
+	and	%o5, %o3, %o5
+	retl
+	srl	%o5, %g1, %o0		! %o0 = old value
+	SET_SIZE(atomic_swap_ushort)
+	SET_SIZE(atomic_swap_16)
+
+	/*
+	 * Word-sized atomic swap: store %o1 at [%o0], returning the old
+	 * 32-bit value in %o0.
+	 */
+	ENTRY(atomic_swap_32)
+	ALTENTRY(atomic_swap_uint)
+	ALTENTRY(atomic_swap_ptr)
+	ALTENTRY(atomic_swap_ulong)
+	ld	[%o0], %o2
+1:
+	mov	%o1, %o3
+	cas	[%o0], %o2, %o3
+	cmp	%o2, %o3
+	bne,a,pn %icc, 1b
+	  mov	%o3, %o2
+	retl
+	mov	%o3, %o0
+	SET_SIZE(atomic_swap_ulong)
+	SET_SIZE(atomic_swap_ptr)
+	SET_SIZE(atomic_swap_uint)
+	SET_SIZE(atomic_swap_32)
+
+	/*
+	 * 64-bit atomic swap via casx.  The 32-bit ABI passes the new
+	 * value in %o1:%o2 and returns the old value in %o0 (upper) and
+	 * %o1 (lower).
+	 */
+	ENTRY(atomic_swap_64)
+	sllx	%o1, 32, %o1		! upper 32 in %o1, lower in %o2
+	srl	%o2, 0, %o2
+	add	%o1, %o2, %o1		! convert 2 32-bit args into 1 64-bit
+	ldx	[%o0], %o2
+1:
+	mov	%o1, %o3
+	casx	[%o0], %o2, %o3
+	cmp	%o2, %o3
+	bne,a,pn %xcc, 1b
+	  mov	%o3, %o2
+	srl	%o3, 0, %o1		! return lower 32-bits in %o1
+	retl
+	srlx	%o3, 32, %o0		! return upper 32-bits in %o0
+	SET_SIZE(atomic_swap_64)
+
+	/*
+	 * Atomically set bit %o1 of the long at [%o0] *only if it was
+	 * clear*: returns 0 on success, -1 if the bit was already set.
+	 */
+	ENTRY(atomic_set_long_excl)
+	mov	1, %o3
+	slln	%o3, %o1, %o3
+	ldn	[%o0], %o2
+1:
+	andcc	%o2, %o3, %g0		! test if the bit is set
+	bnz,a,pn %ncc, 2f		! if so, then fail out
+	  mov	-1, %o0
+	or	%o2, %o3, %o4		! set the bit, and try to commit it
+	casn	[%o0], %o2, %o4
+	cmp	%o2, %o4
+	bne,a,pn %ncc, 1b		! failed to commit, try again
+	  mov	%o4, %o2
+	mov	%g0, %o0
+2:
+	retl
+	nop
+	SET_SIZE(atomic_set_long_excl)
+
+	/*
+	 * Atomically clear bit %o1 of the long at [%o0] *only if it was
+	 * set*: returns 0 on success, -1 if the bit was already clear.
+	 */
+	ENTRY(atomic_clear_long_excl)
+	mov	1, %o3
+	slln	%o3, %o1, %o3
+	ldn	[%o0], %o2
+1:
+	andncc	%o3, %o2, %g0		! test if the bit is clear
+	bnz,a,pn %ncc, 2f		! if so, then fail out
+	  mov	-1, %o0
+	andn	%o2, %o3, %o4		! clear the bit, and try to commit it
+	casn	[%o0], %o2, %o4
+	cmp	%o2, %o4
+	bne,a,pn %ncc, 1b		! failed to commit, try again
+	  mov	%o4, %o2
+	mov	%g0, %o0
+2:
+	retl
+	nop
+	SET_SIZE(atomic_clear_long_excl)
+
+#if !defined(_KERNEL)
+
+	/*
+	 * Spitfires and Blackbirds have a problem with membars in the
+	 * delay slot (SF_ERRATA_51). For safety's sake, we assume
+	 * that the whole world needs the workaround.
+	 */
+	/* Acquire-style barrier: orders prior loads/stores before entry. */
+	ENTRY(membar_enter)
+	membar	#StoreLoad|#StoreStore
+	retl
+	nop
+	SET_SIZE(membar_enter)
+
+	/* Release-style barrier: completes loads/stores before exit. */
+	ENTRY(membar_exit)
+	membar	#LoadStore|#StoreStore
+	retl
+	nop
+	SET_SIZE(membar_exit)
+
+	/* Order stores against later stores (producer side). */
+	ENTRY(membar_producer)
+	membar	#StoreStore
+	retl
+	nop
+	SET_SIZE(membar_producer)
+
+	/* Order loads against later loads (consumer side). */
+	ENTRY(membar_consumer)
+	membar	#LoadLoad
+	retl
+	nop
+	SET_SIZE(membar_consumer)
+
+#endif	/* !_KERNEL */
diff --git a/common/list/list.c b/common/list/list.c
new file mode 100644
index 000000000000..94f7782a87d2
--- /dev/null
+++ b/common/list/list.c
@@ -0,0 +1,251 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * Generic doubly-linked list implementation
+ */
+
+#include <sys/list.h>
+#include <sys/list_impl.h>
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#ifdef _KERNEL
+#include <sys/debug.h>
+#else
+#include <assert.h>
+#define ASSERT(a) assert(a)
+#endif
+
+#ifdef lint
+extern list_node_t *list_d2l(list_t *list, void *obj);
+#else
+/* Convert an object pointer to a pointer to its embedded list_node_t. */
+#define	list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset))
+#endif
+/* Convert an embedded list_node_t pointer back to its containing object. */
+#define	list_object(a, node) ((void *)(((char *)node) - (a)->list_offset))
+/* True when the list's sentinel head points to itself. */
+#define	list_empty(a) ((a)->list_head.list_next == &(a)->list_head)
+
+/* Splice the node embedded in 'object' immediately after 'node'. */
+#define	list_insert_after_node(list, node, object) {	\
+	list_node_t *lnew = list_d2l(list, object);	\
+	lnew->list_prev = (node);			\
+	lnew->list_next = (node)->list_next;		\
+	(node)->list_next->list_prev = lnew;		\
+	(node)->list_next = lnew;			\
+}
+
+/* Splice the node embedded in 'object' immediately before 'node'. */
+#define	list_insert_before_node(list, node, object) {	\
+	list_node_t *lnew = list_d2l(list, object);	\
+	lnew->list_next = (node);			\
+	lnew->list_prev = (node)->list_prev;		\
+	(node)->list_prev->list_next = lnew;		\
+	(node)->list_prev = lnew;			\
+}
+
+/*
+ * Unlink 'node' and poison its link pointers to NULL so that
+ * list_link_active() reports it inactive afterwards.
+ */
+#define	list_remove_node(node)					\
+	(node)->list_prev->list_next = (node)->list_next;	\
+	(node)->list_next->list_prev = (node)->list_prev;	\
+	(node)->list_next = (node)->list_prev = NULL
+
+/*
+ * Initialize a list of objects of 'size' bytes each; 'offset' is the
+ * byte offset of the embedded list_node_t within each object (the
+ * node must fit inside the object, as the ASSERT checks).  The head
+ * is a self-referencing sentinel, so the list starts empty.
+ */
+void
+list_create(list_t *list, size_t size, size_t offset)
+{
+	ASSERT(list);
+	ASSERT(size > 0);
+	ASSERT(size >= offset + sizeof (list_node_t));
+
+	list->list_size = size;
+	list->list_offset = offset;
+	list->list_head.list_next = list->list_head.list_prev =
+	    &list->list_head;
+}
+
+/*
+ * Tear down a list.  The list must already be empty (asserted); the
+ * head's links are poisoned to NULL to catch use-after-destroy.
+ */
+void
+list_destroy(list_t *list)
+{
+	list_node_t *node = &list->list_head;
+
+	ASSERT(list);
+	ASSERT(list->list_head.list_next == node);
+	ASSERT(list->list_head.list_prev == node);
+
+	node->list_next = node->list_prev = NULL;
+}
+
+/*
+ * Insert 'nobject' immediately after 'object'.  A NULL 'object' means
+ * insert at the head of the list.
+ */
+void
+list_insert_after(list_t *list, void *object, void *nobject)
+{
+	if (object == NULL) {
+		list_insert_head(list, nobject);
+	} else {
+		list_node_t *lold = list_d2l(list, object);
+		list_insert_after_node(list, lold, nobject);
+	}
+}
+
+/*
+ * Insert 'nobject' immediately before 'object'.  A NULL 'object' means
+ * insert at the tail of the list.
+ */
+void
+list_insert_before(list_t *list, void *object, void *nobject)
+{
+	if (object == NULL) {
+		list_insert_tail(list, nobject);
+	} else {
+		list_node_t *lold = list_d2l(list, object);
+		list_insert_before_node(list, lold, nobject);
+	}
+}
+
+/* Insert 'object' at the head (insert after the sentinel). */
+void
+list_insert_head(list_t *list, void *object)
+{
+	list_node_t *lold = &list->list_head;
+	list_insert_after_node(list, lold, object);
+}
+
+/* Insert 'object' at the tail (insert before the sentinel). */
+void
+list_insert_tail(list_t *list, void *object)
+{
+	list_node_t *lold = &list->list_head;
+	list_insert_before_node(list, lold, object);
+}
+
+/*
+ * Unlink 'object' from 'list'.  The object must currently be linked
+ * (asserted via its non-NULL next pointer); it is not freed.
+ */
+void
+list_remove(list_t *list, void *object)
+{
+	list_node_t *lold = list_d2l(list, object);
+	ASSERT(!list_empty(list));
+	ASSERT(lold->list_next != NULL);
+	list_remove_node(lold);
+}
+
+/*
+ * Unlink and return the first object in the list, or NULL if the
+ * list is empty.
+ */
+void *
+list_remove_head(list_t *list)
+{
+	list_node_t *head = list->list_head.list_next;
+	if (head == &list->list_head)
+		return (NULL);
+	list_remove_node(head);
+	return (list_object(list, head));
+}
+
+/*
+ * Unlink and return the last object in the list, or NULL if the
+ * list is empty.
+ */
+void *
+list_remove_tail(list_t *list)
+{
+	list_node_t *tail = list->list_head.list_prev;
+	if (tail == &list->list_head)
+		return (NULL);
+	list_remove_node(tail);
+	return (list_object(list, tail));
+}
+
+/* Return the first object in the list without unlinking, or NULL. */
+void *
+list_head(list_t *list)
+{
+	if (list_empty(list))
+		return (NULL);
+	return (list_object(list, list->list_head.list_next));
+}
+
+/* Return the last object in the list without unlinking, or NULL. */
+void *
+list_tail(list_t *list)
+{
+	if (list_empty(list))
+		return (NULL);
+	return (list_object(list, list->list_head.list_prev));
+}
+
+/* Return the object following 'object', or NULL at the end of the list. */
+void *
+list_next(list_t *list, void *object)
+{
+	list_node_t *node = list_d2l(list, object);
+
+	if (node->list_next != &list->list_head)
+		return (list_object(list, node->list_next));
+
+	return (NULL);
+}
+
+/* Return the object preceding 'object', or NULL at the start of the list. */
+void *
+list_prev(list_t *list, void *object)
+{
+	list_node_t *node = list_d2l(list, object);
+
+	if (node->list_prev != &list->list_head)
+		return (list_object(list, node->list_prev));
+
+	return (NULL);
+}
+
+/*
+ * Insert src list after dst list. Empty src list thereafter.
+ * Both lists must hold objects of the same size and node offset
+ * (asserted); moving an empty src is a no-op.
+ */
+void
+list_move_tail(list_t *dst, list_t *src)
+{
+	list_node_t *dstnode = &dst->list_head;
+	list_node_t *srcnode = &src->list_head;
+
+	ASSERT(dst->list_size == src->list_size);
+	ASSERT(dst->list_offset == src->list_offset);
+
+	if (list_empty(src))
+		return;
+
+	/* splice src's chain between dst's current tail and dst's head */
+	dstnode->list_prev->list_next = srcnode->list_next;
+	srcnode->list_next->list_prev = dstnode->list_prev;
+	dstnode->list_prev = srcnode->list_prev;
+	srcnode->list_prev->list_next = dstnode;
+
+	/* empty src list */
+	srcnode->list_next = srcnode->list_prev = srcnode;
+}
+
+/*
+ * Substitute 'lnew' for 'lold' in whatever list 'lold' is linked on.
+ * 'lold' must be active and 'lnew' inactive (asserted); 'lold' is
+ * poisoned to NULL afterwards.
+ */
+void
+list_link_replace(list_node_t *lold, list_node_t *lnew)
+{
+	ASSERT(list_link_active(lold));
+	ASSERT(!list_link_active(lnew));
+
+	lnew->list_next = lold->list_next;
+	lnew->list_prev = lold->list_prev;
+	lold->list_prev->list_next = lnew;
+	lold->list_next->list_prev = lnew;
+	lold->list_next = lold->list_prev = NULL;
+}
+
+/* Mark a link as not on any list (both pointers NULL). */
+void
+list_link_init(list_node_t *link)
+{
+	link->list_next = NULL;
+	link->list_prev = NULL;
+}
+
+/* Return nonzero if the link is currently on a list. */
+int
+list_link_active(list_node_t *link)
+{
+	return (link->list_next != NULL);
+}
+
+/* Return nonzero if the list has no elements. */
+int
+list_is_empty(list_t *list)
+{
+	return (list_empty(list));
+}
diff --git a/common/nvpair/nvpair.c b/common/nvpair/nvpair.c
new file mode 100644
index 000000000000..00d44263ccda
--- /dev/null
+++ b/common/nvpair/nvpair.c
@@ -0,0 +1,3297 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/stropts.h>
+#include <sys/debug.h>
+#include <sys/isa_defs.h>
+#include <sys/int_limits.h>
+#include <sys/nvpair.h>
+#include <sys/nvpair_impl.h>
+#include <rpc/types.h>
+#include <rpc/xdr.h>
+
+#if defined(_KERNEL) && !defined(_BOOT)
+#include <sys/varargs.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#else
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#endif
+
+#ifndef offsetof
+#define offsetof(s, m) ((size_t)(&(((s *)0)->m)))
+#endif
+#define skip_whitespace(p) while ((*(p) == ' ') || (*(p) == '\t')) p++
+
+/*
+ * nvpair.c - Provides kernel & userland interfaces for manipulating
+ * name-value pairs.
+ *
+ * Overview Diagram
+ *
+ * +--------------+
+ * | nvlist_t |
+ * |--------------|
+ * | nvl_version |
+ * | nvl_nvflag |
+ * | nvl_priv -+-+
+ * | nvl_flag | |
+ * | nvl_pad | |
+ * +--------------+ |
+ * V
+ * +--------------+ last i_nvp in list
+ * | nvpriv_t | +--------------------->
+ * |--------------| |
+ * +--+- nvp_list | | +------------+
+ * | | nvp_last -+--+ + nv_alloc_t |
+ * | | nvp_curr | |------------|
+ * | | nvp_nva -+----> | nva_ops |
+ * | | nvp_stat | | nva_arg |
+ * | +--------------+ +------------+
+ * |
+ * +-------+
+ * V
+ * +---------------------+ +-------------------+
+ * | i_nvp_t | +-->| i_nvp_t | +-->
+ * |---------------------| | |-------------------| |
+ * | nvi_next -+--+ | nvi_next -+--+
+ * | nvi_prev (NULL) | <----+ nvi_prev |
+ * | . . . . . . . . . . | | . . . . . . . . . |
+ * | nvp (nvpair_t) | | nvp (nvpair_t) |
+ * | - nvp_size | | - nvp_size |
+ * | - nvp_name_sz | | - nvp_name_sz |
+ * | - nvp_value_elem | | - nvp_value_elem |
+ * | - nvp_type | | - nvp_type |
+ * | - data ... | | - data ... |
+ * +---------------------+ +-------------------+
+ *
+ *
+ *
+ * +---------------------+ +---------------------+
+ * | i_nvp_t | +--> +-->| i_nvp_t (last) |
+ * |---------------------| | | |---------------------|
+ * | nvi_next -+--+ ... --+ | nvi_next (NULL) |
+ * <-+- nvi_prev |<-- ... <----+ nvi_prev |
+ * | . . . . . . . . . | | . . . . . . . . . |
+ * | nvp (nvpair_t) | | nvp (nvpair_t) |
+ * | - nvp_size | | - nvp_size |
+ * | - nvp_name_sz | | - nvp_name_sz |
+ * | - nvp_value_elem | | - nvp_value_elem |
+ * | - DATA_TYPE_NVLIST | | - nvp_type |
+ * | - data (embedded) | | - data ... |
+ * | nvlist name | +---------------------+
+ * | +--------------+ |
+ * | | nvlist_t | |
+ * | |--------------| |
+ * | | nvl_version | |
+ * | | nvl_nvflag | |
+ * | | nvl_priv --+---+---->
+ * | | nvl_flag | |
+ * | | nvl_pad | |
+ * | +--------------+ |
+ * +---------------------+
+ *
+ *
+ * N.B. nvpair_t may be aligned on 4 byte boundary, so +4 will
+ * allow value to be aligned on 8 byte boundary
+ *
+ * name_len is the length of the name string including the null terminator
+ * so it must be >= 1
+ */
+#define NVP_SIZE_CALC(name_len, data_len) \
+ (NV_ALIGN((sizeof (nvpair_t)) + name_len) + NV_ALIGN(data_len))
+
+static int i_get_value_size(data_type_t type, const void *data, uint_t nelem);
+static int nvlist_add_common(nvlist_t *nvl, const char *name, data_type_t type,
+ uint_t nelem, const void *data);
+
+#define NV_STAT_EMBEDDED 0x1
+#define EMBEDDED_NVL(nvp) ((nvlist_t *)(void *)NVP_VALUE(nvp))
+#define EMBEDDED_NVL_ARRAY(nvp) ((nvlist_t **)(void *)NVP_VALUE(nvp))
+
+#define NVP_VALOFF(nvp) (NV_ALIGN(sizeof (nvpair_t) + (nvp)->nvp_name_sz))
+#define NVPAIR2I_NVP(nvp) \
+ ((i_nvp_t *)((size_t)(nvp) - offsetof(i_nvp_t, nvi_nvp)))
+
+
+/*
+ * Bind an nv_alloc_t to a set of allocator ops.  Any extra arguments
+ * are forwarded to the ops' init callback (if one exists); returns 0
+ * or the error from that callback.
+ */
+int
+nv_alloc_init(nv_alloc_t *nva, const nv_alloc_ops_t *nvo, /* args */ ...)
+{
+	va_list valist;
+	int err = 0;
+
+	nva->nva_ops = nvo;
+	nva->nva_arg = NULL;
+
+	va_start(valist, nvo);
+	if (nva->nva_ops->nv_ao_init != NULL)
+		err = nva->nva_ops->nv_ao_init(nva, valist);
+	va_end(valist);
+
+	return (err);
+}
+
+/* Invoke the allocator's reset callback, if it provides one. */
+void
+nv_alloc_reset(nv_alloc_t *nva)
+{
+	if (nva->nva_ops->nv_ao_reset != NULL)
+		nva->nva_ops->nv_ao_reset(nva);
+}
+
+/* Invoke the allocator's fini callback, if it provides one. */
+void
+nv_alloc_fini(nv_alloc_t *nva)
+{
+	if (nva->nva_ops->nv_ao_fini != NULL)
+		nva->nva_ops->nv_ao_fini(nva);
+}
+
+/*
+ * Return the allocator backing 'nvl', or NULL if the list is NULL or
+ * has no private data.
+ */
+nv_alloc_t *
+nvlist_lookup_nv_alloc(nvlist_t *nvl)
+{
+	nvpriv_t *priv;
+
+	if (nvl == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (NULL);
+
+	return (priv->nvp_nva);
+}
+
+/*
+ * Allocate 'size' zeroed bytes from the list's allocator; returns
+ * NULL on failure.
+ */
+static void *
+nv_mem_zalloc(nvpriv_t *nvp, size_t size)
+{
+	nv_alloc_t *nva = nvp->nvp_nva;
+	void *buf;
+
+	if ((buf = nva->nva_ops->nv_ao_alloc(nva, size)) != NULL)
+		bzero(buf, size);
+
+	return (buf);
+}
+
+/* Return a buffer of 'size' bytes to the list's allocator. */
+static void
+nv_mem_free(nvpriv_t *nvp, void *buf, size_t size)
+{
+	nv_alloc_t *nva = nvp->nvp_nva;
+
+	nva->nva_ops->nv_ao_free(nva, buf, size);
+}
+
+/* Zero an nvpriv_t and record its allocator and status flags. */
+static void
+nv_priv_init(nvpriv_t *priv, nv_alloc_t *nva, uint32_t stat)
+{
+	bzero(priv, sizeof (nvpriv_t));
+
+	priv->nvp_nva = nva;
+	priv->nvp_stat = stat;
+}
+
+/*
+ * Allocate and initialize a fresh nvpriv_t directly from 'nva';
+ * returns NULL on allocation failure.
+ */
+static nvpriv_t *
+nv_priv_alloc(nv_alloc_t *nva)
+{
+	nvpriv_t *priv;
+
+	/*
+	 * nv_mem_alloc() cannot called here because it needs the priv
+	 * argument.
+	 */
+	if ((priv = nva->nva_ops->nv_ao_alloc(nva, sizeof (nvpriv_t))) == NULL)
+		return (NULL);
+
+	nv_priv_init(priv, nva, 0);
+
+	return (priv);
+}
+
+/*
+ * Embedded lists need their own nvpriv_t's. We create a new
+ * nvpriv_t using the parameters and allocator from the parent
+ * list's nvpriv_t.
+ */
+static nvpriv_t *
+nv_priv_alloc_embedded(nvpriv_t *priv)
+{
+	nvpriv_t *emb_priv;
+
+	if ((emb_priv = nv_mem_zalloc(priv, sizeof (nvpriv_t))) == NULL)
+		return (NULL);
+
+	nv_priv_init(emb_priv, priv->nvp_nva, NV_STAT_EMBEDDED);
+
+	return (emb_priv);
+}
+
+/*
+ * Fill in the nvlist header.  Only the uniqueness flags are retained
+ * from 'nvflag'; the priv pointer is stored as a uint64_t so the
+ * on-disk/packed layout is pointer-size independent.
+ */
+static void
+nvlist_init(nvlist_t *nvl, uint32_t nvflag, nvpriv_t *priv)
+{
+	nvl->nvl_version = NV_VERSION;
+	nvl->nvl_nvflag = nvflag & (NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE);
+	nvl->nvl_priv = (uint64_t)(uintptr_t)priv;
+	nvl->nvl_flag = 0;
+	nvl->nvl_pad = 0;
+}
+
+/* Return the nvlist's flag word (NV_UNIQUE_NAME / NV_UNIQUE_NAME_TYPE). */
+uint_t
+nvlist_nvflag(nvlist_t *nvl)
+{
+	return (nvl->nvl_nvflag);
+}
+
+/*
+ * nvlist_alloc - Allocate nvlist.
+ * In the kernel, kmflag selects the sleeping or non-sleeping
+ * allocator; in userland it is ignored.
+ */
+/*ARGSUSED1*/
+int
+nvlist_alloc(nvlist_t **nvlp, uint_t nvflag, int kmflag)
+{
+#if defined(_KERNEL) && !defined(_BOOT)
+	return (nvlist_xalloc(nvlp, nvflag,
+	    (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep)));
+#else
+	return (nvlist_xalloc(nvlp, nvflag, nv_alloc_nosleep));
+#endif
+}
+
+/*
+ * Allocate an nvlist using an explicit allocator.  Returns EINVAL on
+ * bad arguments, ENOMEM on allocation failure, 0 on success.
+ */
+int
+nvlist_xalloc(nvlist_t **nvlp, uint_t nvflag, nv_alloc_t *nva)
+{
+	nvpriv_t *priv;
+
+	if (nvlp == NULL || nva == NULL)
+		return (EINVAL);
+
+	if ((priv = nv_priv_alloc(nva)) == NULL)
+		return (ENOMEM);
+
+	if ((*nvlp = nv_mem_zalloc(priv,
+	    NV_ALIGN(sizeof (nvlist_t)))) == NULL) {
+		nv_mem_free(priv, priv, sizeof (nvpriv_t));
+		return (ENOMEM);
+	}
+
+	nvlist_init(*nvlp, nvflag, priv);
+
+	return (0);
+}
+
+/*
+ * nvp_buf_alloc - Allocate i_nvp_t for storing a new nv pair.
+ * 'len' is the size of the nvpair payload only; the i_nvp_t link
+ * header is added on top.  Returns a pointer to the embedded nvpair,
+ * or NULL on allocation failure.
+ */
+static nvpair_t *
+nvp_buf_alloc(nvlist_t *nvl, size_t len)
+{
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	i_nvp_t *buf;
+	nvpair_t *nvp;
+	size_t nvsize;
+
+	/*
+	 * Allocate the buffer
+	 */
+	nvsize = len + offsetof(i_nvp_t, nvi_nvp);
+
+	if ((buf = nv_mem_zalloc(priv, nvsize)) == NULL)
+		return (NULL);
+
+	nvp = &buf->nvi_nvp;
+	nvp->nvp_size = len;
+
+	return (nvp);
+}
+
+/*
+ * nvp_buf_free - de-Allocate an i_nvp_t.
+ * Recomputes the full i_nvp_t size from the stored nvp_size.
+ */
+static void
+nvp_buf_free(nvlist_t *nvl, nvpair_t *nvp)
+{
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	size_t nvsize = nvp->nvp_size + offsetof(i_nvp_t, nvi_nvp);
+
+	nv_mem_free(priv, NVPAIR2I_NVP(nvp), nvsize);
+}
+
+/*
+ * nvp_buf_link - link a new nv pair into the nvlist.
+ */
+static void
+nvp_buf_link(nvlist_t *nvl, nvpair_t *nvp)
+{
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	i_nvp_t *curr = NVPAIR2I_NVP(nvp);
+
+	/* Put element at end of nvlist */
+	if (priv->nvp_list == NULL) {
+		priv->nvp_list = priv->nvp_last = curr;
+	} else {
+		curr->nvi_prev = priv->nvp_last;
+		priv->nvp_last->nvi_next = curr;
+		priv->nvp_last = curr;
+	}
+}
+
+/*
+ * nvp_buf_unlink - unlink an removed nvpair out of the nvlist.
+ */
+static void
+nvp_buf_unlink(nvlist_t *nvl, nvpair_t *nvp)
+{
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	i_nvp_t *curr = NVPAIR2I_NVP(nvp);
+
+	/*
+	 * protect nvlist_next_nvpair() against walking on freed memory.
+	 */
+	if (priv->nvp_curr == curr)
+		priv->nvp_curr = curr->nvi_next;
+
+	if (curr == priv->nvp_list)
+		priv->nvp_list = curr->nvi_next;
+	else
+		curr->nvi_prev->nvi_next = curr->nvi_next;
+
+	if (curr == priv->nvp_last)
+		priv->nvp_last = curr->nvi_prev;
+	else
+		curr->nvi_next->nvi_prev = curr->nvi_prev;
+}
+
+/*
+ * take a nvpair type and number of elements and make sure they are
+ * valid: scalars require nelem == 1, DATA_TYPE_BOOLEAN requires 0,
+ * arrays may have any count (including 0).  Returns 0 or EINVAL.
+ */
+static int
+i_validate_type_nelem(data_type_t type, uint_t nelem)
+{
+	switch (type) {
+	case DATA_TYPE_BOOLEAN:
+		if (nelem != 0)
+			return (EINVAL);
+		break;
+	case DATA_TYPE_BOOLEAN_VALUE:
+	case DATA_TYPE_BYTE:
+	case DATA_TYPE_INT8:
+	case DATA_TYPE_UINT8:
+	case DATA_TYPE_INT16:
+	case DATA_TYPE_UINT16:
+	case DATA_TYPE_INT32:
+	case DATA_TYPE_UINT32:
+	case DATA_TYPE_INT64:
+	case DATA_TYPE_UINT64:
+	case DATA_TYPE_STRING:
+	case DATA_TYPE_HRTIME:
+	case DATA_TYPE_NVLIST:
+#if !defined(_KERNEL)
+	case DATA_TYPE_DOUBLE:
+#endif
+		if (nelem != 1)
+			return (EINVAL);
+		break;
+	case DATA_TYPE_BOOLEAN_ARRAY:
+	case DATA_TYPE_BYTE_ARRAY:
+	case DATA_TYPE_INT8_ARRAY:
+	case DATA_TYPE_UINT8_ARRAY:
+	case DATA_TYPE_INT16_ARRAY:
+	case DATA_TYPE_UINT16_ARRAY:
+	case DATA_TYPE_INT32_ARRAY:
+	case DATA_TYPE_UINT32_ARRAY:
+	case DATA_TYPE_INT64_ARRAY:
+	case DATA_TYPE_UINT64_ARRAY:
+	case DATA_TYPE_STRING_ARRAY:
+	case DATA_TYPE_NVLIST_ARRAY:
+		/* we allow arrays with 0 elements */
+		break;
+	default:
+		return (EINVAL);
+	}
+	return (0);
+}
+
+/*
+ * Verify nvp_name_sz and check the name string length.
+ * Returns 0 if the stored name is NUL-terminated and its strlen
+ * matches nvp_name_sz - 1; EFAULT otherwise (untrusted input path).
+ */
+static int
+i_validate_nvpair_name(nvpair_t *nvp)
+{
+	if ((nvp->nvp_name_sz <= 0) ||
+	    (nvp->nvp_size < NVP_SIZE_CALC(nvp->nvp_name_sz, 0)))
+		return (EFAULT);
+
+	/* verify the name string, make sure its terminated */
+	if (NVP_NAME(nvp)[nvp->nvp_name_sz - 1] != '\0')
+		return (EFAULT);
+
+	return (strlen(NVP_NAME(nvp)) == nvp->nvp_name_sz - 1 ? 0 : EFAULT);
+}
+
+/*
+ * Validate the value payload for types with a restricted domain:
+ * boolean values/arrays must be exactly B_TRUE or B_FALSE.  All other
+ * types pass unchecked here.  Returns 0 or EINVAL.
+ */
+static int
+i_validate_nvpair_value(data_type_t type, uint_t nelem, const void *data)
+{
+	switch (type) {
+	case DATA_TYPE_BOOLEAN_VALUE:
+		if (*(boolean_t *)data != B_TRUE &&
+		    *(boolean_t *)data != B_FALSE)
+			return (EINVAL);
+		break;
+	case DATA_TYPE_BOOLEAN_ARRAY: {
+		int i;
+
+		for (i = 0; i < nelem; i++)
+			if (((boolean_t *)data)[i] != B_TRUE &&
+			    ((boolean_t *)data)[i] != B_FALSE)
+				return (EINVAL);
+		break;
+	}
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * This function takes a pointer to what should be a nvpair and it's size
+ * and then verifies that all the nvpair fields make sense and can be
+ * trusted. This function is used when decoding packed nvpairs.
+ * Returns 0 when valid, EFAULT otherwise.
+ */
+static int
+i_validate_nvpair(nvpair_t *nvp)
+{
+	data_type_t type = NVP_TYPE(nvp);
+	int size1, size2;
+
+	/* verify nvp_name_sz, check the name string length */
+	if (i_validate_nvpair_name(nvp) != 0)
+		return (EFAULT);
+
+	if (i_validate_nvpair_value(type, NVP_NELEM(nvp), NVP_VALUE(nvp)) != 0)
+		return (EFAULT);
+
+	/*
+	 * verify nvp_type, nvp_value_elem, and also possibly
+	 * verify string values and get the value size.
+	 */
+	size2 = i_get_value_size(type, NVP_VALUE(nvp), NVP_NELEM(nvp));
+	size1 = nvp->nvp_size - NVP_VALOFF(nvp);
+	if (size2 < 0 || size1 != NV_ALIGN(size2))
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * Append a copy of every pair in 'snvl' to 'dnvl'.  Stops and returns
+ * the error from nvlist_add_common() on the first failure; pairs
+ * already copied are left in 'dnvl' (caller frees on error).
+ */
+static int
+nvlist_copy_pairs(nvlist_t *snvl, nvlist_t *dnvl)
+{
+	nvpriv_t *priv;
+	i_nvp_t *curr;
+
+	if ((priv = (nvpriv_t *)(uintptr_t)snvl->nvl_priv) == NULL)
+		return (EINVAL);
+
+	for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) {
+		nvpair_t *nvp = &curr->nvi_nvp;
+		int err;
+
+		if ((err = nvlist_add_common(dnvl, NVP_NAME(nvp), NVP_TYPE(nvp),
+		    NVP_NELEM(nvp), NVP_VALUE(nvp))) != 0)
+			return (err);
+	}
+
+	return (0);
+}
+
+/*
+ * Frees all memory allocated for an nvpair (like embedded lists) with
+ * the exception of the nvpair buffer itself.
+ */
+static void
+nvpair_free(nvpair_t *nvp)
+{
+	switch (NVP_TYPE(nvp)) {
+	case DATA_TYPE_NVLIST:
+		nvlist_free(EMBEDDED_NVL(nvp));
+		break;
+	case DATA_TYPE_NVLIST_ARRAY: {
+		nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp);
+		int i;
+
+		for (i = 0; i < NVP_NELEM(nvp); i++)
+			if (nvlp[i] != NULL)
+				nvlist_free(nvlp[i]);
+		break;
+	}
+	default:
+		break;
+	}
+}
+
+/*
+ * nvlist_free - free an unpacked nvlist
+ * Frees every pair (recursively, via nvpair_free), then the nvlist_t
+ * itself unless it is embedded in a parent pair's buffer, and finally
+ * the nvpriv_t.  Safe to call with NULL.
+ */
+void
+nvlist_free(nvlist_t *nvl)
+{
+	nvpriv_t *priv;
+	i_nvp_t *curr;
+
+	if (nvl == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return;
+
+	/*
+	 * Unpacked nvlist are linked through i_nvp_t
+	 */
+	curr = priv->nvp_list;
+	while (curr != NULL) {
+		nvpair_t *nvp = &curr->nvi_nvp;
+		curr = curr->nvi_next;
+
+		nvpair_free(nvp);
+		nvp_buf_free(nvl, nvp);
+	}
+
+	if (!(priv->nvp_stat & NV_STAT_EMBEDDED))
+		nv_mem_free(priv, nvl, NV_ALIGN(sizeof (nvlist_t)));
+	else
+		nvl->nvl_priv = 0;
+
+	nv_mem_free(priv, priv, sizeof (nvpriv_t));
+}
+
+/*
+ * Return 1 if 'nvp' is a member of 'nvl' (linear scan), 0 otherwise.
+ * A NULL nvp is treated as not contained.
+ */
+static int
+nvlist_contains_nvp(nvlist_t *nvl, nvpair_t *nvp)
+{
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	i_nvp_t *curr;
+
+	if (nvp == NULL)
+		return (0);
+
+	for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next)
+		if (&curr->nvi_nvp == nvp)
+			return (1);
+
+	return (0);
+}
+
+/*
+ * Make a copy of nvlist
+ * In the kernel, kmflag selects the sleeping or non-sleeping
+ * allocator; in userland it is ignored.
+ */
+/*ARGSUSED1*/
+int
+nvlist_dup(nvlist_t *nvl, nvlist_t **nvlp, int kmflag)
+{
+#if defined(_KERNEL) && !defined(_BOOT)
+	return (nvlist_xdup(nvl, nvlp,
+	    (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep)));
+#else
+	return (nvlist_xdup(nvl, nvlp, nv_alloc_nosleep));
+#endif
+}
+
+/*
+ * Duplicate 'nvl' using allocator 'nva'.  On failure the partially
+ * built copy is freed and the error is returned; *nvlp is only
+ * written on success.
+ */
+int
+nvlist_xdup(nvlist_t *nvl, nvlist_t **nvlp, nv_alloc_t *nva)
+{
+	int err;
+	nvlist_t *ret;
+
+	if (nvl == NULL || nvlp == NULL)
+		return (EINVAL);
+
+	if ((err = nvlist_xalloc(&ret, nvl->nvl_nvflag, nva)) != 0)
+		return (err);
+
+	if ((err = nvlist_copy_pairs(nvl, ret)) != 0)
+		nvlist_free(ret);
+	else
+		*nvlp = ret;
+
+	return (err);
+}
+
+/*
+ * Remove all with matching name
+ * Returns 0 if at least one pair was removed, ENOENT if none matched,
+ * EINVAL on bad arguments.
+ */
+int
+nvlist_remove_all(nvlist_t *nvl, const char *name)
+{
+	nvpriv_t *priv;
+	i_nvp_t *curr;
+	int error = ENOENT;
+
+	if (nvl == NULL || name == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (EINVAL);
+
+	curr = priv->nvp_list;
+	while (curr != NULL) {
+		nvpair_t *nvp = &curr->nvi_nvp;
+
+		/* advance before freeing the current node */
+		curr = curr->nvi_next;
+		if (strcmp(name, NVP_NAME(nvp)) != 0)
+			continue;
+
+		nvp_buf_unlink(nvl, nvp);
+		nvpair_free(nvp);
+		nvp_buf_free(nvl, nvp);
+
+		error = 0;
+	}
+
+	return (error);
+}
+
+/*
+ * Remove first one with matching name and type
+ * Returns 0 on removal, ENOENT if no match, EINVAL on bad arguments.
+ */
+int
+nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type)
+{
+	nvpriv_t *priv;
+	i_nvp_t *curr;
+
+	if (nvl == NULL || name == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (EINVAL);
+
+	curr = priv->nvp_list;
+	while (curr != NULL) {
+		nvpair_t *nvp = &curr->nvi_nvp;
+
+		if (strcmp(name, NVP_NAME(nvp)) == 0 && NVP_TYPE(nvp) == type) {
+			nvp_buf_unlink(nvl, nvp);
+			nvpair_free(nvp);
+			nvp_buf_free(nvl, nvp);
+
+			return (0);
+		}
+		curr = curr->nvi_next;
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Unlink and free a specific pair.
+ * NOTE(review): 'nvp' is not checked for membership in 'nvl' here —
+ * the caller must pass a pair that actually belongs to this list.
+ */
+int
+nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp)
+{
+	if (nvl == NULL || nvp == NULL)
+		return (EINVAL);
+
+	nvp_buf_unlink(nvl, nvp);
+	nvpair_free(nvp);
+	nvp_buf_free(nvl, nvp);
+	return (0);
+}
+
+/*
+ * This function calculates the size of an nvpair value.
+ *
+ * The data argument controls the behavior in case of the data types
+ *	DATA_TYPE_STRING	and
+ *	DATA_TYPE_STRING_ARRAY
+ * Is data == NULL then the size of the string(s) is excluded.
+ *
+ * Returns -1 for an invalid type/nelem combination, a NULL element in
+ * a string array, or a total size exceeding INT32_MAX; otherwise the
+ * value size in bytes.  (Accumulates in uint64_t to avoid overflow.)
+ */
+static int
+i_get_value_size(data_type_t type, const void *data, uint_t nelem)
+{
+	uint64_t value_sz;
+
+	if (i_validate_type_nelem(type, nelem) != 0)
+		return (-1);
+
+	/* Calculate required size for holding value */
+	switch (type) {
+	case DATA_TYPE_BOOLEAN:
+		value_sz = 0;
+		break;
+	case DATA_TYPE_BOOLEAN_VALUE:
+		value_sz = sizeof (boolean_t);
+		break;
+	case DATA_TYPE_BYTE:
+		value_sz = sizeof (uchar_t);
+		break;
+	case DATA_TYPE_INT8:
+		value_sz = sizeof (int8_t);
+		break;
+	case DATA_TYPE_UINT8:
+		value_sz = sizeof (uint8_t);
+		break;
+	case DATA_TYPE_INT16:
+		value_sz = sizeof (int16_t);
+		break;
+	case DATA_TYPE_UINT16:
+		value_sz = sizeof (uint16_t);
+		break;
+	case DATA_TYPE_INT32:
+		value_sz = sizeof (int32_t);
+		break;
+	case DATA_TYPE_UINT32:
+		value_sz = sizeof (uint32_t);
+		break;
+	case DATA_TYPE_INT64:
+		value_sz = sizeof (int64_t);
+		break;
+	case DATA_TYPE_UINT64:
+		value_sz = sizeof (uint64_t);
+		break;
+#if !defined(_KERNEL)
+	case DATA_TYPE_DOUBLE:
+		value_sz = sizeof (double);
+		break;
+#endif
+	case DATA_TYPE_STRING:
+		if (data == NULL)
+			value_sz = 0;
+		else
+			value_sz = strlen(data) + 1;
+		break;
+	case DATA_TYPE_BOOLEAN_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (boolean_t);
+		break;
+	case DATA_TYPE_BYTE_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (uchar_t);
+		break;
+	case DATA_TYPE_INT8_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (int8_t);
+		break;
+	case DATA_TYPE_UINT8_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (uint8_t);
+		break;
+	case DATA_TYPE_INT16_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (int16_t);
+		break;
+	case DATA_TYPE_UINT16_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (uint16_t);
+		break;
+	case DATA_TYPE_INT32_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (int32_t);
+		break;
+	case DATA_TYPE_UINT32_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (uint32_t);
+		break;
+	case DATA_TYPE_INT64_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (int64_t);
+		break;
+	case DATA_TYPE_UINT64_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (uint64_t);
+		break;
+	case DATA_TYPE_STRING_ARRAY:
+		/* one 64-bit slot per string pointer, plus the strings */
+		value_sz = (uint64_t)nelem * sizeof (uint64_t);
+
+		if (data != NULL) {
+			char *const *strs = data;
+			uint_t i;
+
+			/* no alignment requirement for strings */
+			for (i = 0; i < nelem; i++) {
+				if (strs[i] == NULL)
+					return (-1);
+				value_sz += strlen(strs[i]) + 1;
+			}
+		}
+		break;
+	case DATA_TYPE_HRTIME:
+		value_sz = sizeof (hrtime_t);
+		break;
+	case DATA_TYPE_NVLIST:
+		value_sz = NV_ALIGN(sizeof (nvlist_t));
+		break;
+	case DATA_TYPE_NVLIST_ARRAY:
+		value_sz = (uint64_t)nelem * sizeof (uint64_t) +
+		    (uint64_t)nelem * NV_ALIGN(sizeof (nvlist_t));
+		break;
+	default:
+		return (-1);
+	}
+
+	return (value_sz > INT32_MAX ? -1 : (int)value_sz);
+}
+
+/*
+ * Initialize the embedded nvlist 'emb_nvl' (whose storage lives inside an
+ * nvpair of 'nvl') and deep-copy all pairs from 'onvl' into it.  On copy
+ * failure the partially built list is freed and its priv pointer cleared.
+ * Returns 0 on success or an errno value (ENOMEM, ...).
+ */
+static int
+nvlist_copy_embedded(nvlist_t *nvl, nvlist_t *onvl, nvlist_t *emb_nvl)
+{
+	nvpriv_t *priv;
+	int err;
+
+	if ((priv = nv_priv_alloc_embedded((nvpriv_t *)(uintptr_t)
+	    nvl->nvl_priv)) == NULL)
+		return (ENOMEM);
+
+	nvlist_init(emb_nvl, onvl->nvl_nvflag, priv);
+
+	if ((err = nvlist_copy_pairs(onvl, emb_nvl)) != 0) {
+		nvlist_free(emb_nvl);
+		emb_nvl->nvl_priv = 0;
+	}
+
+	return (err);
+}
+
+/*
+ * nvlist_add_common - Add new <name,value> pair to nvlist
+ */
+static int
+nvlist_add_common(nvlist_t *nvl, const char *name,
+ data_type_t type, uint_t nelem, const void *data)
+{
+ nvpair_t *nvp;
+ uint_t i;
+
+ int nvp_sz, name_sz, value_sz;
+ int err = 0;
+
+ if (name == NULL || nvl == NULL || nvl->nvl_priv == 0)
+ return (EINVAL);
+
+ if (nelem != 0 && data == NULL)
+ return (EINVAL);
+
+ /*
+ * Verify type and nelem and get the value size.
+ * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
+ * is the size of the string(s) included.
+ */
+ if ((value_sz = i_get_value_size(type, data, nelem)) < 0)
+ return (EINVAL);
+
+ if (i_validate_nvpair_value(type, nelem, data) != 0)
+ return (EINVAL);
+
+ /*
+ * If we're adding an nvlist or nvlist array, ensure that we are not
+ * adding the input nvlist to itself, which would cause recursion,
+ * and ensure that no NULL nvlist pointers are present.
+ */
+ switch (type) {
+ case DATA_TYPE_NVLIST:
+ if (data == nvl || data == NULL)
+ return (EINVAL);
+ break;
+ case DATA_TYPE_NVLIST_ARRAY: {
+ nvlist_t **onvlp = (nvlist_t **)data;
+ for (i = 0; i < nelem; i++) {
+ if (onvlp[i] == nvl || onvlp[i] == NULL)
+ return (EINVAL);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ /* calculate sizes of the nvpair elements and the nvpair itself */
+ name_sz = strlen(name) + 1;
+
+ nvp_sz = NVP_SIZE_CALC(name_sz, value_sz);
+
+ if ((nvp = nvp_buf_alloc(nvl, nvp_sz)) == NULL)
+ return (ENOMEM);
+
+ ASSERT(nvp->nvp_size == nvp_sz);
+ nvp->nvp_name_sz = name_sz;
+ nvp->nvp_value_elem = nelem;
+ nvp->nvp_type = type;
+ bcopy(name, NVP_NAME(nvp), name_sz);
+
+ switch (type) {
+ case DATA_TYPE_BOOLEAN:
+ break;
+ case DATA_TYPE_STRING_ARRAY: {
+ char *const *strs = data;
+ char *buf = NVP_VALUE(nvp);
+ char **cstrs = (void *)buf;
+
+ /* skip pre-allocated space for pointer array */
+ buf += nelem * sizeof (uint64_t);
+ for (i = 0; i < nelem; i++) {
+ int slen = strlen(strs[i]) + 1;
+ bcopy(strs[i], buf, slen);
+ cstrs[i] = buf;
+ buf += slen;
+ }
+ break;
+ }
+ case DATA_TYPE_NVLIST: {
+ nvlist_t *nnvl = EMBEDDED_NVL(nvp);
+ nvlist_t *onvl = (nvlist_t *)data;
+
+ if ((err = nvlist_copy_embedded(nvl, onvl, nnvl)) != 0) {
+ nvp_buf_free(nvl, nvp);
+ return (err);
+ }
+ break;
+ }
+ case DATA_TYPE_NVLIST_ARRAY: {
+ nvlist_t **onvlp = (nvlist_t **)data;
+ nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp);
+ nvlist_t *embedded = (nvlist_t *)
+ ((uintptr_t)nvlp + nelem * sizeof (uint64_t));
+
+ for (i = 0; i < nelem; i++) {
+ if ((err = nvlist_copy_embedded(nvl,
+ onvlp[i], embedded)) != 0) {
+ /*
+ * Free any successfully created lists
+ */
+ nvpair_free(nvp);
+ nvp_buf_free(nvl, nvp);
+ return (err);
+ }
+
+ nvlp[i] = embedded++;
+ }
+ break;
+ }
+ default:
+ bcopy(data, NVP_VALUE(nvp), value_sz);
+ }
+
+ /* if unique name, remove before add */
+ if (nvl->nvl_nvflag & NV_UNIQUE_NAME)
+ (void) nvlist_remove_all(nvl, name);
+ else if (nvl->nvl_nvflag & NV_UNIQUE_NAME_TYPE)
+ (void) nvlist_remove(nvl, name, type);
+
+ nvp_buf_link(nvl, nvp);
+
+ return (0);
+}
+
+/*
+ * Typed convenience wrappers around nvlist_add_common().  Each adds a
+ * single scalar (nelem 1, or 0 for the valueless BOOLEAN) or an array of
+ * 'n' elements of the named type; return values are those of
+ * nvlist_add_common(): 0, EINVAL or ENOMEM.
+ */
+int
+nvlist_add_boolean(nvlist_t *nvl, const char *name)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN, 0, NULL));
+}
+
+int
+nvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_VALUE, 1, &val));
+}
+
+int
+nvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE, 1, &val));
+}
+
+int
+nvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT8, 1, &val));
+}
+
+int
+nvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8, 1, &val));
+}
+
+int
+nvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT16, 1, &val));
+}
+
+int
+nvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16, 1, &val));
+}
+
+int
+nvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT32, 1, &val));
+}
+
+int
+nvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32, 1, &val));
+}
+
+int
+nvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT64, 1, &val));
+}
+
+int
+nvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64, 1, &val));
+}
+
+/* DOUBLE is a userland-only type (no floating point in the kernel) */
+#if !defined(_KERNEL)
+int
+nvlist_add_double(nvlist_t *nvl, const char *name, double val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_DOUBLE, 1, &val));
+}
+#endif
+
+int
+nvlist_add_string(nvlist_t *nvl, const char *name, const char *val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_STRING, 1, (void *)val));
+}
+
+int
+nvlist_add_boolean_array(nvlist_t *nvl, const char *name,
+    boolean_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_ARRAY, n, a));
+}
+
+int
+nvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a));
+}
+
+int
+nvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a));
+}
+
+int
+nvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a));
+}
+
+int
+nvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a));
+}
+
+int
+nvlist_add_uint16_array(nvlist_t *nvl, const char *name, uint16_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a));
+}
+
+int
+nvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a));
+}
+
+int
+nvlist_add_uint32_array(nvlist_t *nvl, const char *name, uint32_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a));
+}
+
+int
+nvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a));
+}
+
+int
+nvlist_add_uint64_array(nvlist_t *nvl, const char *name, uint64_t *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a));
+}
+
+int
+nvlist_add_string_array(nvlist_t *nvl, const char *name,
+    char *const *a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a));
+}
+
+int
+nvlist_add_hrtime(nvlist_t *nvl, const char *name, hrtime_t val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_HRTIME, 1, &val));
+}
+
+int
+nvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST, 1, val));
+}
+
+int
+nvlist_add_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **a, uint_t n)
+{
+	return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a));
+}
+
+/* reading name-value pairs */
+
+/*
+ * Return the nvpair following 'nvp' on 'nvl', or the first pair when
+ * nvp == NULL.  Returns NULL at end of list or if nvp is not on nvl.
+ */
+nvpair_t *
+nvlist_next_nvpair(nvlist_t *nvl, nvpair_t *nvp)
+{
+	nvpriv_t *priv;
+	i_nvp_t *curr;
+
+	if (nvl == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (NULL);
+
+	curr = NVPAIR2I_NVP(nvp);
+
+	/*
+	 * Ensure that nvp is a valid nvpair on this nvlist.
+	 * NB: nvp_curr is used only as a hint so that we don't always
+	 * have to walk the list to determine if nvp is still on the list.
+	 */
+	if (nvp == NULL)
+		curr = priv->nvp_list;
+	else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp))
+		curr = curr->nvi_next;
+	else
+		curr = NULL;
+
+	priv->nvp_curr = curr;
+
+	return (curr != NULL ? &curr->nvi_nvp : NULL);
+}
+
+/*
+ * Return the nvpair preceding 'nvp' on 'nvl', or the last pair when
+ * nvp == NULL.  Mirror image of nvlist_next_nvpair().
+ */
+nvpair_t *
+nvlist_prev_nvpair(nvlist_t *nvl, nvpair_t *nvp)
+{
+	nvpriv_t *priv;
+	i_nvp_t *curr;
+
+	if (nvl == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (NULL);
+
+	curr = NVPAIR2I_NVP(nvp);
+
+	if (nvp == NULL)
+		curr = priv->nvp_last;
+	else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp))
+		curr = curr->nvi_prev;
+	else
+		curr = NULL;
+
+	priv->nvp_curr = curr;
+
+	return (curr != NULL ? &curr->nvi_nvp : NULL);
+}
+
+/* B_TRUE if the list has no pairs (a NULL/uninitialized list is "empty") */
+boolean_t
+nvlist_empty(nvlist_t *nvl)
+{
+	nvpriv_t *priv;
+
+	if (nvl == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (B_TRUE);
+
+	return (priv->nvp_list == NULL);
+}
+
+/* Pointer into the pair's own buffer; valid while the pair exists */
+char *
+nvpair_name(nvpair_t *nvp)
+{
+	return (NVP_NAME(nvp));
+}
+
+data_type_t
+nvpair_type(nvpair_t *nvp)
+{
+	return (NVP_TYPE(nvp));
+}
+
+/* 1 if the pair's type is any of the *_ARRAY types, else 0 */
+int
+nvpair_type_is_array(nvpair_t *nvp)
+{
+	data_type_t type = NVP_TYPE(nvp);
+
+	if ((type == DATA_TYPE_BYTE_ARRAY) ||
+	    (type == DATA_TYPE_UINT8_ARRAY) ||
+	    (type == DATA_TYPE_INT16_ARRAY) ||
+	    (type == DATA_TYPE_UINT16_ARRAY) ||
+	    (type == DATA_TYPE_INT32_ARRAY) ||
+	    (type == DATA_TYPE_UINT32_ARRAY) ||
+	    (type == DATA_TYPE_INT64_ARRAY) ||
+	    (type == DATA_TYPE_UINT64_ARRAY) ||
+	    (type == DATA_TYPE_BOOLEAN_ARRAY) ||
+	    (type == DATA_TYPE_STRING_ARRAY) ||
+	    (type == DATA_TYPE_NVLIST_ARRAY))
+		return (1);
+	return (0);
+
+}
+
+/*
+ * Extract the value of 'nvp' after checking that its type matches 'type'.
+ * Scalar values are copied out into *data; string, nvlist and array values
+ * are returned as pointers into the nvpair's own storage (no copy, so they
+ * remain valid only as long as the pair does).  *nelem, when non-NULL,
+ * receives the element count.  Returns 0, EINVAL or ENOTSUP.
+ */
+static int
+nvpair_value_common(nvpair_t *nvp, data_type_t type, uint_t *nelem, void *data)
+{
+	if (nvp == NULL || nvpair_type(nvp) != type)
+		return (EINVAL);
+
+	/*
+	 * For non-array types, we copy the data.
+	 * For array types (including string), we set a pointer.
+	 */
+	switch (type) {
+	case DATA_TYPE_BOOLEAN:
+		if (nelem != NULL)
+			*nelem = 0;
+		break;
+
+	case DATA_TYPE_BOOLEAN_VALUE:
+	case DATA_TYPE_BYTE:
+	case DATA_TYPE_INT8:
+	case DATA_TYPE_UINT8:
+	case DATA_TYPE_INT16:
+	case DATA_TYPE_UINT16:
+	case DATA_TYPE_INT32:
+	case DATA_TYPE_UINT32:
+	case DATA_TYPE_INT64:
+	case DATA_TYPE_UINT64:
+	case DATA_TYPE_HRTIME:
+#if !defined(_KERNEL)
+	case DATA_TYPE_DOUBLE:
+#endif
+		if (data == NULL)
+			return (EINVAL);
+		/* fixed-size scalar: size comes from the type alone */
+		bcopy(NVP_VALUE(nvp), data,
+		    (size_t)i_get_value_size(type, NULL, 1));
+		if (nelem != NULL)
+			*nelem = 1;
+		break;
+
+	case DATA_TYPE_NVLIST:
+	case DATA_TYPE_STRING:
+		if (data == NULL)
+			return (EINVAL);
+		*(void **)data = (void *)NVP_VALUE(nvp);
+		if (nelem != NULL)
+			*nelem = 1;
+		break;
+
+	case DATA_TYPE_BOOLEAN_ARRAY:
+	case DATA_TYPE_BYTE_ARRAY:
+	case DATA_TYPE_INT8_ARRAY:
+	case DATA_TYPE_UINT8_ARRAY:
+	case DATA_TYPE_INT16_ARRAY:
+	case DATA_TYPE_UINT16_ARRAY:
+	case DATA_TYPE_INT32_ARRAY:
+	case DATA_TYPE_UINT32_ARRAY:
+	case DATA_TYPE_INT64_ARRAY:
+	case DATA_TYPE_UINT64_ARRAY:
+	case DATA_TYPE_STRING_ARRAY:
+	case DATA_TYPE_NVLIST_ARRAY:
+		if (nelem == NULL || data == NULL)
+			return (EINVAL);
+		/* empty arrays yield a NULL data pointer */
+		if ((*nelem = NVP_NELEM(nvp)) != 0)
+			*(void **)data = (void *)NVP_VALUE(nvp);
+		else
+			*(void **)data = NULL;
+		break;
+
+	default:
+		return (ENOTSUP);
+	}
+
+	return (0);
+}
+
+/*
+ * Linear search for the first pair matching 'name' and 'type', then hand
+ * off to nvpair_value_common() for extraction.  Lookup is only supported
+ * on lists created with a uniqueness flag (otherwise the "first" match
+ * would be ambiguous).  Returns 0, EINVAL, ENOTSUP or ENOENT.
+ */
+static int
+nvlist_lookup_common(nvlist_t *nvl, const char *name, data_type_t type,
+    uint_t *nelem, void *data)
+{
+	nvpriv_t *priv;
+	nvpair_t *nvp;
+	i_nvp_t *curr;
+
+	if (name == NULL || nvl == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (EINVAL);
+
+	if (!(nvl->nvl_nvflag & (NV_UNIQUE_NAME | NV_UNIQUE_NAME_TYPE)))
+		return (ENOTSUP);
+
+	for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) {
+		nvp = &curr->nvi_nvp;
+
+		if (strcmp(name, NVP_NAME(nvp)) == 0 && NVP_TYPE(nvp) == type)
+			return (nvpair_value_common(nvp, type, nelem, data));
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Typed convenience wrappers around nvlist_lookup_common().  Scalars are
+ * copied into *val; string/nvlist/array lookups return pointers into the
+ * list's own storage.  Return values are those of nvlist_lookup_common():
+ * 0, EINVAL, ENOTSUP or ENOENT.
+ */
+int
+nvlist_lookup_boolean(nvlist_t *nvl, const char *name)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_BOOLEAN, NULL, NULL));
+}
+
+int
+nvlist_lookup_boolean_value(nvlist_t *nvl, const char *name, boolean_t *val)
+{
+	return (nvlist_lookup_common(nvl, name,
+	    DATA_TYPE_BOOLEAN_VALUE, NULL, val));
+}
+
+int
+nvlist_lookup_byte(nvlist_t *nvl, const char *name, uchar_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE, NULL, val));
+}
+
+int
+nvlist_lookup_int8(nvlist_t *nvl, const char *name, int8_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8, NULL, val));
+}
+
+int
+nvlist_lookup_uint8(nvlist_t *nvl, const char *name, uint8_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8, NULL, val));
+}
+
+int
+nvlist_lookup_int16(nvlist_t *nvl, const char *name, int16_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16, NULL, val));
+}
+
+int
+nvlist_lookup_uint16(nvlist_t *nvl, const char *name, uint16_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16, NULL, val));
+}
+
+int
+nvlist_lookup_int32(nvlist_t *nvl, const char *name, int32_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32, NULL, val));
+}
+
+int
+nvlist_lookup_uint32(nvlist_t *nvl, const char *name, uint32_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32, NULL, val));
+}
+
+int
+nvlist_lookup_int64(nvlist_t *nvl, const char *name, int64_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64, NULL, val));
+}
+
+int
+nvlist_lookup_uint64(nvlist_t *nvl, const char *name, uint64_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64, NULL, val));
+}
+
+/* DOUBLE is a userland-only type (no floating point in the kernel) */
+#if !defined(_KERNEL)
+int
+nvlist_lookup_double(nvlist_t *nvl, const char *name, double *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_DOUBLE, NULL, val));
+}
+#endif
+
+int
+nvlist_lookup_string(nvlist_t *nvl, const char *name, char **val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_STRING, NULL, val));
+}
+
+int
+nvlist_lookup_nvlist(nvlist_t *nvl, const char *name, nvlist_t **val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST, NULL, val));
+}
+
+int
+nvlist_lookup_boolean_array(nvlist_t *nvl, const char *name,
+    boolean_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name,
+	    DATA_TYPE_BOOLEAN_ARRAY, n, a));
+}
+
+int
+nvlist_lookup_byte_array(nvlist_t *nvl, const char *name,
+    uchar_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a));
+}
+
+int
+nvlist_lookup_int8_array(nvlist_t *nvl, const char *name, int8_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a));
+}
+
+int
+nvlist_lookup_uint8_array(nvlist_t *nvl, const char *name,
+    uint8_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a));
+}
+
+int
+nvlist_lookup_int16_array(nvlist_t *nvl, const char *name,
+    int16_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a));
+}
+
+int
+nvlist_lookup_uint16_array(nvlist_t *nvl, const char *name,
+    uint16_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a));
+}
+
+int
+nvlist_lookup_int32_array(nvlist_t *nvl, const char *name,
+    int32_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a));
+}
+
+int
+nvlist_lookup_uint32_array(nvlist_t *nvl, const char *name,
+    uint32_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a));
+}
+
+int
+nvlist_lookup_int64_array(nvlist_t *nvl, const char *name,
+    int64_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a));
+}
+
+int
+nvlist_lookup_uint64_array(nvlist_t *nvl, const char *name,
+    uint64_t **a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a));
+}
+
+int
+nvlist_lookup_string_array(nvlist_t *nvl, const char *name,
+    char ***a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a));
+}
+
+int
+nvlist_lookup_nvlist_array(nvlist_t *nvl, const char *name,
+    nvlist_t ***a, uint_t *n)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a));
+}
+
+int
+nvlist_lookup_hrtime(nvlist_t *nvl, const char *name, hrtime_t *val)
+{
+	return (nvlist_lookup_common(nvl, name, DATA_TYPE_HRTIME, NULL, val));
+}
+
+/*
+ * Look up several pairs in one call.  The variable argument list is a
+ * NULL-terminated sequence of (name, type, val...) tuples: scalar types
+ * take one value pointer, array types take a value pointer plus a
+ * uint_t * element-count pointer.  With NV_FLAG_NOENTOK set in 'flag',
+ * missing names are ignored; otherwise the first failure stops the scan
+ * and its error is returned.
+ */
+int
+nvlist_lookup_pairs(nvlist_t *nvl, int flag, ...)
+{
+	va_list ap;
+	char *name;
+	int noentok = (flag & NV_FLAG_NOENTOK ? 1 : 0);
+	int ret = 0;
+
+	va_start(ap, flag);
+	while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
+		data_type_t type;
+		void *val;
+		uint_t *nelem;
+
+		switch (type = va_arg(ap, data_type_t)) {
+		case DATA_TYPE_BOOLEAN:
+			/* valueless type: no val argument follows */
+			ret = nvlist_lookup_common(nvl, name, type, NULL, NULL);
+			break;
+
+		case DATA_TYPE_BOOLEAN_VALUE:
+		case DATA_TYPE_BYTE:
+		case DATA_TYPE_INT8:
+		case DATA_TYPE_UINT8:
+		case DATA_TYPE_INT16:
+		case DATA_TYPE_UINT16:
+		case DATA_TYPE_INT32:
+		case DATA_TYPE_UINT32:
+		case DATA_TYPE_INT64:
+		case DATA_TYPE_UINT64:
+		case DATA_TYPE_HRTIME:
+		case DATA_TYPE_STRING:
+		case DATA_TYPE_NVLIST:
+#if !defined(_KERNEL)
+		case DATA_TYPE_DOUBLE:
+#endif
+			val = va_arg(ap, void *);
+			ret = nvlist_lookup_common(nvl, name, type, NULL, val);
+			break;
+
+		case DATA_TYPE_BYTE_ARRAY:
+		case DATA_TYPE_BOOLEAN_ARRAY:
+		case DATA_TYPE_INT8_ARRAY:
+		case DATA_TYPE_UINT8_ARRAY:
+		case DATA_TYPE_INT16_ARRAY:
+		case DATA_TYPE_UINT16_ARRAY:
+		case DATA_TYPE_INT32_ARRAY:
+		case DATA_TYPE_UINT32_ARRAY:
+		case DATA_TYPE_INT64_ARRAY:
+		case DATA_TYPE_UINT64_ARRAY:
+		case DATA_TYPE_STRING_ARRAY:
+		case DATA_TYPE_NVLIST_ARRAY:
+			/* array types take a value pointer and a count */
+			val = va_arg(ap, void *);
+			nelem = va_arg(ap, uint_t *);
+			ret = nvlist_lookup_common(nvl, name, type, nelem, val);
+			break;
+
+		default:
+			ret = EINVAL;
+		}
+
+		if (ret == ENOENT && noentok)
+			ret = 0;
+	}
+	va_end(ap);
+
+	return (ret);
+}
+
+/*
+ * Find the 'name'ed nvpair in the nvlist 'nvl'. If 'name' found, the function
+ * returns zero and a pointer to the matching nvpair is returned in '*ret'
+ * (given 'ret' is non-NULL). If 'sep' is specified then 'name' will penitrate
+ * multiple levels of embedded nvlists, with 'sep' as the separator. As an
+ * example, if sep is '.', name might look like: "a" or "a.b" or "a.c[3]" or
+ * "a.d[3].e[1]". This matches the C syntax for array embed (for convience,
+ * code also supports "a.d[3]e[1]" syntax).
+ *
+ * If 'ip' is non-NULL and the last name component is an array, return the
+ * value of the "...[index]" array index in *ip. For an array reference that
+ * is not indexed, *ip will be returned as -1. If there is a syntax error in
+ * 'name', and 'ep' is non-NULL then *ep will be set to point to the location
+ * inside the 'name' string where the syntax error was detected.
+ */
+static int
+nvlist_lookup_nvpair_ei_sep(nvlist_t *nvl, const char *name, const char sep,
+ nvpair_t **ret, int *ip, char **ep)
+{
+ nvpair_t *nvp;
+ const char *np;
+ char *sepp;
+ char *idxp, *idxep;
+ nvlist_t **nva;
+ long idx;
+ int n;
+
+ if (ip)
+ *ip = -1; /* not indexed */
+ if (ep)
+ *ep = NULL;
+
+ if ((nvl == NULL) || (name == NULL))
+ return (EINVAL);
+
+ /* step through components of name */
+ for (np = name; np && *np; np = sepp) {
+ /* ensure unique names */
+ if (!(nvl->nvl_nvflag & NV_UNIQUE_NAME))
+ return (ENOTSUP);
+
+ /* skip white space */
+ skip_whitespace(np);
+ if (*np == 0)
+ break;
+
+ /* set 'sepp' to end of current component 'np' */
+ if (sep)
+ sepp = strchr(np, sep);
+ else
+ sepp = NULL;
+
+ /* find start of next "[ index ]..." */
+ idxp = strchr(np, '[');
+
+ /* if sepp comes first, set idxp to NULL */
+ if (sepp && idxp && (sepp < idxp))
+ idxp = NULL;
+
+ /*
+ * At this point 'idxp' is set if there is an index
+ * expected for the current component.
+ */
+ if (idxp) {
+ /* set 'n' to length of current 'np' name component */
+ n = idxp++ - np;
+
+ /* keep sepp up to date for *ep use as we advance */
+ skip_whitespace(idxp);
+ sepp = idxp;
+
+ /* determine the index value */
+#if defined(_KERNEL) && !defined(_BOOT)
+ if (ddi_strtol(idxp, &idxep, 0, &idx))
+ goto fail;
+#else
+ idx = strtol(idxp, &idxep, 0);
+#endif
+ if (idxep == idxp)
+ goto fail;
+
+ /* keep sepp up to date for *ep use as we advance */
+ sepp = idxep;
+
+ /* skip white space index value and check for ']' */
+ skip_whitespace(sepp);
+ if (*sepp++ != ']')
+ goto fail;
+
+ /* for embedded arrays, support C syntax: "a[1].b" */
+ skip_whitespace(sepp);
+ if (sep && (*sepp == sep))
+ sepp++;
+ } else if (sepp) {
+ n = sepp++ - np;
+ } else {
+ n = strlen(np);
+ }
+
+ /* trim trailing whitespace by reducing length of 'np' */
+ if (n == 0)
+ goto fail;
+ for (n--; (np[n] == ' ') || (np[n] == '\t'); n--)
+ ;
+ n++;
+
+ /* skip whitespace, and set sepp to NULL if complete */
+ if (sepp) {
+ skip_whitespace(sepp);
+ if (*sepp == 0)
+ sepp = NULL;
+ }
+
+ /*
+ * At this point:
+ * o 'n' is the length of current 'np' component.
+ * o 'idxp' is set if there was an index, and value 'idx'.
+ * o 'sepp' is set to the beginning of the next component,
+ * and set to NULL if we have no more components.
+ *
+ * Search for nvpair with matching component name.
+ */
+ for (nvp = nvlist_next_nvpair(nvl, NULL); nvp != NULL;
+ nvp = nvlist_next_nvpair(nvl, nvp)) {
+
+ /* continue if no match on name */
+ if (strncmp(np, nvpair_name(nvp), n) ||
+ (strlen(nvpair_name(nvp)) != n))
+ continue;
+
+ /* if indexed, verify type is array oriented */
+ if (idxp && !nvpair_type_is_array(nvp))
+ goto fail;
+
+ /*
+ * Full match found, return nvp and idx if this
+ * was the last component.
+ */
+ if (sepp == NULL) {
+ if (ret)
+ *ret = nvp;
+ if (ip && idxp)
+ *ip = (int)idx; /* return index */
+ return (0); /* found */
+ }
+
+ /*
+ * More components: current match must be
+ * of DATA_TYPE_NVLIST or DATA_TYPE_NVLIST_ARRAY
+ * to support going deeper.
+ */
+ if (nvpair_type(nvp) == DATA_TYPE_NVLIST) {
+ nvl = EMBEDDED_NVL(nvp);
+ break;
+ } else if (nvpair_type(nvp) == DATA_TYPE_NVLIST_ARRAY) {
+ (void) nvpair_value_nvlist_array(nvp,
+ &nva, (uint_t *)&n);
+ if ((n < 0) || (idx >= n))
+ goto fail;
+ nvl = nva[idx];
+ break;
+ }
+
+ /* type does not support more levels */
+ goto fail;
+ }
+ if (nvp == NULL)
+ goto fail; /* 'name' not found */
+
+ /* search for match of next component in embedded 'nvl' list */
+ }
+
+fail: if (ep && sepp)
+ *ep = sepp;
+ return (EINVAL);
+}
+
+/*
+ * Return pointer to nvpair with specified 'name'.
+ */
+int
+nvlist_lookup_nvpair(nvlist_t *nvl, const char *name, nvpair_t **ret)
+{
+ return (nvlist_lookup_nvpair_ei_sep(nvl, name, 0, ret, NULL, NULL));
+}
+
+/*
+ * Determine if named nvpair exists in nvlist (use embedded separator of '.'
+ * and return array index). See nvlist_lookup_nvpair_ei_sep for more detailed
+ * description.
+ */
+int nvlist_lookup_nvpair_embedded_index(nvlist_t *nvl,
+ const char *name, nvpair_t **ret, int *ip, char **ep)
+{
+ return (nvlist_lookup_nvpair_ei_sep(nvl, name, '.', ret, ip, ep));
+}
+
+/*
+ * B_TRUE if a pair with the given name exists on 'nvl' (any data type),
+ * B_FALSE otherwise or on NULL/uninitialized arguments.
+ */
+boolean_t
+nvlist_exists(nvlist_t *nvl, const char *name)
+{
+	nvpriv_t *priv;
+	nvpair_t *nvp;
+	i_nvp_t *curr;
+
+	if (name == NULL || nvl == NULL ||
+	    (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+		return (B_FALSE);
+
+	for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) {
+		nvp = &curr->nvi_nvp;
+
+		if (strcmp(name, NVP_NAME(nvp)) == 0)
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Typed accessors around nvpair_value_common().  Scalars are copied into
+ * *val; string/nvlist/array accessors return pointers into the pair's own
+ * storage (valid only while the pair exists).  Return 0 or EINVAL on a
+ * NULL pair / type mismatch.
+ */
+int
+nvpair_value_boolean_value(nvpair_t *nvp, boolean_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_VALUE, NULL, val));
+}
+
+int
+nvpair_value_byte(nvpair_t *nvp, uchar_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_BYTE, NULL, val));
+}
+
+int
+nvpair_value_int8(nvpair_t *nvp, int8_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT8, NULL, val));
+}
+
+int
+nvpair_value_uint8(nvpair_t *nvp, uint8_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT8, NULL, val));
+}
+
+int
+nvpair_value_int16(nvpair_t *nvp, int16_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT16, NULL, val));
+}
+
+int
+nvpair_value_uint16(nvpair_t *nvp, uint16_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT16, NULL, val));
+}
+
+int
+nvpair_value_int32(nvpair_t *nvp, int32_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT32, NULL, val));
+}
+
+int
+nvpair_value_uint32(nvpair_t *nvp, uint32_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT32, NULL, val));
+}
+
+int
+nvpair_value_int64(nvpair_t *nvp, int64_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT64, NULL, val));
+}
+
+int
+nvpair_value_uint64(nvpair_t *nvp, uint64_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT64, NULL, val));
+}
+
+/* DOUBLE is a userland-only type (no floating point in the kernel) */
+#if !defined(_KERNEL)
+int
+nvpair_value_double(nvpair_t *nvp, double *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_DOUBLE, NULL, val));
+}
+#endif
+
+int
+nvpair_value_string(nvpair_t *nvp, char **val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_STRING, NULL, val));
+}
+
+int
+nvpair_value_nvlist(nvpair_t *nvp, nvlist_t **val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_NVLIST, NULL, val));
+}
+
+int
+nvpair_value_boolean_array(nvpair_t *nvp, boolean_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_ARRAY, nelem, val));
+}
+
+int
+nvpair_value_byte_array(nvpair_t *nvp, uchar_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_BYTE_ARRAY, nelem, val));
+}
+
+int
+nvpair_value_int8_array(nvpair_t *nvp, int8_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT8_ARRAY, nelem, val));
+}
+
+int
+nvpair_value_uint8_array(nvpair_t *nvp, uint8_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT8_ARRAY, nelem, val));
+}
+
+int
+nvpair_value_int16_array(nvpair_t *nvp, int16_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT16_ARRAY, nelem, val));
+}
+
+int
+nvpair_value_uint16_array(nvpair_t *nvp, uint16_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT16_ARRAY, nelem, val));
+}
+
+int
+nvpair_value_int32_array(nvpair_t *nvp, int32_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT32_ARRAY, nelem, val));
+}
+
+int
+nvpair_value_uint32_array(nvpair_t *nvp, uint32_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT32_ARRAY, nelem, val));
+}
+
+int
+nvpair_value_int64_array(nvpair_t *nvp, int64_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_INT64_ARRAY, nelem, val));
+}
+
+int
+nvpair_value_uint64_array(nvpair_t *nvp, uint64_t **val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_UINT64_ARRAY, nelem, val));
+}
+
+int
+nvpair_value_string_array(nvpair_t *nvp, char ***val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_STRING_ARRAY, nelem, val));
+}
+
+int
+nvpair_value_nvlist_array(nvpair_t *nvp, nvlist_t ***val, uint_t *nelem)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_NVLIST_ARRAY, nelem, val));
+}
+
+int
+nvpair_value_hrtime(nvpair_t *nvp, hrtime_t *val)
+{
+	return (nvpair_value_common(nvp, DATA_TYPE_HRTIME, NULL, val));
+}
+
+/*
+ * Add specified pair to the list.
+ */
+int
+nvlist_add_nvpair(nvlist_t *nvl, nvpair_t *nvp)
+{
+ if (nvl == NULL || nvp == NULL)
+ return (EINVAL);
+
+ return (nvlist_add_common(nvl, NVP_NAME(nvp), NVP_TYPE(nvp),
+ NVP_NELEM(nvp), NVP_VALUE(nvp)));
+}
+
+/*
+ * Merge the supplied nvlists and put the result in dst.
+ * The merged list will contain all names specified in both lists,
+ * the values are taken from nvl in the case of duplicates.
+ * Return 0 on success.
+ */
+/*ARGSUSED*/
+int
+nvlist_merge(nvlist_t *dst, nvlist_t *nvl, int flag)
+{
+ if (nvl == NULL || dst == NULL)
+ return (EINVAL);
+
+ if (dst != nvl)
+ return (nvlist_copy_pairs(nvl, dst));
+
+ return (0);
+}
+
+/*
+ * Encoding related routines
+ */
+#define NVS_OP_ENCODE 0
+#define NVS_OP_DECODE 1
+#define NVS_OP_GETSIZE 2
+
+typedef struct nvs_ops nvs_ops_t;
+
+typedef struct {
+ int nvs_op;
+ const nvs_ops_t *nvs_ops;
+ void *nvs_private;
+ nvpriv_t *nvs_priv;
+} nvstream_t;
+
+/*
+ * nvs operations are:
+ * - nvs_nvlist
+ * encoding / decoding of a nvlist header (nvlist_t)
+ * calculates the size used for header and end detection
+ *
+ * - nvs_nvpair
+ * responsible for the first part of encoding / decoding of an nvpair
+ * calculates the decoded size of an nvpair
+ *
+ * - nvs_nvp_op
+ * second part of encoding / decoding of an nvpair
+ *
+ * - nvs_nvp_size
+ * calculates the encoding size of an nvpair
+ *
+ * - nvs_nvl_fini
+ * encodes the end detection mark (zeros).
+ */
+struct nvs_ops {
+ int (*nvs_nvlist)(nvstream_t *, nvlist_t *, size_t *);
+ int (*nvs_nvpair)(nvstream_t *, nvpair_t *, size_t *);
+ int (*nvs_nvp_op)(nvstream_t *, nvpair_t *);
+ int (*nvs_nvp_size)(nvstream_t *, nvpair_t *, size_t *);
+ int (*nvs_nvl_fini)(nvstream_t *);
+};
+
+typedef struct {
+ char nvh_encoding; /* nvs encoding method */
+ char nvh_endian; /* nvs endian */
+ char nvh_reserved1; /* reserved for future use */
+ char nvh_reserved2; /* reserved for future use */
+} nvs_header_t;
+
+/*
+ * Encode every pair on 'nvl' into the stream, then write the
+ * end-of-list marker via nvs_nvl_fini.  Returns 0 or EFAULT.
+ */
+static int
+nvs_encode_pairs(nvstream_t *nvs, nvlist_t *nvl)
+{
+	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+	i_nvp_t *curr;
+
+	/*
+	 * Walk nvpair in list and encode each nvpair
+	 */
+	for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next)
+		if (nvs->nvs_ops->nvs_nvpair(nvs, &curr->nvi_nvp, NULL) != 0)
+			return (EFAULT);
+
+	return (nvs->nvs_ops->nvs_nvl_fini(nvs));
+}
+
+/*
+ * Decode pairs from the stream onto 'nvl' until the zero-size
+ * end-of-list marker.  Each decoded pair is validated before being
+ * linked; on any failure the pair in progress is freed and the error
+ * (EFAULT, ENOMEM, ...) is returned.
+ */
+static int
+nvs_decode_pairs(nvstream_t *nvs, nvlist_t *nvl)
+{
+	nvpair_t *nvp;
+	size_t nvsize;
+	int err;
+
+	/*
+	 * Get decoded size of next pair in stream, alloc
+	 * memory for nvpair_t, then decode the nvpair
+	 */
+	while ((err = nvs->nvs_ops->nvs_nvpair(nvs, NULL, &nvsize)) == 0) {
+		if (nvsize == 0) /* end of list */
+			break;
+
+		/* make sure len makes sense */
+		if (nvsize < NVP_SIZE_CALC(1, 0))
+			return (EFAULT);
+
+		if ((nvp = nvp_buf_alloc(nvl, nvsize)) == NULL)
+			return (ENOMEM);
+
+		if ((err = nvs->nvs_ops->nvs_nvp_op(nvs, nvp)) != 0) {
+			nvp_buf_free(nvl, nvp);
+			return (err);
+		}
+
+		if (i_validate_nvpair(nvp) != 0) {
+			nvpair_free(nvp);
+			nvp_buf_free(nvl, nvp);
+			return (EFAULT);
+		}
+
+		nvp_buf_link(nvl, nvp);
+	}
+	return (err);
+}
+
+static int
+nvs_getsize_pairs(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen)
+{
+ nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+ i_nvp_t *curr;
+ uint64_t nvsize = *buflen;
+ size_t size;
+
+ /*
+ * Get encoded size of nvpairs in nvlist
+ */
+ for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) {
+ if (nvs->nvs_ops->nvs_nvp_size(nvs, &curr->nvi_nvp, &size) != 0)
+ return (EINVAL);
+
+ if ((nvsize += size) > INT32_MAX)
+ return (EINVAL);
+ }
+
+ *buflen = nvsize;
+ return (0);
+}
+
+static int
+nvs_operation(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen)
+{
+ int err;
+
+ if (nvl->nvl_priv == 0)
+ return (EFAULT);
+
+ /*
+ * Perform the operation, starting with header, then each nvpair
+ */
+ if ((err = nvs->nvs_ops->nvs_nvlist(nvs, nvl, buflen)) != 0)
+ return (err);
+
+ switch (nvs->nvs_op) {
+ case NVS_OP_ENCODE:
+ err = nvs_encode_pairs(nvs, nvl);
+ break;
+
+ case NVS_OP_DECODE:
+ err = nvs_decode_pairs(nvs, nvl);
+ break;
+
+ case NVS_OP_GETSIZE:
+ err = nvs_getsize_pairs(nvs, nvl, buflen);
+ break;
+
+ default:
+ err = EINVAL;
+ }
+
+ return (err);
+}
+
+static int
+nvs_embedded(nvstream_t *nvs, nvlist_t *embedded)
+{
+ switch (nvs->nvs_op) {
+ case NVS_OP_ENCODE:
+ return (nvs_operation(nvs, embedded, NULL));
+
+ case NVS_OP_DECODE: {
+ nvpriv_t *priv;
+ int err;
+
+ if (embedded->nvl_version != NV_VERSION)
+ return (ENOTSUP);
+
+ if ((priv = nv_priv_alloc_embedded(nvs->nvs_priv)) == NULL)
+ return (ENOMEM);
+
+ nvlist_init(embedded, embedded->nvl_nvflag, priv);
+
+ if ((err = nvs_operation(nvs, embedded, NULL)) != 0)
+ nvlist_free(embedded);
+ return (err);
+ }
+ default:
+ break;
+ }
+
+ return (EINVAL);
+}
+
+static int
+nvs_embedded_nvl_array(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
+{
+ size_t nelem = NVP_NELEM(nvp);
+ nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp);
+ int i;
+
+ switch (nvs->nvs_op) {
+ case NVS_OP_ENCODE:
+ for (i = 0; i < nelem; i++)
+ if (nvs_embedded(nvs, nvlp[i]) != 0)
+ return (EFAULT);
+ break;
+
+ case NVS_OP_DECODE: {
+ size_t len = nelem * sizeof (uint64_t);
+ nvlist_t *embedded = (nvlist_t *)((uintptr_t)nvlp + len);
+
+ bzero(nvlp, len); /* don't trust packed data */
+ for (i = 0; i < nelem; i++) {
+ if (nvs_embedded(nvs, embedded) != 0) {
+ nvpair_free(nvp);
+ return (EFAULT);
+ }
+
+ nvlp[i] = embedded++;
+ }
+ break;
+ }
+ case NVS_OP_GETSIZE: {
+ uint64_t nvsize = 0;
+
+ for (i = 0; i < nelem; i++) {
+ size_t nvp_sz = 0;
+
+ if (nvs_operation(nvs, nvlp[i], &nvp_sz) != 0)
+ return (EINVAL);
+
+ if ((nvsize += nvp_sz) > INT32_MAX)
+ return (EINVAL);
+ }
+
+ *size = nvsize;
+ break;
+ }
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+static int nvs_native(nvstream_t *, nvlist_t *, char *, size_t *);
+static int nvs_xdr(nvstream_t *, nvlist_t *, char *, size_t *);
+
+/*
+ * Common routine for nvlist operations:
+ * encode, decode, getsize (encoded size).
+ */
+static int
+nvlist_common(nvlist_t *nvl, char *buf, size_t *buflen, int encoding,
+ int nvs_op)
+{
+ int err = 0;
+ nvstream_t nvs;
+ int nvl_endian;
+#ifdef _LITTLE_ENDIAN
+ int host_endian = 1;
+#else
+ int host_endian = 0;
+#endif /* _LITTLE_ENDIAN */
+ nvs_header_t *nvh = (void *)buf;
+
+ if (buflen == NULL || nvl == NULL ||
+ (nvs.nvs_priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+ return (EINVAL);
+
+ nvs.nvs_op = nvs_op;
+
+ /*
+ * For NVS_OP_ENCODE and NVS_OP_DECODE make sure an nvlist and
+ * a buffer is allocated. The first 4 bytes in the buffer are
+ * used for encoding method and host endian.
+ */
+ switch (nvs_op) {
+ case NVS_OP_ENCODE:
+ if (buf == NULL || *buflen < sizeof (nvs_header_t))
+ return (EINVAL);
+
+ nvh->nvh_encoding = encoding;
+ nvh->nvh_endian = nvl_endian = host_endian;
+ nvh->nvh_reserved1 = 0;
+ nvh->nvh_reserved2 = 0;
+ break;
+
+ case NVS_OP_DECODE:
+ if (buf == NULL || *buflen < sizeof (nvs_header_t))
+ return (EINVAL);
+
+ /* get method of encoding from first byte */
+ encoding = nvh->nvh_encoding;
+ nvl_endian = nvh->nvh_endian;
+ break;
+
+ case NVS_OP_GETSIZE:
+ nvl_endian = host_endian;
+
+ /*
+ * add the size for encoding
+ */
+ *buflen = sizeof (nvs_header_t);
+ break;
+
+ default:
+ return (ENOTSUP);
+ }
+
+ /*
+ * Create an nvstream with proper encoding method
+ */
+ switch (encoding) {
+ case NV_ENCODE_NATIVE:
+ /*
+ * check endianness, in case we are unpacking
+ * from a file
+ */
+ if (nvl_endian != host_endian)
+ return (ENOTSUP);
+ err = nvs_native(&nvs, nvl, buf, buflen);
+ break;
+ case NV_ENCODE_XDR:
+ err = nvs_xdr(&nvs, nvl, buf, buflen);
+ break;
+ default:
+ err = ENOTSUP;
+ break;
+ }
+
+ return (err);
+}
+
+int
+nvlist_size(nvlist_t *nvl, size_t *size, int encoding)
+{
+ return (nvlist_common(nvl, NULL, size, encoding, NVS_OP_GETSIZE));
+}
+
+/*
+ * Pack nvlist into contiguous memory
+ */
+/*ARGSUSED1*/
+int
+nvlist_pack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding,
+ int kmflag)
+{
+#if defined(_KERNEL) && !defined(_BOOT)
+ return (nvlist_xpack(nvl, bufp, buflen, encoding,
+ (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep)));
+#else
+ return (nvlist_xpack(nvl, bufp, buflen, encoding, nv_alloc_nosleep));
+#endif
+}
+
+int
+nvlist_xpack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding,
+ nv_alloc_t *nva)
+{
+ nvpriv_t nvpriv;
+ size_t alloc_size;
+ char *buf;
+ int err;
+
+ if (nva == NULL || nvl == NULL || bufp == NULL || buflen == NULL)
+ return (EINVAL);
+
+ if (*bufp != NULL)
+ return (nvlist_common(nvl, *bufp, buflen, encoding,
+ NVS_OP_ENCODE));
+
+ /*
+ * Here is a difficult situation:
+ * 1. The nvlist has fixed allocator properties.
+ * All other nvlist routines (like nvlist_add_*, ...) use
+ * these properties.
+ * 2. When using nvlist_pack() the user can specify his own
+ * allocator properties (e.g. by using KM_NOSLEEP).
+ *
+ * We use the user specified properties (2). A clearer solution
+ * will be to remove the kmflag from nvlist_pack(), but we will
+ * not change the interface.
+ */
+ nv_priv_init(&nvpriv, nva, 0);
+
+ if (err = nvlist_size(nvl, &alloc_size, encoding))
+ return (err);
+
+ if ((buf = nv_mem_zalloc(&nvpriv, alloc_size)) == NULL)
+ return (ENOMEM);
+
+ if ((err = nvlist_common(nvl, buf, &alloc_size, encoding,
+ NVS_OP_ENCODE)) != 0) {
+ nv_mem_free(&nvpriv, buf, alloc_size);
+ } else {
+ *buflen = alloc_size;
+ *bufp = buf;
+ }
+
+ return (err);
+}
+
+/*
+ * Unpack buf into an nvlist_t
+ */
+/*ARGSUSED1*/
+int
+nvlist_unpack(char *buf, size_t buflen, nvlist_t **nvlp, int kmflag)
+{
+#if defined(_KERNEL) && !defined(_BOOT)
+ return (nvlist_xunpack(buf, buflen, nvlp,
+ (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep)));
+#else
+ return (nvlist_xunpack(buf, buflen, nvlp, nv_alloc_nosleep));
+#endif
+}
+
+int
+nvlist_xunpack(char *buf, size_t buflen, nvlist_t **nvlp, nv_alloc_t *nva)
+{
+ nvlist_t *nvl;
+ int err;
+
+ if (nvlp == NULL)
+ return (EINVAL);
+
+ if ((err = nvlist_xalloc(&nvl, 0, nva)) != 0)
+ return (err);
+
+ if ((err = nvlist_common(nvl, buf, &buflen, 0, NVS_OP_DECODE)) != 0)
+ nvlist_free(nvl);
+ else
+ *nvlp = nvl;
+
+ return (err);
+}
+
+/*
+ * Native encoding functions
+ */
+typedef struct {
+ /*
+ * This structure is used when decoding a packed nvpair in
+ * the native format. n_base points to a buffer containing the
+ * packed nvpair. n_end is a pointer to the end of the buffer.
+ * (n_end actually points to the first byte past the end of the
+ * buffer.) n_curr is a pointer that lies between n_base and n_end.
+ * It points to the current data that we are decoding.
+ * The amount of data left in the buffer is equal to n_end - n_curr.
+ * n_flag is used to recognize a packed embedded list.
+ */
+ caddr_t n_base;
+ caddr_t n_end;
+ caddr_t n_curr;
+ uint_t n_flag;
+} nvs_native_t;
+
+static int
+nvs_native_create(nvstream_t *nvs, nvs_native_t *native, char *buf,
+ size_t buflen)
+{
+ switch (nvs->nvs_op) {
+ case NVS_OP_ENCODE:
+ case NVS_OP_DECODE:
+ nvs->nvs_private = native;
+ native->n_curr = native->n_base = buf;
+ native->n_end = buf + buflen;
+ native->n_flag = 0;
+ return (0);
+
+ case NVS_OP_GETSIZE:
+ nvs->nvs_private = native;
+ native->n_curr = native->n_base = native->n_end = NULL;
+ native->n_flag = 0;
+ return (0);
+ default:
+ return (EINVAL);
+ }
+}
+
+/*ARGSUSED*/
+static void
+nvs_native_destroy(nvstream_t *nvs)
+{
+}
+
+static int
+native_cp(nvstream_t *nvs, void *buf, size_t size)
+{
+ nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
+
+ if (native->n_curr + size > native->n_end)
+ return (EFAULT);
+
+ /*
+ * The bcopy() below eliminates alignment requirement
+ * on the buffer (stream) and is preferred over direct access.
+ */
+ switch (nvs->nvs_op) {
+ case NVS_OP_ENCODE:
+ bcopy(buf, native->n_curr, size);
+ break;
+ case NVS_OP_DECODE:
+ bcopy(native->n_curr, buf, size);
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ native->n_curr += size;
+ return (0);
+}
+
+/*
+ * operate on nvlist_t header
+ */
+static int
+nvs_native_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size)
+{
+ nvs_native_t *native = nvs->nvs_private;
+
+ switch (nvs->nvs_op) {
+ case NVS_OP_ENCODE:
+ case NVS_OP_DECODE:
+ if (native->n_flag)
+ return (0); /* packed embedded list */
+
+ native->n_flag = 1;
+
+ /* copy version and nvflag of the nvlist_t */
+ if (native_cp(nvs, &nvl->nvl_version, sizeof (int32_t)) != 0 ||
+ native_cp(nvs, &nvl->nvl_nvflag, sizeof (int32_t)) != 0)
+ return (EFAULT);
+
+ return (0);
+
+ case NVS_OP_GETSIZE:
+ /*
+ * if calculate for packed embedded list
+ * 4 for end of the embedded list
+ * else
+ * 2 * sizeof (int32_t) for nvl_version and nvl_nvflag
+ * and 4 for end of the entire list
+ */
+ if (native->n_flag) {
+ *size += 4;
+ } else {
+ native->n_flag = 1;
+ *size += 2 * sizeof (int32_t) + 4;
+ }
+
+ return (0);
+
+ default:
+ return (EINVAL);
+ }
+}
+
+static int
+nvs_native_nvl_fini(nvstream_t *nvs)
+{
+ if (nvs->nvs_op == NVS_OP_ENCODE) {
+ nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
+ /*
+ * Add 4 zero bytes at end of nvlist. They are used
+ * for end detection by the decode routine.
+ */
+ if (native->n_curr + sizeof (int) > native->n_end)
+ return (EFAULT);
+
+ bzero(native->n_curr, sizeof (int));
+ native->n_curr += sizeof (int);
+ }
+
+ return (0);
+}
+
+static int
+nvpair_native_embedded(nvstream_t *nvs, nvpair_t *nvp)
+{
+ if (nvs->nvs_op == NVS_OP_ENCODE) {
+ nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
+ nvlist_t *packed = (void *)
+ (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp));
+ /*
+ * Null out the pointer that is meaningless in the packed
+ * structure. The address may not be aligned, so we have
+ * to use bzero.
+ */
+ bzero(&packed->nvl_priv, sizeof (packed->nvl_priv));
+ }
+
+ return (nvs_embedded(nvs, EMBEDDED_NVL(nvp)));
+}
+
+static int
+nvpair_native_embedded_array(nvstream_t *nvs, nvpair_t *nvp)
+{
+ if (nvs->nvs_op == NVS_OP_ENCODE) {
+ nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
+ char *value = native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp);
+ size_t len = NVP_NELEM(nvp) * sizeof (uint64_t);
+ nvlist_t *packed = (nvlist_t *)((uintptr_t)value + len);
+ int i;
+ /*
+ * Null out pointers that are meaningless in the packed
+ * structure. The addresses may not be aligned, so we have
+ * to use bzero.
+ */
+ bzero(value, len);
+
+ for (i = 0; i < NVP_NELEM(nvp); i++, packed++)
+ /*
+ * Null out the pointer that is meaningless in the
+ * packed structure. The address may not be aligned,
+ * so we have to use bzero.
+ */
+ bzero(&packed->nvl_priv, sizeof (packed->nvl_priv));
+ }
+
+ return (nvs_embedded_nvl_array(nvs, nvp, NULL));
+}
+
+static void
+nvpair_native_string_array(nvstream_t *nvs, nvpair_t *nvp)
+{
+ switch (nvs->nvs_op) {
+ case NVS_OP_ENCODE: {
+ nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
+ uint64_t *strp = (void *)
+ (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp));
+ /*
+ * Null out pointers that are meaningless in the packed
+ * structure. The addresses may not be aligned, so we have
+ * to use bzero.
+ */
+ bzero(strp, NVP_NELEM(nvp) * sizeof (uint64_t));
+ break;
+ }
+ case NVS_OP_DECODE: {
+ char **strp = (void *)NVP_VALUE(nvp);
+ char *buf = ((char *)strp + NVP_NELEM(nvp) * sizeof (uint64_t));
+ int i;
+
+ for (i = 0; i < NVP_NELEM(nvp); i++) {
+ strp[i] = buf;
+ buf += strlen(buf) + 1;
+ }
+ break;
+ }
+ }
+}
+
+static int
+nvs_native_nvp_op(nvstream_t *nvs, nvpair_t *nvp)
+{
+ data_type_t type;
+ int value_sz;
+ int ret = 0;
+
+ /*
+ * We do the initial bcopy of the data before we look at
+ * the nvpair type, because when we're decoding, we won't
+ * have the correct values for the pair until we do the bcopy.
+ */
+ switch (nvs->nvs_op) {
+ case NVS_OP_ENCODE:
+ case NVS_OP_DECODE:
+ if (native_cp(nvs, nvp, nvp->nvp_size) != 0)
+ return (EFAULT);
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ /* verify nvp_name_sz, check the name string length */
+ if (i_validate_nvpair_name(nvp) != 0)
+ return (EFAULT);
+
+ type = NVP_TYPE(nvp);
+
+ /*
+ * Verify type and nelem and get the value size.
+ * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
+ * is the size of the string(s) excluded.
+ */
+ if ((value_sz = i_get_value_size(type, NULL, NVP_NELEM(nvp))) < 0)
+ return (EFAULT);
+
+ if (NVP_SIZE_CALC(nvp->nvp_name_sz, value_sz) > nvp->nvp_size)
+ return (EFAULT);
+
+ switch (type) {
+ case DATA_TYPE_NVLIST:
+ ret = nvpair_native_embedded(nvs, nvp);
+ break;
+ case DATA_TYPE_NVLIST_ARRAY:
+ ret = nvpair_native_embedded_array(nvs, nvp);
+ break;
+ case DATA_TYPE_STRING_ARRAY:
+ nvpair_native_string_array(nvs, nvp);
+ break;
+ default:
+ break;
+ }
+
+ return (ret);
+}
+
+static int
+nvs_native_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
+{
+ uint64_t nvp_sz = nvp->nvp_size;
+
+ switch (NVP_TYPE(nvp)) {
+ case DATA_TYPE_NVLIST: {
+ size_t nvsize = 0;
+
+ if (nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize) != 0)
+ return (EINVAL);
+
+ nvp_sz += nvsize;
+ break;
+ }
+ case DATA_TYPE_NVLIST_ARRAY: {
+ size_t nvsize;
+
+ if (nvs_embedded_nvl_array(nvs, nvp, &nvsize) != 0)
+ return (EINVAL);
+
+ nvp_sz += nvsize;
+ break;
+ }
+ default:
+ break;
+ }
+
+ if (nvp_sz > INT32_MAX)
+ return (EINVAL);
+
+ *size = nvp_sz;
+
+ return (0);
+}
+
+static int
+nvs_native_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
+{
+ switch (nvs->nvs_op) {
+ case NVS_OP_ENCODE:
+ return (nvs_native_nvp_op(nvs, nvp));
+
+ case NVS_OP_DECODE: {
+ nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
+ int32_t decode_len;
+
+ /* try to read the size value from the stream */
+ if (native->n_curr + sizeof (int32_t) > native->n_end)
+ return (EFAULT);
+ bcopy(native->n_curr, &decode_len, sizeof (int32_t));
+
+ /* sanity check the size value */
+ if (decode_len < 0 ||
+ decode_len > native->n_end - native->n_curr)
+ return (EFAULT);
+
+ *size = decode_len;
+
+ /*
+ * If at the end of the stream then move the cursor
+ * forward, otherwise nvpair_native_op() will read
+ * the entire nvpair at the same cursor position.
+ */
+ if (*size == 0)
+ native->n_curr += sizeof (int32_t);
+ break;
+ }
+
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+static const nvs_ops_t nvs_native_ops = {
+ nvs_native_nvlist,
+ nvs_native_nvpair,
+ nvs_native_nvp_op,
+ nvs_native_nvp_size,
+ nvs_native_nvl_fini
+};
+
+static int
+nvs_native(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen)
+{
+ nvs_native_t native;
+ int err;
+
+ nvs->nvs_ops = &nvs_native_ops;
+
+ if ((err = nvs_native_create(nvs, &native, buf + sizeof (nvs_header_t),
+ *buflen - sizeof (nvs_header_t))) != 0)
+ return (err);
+
+ err = nvs_operation(nvs, nvl, buflen);
+
+ nvs_native_destroy(nvs);
+
+ return (err);
+}
+
+/*
+ * XDR encoding functions
+ *
+ * An xdr packed nvlist is encoded as:
+ *
+ * - encoding methode and host endian (4 bytes)
+ * - nvl_version (4 bytes)
+ * - nvl_nvflag (4 bytes)
+ *
+ * - encoded nvpairs, the format of one xdr encoded nvpair is:
+ * - encoded size of the nvpair (4 bytes)
+ * - decoded size of the nvpair (4 bytes)
+ * - name string, (4 + sizeof(NV_ALIGN4(string))
+ * a string is coded as size (4 bytes) and data
+ * - data type (4 bytes)
+ * - number of elements in the nvpair (4 bytes)
+ * - data
+ *
+ * - 2 zero's for end of the entire list (8 bytes)
+ */
+static int
+nvs_xdr_create(nvstream_t *nvs, XDR *xdr, char *buf, size_t buflen)
+{
+ /* xdr data must be 4 byte aligned */
+ if ((ulong_t)buf % 4 != 0)
+ return (EFAULT);
+
+ switch (nvs->nvs_op) {
+ case NVS_OP_ENCODE:
+ xdrmem_create(xdr, buf, (uint_t)buflen, XDR_ENCODE);
+ nvs->nvs_private = xdr;
+ return (0);
+ case NVS_OP_DECODE:
+ xdrmem_create(xdr, buf, (uint_t)buflen, XDR_DECODE);
+ nvs->nvs_private = xdr;
+ return (0);
+ case NVS_OP_GETSIZE:
+ nvs->nvs_private = NULL;
+ return (0);
+ default:
+ return (EINVAL);
+ }
+}
+
+static void
+nvs_xdr_destroy(nvstream_t *nvs)
+{
+ switch (nvs->nvs_op) {
+ case NVS_OP_ENCODE:
+ case NVS_OP_DECODE:
+ xdr_destroy((XDR *)nvs->nvs_private);
+ break;
+ default:
+ break;
+ }
+}
+
+static int
+nvs_xdr_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size)
+{
+ switch (nvs->nvs_op) {
+ case NVS_OP_ENCODE:
+ case NVS_OP_DECODE: {
+ XDR *xdr = nvs->nvs_private;
+
+ if (!xdr_int(xdr, &nvl->nvl_version) ||
+ !xdr_u_int(xdr, &nvl->nvl_nvflag))
+ return (EFAULT);
+ break;
+ }
+ case NVS_OP_GETSIZE: {
+ /*
+ * 2 * 4 for nvl_version + nvl_nvflag
+ * and 8 for end of the entire list
+ */
+ *size += 2 * 4 + 8;
+ break;
+ }
+ default:
+ return (EINVAL);
+ }
+ return (0);
+}
+
+static int
+nvs_xdr_nvl_fini(nvstream_t *nvs)
+{
+ if (nvs->nvs_op == NVS_OP_ENCODE) {
+ XDR *xdr = nvs->nvs_private;
+ int zero = 0;
+
+ if (!xdr_int(xdr, &zero) || !xdr_int(xdr, &zero))
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+/*
+ * The format of xdr encoded nvpair is:
+ * encode_size, decode_size, name string, data type, nelem, data
+ */
+static int
+nvs_xdr_nvp_op(nvstream_t *nvs, nvpair_t *nvp)
+{
+ data_type_t type;
+ char *buf;
+ char *buf_end = (char *)nvp + nvp->nvp_size;
+ int value_sz;
+ uint_t nelem, buflen;
+ bool_t ret = FALSE;
+ XDR *xdr = nvs->nvs_private;
+
+ ASSERT(xdr != NULL && nvp != NULL);
+
+ /* name string */
+ if ((buf = NVP_NAME(nvp)) >= buf_end)
+ return (EFAULT);
+ buflen = buf_end - buf;
+
+ if (!xdr_string(xdr, &buf, buflen - 1))
+ return (EFAULT);
+ nvp->nvp_name_sz = strlen(buf) + 1;
+
+ /* type and nelem */
+ if (!xdr_int(xdr, (int *)&nvp->nvp_type) ||
+ !xdr_int(xdr, &nvp->nvp_value_elem))
+ return (EFAULT);
+
+ type = NVP_TYPE(nvp);
+ nelem = nvp->nvp_value_elem;
+
+ /*
+ * Verify type and nelem and get the value size.
+ * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
+ * is the size of the string(s) excluded.
+ */
+ if ((value_sz = i_get_value_size(type, NULL, nelem)) < 0)
+ return (EFAULT);
+
+ /* if there is no data to extract then return */
+ if (nelem == 0)
+ return (0);
+
+ /* value */
+ if ((buf = NVP_VALUE(nvp)) >= buf_end)
+ return (EFAULT);
+ buflen = buf_end - buf;
+
+ if (buflen < value_sz)
+ return (EFAULT);
+
+ switch (type) {
+ case DATA_TYPE_NVLIST:
+ if (nvs_embedded(nvs, (void *)buf) == 0)
+ return (0);
+ break;
+
+ case DATA_TYPE_NVLIST_ARRAY:
+ if (nvs_embedded_nvl_array(nvs, nvp, NULL) == 0)
+ return (0);
+ break;
+
+ case DATA_TYPE_BOOLEAN:
+ ret = TRUE;
+ break;
+
+ case DATA_TYPE_BYTE:
+ case DATA_TYPE_INT8:
+ case DATA_TYPE_UINT8:
+ ret = xdr_char(xdr, buf);
+ break;
+
+ case DATA_TYPE_INT16:
+ ret = xdr_short(xdr, (void *)buf);
+ break;
+
+ case DATA_TYPE_UINT16:
+ ret = xdr_u_short(xdr, (void *)buf);
+ break;
+
+ case DATA_TYPE_BOOLEAN_VALUE:
+ case DATA_TYPE_INT32:
+ ret = xdr_int(xdr, (void *)buf);
+ break;
+
+ case DATA_TYPE_UINT32:
+ ret = xdr_u_int(xdr, (void *)buf);
+ break;
+
+ case DATA_TYPE_INT64:
+ ret = xdr_longlong_t(xdr, (void *)buf);
+ break;
+
+ case DATA_TYPE_UINT64:
+ ret = xdr_u_longlong_t(xdr, (void *)buf);
+ break;
+
+ case DATA_TYPE_HRTIME:
+ /*
+ * NOTE: must expose the definition of hrtime_t here
+ */
+ ret = xdr_longlong_t(xdr, (void *)buf);
+ break;
+#if !defined(_KERNEL)
+ case DATA_TYPE_DOUBLE:
+ ret = xdr_double(xdr, (void *)buf);
+ break;
+#endif
+ case DATA_TYPE_STRING:
+ ret = xdr_string(xdr, &buf, buflen - 1);
+ break;
+
+ case DATA_TYPE_BYTE_ARRAY:
+ ret = xdr_opaque(xdr, buf, nelem);
+ break;
+
+ case DATA_TYPE_INT8_ARRAY:
+ case DATA_TYPE_UINT8_ARRAY:
+ ret = xdr_array(xdr, &buf, &nelem, buflen, sizeof (int8_t),
+ (xdrproc_t)xdr_char);
+ break;
+
+ case DATA_TYPE_INT16_ARRAY:
+ ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int16_t),
+ sizeof (int16_t), (xdrproc_t)xdr_short);
+ break;
+
+ case DATA_TYPE_UINT16_ARRAY:
+ ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint16_t),
+ sizeof (uint16_t), (xdrproc_t)xdr_u_short);
+ break;
+
+ case DATA_TYPE_BOOLEAN_ARRAY:
+ case DATA_TYPE_INT32_ARRAY:
+ ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int32_t),
+ sizeof (int32_t), (xdrproc_t)xdr_int);
+ break;
+
+ case DATA_TYPE_UINT32_ARRAY:
+ ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint32_t),
+ sizeof (uint32_t), (xdrproc_t)xdr_u_int);
+ break;
+
+ case DATA_TYPE_INT64_ARRAY:
+ ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int64_t),
+ sizeof (int64_t), (xdrproc_t)xdr_longlong_t);
+ break;
+
+ case DATA_TYPE_UINT64_ARRAY:
+ ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint64_t),
+ sizeof (uint64_t), (xdrproc_t)xdr_u_longlong_t);
+ break;
+
+ case DATA_TYPE_STRING_ARRAY: {
+ size_t len = nelem * sizeof (uint64_t);
+ char **strp = (void *)buf;
+ int i;
+
+ if (nvs->nvs_op == NVS_OP_DECODE)
+ bzero(buf, len); /* don't trust packed data */
+
+ for (i = 0; i < nelem; i++) {
+ if (buflen <= len)
+ return (EFAULT);
+
+ buf += len;
+ buflen -= len;
+
+ if (xdr_string(xdr, &buf, buflen - 1) != TRUE)
+ return (EFAULT);
+
+ if (nvs->nvs_op == NVS_OP_DECODE)
+ strp[i] = buf;
+ len = strlen(buf) + 1;
+ }
+ ret = TRUE;
+ break;
+ }
+ default:
+ break;
+ }
+
+ return (ret == TRUE ? 0 : EFAULT);
+}
+
+static int
+nvs_xdr_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
+{
+ data_type_t type = NVP_TYPE(nvp);
+ /*
+ * encode_size + decode_size + name string size + data type + nelem
+ * where name string size = 4 + NV_ALIGN4(strlen(NVP_NAME(nvp)))
+ */
+ uint64_t nvp_sz = 4 + 4 + 4 + NV_ALIGN4(strlen(NVP_NAME(nvp))) + 4 + 4;
+
+ switch (type) {
+ case DATA_TYPE_BOOLEAN:
+ break;
+
+ case DATA_TYPE_BOOLEAN_VALUE:
+ case DATA_TYPE_BYTE:
+ case DATA_TYPE_INT8:
+ case DATA_TYPE_UINT8:
+ case DATA_TYPE_INT16:
+ case DATA_TYPE_UINT16:
+ case DATA_TYPE_INT32:
+ case DATA_TYPE_UINT32:
+ nvp_sz += 4; /* 4 is the minimum xdr unit */
+ break;
+
+ case DATA_TYPE_INT64:
+ case DATA_TYPE_UINT64:
+ case DATA_TYPE_HRTIME:
+#if !defined(_KERNEL)
+ case DATA_TYPE_DOUBLE:
+#endif
+ nvp_sz += 8;
+ break;
+
+ case DATA_TYPE_STRING:
+ nvp_sz += 4 + NV_ALIGN4(strlen((char *)NVP_VALUE(nvp)));
+ break;
+
+ case DATA_TYPE_BYTE_ARRAY:
+ nvp_sz += NV_ALIGN4(NVP_NELEM(nvp));
+ break;
+
+ case DATA_TYPE_BOOLEAN_ARRAY:
+ case DATA_TYPE_INT8_ARRAY:
+ case DATA_TYPE_UINT8_ARRAY:
+ case DATA_TYPE_INT16_ARRAY:
+ case DATA_TYPE_UINT16_ARRAY:
+ case DATA_TYPE_INT32_ARRAY:
+ case DATA_TYPE_UINT32_ARRAY:
+ nvp_sz += 4 + 4 * (uint64_t)NVP_NELEM(nvp);
+ break;
+
+ case DATA_TYPE_INT64_ARRAY:
+ case DATA_TYPE_UINT64_ARRAY:
+ nvp_sz += 4 + 8 * (uint64_t)NVP_NELEM(nvp);
+ break;
+
+ case DATA_TYPE_STRING_ARRAY: {
+ int i;
+ char **strs = (void *)NVP_VALUE(nvp);
+
+ for (i = 0; i < NVP_NELEM(nvp); i++)
+ nvp_sz += 4 + NV_ALIGN4(strlen(strs[i]));
+
+ break;
+ }
+
+ case DATA_TYPE_NVLIST:
+ case DATA_TYPE_NVLIST_ARRAY: {
+ size_t nvsize = 0;
+ int old_nvs_op = nvs->nvs_op;
+ int err;
+
+ nvs->nvs_op = NVS_OP_GETSIZE;
+ if (type == DATA_TYPE_NVLIST)
+ err = nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize);
+ else
+ err = nvs_embedded_nvl_array(nvs, nvp, &nvsize);
+ nvs->nvs_op = old_nvs_op;
+
+ if (err != 0)
+ return (EINVAL);
+
+ nvp_sz += nvsize;
+ break;
+ }
+
+ default:
+ return (EINVAL);
+ }
+
+ if (nvp_sz > INT32_MAX)
+ return (EINVAL);
+
+ *size = nvp_sz;
+
+ return (0);
+}
+
+
+/*
+ * The NVS_XDR_MAX_LEN macro takes a packed xdr buffer of size x and estimates
+ * the largest nvpair that could be encoded in the buffer.
+ *
+ * See comments above nvpair_xdr_op() for the format of xdr encoding.
+ * The size of a xdr packed nvpair without any data is 5 words.
+ *
+ * Using the size of the data directly as an estimate would be ok
+ * in all cases except one. If the data type is of DATA_TYPE_STRING_ARRAY
+ * then the actual nvpair has space for an array of pointers to index
+ * the strings. These pointers are not encoded into the packed xdr buffer.
+ *
+ * If the data is of type DATA_TYPE_STRING_ARRAY and all the strings are
+ * of length 0, then each string is endcoded in xdr format as a single word.
+ * Therefore when expanded to an nvpair there will be 2.25 word used for
+ * each string. (a int64_t allocated for pointer usage, and a single char
+ * for the null termination.)
+ *
+ * This is the calculation performed by the NVS_XDR_MAX_LEN macro.
+ */
+#define NVS_XDR_HDR_LEN ((size_t)(5 * 4))
+#define NVS_XDR_DATA_LEN(y) (((size_t)(y) <= NVS_XDR_HDR_LEN) ? \
+ 0 : ((size_t)(y) - NVS_XDR_HDR_LEN))
+#define NVS_XDR_MAX_LEN(x) (NVP_SIZE_CALC(1, 0) + \
+ (NVS_XDR_DATA_LEN(x) * 2) + \
+ NV_ALIGN4((NVS_XDR_DATA_LEN(x) / 4)))
+
+static int
+nvs_xdr_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
+{
+ XDR *xdr = nvs->nvs_private;
+ int32_t encode_len, decode_len;
+
+ switch (nvs->nvs_op) {
+ case NVS_OP_ENCODE: {
+ size_t nvsize;
+
+ if (nvs_xdr_nvp_size(nvs, nvp, &nvsize) != 0)
+ return (EFAULT);
+
+ decode_len = nvp->nvp_size;
+ encode_len = nvsize;
+ if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len))
+ return (EFAULT);
+
+ return (nvs_xdr_nvp_op(nvs, nvp));
+ }
+ case NVS_OP_DECODE: {
+ struct xdr_bytesrec bytesrec;
+
+ /* get the encode and decode size */
+ if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len))
+ return (EFAULT);
+ *size = decode_len;
+
+ /* are we at the end of the stream? */
+ if (*size == 0)
+ return (0);
+
+ /* sanity check the size parameter */
+ if (!xdr_control(xdr, XDR_GET_BYTES_AVAIL, &bytesrec))
+ return (EFAULT);
+
+ if (*size > NVS_XDR_MAX_LEN(bytesrec.xc_num_avail))
+ return (EFAULT);
+ break;
+ }
+
+ default:
+ return (EINVAL);
+ }
+ return (0);
+}
+
+static const struct nvs_ops nvs_xdr_ops = {
+ nvs_xdr_nvlist,
+ nvs_xdr_nvpair,
+ nvs_xdr_nvp_op,
+ nvs_xdr_nvp_size,
+ nvs_xdr_nvl_fini
+};
+
+static int
+nvs_xdr(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen)
+{
+ XDR xdr;
+ int err;
+
+ nvs->nvs_ops = &nvs_xdr_ops;
+
+ if ((err = nvs_xdr_create(nvs, &xdr, buf + sizeof (nvs_header_t),
+ *buflen - sizeof (nvs_header_t))) != 0)
+ return (err);
+
+ err = nvs_operation(nvs, nvl, buflen);
+
+ nvs_xdr_destroy(nvs);
+
+ return (err);
+}
diff --git a/common/nvpair/nvpair_alloc_fixed.c b/common/nvpair/nvpair_alloc_fixed.c
new file mode 100644
index 000000000000..b1128eeb9bc3
--- /dev/null
+++ b/common/nvpair/nvpair_alloc_fixed.c
@@ -0,0 +1,120 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/stropts.h>
+#include <sys/isa_defs.h>
+#include <sys/nvpair.h>
+#include <sys/sysmacros.h>
+#if defined(_KERNEL) && !defined(_BOOT)
+#include <sys/varargs.h>
+#else
+#include <stdarg.h>
+#include <strings.h>
+#endif
+
+/*
+ * This allocator is very simple.
+ * - it uses a pre-allocated buffer for memory allocations.
+ * - it does _not_ free memory in the pre-allocated buffer.
+ *
+ * The reason for the selected implemention is simplicity.
+ * This allocator is designed for the usage in interrupt context when
+ * the caller may not wait for free memory.
+ */
+
+/* pre-allocated buffer for memory allocations */
+typedef struct nvbuf {
+ uintptr_t nvb_buf; /* address of pre-allocated buffer */
+ uintptr_t nvb_lim; /* limit address in the buffer */
+ uintptr_t nvb_cur; /* current address in the buffer */
+} nvbuf_t;
+
+/*
+ * Initialize the pre-allocated buffer allocator. The caller needs to supply
+ *
+ * buf address of pre-allocated buffer
+ * bufsz size of pre-allocated buffer
+ *
+ * nv_fixed_init() calculates the remaining members of nvbuf_t.
+ */
+static int
+nv_fixed_init(nv_alloc_t *nva, va_list valist)
+{
+ uintptr_t base = va_arg(valist, uintptr_t);
+ uintptr_t lim = base + va_arg(valist, size_t);
+ nvbuf_t *nvb = (nvbuf_t *)P2ROUNDUP(base, sizeof (uintptr_t));
+
+ if (base == 0 || (uintptr_t)&nvb[1] > lim)
+ return (EINVAL);
+
+ nvb->nvb_buf = (uintptr_t)&nvb[0];
+ nvb->nvb_cur = (uintptr_t)&nvb[1];
+ nvb->nvb_lim = lim;
+ nva->nva_arg = nvb;
+
+ return (0);
+}
+
+static void *
+nv_fixed_alloc(nv_alloc_t *nva, size_t size)
+{
+ nvbuf_t *nvb = nva->nva_arg;
+ uintptr_t new = nvb->nvb_cur;
+
+ if (size == 0 || new + size > nvb->nvb_lim)
+ return (NULL);
+
+ nvb->nvb_cur = P2ROUNDUP(new + size, sizeof (uintptr_t));
+
+ return ((void *)new);
+}
+
+/*ARGSUSED*/
+static void
+nv_fixed_free(nv_alloc_t *nva, void *buf, size_t size)
+{
+ /* don't free memory in the pre-allocated buffer */
+}
+
+static void
+nv_fixed_reset(nv_alloc_t *nva)
+{
+ nvbuf_t *nvb = nva->nva_arg;
+
+ nvb->nvb_cur = (uintptr_t)&nvb[1];
+}
+
+const nv_alloc_ops_t nv_fixed_ops_def = {
+ nv_fixed_init, /* nv_ao_init() */
+ NULL, /* nv_ao_fini() */
+ nv_fixed_alloc, /* nv_ao_alloc() */
+ nv_fixed_free, /* nv_ao_free() */
+ nv_fixed_reset /* nv_ao_reset() */
+};
+
+const nv_alloc_ops_t *nv_fixed_ops = &nv_fixed_ops_def;
diff --git a/common/unicode/u8_textprep.c b/common/unicode/u8_textprep.c
new file mode 100644
index 000000000000..8faf1a97e47e
--- /dev/null
+++ b/common/unicode/u8_textprep.c
@@ -0,0 +1,2132 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+/*
+ * UTF-8 text preparation functions (PSARC/2007/149, PSARC/2007/458).
+ *
+ * Man pages: u8_textprep_open(9F), u8_textprep_buf(9F), u8_textprep_close(9F),
+ * u8_textprep_str(9F), u8_strcmp(9F), and u8_validate(9F). See also
+ * the section 3C man pages.
+ * Interface stability: Committed.
+ */
+
+#include <sys/types.h>
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#else
+#include <sys/u8_textprep.h>
+#include <strings.h>
+#endif /* _KERNEL */
+#include <sys/byteorder.h>
+#include <sys/errno.h>
+#include <sys/u8_textprep_data.h>
+
+
+/* The maximum possible number of bytes in a UTF-8 character. */
+#define U8_MB_CUR_MAX (4)
+
+/*
+ * The maximum number of bytes needed for a UTF-8 character to cover
+ * U+0000 - U+FFFF, i.e., the coding space of now deprecated UCS-2.
+ */
+#define U8_MAX_BYTES_UCS2 (3)
+
+/* The maximum possible number of bytes in a Stream-Safe Text. */
+#define U8_STREAM_SAFE_TEXT_MAX (128)
+
+/*
+ * The maximum number of characters in a combining/conjoining sequence and
+ * the actual upperbound limit of a combining/conjoining sequence.
+ */
+#define U8_MAX_CHARS_A_SEQ (32)
+#define U8_UPPER_LIMIT_IN_A_SEQ (31)
+
+/* The combining class value for Starter. */
+#define U8_COMBINING_CLASS_STARTER (0)
+
+/*
+ * Some Hangul related macros at below.
+ *
+ * The first and the last of Hangul syllables, Hangul Jamo Leading consonants,
+ * Vowels, and optional Trailing consonants in Unicode scalar values.
+ *
+ * Note that U8_HANGUL_JAMO_T_FIRST below is 0x11A7, not the actual
+ * U+11A8. This is because the trailing consonant is optional and thus
+ * we pre-calculate by subtracting one.
+ *
+ * Each of 19 modern leading consonants has total 588 possible syllables since
+ * Hangul has 21 modern vowels and 27 modern trailing consonants plus 1 for
+ * no trailing consonant case, i.e., 21 x 28 = 588.
+ *
+ * We also have a bunch of Hangul related macros below. Please bear in
+ * mind that U8_HANGUL_JAMO_1ST_BYTE can be used to check whether a byte
+ * could start a Hangul Jamo, but a match does not guarantee that it is
+ * a Hangul Jamo; it only guarantees that it most likely is.
+ */
+#define U8_HANGUL_SYL_FIRST (0xAC00U)
+#define U8_HANGUL_SYL_LAST (0xD7A3U)
+
+#define U8_HANGUL_JAMO_L_FIRST (0x1100U)
+#define U8_HANGUL_JAMO_L_LAST (0x1112U)
+#define U8_HANGUL_JAMO_V_FIRST (0x1161U)
+#define U8_HANGUL_JAMO_V_LAST (0x1175U)
+#define U8_HANGUL_JAMO_T_FIRST (0x11A7U)
+#define U8_HANGUL_JAMO_T_LAST (0x11C2U)
+
+#define U8_HANGUL_V_COUNT (21)
+#define U8_HANGUL_VT_COUNT (588)
+#define U8_HANGUL_T_COUNT (28)
+
+#define U8_HANGUL_JAMO_1ST_BYTE (0xE1U)
+
+#define U8_SAVE_HANGUL_AS_UTF8(s, i, j, k, b) \
+ (s)[(i)] = (uchar_t)(0xE0U | ((uint32_t)(b) & 0xF000U) >> 12); \
+ (s)[(j)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x0FC0U) >> 6); \
+ (s)[(k)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x003FU));
+
+#define U8_HANGUL_JAMO_L(u) \
+ ((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_L_LAST)
+
+#define U8_HANGUL_JAMO_V(u) \
+ ((u) >= U8_HANGUL_JAMO_V_FIRST && (u) <= U8_HANGUL_JAMO_V_LAST)
+
+#define U8_HANGUL_JAMO_T(u) \
+ ((u) > U8_HANGUL_JAMO_T_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST)
+
+#define U8_HANGUL_JAMO(u) \
+ ((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST)
+
+#define U8_HANGUL_SYLLABLE(u) \
+ ((u) >= U8_HANGUL_SYL_FIRST && (u) <= U8_HANGUL_SYL_LAST)
+
+#define U8_HANGUL_COMPOSABLE_L_V(s, u) \
+ ((s) == U8_STATE_HANGUL_L && U8_HANGUL_JAMO_V((u)))
+
+#define U8_HANGUL_COMPOSABLE_LV_T(s, u) \
+ ((s) == U8_STATE_HANGUL_LV && U8_HANGUL_JAMO_T((u)))
+
+/* The types of decomposition mappings. */
+#define U8_DECOMP_BOTH (0xF5U)
+#define U8_DECOMP_CANONICAL (0xF6U)
+
+/* The indicator for 16-bit table. */
+#define U8_16BIT_TABLE_INDICATOR (0x8000U)
+
+/* The following are some convenience macros. */
+#define U8_PUT_3BYTES_INTO_UTF32(u, b1, b2, b3) \
+ (u) = ((uint32_t)(b1) & 0x0F) << 12 | ((uint32_t)(b2) & 0x3F) << 6 | \
+ (uint32_t)(b3) & 0x3F;
+
+#define U8_SIMPLE_SWAP(a, b, t) \
+ (t) = (a); \
+ (a) = (b); \
+ (b) = (t);
+
+#define U8_ASCII_TOUPPER(c) \
+ (((c) >= 'a' && (c) <= 'z') ? (c) - 'a' + 'A' : (c))
+
+#define U8_ASCII_TOLOWER(c) \
+ (((c) >= 'A' && (c) <= 'Z') ? (c) - 'A' + 'a' : (c))
+
+#define U8_ISASCII(c) (((uchar_t)(c)) < 0x80U)
+/*
+ * The following macro assumes that the two characters that are to be
+ * swapped are adjacent to each other and 'a' comes before 'b'.
+ *
+ * If the assumptions are not met, then, the macro will fail.
+ */
+#define U8_SWAP_COMB_MARKS(a, b) \
+ for (k = 0; k < disp[(a)]; k++) \
+ u8t[k] = u8s[start[(a)] + k]; \
+ for (k = 0; k < disp[(b)]; k++) \
+ u8s[start[(a)] + k] = u8s[start[(b)] + k]; \
+ start[(b)] = start[(a)] + disp[(b)]; \
+ for (k = 0; k < disp[(a)]; k++) \
+ u8s[start[(b)] + k] = u8t[k]; \
+ U8_SIMPLE_SWAP(comb_class[(a)], comb_class[(b)], tc); \
+ U8_SIMPLE_SWAP(disp[(a)], disp[(b)], tc);
+
+/* The possible states during normalization. */
+typedef enum {
+ U8_STATE_START = 0,
+ U8_STATE_HANGUL_L = 1,
+ U8_STATE_HANGUL_LV = 2,
+ U8_STATE_HANGUL_LVT = 3,
+ U8_STATE_HANGUL_V = 4,
+ U8_STATE_HANGUL_T = 5,
+ U8_STATE_COMBINING_MARK = 6
+} u8_normalization_states_t;
+
+/*
+ * The three vectors at below are used to check bytes of a given UTF-8
+ * character are valid and not containing any malformed byte values.
+ *
+ * We used to have a quite relaxed UTF-8 binary representation but then there
+ * was some security related issues and so the Unicode Consortium defined
+ * and announced the UTF-8 Corrigendum at Unicode 3.1 and then refined it
+ * one more time at the Unicode 3.2. The following three tables are based on
+ * that.
+ */
+
+#define U8_ILLEGAL_NEXT_BYTE_COMMON(c) ((c) < 0x80 || (c) > 0xBF)
+
+#define I_ U8_ILLEGAL_CHAR
+#define O_ U8_OUT_OF_RANGE_CHAR
+
+/*
+ * Byte length of a UTF-8 character keyed by its first byte: 1 through 4,
+ * U8_ILLEGAL_CHAR (I_) for bytes that can never start a character, or
+ * U8_OUT_OF_RANGE_CHAR (O_) for start bytes beyond the Unicode 3.2
+ * coding space.
+ */
+const int8_t u8_number_of_bytes[0x100] = {
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+/* 80  81  82  83  84  85  86  87  88  89  8A  8B  8C  8D  8E  8F */
+	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
+
+/* 90  91  92  93  94  95  96  97  98  99  9A  9B  9C  9D  9E  9F */
+	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
+
+/* A0  A1  A2  A3  A4  A5  A6  A7  A8  A9  AA  AB  AC  AD  AE  AF */
+	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
+
+/* B0  B1  B2  B3  B4  B5  B6  B7  B8  B9  BA  BB  BC  BD  BE  BF */
+	I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_,
+
+/* C0  C1  C2  C3  C4  C5  C6  C7  C8  C9  CA  CB  CC  CD  CE  CF */
+	I_, I_, 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+
+/* D0  D1  D2  D3  D4  D5  D6  D7  D8  D9  DA  DB  DC  DD  DE  DF */
+	2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+
+/* E0  E1  E2  E3  E4  E5  E6  E7  E8  E9  EA  EB  EC  ED  EE  EF */
+	3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+
+/* F0  F1  F2  F3  F4  F5  F6  F7  F8  F9  FA  FB  FC  FD  FE  FF */
+	4,  4,  4,  4,  4,  O_, O_, O_, O_, O_, O_, O_, O_, O_, O_, O_,
+};
+
+#undef I_
+#undef O_
+
+/*
+ * Smallest legal second byte for each possible first byte (0 where no
+ * second byte is legal).  The E0 and F0 entries are raised above the
+ * generic 0x80 to exclude over-long (non-shortest-form) encodings, per
+ * the Unicode 3.2 definition of well-formed UTF-8.
+ */
+const uint8_t u8_valid_min_2nd_byte[0x100] = {
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+/* C0    C1    C2    C3    C4    C5    C6    C7 */
+	0,    0,    0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/* C8    C9    CA    CB    CC    CD    CE    CF */
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/* D0    D1    D2    D3    D4    D5    D6    D7 */
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/* D8    D9    DA    DB    DC    DD    DE    DF */
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/* E0    E1    E2    E3    E4    E5    E6    E7 */
+	0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/* E8    E9    EA    EB    EC    ED    EE    EF */
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+/* F0    F1    F2    F3    F4    F5    F6    F7 */
+	0x90, 0x80, 0x80, 0x80, 0x80, 0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+};
+
+/*
+ * Largest legal second byte for each possible first byte (0 where no
+ * second byte is legal).  The ED entry (0x9f) excludes the UTF-16
+ * surrogate range and the F4 entry (0x8f) excludes scalar values above
+ * U+10FFFF, per the Unicode 3.2 definition of well-formed UTF-8.
+ */
+const uint8_t u8_valid_max_2nd_byte[0x100] = {
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+/* C0    C1    C2    C3    C4    C5    C6    C7 */
+	0,    0,    0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/* C8    C9    CA    CB    CC    CD    CE    CF */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/* D0    D1    D2    D3    D4    D5    D6    D7 */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/* D8    D9    DA    DB    DC    DD    DE    DF */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/* E0    E1    E2    E3    E4    E5    E6    E7 */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
+/* E8    E9    EA    EB    EC    ED    EE    EF */
+	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf,
+/* F0    F1    F2    F3    F4    F5    F6    F7 */
+	0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0,    0,    0,
+	0,    0,    0,    0,    0,    0,    0,    0,
+};
+
+
+/*
+ * The u8_validate() function validates the given UTF-8 character string
+ * and calculates its byte length. It is quite similar to mblen(3C)
+ * except that it is specific to UTF-8/Unicode and, if requested, also
+ * validates against a caller-supplied list of characters.
+ *
+ * "flag" is a bitwise OR of:
+ *	U8_VALIDATE_ENTIRE		validate all n bytes, not just the
+ *					first character
+ *	U8_VALIDATE_CHECK_ADDITIONAL	reject any character matching an
+ *					entry of "list"
+ *	U8_VALIDATE_UCS2_RANGE		accept only characters inside the
+ *					old UCS-2 range (<= 3 bytes)
+ *
+ * Returns the number of validated bytes, or -1 with *errnum set to
+ * EILSEQ, ERANGE, EINVAL, or EBADF.
+ *
+ * NOTE(review): when U8_VALIDATE_CHECK_ADDITIONAL is set, "list" is
+ * dereferenced without a NULL check; callers must supply a valid
+ * NULL-terminated vector of null-terminated strings -- confirm against
+ * the callers.
+ */
+int
+u8_validate(char *u8str, size_t n, char **list, int flag, int *errnum)
+{
+	uchar_t *ib;		/* current input position */
+	uchar_t *ibtail;	/* one past the last input byte */
+	uchar_t **p;
+	uchar_t *s1;
+	uchar_t *s2;
+	uchar_t f;		/* first byte of the current character */
+	int sz;			/* byte length of the current character */
+	size_t i;
+	int ret_val;		/* validated byte count so far */
+	boolean_t second;
+	boolean_t no_need_to_validate_entire;
+	boolean_t check_additional;
+	boolean_t validate_ucs2_range_only;
+
+	if (! u8str)
+		return (0);
+
+	ib = (uchar_t *)u8str;
+	ibtail = ib + n;
+
+	ret_val = 0;
+
+	no_need_to_validate_entire = ! (flag & U8_VALIDATE_ENTIRE);
+	check_additional = flag & U8_VALIDATE_CHECK_ADDITIONAL;
+	validate_ucs2_range_only = flag & U8_VALIDATE_UCS2_RANGE;
+
+	while (ib < ibtail) {
+		/*
+		 * The first byte of a UTF-8 character tells how many
+		 * bytes will follow for the character. If the first byte
+		 * is an illegal byte value or out of range value, we just
+		 * return -1 with an appropriate error number.
+		 */
+		sz = u8_number_of_bytes[*ib];
+		if (sz == U8_ILLEGAL_CHAR) {
+			*errnum = EILSEQ;
+			return (-1);
+		}
+
+		if (sz == U8_OUT_OF_RANGE_CHAR ||
+		    (validate_ucs2_range_only && sz > U8_MAX_BYTES_UCS2)) {
+			*errnum = ERANGE;
+			return (-1);
+		}
+
+		/*
+		 * If we don't have enough bytes to check on, that's also
+		 * an error. As you can see, we give illegal byte sequence
+		 * checking higher priority than EINVAL cases.
+		 */
+		if ((ibtail - ib) < sz) {
+			*errnum = EINVAL;
+			return (-1);
+		}
+
+		if (sz == 1) {
+			ib++;
+			ret_val++;
+		} else {
+			/*
+			 * Check on the multi-byte UTF-8 character. For more
+			 * details on this, see comment added for the used
+			 * data structures at the beginning of the file.
+			 */
+			f = *ib++;
+			ret_val++;
+			second = B_TRUE;
+			for (i = 1; i < sz; i++) {
+				if (second) {
+					/*
+					 * The legal range of the second
+					 * byte depends on the first byte
+					 * (see the min/max tables above).
+					 */
+					if (*ib < u8_valid_min_2nd_byte[f] ||
+					    *ib > u8_valid_max_2nd_byte[f]) {
+						*errnum = EILSEQ;
+						return (-1);
+					}
+					second = B_FALSE;
+				} else if (U8_ILLEGAL_NEXT_BYTE_COMMON(*ib)) {
+					*errnum = EILSEQ;
+					return (-1);
+				}
+				ib++;
+				ret_val++;
+			}
+		}
+
+		if (check_additional) {
+			/*
+			 * Compare the character just consumed (the sz
+			 * bytes ending at ib) against each list entry;
+			 * an exact match is rejected with EBADF.
+			 */
+			for (p = (uchar_t **)list, i = 0; p[i]; i++) {
+				s1 = ib - sz;
+				s2 = p[i];
+				while (s1 < ib) {
+					if (*s1 != *s2 || *s2 == '\0')
+						break;
+					s1++;
+					s2++;
+				}
+
+				if (s1 >= ib && *s2 == '\0') {
+					*errnum = EBADF;
+					return (-1);
+				}
+			}
+		}
+
+		if (no_need_to_validate_entire)
+			break;
+	}
+
+	return (ret_val);
+}
+
+/*
+ * The do_case_conv() function looks at the mapping tables (declared in
+ * u8_textprep_data.h) and returns the mapped bytes if any. If no mapping
+ * is found, the input bytes are returned unchanged. The function always
+ * terminates the returned bytes with a null character, assuming that
+ * there is plenty of room to do so.
+ *
+ * The case conversions are simple case conversions mapping a character to
+ * another character as specified in the Unicode data. The byte size of
+ * the mapped character could be different from that of the input character.
+ *
+ * "uv" selects the Unicode version of the tables; "is_it_toupper" picks
+ * the toupper (vs. tolower) mapping.
+ *
+ * The return value is the byte length of the returned character excluding
+ * the terminating null byte.
+ */
+static size_t
+do_case_conv(int uv, uchar_t *u8s, uchar_t *s, int sz, boolean_t is_it_toupper)
+{
+	size_t i;
+	uint16_t b1 = 0;
+	uint16_t b2 = 0;
+	uint16_t b3 = 0;
+	uint16_t b3_tbl;
+	uint16_t b3_base;
+	uint16_t b4 = 0;
+	size_t start_id;
+	size_t end_id;
+
+	/*
+	 * At this point, the only possible values for sz are 2, 3, and 4.
+	 * The u8s should point to a vector that is well beyond the size of
+	 * 5 bytes.
+	 *
+	 * Shorter characters leave the leading b1/b2 index bytes zero,
+	 * which is what the tables expect.
+	 */
+	if (sz == 2) {
+		b3 = u8s[0] = s[0];
+		b4 = u8s[1] = s[1];
+	} else if (sz == 3) {
+		b2 = u8s[0] = s[0];
+		b3 = u8s[1] = s[1];
+		b4 = u8s[2] = s[2];
+	} else if (sz == 4) {
+		b1 = u8s[0] = s[0];
+		b2 = u8s[1] = s[1];
+		b3 = u8s[2] = s[2];
+		b4 = u8s[3] = s[3];
+	} else {
+		/* This is not possible but just in case as a fallback. */
+		if (is_it_toupper)
+			*u8s = U8_ASCII_TOUPPER(*s);
+		else
+			*u8s = U8_ASCII_TOLOWER(*s);
+		u8s[1] = '\0';
+
+		return (1);
+	}
+	u8s[sz] = '\0';
+
+	/*
+	 * Let's find out if we have a corresponding character, walking
+	 * the b1 -> b2 -> b3 -> b4 table levels; any "not defined" hit
+	 * means there is no mapping and the input is returned as is.
+	 */
+	b1 = u8_common_b1_tbl[uv][b1];
+	if (b1 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	b2 = u8_case_common_b2_tbl[uv][b1][b2];
+	if (b2 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	if (is_it_toupper) {
+		b3_tbl = u8_toupper_b3_tbl[uv][b2][b3].tbl_id;
+		if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
+			return ((size_t)sz);
+
+		start_id = u8_toupper_b4_tbl[uv][b3_tbl][b4];
+		end_id = u8_toupper_b4_tbl[uv][b3_tbl][b4 + 1];
+
+		/* Either there is no match or an error at the table. */
+		if (start_id >= end_id || (end_id - start_id) > U8_MB_CUR_MAX)
+			return ((size_t)sz);
+
+		b3_base = u8_toupper_b3_tbl[uv][b2][b3].base;
+
+		for (i = 0; start_id < end_id; start_id++)
+			u8s[i++] = u8_toupper_final_tbl[uv][b3_base + start_id];
+	} else {
+		b3_tbl = u8_tolower_b3_tbl[uv][b2][b3].tbl_id;
+		if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
+			return ((size_t)sz);
+
+		start_id = u8_tolower_b4_tbl[uv][b3_tbl][b4];
+		end_id = u8_tolower_b4_tbl[uv][b3_tbl][b4 + 1];
+
+		if (start_id >= end_id || (end_id - start_id) > U8_MB_CUR_MAX)
+			return ((size_t)sz);
+
+		b3_base = u8_tolower_b3_tbl[uv][b2][b3].base;
+
+		for (i = 0; start_id < end_id; start_id++)
+			u8s[i++] = u8_tolower_final_tbl[uv][b3_base + start_id];
+	}
+
+	/*
+	 * If i is still zero, that means there is no corresponding character.
+	 */
+	if (i == 0)
+		return ((size_t)sz);
+
+	u8s[i] = '\0';
+
+	return (i);
+}
+
+/*
+ * The do_case_compare() function compares the two input strings, s1 and
+ * s2 (of n1 and n2 bytes respectively), one character at a time, doing
+ * case conversions if applicable, and returns the comparison result as
+ * strcmp() does. Conversion or validity problems are reported through
+ * *errnum (EILSEQ/EINVAL) while the comparison proceeds best-effort.
+ *
+ * Since, in empirical sense, most of text data are 7-bit ASCII characters,
+ * we treat the 7-bit ASCII characters as a special case trying to yield
+ * faster processing time.
+ */
+static int
+do_case_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1,
+    size_t n2, boolean_t is_it_toupper, int *errnum)
+{
+	int f;
+	int sz1;		/* byte length of current character in s1 */
+	int sz2;		/* byte length of current character in s2 */
+	size_t j;
+	size_t i1;		/* bytes consumed from s1 */
+	size_t i2;		/* bytes consumed from s2 */
+	uchar_t u8s1[U8_MB_CUR_MAX + 1];
+	uchar_t u8s2[U8_MB_CUR_MAX + 1];
+
+	i1 = i2 = 0;
+	while (i1 < n1 && i2 < n2) {
+		/*
+		 * Find out what would be the byte length for this UTF-8
+		 * character at string s1 and also find out if this is
+		 * an illegal start byte or not and if so, issue a proper
+		 * error number and yet treat this byte as a character.
+		 */
+		sz1 = u8_number_of_bytes[*s1];
+		if (sz1 < 0) {
+			*errnum = EILSEQ;
+			sz1 = 1;
+		}
+
+		/*
+		 * For 7-bit ASCII characters mainly, we do a quick case
+		 * conversion right at here.
+		 *
+		 * If we don't have enough bytes for this character, issue
+		 * an EINVAL error and use what are available.
+		 *
+		 * If we have enough bytes, find out if there is
+		 * a corresponding uppercase character and if so, copy over
+		 * the bytes for a comparison later. If there is no
+		 * corresponding uppercase character, then, use what we have
+		 * for the comparison.
+		 */
+		if (sz1 == 1) {
+			if (is_it_toupper)
+				u8s1[0] = U8_ASCII_TOUPPER(*s1);
+			else
+				u8s1[0] = U8_ASCII_TOLOWER(*s1);
+			s1++;
+			u8s1[1] = '\0';
+		} else if ((i1 + sz1) > n1) {
+			*errnum = EINVAL;
+			for (j = 0; (i1 + j) < n1; )
+				u8s1[j++] = *s1++;
+			u8s1[j] = '\0';
+		} else {
+			(void) do_case_conv(uv, u8s1, s1, sz1, is_it_toupper);
+			s1 += sz1;
+		}
+
+		/* Do the same for the string s2. */
+		sz2 = u8_number_of_bytes[*s2];
+		if (sz2 < 0) {
+			*errnum = EILSEQ;
+			sz2 = 1;
+		}
+
+		if (sz2 == 1) {
+			if (is_it_toupper)
+				u8s2[0] = U8_ASCII_TOUPPER(*s2);
+			else
+				u8s2[0] = U8_ASCII_TOLOWER(*s2);
+			s2++;
+			u8s2[1] = '\0';
+		} else if ((i2 + sz2) > n2) {
+			*errnum = EINVAL;
+			for (j = 0; (i2 + j) < n2; )
+				u8s2[j++] = *s2++;
+			u8s2[j] = '\0';
+		} else {
+			(void) do_case_conv(uv, u8s2, s2, sz2, is_it_toupper);
+			s2 += sz2;
+		}
+
+		/* Now compare the two characters. */
+		if (sz1 == 1 && sz2 == 1) {
+			if (*u8s1 > *u8s2)
+				return (1);
+			if (*u8s1 < *u8s2)
+				return (-1);
+		} else {
+			f = strcmp((const char *)u8s1, (const char *)u8s2);
+			if (f != 0)
+				return (f);
+		}
+
+		/*
+		 * They were the same. Let's move on to the next
+		 * characters then.
+		 */
+		i1 += sz1;
+		i2 += sz2;
+	}
+
+	/*
+	 * We compared until the end of either or both strings.
+	 *
+	 * If we reached to or went over the ends for the both, that means
+	 * they are the same.
+	 *
+	 * If we reached only one of the two ends, that means the other string
+	 * has something which then the fact can be used to determine
+	 * the return value.
+	 */
+	if (i1 >= n1) {
+		if (i2 >= n2)
+			return (0);
+		return (-1);
+	}
+	return (1);
+}
+
+/*
+ * Map the UTF-8 byte sequence in s (sz bytes long) to its Unicode
+ * canonical combining class for Unicode version uv, via the multi-level
+ * lookup tables.  A return value of 0 denotes a Starter; a single byte
+ * or an over-long sequence is also treated as a Starter.
+ */
+static uchar_t
+combining_class(size_t uv, uchar_t *s, size_t sz)
+{
+	uint16_t b1 = 0;
+	uint16_t b2 = 0;
+	uint16_t b3 = 0;
+	uint16_t b4 = 0;
+
+	if (sz == 1 || sz > 4)
+		return (0);
+
+	/* Shorter characters leave the leading index bytes at zero. */
+	switch (sz) {
+	case 2:
+		b3 = s[0];
+		b4 = s[1];
+		break;
+	case 3:
+		b2 = s[0];
+		b3 = s[1];
+		b4 = s[2];
+		break;
+	case 4:
+		b1 = s[0];
+		b2 = s[1];
+		b3 = s[2];
+		b4 = s[3];
+		break;
+	default:
+		break;
+	}
+
+	b1 = u8_common_b1_tbl[uv][b1];
+	if (b1 == U8_TBL_ELEMENT_NOT_DEF)
+		return (0);
+
+	b2 = u8_combining_class_b2_tbl[uv][b1][b2];
+	if (b2 == U8_TBL_ELEMENT_NOT_DEF)
+		return (0);
+
+	b3 = u8_combining_class_b3_tbl[uv][b2][b3];
+	if (b3 == U8_TBL_ELEMENT_NOT_DEF)
+		return (0);
+
+	return (u8_combining_class_b4_tbl[uv][b3][b4]);
+}
+
+/*
+ * The do_decomp() function finds a matching decomposition, if any, for
+ * the character in s (sz bytes long) and writes it to u8s. If there is
+ * no match, the input bytes are copied out unchanged. Hangul syllables
+ * are decomposed algorithmically into their constituent Jamos.
+ *
+ * To save time, a single byte 7-bit ASCII character should be handled by
+ * the caller.
+ *
+ * The function returns the number of bytes written to u8s, not counting
+ * the always-appended terminating null byte. Through "state" it also
+ * reports whether a Hangul character was decomposed (and of which kind),
+ * which will then be used by the caller.
+ */
+static size_t
+do_decomp(size_t uv, uchar_t *u8s, uchar_t *s, int sz,
+    boolean_t canonical_decomposition, u8_normalization_states_t *state)
+{
+	uint16_t b1 = 0;
+	uint16_t b2 = 0;
+	uint16_t b3 = 0;
+	uint16_t b3_tbl;
+	uint16_t b3_base;
+	uint16_t b4 = 0;
+	size_t start_id;
+	size_t end_id;
+	size_t i;
+	uint32_t u1;
+
+	if (sz == 2) {
+		b3 = u8s[0] = s[0];
+		b4 = u8s[1] = s[1];
+		u8s[2] = '\0';
+	} else if (sz == 3) {
+		/* Convert it to a Unicode scalar value. */
+		U8_PUT_3BYTES_INTO_UTF32(u1, s[0], s[1], s[2]);
+
+		/*
+		 * If this is a Hangul syllable, we decompose it into
+		 * a leading consonant, a vowel, and an optional trailing
+		 * consonant and then return.
+		 */
+		if (U8_HANGUL_SYLLABLE(u1)) {
+			u1 -= U8_HANGUL_SYL_FIRST;
+
+			b1 = U8_HANGUL_JAMO_L_FIRST + u1 / U8_HANGUL_VT_COUNT;
+			b2 = U8_HANGUL_JAMO_V_FIRST + (u1 % U8_HANGUL_VT_COUNT)
+			    / U8_HANGUL_T_COUNT;
+			b3 = u1 % U8_HANGUL_T_COUNT;
+
+			U8_SAVE_HANGUL_AS_UTF8(u8s, 0, 1, 2, b1);
+			U8_SAVE_HANGUL_AS_UTF8(u8s, 3, 4, 5, b2);
+			if (b3) {
+				b3 += U8_HANGUL_JAMO_T_FIRST;
+				U8_SAVE_HANGUL_AS_UTF8(u8s, 6, 7, 8, b3);
+
+				u8s[9] = '\0';
+				*state = U8_STATE_HANGUL_LVT;
+				return (9);
+			}
+
+			u8s[6] = '\0';
+			*state = U8_STATE_HANGUL_LV;
+			return (6);
+		}
+
+		b2 = u8s[0] = s[0];
+		b3 = u8s[1] = s[1];
+		b4 = u8s[2] = s[2];
+		u8s[3] = '\0';
+
+		/*
+		 * If this is a Hangul Jamo, we know there is nothing
+		 * further that we can decompose.
+		 */
+		if (U8_HANGUL_JAMO_L(u1)) {
+			*state = U8_STATE_HANGUL_L;
+			return (3);
+		}
+
+		if (U8_HANGUL_JAMO_V(u1)) {
+			if (*state == U8_STATE_HANGUL_L)
+				*state = U8_STATE_HANGUL_LV;
+			else
+				*state = U8_STATE_HANGUL_V;
+			return (3);
+		}
+
+		if (U8_HANGUL_JAMO_T(u1)) {
+			if (*state == U8_STATE_HANGUL_LV)
+				*state = U8_STATE_HANGUL_LVT;
+			else
+				*state = U8_STATE_HANGUL_T;
+			return (3);
+		}
+	} else if (sz == 4) {
+		b1 = u8s[0] = s[0];
+		b2 = u8s[1] = s[1];
+		b3 = u8s[2] = s[2];
+		b4 = u8s[3] = s[3];
+		u8s[4] = '\0';
+	} else {
+		/*
+		 * This is a fallback and should not happen if the function
+		 * was called properly.
+		 */
+		u8s[0] = s[0];
+		u8s[1] = '\0';
+		*state = U8_STATE_START;
+		return (1);
+	}
+
+	/*
+	 * At this point, this routine does not know what it would get.
+	 * The caller should sort it out if the state isn't a Hangul one.
+	 */
+	*state = U8_STATE_START;
+
+	/* Try to find matching decomposition mapping byte sequence. */
+	b1 = u8_common_b1_tbl[uv][b1];
+	if (b1 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	b2 = u8_decomp_b2_tbl[uv][b1][b2];
+	if (b2 == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	b3_tbl = u8_decomp_b3_tbl[uv][b2][b3].tbl_id;
+	if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
+		return ((size_t)sz);
+
+	/*
+	 * If b3_tbl is bigger than or equal to U8_16BIT_TABLE_INDICATOR
+	 * which is 0x8000, this means we couldn't fit the mappings into
+	 * the cardinality of a unsigned byte.
+	 *
+	 * NOTE(review): the b4 + 1 access assumes each b4 table row has
+	 * one more entry than there are possible b4 values -- confirm
+	 * against the table definitions in u8_textprep_data.h.
+	 */
+	if (b3_tbl >= U8_16BIT_TABLE_INDICATOR) {
+		b3_tbl -= U8_16BIT_TABLE_INDICATOR;
+		start_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4];
+		end_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4 + 1];
+	} else {
+		start_id = u8_decomp_b4_tbl[uv][b3_tbl][b4];
+		end_id = u8_decomp_b4_tbl[uv][b3_tbl][b4 + 1];
+	}
+
+	/* This also means there wasn't any matching decomposition. */
+	if (start_id >= end_id)
+		return ((size_t)sz);
+
+	/*
+	 * The final table for decomposition mappings has three types of
+	 * byte sequences depending on whether a mapping is for compatibility
+	 * decomposition, canonical decomposition, or both like the following:
+	 *
+	 * (1) Compatibility decomposition mappings:
+	 *
+	 *	+---+---+-...-+---+
+	 *	| B0| B1| ... | Bm|
+	 *	+---+---+-...-+---+
+	 *
+	 *	The first byte, B0, is always less then 0xF5 (U8_DECOMP_BOTH).
+	 *
+	 * (2) Canonical decomposition mappings:
+	 *
+	 *	+---+---+---+-...-+---+
+	 *	| T | b0| b1| ... | bn|
+	 *	+---+---+---+-...-+---+
+	 *
+	 *	where the first byte, T, is 0xF6 (U8_DECOMP_CANONICAL).
+	 *
+	 * (3) Both mappings:
+	 *
+	 *	+---+---+---+---+-...-+---+---+---+-...-+---+
+	 *	| T | D | b0| b1| ... | bn| B0| B1| ... | Bm|
+	 *	+---+---+---+---+-...-+---+---+---+-...-+---+
+	 *
+	 *	where T is 0xF5 (U8_DECOMP_BOTH) and D is a displacement
+	 *	byte, b0 to bn are canonical mapping bytes and B0 to Bm are
+	 *	compatibility mapping bytes.
+	 *
+	 * Note that compatibility decomposition means doing recursive
+	 * decompositions using both compatibility decomposition mappings and
+	 * canonical decomposition mappings. On the other hand, canonical
+	 * decomposition means doing recursive decompositions using only
+	 * canonical decomposition mappings. Since the table we have has gone
+	 * through the recursions already, we do not need to do so during
+	 * runtime, i.e., the table has been completely flattened out
+	 * already.
+	 */
+
+	b3_base = u8_decomp_b3_tbl[uv][b2][b3].base;
+
+	/* Get the type, T, of the byte sequence. */
+	b1 = u8_decomp_final_tbl[uv][b3_base + start_id];
+
+	/*
+	 * If necessary, adjust start_id, end_id, or both. Note that if
+	 * this is compatibility decomposition mapping, there is no
+	 * adjustment.
+	 */
+	if (canonical_decomposition) {
+		/* Is the mapping only for compatibility decomposition? */
+		if (b1 < U8_DECOMP_BOTH)
+			return ((size_t)sz);
+
+		start_id++;
+
+		if (b1 == U8_DECOMP_BOTH) {
+			end_id = start_id +
+			    u8_decomp_final_tbl[uv][b3_base + start_id];
+			start_id++;
+		}
+	} else {
+		/*
+		 * Unless this is a compatibility decomposition mapping,
+		 * we adjust the start_id.
+		 */
+		if (b1 == U8_DECOMP_BOTH) {
+			start_id++;
+			start_id += u8_decomp_final_tbl[uv][b3_base + start_id];
+		} else if (b1 == U8_DECOMP_CANONICAL) {
+			start_id++;
+		}
+	}
+
+	for (i = 0; start_id < end_id; start_id++)
+		u8s[i++] = u8_decomp_final_tbl[uv][b3_base + start_id];
+	u8s[i] = '\0';
+
+	return (i);
+}
+
+/*
+ * Locate the canonical composition mapping list, if any, for the
+ * character in s (sz bytes long) using the multi-level composition
+ * tables for Unicode version uv.  The returned pointer addresses the
+ * count byte of the mapping-pair list whose layout is described in
+ * do_composition(); NULL means the character starts no composition.
+ */
+static uchar_t *
+find_composition_start(size_t uv, uchar_t *s, size_t sz)
+{
+	uint16_t b1 = 0;
+	uint16_t b2 = 0;
+	uint16_t b3 = 0;
+	uint16_t b3_tbl;
+	uint16_t b3_base;
+	uint16_t b4 = 0;
+	size_t start_id;
+	size_t end_id;
+
+	/* Shorter characters leave the leading index bytes at zero. */
+	switch (sz) {
+	case 1:
+		b4 = s[0];
+		break;
+	case 2:
+		b3 = s[0];
+		b4 = s[1];
+		break;
+	case 3:
+		b2 = s[0];
+		b3 = s[1];
+		b4 = s[2];
+		break;
+	case 4:
+		b1 = s[0];
+		b2 = s[1];
+		b3 = s[2];
+		b4 = s[3];
+		break;
+	default:
+		/*
+		 * This is a fallback and should not happen if the
+		 * function was called properly.
+		 */
+		return (NULL);
+	}
+
+	b1 = u8_composition_b1_tbl[uv][b1];
+	if (b1 == U8_TBL_ELEMENT_NOT_DEF)
+		return (NULL);
+
+	b2 = u8_composition_b2_tbl[uv][b1][b2];
+	if (b2 == U8_TBL_ELEMENT_NOT_DEF)
+		return (NULL);
+
+	b3_tbl = u8_composition_b3_tbl[uv][b2][b3].tbl_id;
+	if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF)
+		return (NULL);
+
+	/* Mapping sets too large for 8 bits live in the 16-bit table. */
+	if (b3_tbl >= U8_16BIT_TABLE_INDICATOR) {
+		b3_tbl -= U8_16BIT_TABLE_INDICATOR;
+		start_id = u8_composition_b4_16bit_tbl[uv][b3_tbl][b4];
+		end_id = u8_composition_b4_16bit_tbl[uv][b3_tbl][b4 + 1];
+	} else {
+		start_id = u8_composition_b4_tbl[uv][b3_tbl][b4];
+		end_id = u8_composition_b4_tbl[uv][b3_tbl][b4 + 1];
+	}
+
+	/* An empty range means no composition mapping for this one. */
+	if (start_id >= end_id)
+		return (NULL);
+
+	b3_base = u8_composition_b3_tbl[uv][b2][b3].base;
+
+	return ((uchar_t *)&(u8_composition_final_tbl[uv][b3_base + start_id]));
+}
+
+/*
+ * Decide whether the combining mark at index "last" is blocked from
+ * the Starter of this sequence: it is blocked when any earlier mark in
+ * the sequence (indices 1 .. last-1) has a combining class greater
+ * than or equal to its own, or is itself a Starter.
+ */
+static boolean_t
+blocked(uchar_t *comb_class, size_t last)
+{
+	uchar_t cc = comb_class[last];
+	size_t idx;
+
+	for (idx = 1; idx < last; idx++) {
+		if (comb_class[idx] == U8_COMBINING_CLASS_STARTER ||
+		    comb_class[idx] >= cc)
+			return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * The do_composition() reads the character string pointed by 's' and
+ * do necessary canonical composition and then copy over the result back to
+ * the 's'.
+ *
+ * The input argument 's' cannot contain more than 32 characters.
+ */
+static size_t
+do_composition(size_t uv, uchar_t *s, uchar_t *comb_class, uchar_t *start,
+ uchar_t *disp, size_t last, uchar_t **os, uchar_t *oslast)
+{
+ uchar_t t[U8_STREAM_SAFE_TEXT_MAX + 1];
+ uchar_t tc[U8_MB_CUR_MAX];
+ uint8_t saved_marks[U8_MAX_CHARS_A_SEQ];
+ size_t saved_marks_count;
+ uchar_t *p;
+ uchar_t *saved_p;
+ uchar_t *q;
+ size_t i;
+ size_t saved_i;
+ size_t j;
+ size_t k;
+ size_t l;
+ size_t C;
+ size_t saved_l;
+ size_t size;
+ uint32_t u1;
+ uint32_t u2;
+ boolean_t match_not_found = B_TRUE;
+
+ /*
+ * This should never happen unless the callers are doing some strange
+ * and unexpected things.
+ *
+ * The "last" is the index pointing to the last character not last + 1.
+ */
+ if (last >= U8_MAX_CHARS_A_SEQ)
+ last = U8_UPPER_LIMIT_IN_A_SEQ;
+
+ for (i = l = 0; i <= last; i++) {
+ /*
+ * The last or any non-Starters at the beginning, we don't
+ * have any chance to do composition and so we just copy them
+ * to the temporary buffer.
+ */
+ if (i >= last || comb_class[i] != U8_COMBINING_CLASS_STARTER) {
+SAVE_THE_CHAR:
+ p = s + start[i];
+ size = disp[i];
+ for (k = 0; k < size; k++)
+ t[l++] = *p++;
+ continue;
+ }
+
+ /*
+ * If this could be a start of Hangul Jamos, then, we try to
+ * conjoin them.
+ */
+ if (s[start[i]] == U8_HANGUL_JAMO_1ST_BYTE) {
+ U8_PUT_3BYTES_INTO_UTF32(u1, s[start[i]],
+ s[start[i] + 1], s[start[i] + 2]);
+ U8_PUT_3BYTES_INTO_UTF32(u2, s[start[i] + 3],
+ s[start[i] + 4], s[start[i] + 5]);
+
+ if (U8_HANGUL_JAMO_L(u1) && U8_HANGUL_JAMO_V(u2)) {
+ u1 -= U8_HANGUL_JAMO_L_FIRST;
+ u2 -= U8_HANGUL_JAMO_V_FIRST;
+ u1 = U8_HANGUL_SYL_FIRST +
+ (u1 * U8_HANGUL_V_COUNT + u2) *
+ U8_HANGUL_T_COUNT;
+
+ i += 2;
+ if (i <= last) {
+ U8_PUT_3BYTES_INTO_UTF32(u2,
+ s[start[i]], s[start[i] + 1],
+ s[start[i] + 2]);
+
+ if (U8_HANGUL_JAMO_T(u2)) {
+ u1 += u2 -
+ U8_HANGUL_JAMO_T_FIRST;
+ i++;
+ }
+ }
+
+ U8_SAVE_HANGUL_AS_UTF8(t + l, 0, 1, 2, u1);
+ i--;
+ l += 3;
+ continue;
+ }
+ }
+
+ /*
+ * Let's then find out if this Starter has composition
+ * mapping.
+ */
+ p = find_composition_start(uv, s + start[i], disp[i]);
+ if (p == NULL)
+ goto SAVE_THE_CHAR;
+
+ /*
+ * We have a Starter with composition mapping and the next
+ * character is a non-Starter. Let's try to find out if
+ * we can do composition.
+ */
+
+ saved_p = p;
+ saved_i = i;
+ saved_l = l;
+ saved_marks_count = 0;
+
+TRY_THE_NEXT_MARK:
+ q = s + start[++i];
+ size = disp[i];
+
+ /*
+ * The next for() loop compares the non-Starter pointed by
+ * 'q' with the possible (joinable) characters pointed by 'p'.
+ *
+ * The composition final table entry pointed by the 'p'
+ * looks like the following:
+ *
+ * +---+---+---+-...-+---+---+---+---+-...-+---+---+
+ * | C | b0| b2| ... | bn| F | B0| B1| ... | Bm| F |
+ * +---+---+---+-...-+---+---+---+---+-...-+---+---+
+ *
+ * where C is the count byte indicating the number of
+ * mapping pairs where each pair would be look like
+ * (b0-bn F, B0-Bm F). The b0-bn are the bytes of the second
+ * character of a canonical decomposition and the B0-Bm are
+ * the bytes of a matching composite character. The F is
+ * a filler byte after each character as the separator.
+ */
+
+ match_not_found = B_TRUE;
+
+ for (C = *p++; C > 0; C--) {
+ for (k = 0; k < size; p++, k++)
+ if (*p != q[k])
+ break;
+
+ /* Have we found it? */
+ if (k >= size && *p == U8_TBL_ELEMENT_FILLER) {
+ match_not_found = B_FALSE;
+
+ l = saved_l;
+
+ while (*++p != U8_TBL_ELEMENT_FILLER)
+ t[l++] = *p;
+
+ break;
+ }
+
+ /* We didn't find; skip to the next pair. */
+ if (*p != U8_TBL_ELEMENT_FILLER)
+ while (*++p != U8_TBL_ELEMENT_FILLER)
+ ;
+ while (*++p != U8_TBL_ELEMENT_FILLER)
+ ;
+ p++;
+ }
+
+ /*
+ * If there was no match, we will need to save the combining
+ * mark for later appending. After that, if the next one
+ * is a non-Starter and not blocked, then, we try once
+ * again to do composition with the next non-Starter.
+ *
+ * If there was no match and this was a Starter, then,
+ * this is a new start.
+ *
+ * If there was a match and a composition done and we have
+ * more to check on, then, we retrieve a new composition final
+ * table entry for the composite and then try to do the
+ * composition again.
+ */
+
+ if (match_not_found) {
+ if (comb_class[i] == U8_COMBINING_CLASS_STARTER) {
+ i--;
+ goto SAVE_THE_CHAR;
+ }
+
+ saved_marks[saved_marks_count++] = i;
+ }
+
+ if (saved_l == l) {
+ while (i < last) {
+ if (blocked(comb_class, i + 1))
+ saved_marks[saved_marks_count++] = ++i;
+ else
+ break;
+ }
+ if (i < last) {
+ p = saved_p;
+ goto TRY_THE_NEXT_MARK;
+ }
+ } else if (i < last) {
+ p = find_composition_start(uv, t + saved_l,
+ l - saved_l);
+ if (p != NULL) {
+ saved_p = p;
+ goto TRY_THE_NEXT_MARK;
+ }
+ }
+
+ /*
+ * There is no more composition possible.
+ *
+ * If there was no composition what so ever then we copy
+ * over the original Starter and then append any non-Starters
+ * remaining at the target string sequentially after that.
+ */
+
+ if (saved_l == l) {
+ p = s + start[saved_i];
+ size = disp[saved_i];
+ for (j = 0; j < size; j++)
+ t[l++] = *p++;
+ }
+
+ for (k = 0; k < saved_marks_count; k++) {
+ p = s + start[saved_marks[k]];
+ size = disp[saved_marks[k]];
+ for (j = 0; j < size; j++)
+ t[l++] = *p++;
+ }
+ }
+
+ /*
+ * If the last character is a Starter and if we have a character
+ * (possibly another Starter) that can be turned into a composite,
+ * we do so and we do so until there is no more of composition
+ * possible.
+ */
+ if (comb_class[last] == U8_COMBINING_CLASS_STARTER) {
+ p = *os;
+ saved_l = l - disp[last];
+
+ while (p < oslast) {
+ size = u8_number_of_bytes[*p];
+ if (size <= 1 || (p + size) > oslast)
+ break;
+
+ saved_p = p;
+
+ for (i = 0; i < size; i++)
+ tc[i] = *p++;
+
+ q = find_composition_start(uv, t + saved_l,
+ l - saved_l);
+ if (q == NULL) {
+ p = saved_p;
+ break;
+ }
+
+ match_not_found = B_TRUE;
+
+ for (C = *q++; C > 0; C--) {
+ for (k = 0; k < size; q++, k++)
+ if (*q != tc[k])
+ break;
+
+ if (k >= size && *q == U8_TBL_ELEMENT_FILLER) {
+ match_not_found = B_FALSE;
+
+ l = saved_l;
+
+ while (*++q != U8_TBL_ELEMENT_FILLER) {
+ /*
+ * This is practically
+ * impossible but we don't
+ * want to take any chances.
+ */
+ if (l >=
+ U8_STREAM_SAFE_TEXT_MAX) {
+ p = saved_p;
+ goto SAFE_RETURN;
+ }
+ t[l++] = *q;
+ }
+
+ break;
+ }
+
+ if (*q != U8_TBL_ELEMENT_FILLER)
+ while (*++q != U8_TBL_ELEMENT_FILLER)
+ ;
+ while (*++q != U8_TBL_ELEMENT_FILLER)
+ ;
+ q++;
+ }
+
+ if (match_not_found) {
+ p = saved_p;
+ break;
+ }
+ }
+SAFE_RETURN:
+ *os = p;
+ }
+
+ /*
+ * Now we copy over the temporary string to the target string.
+ * Since composition always reduces the number of characters or
+ * the number of characters stay, we don't need to worry about
+ * the buffer overflow here.
+ */
+ for (i = 0; i < l; i++)
+ s[i] = t[i];
+ s[l] = '\0';
+
+ return (l);
+}
+
+/*
+ * The collect_a_seq() function checks on the given string s, collects
+ * a sequence of characters at u8s, and returns the sequence. While it
+ * collects a sequence, it also applies case conversion, canonical or
+ * compatibility decomposition, and canonical composition, or some or
+ * all of them and in that order.
+ *
+ * The collected sequence cannot be bigger than 32 characters since if
+ * it is having more than 31 characters, the sequence will be terminated
+ * with a U+034F COMBINING GRAPHEME JOINER (CGJ) character and turned into
+ * a Stream-Safe Text. The collected sequence is always terminated with
+ * a null byte and the return value is the byte length of the sequence,
+ * which can be zero. The return value does not include the terminating
+ * null byte.
+ */
+static size_t
+collect_a_seq(size_t uv, uchar_t *u8s, uchar_t **source, uchar_t *slast,
+    boolean_t is_it_toupper,
+    boolean_t is_it_tolower,
+    boolean_t canonical_decomposition,
+    boolean_t compatibility_decomposition,
+    boolean_t canonical_composition,
+    int *errnum, u8_normalization_states_t *state)
+{
+	uchar_t *s;
+	int sz;
+	int saved_sz;
+	size_t i;
+	size_t j;
+	size_t k;
+	size_t l;
+	uchar_t comb_class[U8_MAX_CHARS_A_SEQ];	/* combining class per char */
+	uchar_t disp[U8_MAX_CHARS_A_SEQ];	/* byte length per char */
+	uchar_t start[U8_MAX_CHARS_A_SEQ];	/* offset into u8s[] per char */
+	uchar_t u8t[U8_MB_CUR_MAX];
+	uchar_t uts[U8_STREAM_SAFE_TEXT_MAX + 1];
+	uchar_t tc;
+	size_t last;
+	size_t saved_last;
+	uint32_t u1;
+
+	/*
+	 * Save the source string pointer which we will return a changed
+	 * pointer if we do processing.
+	 */
+	s = *source;
+
+	/*
+	 * The following is a fallback for just in case callers are not
+	 * checking the string boundaries before the calling.
+	 */
+	if (s >= slast) {
+		u8s[0] = '\0';
+
+		return (0);
+	}
+
+	/*
+	 * As the first thing, let's collect a character and do case
+	 * conversion if necessary.
+	 */
+
+	sz = u8_number_of_bytes[*s];
+
+	if (sz < 0) {
+		/* Illegal lead byte: report EILSEQ and pass it through. */
+		*errnum = EILSEQ;
+
+		u8s[0] = *s++;
+		u8s[1] = '\0';
+
+		*source = s;
+
+		return (1);
+	}
+
+	if (sz == 1) {
+		if (is_it_toupper)
+			u8s[0] = U8_ASCII_TOUPPER(*s);
+		else if (is_it_tolower)
+			u8s[0] = U8_ASCII_TOLOWER(*s);
+		else
+			u8s[0] = *s;
+		s++;
+		u8s[1] = '\0';
+	} else if ((s + sz) > slast) {
+		/*
+		 * Incomplete character at the very end of the input:
+		 * report EINVAL and hand the remaining bytes back as-is.
+		 */
+		*errnum = EINVAL;
+
+		for (i = 0; s < slast; )
+			u8s[i++] = *s++;
+		u8s[i] = '\0';
+
+		*source = s;
+
+		return (i);
+	} else {
+		if (is_it_toupper || is_it_tolower) {
+			i = do_case_conv(uv, u8s, s, sz, is_it_toupper);
+			s += sz;
+			sz = i;
+		} else {
+			for (i = 0; i < sz; )
+				u8s[i++] = *s++;
+			u8s[i] = '\0';
+		}
+	}
+
+	/*
+	 * And then canonical/compatibility decomposition followed by
+	 * an optional canonical composition. Please be noted that
+	 * canonical composition is done only when a decomposition is
+	 * done.
+	 */
+	if (canonical_decomposition || compatibility_decomposition) {
+		if (sz == 1) {
+			/* A 7-bit ASCII character is its own Starter. */
+			*state = U8_STATE_START;
+
+			saved_sz = 1;
+
+			comb_class[0] = 0;
+			start[0] = 0;
+			disp[0] = 1;
+
+			last = 1;
+		} else {
+			saved_sz = do_decomp(uv, u8s, u8s, sz,
+			    canonical_decomposition, state);
+
+			last = 0;
+
+			for (i = 0; i < saved_sz; ) {
+				sz = u8_number_of_bytes[u8s[i]];
+
+				comb_class[last] = combining_class(uv,
+				    u8s + i, sz);
+				start[last] = i;
+				disp[last] = sz;
+
+				last++;
+				i += sz;
+			}
+
+			/*
+			 * Decomposition yields various Hangul related
+			 * states but not on combining marks. We need to
+			 * find out at here by checking on the last
+			 * character.
+			 */
+			if (*state == U8_STATE_START) {
+				if (comb_class[last - 1])
+					*state = U8_STATE_COMBINING_MARK;
+			}
+		}
+
+		saved_last = last;
+
+		while (s < slast) {
+			sz = u8_number_of_bytes[*s];
+
+			/*
+			 * If this is an illegal character, an incomplete
+			 * character, or a 7-bit ASCII Starter character,
+			 * then we have collected a sequence; break and let
+			 * the next call deal with the two cases.
+			 *
+			 * Note that this is okay only if you are using this
+			 * function with a fixed length string, not on
+			 * a buffer with multiple calls of one chunk at a time.
+			 */
+			if (sz <= 1) {
+				break;
+			} else if ((s + sz) > slast) {
+				break;
+			} else {
+				/*
+				 * If the previous character was a Hangul Jamo
+				 * and this character is a Hangul Jamo that
+				 * can be conjoined, we collect the Jamo.
+				 */
+				if (*s == U8_HANGUL_JAMO_1ST_BYTE) {
+					U8_PUT_3BYTES_INTO_UTF32(u1,
+					    *s, *(s + 1), *(s + 2));
+
+					if (U8_HANGUL_COMPOSABLE_L_V(*state,
+					    u1)) {
+						i = 0;
+						*state = U8_STATE_HANGUL_LV;
+						goto COLLECT_A_HANGUL;
+					}
+
+					if (U8_HANGUL_COMPOSABLE_LV_T(*state,
+					    u1)) {
+						i = 0;
+						*state = U8_STATE_HANGUL_LVT;
+						goto COLLECT_A_HANGUL;
+					}
+				}
+
+				/*
+				 * Regardless of whatever it was, if this is
+				 * a Starter, we don't collect the character
+				 * since that's a new start and we will deal
+				 * with it at the next time.
+				 */
+				i = combining_class(uv, s, sz);
+				if (i == U8_COMBINING_CLASS_STARTER)
+					break;
+
+				/*
+				 * We know the current character is a combining
+				 * mark. If the previous character wasn't
+				 * a Starter (not Hangul) or a combining mark,
+				 * then, we don't collect this combining mark.
+				 */
+				if (*state != U8_STATE_START &&
+				    *state != U8_STATE_COMBINING_MARK)
+					break;
+
+				*state = U8_STATE_COMBINING_MARK;
+COLLECT_A_HANGUL:
+				/*
+				 * If we collected a Starter and combining
+				 * marks up to 30, i.e., total 31 characters,
+				 * then, we terminate this degenerately long
+				 * combining sequence with a U+034F COMBINING
+				 * GRAPHEME JOINER (CGJ) which is 0xCD 0x8F in
+				 * UTF-8 and turn this into a Stream-Safe
+				 * Text. This will be extremely rare but
+				 * possible.
+				 *
+				 * The following will also guarantee that
+				 * we are not writing more than 32 characters
+				 * plus a NULL at u8s[].
+				 */
+				if (last >= U8_UPPER_LIMIT_IN_A_SEQ) {
+TURN_STREAM_SAFE:
+					*state = U8_STATE_START;
+					comb_class[last] = 0;
+					start[last] = saved_sz;
+					disp[last] = 2;
+					last++;
+
+					u8s[saved_sz++] = 0xCD;
+					u8s[saved_sz++] = 0x8F;
+
+					break;
+				}
+
+				/*
+				 * Some combining marks also do decompose into
+				 * another combining mark or marks.
+				 */
+				if (*state == U8_STATE_COMBINING_MARK) {
+					k = last;
+					l = sz;
+					i = do_decomp(uv, uts, s, sz,
+					    canonical_decomposition, state);
+					for (j = 0; j < i; ) {
+						sz = u8_number_of_bytes[uts[j]];
+
+						comb_class[last] =
+						    combining_class(uv,
+						    uts + j, sz);
+						start[last] = saved_sz + j;
+						disp[last] = sz;
+
+						last++;
+						if (last >=
+						    U8_UPPER_LIMIT_IN_A_SEQ) {
+							last = k;
+							goto TURN_STREAM_SAFE;
+						}
+						j += sz;
+					}
+
+					*state = U8_STATE_COMBINING_MARK;
+					sz = i;
+					s += l;
+
+					for (i = 0; i < sz; i++)
+						u8s[saved_sz++] = uts[i];
+				} else {
+					comb_class[last] = i;
+					start[last] = saved_sz;
+					disp[last] = sz;
+					last++;
+
+					for (i = 0; i < sz; i++)
+						u8s[saved_sz++] = *s++;
+				}
+
+				/*
+				 * If this is U+0345 COMBINING GREEK
+				 * YPOGEGRAMMENI (0xCD 0x85 in UTF-8), a.k.a.,
+				 * iota subscript, and need to be converted to
+				 * uppercase letter, convert it to U+0399 GREEK
+				 * CAPITAL LETTER IOTA (0xCE 0x99 in UTF-8),
+				 * i.e., convert to capital adscript form as
+				 * specified in the Unicode standard.
+				 *
+				 * This is the only special case of (ambiguous)
+				 * case conversion at combining marks and
+				 * probably the standard will never have
+				 * anything similar like this in future.
+				 */
+				if (is_it_toupper && sz >= 2 &&
+				    u8s[saved_sz - 2] == 0xCD &&
+				    u8s[saved_sz - 1] == 0x85) {
+					u8s[saved_sz - 2] = 0xCE;
+					u8s[saved_sz - 1] = 0x99;
+				}
+			}
+		}
+
+		/*
+		 * Let's try to ensure a canonical ordering for the collected
+		 * combining marks. We do this only if we have collected
+		 * at least one more non-Starter. (The decomposition mapping
+		 * data tables have fully (and recursively) expanded and
+		 * canonically ordered decompositions.)
+		 *
+		 * The U8_SWAP_COMB_MARKS() convenience macro has some
+		 * assumptions and we are meeting the assumptions.
+		 */
+		last--;
+		if (last >= saved_last) {
+			/* Stable bubble sort by combining class. */
+			for (i = 0; i < last; i++)
+				for (j = last; j > i; j--)
+					if (comb_class[j] &&
+					    comb_class[j - 1] > comb_class[j]) {
+						U8_SWAP_COMB_MARKS(j - 1, j);
+					}
+		}
+
+		*source = s;
+
+		if (! canonical_composition) {
+			/* NFD/NFKD requested: no composition step follows. */
+			u8s[saved_sz] = '\0';
+			return (saved_sz);
+		}
+
+		/*
+		 * Now do the canonical composition. Note that we do this
+		 * only after a canonical or compatibility decomposition to
+		 * finish up NFC or NFKC.
+		 */
+		sz = do_composition(uv, u8s, comb_class, start, disp, last,
+		    &s, slast);
+	}
+
+	*source = s;
+
+	return ((size_t)sz);
+}
+
+/*
+ * The do_norm_compare() function does string comparison based on Unicode
+ * simple case mappings and Unicode Normalization definitions.
+ *
+ * It does so by collecting a sequence of characters at a time and comparing
+ * the collected sequences from the strings.
+ *
+ * The meanings on the return values are the same as the usual strcmp().
+ */
+static int
+do_norm_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1, size_t n2,
+    int flag, int *errnum)
+{
+	int result;
+	size_t sz1;
+	size_t sz2;
+	uchar_t u8s1[U8_STREAM_SAFE_TEXT_MAX + 1];
+	uchar_t u8s2[U8_STREAM_SAFE_TEXT_MAX + 1];
+	uchar_t *s1last;
+	uchar_t *s2last;
+	boolean_t is_it_toupper;
+	boolean_t is_it_tolower;
+	boolean_t canonical_decomposition;
+	boolean_t compatibility_decomposition;
+	boolean_t canonical_composition;
+	u8_normalization_states_t state;
+
+	s1last = s1 + n1;
+	s2last = s2 + n2;
+
+	/* Decode the requested case conversion and normalization options. */
+	is_it_toupper = flag & U8_TEXTPREP_TOUPPER;
+	is_it_tolower = flag & U8_TEXTPREP_TOLOWER;
+	canonical_decomposition = flag & U8_CANON_DECOMP;
+	compatibility_decomposition = flag & U8_COMPAT_DECOMP;
+	canonical_composition = flag & U8_CANON_COMP;
+
+	while (s1 < s1last && s2 < s2last) {
+		/*
+		 * If the current character is a 7-bit ASCII and the last
+		 * character, or, if the current character and the next
+		 * character are both some 7-bit ASCII characters then
+		 * we treat the current character as a sequence.
+		 *
+		 * In any other cases, we need to call collect_a_seq().
+		 */
+
+		if (U8_ISASCII(*s1) && ((s1 + 1) >= s1last ||
+		    ((s1 + 1) < s1last && U8_ISASCII(*(s1 + 1))))) {
+			if (is_it_toupper)
+				u8s1[0] = U8_ASCII_TOUPPER(*s1);
+			else if (is_it_tolower)
+				u8s1[0] = U8_ASCII_TOLOWER(*s1);
+			else
+				u8s1[0] = *s1;
+			u8s1[1] = '\0';
+			sz1 = 1;
+			s1++;
+		} else {
+			state = U8_STATE_START;
+			sz1 = collect_a_seq(uv, u8s1, &s1, s1last,
+			    is_it_toupper, is_it_tolower,
+			    canonical_decomposition,
+			    compatibility_decomposition,
+			    canonical_composition, errnum, &state);
+		}
+
+		if (U8_ISASCII(*s2) && ((s2 + 1) >= s2last ||
+		    ((s2 + 1) < s2last && U8_ISASCII(*(s2 + 1))))) {
+			if (is_it_toupper)
+				u8s2[0] = U8_ASCII_TOUPPER(*s2);
+			else if (is_it_tolower)
+				u8s2[0] = U8_ASCII_TOLOWER(*s2);
+			else
+				u8s2[0] = *s2;
+			u8s2[1] = '\0';
+			sz2 = 1;
+			s2++;
+		} else {
+			state = U8_STATE_START;
+			sz2 = collect_a_seq(uv, u8s2, &s2, s2last,
+			    is_it_toupper, is_it_tolower,
+			    canonical_decomposition,
+			    compatibility_decomposition,
+			    canonical_composition, errnum, &state);
+		}
+
+		/*
+		 * Now compare the two characters. If they are the same,
+		 * we move on to the next character sequences.
+		 */
+		if (sz1 == 1 && sz2 == 1) {
+			if (*u8s1 > *u8s2)
+				return (1);
+			if (*u8s1 < *u8s2)
+				return (-1);
+		} else {
+			/*
+			 * Both collected sequences are NUL-terminated, so
+			 * a plain bytewise strcmp() decides the ordering.
+			 */
+			result = strcmp((const char *)u8s1, (const char *)u8s2);
+			if (result != 0)
+				return (result);
+		}
+	}
+
+	/*
+	 * We compared until the end of either or both strings.
+	 *
+	 * If we reached to or went over the ends for the both, that means
+	 * they are the same.
+	 *
+	 * If we reached only one end, that means the other string has
+	 * something which then can be used to determine the return value.
+	 */
+	if (s1 >= s1last) {
+		if (s2 >= s2last)
+			return (0);
+		return (-1);
+	}
+	return (1);
+}
+
+/*
+ * The u8_strcmp() function compares two UTF-8 strings quite similar to
+ * the strcmp(). For the comparison, however, Unicode Normalization specific
+ * equivalency and Unicode simple case conversion mappings based equivalency
+ * can be requested and checked against.
+ */
+int
+u8_strcmp(const char *s1, const char *s2, size_t n, int flag, size_t uv,
+    int *errnum)
+{
+	int f;
+	size_t n1;
+	size_t n2;
+
+	*errnum = 0;
+
+	/*
+	 * Check on the requested Unicode version, case conversion, and
+	 * normalization flag values.  Invalid values are reported through
+	 * *errnum but the comparison still proceeds with safe defaults.
+	 */
+
+	if (uv > U8_UNICODE_LATEST) {
+		*errnum = ERANGE;
+		uv = U8_UNICODE_LATEST;
+	}
+
+	if (flag == 0) {
+		flag = U8_STRCMP_CS;
+	} else {
+		f = flag & (U8_STRCMP_CS | U8_STRCMP_CI_UPPER |
+		    U8_STRCMP_CI_LOWER);
+		if (f == 0) {
+			flag |= U8_STRCMP_CS;
+		} else if (f != U8_STRCMP_CS && f != U8_STRCMP_CI_UPPER &&
+		    f != U8_STRCMP_CI_LOWER) {
+			/* More than one case sensitivity mode requested. */
+			*errnum = EBADF;
+			flag = U8_STRCMP_CS;
+		}
+
+		f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP);
+		if (f && f != U8_STRCMP_NFD && f != U8_STRCMP_NFC &&
+		    f != U8_STRCMP_NFKD && f != U8_STRCMP_NFKC) {
+			/* Not one of the defined normalization forms. */
+			*errnum = EBADF;
+			flag = U8_STRCMP_CS;
+		}
+	}
+
+	if (flag == U8_STRCMP_CS) {
+		/* Plain binary comparison; n == 0 means whole strings. */
+		return (n == 0 ? strcmp(s1, s2) : strncmp(s1, s2, n));
+	}
+
+	n1 = strlen(s1);
+	n2 = strlen(s2);
+	if (n != 0) {
+		if (n < n1)
+			n1 = n;
+		if (n < n2)
+			n2 = n;
+	}
+
+	/*
+	 * Simple case conversion can be done much faster and so we do
+	 * them separately here.
+	 */
+	if (flag == U8_STRCMP_CI_UPPER) {
+		return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2,
+		    n1, n2, B_TRUE, errnum));
+	} else if (flag == U8_STRCMP_CI_LOWER) {
+		return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2,
+		    n1, n2, B_FALSE, errnum));
+	}
+
+	return (do_norm_compare(uv, (uchar_t *)s1, (uchar_t *)s2, n1, n2,
+	    flag, errnum));
+}
+
+/*
+ * The u8_textprep_str() function prepares the UTF-8 string at inarray
+ * into outarray, applying the case conversion and/or Unicode
+ * Normalization requested in flag.  On failure it returns (size_t)-1
+ * with *errnum set; otherwise it returns the number of characters that
+ * had invalid or incomplete byte sequences (0 if none).  On return,
+ * *inlen and *outlen hold the numbers of unconsumed input bytes and
+ * unused output bytes respectively.
+ */
+size_t
+u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen,
+    int flag, size_t unicode_version, int *errnum)
+{
+	int f;
+	int sz;
+	uchar_t *ib;
+	uchar_t *ibtail;
+	uchar_t *ob;
+	uchar_t *obtail;
+	boolean_t do_not_ignore_null;
+	boolean_t do_not_ignore_invalid;
+	boolean_t is_it_toupper;
+	boolean_t is_it_tolower;
+	boolean_t canonical_decomposition;
+	boolean_t compatibility_decomposition;
+	boolean_t canonical_composition;
+	size_t ret_val;
+	size_t i;
+	size_t j;
+	uchar_t u8s[U8_STREAM_SAFE_TEXT_MAX + 1];
+	u8_normalization_states_t state;
+
+	if (unicode_version > U8_UNICODE_LATEST) {
+		*errnum = ERANGE;
+		return ((size_t)-1);
+	}
+
+	/* TOUPPER and TOLOWER are mutually exclusive. */
+	f = flag & (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER);
+	if (f == (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER)) {
+		*errnum = EBADF;
+		return ((size_t)-1);
+	}
+
+	/* Only the four defined normalization form combinations are valid. */
+	f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP);
+	if (f && f != U8_TEXTPREP_NFD && f != U8_TEXTPREP_NFC &&
+	    f != U8_TEXTPREP_NFKD && f != U8_TEXTPREP_NFKC) {
+		*errnum = EBADF;
+		return ((size_t)-1);
+	}
+
+	if (inarray == NULL || *inlen == 0)
+		return (0);
+
+	if (outarray == NULL) {
+		*errnum = E2BIG;
+		return ((size_t)-1);
+	}
+
+	ib = (uchar_t *)inarray;
+	ob = (uchar_t *)outarray;
+	ibtail = ib + *inlen;
+	obtail = ob + *outlen;
+
+	do_not_ignore_null = !(flag & U8_TEXTPREP_IGNORE_NULL);
+	do_not_ignore_invalid = !(flag & U8_TEXTPREP_IGNORE_INVALID);
+	is_it_toupper = flag & U8_TEXTPREP_TOUPPER;
+	is_it_tolower = flag & U8_TEXTPREP_TOLOWER;
+
+	ret_val = 0;
+
+	/*
+	 * If we don't have a normalization flag set, we do the simple case
+	 * conversion based text preparation separately below. Text
+	 * preparation involving Normalization will be done in the false task
+	 * block, again, separately since it will take much more time and
+	 * resource than doing simple case conversions.
+	 */
+	if (f == 0) {
+		while (ib < ibtail) {
+			if (*ib == '\0' && do_not_ignore_null)
+				break;
+
+			sz = u8_number_of_bytes[*ib];
+
+			if (sz < 0) {
+				if (do_not_ignore_invalid) {
+					*errnum = EILSEQ;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				/* Pass the bad byte through and count it. */
+				sz = 1;
+				ret_val++;
+			}
+
+			if (sz == 1) {
+				if (ob >= obtail) {
+					*errnum = E2BIG;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				if (is_it_toupper)
+					*ob = U8_ASCII_TOUPPER(*ib);
+				else if (is_it_tolower)
+					*ob = U8_ASCII_TOLOWER(*ib);
+				else
+					*ob = *ib;
+				ib++;
+				ob++;
+			} else if ((ib + sz) > ibtail) {
+				if (do_not_ignore_invalid) {
+					*errnum = EINVAL;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				if ((obtail - ob) < (ibtail - ib)) {
+					*errnum = E2BIG;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				/*
+				 * We treat the remaining incomplete character
+				 * bytes as a character.
+				 */
+				ret_val++;
+
+				while (ib < ibtail)
+					*ob++ = *ib++;
+			} else {
+				if (is_it_toupper || is_it_tolower) {
+					i = do_case_conv(unicode_version, u8s,
+					    ib, sz, is_it_toupper);
+
+					if ((obtail - ob) < i) {
+						*errnum = E2BIG;
+						ret_val = (size_t)-1;
+						break;
+					}
+
+					ib += sz;
+
+					for (sz = 0; sz < i; sz++)
+						*ob++ = u8s[sz];
+				} else {
+					if ((obtail - ob) < sz) {
+						*errnum = E2BIG;
+						ret_val = (size_t)-1;
+						break;
+					}
+
+					for (i = 0; i < sz; i++)
+						*ob++ = *ib++;
+				}
+			}
+		}
+	} else {
+		canonical_decomposition = flag & U8_CANON_DECOMP;
+		compatibility_decomposition = flag & U8_COMPAT_DECOMP;
+		canonical_composition = flag & U8_CANON_COMP;
+
+		while (ib < ibtail) {
+			if (*ib == '\0' && do_not_ignore_null)
+				break;
+
+			/*
+			 * If the current character is a 7-bit ASCII
+			 * character and it is the last character, or,
+			 * if the current character is a 7-bit ASCII
+			 * character and the next character is also a 7-bit
+			 * ASCII character, then, we copy over this
+			 * character without going through collect_a_seq().
+			 *
+			 * In any other cases, we need to look further with
+			 * the collect_a_seq() function.
+			 */
+			if (U8_ISASCII(*ib) && ((ib + 1) >= ibtail ||
+			    ((ib + 1) < ibtail && U8_ISASCII(*(ib + 1))))) {
+				if (ob >= obtail) {
+					*errnum = E2BIG;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				if (is_it_toupper)
+					*ob = U8_ASCII_TOUPPER(*ib);
+				else if (is_it_tolower)
+					*ob = U8_ASCII_TOLOWER(*ib);
+				else
+					*ob = *ib;
+				ib++;
+				ob++;
+			} else {
+				*errnum = 0;
+				state = U8_STATE_START;
+
+				j = collect_a_seq(unicode_version, u8s,
+				    &ib, ibtail,
+				    is_it_toupper,
+				    is_it_tolower,
+				    canonical_decomposition,
+				    compatibility_decomposition,
+				    canonical_composition,
+				    errnum, &state);
+
+				if (*errnum && do_not_ignore_invalid) {
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				if ((obtail - ob) < j) {
+					*errnum = E2BIG;
+					ret_val = (size_t)-1;
+					break;
+				}
+
+				for (i = 0; i < j; i++)
+					*ob++ = u8s[i];
+			}
+		}
+	}
+
+	/* Report how much of each buffer is left over. */
+	*inlen = ibtail - ib;
+	*outlen = obtail - ob;
+
+	return (ret_val);
+}
diff --git a/common/zfs/zfs_comutil.c b/common/zfs/zfs_comutil.c
new file mode 100644
index 000000000000..ed9b67ea3bc9
--- /dev/null
+++ b/common/zfs/zfs_comutil.c
@@ -0,0 +1,202 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * This file is intended for functions that ought to be common between user
+ * land (libzfs) and the kernel. When many common routines need to be shared
+ * then a separate file should to be created.
+ */
+
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#else
+#include <string.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/fs/zfs.h>
+#include <sys/int_limits.h>
+#include <sys/nvpair.h>
+#include "zfs_comutil.h"
+
+/*
+ * Determine whether the pool config contains at least one vdev that can
+ * hold regular allocations, i.e. a top-level child that is not a log
+ * device.  Returns B_FALSE when there are no children at all.
+ */
+boolean_t
+zfs_allocatable_devs(nvlist_t *nv)
+{
+	nvlist_t **kids;
+	uint_t nkids;
+	uint_t idx;
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return (B_FALSE);
+
+	for (idx = 0; idx < nkids; idx++) {
+		uint64_t log_flag = 0;
+
+		/* Absent ZPOOL_CONFIG_IS_LOG means "not a log device". */
+		(void) nvlist_lookup_uint64(kids[idx], ZPOOL_CONFIG_IS_LOG,
+		    &log_flag);
+		if (log_flag == 0)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+/*
+ * Fill in *zrpp from the rewind policy settings found in nvl.
+ * Defaults are applied first: no rewind, zrp_maxmeta = 0, and
+ * zrp_maxdata/zrp_txg = UINT64_MAX.  A nested ZPOOL_REWIND_POLICY
+ * nvlist, if present, is parsed recursively and wins.
+ */
+void
+zpool_get_rewind_policy(nvlist_t *nvl, zpool_rewind_policy_t *zrpp)
+{
+	nvlist_t *policy;
+	nvpair_t *elem;
+	char *nm;
+
+	/* Defaults */
+	zrpp->zrp_request = ZPOOL_NO_REWIND;
+	zrpp->zrp_maxmeta = 0;
+	zrpp->zrp_maxdata = UINT64_MAX;
+	zrpp->zrp_txg = UINT64_MAX;
+
+	if (nvl == NULL)
+		return;
+
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
+		nm = nvpair_name(elem);
+		if (strcmp(nm, ZPOOL_REWIND_POLICY) == 0) {
+			if (nvpair_value_nvlist(elem, &policy) == 0)
+				zpool_get_rewind_policy(policy, zrpp);
+			return;
+		} else if (strcmp(nm, ZPOOL_REWIND_REQUEST) == 0) {
+			/* Unknown request bits revert to the default. */
+			if (nvpair_value_uint32(elem, &zrpp->zrp_request) == 0)
+				if (zrpp->zrp_request & ~ZPOOL_REWIND_POLICIES)
+					zrpp->zrp_request = ZPOOL_NO_REWIND;
+		} else if (strcmp(nm, ZPOOL_REWIND_REQUEST_TXG) == 0) {
+			(void) nvpair_value_uint64(elem, &zrpp->zrp_txg);
+		} else if (strcmp(nm, ZPOOL_REWIND_META_THRESH) == 0) {
+			(void) nvpair_value_uint64(elem, &zrpp->zrp_maxmeta);
+		} else if (strcmp(nm, ZPOOL_REWIND_DATA_THRESH) == 0) {
+			(void) nvpair_value_uint64(elem, &zrpp->zrp_maxdata);
+		}
+	}
+	if (zrpp->zrp_request == 0)
+		zrpp->zrp_request = ZPOOL_NO_REWIND;
+}
+
+/* One ZPL-version-to-SPA-version mapping entry. */
+typedef struct zfs_version_spa_map {
+	int version_zpl;
+	int version_spa;
+} zfs_version_spa_map_t;
+
+/*
+ * Keep this table in monotonically increasing version number order.
+ * The {0, 0} entry terminates the table for the lookup loops below.
+ */
+static zfs_version_spa_map_t zfs_version_table[] = {
+	{ZPL_VERSION_INITIAL, SPA_VERSION_INITIAL},
+	{ZPL_VERSION_DIRENT_TYPE, SPA_VERSION_INITIAL},
+	{ZPL_VERSION_FUID, SPA_VERSION_FUID},
+	{ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE},
+	{ZPL_VERSION_SA, SPA_VERSION_SA},
+	{0, 0}
+};
+
+/*
+ * Return the max zpl version for a corresponding spa version.
+ * -1 is returned if no mapping exists.
+ */
+int
+zfs_zpl_version_map(int spa_version)
+{
+	int zplver = -1;
+	int i;
+
+	/*
+	 * The table is sorted by increasing version, so the last entry
+	 * whose SPA version does not exceed the request wins.
+	 */
+	for (i = 0; zfs_version_table[i].version_spa != 0; i++) {
+		if (zfs_version_table[i].version_spa <= spa_version)
+			zplver = zfs_version_table[i].version_zpl;
+	}
+
+	return (zplver);
+}
+
+/*
+ * Return the min spa version for a corresponding zpl version.
+ * -1 is returned if no mapping exists.
+ */
+int
+zfs_spa_version_map(int zpl_version)
+{
+	int i;
+
+	/*
+	 * The table is sorted by increasing version, so the first entry
+	 * whose ZPL version satisfies the request gives the minimum SPA
+	 * version that supports it.
+	 */
+	for (i = 0; zfs_version_table[i].version_zpl != 0; i++) {
+		if (zpl_version <= zfs_version_table[i].version_zpl)
+			return (zfs_version_table[i].version_spa);
+	}
+
+	return (-1);
+}
+
+/*
+ * Human readable names for the pool history log events.  The array is
+ * bounded by LOG_END and is presumably indexed by the corresponding
+ * history event code, so the order here must stay in sync with those
+ * event definitions — verify against the LOG_* declarations.
+ */
+const char *zfs_history_event_names[LOG_END] = {
+	"invalid event",
+	"pool create",
+	"vdev add",
+	"pool remove",
+	"pool destroy",
+	"pool export",
+	"pool import",
+	"vdev attach",
+	"vdev replace",
+	"vdev detach",
+	"vdev online",
+	"vdev offline",
+	"vdev upgrade",
+	"pool clear",
+	"pool scrub",
+	"pool property set",
+	"create",
+	"clone",
+	"destroy",
+	"destroy_begin_sync",
+	"inherit",
+	"property set",
+	"quota set",
+	"permission update",
+	"permission remove",
+	"permission who remove",
+	"promote",
+	"receive",
+	"rename",
+	"reservation set",
+	"replay_inc_sync",
+	"replay_full_sync",
+	"rollback",
+	"snapshot",
+	"filesystem version upgrade",
+	"refquota set",
+	"refreservation set",
+	"pool scrub done",
+	"user hold",
+	"user release",
+	"pool split",
+};
diff --git a/common/zfs/zfs_comutil.h b/common/zfs/zfs_comutil.h
new file mode 100644
index 000000000000..61327f9aa909
--- /dev/null
+++ b/common/zfs/zfs_comutil.h
@@ -0,0 +1,46 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _ZFS_COMUTIL_H
+#define _ZFS_COMUTIL_H
+
+#include <sys/fs/zfs.h>
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern boolean_t zfs_allocatable_devs(nvlist_t *);
+extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *);
+
+extern int zfs_zpl_version_map(int spa_version);
+extern int zfs_spa_version_map(int zpl_version);
+extern const char *zfs_history_event_names[LOG_END];
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFS_COMUTIL_H */
diff --git a/common/zfs/zfs_deleg.c b/common/zfs/zfs_deleg.c
new file mode 100644
index 000000000000..83d9edb21389
--- /dev/null
+++ b/common/zfs/zfs_deleg.c
@@ -0,0 +1,237 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#include <sys/sunddi.h>
+#include <sys/ctype.h>
+#else
+#include <stdio.h>
+#include <unistd.h>
+#include <strings.h>
+#include <libnvpair.h>
+#include <ctype.h>
+#endif
+/* XXX includes zfs_context.h, so why bother with the above? */
+#include <sys/dsl_deleg.h>
+#include "zfs_prop.h"
+#include "zfs_deleg.h"
+#include "zfs_namecheck.h"
+
+/*
+ * permission table
+ *
+ * Keep this table in sorted order
+ *
+ * This table is used for displaying all permissions for
+ * zfs allow
+ */
+
+/*
+ * NOTE(review): the header above says "Keep this table in sorted order",
+ * but SHARE and SEND appear after SNAPSHOT -- confirm whether consumers
+ * rely on strict ordering before resorting.
+ */
+zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
+	{ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW},
+	{ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE },
+	{ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE },
+	{ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY },
+	{ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT },
+	{ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE },
+	{ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE },
+	{ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME },
+	{ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK },
+	{ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
+	{ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
+	{ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE },
+	{ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP },
+	{ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA },
+	{ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
+	{ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED },
+	{ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED },
+	{ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD },
+	{ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE },
+	{ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF},
+	/* NULL z_perm terminates the table */
+	{NULL, ZFS_DELEG_NOTE_NONE }
+};
+
+/*
+ * Return 0 if 'perm' is a valid permission name: either a recognized
+ * built-in permission/delegatable property (canonicalizes to non-NULL)
+ * or a well-formed "@setname" permission set.  Nonzero otherwise.
+ */
+static int
+zfs_valid_permission_name(const char *perm)
+{
+	if (zfs_deleg_canonicalize_perm(perm))
+		return (0);
+
+	return (permset_namecheck(perm, NULL, NULL));
+}
+
+/*
+ * Map 'perm' to its canonical name.  Returns 'perm' itself when it
+ * appears in zfs_deleg_perm_tab, the canonical property name when it
+ * names a delegatable ZFS property, or NULL when it is not recognized.
+ */
+const char *
+zfs_deleg_canonicalize_perm(const char *perm)
+{
+	int i;
+	zfs_prop_t prop;
+
+	/* first try the explicit permission table */
+	for (i = 0; zfs_deleg_perm_tab[i].z_perm != NULL; i++) {
+		if (strcmp(perm, zfs_deleg_perm_tab[i].z_perm) == 0)
+			return (perm);
+	}
+
+	/* otherwise accept any delegatable property name */
+	prop = zfs_name_to_prop(perm);
+	if (prop != ZPROP_INVAL && zfs_prop_delegatable(prop))
+		return (zfs_prop_to_name(prop));
+	return (NULL);
+
+}
+
+/*
+ * Validate the encoded "who" key of a delegation attribute.  The
+ * encoding is three header characters -- entry type, inheritance flag
+ * and ZFS_DELEG_FIELD_SEP_CHR -- followed by type-specific payload.
+ * Returns 0 if valid, -1 otherwise.
+ *
+ * NOTE(review): assumes 'who' holds at least two characters before the
+ * NUL terminator (who[2] is read unconditionally) -- confirm callers
+ * only pass nvpair names of that shape.
+ */
+static int
+zfs_validate_who(char *who)
+{
+	char *p;
+
+	if (who[2] != ZFS_DELEG_FIELD_SEP_CHR)
+		return (-1);
+
+	switch (who[0]) {
+	case ZFS_DELEG_USER:
+	case ZFS_DELEG_GROUP:
+	case ZFS_DELEG_USER_SETS:
+	case ZFS_DELEG_GROUP_SETS:
+		/* user/group entries carry a decimal uid/gid payload */
+		if (who[1] != ZFS_DELEG_LOCAL && who[1] != ZFS_DELEG_DESCENDENT)
+			return (-1);
+		for (p = &who[3]; *p; p++)
+			if (!isdigit(*p))
+				return (-1);
+		break;
+
+	case ZFS_DELEG_NAMED_SET:
+	case ZFS_DELEG_NAMED_SET_SETS:
+		/* named-set entries carry an "@setname" payload */
+		if (who[1] != ZFS_DELEG_NA)
+			return (-1);
+		return (permset_namecheck(&who[3], NULL, NULL));
+
+	case ZFS_DELEG_CREATE:
+	case ZFS_DELEG_CREATE_SETS:
+		/* create-time entries carry no payload */
+		if (who[1] != ZFS_DELEG_NA)
+			return (-1);
+		if (who[3] != '\0')
+			return (-1);
+		break;
+
+	case ZFS_DELEG_EVERYONE:
+	case ZFS_DELEG_EVERYONE_SETS:
+		/* everyone entries carry no payload */
+		if (who[1] != ZFS_DELEG_LOCAL && who[1] != ZFS_DELEG_DESCENDENT)
+			return (-1);
+		if (who[3] != '\0')
+			return (-1);
+		break;
+
+	default:
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Verify that an nvlist of delegation permissions is well formed:
+ * every top-level pair name must be a valid "who" key, and each
+ * embedded permission nvlist (when present) may contain only valid
+ * permission names.  Returns 0 on success, -1 on any malformed entry.
+ */
+int
+zfs_deleg_verify_nvlist(nvlist_t *nvp)
+{
+	nvpair_t *who, *perm_name;
+	nvlist_t *perms;
+	int error;
+
+	if (nvp == NULL)
+		return (-1);
+
+	/* an empty nvlist is considered invalid */
+	who = nvlist_next_nvpair(nvp, NULL);
+	if (who == NULL)
+		return (-1);
+
+	do {
+		if (zfs_validate_who(nvpair_name(who)))
+			return (-1);
+
+		error = nvlist_lookup_nvlist(nvp, nvpair_name(who), &perms);
+
+		/* a "who" with no embedded permission list is allowed */
+		if (error && error != ENOENT)
+			return (-1);
+		if (error == ENOENT)
+			continue;
+
+		/* an embedded list, when present, must be non-empty */
+		perm_name = nvlist_next_nvpair(perms, NULL);
+		if (perm_name == NULL) {
+			return (-1);
+		}
+		do {
+			error = zfs_valid_permission_name(
+			    nvpair_name(perm_name));
+			if (error)
+				return (-1);
+		/* assignment in condition is intentional: iterate pairs */
+		} while (perm_name = nvlist_next_nvpair(perms, perm_name));
+	} while (who = nvlist_next_nvpair(nvp, who));
+	return (0);
+}
+
+/*
+ * Construct the base attribute name. The base attribute names
+ * are the "key" to locate the jump objects which contain the actual
+ * permissions. The base attribute names are encoded based on
+ * type of entry and whether it is a local or descendent permission.
+ *
+ * Arguments:
+ * attr - attribute name return string, attribute is assumed to be
+ * ZFS_MAX_DELEG_NAME long.
+ * type - type of entry to construct
+ * inheritchr - inheritance type (local, descendent, or NA for create and
+ *	permission set definitions)
+ * data - is either a permission set name or a 64 bit uid/gid.
+ */
+void
+zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type,
+    char inheritchr, void *data)
+{
+	int len = ZFS_MAX_DELEG_NAME;
+	uint64_t *id = data;
+
+	switch (type) {
+	case ZFS_DELEG_USER:
+	case ZFS_DELEG_GROUP:
+	case ZFS_DELEG_USER_SETS:
+	case ZFS_DELEG_GROUP_SETS:
+		/* "<type><l|d>$<uid/gid>" -- data is a uint64_t id */
+		(void) snprintf(attr, len, "%c%c%c%lld", type, inheritchr,
+		    ZFS_DELEG_FIELD_SEP_CHR, (longlong_t)*id);
+		break;
+	case ZFS_DELEG_NAMED_SET_SETS:
+	case ZFS_DELEG_NAMED_SET:
+		/* "<type>-$<setname>" -- data is a permission set name */
+		(void) snprintf(attr, len, "%c-%c%s", type,
+		    ZFS_DELEG_FIELD_SEP_CHR, (char *)data);
+		break;
+	case ZFS_DELEG_CREATE:
+	case ZFS_DELEG_CREATE_SETS:
+		/* "<type>-$" -- no payload for create-time entries */
+		(void) snprintf(attr, len, "%c-%c", type,
+		    ZFS_DELEG_FIELD_SEP_CHR);
+		break;
+	case ZFS_DELEG_EVERYONE:
+	case ZFS_DELEG_EVERYONE_SETS:
+		/* "<type><l|d>$" -- no payload for everyone entries */
+		(void) snprintf(attr, len, "%c%c%c", type, inheritchr,
+		    ZFS_DELEG_FIELD_SEP_CHR);
+		break;
+	default:
+		ASSERT(!"bad zfs_deleg_who_type_t");
+	}
+}
diff --git a/common/zfs/zfs_deleg.h b/common/zfs/zfs_deleg.h
new file mode 100644
index 000000000000..b4cb8e2b4e37
--- /dev/null
+++ b/common/zfs/zfs_deleg.h
@@ -0,0 +1,85 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _ZFS_DELEG_H
+#define _ZFS_DELEG_H
+
+#include <sys/fs/zfs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ZFS_DELEG_SET_NAME_CHR '@' /* set name lead char */
+#define ZFS_DELEG_FIELD_SEP_CHR '$' /* field separator */
+
+/*
+ * Max name length for a delegation attribute
+ */
+#define ZFS_MAX_DELEG_NAME 128
+
+#define ZFS_DELEG_LOCAL 'l'
+#define ZFS_DELEG_DESCENDENT 'd'
+#define ZFS_DELEG_NA '-'
+
+typedef enum {
+ ZFS_DELEG_NOTE_CREATE,
+ ZFS_DELEG_NOTE_DESTROY,
+ ZFS_DELEG_NOTE_SNAPSHOT,
+ ZFS_DELEG_NOTE_ROLLBACK,
+ ZFS_DELEG_NOTE_CLONE,
+ ZFS_DELEG_NOTE_PROMOTE,
+ ZFS_DELEG_NOTE_RENAME,
+ ZFS_DELEG_NOTE_RECEIVE,
+ ZFS_DELEG_NOTE_ALLOW,
+ ZFS_DELEG_NOTE_USERPROP,
+ ZFS_DELEG_NOTE_MOUNT,
+ ZFS_DELEG_NOTE_SHARE,
+ ZFS_DELEG_NOTE_USERQUOTA,
+ ZFS_DELEG_NOTE_GROUPQUOTA,
+ ZFS_DELEG_NOTE_USERUSED,
+ ZFS_DELEG_NOTE_GROUPUSED,
+ ZFS_DELEG_NOTE_HOLD,
+ ZFS_DELEG_NOTE_RELEASE,
+ ZFS_DELEG_NOTE_DIFF,
+ ZFS_DELEG_NOTE_NONE
+} zfs_deleg_note_t;
+
+typedef struct zfs_deleg_perm_tab {
+ char *z_perm;
+ zfs_deleg_note_t z_note;
+} zfs_deleg_perm_tab_t;
+
+extern zfs_deleg_perm_tab_t zfs_deleg_perm_tab[];
+
+int zfs_deleg_verify_nvlist(nvlist_t *nvlist);
+void zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type,
+ char checkflag, void *data);
+const char *zfs_deleg_canonicalize_perm(const char *perm);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFS_DELEG_H */
diff --git a/common/zfs/zfs_fletcher.c b/common/zfs/zfs_fletcher.c
new file mode 100644
index 000000000000..fa43ce6bdb5d
--- /dev/null
+++ b/common/zfs/zfs_fletcher.c
@@ -0,0 +1,246 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Fletcher Checksums
+ * ------------------
+ *
+ * ZFS's 2nd and 4th order Fletcher checksums are defined by the following
+ * recurrence relations:
+ *
+ * a = a + f
+ * i i-1 i-1
+ *
+ * b = b + a
+ * i i-1 i
+ *
+ * c = c + b (fletcher-4 only)
+ * i i-1 i
+ *
+ * d = d + c (fletcher-4 only)
+ * i i-1 i
+ *
+ * Where
+ * a_0 = b_0 = c_0 = d_0 = 0
+ * and
+ * f_0 .. f_(n-1) are the input data.
+ *
+ * Using standard techniques, these translate into the following series:
+ *
+ * __n_ __n_
+ * \ | \ |
+ * a = > f b = > i * f
+ * n /___| n - i n /___| n - i
+ * i = 1 i = 1
+ *
+ *
+ * __n_ __n_
+ * \ | i*(i+1) \ | i*(i+1)*(i+2)
+ * c = > ------- f d = > ------------- f
+ * n /___| 2 n - i n /___| 6 n - i
+ * i = 1 i = 1
+ *
+ * For fletcher-2, the f_is are 64-bit, and [ab]_i are 64-bit accumulators.
+ * Since the additions are done mod (2^64), errors in the high bits may not
+ * be noticed. For this reason, fletcher-2 is deprecated.
+ *
+ * For fletcher-4, the f_is are 32-bit, and [abcd]_i are 64-bit accumulators.
+ * A conservative estimate of how big the buffer can get before we overflow
+ * can be estimated using f_i = 0xffffffff for all i:
+ *
+ * % bc
+ * f=2^32-1;d=0; for (i = 1; d<2^64; i++) { d += f*i*(i+1)*(i+2)/6 }; (i-1)*4
+ * 2264
+ * quit
+ * %
+ *
+ * So blocks of up to 2k will not overflow. Our largest block size is
+ * 128k, which has 32k 4-byte words, so we can compute the largest possible
+ * accumulators, then divide by 2^64 to figure the max amount of overflow:
+ *
+ * % bc
+ * a=b=c=d=0; f=2^32-1; for (i=1; i<=32*1024; i++) { a+=f; b+=a; c+=b; d+=c }
+ * a/2^64;b/2^64;c/2^64;d/2^64
+ * 0
+ * 0
+ * 1365
+ * 11186858
+ * quit
+ * %
+ *
+ * So a and b cannot overflow. To make sure each bit of input has some
+ * effect on the contents of c and d, we can look at what the factors of
+ * the coefficients in the equations for c_n and d_n are. The number of 2s
+ * in the factors determines the lowest set bit in the multiplier. Running
+ * through the cases for n*(n+1)/2 reveals that the highest power of 2 is
+ * 2^14, and for n*(n+1)*(n+2)/6 it is 2^15. So while some data may overflow
+ * the 64-bit accumulators, every bit of every f_i effects every accumulator,
+ * even for 128k blocks.
+ *
+ * If we wanted to make a stronger version of fletcher4 (fletcher4c?),
+ * we could do our calculations mod (2^32 - 1) by adding in the carries
+ * periodically, and store the number of carries in the top 32-bits.
+ *
+ * --------------------
+ * Checksum Performance
+ * --------------------
+ *
+ * There are two interesting components to checksum performance: cached and
+ * uncached performance. With cached data, fletcher-2 is about four times
+ * faster than fletcher-4. With uncached data, the performance difference is
+ * negligible, since the cost of a cache fill dominates the processing time.
+ * Even though fletcher-4 is slower than fletcher-2, it is still a pretty
+ * efficient pass over the data.
+ *
+ * In normal operation, the data which is being checksummed is in a buffer
+ * which has been filled either by:
+ *
+ * 1. a compression step, which will be mostly cached, or
+ * 2. a bcopy() or copyin(), which will be uncached (because the
+ * copy is cache-bypassing).
+ *
+ * For both cached and uncached data, both fletcher checksums are much faster
+ * than sha-256, and slower than 'off', which doesn't touch the data at all.
+ */
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/byteorder.h>
+#include <sys/zio.h>
+#include <sys/spa.h>
+
+/*
+ * Compute a fletcher-2 checksum over 'buf' in native byte order,
+ * running two interleaved 64-bit accumulator streams (a0/b0 on even
+ * words, a1/b1 on odd words).  Note that any trailing fragment of
+ * 'size' smaller than 16 bytes is ignored by the pointer arithmetic.
+ */
+void
+fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+	const uint64_t *ip = buf;
+	const uint64_t *ipend = ip + (size / sizeof (uint64_t));
+	uint64_t a0, b0, a1, b1;
+
+	for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) {
+		a0 += ip[0];
+		a1 += ip[1];
+		b0 += a0;
+		b1 += a1;
+	}
+
+	ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
+}
+
+/*
+ * Same as fletcher_2_native(), but byte-swaps each 64-bit word before
+ * accumulating; used when the buffer is in the opposite endianness.
+ */
+void
+fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+	const uint64_t *ip = buf;
+	const uint64_t *ipend = ip + (size / sizeof (uint64_t));
+	uint64_t a0, b0, a1, b1;
+
+	for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) {
+		a0 += BSWAP_64(ip[0]);
+		a1 += BSWAP_64(ip[1]);
+		b0 += a0;
+		b1 += a1;
+	}
+
+	ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
+}
+
+/*
+ * Compute a 4th-order fletcher checksum over 'buf' in native byte
+ * order: four cascaded 64-bit accumulators over 32-bit input words
+ * (see the overflow analysis in the block comment above).
+ */
+void
+fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+	const uint32_t *ip = buf;
+	const uint32_t *ipend = ip + (size / sizeof (uint32_t));
+	uint64_t a, b, c, d;
+
+	for (a = b = c = d = 0; ip < ipend; ip++) {
+		a += ip[0];
+		b += a;
+		c += b;
+		d += c;
+	}
+
+	ZIO_SET_CHECKSUM(zcp, a, b, c, d);
+}
+
+/*
+ * Same as fletcher_4_native(), but byte-swaps each 32-bit word before
+ * accumulating; used when the buffer is in the opposite endianness.
+ */
+void
+fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+	const uint32_t *ip = buf;
+	const uint32_t *ipend = ip + (size / sizeof (uint32_t));
+	uint64_t a, b, c, d;
+
+	for (a = b = c = d = 0; ip < ipend; ip++) {
+		a += BSWAP_32(ip[0]);
+		b += a;
+		c += b;
+		d += c;
+	}
+
+	ZIO_SET_CHECKSUM(zcp, a, b, c, d);
+}
+
+/*
+ * Incremental form of fletcher_4_native(): seeds the accumulators
+ * from the checksum already stored in *zcp so a stream can be
+ * checksummed across multiple calls.
+ */
+void
+fletcher_4_incremental_native(const void *buf, uint64_t size,
+    zio_cksum_t *zcp)
+{
+	const uint32_t *ip = buf;
+	const uint32_t *ipend = ip + (size / sizeof (uint32_t));
+	uint64_t a, b, c, d;
+
+	/* resume from the previously accumulated state */
+	a = zcp->zc_word[0];
+	b = zcp->zc_word[1];
+	c = zcp->zc_word[2];
+	d = zcp->zc_word[3];
+
+	for (; ip < ipend; ip++) {
+		a += ip[0];
+		b += a;
+		c += b;
+		d += c;
+	}
+
+	ZIO_SET_CHECKSUM(zcp, a, b, c, d);
+}
+
+/*
+ * Incremental form of fletcher_4_byteswap(): seeds the accumulators
+ * from the checksum already stored in *zcp, byte-swapping each input
+ * word before accumulating.
+ */
+void
+fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
+    zio_cksum_t *zcp)
+{
+	const uint32_t *ip = buf;
+	const uint32_t *ipend = ip + (size / sizeof (uint32_t));
+	uint64_t a, b, c, d;
+
+	/* resume from the previously accumulated state */
+	a = zcp->zc_word[0];
+	b = zcp->zc_word[1];
+	c = zcp->zc_word[2];
+	d = zcp->zc_word[3];
+
+	for (; ip < ipend; ip++) {
+		a += BSWAP_32(ip[0]);
+		b += a;
+		c += b;
+		d += c;
+	}
+
+	ZIO_SET_CHECKSUM(zcp, a, b, c, d);
+}
diff --git a/common/zfs/zfs_fletcher.h b/common/zfs/zfs_fletcher.h
new file mode 100644
index 000000000000..b49df0cf4f0f
--- /dev/null
+++ b/common/zfs/zfs_fletcher.h
@@ -0,0 +1,53 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ZFS_FLETCHER_H
+#define _ZFS_FLETCHER_H
+
+#include <sys/types.h>
+#include <sys/spa.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * fletcher checksum functions
+ */
+
+void fletcher_2_native(const void *, uint64_t, zio_cksum_t *);
+void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *);
+void fletcher_4_native(const void *, uint64_t, zio_cksum_t *);
+void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *);
+void fletcher_4_incremental_native(const void *, uint64_t,
+ zio_cksum_t *);
+void fletcher_4_incremental_byteswap(const void *, uint64_t,
+ zio_cksum_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFS_FLETCHER_H */
diff --git a/common/zfs/zfs_namecheck.c b/common/zfs/zfs_namecheck.c
new file mode 100644
index 000000000000..5cfafea471b3
--- /dev/null
+++ b/common/zfs/zfs_namecheck.c
@@ -0,0 +1,345 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Common name validation routines for ZFS. These routines are shared by the
+ * userland code as well as the ioctl() layer to ensure that we don't
+ * inadvertently expose a hole through direct ioctl()s that never gets tested.
+ * In userland, however, we want significantly more information about _why_ the
+ * name is invalid. In the kernel, we only care whether it's valid or not.
+ * Each routine therefore takes a 'namecheck_err_t' which describes exactly why
+ * the name failed to validate.
+ *
+ * Each function returns 0 on success, -1 on error.
+ */
+
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#else
+#include <string.h>
+#endif
+
+#include <sys/param.h>
+#include <sys/nvpair.h>
+#include "zfs_namecheck.h"
+#include "zfs_deleg.h"
+
+/*
+ * Return nonzero if 'c' may appear in a dataset/snapshot name
+ * component: alphanumerics plus '-', '_', '.', ':' and space.
+ */
+static int
+valid_char(char c)
+{
+	return ((c >= 'a' && c <= 'z') ||
+	    (c >= 'A' && c <= 'Z') ||
+	    (c >= '0' && c <= '9') ||
+	    c == '-' || c == '_' || c == '.' || c == ':' || c == ' ');
+}
+
+/*
+ * Snapshot names must be made up of alphanumeric characters plus the following
+ * characters:
+ *
+ * [-_.: ]
+ */
+int
+snapshot_namecheck(const char *path, namecheck_err_t *why, char *what)
+{
+	const char *loc;
+
+	/* enforce the overall name-length limit */
+	if (strlen(path) >= MAXNAMELEN) {
+		if (why)
+			*why = NAME_ERR_TOOLONG;
+		return (-1);
+	}
+
+	/* an empty name is not a valid component */
+	if (path[0] == '\0') {
+		if (why)
+			*why = NAME_ERR_EMPTY_COMPONENT;
+		return (-1);
+	}
+
+	/* every character must come from the allowed set */
+	for (loc = path; *loc; loc++) {
+		if (!valid_char(*loc)) {
+			if (why) {
+				*why = NAME_ERR_INVALCHAR;
+				/* report the offending character */
+				*what = *loc;
+			}
+			return (-1);
+		}
+	}
+	return (0);
+}
+
+
+/*
+ * Permissions set name must start with the letter '@' followed by the
+ * same character restrictions as snapshot names, except that the name
+ * cannot exceed 64 characters.
+ */
+int
+permset_namecheck(const char *path, namecheck_err_t *why, char *what)
+{
+	/* permission set names have a tighter length limit (64) */
+	if (strlen(path) >= ZFS_PERMSET_MAXLEN) {
+		if (why)
+			*why = NAME_ERR_TOOLONG;
+		return (-1);
+	}
+
+	/* must start with the set-name lead character '@' */
+	if (path[0] != '@') {
+		if (why) {
+			*why = NAME_ERR_NO_AT;
+			*what = path[0];
+		}
+		return (-1);
+	}
+
+	/* the remainder follows the snapshot character rules */
+	return (snapshot_namecheck(&path[1], why, what));
+}
+
+/*
+ * Dataset names must be of the following form:
+ *
+ * [component][/]*[component][@component]
+ *
+ * Where each component is made up of alphanumeric characters plus the following
+ * characters:
+ *
+ * [-_.:%]
+ *
+ * We allow '%' here as we use that character internally to create unique
+ * names for temporary clones (for online recv).
+ */
+int
+dataset_namecheck(const char *path, namecheck_err_t *why, char *what)
+{
+	const char *loc, *end;
+	int found_snapshot;
+
+	/*
+	 * Make sure the name is not too long.
+	 *
+	 * ZFS_MAXNAMELEN is the maximum dataset length used in the userland
+	 * which is the same as MAXNAMELEN used in the kernel.
+	 * If ZFS_MAXNAMELEN value is changed, make sure to cleanup all
+	 * places using MAXNAMELEN.
+	 */
+
+	if (strlen(path) >= MAXNAMELEN) {
+		if (why)
+			*why = NAME_ERR_TOOLONG;
+		return (-1);
+	}
+
+	/* Explicitly check for a leading slash. */
+	if (path[0] == '/') {
+		if (why)
+			*why = NAME_ERR_LEADING_SLASH;
+		return (-1);
+	}
+
+	if (path[0] == '\0') {
+		if (why)
+			*why = NAME_ERR_EMPTY_COMPONENT;
+		return (-1);
+	}
+
+	/* walk the name one '/'- or '@'-delimited component at a time */
+	loc = path;
+	found_snapshot = 0;
+	for (;;) {
+		/* Find the end of this component */
+		end = loc;
+		while (*end != '/' && *end != '@' && *end != '\0')
+			end++;
+
+		/* end[-1] is safe here: the empty string was rejected above */
+		if (*end == '\0' && end[-1] == '/') {
+			/* trailing slashes are not allowed */
+			if (why)
+				*why = NAME_ERR_TRAILING_SLASH;
+			return (-1);
+		}
+
+		/* Zero-length components are not allowed */
+		if (loc == end) {
+			if (why) {
+				/*
+				 * Make sure this is really a zero-length
+				 * component and not a '@@'.
+				 */
+				if (*end == '@' && found_snapshot) {
+					*why = NAME_ERR_MULTIPLE_AT;
+				} else {
+					*why = NAME_ERR_EMPTY_COMPONENT;
+				}
+			}
+
+			return (-1);
+		}
+
+		/* Validate the contents of this component */
+		while (loc != end) {
+			/* '%' is allowed internally for temporary clones */
+			if (!valid_char(*loc) && *loc != '%') {
+				if (why) {
+					*why = NAME_ERR_INVALCHAR;
+					*what = *loc;
+				}
+				return (-1);
+			}
+			loc++;
+		}
+
+		/* If we've reached the end of the string, we're OK */
+		if (*end == '\0')
+			return (0);
+
+		if (*end == '@') {
+			/*
+			 * If we've found an @ symbol, indicate that we're in
+			 * the snapshot component, and report a second '@'
+			 * character as an error.
+			 */
+			if (found_snapshot) {
+				if (why)
+					*why = NAME_ERR_MULTIPLE_AT;
+				return (-1);
+			}
+
+			found_snapshot = 1;
+		}
+
+		/*
+		 * If there is a '/' in a snapshot name
+		 * then report an error
+		 */
+		if (*end == '/' && found_snapshot) {
+			if (why)
+				*why = NAME_ERR_TRAILING_SLASH;
+			return (-1);
+		}
+
+		/* Update to the next component */
+		loc = end + 1;
+	}
+}
+
+
+/*
+ * mountpoint names must be of the following form:
+ *
+ * /[component][/]*[component][/]
+ */
+int
+mountpoint_namecheck(const char *path, namecheck_err_t *why)
+{
+	const char *start, *end;
+
+	/*
+	 * Make sure none of the mountpoint component names are too long.
+	 * If a component name is too long then the mkdir of the mountpoint
+	 * will fail but then the mountpoint property will be set to a value
+	 * that can never be mounted.  Better to fail before setting the prop.
+	 * Extra slashes are OK, they will be tossed by the mountpoint mkdir.
+	 */
+
+	/* a NULL path is also reported as NAME_ERR_LEADING_SLASH */
+	if (path == NULL || *path != '/') {
+		if (why)
+			*why = NAME_ERR_LEADING_SLASH;
+		return (-1);
+	}
+
+	/* Skip leading slash */
+	start = &path[1];
+	do {
+		/* scan to the next '/' or the end of the string */
+		end = start;
+		while (*end != '/' && *end != '\0')
+			end++;
+
+		/* each individual component must fit in MAXNAMELEN */
+		if (end - start >= MAXNAMELEN) {
+			if (why)
+				*why = NAME_ERR_TOOLONG;
+			return (-1);
+		}
+		start = end + 1;
+
+	} while (*end != '\0');
+
+	return (0);
+}
+
+/*
+ * For pool names, we have the same set of valid characters as described in
+ * dataset names, with the additional restriction that the pool name must begin
+ * with a letter. The pool names 'raidz' and 'mirror' are also reserved names
+ * that cannot be used.
+ */
+int
+pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
+{
+	const char *c;
+
+	/*
+	 * Make sure the name is not too long.
+	 *
+	 * ZPOOL_MAXNAMELEN is the maximum pool length used in the userland
+	 * which is the same as MAXNAMELEN used in the kernel.
+	 * If ZPOOL_MAXNAMELEN value is changed, make sure to cleanup all
+	 * places using MAXNAMELEN.
+	 */
+	if (strlen(pool) >= MAXNAMELEN) {
+		if (why)
+			*why = NAME_ERR_TOOLONG;
+		return (-1);
+	}
+
+	/* pool names use the same character set as dataset components */
+	c = pool;
+	while (*c != '\0') {
+		if (!valid_char(*c)) {
+			if (why) {
+				*why = NAME_ERR_INVALCHAR;
+				*what = *c;
+			}
+			return (-1);
+		}
+		c++;
+	}
+
+	/* the first character must be a letter */
+	if (!(*pool >= 'a' && *pool <= 'z') &&
+	    !(*pool >= 'A' && *pool <= 'Z')) {
+		if (why)
+			*why = NAME_ERR_NOLETTER;
+		return (-1);
+	}
+
+	/* "mirror" and "raidz" are reserved vdev-type names */
+	if (strcmp(pool, "mirror") == 0 || strcmp(pool, "raidz") == 0) {
+		if (why)
+			*why = NAME_ERR_RESERVED;
+		return (-1);
+	}
+
+	/* reject names that look like disk devices (c[0-9]...) */
+	if (pool[0] == 'c' && (pool[1] >= '0' && pool[1] <= '9')) {
+		if (why)
+			*why = NAME_ERR_DISKLIKE;
+		return (-1);
+	}
+
+	return (0);
+}
diff --git a/common/zfs/zfs_namecheck.h b/common/zfs/zfs_namecheck.h
new file mode 100644
index 000000000000..7711da099be9
--- /dev/null
+++ b/common/zfs/zfs_namecheck.h
@@ -0,0 +1,58 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ZFS_NAMECHECK_H
+#define _ZFS_NAMECHECK_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+ NAME_ERR_LEADING_SLASH, /* name begins with leading slash */
+ NAME_ERR_EMPTY_COMPONENT, /* name contains an empty component */
+ NAME_ERR_TRAILING_SLASH, /* name ends with a slash */
+ NAME_ERR_INVALCHAR, /* invalid character found */
+ NAME_ERR_MULTIPLE_AT, /* multiple '@' characters found */
+ NAME_ERR_NOLETTER, /* pool doesn't begin with a letter */
+ NAME_ERR_RESERVED, /* entire name is reserved */
+ NAME_ERR_DISKLIKE, /* reserved disk name (c[0-9].*) */
+ NAME_ERR_TOOLONG, /* name is too long */
+ NAME_ERR_NO_AT, /* permission set is missing '@' */
+} namecheck_err_t;
+
+#define ZFS_PERMSET_MAXLEN 64
+
+int pool_namecheck(const char *, namecheck_err_t *, char *);
+int dataset_namecheck(const char *, namecheck_err_t *, char *);
+int mountpoint_namecheck(const char *, namecheck_err_t *);
+int snapshot_namecheck(const char *, namecheck_err_t *, char *);
+int permset_namecheck(const char *, namecheck_err_t *, char *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFS_NAMECHECK_H */
diff --git a/common/zfs/zfs_prop.c b/common/zfs/zfs_prop.c
new file mode 100644
index 000000000000..f29bcf62718f
--- /dev/null
+++ b/common/zfs/zfs_prop.c
@@ -0,0 +1,595 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/* Portions Copyright 2010 Robert Milkowski */
+
+#include <sys/zio.h>
+#include <sys/spa.h>
+#include <sys/u8_textprep.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_znode.h>
+
+#include "zfs_prop.h"
+#include "zfs_deleg.h"
+
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#else
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#endif
+
+static zprop_desc_t zfs_prop_table[ZFS_NUM_PROPS];
+
+/* Note this is indexed by zfs_userquota_prop_t, keep the order the same */
+const char *zfs_userquota_prop_prefixes[] = {
+ "userused@",
+ "userquota@",
+ "groupused@",
+ "groupquota@"
+};
+
+zprop_desc_t *
+zfs_prop_get_table(void)
+{
+ return (zfs_prop_table);
+}
+
+void
+zfs_prop_init(void)
+{
+ static zprop_index_t checksum_table[] = {
+ { "on", ZIO_CHECKSUM_ON },
+ { "off", ZIO_CHECKSUM_OFF },
+ { "fletcher2", ZIO_CHECKSUM_FLETCHER_2 },
+ { "fletcher4", ZIO_CHECKSUM_FLETCHER_4 },
+ { "sha256", ZIO_CHECKSUM_SHA256 },
+ { NULL }
+ };
+
+ static zprop_index_t dedup_table[] = {
+ { "on", ZIO_CHECKSUM_ON },
+ { "off", ZIO_CHECKSUM_OFF },
+ { "verify", ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY },
+ { "sha256", ZIO_CHECKSUM_SHA256 },
+ { "sha256,verify",
+ ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY },
+ { NULL }
+ };
+
+ static zprop_index_t compress_table[] = {
+ { "on", ZIO_COMPRESS_ON },
+ { "off", ZIO_COMPRESS_OFF },
+ { "lzjb", ZIO_COMPRESS_LZJB },
+ { "gzip", ZIO_COMPRESS_GZIP_6 }, /* gzip default */
+ { "gzip-1", ZIO_COMPRESS_GZIP_1 },
+ { "gzip-2", ZIO_COMPRESS_GZIP_2 },
+ { "gzip-3", ZIO_COMPRESS_GZIP_3 },
+ { "gzip-4", ZIO_COMPRESS_GZIP_4 },
+ { "gzip-5", ZIO_COMPRESS_GZIP_5 },
+ { "gzip-6", ZIO_COMPRESS_GZIP_6 },
+ { "gzip-7", ZIO_COMPRESS_GZIP_7 },
+ { "gzip-8", ZIO_COMPRESS_GZIP_8 },
+ { "gzip-9", ZIO_COMPRESS_GZIP_9 },
+ { "zle", ZIO_COMPRESS_ZLE },
+ { NULL }
+ };
+
+ static zprop_index_t snapdir_table[] = {
+ { "hidden", ZFS_SNAPDIR_HIDDEN },
+ { "visible", ZFS_SNAPDIR_VISIBLE },
+ { NULL }
+ };
+
+ static zprop_index_t acl_inherit_table[] = {
+ { "discard", ZFS_ACL_DISCARD },
+ { "noallow", ZFS_ACL_NOALLOW },
+ { "restricted", ZFS_ACL_RESTRICTED },
+ { "passthrough", ZFS_ACL_PASSTHROUGH },
+ { "secure", ZFS_ACL_RESTRICTED }, /* bkwrd compatability */
+ { "passthrough-x", ZFS_ACL_PASSTHROUGH_X },
+ { NULL }
+ };
+
+ static zprop_index_t case_table[] = {
+ { "sensitive", ZFS_CASE_SENSITIVE },
+ { "insensitive", ZFS_CASE_INSENSITIVE },
+ { "mixed", ZFS_CASE_MIXED },
+ { NULL }
+ };
+
+ static zprop_index_t copies_table[] = {
+ { "1", 1 },
+ { "2", 2 },
+ { "3", 3 },
+ { NULL }
+ };
+
+ /*
+ * Use the unique flags we have to send to u8_strcmp() and/or
+ * u8_textprep() to represent the various normalization property
+ * values.
+ */
+ static zprop_index_t normalize_table[] = {
+ { "none", 0 },
+ { "formD", U8_TEXTPREP_NFD },
+ { "formKC", U8_TEXTPREP_NFKC },
+ { "formC", U8_TEXTPREP_NFC },
+ { "formKD", U8_TEXTPREP_NFKD },
+ { NULL }
+ };
+
+ static zprop_index_t version_table[] = {
+ { "1", 1 },
+ { "2", 2 },
+ { "3", 3 },
+ { "4", 4 },
+ { "5", 5 },
+ { "current", ZPL_VERSION },
+ { NULL }
+ };
+
+ static zprop_index_t boolean_table[] = {
+ { "off", 0 },
+ { "on", 1 },
+ { NULL }
+ };
+
+ static zprop_index_t logbias_table[] = {
+ { "latency", ZFS_LOGBIAS_LATENCY },
+ { "throughput", ZFS_LOGBIAS_THROUGHPUT },
+ { NULL }
+ };
+
+ static zprop_index_t canmount_table[] = {
+ { "off", ZFS_CANMOUNT_OFF },
+ { "on", ZFS_CANMOUNT_ON },
+ { "noauto", ZFS_CANMOUNT_NOAUTO },
+ { NULL }
+ };
+
+ static zprop_index_t cache_table[] = {
+ { "none", ZFS_CACHE_NONE },
+ { "metadata", ZFS_CACHE_METADATA },
+ { "all", ZFS_CACHE_ALL },
+ { NULL }
+ };
+
+ static zprop_index_t sync_table[] = {
+ { "standard", ZFS_SYNC_STANDARD },
+ { "always", ZFS_SYNC_ALWAYS },
+ { "disabled", ZFS_SYNC_DISABLED },
+ { NULL }
+ };
+
+ /* inherit index properties */
+ zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD,
+ PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "standard | always | disabled", "SYNC",
+ sync_table);
+ zprop_register_index(ZFS_PROP_CHECKSUM, "checksum",
+ ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM |
+ ZFS_TYPE_VOLUME,
+ "on | off | fletcher2 | fletcher4 | sha256", "CHECKSUM",
+ checksum_table);
+ zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF,
+ PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "on | off | verify | sha256[,verify]", "DEDUP",
+ dedup_table);
+ zprop_register_index(ZFS_PROP_COMPRESSION, "compression",
+ ZIO_COMPRESS_DEFAULT, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "on | off | lzjb | gzip | gzip-[1-9] | zle", "COMPRESS",
+ compress_table);
+ zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN,
+ PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
+ "hidden | visible", "SNAPDIR", snapdir_table);
+ zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit",
+ ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
+ "discard | noallow | restricted | passthrough | passthrough-x",
+ "ACLINHERIT", acl_inherit_table);
+ zprop_register_index(ZFS_PROP_COPIES, "copies", 1, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "1 | 2 | 3", "COPIES", copies_table);
+ zprop_register_index(ZFS_PROP_PRIMARYCACHE, "primarycache",
+ ZFS_CACHE_ALL, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME,
+ "all | none | metadata", "PRIMARYCACHE", cache_table);
+ zprop_register_index(ZFS_PROP_SECONDARYCACHE, "secondarycache",
+ ZFS_CACHE_ALL, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME,
+ "all | none | metadata", "SECONDARYCACHE", cache_table);
+ zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY,
+ PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "latency | throughput", "LOGBIAS", logbias_table);
+
+ /* inherit index (boolean) properties */
+ zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table);
+ zprop_register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "DEVICES",
+ boolean_table);
+ zprop_register_index(ZFS_PROP_EXEC, "exec", 1, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "EXEC",
+ boolean_table);
+ zprop_register_index(ZFS_PROP_SETUID, "setuid", 1, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "SETUID",
+ boolean_table);
+ zprop_register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off", "RDONLY",
+ boolean_table);
+ zprop_register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM, "on | off", "ZONED", boolean_table);
+ zprop_register_index(ZFS_PROP_XATTR, "xattr", 1, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "XATTR",
+ boolean_table);
+ zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN",
+ boolean_table);
+ zprop_register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "NBMAND",
+ boolean_table);
+
+ /* default index properties */
+ zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
+ "1 | 2 | 3 | 4 | current", "VERSION", version_table);
+ zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON,
+ PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto",
+ "CANMOUNT", canmount_table);
+
+ /* readonly index (boolean) properties */
+ zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY,
+ ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table);
+ zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0,
+ PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY",
+ boolean_table);
+
+ /* set once index properties */
+ zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0,
+ PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
+ "none | formC | formD | formKC | formKD", "NORMALIZATION",
+ normalize_table);
+ zprop_register_index(ZFS_PROP_CASE, "casesensitivity",
+ ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM |
+ ZFS_TYPE_SNAPSHOT,
+ "sensitive | insensitive | mixed", "CASE", case_table);
+
+ /* set once index (boolean) properties */
+ zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
+ "on | off", "UTF8ONLY", boolean_table);
+
+ /* string properties */
+ zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<snapshot>", "ORIGIN");
+ zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/",
+ PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "<path> | legacy | none",
+ "MOUNTPOINT");
+ zprop_register_string(ZFS_PROP_SHARENFS, "sharenfs", "off",
+ PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | share(1M) options",
+ "SHARENFS");
+ zprop_register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY,
+ ZFS_TYPE_DATASET, "filesystem | volume | snapshot", "TYPE");
+ zprop_register_string(ZFS_PROP_SHARESMB, "sharesmb", "off",
+ PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
+ "on | off | sharemgr(1M) options", "SHARESMB");
+ zprop_register_string(ZFS_PROP_MLSLABEL, "mlslabel",
+ ZFS_MLSLABEL_DEFAULT, PROP_INHERIT, ZFS_TYPE_DATASET,
+ "<sensitivity label>", "MLSLABEL");
+
+ /* readonly number properties */
+ zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY,
+ ZFS_TYPE_DATASET, "<size>", "USED");
+ zprop_register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "AVAIL");
+ zprop_register_number(ZFS_PROP_REFERENCED, "referenced", 0,
+ PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "REFER");
+ zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0,
+ PROP_READONLY, ZFS_TYPE_DATASET,
+ "<1.00x or higher if compressed>", "RATIO");
+ zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize",
+ ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME,
+ ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK");
+ zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0,
+ PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
+ "USEDSNAP");
+ zprop_register_number(ZFS_PROP_USEDDS, "usedbydataset", 0,
+ PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
+ "USEDDS");
+ zprop_register_number(ZFS_PROP_USEDCHILD, "usedbychildren", 0,
+ PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
+ "USEDCHILD");
+ zprop_register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0,
+ PROP_READONLY,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDREFRESERV");
+ zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY,
+ ZFS_TYPE_SNAPSHOT, "<count>", "USERREFS");
+
+ /* default number properties */
+ zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT,
+ ZFS_TYPE_FILESYSTEM, "<size> | none", "QUOTA");
+ zprop_register_number(ZFS_PROP_RESERVATION, "reservation", 0,
+ PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "<size> | none", "RESERV");
+ zprop_register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT,
+ ZFS_TYPE_VOLUME, "<size>", "VOLSIZE");
+ zprop_register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT,
+ ZFS_TYPE_FILESYSTEM, "<size> | none", "REFQUOTA");
+ zprop_register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0,
+ PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "<size> | none", "REFRESERV");
+
+ /* inherit number properties */
+ zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize",
+ SPA_MAXBLOCKSIZE, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM, "512 to 128k, power of 2", "RECSIZE");
+
+ /* hidden properties */
+ zprop_register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER,
+ PROP_READONLY, ZFS_TYPE_DATASET, "CREATETXG");
+ zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER,
+ PROP_READONLY, ZFS_TYPE_SNAPSHOT, "NUMCLONES");
+ zprop_register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING,
+ PROP_READONLY, ZFS_TYPE_DATASET, "NAME");
+ zprop_register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions",
+ PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS");
+ zprop_register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu",
+ PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME,
+ "STMF_SBD_LU");
+ zprop_register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER,
+ PROP_READONLY, ZFS_TYPE_DATASET, "GUID");
+ zprop_register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting",
+ PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET,
+ "USERACCOUNTING");
+ zprop_register_hidden(ZFS_PROP_UNIQUE, "unique", PROP_TYPE_NUMBER,
+ PROP_READONLY, ZFS_TYPE_DATASET, "UNIQUE");
+ zprop_register_hidden(ZFS_PROP_OBJSETID, "objsetid", PROP_TYPE_NUMBER,
+ PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID");
+
+ /*
+ * Property to be removed once libbe is integrated
+ */
+ zprop_register_hidden(ZFS_PROP_PRIVATE, "priv_prop",
+ PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_FILESYSTEM,
+ "PRIV_PROP");
+
+ /* oddball properties */
+ zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0,
+ NULL, PROP_READONLY, ZFS_TYPE_DATASET,
+ "<date>", "CREATION", B_FALSE, B_TRUE, NULL);
+}
+
+boolean_t
+zfs_prop_delegatable(zfs_prop_t prop)
+{
+ zprop_desc_t *pd = &zfs_prop_table[prop];
+
+ /* The mlslabel property is never delegatable. */
+ if (prop == ZFS_PROP_MLSLABEL)
+ return (B_FALSE);
+
+ return (pd->pd_attr != PROP_READONLY);
+}
+
+/*
+ * Given a zfs dataset property name, returns the corresponding property ID.
+ */
+zfs_prop_t
+zfs_name_to_prop(const char *propname)
+{
+ return (zprop_name_to_prop(propname, ZFS_TYPE_DATASET));
+}
+
+/*
+ * For user property names, we allow all lowercase alphanumeric characters, plus
+ * a few useful punctuation characters.
+ */
+static int
+valid_char(char c)
+{
+ return ((c >= 'a' && c <= 'z') ||
+ (c >= '0' && c <= '9') ||
+ c == '-' || c == '_' || c == '.' || c == ':');
+}
+
+/*
+ * Returns true if this is a valid user-defined property (one with a ':').
+ */
+boolean_t
+zfs_prop_user(const char *name)
+{
+ int i;
+ char c;
+ boolean_t foundsep = B_FALSE;
+
+ for (i = 0; i < strlen(name); i++) {
+ c = name[i];
+ if (!valid_char(c))
+ return (B_FALSE);
+ if (c == ':')
+ foundsep = B_TRUE;
+ }
+
+ if (!foundsep)
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+/*
+ * Returns true if this is a valid userspace-type property (one with a '@').
+ * Note that after the @, any character is valid (eg, another @, for SID
+ * user@domain).
+ */
+boolean_t
+zfs_prop_userquota(const char *name)
+{
+ zfs_userquota_prop_t prop;
+
+ for (prop = 0; prop < ZFS_NUM_USERQUOTA_PROPS; prop++) {
+ if (strncmp(name, zfs_userquota_prop_prefixes[prop],
+ strlen(zfs_userquota_prop_prefixes[prop])) == 0) {
+ return (B_TRUE);
+ }
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Tables of index types, plus functions to convert between the user view
+ * (strings) and internal representation (uint64_t).
+ */
+int
+zfs_prop_string_to_index(zfs_prop_t prop, const char *string, uint64_t *index)
+{
+ return (zprop_string_to_index(prop, string, index, ZFS_TYPE_DATASET));
+}
+
+int
+zfs_prop_index_to_string(zfs_prop_t prop, uint64_t index, const char **string)
+{
+ return (zprop_index_to_string(prop, index, string, ZFS_TYPE_DATASET));
+}
+
+uint64_t
+zfs_prop_random_value(zfs_prop_t prop, uint64_t seed)
+{
+ return (zprop_random_value(prop, seed, ZFS_TYPE_DATASET));
+}
+
+/*
+ * Returns TRUE if the property applies to any of the given dataset types.
+ */
+boolean_t
+zfs_prop_valid_for_type(int prop, zfs_type_t types)
+{
+ return (zprop_valid_for_type(prop, types));
+}
+
+zprop_type_t
+zfs_prop_get_type(zfs_prop_t prop)
+{
+ return (zfs_prop_table[prop].pd_proptype);
+}
+
+/*
+ * Returns TRUE if the property is readonly.
+ */
+boolean_t
+zfs_prop_readonly(zfs_prop_t prop)
+{
+ return (zfs_prop_table[prop].pd_attr == PROP_READONLY ||
+ zfs_prop_table[prop].pd_attr == PROP_ONETIME);
+}
+
+/*
+ * Returns TRUE if the property is only allowed to be set once.
+ */
+boolean_t
+zfs_prop_setonce(zfs_prop_t prop)
+{
+ return (zfs_prop_table[prop].pd_attr == PROP_ONETIME);
+}
+
+const char *
+zfs_prop_default_string(zfs_prop_t prop)
+{
+ return (zfs_prop_table[prop].pd_strdefault);
+}
+
+uint64_t
+zfs_prop_default_numeric(zfs_prop_t prop)
+{
+ return (zfs_prop_table[prop].pd_numdefault);
+}
+
+/*
+ * Given a dataset property ID, returns the corresponding name.
+ * Assuming the zfs dataset property ID is valid.
+ */
+const char *
+zfs_prop_to_name(zfs_prop_t prop)
+{
+ return (zfs_prop_table[prop].pd_name);
+}
+
+/*
+ * Returns TRUE if the property is inheritable.
+ */
+boolean_t
+zfs_prop_inheritable(zfs_prop_t prop)
+{
+ return (zfs_prop_table[prop].pd_attr == PROP_INHERIT ||
+ zfs_prop_table[prop].pd_attr == PROP_ONETIME);
+}
+
+#ifndef _KERNEL
+
+/*
+ * Returns a string describing the set of acceptable values for the given
+ * zfs property, or NULL if it cannot be set.
+ */
+const char *
+zfs_prop_values(zfs_prop_t prop)
+{
+ return (zfs_prop_table[prop].pd_values);
+}
+
+/*
+ * Returns TRUE if this property is a string type. Note that index types
+ * (compression, checksum) are treated as strings in userland, even though they
+ * are stored numerically on disk.
+ */
+int
+zfs_prop_is_string(zfs_prop_t prop)
+{
+ return (zfs_prop_table[prop].pd_proptype == PROP_TYPE_STRING ||
+ zfs_prop_table[prop].pd_proptype == PROP_TYPE_INDEX);
+}
+
+/*
+ * Returns the column header for the given property. Used only in
+ * 'zfs list -o', but centralized here with the other property information.
+ */
+const char *
+zfs_prop_column_name(zfs_prop_t prop)
+{
+ return (zfs_prop_table[prop].pd_colname);
+}
+
+/*
+ * Returns whether the given property should be displayed right-justified for
+ * 'zfs list'.
+ */
+boolean_t
+zfs_prop_align_right(zfs_prop_t prop)
+{
+ return (zfs_prop_table[prop].pd_rightalign);
+}
+
+#endif
diff --git a/common/zfs/zfs_prop.h b/common/zfs/zfs_prop.h
new file mode 100644
index 000000000000..a63262311b3d
--- /dev/null
+++ b/common/zfs/zfs_prop.h
@@ -0,0 +1,129 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ZFS_PROP_H
+#define _ZFS_PROP_H
+
+#include <sys/fs/zfs.h>
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * For index types (e.g. compression and checksum), we want the numeric value
+ * in the kernel, but the string value in userland.
+ */
+typedef enum {
+ PROP_TYPE_NUMBER, /* numeric value */
+ PROP_TYPE_STRING, /* string value */
+ PROP_TYPE_INDEX /* numeric value indexed by string */
+} zprop_type_t;
+
+typedef enum {
+ PROP_DEFAULT,
+ PROP_READONLY,
+ PROP_INHERIT,
+ /*
+ * ONETIME properties are a sort of conglomeration of READONLY
+ * and INHERIT. They can be set only during object creation,
+ * after that they are READONLY. If not explicitly set during
+ * creation, they can be inherited.
+ */
+ PROP_ONETIME
+} zprop_attr_t;
+
+typedef struct zfs_index {
+ const char *pi_name;
+ uint64_t pi_value;
+} zprop_index_t;
+
+typedef struct {
+ const char *pd_name; /* human-readable property name */
+ int pd_propnum; /* property number */
+ zprop_type_t pd_proptype; /* string, boolean, index, number */
+ const char *pd_strdefault; /* default for strings */
+ uint64_t pd_numdefault; /* for boolean / index / number */
+ zprop_attr_t pd_attr; /* default, readonly, inherit */
+ int pd_types; /* bitfield of valid dataset types */
+ /* fs | vol | snap; or pool */
+ const char *pd_values; /* string telling acceptable values */
+ const char *pd_colname; /* column header for "zfs list" */
+ boolean_t pd_rightalign; /* column alignment for "zfs list" */
+ boolean_t pd_visible; /* do we list this property with the */
+ /* "zfs get" help message */
+ const zprop_index_t *pd_table; /* for index properties, a table */
+ /* defining the possible values */
+ size_t pd_table_size; /* number of entries in pd_table[] */
+} zprop_desc_t;
+
+/*
+ * zfs dataset property functions
+ */
+void zfs_prop_init(void);
+zprop_type_t zfs_prop_get_type(zfs_prop_t);
+boolean_t zfs_prop_delegatable(zfs_prop_t prop);
+zprop_desc_t *zfs_prop_get_table(void);
+
+/*
+ * zpool property functions
+ */
+void zpool_prop_init(void);
+zprop_type_t zpool_prop_get_type(zpool_prop_t);
+zprop_desc_t *zpool_prop_get_table(void);
+
+/*
+ * Common routines to initialize property tables
+ */
+void zprop_register_impl(int, const char *, zprop_type_t, uint64_t,
+ const char *, zprop_attr_t, int, const char *, const char *,
+ boolean_t, boolean_t, const zprop_index_t *);
+void zprop_register_string(int, const char *, const char *,
+ zprop_attr_t attr, int, const char *, const char *);
+void zprop_register_number(int, const char *, uint64_t, zprop_attr_t, int,
+ const char *, const char *);
+void zprop_register_index(int, const char *, uint64_t, zprop_attr_t, int,
+ const char *, const char *, const zprop_index_t *);
+void zprop_register_hidden(int, const char *, zprop_type_t, zprop_attr_t,
+ int, const char *);
+
+/*
+ * Common routines for zfs and zpool property management
+ */
+int zprop_iter_common(zprop_func, void *, boolean_t, boolean_t, zfs_type_t);
+int zprop_name_to_prop(const char *, zfs_type_t);
+int zprop_string_to_index(int, const char *, uint64_t *, zfs_type_t);
+int zprop_index_to_string(int, uint64_t, const char **, zfs_type_t);
+uint64_t zprop_random_value(int, uint64_t, zfs_type_t);
+const char *zprop_values(int, zfs_type_t);
+size_t zprop_width(int, boolean_t *, zfs_type_t);
+boolean_t zprop_valid_for_type(int, zfs_type_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFS_PROP_H */
diff --git a/common/zfs/zpool_prop.c b/common/zfs/zpool_prop.c
new file mode 100644
index 000000000000..988d05de6e20
--- /dev/null
+++ b/common/zfs/zpool_prop.c
@@ -0,0 +1,202 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/zio.h>
+#include <sys/spa.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/fs/zfs.h>
+
+#include "zfs_prop.h"
+
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#else
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#endif
+
+static zprop_desc_t zpool_prop_table[ZPOOL_NUM_PROPS];
+
+zprop_desc_t *
+zpool_prop_get_table(void)
+{
+ return (zpool_prop_table);
+}
+
+void
+zpool_prop_init(void)
+{
+ static zprop_index_t boolean_table[] = {
+ { "off", 0},
+ { "on", 1},
+ { NULL }
+ };
+
+ static zprop_index_t failuremode_table[] = {
+ { "wait", ZIO_FAILURE_MODE_WAIT },
+ { "continue", ZIO_FAILURE_MODE_CONTINUE },
+ { "panic", ZIO_FAILURE_MODE_PANIC },
+ { NULL }
+ };
+
+ /* string properties */
+ zprop_register_string(ZPOOL_PROP_ALTROOT, "altroot", NULL, PROP_DEFAULT,
+ ZFS_TYPE_POOL, "<path>", "ALTROOT");
+ zprop_register_string(ZPOOL_PROP_BOOTFS, "bootfs", NULL, PROP_DEFAULT,
+ ZFS_TYPE_POOL, "<filesystem>", "BOOTFS");
+ zprop_register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL,
+ PROP_DEFAULT, ZFS_TYPE_POOL, "<file> | none", "CACHEFILE");
+
+ /* readonly number properties */
+ zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY,
+ ZFS_TYPE_POOL, "<size>", "SIZE");
+ zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY,
+ ZFS_TYPE_POOL, "<size>", "FREE");
+ zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0,
+ PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC");
+ zprop_register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY,
+ ZFS_TYPE_POOL, "<size>", "CAP");
+ zprop_register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY,
+ ZFS_TYPE_POOL, "<guid>", "GUID");
+ zprop_register_number(ZPOOL_PROP_HEALTH, "health", 0, PROP_READONLY,
+ ZFS_TYPE_POOL, "<state>", "HEALTH");
+ zprop_register_number(ZPOOL_PROP_DEDUPRATIO, "dedupratio", 0,
+ PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if deduped>",
+ "DEDUP");
+
+ /* default number properties */
+ zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
+ PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION");
+ zprop_register_number(ZPOOL_PROP_DEDUPDITTO, "dedupditto", 0,
+ PROP_DEFAULT, ZFS_TYPE_POOL, "<threshold (min 100)>", "DEDUPDITTO");
+
+ /* default index (boolean) properties */
+ zprop_register_index(ZPOOL_PROP_DELEGATION, "delegation", 1,
+ PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "DELEGATION",
+ boolean_table);
+ zprop_register_index(ZPOOL_PROP_AUTOREPLACE, "autoreplace", 0,
+ PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "REPLACE", boolean_table);
+ zprop_register_index(ZPOOL_PROP_LISTSNAPS, "listsnapshots", 0,
+ PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "LISTSNAPS",
+ boolean_table);
+ zprop_register_index(ZPOOL_PROP_AUTOEXPAND, "autoexpand", 0,
+ PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table);
+ zprop_register_index(ZPOOL_PROP_READONLY, "readonly", 0,
+ PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "RDONLY", boolean_table);
+
+ /* default index properties */
+ zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode",
+ ZIO_FAILURE_MODE_WAIT, PROP_DEFAULT, ZFS_TYPE_POOL,
+ "wait | continue | panic", "FAILMODE", failuremode_table);
+
+ /* hidden properties */
+ zprop_register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING,
+ PROP_READONLY, ZFS_TYPE_POOL, "NAME");
+}
+
+/*
+ * Given a property name and its type, returns the corresponding property ID.
+ */
+zpool_prop_t
+zpool_name_to_prop(const char *propname)
+{
+ return (zprop_name_to_prop(propname, ZFS_TYPE_POOL));
+}
+
+/*
+ * Given a pool property ID, returns the corresponding name.
+ * Assuming the pool propety ID is valid.
+ */
+const char *
+zpool_prop_to_name(zpool_prop_t prop)
+{
+ return (zpool_prop_table[prop].pd_name);
+}
+
+zprop_type_t
+zpool_prop_get_type(zpool_prop_t prop)
+{
+ return (zpool_prop_table[prop].pd_proptype);
+}
+
+boolean_t
+zpool_prop_readonly(zpool_prop_t prop)
+{
+ return (zpool_prop_table[prop].pd_attr == PROP_READONLY);
+}
+
+const char *
+zpool_prop_default_string(zpool_prop_t prop)
+{
+ return (zpool_prop_table[prop].pd_strdefault);
+}
+
+uint64_t
+zpool_prop_default_numeric(zpool_prop_t prop)
+{
+ return (zpool_prop_table[prop].pd_numdefault);
+}
+
+int
+zpool_prop_string_to_index(zpool_prop_t prop, const char *string,
+ uint64_t *index)
+{
+ return (zprop_string_to_index(prop, string, index, ZFS_TYPE_POOL));
+}
+
+int
+zpool_prop_index_to_string(zpool_prop_t prop, uint64_t index,
+ const char **string)
+{
+ return (zprop_index_to_string(prop, index, string, ZFS_TYPE_POOL));
+}
+
+uint64_t
+zpool_prop_random_value(zpool_prop_t prop, uint64_t seed)
+{
+ return (zprop_random_value(prop, seed, ZFS_TYPE_POOL));
+}
+
+#ifndef _KERNEL
+
+const char *
+zpool_prop_values(zpool_prop_t prop)
+{
+ return (zpool_prop_table[prop].pd_values);
+}
+
+const char *
+zpool_prop_column_name(zpool_prop_t prop)
+{
+ return (zpool_prop_table[prop].pd_colname);
+}
+
+boolean_t
+zpool_prop_align_right(zpool_prop_t prop)
+{
+ return (zpool_prop_table[prop].pd_rightalign);
+}
+#endif
diff --git a/common/zfs/zprop_common.c b/common/zfs/zprop_common.c
new file mode 100644
index 000000000000..0bbf20d4f02c
--- /dev/null
+++ b/common/zfs/zprop_common.c
@@ -0,0 +1,426 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Common routines used by zfs and zpool property management.
+ */
+
+#include <sys/zio.h>
+#include <sys/spa.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zfs_znode.h>
+#include <sys/fs/zfs.h>
+
+#include "zfs_prop.h"
+#include "zfs_deleg.h"
+
+#if defined(_KERNEL)
+#include <sys/systm.h>
+#include <util/qsort.h>
+#else
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#endif
+
+/*
+ * Select the property descriptor table for the given type: the zpool
+ * table for ZFS_TYPE_POOL, the zfs dataset table for everything else.
+ */
+static zprop_desc_t *
+zprop_get_proptable(zfs_type_t type)
+{
+	if (type == ZFS_TYPE_POOL)
+		return (zpool_prop_get_table());
+	else
+		return (zfs_prop_get_table());
+}
+
+/* Number of entries in the property table selected by 'type'. */
+static int
+zprop_get_numprops(zfs_type_t type)
+{
+	if (type == ZFS_TYPE_POOL)
+		return (ZPOOL_NUM_PROPS);
+	else
+		return (ZFS_NUM_PROPS);
+}
+
+/*
+ * Fill in the descriptor slot for property 'prop' in the table selected
+ * by objset_types.  idx_tbl, if non-NULL, points to an array of
+ * name/value pairs terminated by a NULL pi_name; it describes the legal
+ * values of an index property and its length is recorded in
+ * pd_table_size.  All string arguments are stored by reference, so they
+ * must outlive the table (in practice they are string literals).
+ */
+void
+zprop_register_impl(int prop, const char *name, zprop_type_t type,
+    uint64_t numdefault, const char *strdefault, zprop_attr_t attr,
+    int objset_types, const char *values, const char *colname,
+    boolean_t rightalign, boolean_t visible, const zprop_index_t *idx_tbl)
+{
+	zprop_desc_t *prop_tbl = zprop_get_proptable(objset_types);
+	zprop_desc_t *pd;
+
+	pd = &prop_tbl[prop];
+
+	/*
+	 * A slot may only be registered once, unless it is re-registered
+	 * with the identical name pointer.
+	 */
+	ASSERT(pd->pd_name == NULL || pd->pd_name == name);
+	ASSERT(name != NULL);
+	ASSERT(colname != NULL);
+
+	pd->pd_name = name;
+	pd->pd_propnum = prop;
+	pd->pd_proptype = type;
+	pd->pd_numdefault = numdefault;
+	pd->pd_strdefault = strdefault;
+	pd->pd_attr = attr;
+	pd->pd_types = objset_types;
+	pd->pd_values = values;
+	pd->pd_colname = colname;
+	pd->pd_rightalign = rightalign;
+	pd->pd_visible = visible;
+	pd->pd_table = idx_tbl;
+	/* Count entries up to (not including) the NULL-name terminator. */
+	pd->pd_table_size = 0;
+	while (idx_tbl && (idx_tbl++)->pi_name != NULL)
+		pd->pd_table_size++;
+}
+
+/* Register a string-valued property (left-aligned, visible). */
+void
+zprop_register_string(int prop, const char *name, const char *def,
+    zprop_attr_t attr, int objset_types, const char *values,
+    const char *colname)
+{
+	zprop_register_impl(prop, name, PROP_TYPE_STRING, 0, def, attr,
+	    objset_types, values, colname, B_FALSE, B_TRUE, NULL);
+
+}
+
+/* Register a numeric property (right-aligned, visible). */
+void
+zprop_register_number(int prop, const char *name, uint64_t def,
+    zprop_attr_t attr, int objset_types, const char *values,
+    const char *colname)
+{
+	zprop_register_impl(prop, name, PROP_TYPE_NUMBER, def, NULL, attr,
+	    objset_types, values, colname, B_TRUE, B_TRUE, NULL);
+}
+
+/*
+ * Register an index property: a numeric property whose legal values are
+ * enumerated by the name/value pairs in idx_tbl (right-aligned, visible).
+ */
+void
+zprop_register_index(int prop, const char *name, uint64_t def,
+    zprop_attr_t attr, int objset_types, const char *values,
+    const char *colname, const zprop_index_t *idx_tbl)
+{
+	zprop_register_impl(prop, name, PROP_TYPE_INDEX, def, NULL, attr,
+	    objset_types, values, colname, B_TRUE, B_TRUE, idx_tbl);
+}
+
+/*
+ * Register a hidden property: registered with pd_visible = B_FALSE, so
+ * it is skipped by zprop_iter_common() unless show_all is set.
+ */
+void
+zprop_register_hidden(int prop, const char *name, zprop_type_t type,
+    zprop_attr_t attr, int objset_types, const char *colname)
+{
+	zprop_register_impl(prop, name, type, 0, NULL, attr,
+	    objset_types, NULL, colname, B_FALSE, B_FALSE, NULL);
+}
+
+
+/*
+ * A comparison function we can use to order indexes into property tables.
+ * Read-only properties sort before writable ones; within each group the
+ * order is alphabetical by property name.  Arguments are pointers to
+ * zprop_desc_t pointers, as handed out by qsort().
+ */
+static int
+zprop_compare(const void *arg1, const void *arg2)
+{
+	const zprop_desc_t *p1 = *((zprop_desc_t **)arg1);
+	const zprop_desc_t *p2 = *((zprop_desc_t **)arg2);
+	boolean_t p1ro, p2ro;
+
+	p1ro = (p1->pd_attr == PROP_READONLY);
+	p2ro = (p2->pd_attr == PROP_READONLY);
+
+	if (p1ro == p2ro)
+		return (strcmp(p1->pd_name, p2->pd_name));
+
+	return (p1ro ? -1 : 1);
+}
+
+/*
+ * Iterate over all properties in the given property table, calling back
+ * into the specified function for each property.  We will continue to
+ * iterate until we either reach the end or the callback function returns
+ * something other than ZPROP_CONT.  Hidden properties are skipped unless
+ * show_all is set; when ordered is set the iteration order is that of
+ * zprop_compare() (read-only first, then alphabetical).  Returns the
+ * property at which the callback stopped, or ZPROP_CONT if the whole
+ * table was walked.
+ */
+int
+zprop_iter_common(zprop_func func, void *cb, boolean_t show_all,
+    boolean_t ordered, zfs_type_t type)
+{
+	int i, num_props, size, prop;
+	zprop_desc_t *prop_tbl;
+	zprop_desc_t **order;
+
+	prop_tbl = zprop_get_proptable(type);
+	num_props = zprop_get_numprops(type);
+	size = num_props * sizeof (zprop_desc_t *);
+
+#if defined(_KERNEL)
+	order = kmem_alloc(size, KM_SLEEP);
+#else
+	/*
+	 * NOTE(review): a userland allocation failure returns ZPROP_CONT,
+	 * which is indistinguishable from a completed iteration.
+	 */
+	if ((order = malloc(size)) == NULL)
+		return (ZPROP_CONT);
+#endif
+
+	/* Build an array of pointers so we can sort without copying. */
+	for (int j = 0; j < num_props; j++)
+		order[j] = &prop_tbl[j];
+
+	if (ordered) {
+		qsort((void *)order, num_props, sizeof (zprop_desc_t *),
+		    zprop_compare);
+	}
+
+	prop = ZPROP_CONT;
+	for (i = 0; i < num_props; i++) {
+		if ((order[i]->pd_visible || show_all) &&
+		    (func(order[i]->pd_propnum, cb) != ZPROP_CONT)) {
+			prop = order[i]->pd_propnum;
+			break;
+		}
+	}
+
+#if defined(_KERNEL)
+	kmem_free(order, size);
+#else
+	free(order);
+#endif
+	return (prop);
+}
+
+/*
+ * Does the first 'len' bytes of 'p' name the property described by
+ * prop_entry?  A match is either the exact property name or, in
+ * userland only, the lowercased column name (column names such as
+ * "NAME" are stored in upper case).
+ */
+static boolean_t
+propname_match(const char *p, size_t len, zprop_desc_t *prop_entry)
+{
+	const char *propname = prop_entry->pd_name;
+#ifndef _KERNEL
+	const char *colname = prop_entry->pd_colname;
+	int c;
+#endif
+
+	if (len == strlen(propname) &&
+	    strncmp(p, propname, len) == 0)
+		return (B_TRUE);
+
+#ifndef _KERNEL
+	if (colname == NULL || len != strlen(colname))
+		return (B_FALSE);
+
+	/* Compare p against the column name folded to lower case. */
+	for (c = 0; c < len; c++)
+		if (p[c] != tolower(colname[c]))
+			break;
+
+	/* True only if the loop consumed the whole column name. */
+	return (colname[c] == '\0');
+#else
+	return (B_FALSE);
+#endif
+}
+
+/* Callback state for zprop_name_to_prop(): name sought and its table. */
+typedef struct name_to_prop_cb {
+	const char *propname;	/* name (or column name) being looked up */
+	zprop_desc_t *prop_tbl;	/* table the iteration runs over */
+} name_to_prop_cb_t;
+
+/*
+ * zprop_iter_common() callback: stop (by returning the property ID) when
+ * the candidate property matches data->propname, else ZPROP_CONT.
+ */
+static int
+zprop_name_to_prop_cb(int prop, void *cb_data)
+{
+	name_to_prop_cb_t *data = cb_data;
+
+	if (propname_match(data->propname, strlen(data->propname),
+	    &data->prop_tbl[prop]))
+		return (prop);
+
+	return (ZPROP_CONT);
+}
+
+/*
+ * Look up a property by name in the table selected by 'type'.  Hidden
+ * properties are included (show_all is B_TRUE).  Returns the property
+ * ID, or ZPROP_INVAL if no property matches.
+ */
+int
+zprop_name_to_prop(const char *propname, zfs_type_t type)
+{
+	int prop;
+	name_to_prop_cb_t cb_data;
+
+	cb_data.propname = propname;
+	cb_data.prop_tbl = zprop_get_proptable(type);
+
+	prop = zprop_iter_common(zprop_name_to_prop_cb, &cb_data,
+	    B_TRUE, B_FALSE, type);
+
+	/* ZPROP_CONT means the iteration finished without a match. */
+	return (prop == ZPROP_CONT ? ZPROP_INVAL : prop);
+}
+
+/*
+ * Translate the string representation of an index property value into
+ * its numeric value, stored in *index.  Returns 0 on success, -1 if
+ * prop is invalid, is not an index property, or the string does not
+ * appear in the property's index table.
+ */
+int
+zprop_string_to_index(int prop, const char *string, uint64_t *index,
+    zfs_type_t type)
+{
+	zprop_desc_t *prop_tbl;
+	const zprop_index_t *idx_tbl;
+	int i;
+
+	if (prop == ZPROP_INVAL || prop == ZPROP_CONT)
+		return (-1);
+
+	ASSERT(prop < zprop_get_numprops(type));
+	prop_tbl = zprop_get_proptable(type);
+	/* Only index properties carry a translation table. */
+	if ((idx_tbl = prop_tbl[prop].pd_table) == NULL)
+		return (-1);
+
+	for (i = 0; idx_tbl[i].pi_name != NULL; i++) {
+		if (strcmp(string, idx_tbl[i].pi_name) == 0) {
+			*index = idx_tbl[i].pi_value;
+			return (0);
+		}
+	}
+
+	return (-1);
+}
+
+/*
+ * Translate the numeric value of an index property into its string
+ * representation, stored in *string (which points into the static index
+ * table and must not be freed).  Returns 0 on success, -1 if prop is
+ * invalid, is not an index property, or the value does not appear in
+ * the property's index table.
+ */
+int
+zprop_index_to_string(int prop, uint64_t index, const char **string,
+    zfs_type_t type)
+{
+	zprop_desc_t *prop_tbl;
+	const zprop_index_t *idx_tbl;
+	int i;
+
+	if (prop == ZPROP_INVAL || prop == ZPROP_CONT)
+		return (-1);
+
+	ASSERT(prop < zprop_get_numprops(type));
+	prop_tbl = zprop_get_proptable(type);
+	/* Only index properties carry a translation table. */
+	if ((idx_tbl = prop_tbl[prop].pd_table) == NULL)
+		return (-1);
+
+	for (i = 0; idx_tbl[i].pi_name != NULL; i++) {
+		if (idx_tbl[i].pi_value == index) {
+			*string = idx_tbl[i].pi_name;
+			return (0);
+		}
+	}
+
+	return (-1);
+}
+
+/*
+ * Return a random valid property value.  Used by ztest.
+ * For index properties, the seed selects one of the table's legal values
+ * (seed modulo the table size); for all other properties the seed itself
+ * is returned unchanged.
+ */
+uint64_t
+zprop_random_value(int prop, uint64_t seed, zfs_type_t type)
+{
+	zprop_desc_t *prop_tbl;
+	const zprop_index_t *idx_tbl;
+
+	ASSERT((uint_t)prop < zprop_get_numprops(type));
+	prop_tbl = zprop_get_proptable(type);
+	idx_tbl = prop_tbl[prop].pd_table;
+
+	if (idx_tbl == NULL)
+		return (seed);
+
+	return (idx_tbl[seed % prop_tbl[prop].pd_table_size].pi_value);
+}
+
+/*
+ * Human-readable description of the property's legal values, as passed
+ * to zprop_register_*().  Asserts that prop is a real property ID.
+ */
+const char *
+zprop_values(int prop, zfs_type_t type)
+{
+	zprop_desc_t *prop_tbl;
+
+	ASSERT(prop != ZPROP_INVAL && prop != ZPROP_CONT);
+	ASSERT(prop < zprop_get_numprops(type));
+
+	prop_tbl = zprop_get_proptable(type);
+
+	return (prop_tbl[prop].pd_values);
+}
+
+/*
+ * Returns TRUE if the property applies to any of the given dataset types.
+ * 'type' may be a bitmask of ZFS_TYPE_* values; the test intersects it
+ * with the pd_types mask recorded at registration time.
+ */
+boolean_t
+zprop_valid_for_type(int prop, zfs_type_t type)
+{
+	zprop_desc_t *prop_tbl;
+
+	if (prop == ZPROP_INVAL || prop == ZPROP_CONT)
+		return (B_FALSE);
+
+	ASSERT(prop < zprop_get_numprops(type));
+	prop_tbl = zprop_get_proptable(type);
+	return ((prop_tbl[prop].pd_types & type) != 0);
+}
+
+#ifndef _KERNEL
+
+/*
+ * Determines the minimum width for the column, and indicates whether it's
+ * fixed or not.  Only string columns are non-fixed.  The width returned
+ * is the larger of the column-name length and the widest possible value.
+ */
+size_t
+zprop_width(int prop, boolean_t *fixed, zfs_type_t type)
+{
+	zprop_desc_t *prop_tbl, *pd;
+	const zprop_index_t *idx;
+	size_t ret;
+	int i;
+
+	ASSERT(prop != ZPROP_INVAL && prop != ZPROP_CONT);
+	ASSERT(prop < zprop_get_numprops(type));
+
+	prop_tbl = zprop_get_proptable(type);
+	pd = &prop_tbl[prop];
+
+	*fixed = B_TRUE;
+
+	/*
+	 * Start with the width of the column name.
+	 */
+	ret = strlen(pd->pd_colname);
+
+	/*
+	 * For fixed-width values, make sure the width is large enough to hold
+	 * any possible value.
+	 */
+	switch (pd->pd_proptype) {
+	case PROP_TYPE_NUMBER:
+		/*
+		 * The maximum length of a human-readable number is 5 characters
+		 * ("20.4M", for example).
+		 */
+		if (ret < 5)
+			ret = 5;
+		/*
+		 * 'creation' is handled specially because it's a number
+		 * internally, but displayed as a date string.
+		 */
+		if (prop == ZFS_PROP_CREATION)
+			*fixed = B_FALSE;
+		break;
+	case PROP_TYPE_INDEX:
+		/* Wide enough for the longest index-value name. */
+		idx = prop_tbl[prop].pd_table;
+		for (i = 0; idx[i].pi_name != NULL; i++) {
+			if (strlen(idx[i].pi_name) > ret)
+				ret = strlen(idx[i].pi_name);
+		}
+		break;
+
+	case PROP_TYPE_STRING:
+		/* Strings are the only variable-width column type. */
+		*fixed = B_FALSE;
+		break;
+	}
+
+	return (ret);
+}
+
+#endif
diff --git a/uts/common/Makefile.files b/uts/common/Makefile.files
new file mode 100644
index 000000000000..ec08410b4ff3
--- /dev/null
+++ b/uts/common/Makefile.files
@@ -0,0 +1,2007 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+#
+# This Makefile defines all file modules for the directory uts/common
+# and its children. These are the source files which may be considered
+# common to all SunOS systems.
+
+i386_CORE_OBJS += \
+ atomic.o \
+ avintr.o \
+ pic.o
+
+sparc_CORE_OBJS +=
+
+COMMON_CORE_OBJS += \
+ beep.o \
+ bitset.o \
+ bp_map.o \
+ brand.o \
+ cpucaps.o \
+ cmt.o \
+ cmt_policy.o \
+ cpu.o \
+ cpu_event.o \
+ cpu_intr.o \
+ cpu_pm.o \
+ cpupart.o \
+ cap_util.o \
+ disp.o \
+ group.o \
+ kstat_fr.o \
+ iscsiboot_prop.o \
+ lgrp.o \
+ lgrp_topo.o \
+ mmapobj.o \
+ mutex.o \
+ page_lock.o \
+ page_retire.o \
+ panic.o \
+ param.o \
+ pg.o \
+ pghw.o \
+ putnext.o \
+ rctl_proc.o \
+ rwlock.o \
+ seg_kmem.o \
+ softint.o \
+ string.o \
+ strtol.o \
+ strtoul.o \
+ strtoll.o \
+ strtoull.o \
+ thread_intr.o \
+ vm_page.o \
+ vm_pagelist.o \
+ zlib_obj.o \
+ clock_tick.o
+
+CORE_OBJS += $(COMMON_CORE_OBJS) $($(MACH)_CORE_OBJS)
+
+ZLIB_OBJS = zutil.o zmod.o zmod_subr.o \
+ adler32.o crc32.o deflate.o inffast.o \
+ inflate.o inftrees.o trees.o
+
+GENUNIX_OBJS += \
+ access.o \
+ acl.o \
+ acl_common.o \
+ adjtime.o \
+ alarm.o \
+ aio_subr.o \
+ auditsys.o \
+ audit_core.o \
+ audit_zone.o \
+ audit_memory.o \
+ autoconf.o \
+ avl.o \
+ bdev_dsort.o \
+ bio.o \
+ bitmap.o \
+ blabel.o \
+ brandsys.o \
+ bz2blocksort.o \
+ bz2compress.o \
+ bz2decompress.o \
+ bz2randtable.o \
+ bz2bzlib.o \
+ bz2crctable.o \
+ bz2huffman.o \
+ callb.o \
+ callout.o \
+ chdir.o \
+ chmod.o \
+ chown.o \
+ cladm.o \
+ class.o \
+ clock.o \
+ clock_highres.o \
+ clock_realtime.o\
+ close.o \
+ compress.o \
+ condvar.o \
+ conf.o \
+ console.o \
+ contract.o \
+ copyops.o \
+ core.o \
+ corectl.o \
+ cred.o \
+ cs_stubs.o \
+ dacf.o \
+ dacf_clnt.o \
+ damap.o \
+ cyclic.o \
+ ddi.o \
+ ddifm.o \
+ ddi_hp_impl.o \
+ ddi_hp_ndi.o \
+ ddi_intr.o \
+ ddi_intr_impl.o \
+ ddi_intr_irm.o \
+ ddi_nodeid.o \
+ ddi_timer.o \
+ devcfg.o \
+ devcache.o \
+ device.o \
+ devid.o \
+ devid_cache.o \
+ devid_scsi.o \
+ devid_smp.o \
+ devpolicy.o \
+ disp_lock.o \
+ dnlc.o \
+ driver.o \
+ dumpsubr.o \
+ driver_lyr.o \
+ dtrace_subr.o \
+ errorq.o \
+ etheraddr.o \
+ evchannels.o \
+ exacct.o \
+ exacct_core.o \
+ exec.o \
+ exit.o \
+ fbio.o \
+ fcntl.o \
+ fdbuffer.o \
+ fdsync.o \
+ fem.o \
+ ffs.o \
+ fio.o \
+ flock.o \
+ fm.o \
+ fork.o \
+ vpm.o \
+ fs_reparse.o \
+ fs_subr.o \
+ fsflush.o \
+ ftrace.o \
+ getcwd.o \
+ getdents.o \
+ getloadavg.o \
+ getpagesizes.o \
+ getpid.o \
+ gfs.o \
+ rusagesys.o \
+ gid.o \
+ groups.o \
+ grow.o \
+ hat.o \
+ hat_refmod.o \
+ id32.o \
+ id_space.o \
+ inet_ntop.o \
+ instance.o \
+ ioctl.o \
+ ip_cksum.o \
+ issetugid.o \
+ ippconf.o \
+ kcpc.o \
+ kdi.o \
+ kiconv.o \
+ klpd.o \
+ kmem.o \
+ ksyms_snapshot.o \
+ l_strplumb.o \
+ labelsys.o \
+ link.o \
+ list.o \
+ lockstat_subr.o \
+ log_sysevent.o \
+ logsubr.o \
+ lookup.o \
+ lseek.o \
+ ltos.o \
+ lwp.o \
+ lwp_create.o \
+ lwp_info.o \
+ lwp_self.o \
+ lwp_sobj.o \
+ lwp_timer.o \
+ lwpsys.o \
+ main.o \
+ mmapobjsys.o \
+ memcntl.o \
+ memstr.o \
+ lgrpsys.o \
+ mkdir.o \
+ mknod.o \
+ mount.o \
+ move.o \
+ msacct.o \
+ multidata.o \
+ nbmlock.o \
+ ndifm.o \
+ nice.o \
+ netstack.o \
+ ntptime.o \
+ nvpair.o \
+ nvpair_alloc_system.o \
+ nvpair_alloc_fixed.o \
+ octet.o \
+ open.o \
+ p_online.o \
+ pathconf.o \
+ pathname.o \
+ pause.o \
+ serializer.o \
+ pci_intr_lib.o \
+ pci_cap.o \
+ pcifm.o \
+ pgrp.o \
+ pgrpsys.o \
+ pid.o \
+ pkp_hash.o \
+ policy.o \
+ poll.o \
+ pool.o \
+ pool_pset.o \
+ port_subr.o \
+ ppriv.o \
+ printf.o \
+ priocntl.o \
+ priv.o \
+ priv_const.o \
+ proc.o \
+ procset.o \
+ processor_bind.o \
+ processor_info.o \
+ profil.o \
+ project.o \
+ qsort.o \
+ rctl.o \
+ rctlsys.o \
+ readlink.o \
+ refstr.o \
+ rename.o \
+ resolvepath.o \
+ retire_store.o \
+ process.o \
+ rlimit.o \
+ rmap.o \
+ rw.o \
+ rwstlock.o \
+ sad_conf.o \
+ sid.o \
+ sidsys.o \
+ sched.o \
+ schedctl.o \
+ sctp_crc32.o \
+ seg_dev.o \
+ seg_kp.o \
+ seg_kpm.o \
+ seg_map.o \
+ seg_vn.o \
+ seg_spt.o \
+ semaphore.o \
+ sendfile.o \
+ session.o \
+ share.o \
+ shuttle.o \
+ sig.o \
+ sigaction.o \
+ sigaltstack.o \
+ signotify.o \
+ sigpending.o \
+ sigprocmask.o \
+ sigqueue.o \
+ sigsendset.o \
+ sigsuspend.o \
+ sigtimedwait.o \
+ sleepq.o \
+ sock_conf.o \
+ space.o \
+ sscanf.o \
+ stat.o \
+ statfs.o \
+ statvfs.o \
+ stol.o \
+ str_conf.o \
+ strcalls.o \
+ stream.o \
+ streamio.o \
+ strext.o \
+ strsubr.o \
+ strsun.o \
+ subr.o \
+ sunddi.o \
+ sunmdi.o \
+ sunndi.o \
+ sunpci.o \
+ sunpm.o \
+ sundlpi.o \
+ suntpi.o \
+ swap_subr.o \
+ swap_vnops.o \
+ symlink.o \
+ sync.o \
+ sysclass.o \
+ sysconfig.o \
+ sysent.o \
+ sysfs.o \
+ systeminfo.o \
+ task.o \
+ taskq.o \
+ tasksys.o \
+ time.o \
+ timer.o \
+ times.o \
+ timers.o \
+ thread.o \
+ tlabel.o \
+ tnf_res.o \
+ turnstile.o \
+ tty_common.o \
+ u8_textprep.o \
+ uadmin.o \
+ uconv.o \
+ ucredsys.o \
+ uid.o \
+ umask.o \
+ umount.o \
+ uname.o \
+ unix_bb.o \
+ unlink.o \
+ urw.o \
+ utime.o \
+ utssys.o \
+ uucopy.o \
+ vfs.o \
+ vfs_conf.o \
+ vmem.o \
+ vm_anon.o \
+ vm_as.o \
+ vm_meter.o \
+ vm_pageout.o \
+ vm_pvn.o \
+ vm_rm.o \
+ vm_seg.o \
+ vm_subr.o \
+ vm_swap.o \
+ vm_usage.o \
+ vnode.o \
+ vuid_queue.o \
+ vuid_store.o \
+ waitq.o \
+ watchpoint.o \
+ yield.o \
+ scsi_confdata.o \
+ xattr.o \
+ xattr_common.o \
+ xdr_mblk.o \
+ xdr_mem.o \
+ xdr.o \
+ xdr_array.o \
+ xdr_refer.o \
+ xhat.o \
+ zone.o
+
+#
+# Stubs for the stand-alone linker/loader
+#
+sparc_GENSTUBS_OBJS = \
+ kobj_stubs.o
+
+i386_GENSTUBS_OBJS =
+
+COMMON_GENSTUBS_OBJS =
+
+GENSTUBS_OBJS += $(COMMON_GENSTUBS_OBJS) $($(MACH)_GENSTUBS_OBJS)
+
+#
+# DTrace and DTrace Providers
+#
+DTRACE_OBJS += dtrace.o dtrace_isa.o dtrace_asm.o
+
+SDT_OBJS += sdt_subr.o
+
+PROFILE_OBJS += profile.o
+
+SYSTRACE_OBJS += systrace.o
+
+LOCKSTAT_OBJS += lockstat.o
+
+FASTTRAP_OBJS += fasttrap.o fasttrap_isa.o
+
+DCPC_OBJS += dcpc.o
+
+#
+# Driver (pseudo-driver) Modules
+#
+IPP_OBJS += ippctl.o
+
+AUDIO_OBJS += audio_client.o audio_ddi.o audio_engine.o \
+ audio_fltdata.o audio_format.o audio_ctrl.o \
+ audio_grc3.o audio_output.o audio_input.o \
+ audio_oss.o audio_sun.o
+
+AUDIOEMU10K_OBJS += audioemu10k.o
+
+AUDIOENS_OBJS += audioens.o
+
+AUDIOVIA823X_OBJS += audiovia823x.o
+
+AUDIOVIA97_OBJS += audiovia97.o
+
+AUDIO1575_OBJS += audio1575.o
+
+AUDIO810_OBJS += audio810.o
+
+AUDIOCMI_OBJS += audiocmi.o
+
+AUDIOHD_OBJS += audiohd.o
+
+AUDIOIXP_OBJS += audioixp.o
+
+AUDIOLS_OBJS += audiols.o
+
+AUDIOP16X_OBJS += audiop16x.o
+
+AUDIOPCI_OBJS += audiopci.o
+
+AUDIOSOLO_OBJS += audiosolo.o
+
+AUDIOTS_OBJS += audiots.o
+
+AC97_OBJS += ac97.o ac97_ad.o ac97_alc.o ac97_cmi.o
+
+BLKDEV_OBJS += blkdev.o
+
+CARDBUS_OBJS += cardbus.o cardbus_hp.o cardbus_cfg.o
+
+CONSKBD_OBJS += conskbd.o
+
+CONSMS_OBJS += consms.o
+
+OLDPTY_OBJS += tty_ptyconf.o
+
+PTC_OBJS += tty_pty.o
+
+PTSL_OBJS += tty_pts.o
+
+PTM_OBJS += ptm.o
+
+MII_OBJS += mii.o mii_cicada.o mii_natsemi.o mii_intel.o mii_qualsemi.o \
+ mii_marvell.o mii_realtek.o mii_other.o
+
+PTS_OBJS += pts.o
+
+PTY_OBJS += ptms_conf.o
+
+SAD_OBJS += sad.o
+
+MD4_OBJS += md4.o md4_mod.o
+
+MD5_OBJS += md5.o md5_mod.o
+
+SHA1_OBJS += sha1.o sha1_mod.o fips_sha1_util.o
+
+SHA2_OBJS += sha2.o sha2_mod.o fips_sha2_util.o
+
+IPGPC_OBJS += classifierddi.o classifier.o filters.o trie.o table.o \
+ ba_table.o
+
+DSCPMK_OBJS += dscpmk.o dscpmkddi.o
+
+DLCOSMK_OBJS += dlcosmk.o dlcosmkddi.o
+
+FLOWACCT_OBJS += flowacctddi.o flowacct.o
+
+TOKENMT_OBJS += tokenmt.o tokenmtddi.o
+
+TSWTCL_OBJS += tswtcl.o tswtclddi.o
+
+ARP_OBJS += arpddi.o
+
+ICMP_OBJS += icmpddi.o
+
+ICMP6_OBJS += icmp6ddi.o
+
+RTS_OBJS += rtsddi.o
+
+IP_ICMP_OBJS = icmp.o icmp_opt_data.o
+IP_RTS_OBJS = rts.o rts_opt_data.o
+IP_TCP_OBJS = tcp.o tcp_fusion.o tcp_opt_data.o tcp_sack.o tcp_stats.o \
+ tcp_misc.o tcp_timers.o tcp_time_wait.o tcp_tpi.o tcp_output.o \
+ tcp_input.o tcp_socket.o tcp_bind.o tcp_cluster.o tcp_tunables.o
+IP_UDP_OBJS = udp.o udp_opt_data.o udp_tunables.o udp_stats.o
+IP_SCTP_OBJS = sctp.o sctp_opt_data.o sctp_output.o \
+ sctp_init.o sctp_input.o sctp_cookie.o \
+ sctp_conn.o sctp_error.o sctp_snmp.o \
+ sctp_tunables.o sctp_shutdown.o sctp_common.o \
+ sctp_timer.o sctp_heartbeat.o sctp_hash.o \
+ sctp_bind.o sctp_notify.o sctp_asconf.o \
+ sctp_addr.o tn_ipopt.o tnet.o ip_netinfo.o \
+ sctp_misc.o
+IP_ILB_OBJS = ilb.o ilb_nat.o ilb_conn.o ilb_alg_hash.o ilb_alg_rr.o
+
+IP_OBJS += igmp.o ipmp.o ip.o ip6.o ip6_asp.o ip6_if.o ip6_ire.o \
+ ip6_rts.o ip_if.o ip_ire.o ip_listutils.o ip_mroute.o \
+ ip_multi.o ip2mac.o ip_ndp.o ip_rts.o ip_srcid.o \
+ ipddi.o ipdrop.o mi.o nd.o tunables.o optcom.o snmpcom.o \
+ ipsec_loader.o spd.o ipclassifier.o inet_common.o ip_squeue.o \
+ squeue.o ip_sadb.o ip_ftable.o proto_set.o radix.o ip_dummy.o \
+ ip_helper_stream.o ip_tunables.o \
+ ip_output.o ip_input.o ip6_input.o ip6_output.o ip_arp.o \
+ conn_opt.o ip_attr.o ip_dce.o \
+ $(IP_ICMP_OBJS) \
+ $(IP_RTS_OBJS) \
+ $(IP_TCP_OBJS) \
+ $(IP_UDP_OBJS) \
+ $(IP_SCTP_OBJS) \
+ $(IP_ILB_OBJS)
+
+IP6_OBJS += ip6ddi.o
+
+HOOK_OBJS += hook.o
+
+NETI_OBJS += neti_impl.o neti_mod.o neti_stack.o
+
+KEYSOCK_OBJS += keysockddi.o keysock.o keysock_opt_data.o
+
+IPNET_OBJS += ipnet.o ipnet_bpf.o
+
+SPDSOCK_OBJS += spdsockddi.o spdsock.o spdsock_opt_data.o
+
+IPSECESP_OBJS += ipsecespddi.o ipsecesp.o
+
+IPSECAH_OBJS += ipsecahddi.o ipsecah.o sadb.o
+
+SPPP_OBJS += sppp.o sppp_dlpi.o sppp_mod.o s_common.o
+
+SPPPTUN_OBJS += sppptun.o sppptun_mod.o
+
+SPPPASYN_OBJS += spppasyn.o spppasyn_mod.o
+
+SPPPCOMP_OBJS += spppcomp.o spppcomp_mod.o deflate.o bsd-comp.o vjcompress.o \
+ zlib.o
+
+TCP_OBJS += tcpddi.o
+
+TCP6_OBJS += tcp6ddi.o
+
+NCA_OBJS += ncaddi.o
+
+SDP_SOCK_MOD_OBJS += sockmod_sdp.o socksdp.o socksdpsubr.o
+
+SCTP_SOCK_MOD_OBJS += sockmod_sctp.o socksctp.o socksctpsubr.o
+
+PFP_SOCK_MOD_OBJS += sockmod_pfp.o
+
+RDS_SOCK_MOD_OBJS += sockmod_rds.o
+
+RDS_OBJS += rdsddi.o rdssubr.o rds_opt.o rds_ioctl.o
+
+RDSIB_OBJS += rdsib.o rdsib_ib.o rdsib_cm.o rdsib_ep.o rdsib_buf.o \
+ rdsib_debug.o rdsib_sc.o
+
+RDSV3_OBJS += af_rds.o rdsv3_ddi.o bind.o loop.o threads.o connection.o \
+ transport.o cong.o sysctl.o message.o rds_recv.o send.o \
+ stats.o info.o page.o rdma_transport.o ib_ring.o ib_rdma.o \
+ ib_recv.o ib.o ib_send.o ib_sysctl.o ib_stats.o ib_cm.o \
+ rdsv3_sc.o rdsv3_debug.o rdsv3_impl.o rdma.o rdsv3_af_thr.o
+
+ISER_OBJS += iser.o iser_cm.o iser_cq.o iser_ib.o iser_idm.o \
+ iser_resource.o iser_xfer.o
+
+UDP_OBJS += udpddi.o
+
+UDP6_OBJS += udp6ddi.o
+
+SY_OBJS += gentty.o
+
+TCO_OBJS += ticots.o
+
+TCOO_OBJS += ticotsord.o
+
+TCL_OBJS += ticlts.o
+
+TL_OBJS += tl.o
+
+DUMP_OBJS += dump.o
+
+BPF_OBJS += bpf.o bpf_filter.o bpf_mod.o bpf_dlt.o bpf_mac.o
+
+CLONE_OBJS += clone.o
+
+CN_OBJS += cons.o
+
+DLD_OBJS += dld_drv.o dld_proto.o dld_str.o dld_flow.o
+
+DLS_OBJS += dls.o dls_link.o dls_mod.o dls_stat.o dls_mgmt.o
+
+GLD_OBJS += gld.o gldutil.o
+
+MAC_OBJS += mac.o mac_bcast.o mac_client.o mac_datapath_setup.o mac_flow.o \
+ mac_hio.o mac_mod.o mac_ndd.o mac_provider.o mac_sched.o \
+ mac_protect.o mac_soft_ring.o mac_stat.o mac_util.o
+
+MAC_6TO4_OBJS += mac_6to4.o
+
+MAC_ETHER_OBJS += mac_ether.o
+
+MAC_IPV4_OBJS += mac_ipv4.o
+
+MAC_IPV6_OBJS += mac_ipv6.o
+
+MAC_WIFI_OBJS += mac_wifi.o
+
+MAC_IB_OBJS += mac_ib.o
+
+IPTUN_OBJS += iptun_dev.o iptun_ctl.o iptun.o
+
+AGGR_OBJS += aggr_dev.o aggr_ctl.o aggr_grp.o aggr_port.o \
+ aggr_send.o aggr_recv.o aggr_lacp.o
+
+SOFTMAC_OBJS += softmac_main.o softmac_ctl.o softmac_capab.o \
+ softmac_dev.o softmac_stat.o softmac_pkt.o softmac_fp.o
+
+NET80211_OBJS += net80211.o net80211_proto.o net80211_input.o \
+ net80211_output.o net80211_node.o net80211_crypto.o \
+ net80211_crypto_none.o net80211_crypto_wep.o net80211_ioctl.o \
+ net80211_crypto_tkip.o net80211_crypto_ccmp.o \
+ net80211_ht.o
+
+VNIC_OBJS += vnic_ctl.o vnic_dev.o
+
+SIMNET_OBJS += simnet.o
+
+IB_OBJS += ibnex.o ibnex_ioctl.o ibnex_hca.o
+
+IBCM_OBJS += ibcm_impl.o ibcm_sm.o ibcm_ti.o ibcm_utils.o ibcm_path.o \
+ ibcm_arp.o ibcm_arp_link.o
+
+IBDM_OBJS += ibdm.o
+
+IBDMA_OBJS += ibdma.o
+
+IBMF_OBJS += ibmf.o ibmf_impl.o ibmf_dr.o ibmf_wqe.o ibmf_ud_dest.o ibmf_mod.o \
+ ibmf_send.o ibmf_recv.o ibmf_handlers.o ibmf_trans.o \
+ ibmf_timers.o ibmf_msg.o ibmf_utils.o ibmf_rmpp.o \
+ ibmf_saa.o ibmf_saa_impl.o ibmf_saa_utils.o ibmf_saa_events.o
+
+IBTL_OBJS += ibtl_impl.o ibtl_util.o ibtl_mem.o ibtl_handlers.o ibtl_qp.o \
+ ibtl_cq.o ibtl_wr.o ibtl_hca.o ibtl_chan.o ibtl_cm.o \
+ ibtl_mcg.o ibtl_ibnex.o ibtl_srq.o ibtl_part.o
+
+TAVOR_OBJS += tavor.o tavor_agents.o tavor_cfg.o tavor_ci.o tavor_cmd.o \
+ tavor_cq.o tavor_event.o tavor_ioctl.o tavor_misc.o \
+ tavor_mr.o tavor_qp.o tavor_qpmod.o tavor_rsrc.o \
+ tavor_srq.o tavor_stats.o tavor_umap.o tavor_wr.o
+
+HERMON_OBJS += hermon.o hermon_agents.o hermon_cfg.o hermon_ci.o hermon_cmd.o \
+ hermon_cq.o hermon_event.o hermon_ioctl.o hermon_misc.o \
+ hermon_mr.o hermon_qp.o hermon_qpmod.o hermon_rsrc.o \
+ hermon_srq.o hermon_stats.o hermon_umap.o hermon_wr.o \
+ hermon_fcoib.o hermon_fm.o
+
+DAPLT_OBJS += daplt.o
+
+SOL_OFS_OBJS += sol_cma.o sol_ib_cma.o sol_uobj.o \
+ sol_ofs_debug_util.o sol_ofs_gen_util.o \
+ sol_kverbs.o
+
+SOL_UCMA_OBJS += sol_ucma.o
+
+SOL_UVERBS_OBJS += sol_uverbs.o sol_uverbs_comp.o sol_uverbs_event.o \
+ sol_uverbs_hca.o sol_uverbs_qp.o
+
+SOL_UMAD_OBJS += sol_umad.o
+
+KSTAT_OBJS += kstat.o
+
+KSYMS_OBJS += ksyms.o
+
+INSTANCE_OBJS += inst_sync.o
+
+IWSCN_OBJS += iwscons.o
+
+LOFI_OBJS += lofi.o LzmaDec.o
+
+FSSNAP_OBJS += fssnap.o
+
+FSSNAPIF_OBJS += fssnap_if.o
+
+MM_OBJS += mem.o
+
+PHYSMEM_OBJS += physmem.o
+
+OPTIONS_OBJS += options.o
+
+WINLOCK_OBJS += winlockio.o
+
+PM_OBJS += pm.o
+SRN_OBJS += srn.o
+
+PSEUDO_OBJS += pseudonex.o
+
+RAMDISK_OBJS += ramdisk.o
+
+LLC1_OBJS += llc1.o
+
+USBKBM_OBJS += usbkbm.o
+
+USBWCM_OBJS += usbwcm.o
+
+BOFI_OBJS += bofi.o
+
+HID_OBJS += hid.o
+
+HWA_RC_OBJS += hwarc.o
+
+USBSKEL_OBJS += usbskel.o
+
+USBVC_OBJS += usbvc.o usbvc_v4l2.o
+
+HIDPARSER_OBJS += hidparser.o
+
+USB_AC_OBJS += usb_ac.o
+
+USB_AS_OBJS += usb_as.o
+
+USB_AH_OBJS += usb_ah.o
+
+USBMS_OBJS += usbms.o
+
+USBPRN_OBJS += usbprn.o
+
+UGEN_OBJS += ugen.o
+
+USBSER_OBJS += usbser.o usbser_rseq.o
+
+USBSACM_OBJS += usbsacm.o
+
+USBSER_KEYSPAN_OBJS += usbser_keyspan.o keyspan_dsd.o keyspan_pipe.o
+
+USBS49_FW_OBJS += keyspan_49fw.o
+
+USBSPRL_OBJS += usbser_pl2303.o pl2303_dsd.o
+
+WUSB_CA_OBJS += wusb_ca.o
+
+USBFTDI_OBJS += usbser_uftdi.o uftdi_dsd.o
+
+USBECM_OBJS += usbecm.o
+
+WC_OBJS += wscons.o vcons.o
+
+VCONS_CONF_OBJS += vcons_conf.o
+
+SCSI_OBJS += scsi_capabilities.o scsi_confsubr.o scsi_control.o \
+ scsi_data.o scsi_fm.o scsi_hba.o scsi_reset_notify.o \
+ scsi_resource.o scsi_subr.o scsi_transport.o scsi_watch.o \
+ smp_transport.o
+
+SCSI_VHCI_OBJS += scsi_vhci.o mpapi_impl.o scsi_vhci_tpgs.o
+
+SCSI_VHCI_F_SYM_OBJS += sym.o
+
+SCSI_VHCI_F_TPGS_OBJS += tpgs.o
+
+SCSI_VHCI_F_ASYM_SUN_OBJS += asym_sun.o
+
+SCSI_VHCI_F_SYM_HDS_OBJS += sym_hds.o
+
+SCSI_VHCI_F_TAPE_OBJS += tape.o
+
+SCSI_VHCI_F_TPGS_TAPE_OBJS += tpgs_tape.o
+
+SGEN_OBJS += sgen.o
+
+SMP_OBJS += smp.o
+
+SATA_OBJS += sata.o
+
+USBA_OBJS += hcdi.o usba.o usbai.o hubdi.o parser.o genconsole.o \
+ usbai_pipe_mgmt.o usbai_req.o usbai_util.o usbai_register.o \
+ usba_devdb.o usba10_calls.o usba_ugen.o whcdi.o wa.o
+USBA_WITHOUT_WUSB_OBJS += hcdi.o usba.o usbai.o hubdi.o parser.o genconsole.o \
+ usbai_pipe_mgmt.o usbai_req.o usbai_util.o usbai_register.o \
+ usba_devdb.o usba10_calls.o usba_ugen.o
+
+USBA10_OBJS += usba10.o
+
+RSM_OBJS += rsm.o rsmka_pathmanager.o rsmka_util.o
+
+RSMOPS_OBJS += rsmops.o
+
+S1394_OBJS += t1394.o t1394_errmsg.o s1394.o s1394_addr.o s1394_asynch.o \
+ s1394_bus_reset.o s1394_cmp.o s1394_csr.o s1394_dev_disc.o \
+ s1394_fa.o s1394_fcp.o \
+ s1394_hotplug.o s1394_isoch.o s1394_misc.o h1394.o nx1394.o
+
+HCI1394_OBJS += hci1394.o hci1394_async.o hci1394_attach.o hci1394_buf.o \
+ hci1394_csr.o hci1394_detach.o hci1394_extern.o \
+ hci1394_ioctl.o hci1394_isoch.o hci1394_isr.o \
+ hci1394_ixl_comp.o hci1394_ixl_isr.o hci1394_ixl_misc.o \
+ hci1394_ixl_update.o hci1394_misc.o hci1394_ohci.o \
+ hci1394_q.o hci1394_s1394if.o hci1394_tlabel.o \
+ hci1394_tlist.o hci1394_vendor.o
+
+AV1394_OBJS += av1394.o av1394_as.o av1394_async.o av1394_cfgrom.o \
+ av1394_cmp.o av1394_fcp.o av1394_isoch.o av1394_isoch_chan.o \
+ av1394_isoch_recv.o av1394_isoch_xmit.o av1394_list.o \
+ av1394_queue.o
+
+DCAM1394_OBJS += dcam.o dcam_frame.o dcam_param.o dcam_reg.o \
+ dcam_ring_buff.o
+
+SCSA1394_OBJS += hba.o sbp2_driver.o sbp2_bus.o
+
+SBP2_OBJS += cfgrom.o sbp2.o
+
+PMODEM_OBJS += pmodem.o pmodem_cis.o cis.o cis_callout.o cis_handlers.o cis_params.o
+
+DSW_OBJS += dsw.o dsw_dev.o ii_tree.o
+
+NCALL_OBJS += ncall.o \
+ ncall_stub.o
+
+RDC_OBJS += rdc.o \
+ rdc_dev.o \
+ rdc_io.o \
+ rdc_clnt.o \
+ rdc_prot_xdr.o \
+ rdc_svc.o \
+ rdc_bitmap.o \
+ rdc_health.o \
+ rdc_subr.o \
+ rdc_diskq.o
+
+RDCSRV_OBJS += rdcsrv.o
+
+RDCSTUB_OBJS += rdc_stub.o
+
+SDBC_OBJS += sd_bcache.o \
+ sd_bio.o \
+ sd_conf.o \
+ sd_ft.o \
+ sd_hash.o \
+ sd_io.o \
+ sd_misc.o \
+ sd_pcu.o \
+ sd_tdaemon.o \
+ sd_trace.o \
+ sd_iob_impl0.o \
+ sd_iob_impl1.o \
+ sd_iob_impl2.o \
+ sd_iob_impl3.o \
+ sd_iob_impl4.o \
+ sd_iob_impl5.o \
+ sd_iob_impl6.o \
+ sd_iob_impl7.o \
+ safestore.o \
+ safestore_ram.o
+
+NSCTL_OBJS += nsctl.o \
+ nsc_cache.o \
+ nsc_disk.o \
+ nsc_dev.o \
+ nsc_freeze.o \
+ nsc_gen.o \
+ nsc_mem.o \
+ nsc_ncallio.o \
+ nsc_power.o \
+ nsc_resv.o \
+ nsc_rmspin.o \
+ nsc_solaris.o \
+ nsc_trap.o \
+ nsc_list.o
+UNISTAT_OBJS += spuni.o \
+ spcs_s_k.o
+
+NSKERN_OBJS += nsc_ddi.o \
+ nsc_proc.o \
+ nsc_raw.o \
+ nsc_thread.o \
+ nskernd.o
+
+SV_OBJS += sv.o
+
+PMCS_OBJS += pmcs_attach.o pmcs_ds.o pmcs_intr.o pmcs_nvram.o pmcs_sata.o \
+ pmcs_scsa.o pmcs_smhba.o pmcs_subr.o pmcs_fwlog.o
+
+PMCS8001FW_C_OBJS += pmcs_fw_hdr.o
+PMCS8001FW_OBJS += $(PMCS8001FW_C_OBJS) SPCBoot.o ila.o firmware.o
+
+#
+# Tape and HBA driver modules.
+#
+
+ST_OBJS += st.o st_conf.o
+
+EMLXS_OBJS += emlxs_clock.o emlxs_dfc.o emlxs_dhchap.o emlxs_diag.o \
+ emlxs_download.o emlxs_dump.o emlxs_els.o emlxs_event.o \
+ emlxs_fcp.o emlxs_fct.o emlxs_hba.o emlxs_ip.o \
+ emlxs_mbox.o emlxs_mem.o emlxs_msg.o emlxs_node.o \
+ emlxs_pkt.o emlxs_sli3.o emlxs_sli4.o emlxs_solaris.o \
+ emlxs_thread.o
+
+EMLXS_FW_OBJS += emlxs_fw.o
+
+OCE_OBJS += oce_buf.o oce_fm.o oce_gld.o oce_hw.o oce_intr.o oce_main.o \
+ oce_mbx.o oce_mq.o oce_queue.o oce_rx.o oce_stat.o oce_tx.o \
+ oce_utils.o
+
+FCT_OBJS += discovery.o fct.o
+
+QLT_OBJS += 2400.o 2500.o 8100.o qlt.o qlt_dma.o
+
+SRPT_OBJS += srpt_mod.o srpt_ch.o srpt_cm.o srpt_ioc.o srpt_stp.o
+
+FCOE_OBJS += fcoe.o fcoe_eth.o fcoe_fc.o
+
+FCOET_OBJS += fcoet.o fcoet_eth.o fcoet_fc.o
+
+FCOEI_OBJS += fcoei.o fcoei_eth.o fcoei_lv.o
+
+ISCSIT_SHARED_OBJS += \
+ iscsit_common.o
+
+ISCSIT_OBJS += $(ISCSIT_SHARED_OBJS) \
+ iscsit.o iscsit_tgt.o iscsit_sess.o iscsit_login.o \
+ iscsit_text.o iscsit_isns.o iscsit_radiusauth.o \
+ iscsit_radiuspacket.o iscsit_auth.o iscsit_authclient.o
+
+PPPT_OBJS += alua_ic_if.o pppt.o pppt_msg.o pppt_tgt.o
+
+STMF_OBJS += lun_map.o stmf.o
+
+STMF_SBD_OBJS += sbd.o sbd_scsi.o sbd_pgr.o sbd_zvol.o
+
+SYSMSG_OBJS += sysmsg.o
+
+SES_OBJS += ses.o ses_sen.o ses_safte.o ses_ses.o
+
+TNF_OBJS += tnf_buf.o tnf_trace.o tnf_writer.o trace_init.o \
+ trace_funcs.o tnf_probe.o tnf.o
+
+LOGINDMUX_OBJS += logindmux.o
+
+DEVINFO_OBJS += devinfo.o
+
+DEVPOLL_OBJS += devpoll.o
+
+DEVPOOL_OBJS += devpool.o
+
+I8042_OBJS += i8042.o
+
+KB8042_OBJS += \
+ at_keyprocess.o \
+ kb8042.o \
+ kb8042_keytables.o
+
+MOUSE8042_OBJS += mouse8042.o
+
+FDC_OBJS += fdc.o
+
+ASY_OBJS += asy.o
+
+ECPP_OBJS += ecpp.o
+
+VUIDM3P_OBJS += vuidmice.o vuidm3p.o
+
+VUIDM4P_OBJS += vuidmice.o vuidm4p.o
+
+VUIDM5P_OBJS += vuidmice.o vuidm5p.o
+
+VUIDPS2_OBJS += vuidmice.o vuidps2.o
+
+HPCSVC_OBJS += hpcsvc.o
+
+PCIE_MISC_OBJS += pcie.o pcie_fault.o pcie_hp.o pciehpc.o pcishpc.o pcie_pwr.o pciev.o
+
+PCIHPNEXUS_OBJS += pcihp.o
+
+OPENEEPR_OBJS += openprom.o
+
+RANDOM_OBJS += random.o
+
+PSHOT_OBJS += pshot.o
+
+GEN_DRV_OBJS += gen_drv.o
+
+TCLIENT_OBJS += tclient.o
+
+TPHCI_OBJS += tphci.o
+
+TVHCI_OBJS += tvhci.o
+
+EMUL64_OBJS += emul64.o emul64_bsd.o
+
+FCP_OBJS += fcp.o
+
+FCIP_OBJS += fcip.o
+
+FCSM_OBJS += fcsm.o
+
+FCTL_OBJS += fctl.o
+
+FP_OBJS += fp.o
+
+QLC_OBJS += ql_api.o ql_debug.o ql_hba_fru.o ql_init.o ql_iocb.o ql_ioctl.o \
+ ql_isr.o ql_mbx.o ql_nx.o ql_xioctl.o ql_fw_table.o
+
+QLC_FW_2200_OBJS += ql_fw_2200.o
+
+QLC_FW_2300_OBJS += ql_fw_2300.o
+
+QLC_FW_2400_OBJS += ql_fw_2400.o
+
+QLC_FW_2500_OBJS += ql_fw_2500.o
+
+QLC_FW_6322_OBJS += ql_fw_6322.o
+
+QLC_FW_8100_OBJS += ql_fw_8100.o
+
+QLGE_OBJS += qlge.o qlge_dbg.o qlge_flash.o qlge_fm.o qlge_gld.o qlge_mpi.o
+
+ZCONS_OBJS += zcons.o
+
+NV_SATA_OBJS += nv_sata.o
+
+SI3124_OBJS += si3124.o
+
+AHCI_OBJS += ahci.o
+
+PCIIDE_OBJS += pci-ide.o
+
+PCEPP_OBJS += pcepp.o
+
+CPC_OBJS += cpc.o
+
+CPUID_OBJS += cpuid_drv.o
+
+SYSEVENT_OBJS += sysevent.o
+
+BL_OBJS += bl.o
+
+DRM_OBJS += drm_sunmod.o drm_kstat.o drm_agpsupport.o \
+ drm_auth.o drm_bufs.o drm_context.o drm_dma.o \
+ drm_drawable.o drm_drv.o drm_fops.o drm_ioctl.o drm_irq.o \
+ drm_lock.o drm_memory.o drm_msg.o drm_pci.o drm_scatter.o \
+ drm_cache.o drm_gem.o drm_mm.o ati_pcigart.o
+
+FM_OBJS += devfm.o devfm_machdep.o
+
+RTLS_OBJS += rtls.o
+
+#
+# exec modules
+#
+AOUTEXEC_OBJS +=aout.o
+
+ELFEXEC_OBJS += elf.o elf_notes.o old_notes.o
+
+INTPEXEC_OBJS +=intp.o
+
+SHBINEXEC_OBJS +=shbin.o
+
+JAVAEXEC_OBJS +=java.o
+
+#
+# file system modules
+#
+AUTOFS_OBJS += auto_vfsops.o auto_vnops.o auto_subr.o auto_xdr.o auto_sys.o
+
+CACHEFS_OBJS += cachefs_cnode.o cachefs_cod.o \
+ cachefs_dir.o cachefs_dlog.o cachefs_filegrp.o \
+ cachefs_fscache.o cachefs_ioctl.o cachefs_log.o \
+ cachefs_module.o \
+ cachefs_noopc.o cachefs_resource.o \
+ cachefs_strict.o \
+ cachefs_subr.o cachefs_vfsops.o \
+ cachefs_vnops.o
+
+DCFS_OBJS += dc_vnops.o
+
+DEVFS_OBJS += devfs_subr.o devfs_vfsops.o devfs_vnops.o
+
+DEV_OBJS += sdev_subr.o sdev_vfsops.o sdev_vnops.o \
+ sdev_ptsops.o sdev_zvolops.o sdev_comm.o \
+ sdev_profile.o sdev_ncache.o sdev_netops.o \
+ sdev_ipnetops.o \
+ sdev_vtops.o
+
+CTFS_OBJS += ctfs_all.o ctfs_cdir.o ctfs_ctl.o ctfs_event.o \
+ ctfs_latest.o ctfs_root.o ctfs_sym.o ctfs_tdir.o ctfs_tmpl.o
+
+OBJFS_OBJS += objfs_vfs.o objfs_root.o objfs_common.o \
+ objfs_odir.o objfs_data.o
+
+FDFS_OBJS += fdops.o
+
+FIFO_OBJS += fifosubr.o fifovnops.o
+
+PIPE_OBJS += pipe.o
+
+HSFS_OBJS += hsfs_node.o hsfs_subr.o hsfs_vfsops.o hsfs_vnops.o \
+ hsfs_susp.o hsfs_rrip.o hsfs_susp_subr.o
+
+LOFS_OBJS += lofs_subr.o lofs_vfsops.o lofs_vnops.o
+
+NAMEFS_OBJS += namevfs.o namevno.o
+
+NFS_OBJS += nfs_client.o nfs_common.o nfs_dump.o \
+ nfs_subr.o nfs_vfsops.o nfs_vnops.o \
+ nfs_xdr.o nfs_sys.o nfs_strerror.o \
+ nfs3_vfsops.o nfs3_vnops.o nfs3_xdr.o \
+ nfs_acl_vnops.o nfs_acl_xdr.o nfs4_vfsops.o \
+ nfs4_vnops.o nfs4_xdr.o nfs4_idmap.o \
+ nfs4_shadow.o nfs4_subr.o \
+ nfs4_attr.o nfs4_rnode.o nfs4_client.o \
+ nfs4_acache.o nfs4_common.o nfs4_client_state.o \
+ nfs4_callback.o nfs4_recovery.o nfs4_client_secinfo.o \
+ nfs4_client_debug.o nfs_stats.o \
+ nfs4_acl.o nfs4_stub_vnops.o nfs_cmd.o
+
+NFSSRV_OBJS += nfs_server.o nfs_srv.o nfs3_srv.o \
+ nfs_acl_srv.o nfs_auth.o nfs_auth_xdr.o \
+ nfs_export.o nfs_log.o nfs_log_xdr.o \
+ nfs4_srv.o nfs4_state.o nfs4_srv_attr.o \
+ nfs4_srv_ns.o nfs4_db.o nfs4_srv_deleg.o \
+ nfs4_deleg_ops.o nfs4_srv_readdir.o nfs4_dispatch.o
+
+SMBSRV_SHARED_OBJS += \
+ smb_inet.o \
+ smb_match.o \
+ smb_msgbuf.o \
+ smb_oem.o \
+ smb_string.o \
+ smb_utf8.o \
+ smb_door_legacy.o \
+ smb_xdr.o \
+ smb_token.o \
+ smb_token_xdr.o \
+ smb_sid.o \
+ smb_native.o \
+ smb_netbios_util.o
+
+SMBSRV_OBJS += $(SMBSRV_SHARED_OBJS) \
+ smb_acl.o \
+ smb_alloc.o \
+ smb_close.o \
+ smb_common_open.o \
+ smb_common_transact.o \
+ smb_create.o \
+ smb_delete.o \
+ smb_directory.o \
+ smb_dispatch.o \
+ smb_echo.o \
+ smb_fem.o \
+ smb_find.o \
+ smb_flush.o \
+ smb_fsinfo.o \
+ smb_fsops.o \
+ smb_init.o \
+ smb_kdoor.o \
+ smb_kshare.o \
+ smb_kutil.o \
+ smb_lock.o \
+ smb_lock_byte_range.o \
+ smb_locking_andx.o \
+ smb_logoff_andx.o \
+ smb_mangle_name.o \
+ smb_mbuf_marshaling.o \
+ smb_mbuf_util.o \
+ smb_negotiate.o \
+ smb_net.o \
+ smb_node.o \
+ smb_nt_cancel.o \
+ smb_nt_create_andx.o \
+ smb_nt_transact_create.o \
+ smb_nt_transact_ioctl.o \
+ smb_nt_transact_notify_change.o \
+ smb_nt_transact_quota.o \
+ smb_nt_transact_security.o \
+ smb_odir.o \
+ smb_ofile.o \
+ smb_open_andx.o \
+ smb_opipe.o \
+ smb_oplock.o \
+ smb_pathname.o \
+ smb_print.o \
+ smb_process_exit.o \
+ smb_query_fileinfo.o \
+ smb_read.o \
+ smb_rename.o \
+ smb_sd.o \
+ smb_seek.o \
+ smb_server.o \
+ smb_session.o \
+ smb_session_setup_andx.o \
+ smb_set_fileinfo.o \
+ smb_signing.o \
+ smb_tree.o \
+ smb_trans2_create_directory.o \
+ smb_trans2_dfs.o \
+ smb_trans2_find.o \
+ smb_tree_connect.o \
+ smb_unlock_byte_range.o \
+ smb_user.o \
+ smb_vfs.o \
+ smb_vops.o \
+ smb_vss.o \
+ smb_write.o \
+ smb_write_raw.o
+
+PCFS_OBJS += pc_alloc.o pc_dir.o pc_node.o pc_subr.o \
+ pc_vfsops.o pc_vnops.o
+
+PROC_OBJS += prcontrol.o prioctl.o prsubr.o prusrio.o \
+ prvfsops.o prvnops.o
+
+MNTFS_OBJS += mntvfsops.o mntvnops.o
+
+SHAREFS_OBJS += sharetab.o sharefs_vfsops.o sharefs_vnops.o
+
+SPEC_OBJS += specsubr.o specvfsops.o specvnops.o
+
+SOCK_OBJS += socksubr.o sockvfsops.o sockparams.o \
+ socksyscalls.o socktpi.o sockstr.o \
+ sockcommon_vnops.o sockcommon_subr.o \
+ sockcommon_sops.o sockcommon.o \
+ sock_notsupp.o socknotify.o \
+ nl7c.o nl7curi.o nl7chttp.o nl7clogd.o \
+ nl7cnca.o sodirect.o sockfilter.o
+
+TMPFS_OBJS += tmp_dir.o tmp_subr.o tmp_tnode.o tmp_vfsops.o \
+ tmp_vnops.o
+
+UDFS_OBJS += udf_alloc.o udf_bmap.o udf_dir.o \
+ udf_inode.o udf_subr.o udf_vfsops.o \
+ udf_vnops.o
+
+UFS_OBJS += ufs_alloc.o ufs_bmap.o ufs_dir.o ufs_xattr.o \
+ ufs_inode.o ufs_subr.o ufs_tables.o ufs_vfsops.o \
+ ufs_vnops.o quota.o quotacalls.o quota_ufs.o \
+ ufs_filio.o ufs_lockfs.o ufs_thread.o ufs_trans.o \
+ ufs_acl.o ufs_panic.o ufs_directio.o ufs_log.o \
+ ufs_extvnops.o ufs_snap.o lufs.o lufs_thread.o \
+ lufs_log.o lufs_map.o lufs_top.o lufs_debug.o
+VSCAN_OBJS += vscan_drv.o vscan_svc.o vscan_door.o
+
+NSMB_OBJS += smb_conn.o smb_dev.o smb_iod.o smb_pass.o \
+ smb_rq.o smb_sign.o smb_smb.o smb_subrs.o \
+ smb_time.o smb_tran.o smb_trantcp.o smb_usr.o \
+ subr_mchain.o
+
+SMBFS_COMMON_OBJS += smbfs_ntacl.o
+SMBFS_OBJS += smbfs_vfsops.o smbfs_vnops.o smbfs_node.o \
+ smbfs_acl.o smbfs_client.o smbfs_smb.o \
+ smbfs_subr.o smbfs_subr2.o \
+ smbfs_rwlock.o smbfs_xattr.o \
+ $(SMBFS_COMMON_OBJS)
+
+
+#
+# LVM modules
+#
+MD_OBJS += md.o md_error.o md_ioctl.o md_mddb.o md_names.o \
+ md_med.o md_rename.o md_subr.o
+
+MD_COMMON_OBJS = md_convert.o md_crc.o md_revchk.o
+
+MD_DERIVED_OBJS = metamed_xdr.o meta_basic_xdr.o
+
+SOFTPART_OBJS += sp.o sp_ioctl.o
+
+STRIPE_OBJS += stripe.o stripe_ioctl.o
+
+HOTSPARES_OBJS += hotspares.o
+
+RAID_OBJS += raid.o raid_ioctl.o raid_replay.o raid_resync.o raid_hotspare.o
+
+MIRROR_OBJS += mirror.o mirror_ioctl.o mirror_resync.o
+
+NOTIFY_OBJS += md_notify.o
+
+TRANS_OBJS += mdtrans.o trans_ioctl.o trans_log.o
+
+ZFS_COMMON_OBJS += \
+ arc.o \
+ bplist.o \
+ bpobj.o \
+ dbuf.o \
+ ddt.o \
+ ddt_zap.o \
+ dmu.o \
+ dmu_diff.o \
+ dmu_send.o \
+ dmu_object.o \
+ dmu_objset.o \
+ dmu_traverse.o \
+ dmu_tx.o \
+ dnode.o \
+ dnode_sync.o \
+ dsl_dir.o \
+ dsl_dataset.o \
+ dsl_deadlist.o \
+ dsl_pool.o \
+ dsl_synctask.o \
+ dmu_zfetch.o \
+ dsl_deleg.o \
+ dsl_prop.o \
+ dsl_scan.o \
+ gzip.o \
+ lzjb.o \
+ metaslab.o \
+ refcount.o \
+ sa.o \
+ sha256.o \
+ spa.o \
+ spa_config.o \
+ spa_errlog.o \
+ spa_history.o \
+ spa_misc.o \
+ space_map.o \
+ txg.o \
+ uberblock.o \
+ unique.o \
+ vdev.o \
+ vdev_cache.o \
+ vdev_file.o \
+ vdev_label.o \
+ vdev_mirror.o \
+ vdev_missing.o \
+ vdev_queue.o \
+ vdev_raidz.o \
+ vdev_root.o \
+ zap.o \
+ zap_leaf.o \
+ zap_micro.o \
+ zfs_byteswap.o \
+ zfs_debug.o \
+ zfs_fm.o \
+ zfs_fuid.o \
+ zfs_sa.o \
+ zfs_znode.o \
+ zil.o \
+ zio.o \
+ zio_checksum.o \
+ zio_compress.o \
+ zio_inject.o \
+ zle.o \
+ zrlock.o
+
+ZFS_SHARED_OBJS += \
+ zfs_namecheck.o \
+ zfs_deleg.o \
+ zfs_prop.o \
+ zfs_comutil.o \
+ zfs_fletcher.o \
+ zpool_prop.o \
+ zprop_common.o
+
+ZFS_OBJS += \
+ $(ZFS_COMMON_OBJS) \
+ $(ZFS_SHARED_OBJS) \
+ vdev_disk.o \
+ zfs_acl.o \
+ zfs_ctldir.o \
+ zfs_dir.o \
+ zfs_ioctl.o \
+ zfs_log.o \
+ zfs_onexit.o \
+ zfs_replay.o \
+ zfs_rlock.o \
+ rrwlock.o \
+ zfs_vfsops.o \
+ zfs_vnops.o \
+ zvol.o
+
+ZUT_OBJS += \
+ zut.o
+
+#
+# streams modules
+#
+BUFMOD_OBJS += bufmod.o
+
+CONNLD_OBJS += connld.o
+
+DEDUMP_OBJS += dedump.o
+
+DRCOMPAT_OBJS += drcompat.o
+
+LDLINUX_OBJS += ldlinux.o
+
+LDTERM_OBJS += ldterm.o uwidth.o
+
+PCKT_OBJS += pckt.o
+
+PFMOD_OBJS += pfmod.o
+
+PTEM_OBJS += ptem.o
+
+REDIRMOD_OBJS += strredirm.o
+
+TIMOD_OBJS += timod.o
+
+TIRDWR_OBJS += tirdwr.o
+
+TTCOMPAT_OBJS +=ttcompat.o
+
+LOG_OBJS += log.o
+
+PIPEMOD_OBJS += pipemod.o
+
+RPCMOD_OBJS += rpcmod.o clnt_cots.o clnt_clts.o \
+ clnt_gen.o clnt_perr.o mt_rpcinit.o rpc_calmsg.o \
+ rpc_prot.o rpc_sztypes.o rpc_subr.o rpcb_prot.o \
+ svc.o svc_clts.o svc_gen.o svc_cots.o \
+ rpcsys.o xdr_sizeof.o clnt_rdma.o svc_rdma.o \
+ xdr_rdma.o rdma_subr.o xdrrdma_sizeof.o
+
+TLIMOD_OBJS += tlimod.o t_kalloc.o t_kbind.o t_kclose.o \
+ t_kconnect.o t_kfree.o t_kgtstate.o t_kopen.o \
+ t_krcvudat.o t_ksndudat.o t_kspoll.o t_kunbind.o \
+ t_kutil.o
+
+RLMOD_OBJS += rlmod.o
+
+TELMOD_OBJS += telmod.o
+
+CRYPTMOD_OBJS += cryptmod.o
+
+KB_OBJS += kbd.o keytables.o
+
+#
+# ID mapping module
+#
+IDMAP_OBJS += idmap_mod.o idmap_kapi.o idmap_xdr.o idmap_cache.o
+
+#
+# scheduling class modules
+#
+SDC_OBJS += sysdc.o
+
+RT_OBJS += rt.o
+RT_DPTBL_OBJS += rt_dptbl.o
+
+TS_OBJS += ts.o
+TS_DPTBL_OBJS += ts_dptbl.o
+
+IA_OBJS += ia.o
+
+FSS_OBJS += fss.o
+
+FX_OBJS += fx.o
+FX_DPTBL_OBJS += fx_dptbl.o
+
+#
+# Inter-Process Communication (IPC) modules
+#
+IPC_OBJS += ipc.o
+
+IPCMSG_OBJS += msg.o
+
+IPCSEM_OBJS += sem.o
+
+IPCSHM_OBJS += shm.o
+
+#
+# bignum module
+#
+COMMON_BIGNUM_OBJS += bignum_mod.o bignumimpl.o
+
+BIGNUM_OBJS += $(COMMON_BIGNUM_OBJS) $(BIGNUM_PSR_OBJS)
+
+#
+# kernel cryptographic framework
+#
+KCF_OBJS += kcf.o kcf_callprov.o kcf_cbufcall.o kcf_cipher.o kcf_crypto.o \
+ kcf_cryptoadm.o kcf_ctxops.o kcf_digest.o kcf_dual.o \
+ kcf_keys.o kcf_mac.o kcf_mech_tabs.o kcf_miscapi.o \
+ kcf_object.o kcf_policy.o kcf_prov_lib.o kcf_prov_tabs.o \
+ kcf_sched.o kcf_session.o kcf_sign.o kcf_spi.o kcf_verify.o \
+ kcf_random.o modes.o ecb.o cbc.o ctr.o ccm.o gcm.o \
+ fips_random.o fips_checksum.o fips_test_vectors.o
+
+CRYPTOADM_OBJS += cryptoadm.o
+
+CRYPTO_OBJS += crypto.o
+
+DPROV_OBJS += dprov.o
+
+DCA_OBJS += dca.o dca_3des.o dca_debug.o dca_dsa.o dca_kstat.o dca_rng.o \
+ dca_rsa.o
+
+AESPROV_OBJS += aes.o aes_impl.o aes_modes.o fips_aes_util.o
+
+ARCFOURPROV_OBJS += arcfour.o arcfour_crypt.o
+
+BLOWFISHPROV_OBJS += blowfish.o blowfish_impl.o
+
+ECCPROV_OBJS += ecc.o ec.o ec2_163.o ec2_mont.o ecdecode.o ecl_mult.o \
+ ecp_384.o ecp_jac.o ec2_193.o ecl.o ecp_192.o ecp_521.o \
+ ecp_jm.o ec2_233.o ecl_curve.o ecp_224.o ecp_aff.o \
+ ecp_mont.o ec2_aff.o ec_naf.o ecl_gf.o ecp_256.o mp_gf2m.o \
+ mpi.o mplogic.o mpmontg.o mpprime.o oid.o \
+ secitem.o ec2_test.o ecp_test.o fips_ecc_util.o
+
+RSAPROV_OBJS += rsa.o rsa_impl.o pkcs1.o fips_rsa_util.o
+
+SWRANDPROV_OBJS += swrand.o fips_random_util.o
+
+#
+# kernel SSL
+#
+KSSL_OBJS += kssl.o ksslioctl.o
+
+KSSL_SOCKFIL_MOD_OBJS += ksslfilter.o ksslapi.o ksslrec.o
+
+#
+# misc. modules
+#
+
+C2AUDIT_OBJS += adr.o audit.o audit_event.o audit_io.o \
+ audit_path.o audit_start.o audit_syscalls.o audit_token.o \
+ audit_mem.o
+
+PCIC_OBJS += pcic.o
+
+RPCSEC_OBJS += secmod.o sec_clnt.o sec_svc.o sec_gen.o \
+ auth_des.o auth_kern.o auth_none.o auth_loopb.o\
+ authdesprt.o authdesubr.o authu_prot.o \
+ key_call.o key_prot.o svc_authu.o svcauthdes.o
+
+RPCSEC_GSS_OBJS += rpcsec_gssmod.o rpcsec_gss.o rpcsec_gss_misc.o \
+ rpcsec_gss_utils.o svc_rpcsec_gss.o
+
+CONSCONFIG_OBJS += consconfig.o
+
+CONSCONFIG_DACF_OBJS += consconfig_dacf.o consplat.o
+
+TEM_OBJS += tem.o tem_safe.o 6x10.o 7x14.o 12x22.o
+
+KBTRANS_OBJS += \
+ kbtrans.o \
+ kbtrans_keytables.o \
+ kbtrans_polled.o \
+ kbtrans_streams.o \
+ usb_keytables.o
+
+KGSSD_OBJS += gssd_clnt_stubs.o gssd_handle.o gssd_prot.o \
+ gss_display_name.o gss_release_name.o gss_import_name.o \
+ gss_release_buffer.o gss_release_oid_set.o gen_oids.o gssdmod.o
+
+KGSSD_DERIVED_OBJS = gssd_xdr.o
+
+KGSS_DUMMY_OBJS += dmech.o
+
+KSOCKET_OBJS += ksocket.o ksocket_mod.o
+
+CRYPTO= cksumtypes.o decrypt.o encrypt.o encrypt_length.o etypes.o \
+ nfold.o verify_checksum.o prng.o block_size.o make_checksum.o\
+ checksum_length.o hmac.o default_state.o mandatory_sumtype.o
+
+# crypto/des
+CRYPTO_DES= f_cbc.o f_cksum.o f_parity.o weak_key.o d3_cbc.o ef_crypto.o
+
+CRYPTO_DK= checksum.o derive.o dk_decrypt.o dk_encrypt.o
+
+CRYPTO_ARCFOUR= k5_arcfour.o
+
+# crypto/enc_provider
+CRYPTO_ENC= des.o des3.o arcfour_provider.o aes_provider.o
+
+# crypto/hash_provider
+CRYPTO_HASH= hash_kef_generic.o hash_kmd5.o hash_crc32.o hash_ksha1.o
+
+# crypto/keyhash_provider
+CRYPTO_KEYHASH= descbc.o k5_kmd5des.o k_hmac_md5.o
+
+# crypto/crc32
+CRYPTO_CRC32= crc32.o
+
+# crypto/old
+CRYPTO_OLD= old_decrypt.o old_encrypt.o
+
+# crypto/raw
+CRYPTO_RAW= raw_decrypt.o raw_encrypt.o
+
+K5_KRB= kfree.o copy_key.o \
+ parse.o init_ctx.o \
+ ser_adata.o ser_addr.o \
+ ser_auth.o ser_cksum.o \
+ ser_key.o ser_princ.o \
+ serialize.o unparse.o \
+ ser_actx.o
+
+K5_OS= timeofday.o toffset.o \
+ init_os_ctx.o c_ustime.o
+
+SEAL=
+# EXPORT DELETE START
+SEAL= seal.o unseal.o
+# EXPORT DELETE END
+
+MECH= delete_sec_context.o \
+ import_sec_context.o \
+ gssapi_krb5.o \
+ k5seal.o k5unseal.o k5sealv3.o \
+ ser_sctx.o \
+ sign.o \
+ util_crypt.o \
+ util_validate.o util_ordering.o \
+ util_seqnum.o util_set.o util_seed.o \
+ wrap_size_limit.o verify.o
+
+
+
+MECH_GEN= util_token.o
+
+
+KGSS_KRB5_OBJS += krb5mech.o \
+ $(MECH) $(SEAL) $(MECH_GEN) \
+ $(CRYPTO) $(CRYPTO_DES) $(CRYPTO_DK) $(CRYPTO_ARCFOUR) \
+ $(CRYPTO_ENC) $(CRYPTO_HASH) \
+ $(CRYPTO_KEYHASH) $(CRYPTO_CRC32) \
+ $(CRYPTO_OLD) \
+ $(CRYPTO_RAW) $(K5_KRB) $(K5_OS)
+
+DES_OBJS += des_crypt.o des_impl.o des_ks.o des_soft.o fips_des_util.o
+
+DLBOOT_OBJS += bootparam_xdr.o nfs_dlinet.o scan.o
+
+KRTLD_OBJS += kobj_bootflags.o getoptstr.o \
+ kobj.o kobj_kdi.o kobj_lm.o kobj_subr.o
+
+MOD_OBJS += modctl.o modsubr.o modsysfile.o modconf.o modhash.o
+
+STRPLUMB_OBJS += strplumb.o
+
+CPR_OBJS += cpr_driver.o cpr_dump.o \
+ cpr_main.o cpr_misc.o cpr_mod.o cpr_stat.o \
+ cpr_uthread.o
+
+PROF_OBJS += prf.o
+
+SE_OBJS += se_driver.o
+
+SYSACCT_OBJS += acct.o
+
+ACCTCTL_OBJS += acctctl.o
+
+EXACCTSYS_OBJS += exacctsys.o
+
+KAIO_OBJS += aio.o
+
+PCMCIA_OBJS += pcmcia.o cs.o cis.o cis_callout.o cis_handlers.o cis_params.o
+
+BUSRA_OBJS += busra.o
+
+PCS_OBJS += pcs.o
+
+PCAN_OBJS += pcan.o
+
+PCATA_OBJS += pcide.o pcdisk.o pclabel.o pcata.o
+
+PCSER_OBJS += pcser.o pcser_cis.o
+
+PCWL_OBJS += pcwl.o
+
+PSET_OBJS += pset.o
+
+OHCI_OBJS += ohci.o ohci_hub.o ohci_polled.o
+
+UHCI_OBJS += uhci.o uhciutil.o uhcitgt.o uhcihub.o uhcipolled.o
+
+EHCI_OBJS += ehci.o ehci_hub.o ehci_xfer.o ehci_intr.o ehci_util.o ehci_polled.o ehci_isoch.o ehci_isoch_util.o
+
+HUBD_OBJS += hubd.o
+
+USB_MID_OBJS += usb_mid.o
+
+USB_IA_OBJS += usb_ia.o
+
+UWBA_OBJS += uwba.o uwbai.o
+
+SCSA2USB_OBJS += scsa2usb.o usb_ms_bulkonly.o usb_ms_cbi.o
+
+HWAHC_OBJS += hwahc.o hwahc_util.o
+
+WUSB_DF_OBJS += wusb_df.o
+WUSB_FWMOD_OBJS += wusb_fwmod.o
+
+IPF_OBJS += ip_fil_solaris.o fil.o solaris.o ip_state.o ip_frag.o ip_nat.o \
+ ip_proxy.o ip_auth.o ip_pool.o ip_htable.o ip_lookup.o \
+ ip_log.o misc.o ip_compat.o ip_nat6.o drand48.o
+
+IBD_OBJS += ibd.o ibd_cm.o
+
+EIBNX_OBJS += enx_main.o enx_hdlrs.o enx_ibt.o enx_log.o enx_fip.o \
+ enx_misc.o enx_q.o enx_ctl.o
+
+EOIB_OBJS += eib_adm.o eib_chan.o eib_cmn.o eib_ctl.o eib_data.o \
+ eib_fip.o eib_ibt.o eib_log.o eib_mac.o eib_main.o \
+ eib_rsrc.o eib_svc.o eib_vnic.o
+
+DLPISTUB_OBJS += dlpistub.o
+
+SDP_OBJS += sdpddi.o
+
+TRILL_OBJS += trill.o
+
+CTF_OBJS += ctf_create.o ctf_decl.o ctf_error.o ctf_hash.o ctf_labels.o \
+ ctf_lookup.o ctf_open.o ctf_types.o ctf_util.o ctf_subr.o ctf_mod.o
+
+SMBIOS_OBJS += smb_error.o smb_info.o smb_open.o smb_subr.o smb_dev.o
+
+RPCIB_OBJS += rpcib.o
+
+KMDB_OBJS += kdrv.o
+
+AFE_OBJS += afe.o
+
+BGE_OBJS += bge_main2.o bge_chip2.o bge_kstats.o bge_log.o bge_ndd.o \
+ bge_atomic.o bge_mii.o bge_send.o bge_recv2.o bge_mii_5906.o
+
+DMFE_OBJS += dmfe_log.o dmfe_main.o dmfe_mii.o
+
+ELXL_OBJS += elxl.o
+
+HME_OBJS += hme.o
+
+IXGB_OBJS += ixgb.o ixgb_atomic.o ixgb_chip.o ixgb_gld.o ixgb_kstats.o \
+ ixgb_log.o ixgb_ndd.o ixgb_rx.o ixgb_tx.o ixgb_xmii.o
+
+NGE_OBJS += nge_main.o nge_atomic.o nge_chip.o nge_ndd.o nge_kstats.o \
+ nge_log.o nge_rx.o nge_tx.o nge_xmii.o
+
+RGE_OBJS += rge_main.o rge_chip.o rge_ndd.o rge_kstats.o rge_log.o rge_rxtx.o
+
+URTW_OBJS += urtw.o
+
+ARN_OBJS += arn_hw.o arn_eeprom.o arn_mac.o arn_calib.o arn_ani.o arn_phy.o arn_regd.o arn_beacon.o \
+ arn_main.o arn_recv.o arn_xmit.o arn_rc.o
+
+ATH_OBJS += ath_aux.o ath_main.o ath_osdep.o ath_rate.o
+
+ATU_OBJS += atu.o
+
+IPW_OBJS += ipw2100_hw.o ipw2100.o
+
+IWI_OBJS += ipw2200_hw.o ipw2200.o
+
+IWH_OBJS += iwh.o
+
+IWK_OBJS += iwk2.o
+
+IWP_OBJS += iwp.o
+
+MWL_OBJS += mwl.o
+
+MWLFW_OBJS += mwlfw_mode.o
+
+WPI_OBJS += wpi.o
+
+RAL_OBJS += rt2560.o ral_rate.o
+
+RUM_OBJS += rum.o
+
+RWD_OBJS += rt2661.o
+
+RWN_OBJS += rt2860.o
+
+UATH_OBJS += uath.o
+
+UATHFW_OBJS += uathfw_mod.o
+
+URAL_OBJS += ural.o
+
+RTW_OBJS += rtw.o smc93cx6.o rtwphy.o rtwphyio.o
+
+ZYD_OBJS += zyd.o zyd_usb.o zyd_hw.o zyd_fw.o
+
+MXFE_OBJS += mxfe.o
+
+MPTSAS_OBJS += mptsas.o mptsas_impl.o mptsas_init.o mptsas_raid.o mptsas_smhba.o
+
+SFE_OBJS += sfe.o sfe_util.o
+
+BFE_OBJS += bfe.o
+
+BRIDGE_OBJS += bridge.o
+
+IDM_SHARED_OBJS += base64.o
+
+IDM_OBJS += $(IDM_SHARED_OBJS) \
+ idm.o idm_impl.o idm_text.o idm_conn_sm.o idm_so.o
+
+VR_OBJS += vr.o
+
+ATGE_OBJS += atge_main.o atge_l1e.o atge_mii.o atge_l1.o
+
+YGE_OBJS = yge.o
+
+#
+# Build up defines and paths.
+#
+LINT_DEFS += -Dunix
+
+#
+# This duality can be removed when the native and target compilers
+# are the same (or at least recognize the same command line syntax!)
+# It is a bug in the current compilation system that the assember
+# can't process the -Y I, flag.
+#
+NATIVE_INC_PATH += $(INC_PATH) $(CCYFLAG)$(UTSBASE)/common
+AS_INC_PATH += $(INC_PATH) -I$(UTSBASE)/common
+INCLUDE_PATH += $(INC_PATH) $(CCYFLAG)$(UTSBASE)/common
+
+PCIEB_OBJS += pcieb.o
+
+# Chelsio N110 10G NIC driver module
+#
+CH_OBJS = ch.o glue.o pe.o sge.o
+
+CH_COM_OBJS = ch_mac.o ch_subr.o cspi.o espi.o ixf1010.o mc3.o mc4.o mc5.o \
+ mv88e1xxx.o mv88x201x.o my3126.o pm3393.o tp.o ulp.o \
+ vsc7321.o vsc7326.o xpak.o
+
+#
+# PCI strings file
+#
+PCI_STRING_OBJS = pci_strings.o
+
+NET_DACF_OBJS += net_dacf.o
+
+#
+# Xframe 10G NIC driver module
+#
+XGE_OBJS = xge.o xgell.o
+
+XGE_HAL_OBJS = xgehal-channel.o xgehal-fifo.o xgehal-ring.o xgehal-config.o \
+ xgehal-driver.o xgehal-mm.o xgehal-stats.o xgehal-device.o \
+ xge-queue.o xgehal-mgmt.o xgehal-mgmtaux.o
+
+#
+# e1000g module
+#
+E1000G_OBJS += e1000_80003es2lan.o e1000_82540.o e1000_82541.o e1000_82542.o \
+ e1000_82543.o e1000_82571.o e1000_api.o e1000_ich8lan.o \
+ e1000_mac.o e1000_manage.o e1000_nvm.o e1000_osdep.o \
+ e1000_phy.o e1000g_debug.o e1000g_main.o e1000g_alloc.o \
+ e1000g_tx.o e1000g_rx.o e1000g_stat.o
+
+#
+# Intel 82575 1G NIC driver module
+#
+IGB_OBJS = igb_82575.o igb_api.o igb_mac.o igb_manage.o \
+ igb_nvm.o igb_osdep.o igb_phy.o igb_buf.o \
+ igb_debug.o igb_gld.o igb_log.o igb_main.o \
+ igb_rx.o igb_stat.o igb_tx.o
+
+#
+# Intel 10GbE PCIE NIC driver module
+#
+IXGBE_OBJS = ixgbe_82598.o ixgbe_82599.o ixgbe_api.o \
+ ixgbe_common.o ixgbe_phy.o \
+ ixgbe_buf.o ixgbe_debug.o ixgbe_gld.o \
+ ixgbe_log.o ixgbe_main.o \
+ ixgbe_osdep.o ixgbe_rx.o ixgbe_stat.o \
+ ixgbe_tx.o
+
+#
+# NIU 10G/1G driver module
+#
+NXGE_OBJS = nxge_mac.o nxge_ipp.o nxge_rxdma.o \
+ nxge_txdma.o nxge_txc.o nxge_main.o \
+ nxge_hw.o nxge_fzc.o nxge_virtual.o \
+ nxge_send.o nxge_classify.o nxge_fflp.o \
+ nxge_fflp_hash.o nxge_ndd.o nxge_kstats.o \
+ nxge_zcp.o nxge_fm.o nxge_espc.o nxge_hv.o \
+ nxge_hio.o nxge_hio_guest.o nxge_intr.o
+
+NXGE_NPI_OBJS = \
+ npi.o npi_mac.o npi_ipp.o \
+ npi_txdma.o npi_rxdma.o npi_txc.o \
+ npi_zcp.o npi_espc.o npi_fflp.o \
+ npi_vir.o
+
+NXGE_HCALL_OBJS = \
+ nxge_hcall.o
+
+#
+# kiconv modules
+#
+KICONV_EMEA_OBJS += kiconv_emea.o
+
+#
+# blk2scsa
+#
+BLK2SCSA_OBJS = blk2scsa.o
+
+KICONV_JA_OBJS += kiconv_ja.o
+
+KICONV_KO_OBJS += kiconv_cck_common.o kiconv_ko.o
+
+KICONV_SC_OBJS += kiconv_cck_common.o kiconv_sc.o
+
+KICONV_TC_OBJS += kiconv_cck_common.o kiconv_tc.o
+
+#
+# AAC module
+#
+AAC_OBJS = aac.o aac_ioctl.o
+
+#
+# sdcard modules
+#
+SDA_OBJS = sda_cmd.o sda_host.o sda_init.o sda_mem.o sda_mod.o sda_slot.o
+SDHOST_OBJS = sdhost.o
+
+#
+# hxge 10G driver module
+#
+HXGE_OBJS = hxge_main.o hxge_vmac.o hxge_send.o \
+ hxge_txdma.o hxge_rxdma.o hxge_virtual.o \
+ hxge_fm.o hxge_fzc.o hxge_hw.o hxge_kstats.o \
+ hxge_ndd.o hxge_pfc.o \
+ hpi.o hpi_vmac.o hpi_rxdma.o hpi_txdma.o \
+ hpi_vir.o hpi_pfc.o
+
+#
+# MEGARAID_SAS module
+#
+MEGA_SAS_OBJS = megaraid_sas.o
+
+#
+# MR_SAS module
+#
+MR_SAS_OBJS = mr_sas.o
+
+#
+# ISCSI_INITIATOR module
+#
+ISCSI_INITIATOR_OBJS = chap.o iscsi_io.o iscsi_thread.o \
+ iscsi_ioctl.o iscsid.o iscsi.o \
+ iscsi_login.o isns_client.o iscsiAuthClient.o \
+ iscsi_lun.o iscsiAuthClientGlue.o \
+ iscsi_net.o nvfile.o iscsi_cmd.o \
+ iscsi_queue.o persistent.o iscsi_conn.o \
+ iscsi_sess.o radius_auth.o iscsi_crc.o \
+ iscsi_stats.o radius_packet.o iscsi_doorclt.o \
+ iscsi_targetparam.o utils.o kifconf.o
+
+#
+# ntxn 10Gb/1Gb NIC driver module
+#
+NTXN_OBJS = unm_nic_init.o unm_gem.o unm_nic_hw.o unm_ndd.o \
+ unm_nic_main.o unm_nic_isr.o unm_nic_ctx.o niu.o
+
+#
+# Myricom 10Gb NIC driver module
+#
+MYRI10GE_OBJS = myri10ge.o myri10ge_lro.o
+
+# nulldriver module
+#
+NULLDRIVER_OBJS = nulldriver.o
+
+TPM_OBJS = tpm.o tpm_hcall.o
diff --git a/uts/common/dtrace/dtrace.c b/uts/common/dtrace/dtrace.c
index c721386280f8..2a9df6d403f2 100644
--- a/uts/common/dtrace/dtrace.c
+++ b/uts/common/dtrace/dtrace.c
@@ -20,12 +20,9 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* DTrace - Dynamic Tracing for Solaris
*
@@ -186,7 +183,9 @@ static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t dtrace_probegen; /* current probe generation */
static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */
static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */
+static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */
static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
+static int dtrace_dynvar_failclean; /* dynvars failed to clean */
/*
* DTrace Locking
@@ -240,10 +239,16 @@ static void
dtrace_nullop(void)
{}
+static int
+dtrace_enable_nullop(void)
+{
+ return (0);
+}
+
static dtrace_pops_t dtrace_provider_ops = {
(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
(void (*)(void *, struct modctl *))dtrace_nullop,
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
@@ -427,6 +432,7 @@ dtrace_load##bits(uintptr_t addr) \
#define DTRACE_DYNHASH_SINK 1
#define DTRACE_DYNHASH_VALID 2
+#define DTRACE_MATCH_FAIL -1
#define DTRACE_MATCH_NEXT 0
#define DTRACE_MATCH_DONE 1
#define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0')
@@ -1182,12 +1188,12 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate)
{
dtrace_dynvar_t *dirty;
dtrace_dstate_percpu_t *dcpu;
- int i, work = 0;
+ dtrace_dynvar_t **rinsep;
+ int i, j, work = 0;
for (i = 0; i < NCPU; i++) {
dcpu = &dstate->dtds_percpu[i];
-
- ASSERT(dcpu->dtdsc_rinsing == NULL);
+ rinsep = &dcpu->dtdsc_rinsing;
/*
* If the dirty list is NULL, there is no dirty work to do.
@@ -1195,14 +1201,62 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate)
if (dcpu->dtdsc_dirty == NULL)
continue;
- /*
- * If the clean list is non-NULL, then we're not going to do
- * any work for this CPU -- it means that there has not been
- * a dtrace_dynvar() allocation on this CPU (or from this CPU)
- * since the last time we cleaned house.
- */
- if (dcpu->dtdsc_clean != NULL)
+ if (dcpu->dtdsc_rinsing != NULL) {
+ /*
+ * If the rinsing list is non-NULL, then it is because
+ * this CPU was selected to accept another CPU's
+ * dirty list -- and since that time, dirty buffers
+ * have accumulated. This is a highly unlikely
+ * condition, but we choose to ignore the dirty
+ * buffers -- they'll be picked up a future cleanse.
+ */
continue;
+ }
+
+ if (dcpu->dtdsc_clean != NULL) {
+ /*
+ * If the clean list is non-NULL, then we're in a
+ * situation where a CPU has done deallocations (we
+ * have a non-NULL dirty list) but no allocations (we
+ * also have a non-NULL clean list). We can't simply
+ * move the dirty list into the clean list on this
+ * CPU, yet we also don't want to allow this condition
+ * to persist, lest a short clean list prevent a
+ * massive dirty list from being cleaned (which in
+ * turn could lead to otherwise avoidable dynamic
+ * drops). To deal with this, we look for some CPU
+ * with a NULL clean list, NULL dirty list, and NULL
+ * rinsing list -- and then we borrow this CPU to
+ * rinse our dirty list.
+ */
+ for (j = 0; j < NCPU; j++) {
+ dtrace_dstate_percpu_t *rinser;
+
+ rinser = &dstate->dtds_percpu[j];
+
+ if (rinser->dtdsc_rinsing != NULL)
+ continue;
+
+ if (rinser->dtdsc_dirty != NULL)
+ continue;
+
+ if (rinser->dtdsc_clean != NULL)
+ continue;
+
+ rinsep = &rinser->dtdsc_rinsing;
+ break;
+ }
+
+ if (j == NCPU) {
+ /*
+ * We were unable to find another CPU that
+ * could accept this dirty list -- we are
+ * therefore unable to clean it now.
+ */
+ dtrace_dynvar_failclean++;
+ continue;
+ }
+ }
work = 1;
@@ -1219,7 +1273,7 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate)
* on a hash chain, either the dirty list or the
* rinsing list for some CPU must be non-NULL.)
*/
- dcpu->dtdsc_rinsing = dirty;
+ *rinsep = dirty;
dtrace_membar_producer();
} while (dtrace_casptr(&dcpu->dtdsc_dirty,
dirty, NULL) != dirty);
@@ -1650,7 +1704,7 @@ retry:
ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);
/*
- * Now we'll move the clean list to the free list.
+ * Now we'll move the clean list to our free list.
* It's impossible for this to fail: the only way
* the free list can be updated is through this
* code path, and only one CPU can own the clean list.
@@ -1663,6 +1717,7 @@ retry:
* owners of the clean lists out before resetting
* the clean lists.
*/
+ dcpu = &dstate->dtds_percpu[me];
rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
ASSERT(rval == NULL);
goto retry;
@@ -3600,7 +3655,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
int64_t index = (int64_t)tupregs[1].dttk_value;
int64_t remaining = (int64_t)tupregs[2].dttk_value;
size_t len = dtrace_strlen((char *)s, size);
- int64_t i = 0;
+ int64_t i;
if (!dtrace_canload(s, len + 1, mstate, vstate)) {
regs[rd] = NULL;
@@ -6655,7 +6710,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
{
dtrace_probe_t template, *probe;
dtrace_hash_t *hash = NULL;
- int len, best = INT_MAX, nmatched = 0;
+ int len, rc, best = INT_MAX, nmatched = 0;
dtrace_id_t i;
ASSERT(MUTEX_HELD(&dtrace_lock));
@@ -6667,7 +6722,8 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
if (pkp->dtpk_id != DTRACE_IDNONE) {
if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
- (void) (*matched)(probe, arg);
+ if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
+ return (DTRACE_MATCH_FAIL);
nmatched++;
}
return (nmatched);
@@ -6714,8 +6770,12 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
nmatched++;
- if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
+ if ((rc = (*matched)(probe, arg)) !=
+ DTRACE_MATCH_NEXT) {
+ if (rc == DTRACE_MATCH_FAIL)
+ return (DTRACE_MATCH_FAIL);
break;
+ }
}
return (nmatched);
@@ -6734,8 +6794,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
nmatched++;
- if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
+ if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
+ if (rc == DTRACE_MATCH_FAIL)
+ return (DTRACE_MATCH_FAIL);
break;
+ }
}
return (nmatched);
@@ -6955,7 +7018,7 @@ dtrace_unregister(dtrace_provider_id_t id)
dtrace_probe_t *probe, *first = NULL;
if (old->dtpv_pops.dtps_enable ==
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) {
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
/*
* If DTrace itself is the provider, we're called with locks
* already held.
@@ -7101,7 +7164,7 @@ dtrace_invalidate(dtrace_provider_id_t id)
dtrace_provider_t *pvp = (dtrace_provider_t *)id;
ASSERT(pvp->dtpv_pops.dtps_enable !=
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
mutex_enter(&dtrace_provider_lock);
mutex_enter(&dtrace_lock);
@@ -7142,7 +7205,7 @@ dtrace_condense(dtrace_provider_id_t id)
* Make sure this isn't the dtrace provider itself.
*/
ASSERT(prov->dtpv_pops.dtps_enable !=
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
mutex_enter(&dtrace_provider_lock);
mutex_enter(&dtrace_lock);
@@ -8103,7 +8166,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
break;
default:
- err += efunc(dp->dtdo_len - 1, "bad return size");
+ err += efunc(dp->dtdo_len - 1, "bad return size\n");
}
}
@@ -9096,7 +9159,7 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
return (ecb);
}
-static void
+static int
dtrace_ecb_enable(dtrace_ecb_t *ecb)
{
dtrace_probe_t *probe = ecb->dte_probe;
@@ -9109,7 +9172,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
/*
* This is the NULL probe -- there's nothing to do.
*/
- return;
+ return (0);
}
if (probe->dtpr_ecb == NULL) {
@@ -9123,8 +9186,8 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
if (ecb->dte_predicate != NULL)
probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;
- prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
- probe->dtpr_id, probe->dtpr_arg);
+ return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
+ probe->dtpr_id, probe->dtpr_arg));
} else {
/*
* This probe is already active. Swing the last pointer to
@@ -9137,6 +9200,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
probe->dtpr_predcache = 0;
dtrace_sync();
+ return (0);
}
}
@@ -9920,7 +9984,9 @@ dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
return (DTRACE_MATCH_DONE);
- dtrace_ecb_enable(ecb);
+ if (dtrace_ecb_enable(ecb) < 0)
+ return (DTRACE_MATCH_FAIL);
+
return (DTRACE_MATCH_NEXT);
}
@@ -10557,6 +10623,7 @@ dtrace_enabling_destroy(dtrace_enabling_t *enab)
ASSERT(enab->dten_vstate->dtvs_state != NULL);
ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
enab->dten_vstate->dtvs_state->dts_nretained--;
+ dtrace_retained_gen++;
}
if (enab->dten_prev == NULL) {
@@ -10599,6 +10666,7 @@ dtrace_enabling_retain(dtrace_enabling_t *enab)
return (ENOSPC);
state->dts_nretained++;
+ dtrace_retained_gen++;
if (dtrace_retained == NULL) {
dtrace_retained = enab;
@@ -10713,7 +10781,7 @@ static int
dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
{
int i = 0;
- int matched = 0;
+ int total_matched = 0, matched = 0;
ASSERT(MUTEX_HELD(&cpu_lock));
ASSERT(MUTEX_HELD(&dtrace_lock));
@@ -10724,7 +10792,14 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
enab->dten_current = ep;
enab->dten_error = 0;
- matched += dtrace_probe_enable(&ep->dted_probe, enab);
+ /*
+ * If a provider failed to enable a probe then get out and
+ * let the consumer know we failed.
+ */
+ if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0)
+ return (EBUSY);
+
+ total_matched += matched;
if (enab->dten_error != 0) {
/*
@@ -10752,7 +10827,7 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
enab->dten_probegen = dtrace_probegen;
if (nmatched != NULL)
- *nmatched = matched;
+ *nmatched = total_matched;
return (0);
}
@@ -10766,13 +10841,22 @@ dtrace_enabling_matchall(void)
mutex_enter(&dtrace_lock);
/*
- * Because we can be called after dtrace_detach() has been called, we
- * cannot assert that there are retained enablings. We can safely
- * load from dtrace_retained, however: the taskq_destroy() at the
- * end of dtrace_detach() will block pending our completion.
+ * Iterate over all retained enablings to see if any probes match
+ * against them. We only perform this operation on enablings for which
+ * we have sufficient permissions by virtue of being in the global zone
+ * or in the same zone as the DTrace client. Because we can be called
+ * after dtrace_detach() has been called, we cannot assert that there
+ * are retained enablings. We can safely load from dtrace_retained,
+ * however: the taskq_destroy() at the end of dtrace_detach() will
+ * block pending our completion.
*/
- for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next)
- (void) dtrace_enabling_match(enab, NULL);
+ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
+ cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred;
+
+ if (INGLOBALZONE(curproc) ||
+ cr != NULL && getzoneid() == crgetzoneid(cr))
+ (void) dtrace_enabling_match(enab, NULL);
+ }
mutex_exit(&dtrace_lock);
mutex_exit(&cpu_lock);
@@ -10830,6 +10914,7 @@ dtrace_enabling_provide(dtrace_provider_t *prv)
{
int i, all = 0;
dtrace_probedesc_t desc;
+ dtrace_genid_t gen;
ASSERT(MUTEX_HELD(&dtrace_lock));
ASSERT(MUTEX_HELD(&dtrace_provider_lock));
@@ -10840,15 +10925,25 @@ dtrace_enabling_provide(dtrace_provider_t *prv)
}
do {
- dtrace_enabling_t *enab = dtrace_retained;
+ dtrace_enabling_t *enab;
void *parg = prv->dtpv_arg;
- for (; enab != NULL; enab = enab->dten_next) {
+retry:
+ gen = dtrace_retained_gen;
+ for (enab = dtrace_retained; enab != NULL;
+ enab = enab->dten_next) {
for (i = 0; i < enab->dten_ndesc; i++) {
desc = enab->dten_desc[i]->dted_probe;
mutex_exit(&dtrace_lock);
prv->dtpv_pops.dtps_provide(parg, &desc);
mutex_enter(&dtrace_lock);
+ /*
+ * Process the retained enablings again if
+ * they have changed while we weren't holding
+ * dtrace_lock.
+ */
+ if (gen != dtrace_retained_gen)
+ goto retry;
}
}
} while (all && (prv = prv->dtpv_next) != NULL);
@@ -10970,7 +11065,8 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp)
dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);
- if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) {
+ if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
+ dof->dofh_loadsz != hdr.dofh_loadsz) {
kmem_free(dof, hdr.dofh_loadsz);
*errp = EFAULT;
return (NULL);
@@ -11698,6 +11794,13 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
}
}
+ if (DOF_SEC_ISLOADABLE(sec->dofs_type) &&
+ !(sec->dofs_flags & DOF_SECF_LOAD)) {
+ dtrace_dof_error(dof, "loadable section with load "
+ "flag unset");
+ return (-1);
+ }
+
if (!(sec->dofs_flags & DOF_SECF_LOAD))
continue; /* just ignore non-loadable sections */
@@ -14390,7 +14493,8 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
* If this wasn't an open with the "helper" minor, then it must be
* the "dtrace" minor.
*/
- ASSERT(getminor(*devp) == DTRACEMNRN_DTRACE);
+ if (getminor(*devp) != DTRACEMNRN_DTRACE)
+ return (ENXIO);
/*
* If no DTRACE_PRIV_* bits are set in the credential, then the
@@ -14427,7 +14531,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
mutex_exit(&cpu_lock);
if (state == NULL) {
- if (--dtrace_opens == 0)
+ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
mutex_exit(&dtrace_lock);
return (EAGAIN);
@@ -14463,7 +14567,12 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
dtrace_state_destroy(state);
ASSERT(dtrace_opens > 0);
- if (--dtrace_opens == 0)
+
+ /*
+ * Only relinquish control of the kernel debugger interface when there
+ * are no consumers and no anonymous enablings.
+ */
+ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
mutex_exit(&dtrace_lock);
@@ -15458,7 +15567,8 @@ static struct dev_ops dtrace_ops = {
nodev, /* reset */
&dtrace_cb_ops, /* driver operations */
NULL, /* bus operations */
- nodev /* dev power */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
};
static struct modldrv modldrv = {
diff --git a/uts/common/dtrace/fasttrap.c b/uts/common/dtrace/fasttrap.c
index b7ca92f54a59..42263e4ef274 100644
--- a/uts/common/dtrace/fasttrap.c
+++ b/uts/common/dtrace/fasttrap.c
@@ -20,11 +20,10 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/atomic.h>
#include <sys/errno.h>
@@ -876,7 +875,7 @@ fasttrap_disable_callbacks(void)
}
/*ARGSUSED*/
-static void
+static int
fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
{
fasttrap_probe_t *probe = parg;
@@ -904,7 +903,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
* provider can't go away while we're in this code path.
*/
if (probe->ftp_prov->ftp_retired)
- return;
+ return (0);
/*
* If we can't find the process, it may be that we're in the context of
@@ -913,7 +912,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
*/
if ((p = sprlock(probe->ftp_pid)) == NULL) {
if ((curproc->p_flag & SFORKING) == 0)
- return;
+ return (0);
mutex_enter(&pidlock);
p = prfind(probe->ftp_pid);
@@ -975,7 +974,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
* drop our reference on the trap table entry.
*/
fasttrap_disable_callbacks();
- return;
+ return (0);
}
}
@@ -983,6 +982,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
sprunlock(p);
probe->ftp_enabled = 1;
+ return (0);
}
/*ARGSUSED*/
@@ -1946,7 +1946,8 @@ fasttrap_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
probe = kmem_alloc(size, KM_SLEEP);
- if (copyin(uprobe, probe, size) != 0) {
+ if (copyin(uprobe, probe, size) != 0 ||
+ probe->ftps_noffs != noffs) {
kmem_free(probe, size);
return (EFAULT);
}
@@ -2044,13 +2045,6 @@ err:
tp->ftt_proc->ftpc_acount != 0)
break;
- /*
- * The count of active providers can only be
- * decremented (i.e. to zero) during exec, exit, and
- * removal of a meta provider so it should be
- * impossible to drop the count during this operation().
- */
- ASSERT(tp->ftt_proc->ftpc_acount != 0);
tp = tp->ftt_next;
}
@@ -2346,7 +2340,8 @@ static struct dev_ops fasttrap_ops = {
nodev, /* reset */
&fasttrap_cb_ops, /* driver operations */
NULL, /* bus operations */
- nodev /* dev power */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
};
/*
diff --git a/uts/common/dtrace/lockstat.c b/uts/common/dtrace/lockstat.c
index 3eb76a061d32..69c8b7254486 100644
--- a/uts/common/dtrace/lockstat.c
+++ b/uts/common/dtrace/lockstat.c
@@ -19,11 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/param.h>
@@ -84,7 +83,7 @@ static kmutex_t lockstat_test; /* for testing purposes only */
static dtrace_provider_id_t lockstat_id;
/*ARGSUSED*/
-static void
+static int
lockstat_enable(void *arg, dtrace_id_t id, void *parg)
{
lockstat_probe_t *probe = parg;
@@ -103,6 +102,7 @@ lockstat_enable(void *arg, dtrace_id_t id, void *parg)
*/
mutex_enter(&lockstat_test);
mutex_exit(&lockstat_test);
+ return (0);
}
/*ARGSUSED*/
@@ -310,11 +310,13 @@ static struct dev_ops lockstat_ops = {
nulldev, /* reset */
&lockstat_cb_ops, /* cb_ops */
NULL, /* bus_ops */
+ NULL, /* power */
+ ddi_quiesce_not_needed, /* quiesce */
};
static struct modldrv modldrv = {
&mod_driverops, /* Type of module. This one is a driver */
- "Lock Statistics %I%", /* name of module */
+ "Lock Statistics", /* name of module */
&lockstat_ops, /* driver ops */
};
diff --git a/uts/common/dtrace/profile.c b/uts/common/dtrace/profile.c
index 8de919a851a2..c1a2d1f1c12f 100644
--- a/uts/common/dtrace/profile.c
+++ b/uts/common/dtrace/profile.c
@@ -19,11 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/errno.h>
#include <sys/stat.h>
@@ -361,7 +360,7 @@ profile_offline(void *arg, cpu_t *cpu, void *oarg)
}
/*ARGSUSED*/
-static void
+static int
profile_enable(void *arg, dtrace_id_t id, void *parg)
{
profile_probe_t *prof = parg;
@@ -391,6 +390,7 @@ profile_enable(void *arg, dtrace_id_t id, void *parg)
} else {
prof->prof_cyclic = cyclic_add_omni(&omni);
}
+ return (0);
}
/*ARGSUSED*/
@@ -539,7 +539,8 @@ static struct dev_ops profile_ops = {
nodev, /* reset */
&profile_cb_ops, /* driver operations */
NULL, /* bus operations */
- nodev /* dev power */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
};
/*
diff --git a/uts/common/dtrace/sdt_subr.c b/uts/common/dtrace/sdt_subr.c
index 66ff8a92a01b..242185071bb2 100644
--- a/uts/common/dtrace/sdt_subr.c
+++ b/uts/common/dtrace/sdt_subr.c
@@ -19,12 +19,9 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/sdt_impl.h>
static dtrace_pattr_t vtrace_attr = {
@@ -43,6 +40,14 @@ static dtrace_pattr_t info_attr = {
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
+static dtrace_pattr_t fc_attr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+};
+
static dtrace_pattr_t fpu_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
@@ -83,6 +88,14 @@ static dtrace_pattr_t xpv_attr = {
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM },
};
+static dtrace_pattr_t iscsi_attr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+};
+
sdt_provider_t sdt_providers[] = {
{ "vtrace", "__vtrace_", &vtrace_attr, 0 },
{ "sysinfo", "__cpu_sysinfo_", &info_attr, 0 },
@@ -91,11 +104,17 @@ sdt_provider_t sdt_providers[] = {
{ "sched", "__sched_", &stab_attr, 0 },
{ "proc", "__proc_", &stab_attr, 0 },
{ "io", "__io_", &stab_attr, 0 },
+ { "ip", "__ip_", &stab_attr, 0 },
+ { "tcp", "__tcp_", &stab_attr, 0 },
+ { "udp", "__udp_", &stab_attr, 0 },
{ "mib", "__mib_", &stab_attr, 0 },
{ "fsinfo", "__fsinfo_", &fsinfo_attr, 0 },
+ { "iscsi", "__iscsi_", &iscsi_attr, 0 },
{ "nfsv3", "__nfsv3_", &stab_attr, 0 },
{ "nfsv4", "__nfsv4_", &stab_attr, 0 },
{ "xpv", "__xpv_", &xpv_attr, 0 },
+ { "fc", "__fc_", &fc_attr, 0 },
+ { "srp", "__srp_", &fc_attr, 0 },
{ "sysevent", "__sysevent_", &stab_attr, 0 },
{ "sdt", NULL, &sdt_attr, 0 },
{ NULL }
@@ -169,6 +188,73 @@ sdt_argdesc_t sdt_args[] = {
{ "fsinfo", NULL, 0, 0, "vnode_t *", "fileinfo_t *" },
{ "fsinfo", NULL, 1, 1, "int", "int" },
+ { "iscsi", "async-send", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "async-send", 1, 1, "iscsi_async_evt_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "login-command", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "login-command", 1, 1, "iscsi_login_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "login-response", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "login-response", 1, 1, "iscsi_login_rsp_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "logout-command", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "logout-command", 1, 1, "iscsi_logout_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "logout-response", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "logout-response", 1, 1, "iscsi_logout_rsp_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "data-request", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "data-request", 1, 1, "iscsi_rtt_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "data-send", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "data-send", 1, 1, "iscsi_data_rsp_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "data-receive", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "data-receive", 1, 1, "iscsi_data_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "nop-send", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "nop-send", 1, 1, "iscsi_nop_in_hdr_t *", "iscsiinfo_t *" },
+ { "iscsi", "nop-receive", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "nop-receive", 1, 1, "iscsi_nop_out_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "scsi-command", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "scsi-command", 1, 1, "iscsi_scsi_cmd_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "scsi-command", 2, 2, "scsi_task_t *", "scsicmd_t *" },
+ { "iscsi", "scsi-response", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "scsi-response", 1, 1, "iscsi_scsi_rsp_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "task-command", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "task-command", 1, 1, "iscsi_scsi_task_mgt_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "task-response", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "task-response", 1, 1, "iscsi_scsi_task_mgt_rsp_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "text-command", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "text-command", 1, 1, "iscsi_text_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "text-response", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "text-response", 1, 1, "iscsi_text_rsp_hdr_t *",
+ "iscsiinfo_t *" },
+ { "iscsi", "xfer-start", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "xfer-start", 1, 0, "idm_conn_t *", "iscsiinfo_t *" },
+ { "iscsi", "xfer-start", 2, 1, "uintptr_t", "xferinfo_t *" },
+ { "iscsi", "xfer-start", 3, 2, "uint32_t"},
+ { "iscsi", "xfer-start", 4, 3, "uintptr_t"},
+ { "iscsi", "xfer-start", 5, 4, "uint32_t"},
+ { "iscsi", "xfer-start", 6, 5, "uint32_t"},
+ { "iscsi", "xfer-start", 7, 6, "uint32_t"},
+ { "iscsi", "xfer-start", 8, 7, "int"},
+ { "iscsi", "xfer-done", 0, 0, "idm_conn_t *", "conninfo_t *" },
+ { "iscsi", "xfer-done", 1, 0, "idm_conn_t *", "iscsiinfo_t *" },
+ { "iscsi", "xfer-done", 2, 1, "uintptr_t", "xferinfo_t *" },
+ { "iscsi", "xfer-done", 3, 2, "uint32_t"},
+ { "iscsi", "xfer-done", 4, 3, "uintptr_t"},
+ { "iscsi", "xfer-done", 5, 4, "uint32_t"},
+ { "iscsi", "xfer-done", 6, 5, "uint32_t"},
+ { "iscsi", "xfer-done", 7, 6, "uint32_t"},
+ { "iscsi", "xfer-done", 8, 7, "int"},
+
{ "nfsv3", "op-getattr-start", 0, 0, "struct svc_req *",
"conninfo_t *" },
{ "nfsv3", "op-getattr-start", 1, 1, "nfsv3oparg_t *",
@@ -788,6 +874,75 @@ sdt_argdesc_t sdt_args[] = {
"nfsv4cbinfo_t *" },
{ "nfsv4", "cb-recall-done", 2, 2, "CB_RECALL4res *" },
+ { "ip", "send", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "ip", "send", 1, 1, "conn_t *", "csinfo_t *" },
+ { "ip", "send", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "ip", "send", 3, 3, "__dtrace_ipsr_ill_t *", "ifinfo_t *" },
+ { "ip", "send", 4, 4, "ipha_t *", "ipv4info_t *" },
+ { "ip", "send", 5, 5, "ip6_t *", "ipv6info_t *" },
+ { "ip", "send", 6, 6, "int" }, /* used by __dtrace_ipsr_ill_t */
+ { "ip", "receive", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "ip", "receive", 1, 1, "conn_t *", "csinfo_t *" },
+ { "ip", "receive", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "ip", "receive", 3, 3, "__dtrace_ipsr_ill_t *", "ifinfo_t *" },
+ { "ip", "receive", 4, 4, "ipha_t *", "ipv4info_t *" },
+ { "ip", "receive", 5, 5, "ip6_t *", "ipv6info_t *" },
+ { "ip", "receive", 6, 6, "int" }, /* used by __dtrace_ipsr_ill_t */
+
+ { "tcp", "connect-established", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "connect-established", 1, 1, "ip_xmit_attr_t *",
+ "csinfo_t *" },
+ { "tcp", "connect-established", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "tcp", "connect-established", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "connect-established", 4, 4, "tcph_t *", "tcpinfo_t *" },
+ { "tcp", "connect-refused", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "connect-refused", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "connect-refused", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "tcp", "connect-refused", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "connect-refused", 4, 4, "tcph_t *", "tcpinfo_t *" },
+ { "tcp", "connect-request", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "connect-request", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "connect-request", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "tcp", "connect-request", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "connect-request", 4, 4, "tcph_t *", "tcpinfo_t *" },
+ { "tcp", "accept-established", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "accept-established", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "accept-established", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "tcp", "accept-established", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "accept-established", 4, 4, "tcph_t *", "tcpinfo_t *" },
+ { "tcp", "accept-refused", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "accept-refused", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "accept-refused", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "tcp", "accept-refused", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "accept-refused", 4, 4, "tcph_t *", "tcpinfo_t *" },
+ { "tcp", "state-change", 0, 0, "void", "void" },
+ { "tcp", "state-change", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "state-change", 2, 2, "void", "void" },
+ { "tcp", "state-change", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "state-change", 4, 4, "void", "void" },
+ { "tcp", "state-change", 5, 5, "int32_t", "tcplsinfo_t *" },
+ { "tcp", "send", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "send", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "send", 2, 2, "__dtrace_tcp_void_ip_t *", "ipinfo_t *" },
+ { "tcp", "send", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "send", 4, 4, "__dtrace_tcp_tcph_t *", "tcpinfo_t *" },
+ { "tcp", "receive", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "tcp", "receive", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "tcp", "receive", 2, 2, "__dtrace_tcp_void_ip_t *", "ipinfo_t *" },
+ { "tcp", "receive", 3, 3, "tcp_t *", "tcpsinfo_t *" },
+ { "tcp", "receive", 4, 4, "__dtrace_tcp_tcph_t *", "tcpinfo_t *" },
+
+ { "udp", "send", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "udp", "send", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "udp", "send", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "udp", "send", 3, 3, "udp_t *", "udpsinfo_t *" },
+ { "udp", "send", 4, 4, "udpha_t *", "udpinfo_t *" },
+ { "udp", "receive", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "udp", "receive", 1, 1, "ip_xmit_attr_t *", "csinfo_t *" },
+ { "udp", "receive", 2, 2, "void_ip_t *", "ipinfo_t *" },
+ { "udp", "receive", 3, 3, "udp_t *", "udpsinfo_t *" },
+ { "udp", "receive", 4, 4, "udpha_t *", "udpinfo_t *" },
+
{ "sysevent", "post", 0, 0, "evch_bind_t *", "syseventchaninfo_t *" },
{ "sysevent", "post", 1, 1, "sysevent_impl_t *", "syseventinfo_t *" },
@@ -848,6 +1003,154 @@ sdt_argdesc_t sdt_args[] = {
{ "xpv", "setvcpucontext-end", 0, 0, "int" },
{ "xpv", "setvcpucontext-start", 0, 0, "domid_t" },
{ "xpv", "setvcpucontext-start", 1, 1, "vcpu_guest_context_t *" },
+
+ { "srp", "service-up", 0, 0, "srpt_session_t *", "conninfo_t *" },
+ { "srp", "service-up", 1, 0, "srpt_session_t *", "srp_portinfo_t *" },
+ { "srp", "service-down", 0, 0, "srpt_session_t *", "conninfo_t *" },
+ { "srp", "service-down", 1, 0, "srpt_session_t *",
+ "srp_portinfo_t *" },
+ { "srp", "login-command", 0, 0, "srpt_session_t *", "conninfo_t *" },
+ { "srp", "login-command", 1, 0, "srpt_session_t *",
+ "srp_portinfo_t *" },
+ { "srp", "login-command", 2, 1, "srp_login_req_t *",
+ "srp_logininfo_t *" },
+ { "srp", "login-response", 0, 0, "srpt_session_t *", "conninfo_t *" },
+ { "srp", "login-response", 1, 0, "srpt_session_t *",
+ "srp_portinfo_t *" },
+ { "srp", "login-response", 2, 1, "srp_login_rsp_t *",
+ "srp_logininfo_t *" },
+ { "srp", "login-response", 3, 2, "srp_login_rej_t *" },
+ { "srp", "logout-command", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "logout-command", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "task-command", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "task-command", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "task-command", 2, 1, "srp_cmd_req_t *", "srp_taskinfo_t *" },
+ { "srp", "task-response", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "task-response", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "task-response", 2, 1, "srp_rsp_t *", "srp_taskinfo_t *" },
+ { "srp", "task-response", 3, 2, "scsi_task_t *" },
+ { "srp", "task-response", 4, 3, "int8_t" },
+ { "srp", "scsi-command", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "scsi-command", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "scsi-command", 2, 1, "scsi_task_t *", "scsicmd_t *" },
+ { "srp", "scsi-command", 3, 2, "srp_cmd_req_t *", "srp_taskinfo_t *" },
+ { "srp", "scsi-response", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "scsi-response", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "scsi-response", 2, 1, "srp_rsp_t *", "srp_taskinfo_t *" },
+ { "srp", "scsi-response", 3, 2, "scsi_task_t *" },
+ { "srp", "scsi-response", 4, 3, "int8_t" },
+ { "srp", "xfer-start", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "xfer-start", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "xfer-start", 2, 1, "ibt_wr_ds_t *", "xferinfo_t *" },
+ { "srp", "xfer-start", 3, 2, "srpt_iu_t *", "srp_taskinfo_t *" },
+ { "srp", "xfer-start", 4, 3, "ibt_send_wr_t *"},
+ { "srp", "xfer-start", 5, 4, "uint32_t" },
+ { "srp", "xfer-start", 6, 5, "uint32_t" },
+ { "srp", "xfer-start", 7, 6, "uint32_t" },
+ { "srp", "xfer-start", 8, 7, "uint32_t" },
+ { "srp", "xfer-done", 0, 0, "srpt_channel_t *", "conninfo_t *" },
+ { "srp", "xfer-done", 1, 0, "srpt_channel_t *",
+ "srp_portinfo_t *" },
+ { "srp", "xfer-done", 2, 1, "ibt_wr_ds_t *", "xferinfo_t *" },
+ { "srp", "xfer-done", 3, 2, "srpt_iu_t *", "srp_taskinfo_t *" },
+ { "srp", "xfer-done", 4, 3, "ibt_send_wr_t *"},
+ { "srp", "xfer-done", 5, 4, "uint32_t" },
+ { "srp", "xfer-done", 6, 5, "uint32_t" },
+ { "srp", "xfer-done", 7, 6, "uint32_t" },
+ { "srp", "xfer-done", 8, 7, "uint32_t" },
+
+ { "fc", "link-up", 0, 0, "fct_i_local_port_t *", "conninfo_t *" },
+ { "fc", "link-down", 0, 0, "fct_i_local_port_t *", "conninfo_t *" },
+ { "fc", "fabric-login-start", 0, 0, "fct_i_local_port_t *",
+ "conninfo_t *" },
+ { "fc", "fabric-login-start", 1, 0, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "fabric-login-end", 0, 0, "fct_i_local_port_t *",
+ "conninfo_t *" },
+ { "fc", "fabric-login-end", 1, 0, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-login-start", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "rport-login-start", 1, 1, "fct_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-login-start", 2, 2, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-login-start", 3, 3, "int", "int" },
+ { "fc", "rport-login-end", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "rport-login-end", 1, 1, "fct_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-login-end", 2, 2, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-login-end", 3, 3, "int", "int" },
+ { "fc", "rport-login-end", 4, 4, "int", "int" },
+ { "fc", "rport-logout-start", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "rport-logout-start", 1, 1, "fct_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-logout-start", 2, 2, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-logout-start", 3, 3, "int", "int" },
+ { "fc", "rport-logout-end", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "rport-logout-end", 1, 1, "fct_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-logout-end", 2, 2, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "rport-logout-end", 3, 3, "int", "int" },
+ { "fc", "scsi-command", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "scsi-command", 1, 1, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "scsi-command", 2, 2, "scsi_task_t *",
+ "scsicmd_t *" },
+ { "fc", "scsi-command", 3, 3, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "scsi-response", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "scsi-response", 1, 1, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "scsi-response", 2, 2, "scsi_task_t *",
+ "scsicmd_t *" },
+ { "fc", "scsi-response", 3, 3, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "xfer-start", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "xfer-start", 1, 1, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "xfer-start", 2, 2, "scsi_task_t *",
+ "scsicmd_t *" },
+ { "fc", "xfer-start", 3, 3, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "xfer-start", 4, 4, "stmf_data_buf_t *",
+ "fc_xferinfo_t *" },
+ { "fc", "xfer-done", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "xfer-done", 1, 1, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "xfer-done", 2, 2, "scsi_task_t *",
+ "scsicmd_t *" },
+ { "fc", "xfer-done", 3, 3, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "xfer-done", 4, 4, "stmf_data_buf_t *",
+ "fc_xferinfo_t *" },
+ { "fc", "rscn-receive", 0, 0, "fct_i_local_port_t *",
+ "conninfo_t *" },
+ { "fc", "rscn-receive", 1, 1, "int", "int"},
+ { "fc", "abts-receive", 0, 0, "fct_cmd_t *",
+ "conninfo_t *" },
+ { "fc", "abts-receive", 1, 1, "fct_i_local_port_t *",
+ "fc_port_info_t *" },
+ { "fc", "abts-receive", 2, 2, "fct_i_remote_port_t *",
+ "fc_port_info_t *" },
+
+
{ NULL }
};
diff --git a/uts/common/dtrace/systrace.c b/uts/common/dtrace/systrace.c
index be14660b04c0..b864041c450d 100644
--- a/uts/common/dtrace/systrace.c
+++ b/uts/common/dtrace/systrace.c
@@ -19,11 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/dtrace.h>
#include <sys/systrace.h>
@@ -141,7 +140,7 @@ systrace_destroy(void *arg, dtrace_id_t id, void *parg)
}
/*ARGSUSED*/
-static void
+static int
systrace_enable(void *arg, dtrace_id_t id, void *parg)
{
int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
@@ -162,7 +161,7 @@ systrace_enable(void *arg, dtrace_id_t id, void *parg)
if (enabled) {
ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
- return;
+ return (0);
}
(void) casptr(&sysent[sysnum].sy_callc,
@@ -173,6 +172,7 @@ systrace_enable(void *arg, dtrace_id_t id, void *parg)
(void *)systrace_sysent32[sysnum].stsy_underlying,
(void *)dtrace_systrace_syscall32);
#endif
+ return (0);
}
/*ARGSUSED*/
@@ -336,7 +336,8 @@ static struct dev_ops systrace_ops = {
nodev, /* reset */
&systrace_cb_ops, /* driver operations */
NULL, /* bus operations */
- nodev /* dev power */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
};
/*
diff --git a/uts/common/fs/gfs.c b/uts/common/fs/gfs.c
new file mode 100644
index 000000000000..4d24df60f75b
--- /dev/null
+++ b/uts/common/fs/gfs.c
@@ -0,0 +1,1178 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/* Portions Copyright 2007 Shivakumar GN */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/debug.h>
+#include <sys/dirent.h>
+#include <sys/kmem.h>
+#include <sys/mman.h>
+#include <sys/mutex.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/sunddi.h>
+#include <sys/uio.h>
+#include <sys/vmsystm.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+
+#include <vm/as.h>
+#include <vm/seg_vn.h>
+
+#include <sys/gfs.h>
+
+/*
+ * Generic pseudo-filesystem routines.
+ *
+ * There are significant similarities between the implementation of certain file
+ * system entry points across different filesystems. While one could attempt to
+ * "choke up on the bat" and incorporate common functionality into a VOP
+ * preamble or postamble, such an approach is limited in the benefit it can
+ * provide. In this file we instead define a toolkit of routines which can be
+ * called from a filesystem (with in-kernel pseudo-filesystems being the focus
+ * of the exercise) in a more component-like fashion.
+ *
+ * There are three basic classes of routines:
+ *
+ * 1) Lowlevel support routines
+ *
+ * These routines are designed to play a support role for existing
+ * pseudo-filesystems (such as procfs). They simplify common tasks,
+ * without forcing the filesystem to hand over management to GFS. The
+ * routines covered are:
+ *
+ * gfs_readdir_init()
+ * gfs_readdir_emit()
+ * gfs_readdir_emitn()
+ * gfs_readdir_pred()
+ * gfs_readdir_fini()
+ * gfs_lookup_dot()
+ *
+ * 2) Complete GFS management
+ *
+ * These routines take a more active role in management of the
+ * pseudo-filesystem. They handle the relationship between vnode private
+ * data and VFS data, as well as the relationship between vnodes in the
+ * directory hierarchy.
+ *
+ * In order to use these interfaces, the first member of every private
+ * v_data must be a gfs_file_t or a gfs_dir_t. This hands over all control
+ * to GFS.
+ *
+ * gfs_file_create()
+ * gfs_dir_create()
+ * gfs_root_create()
+ *
+ * gfs_file_inactive()
+ * gfs_dir_inactive()
+ * gfs_dir_lookup()
+ * gfs_dir_readdir()
+ *
+ * gfs_vop_inactive()
+ * gfs_vop_lookup()
+ * gfs_vop_readdir()
+ * gfs_vop_map()
+ *
+ * 3) Single File pseudo-filesystems
+ *
+ * This routine creates a rooted file to be overlaid on top of another
+ * file in the physical filespace.
+ *
+ * Note that the parent is NULL (actually the vfs), but there is nothing
+ * technically keeping such a file from utilizing the "Complete GFS
+ * management" set of routines.
+ *
+ * gfs_root_create_file()
+ */
+
+/*
+ * gfs_make_opsvec: take an array of vnode type definitions and create
+ * their vnodeops_t structures
+ *
+ * This routine takes an array of gfs_opsvec_t's. It could
+ * alternatively take an array of gfs_opsvec_t*'s, which would allow
+ * vnode types to be completely defined in files external to the caller
+ * of gfs_make_opsvec(). As it stands, much more sharing takes place --
+ * both the caller and the vnode type provider need to access gfsv_ops
+ * and gfsv_template, and the caller also needs to know gfsv_name.
+ */
+int
+gfs_make_opsvec(gfs_opsvec_t *vec)
+{
+ int error, i;
+
+ for (i = 0; ; i++) {
+ if (vec[i].gfsv_name == NULL)
+ return (0);
+ error = vn_make_ops(vec[i].gfsv_name, vec[i].gfsv_template,
+ vec[i].gfsv_ops);
+ if (error)
+ break;
+ }
+
+ cmn_err(CE_WARN, "gfs_make_opsvec: bad vnode ops template for '%s'",
+ vec[i].gfsv_name);
+ for (i--; i >= 0; i--) {
+ vn_freevnodeops(*vec[i].gfsv_ops);
+ *vec[i].gfsv_ops = NULL;
+ }
+ return (error);
+}
+
+/*
+ * Low level directory routines
+ *
+ * These routines provide some simple abstractions for reading directories.
+ * They are designed to be used by existing pseudo filesystems (namely procfs)
+ * that already have a complicated management infrastructure.
+ */
+
+/*
+ * gfs_get_parent_ino: used to obtain a parent inode number and the
+ * inode number of the given vnode in preparation for calling gfs_readdir_init.
+ */
+int
+gfs_get_parent_ino(vnode_t *dvp, cred_t *cr, caller_context_t *ct,
+ ino64_t *pino, ino64_t *ino)
+{
+ vnode_t *parent;
+ gfs_dir_t *dp = dvp->v_data;
+ int error;
+
+ *ino = dp->gfsd_file.gfs_ino;
+ parent = dp->gfsd_file.gfs_parent;
+
+ if (parent == NULL) {
+ *pino = *ino; /* root of filesystem */
+ } else if (dvp->v_flag & V_XATTRDIR) {
+ vattr_t va;
+
+ va.va_mask = AT_NODEID;
+ error = VOP_GETATTR(parent, &va, 0, cr, ct);
+ if (error)
+ return (error);
+ *pino = va.va_nodeid;
+ } else {
+ *pino = ((gfs_file_t *)(parent->v_data))->gfs_ino;
+ }
+
+ return (0);
+}
+
+/*
+ * gfs_readdir_init: initiate a generic readdir
+ * st - a pointer to an uninitialized gfs_readdir_state_t structure
+ * name_max - the directory's maximum file name length
+ * ureclen - the exported file-space record length (1 for non-legacy FSs)
+ * uiop - the uiop passed to readdir
+ * parent - the parent directory's inode
+ * self - this directory's inode
+ * flags - flags from VOP_READDIR
+ *
+ * Returns 0 or a non-zero errno.
+ *
+ * Typical VOP_READDIR usage of gfs_readdir_*:
+ *
+ * if ((error = gfs_readdir_init(...)) != 0)
+ * return (error);
+ * eof = 0;
+ * while ((error = gfs_readdir_pred(..., &voffset)) != 0) {
+ * if (!consumer_entry_at(voffset))
+ * voffset = consumer_next_entry(voffset);
+ * if (consumer_eof(voffset)) {
+ * eof = 1
+ * break;
+ * }
+ * if ((error = gfs_readdir_emit(..., voffset,
+ * consumer_ino(voffset), consumer_name(voffset))) != 0)
+ * break;
+ * }
+ * return (gfs_readdir_fini(..., error, eofp, eof));
+ *
+ * As you can see, a zero result from gfs_readdir_pred() or
+ * gfs_readdir_emit() indicates that processing should continue,
+ * whereas a non-zero result indicates that the loop should terminate.
+ * Most consumers need do nothing more than let gfs_readdir_fini()
+ * determine what the cause of failure was and return the appropriate
+ * value.
+ */
+int
+gfs_readdir_init(gfs_readdir_state_t *st, int name_max, int ureclen,
+ uio_t *uiop, ino64_t parent, ino64_t self, int flags)
+{
+ size_t dirent_size;
+
+ if (uiop->uio_loffset < 0 || uiop->uio_resid <= 0 ||
+ (uiop->uio_loffset % ureclen) != 0)
+ return (EINVAL);
+
+ st->grd_ureclen = ureclen;
+ st->grd_oresid = uiop->uio_resid;
+ st->grd_namlen = name_max;
+ if (flags & V_RDDIR_ENTFLAGS)
+ dirent_size = EDIRENT_RECLEN(st->grd_namlen);
+ else
+ dirent_size = DIRENT64_RECLEN(st->grd_namlen);
+ st->grd_dirent = kmem_zalloc(dirent_size, KM_SLEEP);
+ st->grd_parent = parent;
+ st->grd_self = self;
+ st->grd_flags = flags;
+
+ return (0);
+}
+
+/*
+ * gfs_readdir_emit_int: internal routine to emit directory entry
+ *
+ * st - the current readdir state, which must have d_ino/ed_ino
+ * and d_name/ed_name set
+ * uiop - caller-supplied uio pointer
+ * next - the offset of the next entry
+ */
+static int
+gfs_readdir_emit_int(gfs_readdir_state_t *st, uio_t *uiop, offset_t next)
+{
+ int reclen;
+ dirent64_t *dp;
+ edirent_t *edp;
+
+ if (st->grd_flags & V_RDDIR_ENTFLAGS) {
+ edp = st->grd_dirent;
+ reclen = EDIRENT_RECLEN(strlen(edp->ed_name));
+ } else {
+ dp = st->grd_dirent;
+ reclen = DIRENT64_RECLEN(strlen(dp->d_name));
+ }
+
+ if (reclen > uiop->uio_resid) {
+ /*
+ * Error if no entries were returned yet
+ */
+ if (uiop->uio_resid == st->grd_oresid)
+ return (EINVAL);
+ return (-1);
+ }
+
+ if (st->grd_flags & V_RDDIR_ENTFLAGS) {
+ edp->ed_off = next;
+ edp->ed_reclen = (ushort_t)reclen;
+ } else {
+ dp->d_off = next;
+ dp->d_reclen = (ushort_t)reclen;
+ }
+
+ if (uiomove((caddr_t)st->grd_dirent, reclen, UIO_READ, uiop))
+ return (EFAULT);
+
+ uiop->uio_loffset = next;
+
+ return (0);
+}
+
+/*
+ * gfs_readdir_emit: emit a directory entry
+ * voff - the virtual offset (obtained from gfs_readdir_pred)
+ * ino - the entry's inode
+ * name - the entry's name
+ * eflags - value for ed_eflags (if processing edirent_t)
+ *
+ * Returns a 0 on success, a non-zero errno on failure, or -1 if the
+ * readdir loop should terminate. A non-zero result (either errno or
+ * -1) from this function is typically passed directly to
+ * gfs_readdir_fini().
+ */
+int
+gfs_readdir_emit(gfs_readdir_state_t *st, uio_t *uiop, offset_t voff,
+ ino64_t ino, const char *name, int eflags)
+{
+ offset_t off = (voff + 2) * st->grd_ureclen;
+
+ if (st->grd_flags & V_RDDIR_ENTFLAGS) {
+ edirent_t *edp = st->grd_dirent;
+
+ edp->ed_ino = ino;
+ (void) strncpy(edp->ed_name, name, st->grd_namlen);
+ edp->ed_eflags = eflags;
+ } else {
+ dirent64_t *dp = st->grd_dirent;
+
+ dp->d_ino = ino;
+ (void) strncpy(dp->d_name, name, st->grd_namlen);
+ }
+
+ /*
+ * Inter-entry offsets are invalid, so we assume a record size of
+ * grd_ureclen and explicitly set the offset appropriately.
+ */
+ return (gfs_readdir_emit_int(st, uiop, off + st->grd_ureclen));
+}
+
+/*
+ * gfs_readdir_emitn: like gfs_readdir_emit(), but takes an integer
+ * instead of a string for the entry's name.
+ */
+int
+gfs_readdir_emitn(gfs_readdir_state_t *st, uio_t *uiop, offset_t voff,
+ ino64_t ino, unsigned long num)
+{
+ char buf[40];
+
+ numtos(num, buf);
+ return (gfs_readdir_emit(st, uiop, voff, ino, buf, 0));
+}
+
+/*
+ * gfs_readdir_pred: readdir loop predicate
+ * voffp - a pointer in which the next virtual offset should be stored
+ *
+ * Returns a 0 on success, a non-zero errno on failure, or -1 if the
+ * readdir loop should terminate. A non-zero result (either errno or
+ * -1) from this function is typically passed directly to
+ * gfs_readdir_fini().
+ */
+int
+gfs_readdir_pred(gfs_readdir_state_t *st, uio_t *uiop, offset_t *voffp)
+{
+ offset_t off, voff;
+ int error;
+
+top:
+ if (uiop->uio_resid <= 0)
+ return (-1);
+
+ off = uiop->uio_loffset / st->grd_ureclen;
+ voff = off - 2;
+ if (off == 0) {
+ if ((error = gfs_readdir_emit(st, uiop, voff, st->grd_self,
+ ".", 0)) == 0)
+ goto top;
+ } else if (off == 1) {
+ if ((error = gfs_readdir_emit(st, uiop, voff, st->grd_parent,
+ "..", 0)) == 0)
+ goto top;
+ } else {
+ *voffp = voff;
+ return (0);
+ }
+
+ return (error);
+}
+
+/*
+ * gfs_readdir_fini: generic readdir cleanup
+ * error - if positive, an error to return
+ * eofp - the eofp passed to readdir
+ * eof - the eof value
+ *
+ * Returns a 0 on success, a non-zero errno on failure. This result
+ * should be returned from readdir.
+ */
+int
+gfs_readdir_fini(gfs_readdir_state_t *st, int error, int *eofp, int eof)
+{
+ size_t dirent_size;
+
+ if (st->grd_flags & V_RDDIR_ENTFLAGS)
+ dirent_size = EDIRENT_RECLEN(st->grd_namlen);
+ else
+ dirent_size = DIRENT64_RECLEN(st->grd_namlen);
+ kmem_free(st->grd_dirent, dirent_size);
+ if (error > 0)
+ return (error);
+ if (eofp)
+ *eofp = eof;
+ return (0);
+}
+
+/*
+ * gfs_lookup_dot
+ *
+ * Performs a basic check for "." and ".." directory entries.
+ */
+int
+gfs_lookup_dot(vnode_t **vpp, vnode_t *dvp, vnode_t *pvp, const char *nm)
+{
+ if (*nm == '\0' || strcmp(nm, ".") == 0) {
+ VN_HOLD(dvp);
+ *vpp = dvp;
+ return (0);
+ } else if (strcmp(nm, "..") == 0) {
+ if (pvp == NULL) {
+ ASSERT(dvp->v_flag & VROOT);
+ VN_HOLD(dvp);
+ *vpp = dvp;
+ } else {
+ VN_HOLD(pvp);
+ *vpp = pvp;
+ }
+ return (0);
+ }
+
+ return (-1);
+}
+
+/*
+ * gfs_file_create(): create a new GFS file
+ *
+ * size - size of private data structure (v_data)
+ * pvp - parent vnode (GFS directory)
+ * ops - vnode operations vector
+ *
+ * In order to use this interface, the parent vnode must have been created by
+ * gfs_dir_create(), and the private data stored in v_data must have a
+ * 'gfs_file_t' as its first field.
+ *
+ * Given these constraints, this routine will automatically:
+ *
+ * - Allocate v_data for the vnode
+ * - Initialize necessary fields in the vnode
+ * - Hold the parent
+ */
+vnode_t *
+gfs_file_create(size_t size, vnode_t *pvp, vnodeops_t *ops)
+{
+ gfs_file_t *fp;
+ vnode_t *vp;
+
+ /*
+ * Allocate vnode and internal data structure
+ */
+ fp = kmem_zalloc(size, KM_SLEEP);
+ vp = vn_alloc(KM_SLEEP);
+
+ /*
+ * Set up various pointers
+ */
+ fp->gfs_vnode = vp;
+ fp->gfs_parent = pvp;
+ vp->v_data = fp;
+ fp->gfs_size = size;
+ fp->gfs_type = GFS_FILE;
+
+ /*
+ * Initialize vnode and hold parent.
+ */
+ vn_setops(vp, ops);
+ if (pvp) {
+ VN_SET_VFS_TYPE_DEV(vp, pvp->v_vfsp, VREG, 0);
+ VN_HOLD(pvp);
+ }
+
+ return (vp);
+}
+
+/*
+ * gfs_dir_create: creates a new directory in the parent
+ *
+ * size - size of private data structure (v_data)
+ * pvp - parent vnode (GFS directory)
+ * ops - vnode operations vector
+ * entries - NULL-terminated list of static entries (if any)
+ * maxlen - maximum length of a directory entry
+ * readdir_cb - readdir callback (see gfs_dir_readdir)
+ * inode_cb - inode callback (see gfs_dir_readdir)
+ * lookup_cb - lookup callback (see gfs_dir_lookup)
+ *
+ * In order to use this function, the first member of the private vnode
+ * structure (v_data) must be a gfs_dir_t. For each directory, there are
+ * static entries, defined when the structure is initialized, and dynamic
+ * entries, retrieved through callbacks.
+ *
+ * If a directory has static entries, then it must supply an inode callback,
+ * which will compute the inode number based on the parent and the index.
+ * For a directory with dynamic entries, the caller must supply a readdir
+ * callback and a lookup callback. If a static lookup fails, we fall back to
+ * the supplied lookup callback, if any.
+ *
+ * This function also performs the same initialization as gfs_file_create().
+ */
+vnode_t *
+gfs_dir_create(size_t struct_size, vnode_t *pvp, vnodeops_t *ops,
+ gfs_dirent_t *entries, gfs_inode_cb inode_cb, int maxlen,
+ gfs_readdir_cb readdir_cb, gfs_lookup_cb lookup_cb)
+{
+ vnode_t *vp;
+ gfs_dir_t *dp;
+ gfs_dirent_t *de;
+
+ vp = gfs_file_create(struct_size, pvp, ops);
+ vp->v_type = VDIR;
+
+ dp = vp->v_data;
+ dp->gfsd_file.gfs_type = GFS_DIR;
+ dp->gfsd_maxlen = maxlen;
+
+ if (entries != NULL) {
+ for (de = entries; de->gfse_name != NULL; de++)
+ dp->gfsd_nstatic++;
+
+ dp->gfsd_static = kmem_alloc(
+ dp->gfsd_nstatic * sizeof (gfs_dirent_t), KM_SLEEP);
+ bcopy(entries, dp->gfsd_static,
+ dp->gfsd_nstatic * sizeof (gfs_dirent_t));
+ }
+
+ dp->gfsd_readdir = readdir_cb;
+ dp->gfsd_lookup = lookup_cb;
+ dp->gfsd_inode = inode_cb;
+
+ mutex_init(&dp->gfsd_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ return (vp);
+}
+
+/*
+ * gfs_root_create(): create a root vnode for a GFS filesystem
+ *
+ * Similar to gfs_dir_create(), this creates a root vnode for a filesystem. The
+ * only difference is that it takes a vfs_t instead of a vnode_t as its parent.
+ */
+vnode_t *
+gfs_root_create(size_t size, vfs_t *vfsp, vnodeops_t *ops, ino64_t ino,
+ gfs_dirent_t *entries, gfs_inode_cb inode_cb, int maxlen,
+ gfs_readdir_cb readdir_cb, gfs_lookup_cb lookup_cb)
+{
+ vnode_t *vp = gfs_dir_create(size, NULL, ops, entries, inode_cb,
+ maxlen, readdir_cb, lookup_cb);
+
+ /* Manually set the inode */
+ ((gfs_file_t *)vp->v_data)->gfs_ino = ino;
+
+ VFS_HOLD(vfsp);
+ VN_SET_VFS_TYPE_DEV(vp, vfsp, VDIR, 0);
+ vp->v_flag |= VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT;
+
+ return (vp);
+}
+
+/*
+ * gfs_root_create_file(): create a root vnode for a GFS file as a filesystem
+ *
+ * Similar to gfs_root_create(), this creates a root vnode for a file to
+ * be the pseudo-filesystem.
+ */
+vnode_t *
+gfs_root_create_file(size_t size, vfs_t *vfsp, vnodeops_t *ops, ino64_t ino)
+{
+ vnode_t *vp = gfs_file_create(size, NULL, ops);
+
+ ((gfs_file_t *)vp->v_data)->gfs_ino = ino;
+
+ VFS_HOLD(vfsp);
+ VN_SET_VFS_TYPE_DEV(vp, vfsp, VREG, 0);
+ vp->v_flag |= VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT;
+
+ return (vp);
+}
+
+/*
+ * gfs_file_inactive()
+ *
+ * Called from the VOP_INACTIVE() routine. If necessary, this routine will
+ * remove the given vnode from the parent directory and clean up any references
+ * in the VFS layer.
+ *
+ * If the vnode was not removed (due to a race with vget), then NULL is
+ * returned. Otherwise, a pointer to the private data is returned.
+ */
+void *
+gfs_file_inactive(vnode_t *vp)
+{
+ int i;
+ gfs_dirent_t *ge = NULL;
+ gfs_file_t *fp = vp->v_data;
+ gfs_dir_t *dp = NULL;
+ void *data;
+
+ if (fp->gfs_parent == NULL || (vp->v_flag & V_XATTRDIR))
+ goto found;
+
+ dp = fp->gfs_parent->v_data;
+
+ /*
+ * First, see if this vnode is cached in the parent.
+ */
+ gfs_dir_lock(dp);
+
+ /*
+ * Find it in the set of static entries.
+ */
+ for (i = 0; i < dp->gfsd_nstatic; i++) {
+ ge = &dp->gfsd_static[i];
+
+ if (ge->gfse_vnode == vp)
+ goto found;
+ }
+
+ /*
+ * If 'ge' is NULL, then it is a dynamic entry.
+ */
+ ge = NULL;
+
+found:
+ if (vp->v_flag & V_XATTRDIR) {
+ mutex_enter(&fp->gfs_parent->v_lock);
+ }
+ mutex_enter(&vp->v_lock);
+ if (vp->v_count == 1) {
+ /*
+ * Really remove this vnode
+ */
+ data = vp->v_data;
+ if (ge != NULL) {
+ /*
+ * If this was a statically cached entry, simply set the
+ * cached vnode to NULL.
+ */
+ ge->gfse_vnode = NULL;
+ }
+ if (vp->v_flag & V_XATTRDIR) {
+ fp->gfs_parent->v_xattrdir = NULL;
+ mutex_exit(&fp->gfs_parent->v_lock);
+ }
+ mutex_exit(&vp->v_lock);
+
+ /*
+ * Free vnode and release parent
+ */
+ if (fp->gfs_parent) {
+ if (dp) {
+ gfs_dir_unlock(dp);
+ }
+ VN_RELE(fp->gfs_parent);
+ } else {
+ ASSERT(vp->v_vfsp != NULL);
+ VFS_RELE(vp->v_vfsp);
+ }
+ vn_free(vp);
+ } else {
+ vp->v_count--;
+ data = NULL;
+ mutex_exit(&vp->v_lock);
+ if (vp->v_flag & V_XATTRDIR) {
+ mutex_exit(&fp->gfs_parent->v_lock);
+ }
+ if (dp)
+ gfs_dir_unlock(dp);
+ }
+
+ return (data);
+}
+
+/*
+ * gfs_dir_inactive()
+ *
+ * Same as above, but for directories.
+ */
+void *
+gfs_dir_inactive(vnode_t *vp)
+{
+ gfs_dir_t *dp;
+
+ ASSERT(vp->v_type == VDIR);
+
+ if ((dp = gfs_file_inactive(vp)) != NULL) {
+ mutex_destroy(&dp->gfsd_lock);
+ if (dp->gfsd_nstatic)
+ kmem_free(dp->gfsd_static,
+ dp->gfsd_nstatic * sizeof (gfs_dirent_t));
+ }
+
+ return (dp);
+}
+
+/*
+ * gfs_dir_lookup_dynamic()
+ *
+ * This routine looks up the provided name amongst the dynamic entries
+ * in the gfs directory and returns the corresponding vnode, if found.
+ *
+ * The gfs directory is expected to be locked by the caller prior to
+ * calling this function. The directory will be unlocked during the
+ * execution of this function, but will be locked upon return from the
+ * function. This function returns 0 on success, non-zero on error.
+ *
+ * The dynamic lookups are performed by invoking the lookup
+ * callback, which is passed to this function as the first argument.
+ * The arguments to the callback are:
+ *
+ * int gfs_lookup_cb(vnode_t *pvp, const char *nm, vnode_t **vpp, cred_t *cr,
+ * int flags, int *deflgs, pathname_t *rpnp);
+ *
+ * pvp - parent vnode
+ * nm - name of entry
+ * vpp - pointer to resulting vnode
+ * cr - pointer to cred
+ * flags - flags value from lookup request
+ * ignored here; currently only used to request
+ * insensitive lookups
+ * direntflgs - output parameter, directory entry flags
+ * ignored here; currently only used to indicate a lookup
+ * has more than one possible match when case is not considered
+ * realpnp - output parameter, real pathname
+ * ignored here; when lookup was performed case-insensitively,
+ * this field contains the "real" name of the file.
+ *
+ * Returns 0 on success, non-zero on error.
+ */
+static int
+gfs_dir_lookup_dynamic(gfs_lookup_cb callback, gfs_dir_t *dp,
+ const char *nm, vnode_t *dvp, vnode_t **vpp, cred_t *cr, int flags,
+ int *direntflags, pathname_t *realpnp)
+{
+ gfs_file_t *fp;
+ ino64_t ino;
+ int ret;
+
+ ASSERT(GFS_DIR_LOCKED(dp));
+
+ /*
+ * Drop the directory lock, as the lookup routine
+ * will need to allocate memory, or otherwise deadlock on this
+ * directory.
+ */
+ gfs_dir_unlock(dp);
+ ret = callback(dvp, nm, vpp, &ino, cr, flags, direntflags, realpnp);
+ gfs_dir_lock(dp);
+
+ /*
+ * The callback for extended attributes returns a vnode
+ * with v_data from an underlying fs.
+ */
+ if (ret == 0 && !IS_XATTRDIR(dvp)) {
+ fp = (gfs_file_t *)((*vpp)->v_data);
+ fp->gfs_index = -1;
+ fp->gfs_ino = ino;
+ }
+
+ return (ret);
+}
+
+/*
+ * gfs_dir_lookup_static()
+ *
+ * This routine looks up the provided name amongst the static entries
+ * in the gfs directory and returns the corresponding vnode, if found.
+ * The first argument to the function is a pointer to the comparison
+ * function this function should use to decide if names are a match.
+ *
+ * If a match is found, and GFS_CACHE_VNODE is set and the vnode
+ * exists, we simply return the existing vnode. Otherwise, we call
+ * the static entry's callback routine, caching the result if
+ * necessary. If the idx pointer argument is non-NULL, we use it to
+ * return the index of the matching static entry.
+ *
+ * The gfs directory is expected to be locked by the caller prior to calling
+ * this function. The directory may be unlocked during the execution of
+ * this function, but will be locked upon return from the function.
+ *
+ * This function returns 0 if a match is found, ENOENT if not.
+ */
+static int
+gfs_dir_lookup_static(int (*compare)(const char *, const char *),
+ gfs_dir_t *dp, const char *nm, vnode_t *dvp, int *idx,
+ vnode_t **vpp, pathname_t *rpnp)
+{
+ gfs_dirent_t *ge;
+ vnode_t *vp = NULL;
+ int i;
+
+ ASSERT(GFS_DIR_LOCKED(dp));
+
+ /*
+ * Search static entries.
+ */
+ for (i = 0; i < dp->gfsd_nstatic; i++) {
+ ge = &dp->gfsd_static[i];
+
+ if (compare(ge->gfse_name, nm) == 0) {
+ if (rpnp)
+ (void) strlcpy(rpnp->pn_buf, ge->gfse_name,
+ rpnp->pn_bufsize);
+
+ if (ge->gfse_vnode) {
+ ASSERT(ge->gfse_flags & GFS_CACHE_VNODE);
+ vp = ge->gfse_vnode;
+ VN_HOLD(vp);
+ break;
+ }
+
+ /*
+ * We drop the directory lock, as the constructor will
+ * need to do KM_SLEEP allocations. If we return from
+ * the constructor only to find that a parallel
+ * operation has completed, and GFS_CACHE_VNODE is set
+ * for this entry, we discard the result in favor of
+ * the cached vnode.
+ */
+ gfs_dir_unlock(dp);
+ vp = ge->gfse_ctor(dvp);
+ gfs_dir_lock(dp);
+
+ ((gfs_file_t *)vp->v_data)->gfs_index = i;
+
+ /* Set the inode according to the callback. */
+ ((gfs_file_t *)vp->v_data)->gfs_ino =
+ dp->gfsd_inode(dvp, i);
+
+ if (ge->gfse_flags & GFS_CACHE_VNODE) {
+ if (ge->gfse_vnode == NULL) {
+ ge->gfse_vnode = vp;
+ } else {
+ /*
+ * A parallel constructor beat us to it;
+ * return existing vnode. We have to be
+ * careful because we can't release the
+ * current vnode while holding the
+ * directory lock; its inactive routine
+ * will try to lock this directory.
+ */
+ vnode_t *oldvp = vp;
+ vp = ge->gfse_vnode;
+ VN_HOLD(vp);
+
+ gfs_dir_unlock(dp);
+ VN_RELE(oldvp);
+ gfs_dir_lock(dp);
+ }
+ }
+ break;
+ }
+ }
+
+ if (vp == NULL)
+ return (ENOENT);
+ else if (idx)
+ *idx = i;
+ *vpp = vp;
+ return (0);
+}
+
+/*
+ * gfs_dir_lookup()
+ *
+ * Looks up the given name in the directory and returns the corresponding
+ * vnode, if found.
+ *
+ * First, we search statically defined entries, if any, with a call to
+ * gfs_dir_lookup_static(). If no static entry is found, and we have
+ * a callback function we try a dynamic lookup via gfs_dir_lookup_dynamic().
+ *
+ * This function returns 0 on success, non-zero on error.
+ */
+int
+gfs_dir_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, cred_t *cr,
+ int flags, int *direntflags, pathname_t *realpnp)
+{
+ gfs_dir_t *dp = dvp->v_data;
+ boolean_t casecheck;
+ vnode_t *dynvp = NULL;
+ vnode_t *vp = NULL;
+ int (*compare)(const char *, const char *);
+ int error, idx;
+
+ ASSERT(dvp->v_type == VDIR);
+
+ if (gfs_lookup_dot(vpp, dvp, dp->gfsd_file.gfs_parent, nm) == 0)
+ return (0);
+
+ casecheck = (flags & FIGNORECASE) != 0 && direntflags != NULL;
+ if (vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) ||
+ (flags & FIGNORECASE))
+ compare = strcasecmp;
+ else
+ compare = strcmp;
+
+ gfs_dir_lock(dp);
+
+ error = gfs_dir_lookup_static(compare, dp, nm, dvp, &idx, &vp, realpnp);
+
+ if (vp && casecheck) {
+ gfs_dirent_t *ge;
+ int i;
+
+ for (i = idx + 1; i < dp->gfsd_nstatic; i++) {
+ ge = &dp->gfsd_static[i];
+
+ if (strcasecmp(ge->gfse_name, nm) == 0) {
+ *direntflags |= ED_CASE_CONFLICT;
+ goto out;
+ }
+ }
+ }
+
+ if ((error || casecheck) && dp->gfsd_lookup)
+ error = gfs_dir_lookup_dynamic(dp->gfsd_lookup, dp, nm, dvp,
+ &dynvp, cr, flags, direntflags, vp ? NULL : realpnp);
+
+ if (vp && dynvp) {
+ /* static and dynamic entries are case-insensitive conflict */
+ ASSERT(casecheck);
+ *direntflags |= ED_CASE_CONFLICT;
+ VN_RELE(dynvp);
+ } else if (vp == NULL) {
+ vp = dynvp;
+ } else if (error == ENOENT) {
+ error = 0;
+ } else if (error) {
+ VN_RELE(vp);
+ vp = NULL;
+ }
+
+out:
+ gfs_dir_unlock(dp);
+
+ *vpp = vp;
+ return (error);
+}
+
+/*
+ * gfs_dir_readdir: does a readdir() on the given directory
+ *
+ * dvp - directory vnode
+ * uiop - uio structure
+ * eofp - eof pointer
+ * data - arbitrary data passed to readdir callback
+ *
+ * This routine does all the readdir() dirty work. Even so, the caller must
+ * supply two callbacks in order to get full compatibility.
+ *
+ * If the directory contains static entries, an inode callback must be
+ * specified. This avoids having to create every vnode and call VOP_GETATTR()
+ * when reading the directory. This function has the following arguments:
+ *
+ * ino_t gfs_inode_cb(vnode_t *vp, int index);
+ *
+ * vp - vnode for the directory
+ * index - index in original gfs_dirent_t array
+ *
+ * Returns the inode number for the given entry.
+ *
+ * For directories with dynamic entries, a readdir callback must be provided.
+ * This is significantly more complex, thanks to the particulars of
+ * VOP_READDIR().
+ *
+ * int gfs_readdir_cb(vnode_t *vp, void *dp, int *eofp,
+ * offset_t *off, offset_t *nextoff, void *data, int flags)
+ *
+ * vp - directory vnode
+ * dp - directory entry, sized according to maxlen given to
+ * gfs_dir_create(). callback must fill in d_name and
+ * d_ino (if a dirent64_t), or ed_name, ed_ino, and ed_eflags
+ * (if an edirent_t). edirent_t is used if V_RDDIR_ENTFLAGS
+ * is set in 'flags'.
+ * eofp - callback must set to 1 when EOF has been reached
+ * off - on entry, the last offset read from the directory. Callback
+ * must set to the offset of the current entry, typically left
+ * untouched.
+ * nextoff - callback must set to offset of next entry. Typically
+ * (off + 1)
+ * data - caller-supplied data
+ * flags - VOP_READDIR flags
+ *
+ * Return 0 on success, or error on failure.
+ */
+int
+gfs_dir_readdir(vnode_t *dvp, uio_t *uiop, int *eofp, void *data, cred_t *cr,
+ caller_context_t *ct, int flags)
+{
+ gfs_readdir_state_t gstate;
+ int error, eof = 0;
+ ino64_t ino, pino;
+ offset_t off, next;
+ gfs_dir_t *dp = dvp->v_data;
+
+ error = gfs_get_parent_ino(dvp, cr, ct, &pino, &ino);
+ if (error)
+ return (error);
+
+ if ((error = gfs_readdir_init(&gstate, dp->gfsd_maxlen, 1, uiop,
+ pino, ino, flags)) != 0)
+ return (error);
+
+ while ((error = gfs_readdir_pred(&gstate, uiop, &off)) == 0 &&
+ !eof) {
+
+ if (off >= 0 && off < dp->gfsd_nstatic) {
+ ino = dp->gfsd_inode(dvp, off);
+
+ if ((error = gfs_readdir_emit(&gstate, uiop,
+ off, ino, dp->gfsd_static[off].gfse_name, 0))
+ != 0)
+ break;
+
+ } else if (dp->gfsd_readdir) {
+ off -= dp->gfsd_nstatic;
+
+ if ((error = dp->gfsd_readdir(dvp,
+ gstate.grd_dirent, &eof, &off, &next,
+ data, flags)) != 0 || eof)
+ break;
+
+ off += dp->gfsd_nstatic + 2;
+ next += dp->gfsd_nstatic + 2;
+
+ if ((error = gfs_readdir_emit_int(&gstate, uiop,
+ next)) != 0)
+ break;
+ } else {
+ /*
+ * Offset is beyond the end of the static entries, and
+ * we have no dynamic entries. Set EOF.
+ */
+ eof = 1;
+ }
+ }
+
+ return (gfs_readdir_fini(&gstate, error, eofp, eof));
+}
+
+
+/*
+ * gfs_vop_lookup: VOP_LOOKUP() entry point
+ *
+ * For use directly in vnode ops table. Given a GFS directory, calls
+ * gfs_dir_lookup() as necessary.
+ */
+/* ARGSUSED */
+int
+gfs_vop_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
+ int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+ int *direntflags, pathname_t *realpnp)
+{
+ return (gfs_dir_lookup(dvp, nm, vpp, cr, flags, direntflags, realpnp));
+}
+
+/*
+ * gfs_vop_readdir: VOP_READDIR() entry point
+ *
+ * For use directly in vnode ops table. Given a GFS directory, calls
+ * gfs_dir_readdir() as necessary.
+ */
+/* ARGSUSED */
+int
+gfs_vop_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
+ caller_context_t *ct, int flags)
+{
+ return (gfs_dir_readdir(vp, uiop, eofp, NULL, cr, ct, flags));
+}
+
+
+/*
+ * gfs_vop_map: VOP_MAP() entry point
+ *
+ * Convenient routine for handling pseudo-files that wish to allow mmap() calls.
+ * This function only works for readonly files, and uses the read function for
+ * the vnode to fill in the data. The mapped data is immediately faulted in and
+ * filled with the necessary data during this call; there are no getpage() or
+ * putpage() routines.
+ */
+/* ARGSUSED */
+int
+gfs_vop_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
+ size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cred,
+ caller_context_t *ct)
+{
+ int rv;
+ ssize_t resid = len;
+
+ /*
+ * Check for bad parameters
+ */
+#ifdef _ILP32
+ if (len > MAXOFF_T)
+ return (ENOMEM);
+#endif
+ if (vp->v_flag & VNOMAP)
+ return (ENOTSUP);
+ if (off > MAXOFF_T)
+ return (EFBIG);
+ if ((long)off < 0 || (long)(off + len) < 0)
+ return (EINVAL);
+ if (vp->v_type != VREG)
+ return (ENODEV);
+ if ((prot & (PROT_EXEC | PROT_WRITE)) != 0)
+ return (EACCES);
+
+ /*
+ * Find appropriate address if needed, otherwise clear address range.
+ */
+ as_rangelock(as);
+ rv = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
+ if (rv != 0) {
+ as_rangeunlock(as);
+ return (rv);
+ }
+
+ /*
+ * Create mapping
+ */
+ rv = as_map(as, *addrp, len, segvn_create, zfod_argsp);
+ as_rangeunlock(as);
+ if (rv != 0)
+ return (rv);
+
+ /*
+ * Fill with data from read()
+ */
+ rv = vn_rdwr(UIO_READ, vp, *addrp, len, off, UIO_USERSPACE,
+ 0, (rlim64_t)0, cred, &resid);
+
+ if (rv == 0 && resid != 0)
+ rv = ENXIO;
+
+ if (rv != 0) {
+ as_rangelock(as);
+ (void) as_unmap(as, *addrp, len);
+ as_rangeunlock(as);
+ }
+
+ return (rv);
+}
+
+/*
+ * gfs_vop_inactive: VOP_INACTIVE() entry point
+ *
+ * Given a vnode that is a GFS file or directory, call gfs_file_inactive() or
+ * gfs_dir_inactive() as necessary, and kmem_free()s associated private data.
+ */
+/* ARGSUSED */
+void
+gfs_vop_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+ gfs_file_t *fp = vp->v_data;
+ void *data;
+
+ if (fp->gfs_type == GFS_DIR)
+ data = gfs_dir_inactive(vp);
+ else
+ data = gfs_file_inactive(vp);
+
+ if (data != NULL)
+ kmem_free(data, fp->gfs_size);
+}
diff --git a/uts/common/fs/vnode.c b/uts/common/fs/vnode.c
new file mode 100644
index 000000000000..382369c7fc72
--- /dev/null
+++ b/uts/common/fs/vnode.c
@@ -0,0 +1,4536 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/t_lock.h>
+#include <sys/errno.h>
+#include <sys/cred.h>
+#include <sys/user.h>
+#include <sys/uio.h>
+#include <sys/file.h>
+#include <sys/pathname.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/rwstlock.h>
+#include <sys/fem.h>
+#include <sys/stat.h>
+#include <sys/mode.h>
+#include <sys/conf.h>
+#include <sys/sysmacros.h>
+#include <sys/cmn_err.h>
+#include <sys/systm.h>
+#include <sys/kmem.h>
+#include <sys/debug.h>
+#include <c2/audit.h>
+#include <sys/acl.h>
+#include <sys/nbmlock.h>
+#include <sys/fcntl.h>
+#include <fs/fs_subr.h>
+#include <sys/taskq.h>
+#include <fs/fs_reparse.h>
+
+/* Determine if this vnode is a file that is read-only */
+#define ISROFILE(vp) \
+ ((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
+ (vp)->v_type != VFIFO && vn_is_readonly(vp))
+
+/* Tunable via /etc/system; used only by admin/install */
+int nfs_global_client_only;
+
+/*
+ * Array of vopstats_t for per-FS-type vopstats. This array has the same
+ * number of entries as and parallel to the vfssw table. (Arguably, it could
+ * be part of the vfssw table.) Once it's initialized, it's accessed using
+ * the same fstype index that is used to index into the vfssw table.
+ */
+vopstats_t **vopstats_fstype;
+
+/* vopstats initialization template used for fast initialization via bcopy() */
+static vopstats_t *vs_templatep;
+
+/* Kmem cache handle for vsk_anchor_t allocations */
+kmem_cache_t *vsk_anchor_cache;
+
+/* file events cleanup routine */
+extern void free_fopdata(vnode_t *);
+
+/*
+ * Root of AVL tree for the kstats associated with vopstats. Lock protects
+ * updates to vskstat_tree.
+ */
+avl_tree_t vskstat_tree;
+kmutex_t vskstat_tree_lock;
+
+/* Global variable which enables/disables the vopstats collection */
+int vopstats_enabled = 1;
+
+/*
+ * forward declarations for internal vnode specific data (vsd)
+ */
+static void *vsd_realloc(void *, size_t, size_t);
+
+/*
+ * forward declarations for reparse point functions
+ */
+static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr);
+
+/*
+ * VSD -- VNODE SPECIFIC DATA
+ * The v_data pointer is typically used by a file system to store a
+ * pointer to the file system's private node (e.g. ufs inode, nfs rnode).
+ * However, there are times when additional project private data needs
+ * to be stored separately from the data (node) pointed to by v_data.
+ * This additional data could be stored by the file system itself or
+ * by a completely different kernel entity. VSD provides a way for
+ * callers to obtain a key and store a pointer to private data associated
+ * with a vnode.
+ *
+ * Callers are responsible for protecting the vsd by holding v_vsd_lock
+ * for calls to vsd_set() and vsd_get().
+ */
+
+/*
+ * vsd_lock protects:
+ * vsd_nkeys - creation and deletion of vsd keys
+ * vsd_list - insertion and deletion of vsd_node in the vsd_list
+ * vsd_destructor - adding and removing destructors to the list
+ */
+static kmutex_t vsd_lock;
+static uint_t vsd_nkeys; /* size of destructor array */
+/* list of vsd_node's */
+static list_t *vsd_list = NULL;
+/* per-key destructor funcs */
+static void (**vsd_destructor)(void *);
+
+/*
+ * The following is the common set of actions needed to update the
+ * vopstats structure from a vnode op. Both VOPSTATS_UPDATE() and
+ * VOPSTATS_UPDATE_IO() do almost the same thing, except for the
+ * recording of the bytes transferred. Since the code is similar
+ * but small, it is nearly a duplicate. Consequently any changes
+ * to one may need to be reflected in the other.
+ * Rundown of the variables:
+ * vp - Pointer to the vnode
+ * counter - Partial name structure member to update in vopstats for counts
+ * bytecounter - Partial name structure member to update in vopstats for bytes
+ * bytesval - Value to update in vopstats for bytes
+ * fstype - Index into vsanchor_fstype[], same as index into vfssw[]
+ * vsp - Pointer to vopstats structure (either in vfs or vsanchor_fstype[i])
+ */
+
+#define VOPSTATS_UPDATE(vp, counter) { \
+ vfs_t *vfsp = (vp)->v_vfsp; \
+ if (vfsp && vfsp->vfs_implp && \
+ (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) { \
+ vopstats_t *vsp = &vfsp->vfs_vopstats; \
+ uint64_t *stataddr = &(vsp->n##counter.value.ui64); \
+ extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
+ size_t, uint64_t *); \
+ __dtrace_probe___fsinfo_##counter(vp, 0, stataddr); \
+ (*stataddr)++; \
+ if ((vsp = vfsp->vfs_fstypevsp) != NULL) { \
+ vsp->n##counter.value.ui64++; \
+ } \
+ } \
+}
+
+#define VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) { \
+ vfs_t *vfsp = (vp)->v_vfsp; \
+ if (vfsp && vfsp->vfs_implp && \
+ (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) { \
+ vopstats_t *vsp = &vfsp->vfs_vopstats; \
+ uint64_t *stataddr = &(vsp->n##counter.value.ui64); \
+ extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
+ size_t, uint64_t *); \
+ __dtrace_probe___fsinfo_##counter(vp, bytesval, stataddr); \
+ (*stataddr)++; \
+ vsp->bytecounter.value.ui64 += bytesval; \
+ if ((vsp = vfsp->vfs_fstypevsp) != NULL) { \
+ vsp->n##counter.value.ui64++; \
+ vsp->bytecounter.value.ui64 += bytesval; \
+ } \
+ } \
+}
+
+/*
+ * If the filesystem does not support XIDs map credential
+ * If the vfsp is NULL, perhaps we should also map?
+ */
+#define VOPXID_MAP_CR(vp, cr) { \
+ vfs_t *vfsp = (vp)->v_vfsp; \
+ if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0) \
+ cr = crgetmapped(cr); \
+ }
+
+/*
+ * Convert stat(2) formats to vnode types and vice versa. (Knows about
+ * numerical order of S_IFMT and vnode types.)
+ */
+enum vtype iftovt_tab[] = {
+ VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
+ VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
+};
+
+ushort_t vttoif_tab[] = {
+ 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
+ S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
+};
+
+/*
+ * The system vnode cache.
+ */
+
+kmem_cache_t *vn_cache;
+
+
+/*
+ * Vnode operations vector.
+ */
+
+static const fs_operation_trans_def_t vn_ops_table[] = {
+ VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_READ, offsetof(struct vnodeops, vop_read),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
+ fs_setfl, fs_nosys,
+
+ VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
+ fs_rwlock, fs_rwlock,
+
+ VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
+ (fs_generic_func_p) fs_rwunlock,
+ (fs_generic_func_p) fs_rwunlock, /* no errors allowed */
+
+ VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
+ fs_cmp, fs_cmp, /* no errors allowed */
+
+ VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
+ fs_frlock, fs_nosys,
+
+ VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
+ (fs_generic_func_p) fs_nosys_map,
+ (fs_generic_func_p) fs_nosys_map,
+
+ VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
+ (fs_generic_func_p) fs_nosys_addmap,
+ (fs_generic_func_p) fs_nosys_addmap,
+
+ VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
+ (fs_generic_func_p) fs_poll, (fs_generic_func_p) fs_nosys_poll,
+
+ VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
+ fs_pathconf, fs_nosys,
+
+ VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
+ (fs_generic_func_p) fs_dispose,
+ (fs_generic_func_p) fs_nodispose,
+
+ VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
+ fs_fab_acl, fs_nosys,
+
+ VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
+ fs_shrlock, fs_nosys,
+
+ VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
+ (fs_generic_func_p) fs_vnevent_nosupport,
+ (fs_generic_func_p) fs_vnevent_nosupport,
+
+ VOPNAME_REQZCBUF, offsetof(struct vnodeops, vop_reqzcbuf),
+ fs_nosys, fs_nosys,
+
+ VOPNAME_RETZCBUF, offsetof(struct vnodeops, vop_retzcbuf),
+ fs_nosys, fs_nosys,
+
+ NULL, 0, NULL, NULL
+};
+
+/* Extensible attribute (xva) routines. */
+
+/*
+ * Prepare an xvattr_t for use: clear it, stamp the magic number and
+ * request/return bitmap size, flag the embedded vattr_t as an extensible
+ * attribute request (AT_XVATTR), and point xva_rtnattrmapp at the
+ * returned-attributes bitmap.
+ */
+void
+xva_init(xvattr_t *xvap)
+{
+ bzero(xvap, sizeof (xvattr_t));
+ xvap->xva_magic = XVA_MAGIC;
+ xvap->xva_mapsize = XVA_MAPSIZE;
+ xvap->xva_vattr.va_mask = AT_XVATTR;
+ xvap->xva_rtnattrmapp = xvap->xva_rtnattrmap;
+}
+
+/*
+ * Return a pointer to the xoptattr_t embedded in an xvattr_t, or NULL
+ * when the caller's vattr does not carry extended attributes (AT_XVATTR
+ * not set in va_mask).
+ */
+xoptattr_t *
+xva_getxoptattr(xvattr_t *xvap)
+{
+ if ((xvap->xva_vattr.va_mask & AT_XVATTR) == 0)
+ return (NULL);
+ return (&xvap->xva_xoptattrs);
+}
+
+/*
+ * AVL comparison callback ordering vsk_anchor_t nodes by vsk_fsid (the
+ * f_fsid reported by VFS_STATVFS(), which also forms the kstat name).
+ * Returns -1/0/1 as the AVL routines require.
+ */
+static int
+vska_compar(const void *n1, const void *n2)
+{
+ ulong_t fsid1 = ((vsk_anchor_t *)n1)->vsk_fsid;
+ ulong_t fsid2 = ((vsk_anchor_t *)n2)->vsk_fsid;
+
+ if (fsid1 == fsid2)
+ return (0);
+ return ((fsid1 < fsid2) ? -1 : 1);
+}
+
+/*
+ * Used to create a single template which will be bcopy()ed to a newly
+ * allocated vsanchor_combo_t structure in new_vsanchor(), below.
+ *
+ * Allocated once from vopstats_startup() and retained thereafter; every
+ * later vopstats_t is initialized by bcopy()ing this template (see
+ * initialize_vopstats()) so the ~50 kstat_named_init() calls happen
+ * only here.
+ */
+static vopstats_t *
+create_vopstats_template()
+{
+ vopstats_t *vsp;
+
+ vsp = kmem_alloc(sizeof (vopstats_t), KM_SLEEP);
+ bzero(vsp, sizeof (*vsp)); /* Start fresh */
+
+ /* VOP_OPEN */
+ kstat_named_init(&vsp->nopen, "nopen", KSTAT_DATA_UINT64);
+ /* VOP_CLOSE */
+ kstat_named_init(&vsp->nclose, "nclose", KSTAT_DATA_UINT64);
+ /* VOP_READ I/O */
+ kstat_named_init(&vsp->nread, "nread", KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->read_bytes, "read_bytes", KSTAT_DATA_UINT64);
+ /* VOP_WRITE I/O */
+ kstat_named_init(&vsp->nwrite, "nwrite", KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->write_bytes, "write_bytes", KSTAT_DATA_UINT64);
+ /* VOP_IOCTL */
+ kstat_named_init(&vsp->nioctl, "nioctl", KSTAT_DATA_UINT64);
+ /* VOP_SETFL */
+ kstat_named_init(&vsp->nsetfl, "nsetfl", KSTAT_DATA_UINT64);
+ /* VOP_GETATTR */
+ kstat_named_init(&vsp->ngetattr, "ngetattr", KSTAT_DATA_UINT64);
+ /* VOP_SETATTR */
+ kstat_named_init(&vsp->nsetattr, "nsetattr", KSTAT_DATA_UINT64);
+ /* VOP_ACCESS */
+ kstat_named_init(&vsp->naccess, "naccess", KSTAT_DATA_UINT64);
+ /* VOP_LOOKUP */
+ kstat_named_init(&vsp->nlookup, "nlookup", KSTAT_DATA_UINT64);
+ /* VOP_CREATE */
+ kstat_named_init(&vsp->ncreate, "ncreate", KSTAT_DATA_UINT64);
+ /* VOP_REMOVE */
+ kstat_named_init(&vsp->nremove, "nremove", KSTAT_DATA_UINT64);
+ /* VOP_LINK */
+ kstat_named_init(&vsp->nlink, "nlink", KSTAT_DATA_UINT64);
+ /* VOP_RENAME */
+ kstat_named_init(&vsp->nrename, "nrename", KSTAT_DATA_UINT64);
+ /* VOP_MKDIR */
+ kstat_named_init(&vsp->nmkdir, "nmkdir", KSTAT_DATA_UINT64);
+ /* VOP_RMDIR */
+ kstat_named_init(&vsp->nrmdir, "nrmdir", KSTAT_DATA_UINT64);
+ /* VOP_READDIR I/O */
+ kstat_named_init(&vsp->nreaddir, "nreaddir", KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->readdir_bytes, "readdir_bytes",
+ KSTAT_DATA_UINT64);
+ /* VOP_SYMLINK */
+ kstat_named_init(&vsp->nsymlink, "nsymlink", KSTAT_DATA_UINT64);
+ /* VOP_READLINK */
+ kstat_named_init(&vsp->nreadlink, "nreadlink", KSTAT_DATA_UINT64);
+ /* VOP_FSYNC */
+ kstat_named_init(&vsp->nfsync, "nfsync", KSTAT_DATA_UINT64);
+ /* VOP_INACTIVE */
+ kstat_named_init(&vsp->ninactive, "ninactive", KSTAT_DATA_UINT64);
+ /* VOP_FID */
+ kstat_named_init(&vsp->nfid, "nfid", KSTAT_DATA_UINT64);
+ /* VOP_RWLOCK */
+ kstat_named_init(&vsp->nrwlock, "nrwlock", KSTAT_DATA_UINT64);
+ /* VOP_RWUNLOCK */
+ kstat_named_init(&vsp->nrwunlock, "nrwunlock", KSTAT_DATA_UINT64);
+ /* VOP_SEEK */
+ kstat_named_init(&vsp->nseek, "nseek", KSTAT_DATA_UINT64);
+ /* VOP_CMP */
+ kstat_named_init(&vsp->ncmp, "ncmp", KSTAT_DATA_UINT64);
+ /* VOP_FRLOCK */
+ kstat_named_init(&vsp->nfrlock, "nfrlock", KSTAT_DATA_UINT64);
+ /* VOP_SPACE */
+ kstat_named_init(&vsp->nspace, "nspace", KSTAT_DATA_UINT64);
+ /* VOP_REALVP */
+ kstat_named_init(&vsp->nrealvp, "nrealvp", KSTAT_DATA_UINT64);
+ /* VOP_GETPAGE */
+ kstat_named_init(&vsp->ngetpage, "ngetpage", KSTAT_DATA_UINT64);
+ /* VOP_PUTPAGE */
+ kstat_named_init(&vsp->nputpage, "nputpage", KSTAT_DATA_UINT64);
+ /* VOP_MAP */
+ kstat_named_init(&vsp->nmap, "nmap", KSTAT_DATA_UINT64);
+ /* VOP_ADDMAP */
+ kstat_named_init(&vsp->naddmap, "naddmap", KSTAT_DATA_UINT64);
+ /* VOP_DELMAP */
+ kstat_named_init(&vsp->ndelmap, "ndelmap", KSTAT_DATA_UINT64);
+ /* VOP_POLL */
+ kstat_named_init(&vsp->npoll, "npoll", KSTAT_DATA_UINT64);
+ /* VOP_DUMP */
+ kstat_named_init(&vsp->ndump, "ndump", KSTAT_DATA_UINT64);
+ /* VOP_PATHCONF */
+ kstat_named_init(&vsp->npathconf, "npathconf", KSTAT_DATA_UINT64);
+ /* VOP_PAGEIO */
+ kstat_named_init(&vsp->npageio, "npageio", KSTAT_DATA_UINT64);
+ /* VOP_DUMPCTL */
+ kstat_named_init(&vsp->ndumpctl, "ndumpctl", KSTAT_DATA_UINT64);
+ /* VOP_DISPOSE */
+ kstat_named_init(&vsp->ndispose, "ndispose", KSTAT_DATA_UINT64);
+ /* VOP_SETSECATTR */
+ kstat_named_init(&vsp->nsetsecattr, "nsetsecattr", KSTAT_DATA_UINT64);
+ /* VOP_GETSECATTR */
+ kstat_named_init(&vsp->ngetsecattr, "ngetsecattr", KSTAT_DATA_UINT64);
+ /* VOP_SHRLOCK */
+ kstat_named_init(&vsp->nshrlock, "nshrlock", KSTAT_DATA_UINT64);
+ /* VOP_VNEVENT */
+ kstat_named_init(&vsp->nvnevent, "nvnevent", KSTAT_DATA_UINT64);
+ /* VOP_REQZCBUF */
+ kstat_named_init(&vsp->nreqzcbuf, "nreqzcbuf", KSTAT_DATA_UINT64);
+ /* VOP_RETZCBUF */
+ kstat_named_init(&vsp->nretzcbuf, "nretzcbuf", KSTAT_DATA_UINT64);
+
+ return (vsp);
+}
+
+/*
+ * Create and install a named kstat exporting the given vopstats structure.
+ * The kstat is virtual: ks_data points directly at *vsp rather than a
+ * private copy. Returns the installed kstat, or NULL when vopstats are
+ * globally disabled or kstat_create() fails.
+ */
+kstat_t *
+new_vskstat(char *ksname, vopstats_t *vsp)
+{
+ kstat_t *ksp = NULL;
+
+ if (vopstats_enabled) {
+ ksp = kstat_create("unix", 0, ksname, "misc", KSTAT_TYPE_NAMED,
+ sizeof (vopstats_t)/sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
+ if (ksp != NULL) {
+ ksp->ks_data = vsp;
+ kstat_install(ksp);
+ }
+ }
+
+ return (ksp);
+}
+
+/*
+ * Called from vfsinit() to initialize the support mechanisms for vopstats
+ * (AVL tree + lock, anchor cache, per-fstype array, and the template).
+ * NOTE(review): assumed to run once, single-threaded, during boot before
+ * any mounts — confirm against vfsinit().
+ */
+void
+vopstats_startup()
+{
+ if (!vopstats_enabled)
+ return;
+
+ /*
+ * Creates the AVL tree which holds per-vfs vopstat anchors. This
+ * is necessary since we need to check if a kstat exists before we
+ * attempt to create it. Also, initialize its lock.
+ */
+ avl_create(&vskstat_tree, vska_compar, sizeof (vsk_anchor_t),
+ offsetof(vsk_anchor_t, vsk_node));
+ mutex_init(&vskstat_tree_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ vsk_anchor_cache = kmem_cache_create("vsk_anchor_cache",
+ sizeof (vsk_anchor_t), sizeof (uintptr_t), NULL, NULL, NULL,
+ NULL, NULL, 0);
+
+ /*
+ * Set up the array of pointers for the vopstats-by-FS-type.
+ * The entries will be allocated/initialized as each file system
+ * goes through modload/mod_installfs.
+ */
+ vopstats_fstype = (vopstats_t **)kmem_zalloc(
+ (sizeof (vopstats_t *) * nfstype), KM_SLEEP);
+
+ /* Set up the global vopstats initialization template */
+ vs_templatep = create_vopstats_template();
+}
+
+/*
+ * Reset a vopstats_t to the freshly-initialized state. Rather than
+ * repeating ~50 kstat_named_init() calls per structure, bcopy() the
+ * pre-built template (vs_templatep), which also zeroes every counter.
+ * A NULL vsp is tolerated as a no-op.
+ */
+void
+initialize_vopstats(vopstats_t *vsp)
+{
+ if (vsp != NULL)
+ bcopy(vs_templatep, vsp, sizeof (vopstats_t));
+}
+
+/*
+ * If possible, determine which vopstats by fstype to use and
+ * return a pointer to the caller.
+ *
+ * vswp, when non-NULL, identifies the vfssw[] entry directly (needed
+ * because all NFS versions share one fstype in their vfs); otherwise the
+ * index comes from vfsp->vfs_fstype. Returns NULL when stats are
+ * disabled for this vfs or the index is out of range.
+ */
+vopstats_t *
+get_fstype_vopstats(vfs_t *vfsp, struct vfssw *vswp)
+{
+ int fstype = 0; /* Index into vfssw[] */
+ vopstats_t *vsp = NULL;
+
+ if (vfsp == NULL || (vfsp->vfs_flag & VFS_STATS) == 0 ||
+ !vopstats_enabled)
+ return (NULL);
+ /*
+ * Set up the fstype. We go to so much trouble because all versions
+ * of NFS use the same fstype in their vfs even though they have
+ * distinct entries in the vfssw[] table.
+ * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
+ */
+ if (vswp) {
+ fstype = vswp - vfssw; /* Gets us the index */
+ } else {
+ fstype = vfsp->vfs_fstype;
+ }
+
+ /*
+ * Point to the per-fstype vopstats. The only valid values are
+ * non-zero positive values less than the number of vfssw[] table
+ * entries.
+ */
+ if (fstype > 0 && fstype < nfstype) {
+ vsp = vopstats_fstype[fstype];
+ }
+
+ return (vsp);
+}
+
+/*
+ * Generate a kstat name, create the kstat structure, and allocate a
+ * vsk_anchor_t to hold it together. Return the pointer to the vsk_anchor_t
+ * to the caller. This must only be called from a mount.
+ */
+vsk_anchor_t *
+get_vskstat_anchor(vfs_t *vfsp)
+{
+ char kstatstr[KSTAT_STRLEN]; /* kstat name for vopstats */
+ statvfs64_t statvfsbuf; /* Needed to find f_fsid */
+ vsk_anchor_t *vskp = NULL; /* vfs <--> kstat anchor */
+ kstat_t *ksp; /* Ptr to new kstat */
+ avl_index_t where; /* Location in the AVL tree */
+
+ if (vfsp == NULL || vfsp->vfs_implp == NULL ||
+ (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
+ return (NULL);
+
+ /* Need to get the fsid to build a kstat name */
+ if (VFS_STATVFS(vfsp, &statvfsbuf) == 0) {
+ /* Create a name for our kstats based on fsid */
+ (void) snprintf(kstatstr, KSTAT_STRLEN, "%s%lx",
+ VOPSTATS_STR, statvfsbuf.f_fsid);
+
+ /* Allocate and initialize the vsk_anchor_t */
+ vskp = kmem_cache_alloc(vsk_anchor_cache, KM_SLEEP);
+ bzero(vskp, sizeof (*vskp));
+ vskp->vsk_fsid = statvfsbuf.f_fsid;
+
+ mutex_enter(&vskstat_tree_lock);
+ if (avl_find(&vskstat_tree, vskp, &where) == NULL) {
+ avl_insert(&vskstat_tree, vskp, where);
+ mutex_exit(&vskstat_tree_lock);
+
+ /*
+ * Now that we've got the anchor in the AVL
+ * tree, we can create the kstat.
+ */
+ ksp = new_vskstat(kstatstr, &vfsp->vfs_vopstats);
+ if (ksp) {
+ vskp->vsk_ksp = ksp;
+ }
+ } else {
+ /* Oops, found one! Release memory and lock. */
+ mutex_exit(&vskstat_tree_lock);
+ kmem_cache_free(vsk_anchor_cache, vskp);
+ vskp = NULL;
+ }
+ }
+ /* NULL if stats disabled, VFS_STATVFS failed, or fsid was a duplicate */
+ return (vskp);
+}
+
+/*
+ * We're in the process of tearing down the vfs and need to cleanup
+ * the data structures associated with the vopstats. Must only be called
+ * from dounmount().
+ */
+void
+teardown_vopstats(vfs_t *vfsp)
+{
+ vsk_anchor_t *vskap;
+ avl_index_t where;
+
+ if (vfsp == NULL || vfsp->vfs_implp == NULL ||
+ (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
+ return;
+
+ /* This is a safe check since VFS_STATS must be set (see above) */
+ if ((vskap = vfsp->vfs_vskap) == NULL)
+ return;
+
+ /* Whack the pointer right away */
+ vfsp->vfs_vskap = NULL;
+
+ /* Lock the tree, remove the node, and delete the kstat */
+ mutex_enter(&vskstat_tree_lock);
+ if (avl_find(&vskstat_tree, vskap, &where)) {
+ avl_remove(&vskstat_tree, vskap);
+ }
+
+ /* kstat_delete() happens under the tree lock, mirroring creation */
+ if (vskap->vsk_ksp) {
+ kstat_delete(vskap->vsk_ksp);
+ }
+ mutex_exit(&vskstat_tree_lock);
+
+ kmem_cache_free(vsk_anchor_cache, vskap);
+}
+
+/*
+ * Read or write a vnode. Called from kernel code.
+ *
+ * If residp == NULL, a partial transfer is reported as EIO; otherwise the
+ * untransferred byte count is returned through *residp and it is up to
+ * the caller to decide whether a short transfer is an error.
+ */
+int
+vn_rdwr(
+ enum uio_rw rw,
+ struct vnode *vp,
+ caddr_t base,
+ ssize_t len,
+ offset_t offset,
+ enum uio_seg seg,
+ int ioflag,
+ rlim64_t ulimit, /* meaningful only if rw is UIO_WRITE */
+ cred_t *cr,
+ ssize_t *residp)
+{
+ struct uio uio;
+ struct iovec iov;
+ int error;
+ int in_crit = 0;
+
+ if (rw == UIO_WRITE && ISROFILE(vp))
+ return (EROFS);
+
+ /* A negative length is rejected up front rather than passed down */
+ if (len < 0)
+ return (EIO);
+
+ VOPXID_MAP_CR(vp, cr);
+
+ iov.iov_base = base;
+ iov.iov_len = len;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_loffset = offset;
+ uio.uio_segflg = (short)seg;
+ uio.uio_resid = len;
+ uio.uio_llimit = ulimit;
+
+ /*
+ * We have to enter the critical region before calling VOP_RWLOCK
+ * to avoid a deadlock with ufs.
+ */
+ if (nbl_need_check(vp)) {
+ int svmand;
+
+ nbl_start_crit(vp, RW_READER);
+ in_crit = 1;
+ error = nbl_svmand(vp, cr, &svmand);
+ if (error != 0)
+ goto done;
+ if (nbl_conflict(vp, rw == UIO_WRITE ? NBL_WRITE : NBL_READ,
+ uio.uio_offset, uio.uio_resid, svmand, NULL)) {
+ error = EACCES;
+ goto done;
+ }
+ }
+
+ (void) VOP_RWLOCK(vp,
+ rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
+ if (rw == UIO_WRITE) {
+ uio.uio_fmode = FWRITE;
+ uio.uio_extflg = UIO_COPY_DEFAULT;
+ error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
+ } else {
+ uio.uio_fmode = FREAD;
+ uio.uio_extflg = UIO_COPY_CACHED;
+ error = VOP_READ(vp, &uio, ioflag, cr, NULL);
+ }
+ VOP_RWUNLOCK(vp,
+ rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
+ if (residp)
+ *residp = uio.uio_resid;
+ else if (uio.uio_resid)
+ error = EIO;
+
+done:
+ if (in_crit)
+ nbl_end_crit(vp);
+ return (error);
+}
+
+/*
+ * Release a vnode. Call VOP_INACTIVE on last reference or
+ * decrement reference count.
+ *
+ * To avoid race conditions, the v_count is left at 1 for
+ * the call to VOP_INACTIVE. This prevents another thread
+ * from reclaiming and releasing the vnode *before* the
+ * VOP_INACTIVE routine has a chance to destroy the vnode.
+ * We can't have more than 1 thread calling VOP_INACTIVE
+ * on a vnode.
+ */
+void
+vn_rele(vnode_t *vp)
+{
+ /* Releasing a vnode nobody holds is a caller bug */
+ VERIFY(vp->v_count > 0);
+ mutex_enter(&vp->v_lock);
+ if (vp->v_count == 1) {
+ /* Last hold: v_lock dropped before VOP_INACTIVE (see above) */
+ mutex_exit(&vp->v_lock);
+ VOP_INACTIVE(vp, CRED(), NULL);
+ return;
+ }
+ vp->v_count--;
+ mutex_exit(&vp->v_lock);
+}
+
+/*
+ * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
+ * as a single reference, so v_count is not decremented until the last DNLC hold
+ * is released. This makes it possible to distinguish vnodes that are referenced
+ * only by the DNLC.
+ */
+void
+vn_rele_dnlc(vnode_t *vp)
+{
+ VERIFY((vp->v_count > 0) && (vp->v_count_dnlc > 0));
+ mutex_enter(&vp->v_lock);
+ if (--vp->v_count_dnlc == 0) {
+ /* Last DNLC hold gone: now release one regular reference */
+ if (vp->v_count == 1) {
+ mutex_exit(&vp->v_lock);
+ VOP_INACTIVE(vp, CRED(), NULL);
+ return;
+ }
+ vp->v_count--;
+ }
+ mutex_exit(&vp->v_lock);
+}
+
+/*
+ * Like vn_rele() except that it clears v_stream under v_lock.
+ * This is used by sockfs when it dismantels the association between
+ * the sockfs node and the vnode in the underlaying file system.
+ * v_lock has to be held to prevent a thread coming through the lookupname
+ * path from accessing a stream head that is going away.
+ */
+void
+vn_rele_stream(vnode_t *vp)
+{
+ VERIFY(vp->v_count > 0);
+ mutex_enter(&vp->v_lock);
+ /* Sever the stream association while the lock is held */
+ vp->v_stream = NULL;
+ if (vp->v_count == 1) {
+ mutex_exit(&vp->v_lock);
+ VOP_INACTIVE(vp, CRED(), NULL);
+ return;
+ }
+ vp->v_count--;
+ mutex_exit(&vp->v_lock);
+}
+
+/*
+ * Taskq callback used by vn_rele_async(): performs the deferred
+ * VOP_INACTIVE() for a vnode whose last reference was released.
+ */
+static void
+vn_rele_inactive(vnode_t *vp)
+{
+ VOP_INACTIVE(vp, CRED(), NULL);
+}
+
+/*
+ * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
+ * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
+ * the file system as a result of releasing the vnode. Note, file systems
+ * already have to handle the race where the vnode is incremented before the
+ * inactive routine is called and does its locking.
+ *
+ * Warning: Excessive use of this routine can lead to performance problems.
+ * This is because taskqs throttle back allocation if too many are created.
+ */
+void
+vn_rele_async(vnode_t *vp, taskq_t *taskq)
+{
+ VERIFY(vp->v_count > 0);
+ mutex_enter(&vp->v_lock);
+ if (vp->v_count == 1) {
+ mutex_exit(&vp->v_lock);
+ /* TQ_SLEEP dispatch must succeed; the VERIFY enforces this */
+ VERIFY(taskq_dispatch(taskq, (task_func_t *)vn_rele_inactive,
+ vp, TQ_SLEEP) != NULL);
+ return;
+ }
+ vp->v_count--;
+ mutex_exit(&vp->v_lock);
+}
+
+/*
+ * Open/create a vnode by pathname. Thin wrapper that forwards to
+ * vn_openat() with no start vnode (startvp == NULL, i.e. resolution is
+ * relative to the process' root/cwd) and no file descriptor (fd == -1).
+ */
+int
+vn_open(
+ char *pnamep,
+ enum uio_seg seg,
+ int filemode,
+ int createmode,
+ struct vnode **vpp,
+ enum create crwhy,
+ mode_t umask)
+{
+ return (vn_openat(pnamep, seg, filemode, createmode, vpp, crwhy,
+ umask, NULL, -1));
+}
+
+
+/*
+ * Open/create a vnode.
+ * This may be callable by the kernel, the only known use
+ * of user context being that the current user credentials
+ * are used for permissions. crwhy is defined iff filemode & FCREAT.
+ *
+ * On success the held, opened vnode is returned through *vpp; on failure
+ * all state acquired here (share reservation, open, hold) is unwound.
+ */
+int
+vn_openat(
+ char *pnamep,
+ enum uio_seg seg,
+ int filemode,
+ int createmode,
+ struct vnode **vpp,
+ enum create crwhy,
+ mode_t umask,
+ struct vnode *startvp,
+ int fd)
+{
+ struct vnode *vp;
+ int mode;
+ int accessflags;
+ int error;
+ int in_crit = 0;
+ int open_done = 0;
+ int shrlock_done = 0;
+ struct vattr vattr;
+ enum symfollow follow;
+ int estale_retry = 0;
+ struct shrlock shr;
+ struct shr_locowner shr_own;
+
+ mode = 0;
+ accessflags = 0;
+ if (filemode & FREAD)
+ mode |= VREAD;
+ if (filemode & (FWRITE|FTRUNC))
+ mode |= VWRITE;
+ if (filemode & (FSEARCH|FEXEC|FXATTRDIROPEN))
+ mode |= VEXEC;
+
+ /* symlink interpretation */
+ if (filemode & FNOFOLLOW)
+ follow = NO_FOLLOW;
+ else
+ follow = FOLLOW;
+
+ if (filemode & FAPPEND)
+ accessflags |= V_APPEND;
+
+/* Retry target for NFS ESTALE recovery (see error path at bottom) */
+top:
+ if (filemode & FCREAT) {
+ enum vcexcl excl;
+
+ /*
+ * Wish to create a file.
+ */
+ vattr.va_type = VREG;
+ vattr.va_mode = createmode;
+ vattr.va_mask = AT_TYPE|AT_MODE;
+ if (filemode & FTRUNC) {
+ vattr.va_size = 0;
+ vattr.va_mask |= AT_SIZE;
+ }
+ if (filemode & FEXCL)
+ excl = EXCL;
+ else
+ excl = NONEXCL;
+
+ if (error =
+ vn_createat(pnamep, seg, &vattr, excl, mode, &vp, crwhy,
+ (filemode & ~(FTRUNC|FEXCL)), umask, startvp))
+ return (error);
+ } else {
+ /*
+ * Wish to open a file. Just look it up.
+ */
+ if (error = lookupnameat(pnamep, seg, follow,
+ NULLVPP, &vp, startvp)) {
+ if ((error == ESTALE) &&
+ fs_need_estale_retry(estale_retry++))
+ goto top;
+ return (error);
+ }
+
+ /*
+ * Get the attributes to check whether file is large.
+ * We do this only if the FOFFMAX flag is not set and
+ * only for regular files.
+ */
+
+ if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
+ vattr.va_mask = AT_SIZE;
+ if ((error = VOP_GETATTR(vp, &vattr, 0,
+ CRED(), NULL))) {
+ goto out;
+ }
+ if (vattr.va_size > (u_offset_t)MAXOFF32_T) {
+ /*
+ * Large File API - regular open fails
+ * if FOFFMAX flag is set in file mode
+ */
+ error = EOVERFLOW;
+ goto out;
+ }
+ }
+ /*
+ * Can't write directories, active texts, or
+ * read-only filesystems. Can't truncate files
+ * on which mandatory locking is in effect.
+ */
+ if (filemode & (FWRITE|FTRUNC)) {
+ /*
+ * Allow writable directory if VDIROPEN flag is set.
+ */
+ if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
+ error = EISDIR;
+ goto out;
+ }
+ if (ISROFILE(vp)) {
+ error = EROFS;
+ goto out;
+ }
+ /*
+ * Can't truncate files on which
+ * sysv mandatory locking is in effect.
+ */
+ if (filemode & FTRUNC) {
+ vnode_t *rvp;
+
+ if (VOP_REALVP(vp, &rvp, NULL) != 0)
+ rvp = vp;
+ if (rvp->v_filocks != NULL) {
+ vattr.va_mask = AT_MODE;
+ if ((error = VOP_GETATTR(vp,
+ &vattr, 0, CRED(), NULL)) == 0 &&
+ MANDLOCK(vp, vattr.va_mode))
+ error = EAGAIN;
+ }
+ }
+ if (error)
+ goto out;
+ }
+ /*
+ * Check permissions.
+ */
+ if (error = VOP_ACCESS(vp, mode, accessflags, CRED(), NULL))
+ goto out;
+ /*
+ * Require FSEARCH to return a directory.
+ * Require FEXEC to return a regular file.
+ */
+ if ((filemode & FSEARCH) && vp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+ if ((filemode & FEXEC) && vp->v_type != VREG) {
+ error = ENOEXEC; /* XXX: error code? */
+ goto out;
+ }
+ }
+
+ /*
+ * Do remaining checks for FNOFOLLOW and FNOLINKS.
+ */
+ if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
+ error = ELOOP;
+ goto out;
+ }
+ if (filemode & FNOLINKS) {
+ vattr.va_mask = AT_NLINK;
+ if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))) {
+ goto out;
+ }
+ if (vattr.va_nlink != 1) {
+ error = EMLINK;
+ goto out;
+ }
+ }
+
+ /*
+ * Opening a socket corresponding to the AF_UNIX pathname
+ * in the filesystem name space is not supported.
+ * However, VSOCK nodes in namefs are supported in order
+ * to make fattach work for sockets.
+ *
+ * XXX This uses VOP_REALVP to distinguish between
+ * an unopened namefs node (where VOP_REALVP returns a
+ * different VSOCK vnode) and a VSOCK created by vn_create
+ * in some file system (where VOP_REALVP would never return
+ * a different vnode).
+ */
+ if (vp->v_type == VSOCK) {
+ struct vnode *nvp;
+
+ error = VOP_REALVP(vp, &nvp, NULL);
+ if (error != 0 || nvp == NULL || nvp == vp ||
+ nvp->v_type != VSOCK) {
+ error = EOPNOTSUPP;
+ goto out;
+ }
+ }
+
+ /* Non-blocking mandatory locking: take a share reservation first */
+ if ((vp->v_type == VREG) && nbl_need_check(vp)) {
+ /* get share reservation */
+ shr.s_access = 0;
+ if (filemode & FWRITE)
+ shr.s_access |= F_WRACC;
+ if (filemode & FREAD)
+ shr.s_access |= F_RDACC;
+ shr.s_deny = 0;
+ shr.s_sysid = 0;
+ shr.s_pid = ttoproc(curthread)->p_pid;
+ shr_own.sl_pid = shr.s_pid;
+ shr_own.sl_id = fd;
+ shr.s_own_len = sizeof (shr_own);
+ shr.s_owner = (caddr_t)&shr_own;
+ error = VOP_SHRLOCK(vp, F_SHARE_NBMAND, &shr, filemode, CRED(),
+ NULL);
+ if (error)
+ goto out;
+ shrlock_done = 1;
+
+ /* nbmand conflict check if truncating file */
+ if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
+ nbl_start_crit(vp, RW_READER);
+ in_crit = 1;
+
+ vattr.va_mask = AT_SIZE;
+ if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
+ goto out;
+ if (nbl_conflict(vp, NBL_WRITE, 0, vattr.va_size, 0,
+ NULL)) {
+ error = EACCES;
+ goto out;
+ }
+ }
+ }
+
+ /*
+ * Do opening protocol.
+ */
+ error = VOP_OPEN(&vp, filemode, CRED(), NULL);
+ if (error)
+ goto out;
+ open_done = 1;
+
+ /*
+ * Truncate if required.
+ */
+ if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
+ vattr.va_size = 0;
+ vattr.va_mask = AT_SIZE;
+ if ((error = VOP_SETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
+ goto out;
+ }
+out:
+ ASSERT(vp->v_count > 0);
+
+ if (in_crit) {
+ nbl_end_crit(vp);
+ in_crit = 0;
+ }
+ if (error) {
+ if (open_done) {
+ (void) VOP_CLOSE(vp, filemode, 1, (offset_t)0, CRED(),
+ NULL);
+ open_done = 0;
+ /*
+ * NOTE(review): clearing shrlock_done here implies
+ * VOP_CLOSE also releases the share reservation —
+ * confirm against the VOP_CLOSE contract.
+ */
+ shrlock_done = 0;
+ }
+ if (shrlock_done) {
+ (void) VOP_SHRLOCK(vp, F_UNSHARE, &shr, 0, CRED(),
+ NULL);
+ shrlock_done = 0;
+ }
+
+ /*
+ * The following clause was added to handle a problem
+ * with NFS consistency. It is possible that a lookup
+ * of the file to be opened succeeded, but the file
+ * itself doesn't actually exist on the server. This
+ * is chiefly due to the DNLC containing an entry for
+ * the file which has been removed on the server. In
+ * this case, we just start over. If there was some
+ * other cause for the ESTALE error, then the lookup
+ * of the file will fail and the error will be returned
+ * above instead of looping around from here.
+ */
+ VN_RELE(vp);
+ if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
+ goto top;
+ } else
+ *vpp = vp;
+ return (error);
+}
+
+/*
+ * The following two accessor functions are for the NFSv4 server. Since there
+ * is no VOP_OPEN_UP/DOWNGRADE we need a way for the NFS server to keep the
+ * vnode open counts correct when a client "upgrades" an open or does an
+ * open_downgrade. In NFS, an upgrade or downgrade can not only change the
+ * open mode (add or subtract read or write), but also change the share/deny
+ * modes. However, share reservations are not integrated with OPEN, yet, so
+ * we need to handle each separately. These functions are cleaner than having
+ * the NFS server manipulate the counts directly, however, nobody else should
+ * use these functions.
+ */
+/*
+ * Bump a regular file's per-vnode open counts on behalf of the NFSv4
+ * server when a client upgrades an existing open (adds read and/or
+ * write access).  See the block comment above: nobody else should
+ * call this.
+ */
+void
+vn_open_upgrade(vnode_t *vp, int filemode)
+{
+	ASSERT(vp->v_type == VREG);
+
+	/* The two counts are independent; order does not matter. */
+	if (filemode & FWRITE)
+		atomic_add_32(&(vp->v_wrcnt), 1);
+	if (filemode & FREAD)
+		atomic_add_32(&(vp->v_rdcnt), 1);
+}
+
+/*
+ * Drop a regular file's per-vnode open counts on behalf of the NFSv4
+ * server when a client performs an open_downgrade (removes read
+ * and/or write access).  Counterpart of vn_open_upgrade().
+ */
+void
+vn_open_downgrade(vnode_t *vp, int filemode)
+{
+	ASSERT(vp->v_type == VREG);
+
+	if (filemode & FWRITE) {
+		ASSERT(vp->v_wrcnt > 0);
+		atomic_add_32(&(vp->v_wrcnt), -1);
+	}
+	if (filemode & FREAD) {
+		ASSERT(vp->v_rdcnt > 0);
+		atomic_add_32(&(vp->v_rdcnt), -1);
+	}
+}
+
+/*
+ * Create a file system object.  Convenience front end for
+ * vn_createat() with no start directory, so relative paths resolve
+ * from the caller's current directory.  All arguments are passed
+ * through unchanged; see vn_createat() for their meaning.
+ */
+int
+vn_create(char *pnamep, enum uio_seg seg, struct vattr *vap, enum vcexcl excl,
+    int mode, struct vnode **vpp, enum create why, int flag, mode_t umask)
+{
+	return (vn_createat(pnamep, seg, vap, excl, mode, vpp, why, flag,
+	    umask, NULL));
+}
+
+/*
+ * Create a vnode (makenode).
+ *
+ * pnamep/seg	path of the object to create, and whether the string lives
+ *		in user or kernel address space.
+ * vap		initial attributes; AT_TYPE and AT_MODE must be set.
+ * excl		EXCL for exclusive create, NONEXCL otherwise.
+ * mode		access bits checked against an existing file (VWRITE etc.).
+ * vpp		on success holds the held vnode of the created/found object.
+ * why		CRMKDIR / CRMKNOD / plain create -- selects VOP_MKDIR vs.
+ *		VOP_CREATE and a few special-case checks below.
+ * flag		open flags; FNOFOLLOW/FNOLINKS are consumed here, FOFFMAX
+ *		controls the large-file check.
+ * umask	mode creation mask; NOT applied when the parent directory
+ *		carries default ACLs.
+ * startvp	directory the path walk starts from (NULL: root/cwd).
+ *
+ * Returns 0 or an errno.  The whole operation is retried from "top"
+ * on ESTALE (stale DNLC entries for files removed on an NFS server).
+ */
+int
+vn_createat(
+	char *pnamep,
+	enum uio_seg seg,
+	struct vattr *vap,
+	enum vcexcl excl,
+	int mode,
+	struct vnode **vpp,
+	enum create why,
+	int flag,
+	mode_t umask,
+	struct vnode *startvp)
+{
+	struct vnode *dvp;	/* ptr to parent dir vnode */
+	struct vnode *vp = NULL;
+	struct pathname pn;
+	int error;
+	int in_crit = 0;	/* inside an nbl critical region? */
+	struct vattr vattr;
+	enum symfollow follow;
+	int estale_retry = 0;
+	uint32_t auditing = AU_AUDITING();
+
+	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
+
+	/* symlink interpretation */
+	if ((flag & FNOFOLLOW) || excl == EXCL)
+		follow = NO_FOLLOW;
+	else
+		follow = FOLLOW;
+	flag &= ~(FNOFOLLOW|FNOLINKS);
+
+top:
+	/*
+	 * Lookup directory.
+	 * If new object is a file, call lower level to create it.
+	 * Note that it is up to the lower level to enforce exclusive
+	 * creation, if the file is already there.
+	 * This allows the lower level to do whatever
+	 * locking or protocol that is needed to prevent races.
+	 * If the new object is directory call lower level to make
+	 * the new directory, with "." and "..".
+	 */
+	if (error = pn_get(pnamep, seg, &pn))
+		return (error);
+	if (auditing)
+		audit_vncreate_start();
+	dvp = NULL;
+	*vpp = NULL;
+	/*
+	 * lookup will find the parent directory for the vnode.
+	 * When it is done the pn holds the name of the entry
+	 * in the directory.
+	 * If this is a non-exclusive create we also find the node itself.
+	 */
+	error = lookuppnat(&pn, NULL, follow, &dvp,
+	    (excl == EXCL) ? NULLVPP : vpp, startvp);
+	if (error) {
+		pn_free(&pn);
+		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
+			goto top;
+		if (why == CRMKDIR && error == EINVAL)
+			error = EEXIST;		/* SVID */
+		return (error);
+	}
+
+	/* Strip the sticky bit unless this is a mknod-style create. */
+	if (why != CRMKNOD)
+		vap->va_mode &= ~VSVTX;
+
+	/*
+	 * If default ACLs are defined for the directory don't apply the
+	 * umask if umask is passed.
+	 */
+
+	if (umask) {
+
+		vsecattr_t vsec;
+
+		vsec.vsa_aclcnt = 0;
+		vsec.vsa_aclentp = NULL;
+		vsec.vsa_dfaclcnt = 0;
+		vsec.vsa_dfaclentp = NULL;
+		vsec.vsa_mask = VSA_DFACLCNT;
+		error = VOP_GETSECATTR(dvp, &vsec, 0, CRED(), NULL);
+		/*
+		 * If error is ENOSYS then treat it as no error
+		 * Don't want to force all file systems to support
+		 * aclent_t style of ACL's.
+		 */
+		if (error == ENOSYS)
+			error = 0;
+		if (error) {
+			if (*vpp != NULL)
+				VN_RELE(*vpp);
+			goto out;
+		} else {
+			/*
+			 * Apply the umask if no default ACLs.
+			 */
+			if (vsec.vsa_dfaclcnt == 0)
+				vap->va_mode &= ~umask;
+
+			/*
+			 * VOP_GETSECATTR() may have allocated memory for
+			 * ACLs we didn't request, so double-check and
+			 * free it if necessary.
+			 */
+			if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
+				kmem_free((caddr_t)vsec.vsa_aclentp,
+				    vsec.vsa_aclcnt * sizeof (aclent_t));
+			if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
+				kmem_free((caddr_t)vsec.vsa_dfaclentp,
+				    vsec.vsa_dfaclcnt * sizeof (aclent_t));
+		}
+	}
+
+	/*
+	 * In general we want to generate EROFS if the file system is
+	 * readonly.  However, POSIX (IEEE Std. 1003.1) section 5.3.1
+	 * documents the open system call, and it says that O_CREAT has no
+	 * effect if the file already exists.  Bug 1119649 states
+	 * that open(path, O_CREAT, ...) fails when attempting to open an
+	 * existing file on a read only file system.  Thus, the first part
+	 * of the following if statement has 3 checks:
+	 *	if the file exists &&
+	 *		it is being open with write access &&
+	 *		the file system is read only
+	 *	then generate EROFS
+	 */
+	if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
+	    (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
+		if (*vpp)
+			VN_RELE(*vpp);
+		error = EROFS;
+	} else if (excl == NONEXCL && *vpp != NULL) {
+		vnode_t *rvp;
+
+		/*
+		 * File already exists.  If a mandatory lock has been
+		 * applied, return error.
+		 */
+		vp = *vpp;
+		if (VOP_REALVP(vp, &rvp, NULL) != 0)
+			rvp = vp;
+		if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
+			nbl_start_crit(vp, RW_READER);
+			in_crit = 1;
+		}
+		if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
+			vattr.va_mask = AT_MODE|AT_SIZE;
+			if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) {
+				goto out;
+			}
+			if (MANDLOCK(vp, vattr.va_mode)) {
+				error = EAGAIN;
+				goto out;
+			}
+			/*
+			 * File cannot be truncated if non-blocking mandatory
+			 * locks are currently on the file.
+			 */
+			if ((vap->va_mask & AT_SIZE) && in_crit) {
+				u_offset_t offset;
+				ssize_t length;
+
+				/*
+				 * Only the byte range affected by the size
+				 * change (grow or shrink) is checked for
+				 * conflicts.
+				 */
+				offset = vap->va_size > vattr.va_size ?
+				    vattr.va_size : vap->va_size;
+				length = vap->va_size > vattr.va_size ?
+				    vap->va_size - vattr.va_size :
+				    vattr.va_size - vap->va_size;
+				if (nbl_conflict(vp, NBL_WRITE, offset,
+				    length, 0, NULL)) {
+					error = EACCES;
+					goto out;
+				}
+			}
+		}
+
+		/*
+		 * If the file is the root of a VFS, we've crossed a
+		 * mount point and the "containing" directory that we
+		 * acquired above (dvp) is irrelevant because it's in
+		 * a different file system.  We apply VOP_CREATE to the
+		 * target itself instead of to the containing directory
+		 * and supply a null path name to indicate (conventionally)
+		 * the node itself as the "component" of interest.
+		 *
+		 * The intercession of the file system is necessary to
+		 * ensure that the appropriate permission checks are
+		 * done.
+		 */
+		if (vp->v_flag & VROOT) {
+			ASSERT(why != CRMKDIR);
+			error = VOP_CREATE(vp, "", vap, excl, mode, vpp,
+			    CRED(), flag, NULL, NULL);
+			/*
+			 * If the create succeeded, it will have created
+			 * a new reference to the vnode.  Give up the
+			 * original reference.  The assertion should not
+			 * get triggered because NBMAND locks only apply to
+			 * VREG files.  And if in_crit is non-zero for some
+			 * reason, detect that here, rather than when we
+			 * deference a null vp.
+			 */
+			ASSERT(in_crit == 0);
+			VN_RELE(vp);
+			vp = NULL;
+			goto out;
+		}
+
+		/*
+		 * Large File API - non-large open (FOFFMAX flag not set)
+		 * of regular file fails if the file size exceeds MAXOFF32_T.
+		 */
+		if (why != CRMKDIR &&
+		    !(flag & FOFFMAX) &&
+		    (vp->v_type == VREG)) {
+			vattr.va_mask = AT_SIZE;
+			if ((error = VOP_GETATTR(vp, &vattr, 0,
+			    CRED(), NULL))) {
+				goto out;
+			}
+			if ((vattr.va_size > (u_offset_t)MAXOFF32_T)) {
+				error = EOVERFLOW;
+				goto out;
+			}
+		}
+	}
+
+	if (error == 0) {
+		/*
+		 * Call mkdir() if specified, otherwise create().
+		 */
+		int must_be_dir = pn_fixslash(&pn);	/* trailing '/'? */
+
+		if (why == CRMKDIR)
+			/*
+			 * N.B., if vn_createat() ever requests
+			 * case-insensitive behavior then it will need
+			 * to be passed to VOP_MKDIR().  VOP_CREATE()
+			 * will already get it via "flag"
+			 */
+			error = VOP_MKDIR(dvp, pn.pn_path, vap, vpp, CRED(),
+			    NULL, 0, NULL);
+		else if (!must_be_dir)
+			error = VOP_CREATE(dvp, pn.pn_path, vap,
+			    excl, mode, vpp, CRED(), flag, NULL, NULL);
+		else
+			error = ENOTDIR;
+	}
+
+out:
+
+	if (auditing)
+		audit_vncreate_finish(*vpp, error);
+	if (in_crit) {
+		nbl_end_crit(vp);
+		in_crit = 0;
+	}
+	if (vp != NULL) {
+		VN_RELE(vp);
+		vp = NULL;
+	}
+	pn_free(&pn);
+	VN_RELE(dvp);
+	/*
+	 * The following clause was added to handle a problem
+	 * with NFS consistency.  It is possible that a lookup
+	 * of the file to be created succeeded, but the file
+	 * itself doesn't actually exist on the server.  This
+	 * is chiefly due to the DNLC containing an entry for
+	 * the file which has been removed on the server.  In
+	 * this case, we just start over.  If there was some
+	 * other cause for the ESTALE error, then the lookup
+	 * of the file will fail and the error will be returned
+	 * above instead of looping around from here.
+	 */
+	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
+		goto top;
+	return (error);
+}
+
+/*
+ * Make a hard link "to" naming the existing object "from".
+ * Thin wrapper around vn_linkat() with no start vnodes (paths resolve
+ * from root/cwd) and no symlink following on the source path.
+ */
+int
+vn_link(char *from, char *to, enum uio_seg seg)
+{
+	return (vn_linkat(NULL, from, NO_FOLLOW, NULL, to, seg));
+}
+
+/*
+ * Create a hard link: a new directory entry "to" for the existing
+ * object "from".  fstartvp/tstartvp are optional starting directories
+ * for the two path walks (*at-style semantics); "follow" controls
+ * symlink interpretation on the source path.  Both objects must live
+ * on the same file system (compared by fsid so lofs works) and the
+ * target file system must be writable.  Retried on ESTALE.
+ */
+int
+vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
+    vnode_t *tstartvp, char *to, enum uio_seg seg)
+{
+	struct vnode *fvp;		/* from vnode ptr */
+	struct vnode *tdvp;		/* to directory vnode ptr */
+	struct pathname pn;
+	int error;
+	struct vattr vattr;
+	dev_t fsid;
+	int estale_retry = 0;
+	uint32_t auditing = AU_AUDITING();
+
+top:
+	fvp = tdvp = NULL;
+	if (error = pn_get(to, seg, &pn))
+		return (error);
+	if (auditing && fstartvp != NULL)
+		audit_setfsat_path(1);
+	if (error = lookupnameat(from, seg, follow, NULLVPP, &fvp, fstartvp))
+		goto out;
+	if (auditing && tstartvp != NULL)
+		audit_setfsat_path(3);
+	if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP, tstartvp))
+		goto out;
+	/*
+	 * Make sure both source vnode and target directory vnode are
+	 * in the same vfs and that it is writeable.
+	 */
+	vattr.va_mask = AT_FSID;
+	if (error = VOP_GETATTR(fvp, &vattr, 0, CRED(), NULL))
+		goto out;
+	fsid = vattr.va_fsid;
+	vattr.va_mask = AT_FSID;
+	if (error = VOP_GETATTR(tdvp, &vattr, 0, CRED(), NULL))
+		goto out;
+	if (fsid != vattr.va_fsid) {
+		error = EXDEV;
+		goto out;
+	}
+	if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
+		error = EROFS;
+		goto out;
+	}
+	/*
+	 * Do the link.
+	 */
+	(void) pn_fixslash(&pn);
+	error = VOP_LINK(tdvp, fvp, pn.pn_path, CRED(), NULL, 0);
+out:
+	pn_free(&pn);
+	if (fvp)
+		VN_RELE(fvp);
+	if (tdvp)
+		VN_RELE(tdvp);
+	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
+		goto top;
+	return (error);
+}
+
+/*
+ * Rename "from" to "to".  Thin wrapper around vn_renameat() with no
+ * start vnodes, so both paths resolve from root/cwd.
+ */
+int
+vn_rename(char *from, char *to, enum uio_seg seg)
+{
+	return (vn_renameat(NULL, from, NULL, to, seg));
+}
+
+/*
+ * Rename fname to tname, starting the respective path walks at the
+ * optional directories fdvp and tdvp (*at-style semantics).  Source
+ * and target directories must be on the same file system (fsid
+ * comparison, so lofs works) and the target must be writable.  Both
+ * the existing target entry (if any) and the source are checked for
+ * non-blocking mandatory lock conflicts before VOP_RENAME is issued.
+ * The whole operation is retried on ESTALE.
+ */
+int
+vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
+    char *tname, enum uio_seg seg)
+{
+	int error;
+	struct vattr vattr;
+	struct pathname fpn;		/* from pathname */
+	struct pathname tpn;		/* to pathname */
+	dev_t fsid;
+	int in_crit_src, in_crit_targ;
+	vnode_t *fromvp, *fvp;
+	vnode_t *tovp, *targvp;
+	int estale_retry = 0;
+	uint32_t auditing = AU_AUDITING();
+
+top:
+	fvp = fromvp = tovp = targvp = NULL;
+	in_crit_src = in_crit_targ = 0;
+	/*
+	 * Get to and from pathnames.
+	 */
+	if (error = pn_get(fname, seg, &fpn))
+		return (error);
+	if (error = pn_get(tname, seg, &tpn)) {
+		pn_free(&fpn);
+		return (error);
+	}
+
+	/*
+	 * First we need to resolve the correct directories
+	 * The passed in directories may only be a starting point,
+	 * but we need the real directories the file(s) live in.
+	 * For example the fname may be something like usr/lib/sparc
+	 * and we were passed in the / directory, but we need to
+	 * use the lib directory for the rename.
+	 */
+
+	if (auditing && fdvp != NULL)
+		audit_setfsat_path(1);
+	/*
+	 * Lookup to and from directories.
+	 */
+	if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
+		goto out;
+	}
+
+	/*
+	 * Make sure there is an entry.
+	 */
+	if (fvp == NULL) {
+		error = ENOENT;
+		goto out;
+	}
+
+	if (auditing && tdvp != NULL)
+		audit_setfsat_path(3);
+	if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, &targvp, tdvp)) {
+		goto out;
+	}
+
+	/*
+	 * Make sure both the from vnode directory and the to directory
+	 * are in the same vfs and the to directory is writable.
+	 * We check fsid's, not vfs pointers, so loopback fs works.
+	 */
+	if (fromvp != tovp) {
+		vattr.va_mask = AT_FSID;
+		if (error = VOP_GETATTR(fromvp, &vattr, 0, CRED(), NULL))
+			goto out;
+		fsid = vattr.va_fsid;
+		vattr.va_mask = AT_FSID;
+		if (error = VOP_GETATTR(tovp, &vattr, 0, CRED(), NULL))
+			goto out;
+		if (fsid != vattr.va_fsid) {
+			error = EXDEV;
+			goto out;
+		}
+	}
+
+	if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
+		error = EROFS;
+		goto out;
+	}
+
+	/*
+	 * An existing, distinct target entry would be removed by the
+	 * rename; refuse if that removal conflicts with an NBMAND lock.
+	 */
+	if (targvp && (fvp != targvp)) {
+		nbl_start_crit(targvp, RW_READER);
+		in_crit_targ = 1;
+		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
+			error = EACCES;
+			goto out;
+		}
+	}
+
+	if (nbl_need_check(fvp)) {
+		nbl_start_crit(fvp, RW_READER);
+		in_crit_src = 1;
+		if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0, NULL)) {
+			error = EACCES;
+			goto out;
+		}
+	}
+
+	/*
+	 * Do the rename.
+	 */
+	(void) pn_fixslash(&tpn);
+	error = VOP_RENAME(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED(),
+	    NULL, 0);
+
+out:
+	pn_free(&fpn);
+	pn_free(&tpn);
+	if (in_crit_src)
+		nbl_end_crit(fvp);
+	if (in_crit_targ)
+		nbl_end_crit(targvp);
+	if (fromvp)
+		VN_RELE(fromvp);
+	if (tovp)
+		VN_RELE(tovp);
+	if (targvp)
+		VN_RELE(targvp);
+	if (fvp)
+		VN_RELE(fvp);
+	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
+		goto top;
+	return (error);
+}
+
+/*
+ * Remove a file or directory.
+ *
+ * dirflag selects the semantics: RMDIRECTORY for rmdir(2)-style
+ * removal; other values give unlink(2)-style removal.  Thin wrapper
+ * around vn_removeat() with no start vnode.
+ */
+int
+vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
+{
+	return (vn_removeat(NULL, fnamep, seg, dirflag));
+}
+
+/*
+ * Remove the directory entry named by fnamep, starting the path walk
+ * at the optional startvp (*at-style semantics).  Handles the special
+ * case of removing a namefs mount point (unmounts it and removes the
+ * covered object), checks for NBMAND lock conflicts, and dispatches
+ * to VOP_RMDIR or VOP_REMOVE depending on dirflag.  Retried on ESTALE.
+ */
+int
+vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
+{
+	struct vnode *vp;		/* entry vnode */
+	struct vnode *dvp;		/* ptr to parent dir vnode */
+	struct vnode *coveredvp;
+	struct pathname pn;		/* name of entry */
+	enum vtype vtype;
+	int error;
+	struct vfs *vfsp;
+	struct vfs *dvfsp;	/* ptr to parent dir vfs */
+	int in_crit = 0;
+	int estale_retry = 0;
+
+top:
+	if (error = pn_get(fnamep, seg, &pn))
+		return (error);
+	dvp = vp = NULL;
+	if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
+		pn_free(&pn);
+		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
+			goto top;
+		return (error);
+	}
+
+	/*
+	 * Make sure there is an entry.
+	 */
+	if (vp == NULL) {
+		error = ENOENT;
+		goto out;
+	}
+
+	vfsp = vp->v_vfsp;
+	dvfsp = dvp->v_vfsp;
+
+	/*
+	 * If the named file is the root of a mounted filesystem, fail,
+	 * unless it's marked unlinkable.  In that case, unmount the
+	 * filesystem and proceed to unlink the covered vnode.  (If the
+	 * covered vnode is a directory, use rmdir instead of unlink,
+	 * to avoid file system corruption.)
+	 */
+	if (vp->v_flag & VROOT) {
+		if ((vfsp->vfs_flag & VFS_UNLINKABLE) == 0) {
+			error = EBUSY;
+			goto out;
+		}
+
+		/*
+		 * Namefs specific code starts here.
+		 */
+
+		if (dirflag == RMDIRECTORY) {
+			/*
+			 * User called rmdir(2) on a file that has
+			 * been namefs mounted on top of.  Since
+			 * namefs doesn't allow directories to
+			 * be mounted on other files we know
+			 * vp is not of type VDIR so fail to operation.
+			 */
+			error = ENOTDIR;
+			goto out;
+		}
+
+		/*
+		 * If VROOT is still set after grabbing vp->v_lock,
+		 * noone has finished nm_unmount so far and coveredvp
+		 * is valid.
+		 * If we manage to grab vn_vfswlock(coveredvp) before releasing
+		 * vp->v_lock, any race window is eliminated.
+		 */
+
+		mutex_enter(&vp->v_lock);
+		if ((vp->v_flag & VROOT) == 0) {
+			/* Someone beat us to the unmount */
+			mutex_exit(&vp->v_lock);
+			error = EBUSY;
+			goto out;
+		}
+		vfsp = vp->v_vfsp;
+		coveredvp = vfsp->vfs_vnodecovered;
+		ASSERT(coveredvp);
+		/*
+		 * Note: Implementation of vn_vfswlock shows that ordering of
+		 * v_lock / vn_vfswlock is not an issue here.
+		 */
+		error = vn_vfswlock(coveredvp);
+		mutex_exit(&vp->v_lock);
+
+		if (error)
+			goto out;
+
+		VN_HOLD(coveredvp);
+		VN_RELE(vp);
+		error = dounmount(vfsp, 0, CRED());
+
+		/*
+		 * Unmounted the namefs file system; now get
+		 * the object it was mounted over.
+		 */
+		vp = coveredvp;
+		/*
+		 * If namefs was mounted over a directory, then
+		 * we want to use rmdir() instead of unlink().
+		 */
+		if (vp->v_type == VDIR)
+			dirflag = RMDIRECTORY;
+
+		if (error)
+			goto out;
+	}
+
+	/*
+	 * Make sure filesystem is writeable.
+	 * We check the parent directory's vfs in case this is an lofs vnode.
+	 */
+	if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
+		error = EROFS;
+		goto out;
+	}
+
+	vtype = vp->v_type;
+
+	/*
+	 * If there is the possibility of an nbmand share reservation, make
+	 * sure it's okay to remove the file.  Keep a reference to the
+	 * vnode, so that we can exit the nbl critical region after
+	 * calling VOP_REMOVE.
+	 * If there is no possibility of an nbmand share reservation,
+	 * release the vnode reference now.  Filesystems like NFS may
+	 * behave differently if there is an extra reference, so get rid of
+	 * this one.  Fortunately, we can't have nbmand mounts on NFS
+	 * filesystems.
+	 */
+	if (nbl_need_check(vp)) {
+		nbl_start_crit(vp, RW_READER);
+		in_crit = 1;
+		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
+			error = EACCES;
+			goto out;
+		}
+	} else {
+		VN_RELE(vp);
+		vp = NULL;
+	}
+
+	if (dirflag == RMDIRECTORY) {
+		/*
+		 * Caller is using rmdir(2), which can only be applied to
+		 * directories.
+		 */
+		if (vtype != VDIR) {
+			error = ENOTDIR;
+		} else {
+			vnode_t *cwd;
+			proc_t *pp = curproc;
+
+			/* Hold the cwd so VOP_RMDIR can refuse to remove it. */
+			mutex_enter(&pp->p_lock);
+			cwd = PTOU(pp)->u_cdir;
+			VN_HOLD(cwd);
+			mutex_exit(&pp->p_lock);
+			error = VOP_RMDIR(dvp, pn.pn_path, cwd, CRED(),
+			    NULL, 0);
+			VN_RELE(cwd);
+		}
+	} else {
+		/*
+		 * Unlink(2) can be applied to anything.
+		 */
+		error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
+	}
+
+out:
+	pn_free(&pn);
+	if (in_crit) {
+		nbl_end_crit(vp);
+		in_crit = 0;
+	}
+	if (vp != NULL)
+		VN_RELE(vp);
+	if (dvp != NULL)
+		VN_RELE(dvp);
+	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
+		goto top;
+	return (error);
+}
+
+/*
+ * Deep equality test for two vnodes.  When a stacking file system
+ * exposes an underlying "real" vnode via VOP_REALVP, compare those
+ * instead, making this more thorough than the plain VN_CMP() macro.
+ */
+int
+vn_compare(vnode_t *vp1, vnode_t *vp2)
+{
+	vnode_t *rvp;
+
+	if (vp1 != NULL && VOP_REALVP(vp1, &rvp, NULL) == 0)
+		vp1 = rvp;
+	if (vp2 != NULL && VOP_REALVP(vp2, &rvp, NULL) == 0)
+		vp2 = rvp;
+	return (VN_CMP(vp1, vp2));
+}
+
+/*
+ * The number of locks to hash into.  This value must be a power
+ * of 2 minus 1 and should probably also be prime.
+ */
+#define	NUM_BUCKETS	1023
+
+/*
+ * One hash bucket: a mutex protecting a singly linked list of
+ * vn_vfslocks_entry_t.  Padded out to 64 bytes so adjacent buckets
+ * do not share a cache line.
+ */
+struct vn_vfslocks_bucket {
+	kmutex_t vb_lock;
+	vn_vfslocks_entry_t *vb_list;
+	char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
+};
+
+/*
+ * Total number of buckets will be NUM_BUCKETS + 1 .
+ */
+
+#pragma	align	64(vn_vfslocks_buckets)
+static	struct	vn_vfslocks_bucket	vn_vfslocks_buckets[NUM_BUCKETS + 1];
+
+/* Discard the low pointer bits, identical for all allocated objects. */
+#define	VN_VFSLOCKS_SHIFT	9
+
+#define	VN_VFSLOCKS_HASH(vfsvpptr)	\
+	((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
+
+/*
+ * vn_vfslocks_getlock() uses an HASH scheme to generate
+ * rwstlock using vfs/vnode pointer passed to it.
+ *
+ * vn_vfslocks_rele() releases a reference in the
+ * HASH table which allows the entry allocated by
+ * vn_vfslocks_getlock() to be freed at a later
+ * stage when the refcount drops to zero.
+ */
+
+/*
+ * Find or create the hash-table entry holding the rwstlock for the
+ * given vfs/vnode pointer, taking a reference on it.  The caller must
+ * balance with vn_vfslocks_rele().  Allocation is done optimistically
+ * outside the bucket lock, then the list is re-checked for a racing
+ * insert before linking the new entry in.
+ */
+vn_vfslocks_entry_t *
+vn_vfslocks_getlock(void *vfsvpptr)
+{
+	struct vn_vfslocks_bucket *bp;
+	vn_vfslocks_entry_t *vep;
+	vn_vfslocks_entry_t *tvep;
+
+	ASSERT(vfsvpptr != NULL);
+	bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)];
+
+	/* Fast path: entry already exists. */
+	mutex_enter(&bp->vb_lock);
+	for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
+		if (vep->ve_vpvfs == vfsvpptr) {
+			vep->ve_refcnt++;
+			mutex_exit(&bp->vb_lock);
+			return (vep);
+		}
+	}
+	mutex_exit(&bp->vb_lock);
+	/* Not found: allocate outside the bucket lock (KM_SLEEP may block). */
+	vep = kmem_alloc(sizeof (*vep), KM_SLEEP);
+	rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL);
+	vep->ve_vpvfs = (char *)vfsvpptr;
+	vep->ve_refcnt = 1;
+	mutex_enter(&bp->vb_lock);
+	for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) {
+		if (tvep->ve_vpvfs == vfsvpptr) {
+			tvep->ve_refcnt++;
+			mutex_exit(&bp->vb_lock);
+
+			/*
+			 * There is already an entry in the hash
+			 * destroy what we just allocated.
+			 */
+			rwst_destroy(&vep->ve_lock);
+			kmem_free(vep, sizeof (*vep));
+			return (tvep);
+		}
+	}
+	vep->ve_next = bp->vb_list;
+	bp->vb_list = vep;
+	mutex_exit(&bp->vb_lock);
+	return (vep);
+}
+
+/*
+ * Drop one reference on a hash-table entry obtained from
+ * vn_vfslocks_getlock().  When the count reaches zero the entry is
+ * unlinked from its bucket list and freed.  Panics on a negative
+ * count or if the entry is missing from the bucket.
+ */
+void
+vn_vfslocks_rele(vn_vfslocks_entry_t *vepent)
+{
+	struct vn_vfslocks_bucket *bp;
+	vn_vfslocks_entry_t *vep;
+	vn_vfslocks_entry_t *pvep;
+
+	ASSERT(vepent != NULL);
+	ASSERT(vepent->ve_vpvfs != NULL);
+
+	bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)];
+
+	mutex_enter(&bp->vb_lock);
+	vepent->ve_refcnt--;
+
+	if ((int32_t)vepent->ve_refcnt < 0)
+		cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative");
+
+	if (vepent->ve_refcnt == 0) {
+		for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
+			if (vep->ve_vpvfs == vepent->ve_vpvfs) {
+				if (bp->vb_list == vep)
+					bp->vb_list = vep->ve_next;
+				else {
+					/*
+					 * pvep is valid here: a non-head
+					 * match implies at least one prior
+					 * iteration set it.
+					 */
+					/* LINTED */
+					pvep->ve_next = vep->ve_next;
+				}
+				mutex_exit(&bp->vb_lock);
+				rwst_destroy(&vep->ve_lock);
+				kmem_free(vep, sizeof (*vep));
+				return;
+			}
+			pvep = vep;
+		}
+		cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found");
+	}
+	mutex_exit(&bp->vb_lock);
+}
+
+/*
+ * vn_vfswlock_wait is used to implement a lock which is logically a writers
+ * lock protecting the v_vfsmountedhere field.
+ * vn_vfswlock_wait has been modified to be similar to vn_vfswlock,
+ * except that it blocks to acquire the lock VVFSLOCK.
+ *
+ * traverse() and routines re-implementing part of traverse (e.g. autofs)
+ * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
+ * need the non-blocking version of the writers lock i.e. vn_vfswlock
+ */
+/*
+ * Blocking, signal-interruptible acquisition of the per-vnode writers
+ * lock (see the comment above).  Returns 0 on success or EINTR; the
+ * hash reference is dropped again if the wait was interrupted.
+ */
+int
+vn_vfswlock_wait(vnode_t *vp)
+{
+	vn_vfslocks_entry_t *vep;
+	int rc;
+
+	ASSERT(vp != NULL);
+
+	vep = vn_vfslocks_getlock(vp);
+	rc = rwst_enter_sig(&vep->ve_lock, RW_WRITER);
+	if (rc == EINTR)
+		vn_vfslocks_rele(vep);
+	return (rc);
+}
+
+/*
+ * Blocking, signal-interruptible acquisition of the per-vnode lock as
+ * a reader.  Returns 0 on success or EINTR; the hash reference is
+ * dropped again if the wait was interrupted.
+ */
+int
+vn_vfsrlock_wait(vnode_t *vp)
+{
+	vn_vfslocks_entry_t *vep;
+	int rc;
+
+	ASSERT(vp != NULL);
+
+	vep = vn_vfslocks_getlock(vp);
+	rc = rwst_enter_sig(&vep->ve_lock, RW_READER);
+	if (rc == EINTR)
+		vn_vfslocks_rele(vep);
+	return (rc);
+}
+
+
+/*
+ * vn_vfswlock is used to implement a lock which is logically a writers lock
+ * protecting the v_vfsmountedhere field.
+ */
+int
+vn_vfswlock(vnode_t *vp)
+{
+	vn_vfslocks_entry_t *vep;
+
+	/*
+	 * A NULL vp means someone is trying to lock the covered vnode
+	 * of "/" (vfs_vnodecovered is NULL for the root).  That only
+	 * happens while unmounting "/", which must fail anyway, so
+	 * report EBUSY here rather than in VFS_UNMOUNT.
+	 */
+	if (vp == NULL)
+		return (EBUSY);
+
+	vep = vn_vfslocks_getlock(vp);
+	if (!rwst_tryenter(&vep->ve_lock, RW_WRITER)) {
+		/* Lock busy: give back the hash reference we just took. */
+		vn_vfslocks_rele(vep);
+		return (EBUSY);
+	}
+	return (0);
+}
+
+int
+vn_vfsrlock(vnode_t *vp)
+{
+	vn_vfslocks_entry_t *vep;
+
+	/*
+	 * A NULL vp means someone is trying to lock the covered vnode
+	 * of "/" (vfs_vnodecovered is NULL for the root).  That only
+	 * happens while unmounting "/", which must fail anyway, so
+	 * report EBUSY here rather than in VFS_UNMOUNT.
+	 */
+	if (vp == NULL)
+		return (EBUSY);
+
+	vep = vn_vfslocks_getlock(vp);
+	if (!rwst_tryenter(&vep->ve_lock, RW_READER)) {
+		/* Lock busy: give back the hash reference we just took. */
+		vn_vfslocks_rele(vep);
+		return (EBUSY);
+	}
+	return (0);
+}
+
+/*
+ * Release a lock previously taken by vn_vfswlock/vn_vfsrlock (or the
+ * *_wait variants).
+ */
+void
+vn_vfsunlock(vnode_t *vp)
+{
+	vn_vfslocks_entry_t *vpvfsentry;
+
+	/*
+	 * ve_refcnt needs to be decremented twice.
+	 * 1. To release reference after a call to vn_vfslocks_getlock()
+	 *    made here just to locate the entry.
+	 * 2. To release the reference from the locking routines like
+	 *    vn_vfsrlock/vn_vfswlock etc,.
+	 */
+	vpvfsentry = vn_vfslocks_getlock(vp);
+	vn_vfslocks_rele(vpvfsentry);
+
+	rwst_exit(&vpvfsentry->ve_lock);
+	vn_vfslocks_rele(vpvfsentry);
+}
+
+/*
+ * Report whether the per-vnode writers lock is currently held.
+ * Transiently takes and releases a reference on the hash entry.
+ */
+int
+vn_vfswlock_held(vnode_t *vp)
+{
+	vn_vfslocks_entry_t *vep;
+	int rc;
+
+	ASSERT(vp != NULL);
+
+	vep = vn_vfslocks_getlock(vp);
+	rc = rwst_lock_held(&vep->ve_lock, RW_WRITER);
+	vn_vfslocks_rele(vep);
+	return (rc);
+}
+
+
+/*
+ * Build a vnodeops vector for a file system from the supplied
+ * operation template.  On success *actual holds the newly allocated
+ * vnodeops_t (freed later with vn_freevnodeops()); on failure it is
+ * set to NULL so the caller cannot use a dangling pointer, and the
+ * error from fs_build_vector() is returned.
+ */
+int
+vn_make_ops(
+	const char *name,			/* Name of file system */
+	const fs_operation_def_t *templ,	/* Operation specification */
+	vnodeops_t **actual)			/* Return the vnodeops */
+{
+	int unused_ops;
+	int error;
+
+	*actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP);
+
+	(*actual)->vnop_name = name;
+
+	error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ);
+	if (error) {
+		kmem_free(*actual, sizeof (vnodeops_t));
+		/* Don't hand a dangling pointer back to the caller. */
+		*actual = NULL;
+	}
+
+#if DEBUG
+	/*
+	 * Only report unused operations for a successful build;
+	 * unused_ops is not meaningful when fs_build_vector() failed.
+	 */
+	if (error == 0 && unused_ops != 0)
+		cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied "
+		    "but not used", name, unused_ops);
+#endif
+
+	return (error);
+}
+
+/*
+ * Free the vnodeops created as a result of vn_make_ops().  The caller
+ * must ensure no vnodes still reference the vector.
+ */
+void
+vn_freevnodeops(vnodeops_t *vnops)
+{
+	kmem_free(vnops, sizeof (vnodeops_t));
+}
+
+/*
+ * Vnode cache.
+ */
+
+/*
+ * kmem cache constructor for vnodes: one-time initialization of the
+ * locks, condition variable and pointer fields that must be valid
+ * before vn_reinit() runs.  Always succeeds.
+ */
+/* ARGSUSED */
+static int
+vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
+{
+	struct vnode *vp;
+
+	vp = buf;
+
+	mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
+	rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
+	vp->v_femhead = NULL;	/* Must be done before vn_reinit() */
+	vp->v_path = NULL;
+	vp->v_mpssdata = NULL;
+	vp->v_vsd = NULL;
+	vp->v_fopdata = NULL;
+
+	return (0);
+}
+
+/*
+ * kmem cache destructor for vnodes: tear down, in reverse order, the
+ * synchronization objects set up by vn_cache_constructor().
+ */
+/* ARGSUSED */
+static void
+vn_cache_destructor(void *buf, void *cdrarg)
+{
+	struct vnode *vp;
+
+	vp = buf;
+
+	rw_destroy(&vp->v_nbllock);
+	cv_destroy(&vp->v_cv);
+	mutex_destroy(&vp->v_vsd_lock);
+	mutex_destroy(&vp->v_lock);
+}
+
+/*
+ * Create the global vnode kmem cache, aligned per VNODE_ALIGN.
+ * Called once at boot before any vnodes are allocated.
+ */
+void
+vn_create_cache(void)
+{
+	/* LINTED */
+	ASSERT((1 << VNODE_ALIGN_LOG2) ==
+	    P2ROUNDUP(sizeof (struct vnode), VNODE_ALIGN));
+	vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode),
+	    VNODE_ALIGN, vn_cache_constructor, vn_cache_destructor, NULL, NULL,
+	    NULL, 0);
+}
+
+/*
+ * Destroy the global vnode kmem cache; counterpart of
+ * vn_create_cache().
+ */
+void
+vn_destroy_cache(void)
+{
+	kmem_cache_destroy(vn_cache);
+}
+
+/*
+ * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
+ * cached by the file system and vnodes remain associated.
+ */
+void
+vn_recycle(vnode_t *vp)
+{
+ ASSERT(vp->v_pages == NULL);
+
+ /*
+ * XXX - This really belongs in vn_reinit(), but we have some issues
+ * with the counts. Best to have it here for clean initialization.
+ */
+ vp->v_rdcnt = 0;
+ vp->v_wrcnt = 0;
+ vp->v_mmap_read = 0;
+ vp->v_mmap_write = 0;
+
+ /*
+ * If FEM was in use, make sure everything gets cleaned up
+ * NOTE: vp->v_femhead is initialized to NULL in the vnode
+ * constructor.
+ */
+ if (vp->v_femhead) {
+ /* XXX - There should be a free_femhead() that does all this */
+ ASSERT(vp->v_femhead->femh_list == NULL);
+ mutex_destroy(&vp->v_femhead->femh_lock);
+ kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
+ vp->v_femhead = NULL;