aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEnji Cooper <ngie@FreeBSD.org>2019-03-12 01:43:01 +0000
committerEnji Cooper <ngie@FreeBSD.org>2019-03-12 01:43:01 +0000
commite5a5dd6cc488f19e182ad3f694957389e4a7d40a (patch)
treedd55863b2c3d3e4a1521acbd751384d128160957
downloadsrc-e5a5dd6cc488f19e182ad3f694957389e4a7d40a.tar.gz
src-e5a5dd6cc488f19e182ad3f694957389e4a7d40a.zip
Import capsicum-test into ^/vendor/google/capsicum-test/dist
The following change imports google/capsicum-test@9333154 from GitHub, omitting the embedded version of googletest, as well as the incomplete libcasper. This test suite helps verify capsicum(3) support via functional tests written in the GoogleTest test framework. Kernel support for capsicum(4) is tested by side-effect of testing capsicum(3). NB: as discussed in a previous [closed] PR [1], the casper(3) tests are incomplete/buggy and will not pass on FreeBSD. Thus, I have no intention of integrating them into the build/test on FreeBSD as-is. The import command used was: ``` curl -L https://github.com/google/capsicum-test/tarball/9333154 | tar --strip-components=1 -xvzf - -C dist/ rm -Rf dist/*/ ``` 1. https://github.com/google/capsicum-test/pull/26 Reviewed by: emaste (mentor) Differential Revision: https://reviews.freebsd.org/D19261
Notes
Notes: svn path=/vendor/google/capsicum-test/dist/; revision=345046
-rw-r--r--.gitignore19
-rw-r--r--CONTRIBUTING.md20
-rw-r--r--GNUmakefile78
-rw-r--r--LICENSE26
-rw-r--r--README.md62
-rw-r--r--capability-fd-pair.cc188
-rw-r--r--capability-fd.cc1271
-rw-r--r--capmode.cc651
-rw-r--r--capsicum-freebsd.h73
-rw-r--r--capsicum-linux.h40
-rw-r--r--capsicum-rights.h118
-rw-r--r--capsicum-test-main.cc101
-rw-r--r--capsicum-test.cc102
-rw-r--r--capsicum-test.h260
-rw-r--r--capsicum.h170
-rw-r--r--fcntl.cc411
-rw-r--r--fexecve.cc173
-rw-r--r--ioctl.cc234
-rw-r--r--linux.cc1503
-rw-r--r--makefile36
-rw-r--r--mini-me.c38
-rw-r--r--mqueue.cc100
-rw-r--r--openat.cc357
-rw-r--r--overhead.cc45
-rw-r--r--procdesc.cc977
-rw-r--r--rename.cc49
-rw-r--r--sctp.cc212
-rw-r--r--select.cc142
-rwxr-xr-xshowrights99
-rw-r--r--smoketest.c135
-rw-r--r--socket.cc340
-rw-r--r--syscalls.h259
-rw-r--r--sysctl.cc15
-rw-r--r--waittest.c42
34 files changed, 8346 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000000..bc8f8f5ce265
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,19 @@
+capsicum-test
+mini-me
+mini-me.noexec
+mini-me.setuid
+mini-me.32
+mini-me.x32
+mini-me.64
+libgtest.a
+smoketest
+*.o
+libcap*.deb
+libcap*.dsc
+libcap*.tar.gz
+libcap*.changes
+casper*.deb
+casper*.dsc
+casper*.tar.gz
+casper*.changes
+libcaprights.a \ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 000000000000..1a054b840c0b
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,20 @@
+## Contributor License Agreement ##
+
+Contributions to any Google project must be accompanied by a Contributor
+License Agreement. This is not a copyright **assignment**, it simply gives
+Google permission to use and redistribute your contributions as part of the
+project.
+
+ * If you are an individual writing original source code and you're sure you
+ own the intellectual property, then you'll need to sign an [individual
+ CLA][].
+
+ * If you work for a company that wants to allow you to contribute your work,
+ then you'll need to sign a [corporate CLA][].
+
+You generally only need to submit a CLA once, so if you've already submitted
+one (even if it was for a different project), you probably don't need to do it
+again.
+
+[individual CLA]: https://developers.google.com/open-source/cla/individual
+[corporate CLA]: https://developers.google.com/open-source/cla/corporate
diff --git a/GNUmakefile b/GNUmakefile
new file mode 100644
index 000000000000..d7133ca3b386
--- /dev/null
+++ b/GNUmakefile
@@ -0,0 +1,78 @@
+OS:=$(shell uname)
+
+# Set ARCH to 32 or x32 for i386/x32 ABIs
+ARCH?=64
+ARCHFLAG=-m$(ARCH)
+
+ifeq ($(OS),Linux)
+PROCESSOR:=$(shell uname -p)
+
+ifneq ($(wildcard /usr/lib/$(PROCESSOR)-linux-gnu),)
+# Can use standard Debian location for static libraries.
+PLATFORM_LIBDIR=/usr/lib/$(PROCESSOR)-linux-gnu
+else
+# Attempt to determine library location from gcc configuration.
+PLATFORM_LIBDIR=$(shell gcc -v 2>&1 | grep "Configured with:" | sed 's/.*--libdir=\(\/usr\/[^ ]*\).*/\1/g')
+endif
+
+# Override for explicitly specified ARCHFLAG.
+# Use locally compiled libcaprights in this case, on the
+# assumption that any installed version is 64-bit.
+ifeq ($(ARCHFLAG),-m32)
+PROCESSOR=i386
+PLATFORM_LIBDIR=/usr/lib32
+LIBCAPRIGHTS=./libcaprights.a
+endif
+ifeq ($(ARCHFLAG),-mx32)
+PROCESSOR=x32
+PLATFORM_LIBDIR=/usr/libx32
+LIBCAPRIGHTS=./libcaprights.a
+endif
+
+# Detect presence of libsctp in normal Debian location
+ifneq ($(wildcard $(PLATFORM_LIBDIR)/libsctp.a),)
+LIBSCTP=-lsctp
+CXXFLAGS=-DHAVE_SCTP
+endif
+
+ifneq ($(LIBCAPRIGHTS),)
+# Build local libcaprights.a (assuming ./configure
+# has already been done in libcaprights/)
+LOCAL_LIBS=$(LIBCAPRIGHTS)
+LIBCAPRIGHTS_OBJS=libcaprights/capsicum.o libcaprights/linux-bpf-capmode.o libcaprights/procdesc.o libcaprights/signal.o
+LOCAL_CLEAN=$(LOCAL_LIBS) $(LIBCAPRIGHTS_OBJS)
+else
+# Detect installed libcaprights static library.
+ifneq ($(wildcard $(PLATFORM_LIBDIR)/libcaprights.a),)
+LIBCAPRIGHTS=$(PLATFORM_LIBDIR)/libcaprights.a
+else
+ifneq ($(wildcard /usr/lib/libcaprights.a),)
+LIBCAPRIGHTS=/usr/lib/libcaprights.a
+endif
+endif
+endif
+
+endif
+
+# Extra test programs for arch-transition tests
+EXTRA_PROGS = mini-me.32 mini-me.64
+ifneq ($(wildcard /usr/include/gnu/stubs-x32.h),)
+EXTRA_PROGS += mini-me.x32
+endif
+
+# Chain on to the master makefile
+include makefile
+
+./libcaprights.a: $(LIBCAPRIGHTS_OBJS)
+ ar cr $@ $^
+
+# Small static programs of known architectures
+# These may require additional packages to be installed; for example, for Debian:
+# - libc6-dev-i386 provides 32-bit headers for a 64-bit system
+# - libc6-dev-x32 provides headers for the x32 ABI.
+mini-me.32: mini-me.c
+ $(CC) $(CFLAGS) -m32 -static -o $@ $<
+mini-me.x32: mini-me.c
+ $(CC) $(CFLAGS) -mx32 -static -o $@ $<
+mini-me.64: mini-me.c
+ $(CC) $(CFLAGS) -m64 -static -o $@ $<
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000000..446189ec3cd0
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,26 @@
+Copyright (c) 2009-2011 Robert N. M. Watson
+Copyright (c) 2011 Jonathan Anderson
+Copyright (C) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
+Copyright (c) 2013-2014 Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 000000000000..918534557725
--- /dev/null
+++ b/README.md
@@ -0,0 +1,62 @@
+# Capsicum User Space Tests
+
+This directory holds unit tests for [Capsicum](http://www.cl.cam.ac.uk/research/security/capsicum/)
+object-capabilities. The tests exercise the syscall interface to a Capsicum-enabled operating system,
+currently either [FreeBSD >=10.x](http://www.freebsd.org) or a modified Linux kernel (the
+[capsicum-linux](http://github.com/google/capsicum-linux) project).
+
+The tests are written in C++98, and use the [Google Test](https://code.google.com/p/googletest/)
+framework, with some additions to fork off particular tests (because a process that enters capability
+mode cannot leave it again).
+
+## Provenance
+
+The original basis for these tests was:
+
+ - [unit tests](https://github.com/freebsd/freebsd/tree/master/tools/regression/security/cap_test)
+ written by Robert Watson and Jonathan Anderson for the original FreeBSD 9.x Capsicum implementation
+ - [unit tests](http://git.chromium.org/gitweb/?p=chromiumos/third_party/kernel-capsicum.git;a=tree;f=tools/testing/capsicum_tests;hb=refs/heads/capsicum) written by Meredydd Luff for the original Capsicum-Linux port.
+
+These tests were coalesced and moved into an independent repository to enable
+comparative testing across multiple OSes, and then substantially extended.
+
+## OS Configuration
+
+### Linux
+
+The following kernel configuration options are needed to run the tests:
+
+ - `CONFIG_SECURITY_CAPSICUM`: enable the Capsicum framework
+ - `CONFIG_PROCDESC`: enable Capsicum process-descriptor functionality
+ - `CONFIG_DEBUG_FS`: enable debug filesystem
+ - `CONFIG_IP_SCTP`: enable SCTP support
+
+### FreeBSD (>= 10.x)
+
+The following kernel configuration options are needed so that all tests can run:
+
+ - `options P1003_1B_MQUEUE`: Enable POSIX message queues (or `kldload mqueuefs`)
+
+## Other Dependencies
+
+### Linux
+
+The following additional development packages are needed to build the full test suite on Linux.
+
+ - `libcaprights`: See below
+ - `libcap-dev`: Provides headers for POSIX.1e capabilities.
+ - `libsctp1`: Provides SCTP library functions.
+ - `libsctp-dev`: Provides headers for SCTP library functions.
+
+
+## Linux libcaprights
+
+The Capsicum userspace library is held in the `libcaprights/` subdirectory. Ideally, this
+library should be built (with `./configure; make` or `dpkg-buildpackage -uc -us`) and
+installed (with `make install` or `dpkg -i libcaprights*.deb`) so that the tests will
+use behave like a normal Capsicum-aware application.
+
+However, if no installed copy of the library is found, the `GNUmakefile` will attempt
+to use the local `libcaprights/*.c` source; this requires `./configure` to have been
+performed in the `libcaprights` subdirectory. The local code is also used for
+cross-compiled builds of the test suite (e.g. `make ARCH=32` or `make ARCH=x32`).
diff --git a/capability-fd-pair.cc b/capability-fd-pair.cc
new file mode 100644
index 000000000000..d56dac049805
--- /dev/null
+++ b/capability-fd-pair.cc
@@ -0,0 +1,188 @@
+// Tests involving 2 capability file descriptors.
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <fcntl.h>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+TEST(CapabilityPair, sendfile) {
+ int in_fd = open(TmpFile("cap_sendfile_in"), O_CREAT|O_RDWR, 0644);
+ EXPECT_OK(write(in_fd, "1234", 4));
+ // Output fd for sendfile must be a stream socket in FreeBSD.
+ int sock_fds[2];
+ EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds));
+
+ cap_rights_t r_rs;
+ cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
+ cap_rights_t r_ws;
+ cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
+
+ int cap_in_ro = dup(in_fd);
+ EXPECT_OK(cap_in_ro);
+ EXPECT_OK(cap_rights_limit(cap_in_ro, &r_rs));
+ int cap_in_wo = dup(in_fd);
+ EXPECT_OK(cap_in_wo);
+ EXPECT_OK(cap_rights_limit(cap_in_wo, &r_ws));
+ int cap_out_ro = dup(sock_fds[0]);
+ EXPECT_OK(cap_out_ro);
+ EXPECT_OK(cap_rights_limit(cap_out_ro, &r_rs));
+ int cap_out_wo = dup(sock_fds[0]);
+ EXPECT_OK(cap_out_wo);
+ EXPECT_OK(cap_rights_limit(cap_out_wo, &r_ws));
+
+ off_t offset = 0;
+ EXPECT_NOTCAPABLE(sendfile_(cap_out_ro, cap_in_ro, &offset, 4));
+ EXPECT_NOTCAPABLE(sendfile_(cap_out_wo, cap_in_wo, &offset, 4));
+ EXPECT_OK(sendfile_(cap_out_wo, cap_in_ro, &offset, 4));
+
+ close(cap_in_ro);
+ close(cap_in_wo);
+ close(cap_out_ro);
+ close(cap_out_wo);
+ close(in_fd);
+ close(sock_fds[0]);
+ close(sock_fds[1]);
+ unlink(TmpFile("cap_sendfile_in"));
+}
+
+#ifdef HAVE_TEE
+TEST(CapabilityPair, tee) {
+ int pipe1_fds[2];
+ EXPECT_OK(pipe2(pipe1_fds, O_NONBLOCK));
+ int pipe2_fds[2];
+ EXPECT_OK(pipe2(pipe2_fds, O_NONBLOCK));
+
+ // Put some data into pipe1.
+ unsigned char buffer[4] = {1, 2, 3, 4};
+ EXPECT_OK(write(pipe1_fds[1], buffer, 4));
+
+ cap_rights_t r_ro;
+ cap_rights_init(&r_ro, CAP_READ);
+ cap_rights_t r_wo;
+ cap_rights_init(&r_wo, CAP_WRITE);
+ cap_rights_t r_rw;
+ cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+
+ // Various attempts to tee into pipe2.
+ int cap_in_wo = dup(pipe1_fds[0]);
+ EXPECT_OK(cap_in_wo);
+ EXPECT_OK(cap_rights_limit(cap_in_wo, &r_wo));
+ int cap_in_rw = dup(pipe1_fds[0]);
+ EXPECT_OK(cap_in_rw);
+ EXPECT_OK(cap_rights_limit(cap_in_rw, &r_rw));
+ int cap_out_ro = dup(pipe2_fds[1]);
+ EXPECT_OK(cap_out_ro);
+ EXPECT_OK(cap_rights_limit(cap_out_ro, &r_ro));
+ int cap_out_rw = dup(pipe2_fds[1]);
+ EXPECT_OK(cap_out_rw);
+ EXPECT_OK(cap_rights_limit(cap_out_rw, &r_rw));
+
+ EXPECT_NOTCAPABLE(tee(cap_in_wo, cap_out_rw, 4, SPLICE_F_NONBLOCK));
+ EXPECT_NOTCAPABLE(tee(cap_in_rw, cap_out_ro, 4, SPLICE_F_NONBLOCK));
+ EXPECT_OK(tee(cap_in_rw, cap_out_rw, 4, SPLICE_F_NONBLOCK));
+
+ close(cap_in_wo);
+ close(cap_in_rw);
+ close(cap_out_ro);
+ close(cap_out_rw);
+ close(pipe1_fds[0]);
+ close(pipe1_fds[1]);
+ close(pipe2_fds[0]);
+ close(pipe2_fds[1]);
+}
+#endif
+
+#ifdef HAVE_SPLICE
+TEST(CapabilityPair, splice) {
+ int pipe1_fds[2];
+ EXPECT_OK(pipe2(pipe1_fds, O_NONBLOCK));
+ int pipe2_fds[2];
+ EXPECT_OK(pipe2(pipe2_fds, O_NONBLOCK));
+
+ // Put some data into pipe1.
+ unsigned char buffer[4] = {1, 2, 3, 4};
+ EXPECT_OK(write(pipe1_fds[1], buffer, 4));
+
+ cap_rights_t r_ro;
+ cap_rights_init(&r_ro, CAP_READ);
+ cap_rights_t r_wo;
+ cap_rights_init(&r_wo, CAP_WRITE);
+ cap_rights_t r_rs;
+ cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
+ cap_rights_t r_ws;
+ cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
+
+ // Various attempts to splice.
+ int cap_in_wo = dup(pipe1_fds[0]);
+ EXPECT_OK(cap_in_wo);
+ EXPECT_OK(cap_rights_limit(cap_in_wo, &r_wo));
+ int cap_in_ro = dup(pipe1_fds[0]);
+ EXPECT_OK(cap_in_ro);
+ EXPECT_OK(cap_rights_limit(cap_in_ro, &r_ro));
+ int cap_in_ro_seek = dup(pipe1_fds[0]);
+ EXPECT_OK(cap_in_ro_seek);
+ EXPECT_OK(cap_rights_limit(cap_in_ro_seek, &r_rs));
+ int cap_out_wo = dup(pipe2_fds[1]);
+ EXPECT_OK(cap_out_wo);
+ EXPECT_OK(cap_rights_limit(cap_out_wo, &r_wo));
+ int cap_out_ro = dup(pipe2_fds[1]);
+ EXPECT_OK(cap_out_ro);
+ EXPECT_OK(cap_rights_limit(cap_out_ro, &r_ro));
+ int cap_out_wo_seek = dup(pipe2_fds[1]);
+ EXPECT_OK(cap_out_wo_seek);
+ EXPECT_OK(cap_rights_limit(cap_out_wo_seek, &r_ws));
+
+ EXPECT_NOTCAPABLE(splice(cap_in_ro, NULL, cap_out_wo_seek, NULL, 4, SPLICE_F_NONBLOCK));
+ EXPECT_NOTCAPABLE(splice(cap_in_wo, NULL, cap_out_wo_seek, NULL, 4, SPLICE_F_NONBLOCK));
+ EXPECT_NOTCAPABLE(splice(cap_in_ro_seek, NULL, cap_out_ro, NULL, 4, SPLICE_F_NONBLOCK));
+ EXPECT_NOTCAPABLE(splice(cap_in_ro_seek, NULL, cap_out_wo, NULL, 4, SPLICE_F_NONBLOCK));
+ EXPECT_OK(splice(cap_in_ro_seek, NULL, cap_out_wo_seek, NULL, 4, SPLICE_F_NONBLOCK));
+
+ close(cap_in_wo);
+ close(cap_in_ro);
+ close(cap_in_ro_seek);
+ close(cap_out_wo);
+ close(cap_out_ro);
+ close(cap_out_wo_seek);
+ close(pipe1_fds[0]);
+ close(pipe1_fds[1]);
+ close(pipe2_fds[0]);
+ close(pipe2_fds[1]);
+}
+#endif
+
+#ifdef HAVE_VMSPLICE
+// Although it only involves a single file descriptor, test vmsplice(2) here too.
+TEST(CapabilityPair, vmsplice) {
+ int pipe_fds[2];
+ EXPECT_OK(pipe2(pipe_fds, O_NONBLOCK));
+
+ cap_rights_t r_ro;
+ cap_rights_init(&r_ro, CAP_READ);
+ cap_rights_t r_rw;
+ cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+
+ int cap_ro = dup(pipe_fds[1]);
+ EXPECT_OK(cap_ro);
+ EXPECT_OK(cap_rights_limit(cap_ro, &r_ro));
+ int cap_rw = dup(pipe_fds[1]);
+ EXPECT_OK(cap_rw);
+ EXPECT_OK(cap_rights_limit(cap_rw, &r_rw));
+
+ unsigned char buffer[4] = {1, 2, 3, 4};
+ struct iovec iov;
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = buffer;
+ iov.iov_len = sizeof(buffer);
+
+ EXPECT_NOTCAPABLE(vmsplice(cap_ro, &iov, 1, SPLICE_F_NONBLOCK));
+ EXPECT_OK(vmsplice(cap_rw, &iov, 1, SPLICE_F_NONBLOCK));
+
+ close(cap_ro);
+ close(cap_rw);
+ close(pipe_fds[0]);
+ close(pipe_fds[1]);
+}
+#endif
diff --git a/capability-fd.cc b/capability-fd.cc
new file mode 100644
index 000000000000..043ca236e3e1
--- /dev/null
+++ b/capability-fd.cc
@@ -0,0 +1,1271 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdint.h>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+/* Utilities for printing rights information */
+/* Written in C style to allow for: */
+/* TODO(drysdale): migrate these to somewhere in libcaprights/ */
+#define RIGHTS_INFO(RR) { (RR), #RR}
+typedef struct {
+ uint64_t right;
+ const char* name;
+} right_info;
+right_info known_rights[] = {
+ /* Rights that are common to all versions of Capsicum */
+ RIGHTS_INFO(CAP_READ),
+ RIGHTS_INFO(CAP_WRITE),
+ RIGHTS_INFO(CAP_SEEK_TELL),
+ RIGHTS_INFO(CAP_SEEK),
+ RIGHTS_INFO(CAP_PREAD),
+ RIGHTS_INFO(CAP_PWRITE),
+ RIGHTS_INFO(CAP_MMAP),
+ RIGHTS_INFO(CAP_MMAP_R),
+ RIGHTS_INFO(CAP_MMAP_W),
+ RIGHTS_INFO(CAP_MMAP_X),
+ RIGHTS_INFO(CAP_MMAP_RW),
+ RIGHTS_INFO(CAP_MMAP_RX),
+ RIGHTS_INFO(CAP_MMAP_WX),
+ RIGHTS_INFO(CAP_MMAP_RWX),
+ RIGHTS_INFO(CAP_CREATE),
+ RIGHTS_INFO(CAP_FEXECVE),
+ RIGHTS_INFO(CAP_FSYNC),
+ RIGHTS_INFO(CAP_FTRUNCATE),
+ RIGHTS_INFO(CAP_LOOKUP),
+ RIGHTS_INFO(CAP_FCHDIR),
+ RIGHTS_INFO(CAP_FCHFLAGS),
+ RIGHTS_INFO(CAP_CHFLAGSAT),
+ RIGHTS_INFO(CAP_FCHMOD),
+ RIGHTS_INFO(CAP_FCHMODAT),
+ RIGHTS_INFO(CAP_FCHOWN),
+ RIGHTS_INFO(CAP_FCHOWNAT),
+ RIGHTS_INFO(CAP_FCNTL),
+ RIGHTS_INFO(CAP_FLOCK),
+ RIGHTS_INFO(CAP_FPATHCONF),
+ RIGHTS_INFO(CAP_FSCK),
+ RIGHTS_INFO(CAP_FSTAT),
+ RIGHTS_INFO(CAP_FSTATAT),
+ RIGHTS_INFO(CAP_FSTATFS),
+ RIGHTS_INFO(CAP_FUTIMES),
+ RIGHTS_INFO(CAP_FUTIMESAT),
+ RIGHTS_INFO(CAP_MKDIRAT),
+ RIGHTS_INFO(CAP_MKFIFOAT),
+ RIGHTS_INFO(CAP_MKNODAT),
+ RIGHTS_INFO(CAP_RENAMEAT_SOURCE),
+ RIGHTS_INFO(CAP_SYMLINKAT),
+ RIGHTS_INFO(CAP_UNLINKAT),
+ RIGHTS_INFO(CAP_ACCEPT),
+ RIGHTS_INFO(CAP_BIND),
+ RIGHTS_INFO(CAP_CONNECT),
+ RIGHTS_INFO(CAP_GETPEERNAME),
+ RIGHTS_INFO(CAP_GETSOCKNAME),
+ RIGHTS_INFO(CAP_GETSOCKOPT),
+ RIGHTS_INFO(CAP_LISTEN),
+ RIGHTS_INFO(CAP_PEELOFF),
+ RIGHTS_INFO(CAP_RECV),
+ RIGHTS_INFO(CAP_SEND),
+ RIGHTS_INFO(CAP_SETSOCKOPT),
+ RIGHTS_INFO(CAP_SHUTDOWN),
+ RIGHTS_INFO(CAP_BINDAT),
+ RIGHTS_INFO(CAP_CONNECTAT),
+ RIGHTS_INFO(CAP_LINKAT_SOURCE),
+ RIGHTS_INFO(CAP_RENAMEAT_TARGET),
+ RIGHTS_INFO(CAP_SOCK_CLIENT),
+ RIGHTS_INFO(CAP_SOCK_SERVER),
+ RIGHTS_INFO(CAP_MAC_GET),
+ RIGHTS_INFO(CAP_MAC_SET),
+ RIGHTS_INFO(CAP_SEM_GETVALUE),
+ RIGHTS_INFO(CAP_SEM_POST),
+ RIGHTS_INFO(CAP_SEM_WAIT),
+ RIGHTS_INFO(CAP_EVENT),
+ RIGHTS_INFO(CAP_KQUEUE_EVENT),
+ RIGHTS_INFO(CAP_IOCTL),
+ RIGHTS_INFO(CAP_TTYHOOK),
+ RIGHTS_INFO(CAP_PDWAIT),
+ RIGHTS_INFO(CAP_PDGETPID),
+ RIGHTS_INFO(CAP_PDKILL),
+ RIGHTS_INFO(CAP_EXTATTR_DELETE),
+ RIGHTS_INFO(CAP_EXTATTR_GET),
+ RIGHTS_INFO(CAP_EXTATTR_LIST),
+ RIGHTS_INFO(CAP_EXTATTR_SET),
+ RIGHTS_INFO(CAP_ACL_CHECK),
+ RIGHTS_INFO(CAP_ACL_DELETE),
+ RIGHTS_INFO(CAP_ACL_GET),
+ RIGHTS_INFO(CAP_ACL_SET),
+ RIGHTS_INFO(CAP_KQUEUE_CHANGE),
+ RIGHTS_INFO(CAP_KQUEUE),
+ /* Rights that are only present in some version or some OS, and so are #ifdef'ed */
+ /* LINKAT got split */
+#ifdef CAP_LINKAT
+ RIGHTS_INFO(CAP_LINKAT),
+#endif
+#ifdef CAP_LINKAT_SOURCE
+ RIGHTS_INFO(CAP_LINKAT_SOURCE),
+#endif
+#ifdef CAP_LINKAT_TARGET
+ RIGHTS_INFO(CAP_LINKAT_TARGET),
+#endif
+ /* Linux aliased some FD operations for pdgetpid/pdkill */
+#ifdef CAP_PDGETPID_FREEBSD
+ RIGHTS_INFO(CAP_PDGETPID_FREEBSD),
+#endif
+#ifdef CAP_PDKILL_FREEBSD
+ RIGHTS_INFO(CAP_PDKILL_FREEBSD),
+#endif
+ /* Linux-specific rights */
+#ifdef CAP_FSIGNAL
+ RIGHTS_INFO(CAP_FSIGNAL),
+#endif
+#ifdef CAP_EPOLL_CTL
+ RIGHTS_INFO(CAP_EPOLL_CTL),
+#endif
+#ifdef CAP_NOTIFY
+ RIGHTS_INFO(CAP_NOTIFY),
+#endif
+#ifdef CAP_SETNS
+ RIGHTS_INFO(CAP_SETNS),
+#endif
+#ifdef CAP_PERFMON
+ RIGHTS_INFO(CAP_PERFMON),
+#endif
+#ifdef CAP_BPF
+ RIGHTS_INFO(CAP_BPF),
+#endif
+ /* Rights in later versions of FreeBSD (>10.0) */
+};
+
+void ShowCapRights(FILE *out, int fd) {
+ size_t ii;
+ bool first = true;
+ cap_rights_t rights;
+ CAP_SET_NONE(&rights);
+ if (cap_rights_get(fd, &rights) < 0) {
+ fprintf(out, "Failed to get rights for fd %d: errno %d\n", fd, errno);
+ return;
+ }
+
+ /* First print out all known rights */
+ size_t num_known = (sizeof(known_rights)/sizeof(known_rights[0]));
+ for (ii = 0; ii < num_known; ii++) {
+ if (cap_rights_is_set(&rights, known_rights[ii].right)) {
+ if (!first) fprintf(out, ",");
+ first = false;
+ fprintf(out, "%s", known_rights[ii].name);
+ }
+ }
+ /* Now repeat the loop, clearing rights we know of; this needs to be
+ * a separate loop because some named rights overlap.
+ */
+ for (ii = 0; ii < num_known; ii++) {
+ cap_rights_clear(&rights, known_rights[ii].right);
+ }
+ /* The following relies on the internal structure of cap_rights_t to
+ * try to show rights we don't know about. */
+ for (ii = 0; ii < (size_t)CAPARSIZE(&rights); ii++) {
+ uint64_t bits = (rights.cr_rights[0] & 0x01ffffffffffffffULL);
+ if (bits != 0) {
+ uint64_t which = 1;
+ for (which = 1; which < 0x0200000000000000 ; which <<= 1) {
+ if (bits & which) {
+ if (!first) fprintf(out, ",");
+ fprintf(out, "CAP_RIGHT(%d, 0x%016llxULL)", (int)ii, (long long unsigned)which);
+ }
+ }
+ }
+ }
+ fprintf(out, "\n");
+}
+
+void ShowAllCapRights(FILE *out) {
+ int fd;
+ struct rlimit limits;
+ if (getrlimit(RLIMIT_NOFILE, &limits) != 0) {
+ fprintf(out, "Failed to getrlimit for max FDs: errno %d\n", errno);
+ return;
+ }
+ for (fd = 0; fd < (int)limits.rlim_cur; fd++) {
+ if (fcntl(fd, F_GETFD, 0) != 0) {
+ continue;
+ }
+ fprintf(out, "fd %d: ", fd);
+ ShowCapRights(out, fd);
+ }
+}
+
+FORK_TEST(Capability, CapNew) {
+ cap_rights_t r_rws;
+ cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
+ cap_rights_t r_all;
+ CAP_SET_ALL(&r_all);
+
+ int cap_fd = dup(STDOUT_FILENO);
+ cap_rights_t rights;
+ CAP_SET_NONE(&rights);
+ EXPECT_OK(cap_rights_get(cap_fd, &rights));
+ EXPECT_RIGHTS_EQ(&r_all, &rights);
+
+ EXPECT_OK(cap_fd);
+ EXPECT_OK(cap_rights_limit(cap_fd, &r_rws));
+ if (cap_fd < 0) return;
+ int rc = write(cap_fd, "OK!\n", 4);
+ EXPECT_OK(rc);
+ EXPECT_EQ(4, rc);
+ EXPECT_OK(cap_rights_get(cap_fd, &rights));
+ EXPECT_RIGHTS_EQ(&r_rws, &rights);
+
+ // dup/dup2 should preserve rights.
+ int cap_dup = dup(cap_fd);
+ EXPECT_OK(cap_dup);
+ EXPECT_OK(cap_rights_get(cap_dup, &rights));
+ EXPECT_RIGHTS_EQ(&r_rws, &rights);
+ close(cap_dup);
+ EXPECT_OK(dup2(cap_fd, cap_dup));
+ EXPECT_OK(cap_rights_get(cap_dup, &rights));
+ EXPECT_RIGHTS_EQ(&r_rws, &rights);
+ close(cap_dup);
+#ifdef HAVE_DUP3
+ EXPECT_OK(dup3(cap_fd, cap_dup, 0));
+ EXPECT_OK(cap_rights_get(cap_dup, &rights));
+ EXPECT_RIGHTS_EQ(&r_rws, &rights);
+ close(cap_dup);
+#endif
+
+ // Try to get a disjoint set of rights in a sub-capability.
+ cap_rights_t r_rs;
+ cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
+ cap_rights_t r_rsmapchmod;
+ cap_rights_init(&r_rsmapchmod, CAP_READ, CAP_SEEK, CAP_MMAP, CAP_FCHMOD);
+ int cap_cap_fd = dup(cap_fd);
+ EXPECT_OK(cap_cap_fd);
+ EXPECT_NOTCAPABLE(cap_rights_limit(cap_cap_fd, &r_rsmapchmod));
+
+ // Dump rights info to stderr (mostly to ensure that Show[All]CapRights()
+ // is working.
+ ShowAllCapRights(stderr);
+
+ EXPECT_OK(close(cap_fd));
+}
+
+FORK_TEST(Capability, CapEnter) {
+ EXPECT_EQ(0, cap_enter());
+}
+
+FORK_TEST(Capability, BasicInterception) {
+ cap_rights_t r_0;
+ cap_rights_init(&r_0, 0);
+ int cap_fd = dup(1);
+ EXPECT_OK(cap_fd);
+ EXPECT_OK(cap_rights_limit(cap_fd, &r_0));
+
+ EXPECT_NOTCAPABLE(write(cap_fd, "", 0));
+
+ EXPECT_OK(cap_enter()); // Enter capability mode
+
+ EXPECT_NOTCAPABLE(write(cap_fd, "", 0));
+
+ // Create a new capability which does have write permission
+ cap_rights_t r_ws;
+ cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
+ int cap_fd2 = dup(1);
+ EXPECT_OK(cap_fd2);
+ EXPECT_OK(cap_rights_limit(cap_fd2, &r_ws));
+ EXPECT_OK(write(cap_fd2, "", 0));
+
+ // Tidy up.
+ if (cap_fd >= 0) close(cap_fd);
+ if (cap_fd2 >= 0) close(cap_fd2);
+}
+
+FORK_TEST_ON(Capability, OpenAtDirectoryTraversal, TmpFile("cap_openat_testfile")) {
+ int dir = open(tmpdir.c_str(), O_RDONLY);
+ EXPECT_OK(dir);
+
+ cap_enter();
+
+ int file = openat(dir, "cap_openat_testfile", O_RDONLY|O_CREAT, 0644);
+ EXPECT_OK(file);
+
+ // Test that we are confined to /tmp, and cannot
+ // escape using absolute paths or ../.
+ int new_file = openat(dir, "../dev/null", O_RDONLY);
+ EXPECT_EQ(-1, new_file);
+
+ new_file = openat(dir, "..", O_RDONLY);
+ EXPECT_EQ(-1, new_file);
+
+ new_file = openat(dir, "/dev/null", O_RDONLY);
+ EXPECT_EQ(-1, new_file);
+
+ new_file = openat(dir, "/", O_RDONLY);
+ EXPECT_EQ(-1, new_file);
+
+ // Tidy up.
+ close(file);
+ close(dir);
+}
+
+FORK_TEST_ON(Capability, FileInSync, TmpFile("cap_file_sync")) {
+ int fd = open(TmpFile("cap_file_sync"), O_RDWR|O_CREAT, 0644);
+ EXPECT_OK(fd);
+ const char* message = "Hello capability world";
+ EXPECT_OK(write(fd, message, strlen(message)));
+
+ cap_rights_t r_rsstat;
+ cap_rights_init(&r_rsstat, CAP_READ, CAP_SEEK, CAP_FSTAT);
+
+ int cap_fd = dup(fd);
+ EXPECT_OK(cap_fd);
+ EXPECT_OK(cap_rights_limit(cap_fd, &r_rsstat));
+ int cap_cap_fd = dup(cap_fd);
+ EXPECT_OK(cap_cap_fd);
+ EXPECT_OK(cap_rights_limit(cap_cap_fd, &r_rsstat));
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ // Changes to one file descriptor affect the others.
+ EXPECT_EQ(1, lseek(fd, 1, SEEK_SET));
+ EXPECT_EQ(1, lseek(fd, 0, SEEK_CUR));
+ EXPECT_EQ(1, lseek(cap_fd, 0, SEEK_CUR));
+ EXPECT_EQ(1, lseek(cap_cap_fd, 0, SEEK_CUR));
+ EXPECT_EQ(3, lseek(cap_fd, 3, SEEK_SET));
+ EXPECT_EQ(3, lseek(fd, 0, SEEK_CUR));
+ EXPECT_EQ(3, lseek(cap_fd, 0, SEEK_CUR));
+ EXPECT_EQ(3, lseek(cap_cap_fd, 0, SEEK_CUR));
+ EXPECT_EQ(5, lseek(cap_cap_fd, 5, SEEK_SET));
+ EXPECT_EQ(5, lseek(fd, 0, SEEK_CUR));
+ EXPECT_EQ(5, lseek(cap_fd, 0, SEEK_CUR));
+ EXPECT_EQ(5, lseek(cap_cap_fd, 0, SEEK_CUR));
+
+ close(cap_cap_fd);
+ close(cap_fd);
+ close(fd);
+}
+
+// Create a capability on /tmp that does not allow CAP_WRITE,
+// and check that this restriction is inherited through openat().
+FORK_TEST_ON(Capability, Inheritance, TmpFile("cap_openat_write_testfile")) {
+ int dir = open(tmpdir.c_str(), O_RDONLY);
+ EXPECT_OK(dir);
+
+ cap_rights_t r_rl;
+ cap_rights_init(&r_rl, CAP_READ, CAP_LOOKUP);
+
+ int cap_dir = dup(dir);
+ EXPECT_OK(cap_dir);
+ EXPECT_OK(cap_rights_limit(cap_dir, &r_rl));
+
+ const char *filename = "cap_openat_write_testfile";
+ int file = openat(dir, filename, O_WRONLY|O_CREAT, 0644);
+ EXPECT_OK(file);
+ EXPECT_EQ(5, write(file, "TEST\n", 5));
+ if (file >= 0) close(file);
+
+ EXPECT_OK(cap_enter());
+ file = openat(cap_dir, filename, O_RDONLY);
+ EXPECT_OK(file);
+
+ cap_rights_t rights;
+ cap_rights_init(&rights, 0);
+ EXPECT_OK(cap_rights_get(file, &rights));
+ EXPECT_RIGHTS_EQ(&r_rl, &rights);
+ if (file >= 0) close(file);
+
+ file = openat(cap_dir, filename, O_WRONLY|O_APPEND);
+ EXPECT_NOTCAPABLE(file);
+ if (file > 0) close(file);
+
+ if (dir > 0) close(dir);
+ if (cap_dir > 0) close(cap_dir);
+}
+
+
+// Ensure that, if the capability had enough rights for the system call to
+// pass, then it did. Otherwise, ensure that the errno is ENOTCAPABLE;
+// capability restrictions should kick in before any other error logic.
+#define CHECK_RIGHT_RESULT(result, rights, ...) do { \
+ cap_rights_t rights_needed; \
+ cap_rights_init(&rights_needed, __VA_ARGS__); \
+ if (cap_rights_contains(&rights, &rights_needed)) { \
+ EXPECT_OK(result) << std::endl \
+ << " need: " << rights_needed \
+ << std::endl \
+ << " got: " << rights; \
+ } else { \
+ EXPECT_EQ(-1, result) << " need: " << rights_needed \
+ << std::endl \
+ << " got: "<< rights; \
+ EXPECT_EQ(ENOTCAPABLE, errno); \
+ } \
+} while (0)
+
+#define EXPECT_MMAP_NOTCAPABLE(result) do { \
+ void *rv = result; \
+ EXPECT_EQ(MAP_FAILED, rv); \
+ EXPECT_EQ(ENOTCAPABLE, errno); \
+ if (rv != MAP_FAILED) munmap(rv, getpagesize()); \
+} while (0)
+
+#define EXPECT_MMAP_OK(result) do { \
+ void *rv = result; \
+ EXPECT_NE(MAP_FAILED, rv) << " with errno " << errno; \
+ if (rv != MAP_FAILED) munmap(rv, getpagesize()); \
+} while (0)
+
+
+// As above, but for the special mmap() case: unmap after successful mmap().
+#define CHECK_RIGHT_MMAP_RESULT(result, rights, ...) do { \
+ cap_rights_t rights_needed; \
+ cap_rights_init(&rights_needed, __VA_ARGS__); \
+ if (cap_rights_contains(&rights, &rights_needed)) { \
+ EXPECT_MMAP_OK(result); \
+ } else { \
+ EXPECT_MMAP_NOTCAPABLE(result); \
+ } \
+} while (0)
+
+FORK_TEST_ON(Capability, Mmap, TmpFile("cap_mmap_operations")) {
+ int fd = open(TmpFile("cap_mmap_operations"), O_RDWR | O_CREAT, 0644);
+ EXPECT_OK(fd);
+ if (fd < 0) return;
+
+ cap_rights_t r_0;
+ cap_rights_init(&r_0, 0);
+ cap_rights_t r_mmap;
+ cap_rights_init(&r_mmap, CAP_MMAP);
+ cap_rights_t r_r;
+ cap_rights_init(&r_r, CAP_PREAD);
+ cap_rights_t r_rmmap;
+ cap_rights_init(&r_rmmap, CAP_PREAD, CAP_MMAP);
+
+ // If we're missing a capability, it will fail.
+ int cap_none = dup(fd);
+ EXPECT_OK(cap_none);
+ EXPECT_OK(cap_rights_limit(cap_none, &r_0));
+ int cap_mmap = dup(fd);
+ EXPECT_OK(cap_mmap);
+ EXPECT_OK(cap_rights_limit(cap_mmap, &r_mmap));
+ int cap_read = dup(fd);
+ EXPECT_OK(cap_read);
+ EXPECT_OK(cap_rights_limit(cap_read, &r_r));
+ int cap_both = dup(fd);
+ EXPECT_OK(cap_both);
+ EXPECT_OK(cap_rights_limit(cap_both, &r_rmmap));
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ EXPECT_MMAP_NOTCAPABLE(mmap(NULL, getpagesize(), PROT_READ, MAP_PRIVATE, cap_none, 0));
+ EXPECT_MMAP_NOTCAPABLE(mmap(NULL, getpagesize(), PROT_READ, MAP_PRIVATE, cap_mmap, 0));
+ EXPECT_MMAP_NOTCAPABLE(mmap(NULL, getpagesize(), PROT_READ, MAP_PRIVATE, cap_read, 0));
+
+ EXPECT_MMAP_OK(mmap(NULL, getpagesize(), PROT_READ, MAP_PRIVATE, cap_both, 0));
+
+ // A call with MAP_ANONYMOUS should succeed without any capability requirements.
+ EXPECT_MMAP_OK(mmap(NULL, getpagesize(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0));
+
+ EXPECT_OK(close(cap_both));
+ EXPECT_OK(close(cap_read));
+ EXPECT_OK(close(cap_mmap));
+ EXPECT_OK(close(cap_none));
+ EXPECT_OK(close(fd));
+}
+
+// Given a file descriptor, create a capability with specific rights and
+// make sure only those rights work.
+#define TRY_FILE_OPS(fd, ...) do { \
+ cap_rights_t rights; \
+ cap_rights_init(&rights, __VA_ARGS__); \
+ TryFileOps((fd), rights); \
+} while (0)
+
+static void TryFileOps(int fd, cap_rights_t rights) {
+ int cap_fd = dup(fd);
+ EXPECT_OK(cap_fd);
+ EXPECT_OK(cap_rights_limit(cap_fd, &rights));
+ if (cap_fd < 0) return;
+ cap_rights_t erights;
+ EXPECT_OK(cap_rights_get(cap_fd, &erights));
+ EXPECT_RIGHTS_EQ(&rights, &erights);
+
+ // Check creation of a capability from a capability.
+ int cap_cap_fd = dup(cap_fd);
+ EXPECT_OK(cap_cap_fd);
+ EXPECT_OK(cap_rights_limit(cap_cap_fd, &rights));
+ EXPECT_NE(cap_fd, cap_cap_fd);
+ EXPECT_OK(cap_rights_get(cap_cap_fd, &erights));
+ EXPECT_RIGHTS_EQ(&rights, &erights);
+ close(cap_cap_fd);
+
+ char ch;
+ CHECK_RIGHT_RESULT(read(cap_fd, &ch, sizeof(ch)), rights, CAP_READ, CAP_SEEK_ASWAS);
+
+ ssize_t len1 = pread(cap_fd, &ch, sizeof(ch), 0);
+ CHECK_RIGHT_RESULT(len1, rights, CAP_PREAD);
+ ssize_t len2 = pread(cap_fd, &ch, sizeof(ch), 0);
+ CHECK_RIGHT_RESULT(len2, rights, CAP_PREAD);
+ EXPECT_EQ(len1, len2);
+
+ CHECK_RIGHT_RESULT(write(cap_fd, &ch, sizeof(ch)), rights, CAP_WRITE, CAP_SEEK_ASWAS);
+ CHECK_RIGHT_RESULT(pwrite(cap_fd, &ch, sizeof(ch), 0), rights, CAP_PWRITE);
+ CHECK_RIGHT_RESULT(lseek(cap_fd, 0, SEEK_SET), rights, CAP_SEEK);
+
+#ifdef HAVE_CHFLAGS
+ // Note: this is not expected to work over NFS.
+ struct statfs sf;
+ EXPECT_OK(fstatfs(fd, &sf));
+ bool is_nfs = (strncmp("nfs", sf.f_fstypename, sizeof(sf.f_fstypename)) == 0);
+ if (!is_nfs) {
+ CHECK_RIGHT_RESULT(fchflags(cap_fd, UF_NODUMP), rights, CAP_FCHFLAGS);
+ }
+#endif
+
+ CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_NONE, MAP_SHARED, cap_fd, 0),
+ rights, CAP_MMAP);
+ CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, cap_fd, 0),
+ rights, CAP_MMAP_R);
+ CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_WRITE, MAP_SHARED, cap_fd, 0),
+ rights, CAP_MMAP_W);
+ CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_EXEC, MAP_SHARED, cap_fd, 0),
+ rights, CAP_MMAP_X);
+ CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED, cap_fd, 0),
+ rights, CAP_MMAP_RW);
+ CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_READ | PROT_EXEC, MAP_SHARED, cap_fd, 0),
+ rights, CAP_MMAP_RX);
+ CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_EXEC | PROT_WRITE, MAP_SHARED, cap_fd, 0),
+ rights, CAP_MMAP_WX);
+ CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED, cap_fd, 0),
+ rights, CAP_MMAP_RWX);
+
+ CHECK_RIGHT_RESULT(fsync(cap_fd), rights, CAP_FSYNC);
+#ifdef HAVE_SYNC_FILE_RANGE
+ CHECK_RIGHT_RESULT(sync_file_range(cap_fd, 0, 1, 0), rights, CAP_FSYNC, CAP_SEEK);
+#endif
+
+ int rc = fcntl(cap_fd, F_GETFL);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FCNTL);
+ rc = fcntl(cap_fd, F_SETFL, rc);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FCNTL);
+
+ CHECK_RIGHT_RESULT(fchown(cap_fd, -1, -1), rights, CAP_FCHOWN);
+
+ CHECK_RIGHT_RESULT(fchmod(cap_fd, 0644), rights, CAP_FCHMOD);
+
+ CHECK_RIGHT_RESULT(flock(cap_fd, LOCK_SH), rights, CAP_FLOCK);
+ CHECK_RIGHT_RESULT(flock(cap_fd, LOCK_UN), rights, CAP_FLOCK);
+
+ CHECK_RIGHT_RESULT(ftruncate(cap_fd, 0), rights, CAP_FTRUNCATE);
+
+ struct stat sb;
+ CHECK_RIGHT_RESULT(fstat(cap_fd, &sb), rights, CAP_FSTAT);
+
+ struct statfs cap_sf;
+ CHECK_RIGHT_RESULT(fstatfs(cap_fd, &cap_sf), rights, CAP_FSTATFS);
+
+#ifdef HAVE_FPATHCONF
+ CHECK_RIGHT_RESULT(fpathconf(cap_fd, _PC_NAME_MAX), rights, CAP_FPATHCONF);
+#endif
+
+ CHECK_RIGHT_RESULT(futimes(cap_fd, NULL), rights, CAP_FUTIMES);
+
+ struct pollfd pollfd;
+ pollfd.fd = cap_fd;
+ pollfd.events = POLLIN | POLLERR | POLLHUP;
+ pollfd.revents = 0;
+ int ret = poll(&pollfd, 1, 0);
+ if (cap_rights_is_set(&rights, CAP_EVENT)) {
+ EXPECT_OK(ret);
+ } else {
+ EXPECT_NE(0, (pollfd.revents & POLLNVAL));
+ }
+
+ struct timeval tv;
+ tv.tv_sec = 0;
+ tv.tv_usec = 100;
+ fd_set rset;
+ FD_ZERO(&rset);
+ FD_SET(cap_fd, &rset);
+ fd_set wset;
+ FD_ZERO(&wset);
+ FD_SET(cap_fd, &wset);
+ ret = select(cap_fd+1, &rset, &wset, NULL, &tv);
+ if (cap_rights_is_set(&rights, CAP_EVENT)) {
+ EXPECT_OK(ret);
+ } else {
+ EXPECT_NOTCAPABLE(ret);
+ }
+
+ // TODO(FreeBSD): kqueue
+
+ EXPECT_OK(close(cap_fd));
+}
+
+FORK_TEST_ON(Capability, Operations, TmpFile("cap_fd_operations")) {
+ int fd = open(TmpFile("cap_fd_operations"), O_RDWR | O_CREAT, 0644);
+ EXPECT_OK(fd);
+ if (fd < 0) return;
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ // Try a variety of different combinations of rights - a full
+ // enumeration is too large (2^N with N~30+) to perform.
+ TRY_FILE_OPS(fd, CAP_READ);
+ TRY_FILE_OPS(fd, CAP_PREAD);
+ TRY_FILE_OPS(fd, CAP_WRITE);
+ TRY_FILE_OPS(fd, CAP_PWRITE);
+ TRY_FILE_OPS(fd, CAP_READ, CAP_WRITE);
+ TRY_FILE_OPS(fd, CAP_PREAD, CAP_PWRITE);
+ TRY_FILE_OPS(fd, CAP_SEEK);
+ TRY_FILE_OPS(fd, CAP_FCHFLAGS);
+ TRY_FILE_OPS(fd, CAP_IOCTL);
+ TRY_FILE_OPS(fd, CAP_FSTAT);
+ TRY_FILE_OPS(fd, CAP_MMAP);
+ TRY_FILE_OPS(fd, CAP_MMAP_R);
+ TRY_FILE_OPS(fd, CAP_MMAP_W);
+ TRY_FILE_OPS(fd, CAP_MMAP_X);
+ TRY_FILE_OPS(fd, CAP_MMAP_RW);
+ TRY_FILE_OPS(fd, CAP_MMAP_RX);
+ TRY_FILE_OPS(fd, CAP_MMAP_WX);
+ TRY_FILE_OPS(fd, CAP_MMAP_RWX);
+ TRY_FILE_OPS(fd, CAP_FCNTL);
+ TRY_FILE_OPS(fd, CAP_EVENT);
+ TRY_FILE_OPS(fd, CAP_FSYNC);
+ TRY_FILE_OPS(fd, CAP_FCHOWN);
+ TRY_FILE_OPS(fd, CAP_FCHMOD);
+ TRY_FILE_OPS(fd, CAP_FTRUNCATE);
+ TRY_FILE_OPS(fd, CAP_FLOCK);
+ TRY_FILE_OPS(fd, CAP_FSTATFS);
+ TRY_FILE_OPS(fd, CAP_FPATHCONF);
+ TRY_FILE_OPS(fd, CAP_FUTIMES);
+ TRY_FILE_OPS(fd, CAP_ACL_GET);
+ TRY_FILE_OPS(fd, CAP_ACL_SET);
+ TRY_FILE_OPS(fd, CAP_ACL_DELETE);
+ TRY_FILE_OPS(fd, CAP_ACL_CHECK);
+ TRY_FILE_OPS(fd, CAP_EXTATTR_GET);
+ TRY_FILE_OPS(fd, CAP_EXTATTR_SET);
+ TRY_FILE_OPS(fd, CAP_EXTATTR_DELETE);
+ TRY_FILE_OPS(fd, CAP_EXTATTR_LIST);
+ TRY_FILE_OPS(fd, CAP_MAC_GET);
+ TRY_FILE_OPS(fd, CAP_MAC_SET);
+
+ // Socket-specific.
+ TRY_FILE_OPS(fd, CAP_GETPEERNAME);
+ TRY_FILE_OPS(fd, CAP_GETSOCKNAME);
+ TRY_FILE_OPS(fd, CAP_ACCEPT);
+
+ close(fd);
+}
+
+#define TRY_DIR_OPS(dfd, ...) do { \
+ cap_rights_t rights; \
+ cap_rights_init(&rights, __VA_ARGS__); \
+ TryDirOps((dfd), rights); \
+} while (0)
+
+static void TryDirOps(int dirfd, cap_rights_t rights) {
+ cap_rights_t erights;
+ int dfd_cap = dup(dirfd);
+ EXPECT_OK(dfd_cap);
+ EXPECT_OK(cap_rights_limit(dfd_cap, &rights));
+ EXPECT_OK(cap_rights_get(dfd_cap, &erights));
+ EXPECT_RIGHTS_EQ(&rights, &erights);
+
+ int rc = openat(dfd_cap, "cap_create", O_CREAT | O_RDONLY, 0600);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_CREATE, CAP_READ, CAP_LOOKUP);
+ if (rc >= 0) {
+ EXPECT_OK(close(rc));
+ EXPECT_OK(unlinkat(dirfd, "cap_create", 0));
+ }
+ rc = openat(dfd_cap, "cap_create", O_CREAT | O_WRONLY | O_APPEND, 0600);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_CREATE, CAP_WRITE, CAP_LOOKUP);
+ if (rc >= 0) {
+ EXPECT_OK(close(rc));
+ EXPECT_OK(unlinkat(dirfd, "cap_create", 0));
+ }
+ rc = openat(dfd_cap, "cap_create", O_CREAT | O_RDWR | O_APPEND, 0600);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_CREATE, CAP_READ, CAP_WRITE, CAP_LOOKUP);
+ if (rc >= 0) {
+ EXPECT_OK(close(rc));
+ EXPECT_OK(unlinkat(dirfd, "cap_create", 0));
+ }
+
+ rc = openat(dirfd, "cap_faccess", O_CREAT, 0600);
+ EXPECT_OK(rc);
+ EXPECT_OK(close(rc));
+ rc = faccessat(dfd_cap, "cap_faccess", F_OK, 0);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FSTAT, CAP_LOOKUP);
+ EXPECT_OK(unlinkat(dirfd, "cap_faccess", 0));
+
+ rc = openat(dirfd, "cap_fsync", O_CREAT, 0600);
+ EXPECT_OK(rc);
+ EXPECT_OK(close(rc));
+ rc = openat(dfd_cap, "cap_fsync", O_FSYNC | O_RDONLY);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FSYNC, CAP_READ, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(close(rc));
+ rc = openat(dfd_cap, "cap_fsync", O_FSYNC | O_WRONLY | O_APPEND);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FSYNC, CAP_WRITE, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(close(rc));
+ rc = openat(dfd_cap, "cap_fsync", O_FSYNC | O_RDWR | O_APPEND);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FSYNC, CAP_READ, CAP_WRITE, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(close(rc));
+ rc = openat(dfd_cap, "cap_fsync", O_SYNC | O_RDONLY);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FSYNC, CAP_READ, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(close(rc));
+ rc = openat(dfd_cap, "cap_fsync", O_SYNC | O_WRONLY | O_APPEND);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FSYNC, CAP_WRITE, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(close(rc));
+ rc = openat(dfd_cap, "cap_fsync", O_SYNC | O_RDWR | O_APPEND);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FSYNC, CAP_READ, CAP_WRITE, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(close(rc));
+ EXPECT_OK(unlinkat(dirfd, "cap_fsync", 0));
+
+ rc = openat(dirfd, "cap_ftruncate", O_CREAT, 0600);
+ EXPECT_OK(rc);
+ EXPECT_OK(close(rc));
+ rc = openat(dfd_cap, "cap_ftruncate", O_TRUNC | O_RDONLY);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FTRUNCATE, CAP_READ, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(close(rc));
+ rc = openat(dfd_cap, "cap_ftruncate", O_TRUNC | O_WRONLY);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FTRUNCATE, CAP_WRITE, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(close(rc));
+ rc = openat(dfd_cap, "cap_ftruncate", O_TRUNC | O_RDWR);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FTRUNCATE, CAP_READ, CAP_WRITE, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(close(rc));
+ EXPECT_OK(unlinkat(dirfd, "cap_ftruncate", 0));
+
+ rc = openat(dfd_cap, "cap_create", O_CREAT | O_WRONLY, 0600);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_CREATE, CAP_WRITE, CAP_SEEK, CAP_LOOKUP);
+ if (rc >= 0) {
+ EXPECT_OK(close(rc));
+ EXPECT_OK(unlinkat(dirfd, "cap_create", 0));
+ }
+ rc = openat(dfd_cap, "cap_create", O_CREAT | O_RDWR, 0600);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_CREATE, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_LOOKUP);
+ if (rc >= 0) {
+ EXPECT_OK(close(rc));
+ EXPECT_OK(unlinkat(dirfd, "cap_create", 0));
+ }
+
+ rc = openat(dirfd, "cap_fsync", O_CREAT, 0600);
+ EXPECT_OK(rc);
+ EXPECT_OK(close(rc));
+ rc = openat(dfd_cap, "cap_fsync", O_FSYNC | O_WRONLY);
+ CHECK_RIGHT_RESULT(rc,
+ rights, CAP_FSYNC, CAP_WRITE, CAP_SEEK, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(close(rc));
+ rc = openat(dfd_cap, "cap_fsync", O_FSYNC | O_RDWR);
+ CHECK_RIGHT_RESULT(rc,
+ rights, CAP_FSYNC, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(close(rc));
+ rc = openat(dfd_cap, "cap_fsync", O_SYNC | O_WRONLY);
+ CHECK_RIGHT_RESULT(rc,
+ rights, CAP_FSYNC, CAP_WRITE, CAP_SEEK, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(close(rc));
+ rc = openat(dfd_cap, "cap_fsync", O_SYNC | O_RDWR);
+ CHECK_RIGHT_RESULT(rc,
+ rights, CAP_FSYNC, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(close(rc));
+ EXPECT_OK(unlinkat(dirfd, "cap_fsync", 0));
+
+#ifdef HAVE_CHFLAGSAT
+ rc = openat(dirfd, "cap_chflagsat", O_CREAT, 0600);
+ EXPECT_OK(rc);
+ EXPECT_OK(close(rc));
+ rc = chflagsat(dfd_cap, "cap_chflagsat", UF_NODUMP, 0);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_CHFLAGSAT, CAP_LOOKUP);
+ EXPECT_OK(unlinkat(dirfd, "cap_chflagsat", 0));
+#endif
+
+ rc = openat(dirfd, "cap_fchownat", O_CREAT, 0600);
+ EXPECT_OK(rc);
+ EXPECT_OK(close(rc));
+ rc = fchownat(dfd_cap, "cap_fchownat", -1, -1, 0);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FCHOWN, CAP_LOOKUP);
+ EXPECT_OK(unlinkat(dirfd, "cap_fchownat", 0));
+
+ rc = openat(dirfd, "cap_fchmodat", O_CREAT, 0600);
+ EXPECT_OK(rc);
+ EXPECT_OK(close(rc));
+ rc = fchmodat(dfd_cap, "cap_fchmodat", 0600, 0);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FCHMOD, CAP_LOOKUP);
+ EXPECT_OK(unlinkat(dirfd, "cap_fchmodat", 0));
+
+ rc = openat(dirfd, "cap_fstatat", O_CREAT, 0600);
+ EXPECT_OK(rc);
+ EXPECT_OK(close(rc));
+ struct stat sb;
+ rc = fstatat(dfd_cap, "cap_fstatat", &sb, 0);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FSTAT, CAP_LOOKUP);
+ EXPECT_OK(unlinkat(dirfd, "cap_fstatat", 0));
+
+ rc = openat(dirfd, "cap_futimesat", O_CREAT, 0600);
+ EXPECT_OK(rc);
+ EXPECT_OK(close(rc));
+ rc = futimesat(dfd_cap, "cap_futimesat", NULL);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_FUTIMES, CAP_LOOKUP);
+ EXPECT_OK(unlinkat(dirfd, "cap_futimesat", 0));
+
+ // For linkat(2), need:
+ // - CAP_LINKAT_SOURCE on source
+ // - CAP_LINKAT_TARGET on destination.
+ rc = openat(dirfd, "cap_linkat_src", O_CREAT, 0600);
+ EXPECT_OK(rc);
+ EXPECT_OK(close(rc));
+
+ rc = linkat(dirfd, "cap_linkat_src", dfd_cap, "cap_linkat_dst", 0);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_LINKAT_TARGET);
+ if (rc >= 0) EXPECT_OK(unlinkat(dirfd, "cap_linkat_dst", 0));
+
+ rc = linkat(dfd_cap, "cap_linkat_src", dirfd, "cap_linkat_dst", 0);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_LINKAT_SOURCE);
+ if (rc >= 0) EXPECT_OK(unlinkat(dirfd, "cap_linkat_dst", 0));
+
+ EXPECT_OK(unlinkat(dirfd, "cap_linkat_src", 0));
+
+ rc = mkdirat(dfd_cap, "cap_mkdirat", 0700);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_MKDIRAT, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(unlinkat(dirfd, "cap_mkdirat", AT_REMOVEDIR));
+
+#ifdef HAVE_MKFIFOAT
+ rc = mkfifoat(dfd_cap, "cap_mkfifoat", 0600);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_MKFIFOAT, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(unlinkat(dirfd, "cap_mkfifoat", 0));
+#endif
+
+ if (getuid() == 0) {
+ rc = mknodat(dfd_cap, "cap_mknodat", S_IFCHR | 0600, 0);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_MKNODAT, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(unlinkat(dirfd, "cap_mknodat", 0));
+ }
+
+ // For renameat(2), need:
+ // - CAP_RENAMEAT_SOURCE on source
+ // - CAP_RENAMEAT_TARGET on destination.
+ rc = openat(dirfd, "cap_renameat_src", O_CREAT, 0600);
+ EXPECT_OK(rc);
+ EXPECT_OK(close(rc));
+
+ rc = renameat(dirfd, "cap_renameat_src", dfd_cap, "cap_renameat_dst");
+ CHECK_RIGHT_RESULT(rc, rights, CAP_RENAMEAT_TARGET);
+ if (rc >= 0) {
+ EXPECT_OK(unlinkat(dirfd, "cap_renameat_dst", 0));
+ } else {
+ EXPECT_OK(unlinkat(dirfd, "cap_renameat_src", 0));
+ }
+
+ rc = openat(dirfd, "cap_renameat_src", O_CREAT, 0600);
+ EXPECT_OK(rc);
+ EXPECT_OK(close(rc));
+
+ rc = renameat(dfd_cap, "cap_renameat_src", dirfd, "cap_renameat_dst");
+ CHECK_RIGHT_RESULT(rc, rights, CAP_RENAMEAT_SOURCE);
+
+ if (rc >= 0) {
+ EXPECT_OK(unlinkat(dirfd, "cap_renameat_dst", 0));
+ } else {
+ EXPECT_OK(unlinkat(dirfd, "cap_renameat_src", 0));
+ }
+
+ rc = symlinkat("test", dfd_cap, "cap_symlinkat");
+ CHECK_RIGHT_RESULT(rc, rights, CAP_SYMLINKAT, CAP_LOOKUP);
+ if (rc >= 0) EXPECT_OK(unlinkat(dirfd, "cap_symlinkat", 0));
+
+ rc = openat(dirfd, "cap_unlinkat", O_CREAT, 0600);
+ EXPECT_OK(rc);
+ EXPECT_OK(close(rc));
+ rc = unlinkat(dfd_cap, "cap_unlinkat", 0);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_UNLINKAT, CAP_LOOKUP);
+ unlinkat(dirfd, "cap_unlinkat", 0);
+ EXPECT_OK(mkdirat(dirfd, "cap_unlinkat", 0700));
+ rc = unlinkat(dfd_cap, "cap_unlinkat", AT_REMOVEDIR);
+ CHECK_RIGHT_RESULT(rc, rights, CAP_UNLINKAT, CAP_LOOKUP);
+ unlinkat(dirfd, "cap_unlinkat", AT_REMOVEDIR);
+
+ EXPECT_OK(close(dfd_cap));
+}
+
+void DirOperationsTest(int extra) {
+ int rc = mkdir(TmpFile("cap_dirops"), 0755);
+ EXPECT_OK(rc);
+ if (rc < 0 && errno != EEXIST) return;
+ int dfd = open(TmpFile("cap_dirops"), O_RDONLY | O_DIRECTORY | extra);
+ EXPECT_OK(dfd);
+ int tmpfd = open(tmpdir.c_str(), O_RDONLY | O_DIRECTORY);
+ EXPECT_OK(tmpfd);
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ TRY_DIR_OPS(dfd, CAP_LINKAT_SOURCE);
+ TRY_DIR_OPS(dfd, CAP_LINKAT_TARGET);
+ TRY_DIR_OPS(dfd, CAP_CREATE, CAP_READ, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_CREATE, CAP_WRITE, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_CREATE, CAP_READ, CAP_WRITE, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_FSYNC, CAP_READ, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_FSYNC, CAP_WRITE, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_FSYNC, CAP_READ, CAP_WRITE, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_FTRUNCATE, CAP_READ, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_FTRUNCATE, CAP_WRITE, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_FTRUNCATE, CAP_READ, CAP_WRITE, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_FCHOWN, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_FCHMOD, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_FSTAT, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_FUTIMES, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_MKDIRAT, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_MKFIFOAT, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_MKNODAT, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_SYMLINKAT, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_UNLINKAT, CAP_LOOKUP);
+ // Rename needs CAP_RENAMEAT_SOURCE on source directory and
+ // CAP_RENAMEAT_TARGET on destination directory.
+ TRY_DIR_OPS(dfd, CAP_RENAMEAT_SOURCE, CAP_UNLINKAT, CAP_LOOKUP);
+ TRY_DIR_OPS(dfd, CAP_RENAMEAT_TARGET, CAP_UNLINKAT, CAP_LOOKUP);
+
+ EXPECT_OK(unlinkat(tmpfd, "cap_dirops", AT_REMOVEDIR));
+ EXPECT_OK(close(tmpfd));
+ EXPECT_OK(close(dfd));
+}
+
+FORK_TEST(Capability, DirOperations) {
+ DirOperationsTest(0);
+}
+
+#ifdef O_PATH
+FORK_TEST(Capability, PathDirOperations) {
+ // Make the dfd in the test a path-only file descriptor.
+ DirOperationsTest(O_PATH);
+}
+#endif
+
+static void TryReadWrite(int cap_fd) {
+ char buffer[64];
+ EXPECT_OK(read(cap_fd, buffer, sizeof(buffer)));
+ int rc = write(cap_fd, "", 0);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(ENOTCAPABLE, errno);
+}
+
+FORK_TEST_ON(Capability, SocketTransfer, TmpFile("cap_fd_transfer")) {
+ int sock_fds[2];
+ EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds));
+
+ struct msghdr mh;
+ mh.msg_name = NULL; // No address needed
+ mh.msg_namelen = 0;
+ char buffer1[1024];
+ struct iovec iov[1];
+ iov[0].iov_base = buffer1;
+ iov[0].iov_len = sizeof(buffer1);
+ mh.msg_iov = iov;
+ mh.msg_iovlen = 1;
+ char buffer2[1024];
+ mh.msg_control = buffer2;
+ mh.msg_controllen = sizeof(buffer2);
+ struct cmsghdr *cmptr;
+
+ cap_rights_t r_rs;
+ cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
+
+ pid_t child = fork();
+ if (child == 0) {
+ // Child: enter cap mode
+ EXPECT_OK(cap_enter());
+
+ // Child: wait to receive FD over socket
+ int rc = recvmsg(sock_fds[0], &mh, 0);
+ EXPECT_OK(rc);
+ EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen);
+ cmptr = CMSG_FIRSTHDR(&mh);
+ int cap_fd = *(int*)CMSG_DATA(cmptr);
+ EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len);
+ cmptr = CMSG_NXTHDR(&mh, cmptr);
+ EXPECT_TRUE(cmptr == NULL);
+
+ // Child: confirm we can do the right operations on the capability
+ cap_rights_t rights;
+ EXPECT_OK(cap_rights_get(cap_fd, &rights));
+ EXPECT_RIGHTS_EQ(&r_rs, &rights);
+ TryReadWrite(cap_fd);
+
+ // Child: wait for a normal read
+ int val;
+ read(sock_fds[0], &val, sizeof(val));
+ exit(0);
+ }
+
+ int fd = open(TmpFile("cap_fd_transfer"), O_RDWR | O_CREAT, 0644);
+ EXPECT_OK(fd);
+ if (fd < 0) return;
+ int cap_fd = dup(fd);
+ EXPECT_OK(cap_fd);
+ EXPECT_OK(cap_rights_limit(cap_fd, &r_rs));
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ // Confirm we can do the right operations on the capability
+ TryReadWrite(cap_fd);
+
+ // Send the file descriptor over the pipe to the sub-process
+ mh.msg_controllen = CMSG_LEN(sizeof(int));
+ cmptr = CMSG_FIRSTHDR(&mh);
+ cmptr->cmsg_level = SOL_SOCKET;
+ cmptr->cmsg_type = SCM_RIGHTS;
+ cmptr->cmsg_len = CMSG_LEN(sizeof(int));
+ *(int *)CMSG_DATA(cmptr) = cap_fd;
+ buffer1[0] = 0;
+ iov[0].iov_len = 1;
+ sleep(3);
+ int rc = sendmsg(sock_fds[1], &mh, 0);
+ EXPECT_OK(rc);
+
+ sleep(1); // Ensure subprocess runs
+ int zero = 0;
+ write(sock_fds[1], &zero, sizeof(zero));
+}
+
+TEST(Capability, SyscallAt) {
+ int rc = mkdir(TmpFile("cap_at_topdir"), 0755);
+ EXPECT_OK(rc);
+ if (rc < 0 && errno != EEXIST) return;
+
+ cap_rights_t r_all;
+ cap_rights_init(&r_all, CAP_READ, CAP_LOOKUP, CAP_MKNODAT, CAP_UNLINKAT, CAP_MKDIRAT, CAP_MKFIFOAT);
+ cap_rights_t r_no_unlink;
+ cap_rights_init(&r_no_unlink, CAP_READ, CAP_LOOKUP, CAP_MKDIRAT, CAP_MKFIFOAT);
+ cap_rights_t r_no_mkdir;
+ cap_rights_init(&r_no_mkdir, CAP_READ, CAP_LOOKUP, CAP_UNLINKAT, CAP_MKFIFOAT);
+ cap_rights_t r_no_mkfifo;
+ cap_rights_init(&r_no_mkfifo, CAP_READ, CAP_LOOKUP, CAP_UNLINKAT, CAP_MKDIRAT);
+ cap_rights_t r_no_mknod;
+ cap_rights_init(&r_no_mknod, CAP_READ, CAP_LOOKUP, CAP_UNLINKAT, CAP_MKDIRAT);
+ cap_rights_t r_create;
+ cap_rights_init(&r_create, CAP_READ, CAP_LOOKUP, CAP_CREATE);
+ cap_rights_t r_bind;
+ cap_rights_init(&r_bind, CAP_READ, CAP_LOOKUP, CAP_BIND);
+
+ int dfd = open(TmpFile("cap_at_topdir"), O_RDONLY);
+ EXPECT_OK(dfd);
+ int cap_dfd_all = dup(dfd);
+ EXPECT_OK(cap_dfd_all);
+ EXPECT_OK(cap_rights_limit(cap_dfd_all, &r_all));
+ int cap_dfd_no_unlink = dup(dfd);
+ EXPECT_OK(cap_dfd_no_unlink);
+ EXPECT_OK(cap_rights_limit(cap_dfd_no_unlink, &r_no_unlink));
+ int cap_dfd_no_mkdir = dup(dfd);
+ EXPECT_OK(cap_dfd_no_mkdir);
+ EXPECT_OK(cap_rights_limit(cap_dfd_no_mkdir, &r_no_mkdir));
+ int cap_dfd_no_mkfifo = dup(dfd);
+ EXPECT_OK(cap_dfd_no_mkfifo);
+ EXPECT_OK(cap_rights_limit(cap_dfd_no_mkfifo, &r_no_mkfifo));
+ int cap_dfd_no_mknod = dup(dfd);
+ EXPECT_OK(cap_dfd_no_mknod);
+ EXPECT_OK(cap_rights_limit(cap_dfd_no_mknod, &r_no_mknod));
+ int cap_dfd_create = dup(dfd);
+ EXPECT_OK(cap_dfd_create);
+ EXPECT_OK(cap_rights_limit(cap_dfd_create, &r_create));
+ int cap_dfd_bind = dup(dfd);
+ EXPECT_OK(cap_dfd_bind);
+ EXPECT_OK(cap_rights_limit(cap_dfd_bind, &r_bind));
+
+ // Need CAP_MKDIRAT to mkdirat(2).
+ EXPECT_NOTCAPABLE(mkdirat(cap_dfd_no_mkdir, "cap_subdir", 0755));
+ rmdir(TmpFile("cap_at_topdir/cap_subdir"));
+ EXPECT_OK(mkdirat(cap_dfd_all, "cap_subdir", 0755));
+
+ // Need CAP_UNLINKAT to unlinkat(dfd, name, AT_REMOVEDIR).
+ EXPECT_NOTCAPABLE(unlinkat(cap_dfd_no_unlink, "cap_subdir", AT_REMOVEDIR));
+ EXPECT_OK(unlinkat(cap_dfd_all, "cap_subdir", AT_REMOVEDIR));
+ rmdir(TmpFile("cap_at_topdir/cap_subdir"));
+
+ // Need CAP_MKFIFOAT to mkfifoat(2).
+ EXPECT_NOTCAPABLE(mkfifoat(cap_dfd_no_mkfifo, "cap_fifo", 0755));
+ unlink(TmpFile("cap_at_topdir/cap_fifo"));
+ EXPECT_OK(mkfifoat(cap_dfd_all, "cap_fifo", 0755));
+ unlink(TmpFile("cap_at_topdir/cap_fifo"));
+
+#ifdef HAVE_MKNOD_REG
+ // Need CAP_CREATE to create a regular file with mknodat(2).
+ EXPECT_NOTCAPABLE(mknodat(cap_dfd_all, "cap_regular", S_IFREG|0755, 0));
+ unlink(TmpFile("cap_at_topdir/cap_regular"));
+ EXPECT_OK(mknodat(cap_dfd_create, "cap_regular", S_IFREG|0755, 0));
+ unlink(TmpFile("cap_at_topdir/cap_regular"));
+#endif
+
+#ifdef HAVE_MKNOD_SOCKET
+ // Need CAP_BIND to create a UNIX domain socket with mknodat(2).
+ EXPECT_NOTCAPABLE(mknodat(cap_dfd_all, "cap_socket", S_IFSOCK|0755, 0));
+ unlink(TmpFile("cap_at_topdir/cap_socket"));
+ EXPECT_OK(mknodat(cap_dfd_bind, "cap_socket", S_IFSOCK|0755, 0));
+ unlink(TmpFile("cap_at_topdir/cap_socket"));
+#endif
+
+ if (getuid() == 0) {
+ // Need CAP_MKNODAT to mknodat(2) a device
+ EXPECT_NOTCAPABLE(mknodat(cap_dfd_no_mknod, "cap_device", S_IFCHR|0755, makedev(99, 123)));
+ unlink(TmpFile("cap_at_topdir/cap_device"));
+ EXPECT_OK(mknodat(cap_dfd_all, "cap_device", S_IFCHR|0755, makedev(99, 123)));
+ unlink(TmpFile("cap_at_topdir/cap_device"));
+
+ // Need CAP_MKFIFOAT to mknodat(2) for a FIFO.
+ EXPECT_NOTCAPABLE(mknodat(cap_dfd_no_mkfifo, "cap_fifo", S_IFIFO|0755, 0));
+ unlink(TmpFile("cap_at_topdir/cap_fifo"));
+ EXPECT_OK(mknodat(cap_dfd_all, "cap_fifo", S_IFIFO|0755, 0));
+ unlink(TmpFile("cap_at_topdir/cap_fifo"));
+ } else {
+ TEST_SKIPPED("requires root (partial)");
+ }
+
+ close(cap_dfd_all);
+ close(cap_dfd_no_mknod);
+ close(cap_dfd_no_mkfifo);
+ close(cap_dfd_no_mkdir);
+ close(cap_dfd_no_unlink);
+ close(cap_dfd_create);
+ close(cap_dfd_bind);
+ close(dfd);
+
+ // Tidy up.
+ rmdir(TmpFile("cap_at_topdir"));
+}
+
+FORK_TEST_ON(Capability, ExtendedAttributes, TmpFile("cap_extattr")) {
+ int fd = open(TmpFile("cap_extattr"), O_RDONLY|O_CREAT, 0644);
+ EXPECT_OK(fd);
+
+ char buffer[1024];
+ int rc = fgetxattr_(fd, "user.capsicumtest", buffer, sizeof(buffer));
+ if (rc < 0 && errno == ENOTSUP) {
+ // Need user_xattr mount option for non-root users on Linux
+ TEST_SKIPPED("/tmp doesn't support extended attributes");
+ close(fd);
+ return;
+ }
+
+ cap_rights_t r_rws;
+ cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
+ cap_rights_t r_xlist;
+ cap_rights_init(&r_xlist, CAP_EXTATTR_LIST);
+ cap_rights_t r_xget;
+ cap_rights_init(&r_xget, CAP_EXTATTR_GET);
+ cap_rights_t r_xset;
+ cap_rights_init(&r_xset, CAP_EXTATTR_SET);
+ cap_rights_t r_xdel;
+ cap_rights_init(&r_xdel, CAP_EXTATTR_DELETE);
+
+ int cap = dup(fd);
+ EXPECT_OK(cap);
+ EXPECT_OK(cap_rights_limit(cap, &r_rws));
+ int cap_xlist = dup(fd);
+ EXPECT_OK(cap_xlist);
+ EXPECT_OK(cap_rights_limit(cap_xlist, &r_xlist));
+ int cap_xget = dup(fd);
+ EXPECT_OK(cap_xget);
+ EXPECT_OK(cap_rights_limit(cap_xget, &r_xget));
+ int cap_xset = dup(fd);
+ EXPECT_OK(cap_xset);
+ EXPECT_OK(cap_rights_limit(cap_xset, &r_xset));
+ int cap_xdel = dup(fd);
+ EXPECT_OK(cap_xdel);
+ EXPECT_OK(cap_rights_limit(cap_xdel, &r_xdel));
+
+ const char* value = "capsicum";
+ int len = strlen(value) + 1;
+ EXPECT_NOTCAPABLE(fsetxattr_(cap, "user.capsicumtest", value, len, 0));
+ EXPECT_NOTCAPABLE(fsetxattr_(cap_xlist, "user.capsicumtest", value, len, 0));
+ EXPECT_NOTCAPABLE(fsetxattr_(cap_xget, "user.capsicumtest", value, len, 0));
+ EXPECT_NOTCAPABLE(fsetxattr_(cap_xdel, "user.capsicumtest", value, len, 0));
+ EXPECT_OK(fsetxattr_(cap_xset, "user.capsicumtest", value, len, 0));
+
+ EXPECT_NOTCAPABLE(flistxattr_(cap, buffer, sizeof(buffer)));
+ EXPECT_NOTCAPABLE(flistxattr_(cap_xget, buffer, sizeof(buffer)));
+ EXPECT_NOTCAPABLE(flistxattr_(cap_xset, buffer, sizeof(buffer)));
+ EXPECT_NOTCAPABLE(flistxattr_(cap_xdel, buffer, sizeof(buffer)));
+ EXPECT_OK(flistxattr_(cap_xlist, buffer, sizeof(buffer)));
+
+ EXPECT_NOTCAPABLE(fgetxattr_(cap, "user.capsicumtest", buffer, sizeof(buffer)));
+ EXPECT_NOTCAPABLE(fgetxattr_(cap_xlist, "user.capsicumtest", buffer, sizeof(buffer)));
+ EXPECT_NOTCAPABLE(fgetxattr_(cap_xset, "user.capsicumtest", buffer, sizeof(buffer)));
+ EXPECT_NOTCAPABLE(fgetxattr_(cap_xdel, "user.capsicumtest", buffer, sizeof(buffer)));
+ EXPECT_OK(fgetxattr_(cap_xget, "user.capsicumtest", buffer, sizeof(buffer)));
+
+ EXPECT_NOTCAPABLE(fremovexattr_(cap, "user.capsicumtest"));
+ EXPECT_NOTCAPABLE(fremovexattr_(cap_xlist, "user.capsicumtest"));
+ EXPECT_NOTCAPABLE(fremovexattr_(cap_xget, "user.capsicumtest"));
+ EXPECT_NOTCAPABLE(fremovexattr_(cap_xset, "user.capsicumtest"));
+ EXPECT_OK(fremovexattr_(cap_xdel, "user.capsicumtest"));
+
+ close(cap_xdel);
+ close(cap_xset);
+ close(cap_xget);
+ close(cap_xlist);
+ close(cap);
+ close(fd);
+}
+
+TEST(Capability, PipeUnseekable) {
+ int fds[2];
+ EXPECT_OK(pipe(fds));
+
+ // Some programs detect pipes by calling seek() and getting ESPIPE.
+ EXPECT_EQ(-1, lseek(fds[0], 0, SEEK_SET));
+ EXPECT_EQ(ESPIPE, errno);
+
+ cap_rights_t rights;
+ cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK);
+ EXPECT_OK(cap_rights_limit(fds[0], &rights));
+
+ EXPECT_EQ(-1, lseek(fds[0], 0, SEEK_SET));
+ EXPECT_EQ(ESPIPE, errno);
+
+ // Remove CAP_SEEK and see if ENOTCAPABLE trumps ESPIPE.
+ cap_rights_init(&rights, CAP_READ, CAP_WRITE);
+ EXPECT_OK(cap_rights_limit(fds[0], &rights));
+ EXPECT_EQ(-1, lseek(fds[0], 0, SEEK_SET));
+ EXPECT_EQ(ENOTCAPABLE, errno);
+ // TODO(drysdale): in practical terms it might be nice if ESPIPE trumped ENOTCAPABLE.
+ // EXPECT_EQ(ESPIPE, errno);
+
+ close(fds[0]);
+ close(fds[1]);
+}
+
+TEST(Capability, NoBypassDAC) {
+ REQUIRE_ROOT();
+ int fd = open(TmpFile("cap_root_owned"), O_RDONLY|O_CREAT, 0644);
+ EXPECT_OK(fd);
+ cap_rights_t rights;
+ cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FCHMOD, CAP_FSTAT);
+ EXPECT_OK(cap_rights_limit(fd, &rights));
+
+ pid_t child = fork();
+ if (child == 0) {
+ // Child: change uid to a lesser being
+ setuid(other_uid);
+ // Attempt to fchmod the file, and fail.
+ // Having CAP_FCHMOD doesn't bypass the need to comply with DAC policy.
+ int rc = fchmod(fd, 0666);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EPERM, errno);
+ exit(HasFailure());
+ }
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_TRUE(WIFEXITED(status)) << "0x" << std::hex << status;
+ EXPECT_EQ(0, WEXITSTATUS(status));
+ struct stat info;
+ EXPECT_OK(fstat(fd, &info));
+ EXPECT_EQ((mode_t)(S_IFREG|0644), info.st_mode);
+ close(fd);
+ unlink(TmpFile("cap_root_owned"));
+}
diff --git a/capmode.cc b/capmode.cc
new file mode 100644
index 000000000000..eb4e5c9ff929
--- /dev/null
+++ b/capmode.cc
@@ -0,0 +1,651 @@
+// Test routines to make sure a variety of system calls are or are not
+// available in capability mode. The goal is not to see if they work, just
+// whether or not they return the expected ECAPMODE.
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/ptrace.h>
+#include <dirent.h>
+#include <netinet/in.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <time.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+// Test fixture that opens (and closes) a bunch of files.
+class WithFiles : public ::testing::Test {
+ public:
+ WithFiles() :
+ fd_file_(open(TmpFile("cap_capmode"), O_RDWR|O_CREAT, 0644)),
+ fd_close_(open("/dev/null", O_RDWR)),
+ fd_dir_(open(tmpdir.c_str(), O_RDONLY)),
+ fd_socket_(socket(PF_INET, SOCK_DGRAM, 0)),
+ fd_tcp_socket_(socket(PF_INET, SOCK_STREAM, 0)) {
+ EXPECT_OK(fd_file_);
+ EXPECT_OK(fd_close_);
+ EXPECT_OK(fd_dir_);
+ EXPECT_OK(fd_socket_);
+ EXPECT_OK(fd_tcp_socket_);
+ }
+ ~WithFiles() {
+ if (fd_tcp_socket_ >= 0) close(fd_tcp_socket_);
+ if (fd_socket_ >= 0) close(fd_socket_);
+ if (fd_dir_ >= 0) close(fd_dir_);
+ if (fd_close_ >= 0) close(fd_close_);
+ if (fd_file_ >= 0) close(fd_file_);
+ unlink(TmpFile("cap_capmode"));
+ }
+ protected:
+ int fd_file_;
+ int fd_close_;
+ int fd_dir_;
+ int fd_socket_;
+ int fd_tcp_socket_;
+};
+
+FORK_TEST_F(WithFiles, DisallowedFileSyscalls) {
+ unsigned int mode = -1;
+ EXPECT_OK(cap_getmode(&mode));
+ EXPECT_EQ(0, (int)mode);
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+ EXPECT_OK(cap_getmode(&mode));
+ EXPECT_EQ(1, (int)mode);
+
+ // System calls that are not permitted in capability mode.
+ EXPECT_CAPMODE(access(TmpFile("cap_capmode_access"), F_OK));
+ EXPECT_CAPMODE(acct(TmpFile("cap_capmode_acct")));
+ EXPECT_CAPMODE(chdir(TmpFile("cap_capmode_chdir")));
+#ifdef HAVE_CHFLAGS
+ EXPECT_CAPMODE(chflags(TmpFile("cap_capmode_chflags"), UF_NODUMP));
+#endif
+ EXPECT_CAPMODE(chmod(TmpFile("cap_capmode_chmod"), 0644));
+ EXPECT_CAPMODE(chown(TmpFile("cap_capmode_chown"), -1, -1));
+ EXPECT_CAPMODE(chroot(TmpFile("cap_capmode_chroot")));
+ EXPECT_CAPMODE(creat(TmpFile("cap_capmode_creat"), 0644));
+ EXPECT_CAPMODE(fchdir(fd_dir_));
+#ifdef HAVE_GETFSSTAT
+ struct statfs statfs;
+ EXPECT_CAPMODE(getfsstat(&statfs, sizeof(statfs), MNT_NOWAIT));
+#endif
+ EXPECT_CAPMODE(link(TmpFile("foo"), TmpFile("bar")));
+ struct stat sb;
+ EXPECT_CAPMODE(lstat(TmpFile("cap_capmode_lstat"), &sb));
+ EXPECT_CAPMODE(mknod(TmpFile("capmode_mknod"), 0644 | S_IFIFO, 0));
+ EXPECT_CAPMODE(bogus_mount_());
+ EXPECT_CAPMODE(open("/dev/null", O_RDWR));
+ char buf[64];
+ EXPECT_CAPMODE(readlink(TmpFile("cap_capmode_readlink"), buf, sizeof(buf)));
+#ifdef HAVE_REVOKE
+ EXPECT_CAPMODE(revoke(TmpFile("cap_capmode_revoke")));
+#endif
+ EXPECT_CAPMODE(stat(TmpFile("cap_capmode_stat"), &sb));
+ EXPECT_CAPMODE(symlink(TmpFile("cap_capmode_symlink_from"), TmpFile("cap_capmode_symlink_to")));
+ EXPECT_CAPMODE(unlink(TmpFile("cap_capmode_unlink")));
+ EXPECT_CAPMODE(umount2("/not_mounted", 0));
+}
+
+FORK_TEST_F(WithFiles, DisallowedSocketSyscalls) {
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ // System calls that are not permitted in capability mode.
+ struct sockaddr_in addr;
+ addr.sin_family = AF_INET;
+ addr.sin_port = 0;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ EXPECT_CAPMODE(bind_(fd_socket_, (sockaddr*)&addr, sizeof(addr)));
+ addr.sin_family = AF_INET;
+ addr.sin_port = 53;
+ addr.sin_addr.s_addr = htonl(0x08080808);
+ EXPECT_CAPMODE(connect_(fd_tcp_socket_, (sockaddr*)&addr, sizeof(addr)));
+}
+
+FORK_TEST_F(WithFiles, AllowedFileSyscalls) {
+ int rc;
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ EXPECT_OK(close(fd_close_));
+ fd_close_ = -1;
+ int fd_dup = dup(fd_file_);
+ EXPECT_OK(fd_dup);
+ EXPECT_OK(dup2(fd_file_, fd_dup));
+#ifdef HAVE_DUP3
+ EXPECT_OK(dup3(fd_file_, fd_dup, 0));
+#endif
+ if (fd_dup >= 0) close(fd_dup);
+
+ struct stat sb;
+ EXPECT_OK(fstat(fd_file_, &sb));
+ EXPECT_OK(lseek(fd_file_, 0, SEEK_SET));
+ char ch;
+ EXPECT_OK(read(fd_file_, &ch, sizeof(ch)));
+ EXPECT_OK(write(fd_file_, &ch, sizeof(ch)));
+
+#ifdef HAVE_CHFLAGS
+ rc = fchflags(fd_file_, UF_NODUMP);
+ if (rc < 0) EXPECT_NE(ECAPMODE, errno);
+#endif
+
+ char buf[1024];
+ rc = getdents_(fd_dir_, (void*)buf, sizeof(buf));
+ EXPECT_OK(rc);
+
+ char data[] = "123";
+ EXPECT_OK(pwrite(fd_file_, data, 1, 0));
+ EXPECT_OK(pread(fd_file_, data, 1, 0));
+
+ struct iovec io;
+ io.iov_base = data;
+ io.iov_len = 2;
+#if !defined(__i386__) && !defined(__linux__)
+ // TODO(drysdale): reinstate these tests for 32-bit runs when possible
+ // libc bug is fixed.
+ EXPECT_OK(pwritev(fd_file_, &io, 1, 0));
+ EXPECT_OK(preadv(fd_file_, &io, 1, 0));
+#endif
+ EXPECT_OK(writev(fd_file_, &io, 1));
+ EXPECT_OK(readv(fd_file_, &io, 1));
+
+#ifdef HAVE_SYNCFS
+ EXPECT_OK(syncfs(fd_file_));
+#endif
+#ifdef HAVE_SYNC_FILE_RANGE
+ EXPECT_OK(sync_file_range(fd_file_, 0, 1, 0));
+#endif
+#ifdef HAVE_READAHEAD
+ if (!tmpdir_on_tmpfs) { // tmpfs doesn't support readahead(2)
+ EXPECT_OK(readahead(fd_file_, 0, 1));
+ }
+#endif
+}
+
+FORK_TEST_F(WithFiles, AllowedSocketSyscalls) {
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ // recvfrom() either returns -1 with EAGAIN, or 0.
+ int rc = recvfrom(fd_socket_, NULL, 0, MSG_DONTWAIT, NULL, NULL);
+ if (rc < 0) EXPECT_EQ(EAGAIN, errno);
+ char ch;
+ EXPECT_OK(write(fd_file_, &ch, sizeof(ch)));
+
+ // These calls will fail for lack of e.g. a proper name to send to,
+ // but they are allowed in capability mode, so errno != ECAPMODE.
+ EXPECT_FAIL_NOT_CAPMODE(accept(fd_socket_, NULL, NULL));
+ EXPECT_FAIL_NOT_CAPMODE(getpeername(fd_socket_, NULL, NULL));
+ EXPECT_FAIL_NOT_CAPMODE(getsockname(fd_socket_, NULL, NULL));
+ EXPECT_FAIL_NOT_CAPMODE(recvmsg(fd_socket_, NULL, 0));
+ EXPECT_FAIL_NOT_CAPMODE(sendmsg(fd_socket_, NULL, 0));
+ EXPECT_FAIL_NOT_CAPMODE(sendto(fd_socket_, NULL, 0, 0, NULL, 0));
+ off_t offset = 0;
+ EXPECT_FAIL_NOT_CAPMODE(sendfile_(fd_socket_, fd_file_, &offset, 1));
+
+ // The socket/socketpair syscalls are allowed, but they don't give
+ // anything externally useful (can't call bind/connect on them).
+ int fd_socket2 = socket(PF_INET, SOCK_DGRAM, 0);
+ EXPECT_OK(fd_socket2);
+ if (fd_socket2 >= 0) close(fd_socket2);
+ int fd_pair[2] = {-1, -1};
+ EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, fd_pair));
+ if (fd_pair[0] >= 0) close(fd_pair[0]);
+ if (fd_pair[1] >= 0) close(fd_pair[1]);
+}
+
+#ifdef HAVE_SEND_RECV_MMSG
+FORK_TEST(Capmode, AllowedMmsgSendRecv) {
+ int fd_socket = socket(PF_INET, SOCK_DGRAM, 0);
+
+ struct sockaddr_in addr;
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(0);
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ EXPECT_OK(bind(fd_socket, (sockaddr*)&addr, sizeof(addr)));
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ char buffer[256] = {0};
+ struct iovec iov;
+ iov.iov_base = buffer;
+ iov.iov_len = sizeof(buffer);
+ struct mmsghdr mm;
+ memset(&mm, 0, sizeof(mm));
+ mm.msg_hdr.msg_iov = &iov;
+ mm.msg_hdr.msg_iovlen = 1;
+ struct timespec ts;
+ ts.tv_sec = 1;
+ ts.tv_nsec = 100;
+ EXPECT_FAIL_NOT_CAPMODE(recvmmsg(fd_socket, &mm, 1, MSG_DONTWAIT, &ts));
+ EXPECT_FAIL_NOT_CAPMODE(sendmmsg(fd_socket, &mm, 1, 0));
+ close(fd_socket);
+}
+#endif
+
+FORK_TEST(Capmode, AllowedIdentifierSyscalls) {
+ // Record some identifiers
+ gid_t my_gid = getgid();
+ pid_t my_pid = getpid();
+ pid_t my_ppid = getppid();
+ uid_t my_uid = getuid();
+ pid_t my_sid = getsid(my_pid);
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ EXPECT_EQ(my_gid, getegid_());
+ EXPECT_EQ(my_uid, geteuid_());
+ EXPECT_EQ(my_gid, getgid_());
+ EXPECT_EQ(my_pid, getpid());
+ EXPECT_EQ(my_ppid, getppid());
+ EXPECT_EQ(my_uid, getuid_());
+ EXPECT_EQ(my_sid, getsid(my_pid));
+ gid_t grps[128];
+ EXPECT_OK(getgroups_(128, grps));
+ uid_t ruid;
+ uid_t euid;
+ uid_t suid;
+ EXPECT_OK(getresuid(&ruid, &euid, &suid));
+ gid_t rgid;
+ gid_t egid;
+ gid_t sgid;
+ EXPECT_OK(getresgid(&rgid, &egid, &sgid));
+#ifdef HAVE_GETLOGIN
+ EXPECT_TRUE(getlogin() != NULL);
+#endif
+
+ // Set various identifiers (to their existing values).
+ EXPECT_OK(setgid(my_gid));
+#ifdef HAVE_SETFSGID
+ EXPECT_OK(setfsgid(my_gid));
+#endif
+ EXPECT_OK(setuid(my_uid));
+#ifdef HAVE_SETFSUID
+ EXPECT_OK(setfsuid(my_uid));
+#endif
+ EXPECT_OK(setregid(my_gid, my_gid));
+ EXPECT_OK(setresgid(my_gid, my_gid, my_gid));
+ EXPECT_OK(setreuid(my_uid, my_uid));
+ EXPECT_OK(setresuid(my_uid, my_uid, my_uid));
+ EXPECT_OK(setsid());
+}
+
+FORK_TEST(Capmode, AllowedSchedSyscalls) {
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+ int policy = sched_getscheduler(0);
+ EXPECT_OK(policy);
+ struct sched_param sp;
+ EXPECT_OK(sched_getparam(0, &sp));
+ if (policy >= 0 && (!SCHED_SETSCHEDULER_REQUIRES_ROOT || getuid() == 0)) {
+ EXPECT_OK(sched_setscheduler(0, policy, &sp));
+ }
+ EXPECT_OK(sched_setparam(0, &sp));
+ EXPECT_OK(sched_get_priority_max(policy));
+ EXPECT_OK(sched_get_priority_min(policy));
+ struct timespec ts;
+ EXPECT_OK(sched_rr_get_interval(0, &ts));
+ EXPECT_OK(sched_yield());
+}
+
+
+FORK_TEST(Capmode, AllowedTimerSyscalls) {
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+ struct timespec ts;
+ EXPECT_OK(clock_getres(CLOCK_REALTIME, &ts));
+ EXPECT_OK(clock_gettime(CLOCK_REALTIME, &ts));
+ struct itimerval itv;
+ EXPECT_OK(getitimer(ITIMER_REAL, &itv));
+ EXPECT_OK(setitimer(ITIMER_REAL, &itv, NULL));
+ struct timeval tv;
+ struct timezone tz;
+ EXPECT_OK(gettimeofday(&tv, &tz));
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1;
+ EXPECT_OK(nanosleep(&ts, NULL));
+}
+
+
+FORK_TEST(Capmode, AllowedProfilSyscall) {
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+ char sbuf[32];
+ EXPECT_OK(profil((profil_arg1_t*)sbuf, sizeof(sbuf), 0, 1));
+}
+
+
+FORK_TEST(Capmode, AllowedResourceSyscalls) {
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+ errno = 0;
+ int rc = getpriority(PRIO_PROCESS, 0);
+ EXPECT_EQ(0, errno);
+ EXPECT_OK(setpriority(PRIO_PROCESS, 0, rc));
+ struct rlimit rlim;
+ EXPECT_OK(getrlimit_(RLIMIT_CORE, &rlim));
+ EXPECT_OK(setrlimit(RLIMIT_CORE, &rlim));
+ struct rusage ruse;
+ EXPECT_OK(getrusage(RUSAGE_SELF, &ruse));
+}
+
+FORK_TEST(CapMode, AllowedMmapSyscalls) {
+ // mmap() some memory.
+ size_t mem_size = getpagesize();
+ void *mem = mmap(NULL, mem_size, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+ EXPECT_TRUE(mem != NULL);
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ EXPECT_OK(msync(mem, mem_size, MS_ASYNC));
+ EXPECT_OK(madvise(mem, mem_size, MADV_NORMAL));
+ unsigned char vec[2];
+ EXPECT_OK(mincore_(mem, mem_size, vec));
+ EXPECT_OK(mprotect(mem, mem_size, PROT_READ|PROT_WRITE));
+
+ if (!MLOCK_REQUIRES_ROOT || getuid() == 0) {
+ EXPECT_OK(mlock(mem, mem_size));
+ EXPECT_OK(munlock(mem, mem_size));
+ int rc = mlockall(MCL_CURRENT);
+ if (rc != 0) {
+ // mlockall may well fail with ENOMEM for non-root users, as the
+ // default RLIMIT_MEMLOCK value isn't that big.
+ EXPECT_NE(ECAPMODE, errno);
+ }
+ EXPECT_OK(munlockall());
+ }
+ // Unmap the memory.
+ EXPECT_OK(munmap(mem, mem_size));
+}
+
+FORK_TEST(Capmode, AllowedPipeSyscalls) {
+ EXPECT_OK(cap_enter()); // Enter capability mode
+ int fd2[2];
+ int rc = pipe(fd2);
+ EXPECT_EQ(0, rc);
+
+#ifdef HAVE_VMSPLICE
+ char buf[11] = "0123456789";
+ struct iovec iov;
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+ EXPECT_FAIL_NOT_CAPMODE(vmsplice(fd2[0], &iov, 1, SPLICE_F_NONBLOCK));
+#endif
+
+ if (rc == 0) {
+ close(fd2[0]);
+ close(fd2[1]);
+ };
+#ifdef HAVE_PIPE2
+ rc = pipe2(fd2, 0);
+ EXPECT_EQ(0, rc);
+ if (rc == 0) {
+ close(fd2[0]);
+ close(fd2[1]);
+ };
+#endif
+}
+
+TEST(Capmode, AllowedAtSyscalls) {
+ int rc = mkdir(TmpFile("cap_at_syscalls"), 0755);
+ EXPECT_OK(rc);
+ if (rc < 0 && errno != EEXIST) return;
+ int dfd = open(TmpFile("cap_at_syscalls"), O_RDONLY);
+ EXPECT_OK(dfd);
+
+ int file = openat(dfd, "testfile", O_RDONLY|O_CREAT, 0644);
+ EXPECT_OK(file);
+ EXPECT_OK(close(file));
+
+
+ pid_t child = fork();
+ if (child == 0) {
+ // Child: enter cap mode and run tests
+ EXPECT_OK(cap_enter()); // Enter capability mode
+
+ struct stat fs;
+ EXPECT_OK(fstatat(dfd, "testfile", &fs, 0));
+ EXPECT_OK(mkdirat(dfd, "subdir", 0600));
+ EXPECT_OK(fchmodat(dfd, "subdir", 0644, 0));
+ EXPECT_OK(faccessat(dfd, "subdir", F_OK, 0));
+ EXPECT_OK(renameat(dfd, "subdir", dfd, "subdir2"));
+ EXPECT_OK(renameat(dfd, "subdir2", dfd, "subdir"));
+ struct timeval tv[2];
+ struct timezone tz;
+ EXPECT_OK(gettimeofday(&tv[0], &tz));
+ EXPECT_OK(gettimeofday(&tv[1], &tz));
+ EXPECT_OK(futimesat(dfd, "testfile", tv));
+
+ EXPECT_OK(fchownat(dfd, "testfile", fs.st_uid, fs.st_gid, 0));
+ EXPECT_OK(linkat(dfd, "testfile", dfd, "linky", 0));
+ EXPECT_OK(symlinkat("testfile", dfd, "symlink"));
+ char buffer[256];
+ EXPECT_OK(readlinkat(dfd, "symlink", buffer, sizeof(buffer)));
+ EXPECT_OK(unlinkat(dfd, "linky", 0));
+ EXPECT_OK(unlinkat(dfd, "subdir", AT_REMOVEDIR));
+
+ // Check that invalid requests get a non-Capsicum errno.
+ errno = 0;
+ rc = readlinkat(-1, "symlink", buffer, sizeof(buffer));
+ EXPECT_GE(0, rc);
+ EXPECT_NE(ECAPMODE, errno);
+
+ exit(HasFailure());
+ }
+
+ // Wait for the child.
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+
+ // Tidy up.
+ close(dfd);
+ rmdir(TmpFile("cap_at_syscalls/subdir"));
+ unlink(TmpFile("cap_at_syscalls/symlink"));
+ unlink(TmpFile("cap_at_syscalls/linky"));
+ unlink(TmpFile("cap_at_syscalls/testfile"));
+ rmdir(TmpFile("cap_at_syscalls"));
+}
+
+TEST(Capmode, AllowedAtSyscallsCwd) {
+ int rc = mkdir(TmpFile("cap_at_syscalls_cwd"), 0755);
+ EXPECT_OK(rc);
+ if (rc < 0 && errno != EEXIST) return;
+ int dfd = open(TmpFile("cap_at_syscalls_cwd"), O_RDONLY);
+ EXPECT_OK(dfd);
+
+ int file = openat(dfd, "testfile", O_RDONLY|O_CREAT, 0644);
+ EXPECT_OK(file);
+ EXPECT_OK(close(file));
+
+ pid_t child = fork();
+ if (child == 0) {
+ // Child: move into temp dir, enter cap mode and run tests
+ EXPECT_OK(fchdir(dfd));
+ EXPECT_OK(cap_enter()); // Enter capability mode
+
+ // Test that *at(AT_FDCWD, path,...) is policed with ECAPMODE.
+ EXPECT_CAPMODE(openat(AT_FDCWD, "testfile", O_RDONLY));
+ struct stat fs;
+ EXPECT_CAPMODE(fstatat(AT_FDCWD, "testfile", &fs, 0));
+ EXPECT_CAPMODE(mkdirat(AT_FDCWD, "subdir", 0600));
+ EXPECT_CAPMODE(fchmodat(AT_FDCWD, "subdir", 0644, 0));
+ EXPECT_CAPMODE(faccessat(AT_FDCWD, "subdir", F_OK, 0));
+ EXPECT_CAPMODE(renameat(AT_FDCWD, "subdir", AT_FDCWD, "subdir2"));
+ EXPECT_CAPMODE(renameat(AT_FDCWD, "subdir2", AT_FDCWD, "subdir"));
+ struct timeval tv[2];
+ struct timezone tz;
+ EXPECT_OK(gettimeofday(&tv[0], &tz));
+ EXPECT_OK(gettimeofday(&tv[1], &tz));
+ EXPECT_CAPMODE(futimesat(AT_FDCWD, "testfile", tv));
+
+ EXPECT_CAPMODE(fchownat(AT_FDCWD, "testfile", fs.st_uid, fs.st_gid, 0));
+ EXPECT_CAPMODE(linkat(AT_FDCWD, "testfile", AT_FDCWD, "linky", 0));
+ EXPECT_CAPMODE(symlinkat("testfile", AT_FDCWD, "symlink"));
+ char buffer[256];
+ EXPECT_CAPMODE(readlinkat(AT_FDCWD, "symlink", buffer, sizeof(buffer)));
+ EXPECT_CAPMODE(unlinkat(AT_FDCWD, "linky", 0));
+
+ exit(HasFailure());
+ }
+
+ // Wait for the child.
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+
+ // Tidy up.
+ close(dfd);
+ rmdir(TmpFile("cap_at_syscalls_cwd/subdir"));
+ unlink(TmpFile("cap_at_syscalls_cwd/symlink"));
+ unlink(TmpFile("cap_at_syscalls_cwd/linky"));
+ unlink(TmpFile("cap_at_syscalls_cwd/testfile"));
+ rmdir(TmpFile("cap_at_syscalls_cwd"));
+}
+
+TEST(Capmode, Abort) {
+ // Check that abort(3) works even in capability mode.
+ pid_t child = fork();
+ if (child == 0) {
+ // Child: enter capability mode and call abort(3).
+ // Triggers something like kill(getpid(), SIGABRT).
+ cap_enter(); // Enter capability mode.
+ abort();
+ exit(99);
+ }
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_TRUE(WIFSIGNALED(status)) << " status = " << std::hex << status;
+ EXPECT_EQ(SIGABRT, WTERMSIG(status)) << " status = " << std::hex << status;
+}
+
+FORK_TEST_F(WithFiles, AllowedMiscSyscalls) {
+ umask(022);
+ mode_t um_before = umask(022);
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ mode_t um = umask(022);
+ EXPECT_NE(-ECAPMODE, (int)um);
+ EXPECT_EQ(um_before, um);
+ stack_t ss;
+ EXPECT_OK(sigaltstack(NULL, &ss));
+
+ // Finally, tests for system calls that don't fit the pattern very well.
+ pid_t pid = fork();
+ EXPECT_OK(pid);
+ if (pid == 0) {
+ // Child: almost immediately exit.
+ sleep(1);
+ exit(0);
+ } else if (pid > 0) {
+ errno = 0;
+ EXPECT_CAPMODE(ptrace_(PTRACE_PEEKDATA_, pid, &pid, NULL));
+ EXPECT_CAPMODE(waitpid(pid, NULL, 0));
+ }
+
+ // No error return from sync(2) to test, but check errno remains unset.
+ errno = 0;
+ sync();
+ EXPECT_EQ(0, errno);
+
+ // TODO(FreeBSD): ktrace
+
+#ifdef HAVE_SYSARCH
+ // sysarch() is, by definition, architecture-dependent
+#if defined (__amd64__) || defined (__i386__)
+ long sysarch_arg = 0;
+ EXPECT_CAPMODE(sysarch(I386_SET_IOPERM, &sysarch_arg));
+#else
+ // TOOD(jra): write a test for arm
+ FAIL("capmode:no sysarch() test for current architecture");
+#endif
+#endif
+}
+
+void *thread_fn(void *p) {
+ int delay = *(int *)p;
+ sleep(delay);
+ EXPECT_OK(getpid_());
+ EXPECT_CAPMODE(open("/dev/null", O_RDWR));
+ return NULL;
+}
+
+// Check that restrictions are the same in subprocesses and threads
+FORK_TEST(Capmode, NewThread) {
+ // Fire off a new thread before entering capability mode
+ pthread_t early_thread;
+ int one = 1; // second
+ EXPECT_OK(pthread_create(&early_thread, NULL, thread_fn, &one));
+
+ // Fire off a new process before entering capability mode.
+ int early_child = fork();
+ EXPECT_OK(early_child);
+ if (early_child == 0) {
+ // Child: wait and then confirm this process is unaffect by capability mode in the parent.
+ sleep(1);
+ int fd = open("/dev/null", O_RDWR);
+ EXPECT_OK(fd);
+ close(fd);
+ exit(0);
+ }
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+ // Do an allowed syscall.
+ EXPECT_OK(getpid_());
+ int child = fork();
+ EXPECT_OK(child);
+ if (child == 0) {
+ // Child: do an allowed and a disallowed syscall.
+ EXPECT_OK(getpid_());
+ EXPECT_CAPMODE(open("/dev/null", O_RDWR));
+ exit(0);
+ }
+ // Don't (can't) wait for either child.
+
+ // Wait for the early-started thread.
+ EXPECT_OK(pthread_join(early_thread, NULL));
+
+ // Fire off a new thread.
+ pthread_t child_thread;
+ int zero = 0; // seconds
+ EXPECT_OK(pthread_create(&child_thread, NULL, thread_fn, &zero));
+ EXPECT_OK(pthread_join(child_thread, NULL));
+
+ // Fork a subprocess which fires off a new thread.
+ child = fork();
+ EXPECT_OK(child);
+ if (child == 0) {
+ pthread_t child_thread2;
+ EXPECT_OK(pthread_create(&child_thread2, NULL, thread_fn, &zero));
+ EXPECT_OK(pthread_join(child_thread2, NULL));
+ exit(0);
+ }
+ // Sleep for a bit to allow the subprocess to finish.
+ sleep(2);
+}
+
+static int had_signal = 0;
+static void handle_signal(int x) { had_signal = 1; }
+
+FORK_TEST(Capmode, SelfKill) {
+ pid_t me = getpid();
+ sighandler_t original = signal(SIGUSR1, handle_signal);
+
+ pid_t child = fork();
+ if (child == 0) {
+ // Child: sleep and exit
+ sleep(1);
+ exit(0);
+ }
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ // Can only kill(2) to own pid.
+ EXPECT_CAPMODE(kill(child, SIGUSR1));
+ EXPECT_OK(kill(me, SIGUSR1));
+ EXPECT_EQ(1, had_signal);
+
+ signal(SIGUSR1, original);
+}
diff --git a/capsicum-freebsd.h b/capsicum-freebsd.h
new file mode 100644
index 000000000000..96ceb9b6d5f1
--- /dev/null
+++ b/capsicum-freebsd.h
@@ -0,0 +1,73 @@
+#ifndef __CAPSICUM_FREEBSD_H__
+#define __CAPSICUM_FREEBSD_H__
+#ifdef __FreeBSD__
+/************************************************************
+ * FreeBSD Capsicum Functionality.
+ ************************************************************/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* FreeBSD definitions. */
+#include <errno.h>
+#include <sys/param.h>
+#if __FreeBSD_version >= 1100014 || \
+ (__FreeBSD_version >= 1001511 && __FreeBSD_version < 1100000)
+#include <sys/capsicum.h>
+#else
+#include <sys/capability.h>
+#endif
+#include <sys/procdesc.h>
+
+#if __FreeBSD_version >= 1000000
+#define AT_SYSCALLS_IN_CAPMODE
+#define HAVE_CAP_RIGHTS_GET
+#define HAVE_CAP_RIGHTS_LIMIT
+#define HAVE_PROCDESC_FSTAT
+#define HAVE_CAP_FCNTLS_LIMIT
+// fcntl(2) takes int, cap_fcntls_limit(2) takes uint32_t.
+typedef uint32_t cap_fcntl_t;
+#define HAVE_CAP_IOCTLS_LIMIT
+// ioctl(2) and cap_ioctls_limit(2) take unsigned long.
+typedef unsigned long cap_ioctl_t;
+
+#if __FreeBSD_version >= 1101000
+#define HAVE_OPENAT_INTERMEDIATE_DOTDOT
+#endif
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+// Use fexecve_() in tests to allow Linux variant to bypass glibc version.
+#define fexecve_(F, A, E) fexecve(F, A, E)
+
+#ifdef ENOTBENEATH
+#define E_NO_TRAVERSE_CAPABILITY ENOTBENEATH
+#define E_NO_TRAVERSE_O_BENEATH ENOTBENEATH
+#else
+#define E_NO_TRAVERSE_CAPABILITY ENOTCAPABLE
+#define E_NO_TRAVERSE_O_BENEATH ENOTCAPABLE
+#endif
+
+// FreeBSD limits the number of ioctls in cap_ioctls_limit to 256
+#define CAP_IOCTLS_LIMIT_MAX 256
+
+// Too many links
+#define E_TOO_MANY_LINKS EMLINK
+
+// TODO(FreeBSD): uncomment if/when FreeBSD propagates rights on accept.
+// FreeBSD does not generate a capability from accept(cap_fd,...).
+// https://bugs.freebsd.org/201052
+// #define CAP_FROM_ACCEPT
+// TODO(FreeBSD): uncomment if/when FreeBSD propagates rights on sctp_peeloff.
+// FreeBSD does not generate a capability from sctp_peeloff(cap_fd,...).
+// https://bugs.freebsd.org/201052
+// #define CAP_FROM_PEELOFF
+
+#endif /* __FreeBSD__ */
+
+#endif /*__CAPSICUM_FREEBSD_H__*/
diff --git a/capsicum-linux.h b/capsicum-linux.h
new file mode 100644
index 000000000000..5bea418a2bc7
--- /dev/null
+++ b/capsicum-linux.h
@@ -0,0 +1,40 @@
+#ifndef __CAPSICUM_LINUX_H__
+#define __CAPSICUM_LINUX_H__
+
+#ifdef __linux__
+/************************************************************
+ * Linux Capsicum Functionality.
+ ************************************************************/
+#include <errno.h>
+#include <sys/procdesc.h>
+#include <sys/capsicum.h>
+
+#define HAVE_CAP_RIGHTS_LIMIT
+#define HAVE_CAP_RIGHTS_GET
+#define HAVE_CAP_FCNTLS_LIMIT
+#define HAVE_CAP_IOCTLS_LIMIT
+#define HAVE_PROC_FDINFO
+#define HAVE_PDWAIT4
+#define CAP_FROM_ACCEPT
+// TODO(drysdale): uncomment if/when Linux propagates rights on sctp_peeloff.
+// Linux does not generate a capability from sctp_peeloff(cap_fd,...).
+// #define CAP_FROM_PEELOFF
+// TODO(drysdale): uncomment if/when Linux allows intermediate .. path segments
+// for openat()-like operations.
+// #define HAVE_OPENAT_INTERMEDIATE_DOTDOT
+
+// Failure to open file due to path traversal generates EPERM
+#ifdef ENOTBENEATH
+#define E_NO_TRAVERSE_CAPABILITY ENOTBENEATH
+#define E_NO_TRAVERSE_O_BENEATH ENOTBENEATH
+#else
+#define E_NO_TRAVERSE_CAPABILITY EPERM
+#define E_NO_TRAVERSE_O_BENEATH EPERM
+#endif
+
+// Too many links
+#define E_TOO_MANY_LINKS ELOOP
+
+#endif /* __linux__ */
+
+#endif /*__CAPSICUM_LINUX_H__*/
diff --git a/capsicum-rights.h b/capsicum-rights.h
new file mode 100644
index 000000000000..73c445434404
--- /dev/null
+++ b/capsicum-rights.h
@@ -0,0 +1,118 @@
+#ifndef __CAPSICUM_RIGHTS_H__
+#define __CAPSICUM_RIGHTS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __FreeBSD__
+#include <sys/param.h>
+#if __FreeBSD_version >= 1100014 || \
+ (__FreeBSD_version >= 1001511 && __FreeBSD_version < 1100000)
+#include <sys/capsicum.h>
+#else
+#include <sys/capability.h>
+#endif
+#endif
+
+#ifdef __linux__
+#include <linux/capsicum.h>
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef CAP_RIGHTS_VERSION
+/************************************************************
+ * Capsicum compatibility layer: implement new (FreeBSD10.x)
+ * rights manipulation API in terms of original (FreeBSD9.x)
+ * functionality.
+ ************************************************************/
+#include <stdarg.h>
+#include <stdbool.h>
+
+/* Rights manipulation macros/functions.
+ * Note that these use variadic macros, available in C99 / C++11 (and
+ * also in earlier gcc versions).
+ */
+#define cap_rights_init(rights, ...) _cap_rights_init((rights), __VA_ARGS__, 0ULL)
+#define cap_rights_set(rights, ...) _cap_rights_set((rights), __VA_ARGS__, 0ULL)
+#define cap_rights_clear(rights, ...) _cap_rights_clear((rights), __VA_ARGS__, 0ULL)
+#define cap_rights_is_set(rights, ...) _cap_rights_is_set((rights), __VA_ARGS__, 0ULL)
+
+inline cap_rights_t* _cap_rights_init(cap_rights_t *rights, ...) {
+ va_list ap;
+ cap_rights_t right;
+ *rights = 0;
+ va_start(ap, rights);
+ while (true) {
+ right = va_arg(ap, cap_rights_t);
+ *rights |= right;
+ if (right == 0) break;
+ }
+ va_end(ap);
+ return rights;
+}
+
+inline cap_rights_t* _cap_rights_set(cap_rights_t *rights, ...) {
+ va_list ap;
+ cap_rights_t right;
+ va_start(ap, rights);
+ while (true) {
+ right = va_arg(ap, cap_rights_t);
+ *rights |= right;
+ if (right == 0) break;
+ }
+ va_end(ap);
+ return rights;
+}
+
+inline cap_rights_t* _cap_rights_clear(cap_rights_t *rights, ...) {
+ va_list ap;
+ cap_rights_t right;
+ va_start(ap, rights);
+ while (true) {
+ right = va_arg(ap, cap_rights_t);
+ *rights &= ~right;
+ if (right == 0) break;
+ }
+ va_end(ap);
+ return rights;
+}
+
+inline bool _cap_rights_is_set(const cap_rights_t *rights, ...) {
+ va_list ap;
+ cap_rights_t right;
+ cap_rights_t accumulated = 0;
+ va_start(ap, rights);
+ while (true) {
+ right = va_arg(ap, cap_rights_t);
+ accumulated |= right;
+ if (right == 0) break;
+ }
+ va_end(ap);
+ return (accumulated & *rights) == accumulated;
+}
+
+inline bool _cap_rights_is_valid(const cap_rights_t *rights) {
+ return true;
+}
+
+inline cap_rights_t* cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src) {
+ *dst |= *src;
+ return dst;
+}
+
+inline cap_rights_t* cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src) {
+ *dst &= ~(*src);
+ return dst;
+}
+
+inline bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little) {
+ return ((*big) & (*little)) == (*little);
+}
+
+#endif /* old/new style rights manipulation */
+
+#endif /*__CAPSICUM_RIGHTS_H__*/
diff --git a/capsicum-test-main.cc b/capsicum-test-main.cc
new file mode 100644
index 000000000000..c8f35b71a000
--- /dev/null
+++ b/capsicum-test-main.cc
@@ -0,0 +1,101 @@
+#include <sys/types.h>
+#ifdef __linux__
+#include <sys/vfs.h>
+#include <linux/magic.h>
+#endif
+#include <ctype.h>
+#include <errno.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <iostream>
+#include "gtest/gtest.h"
+#include "capsicum-test.h"
+
+std::string tmpdir;
+
+class SetupEnvironment : public ::testing::Environment
+{
+public:
+ SetupEnvironment() : teardown_tmpdir_(false) {}
+ void SetUp() override {
+ if (tmpdir.empty()) {
+ std::cerr << "Generating temporary directory root: ";
+ CreateTemporaryRoot();
+ } else {
+ std::cerr << "User provided temporary directory root: ";
+ }
+ std::cerr << tmpdir << std::endl;
+ }
+ void CreateTemporaryRoot() {
+ char *tmpdir_name = tempnam(nullptr, "cptst");
+
+ ASSERT_NE(tmpdir_name, nullptr);
+ ASSERT_EQ(mkdir(tmpdir_name, 0700), 0) <<
+ "Could not create temp directory, " << tmpdir_name << ": " <<
+ strerror(errno);
+ tmpdir = std::string(tmpdir_name);
+ free(tmpdir_name);
+ teardown_tmpdir_ = true;
+ }
+ void TearDown() override {
+ if (teardown_tmpdir_) {
+ rmdir(tmpdir.c_str());
+ }
+ }
+private:
+ bool teardown_tmpdir_;
+};
+
+int main(int argc, char* argv[]) {
+ ::testing::InitGoogleTest(&argc, argv);
+ for (int ii = 1; ii < argc; ii++) {
+ if (strcmp(argv[ii], "-v") == 0) {
+ verbose = true;
+ } else if (strcmp(argv[ii], "-T") == 0) {
+ ii++;
+ assert(ii < argc);
+ tmpdir = argv[ii];
+ struct stat info;
+ stat(tmpdir.c_str(), &info);
+ assert(S_ISDIR(info.st_mode));
+ } else if (strcmp(argv[ii], "-t") == 0) {
+ force_mt = true;
+ } else if (strcmp(argv[ii], "-F") == 0) {
+ force_nofork = true;
+ } else if (strcmp(argv[ii], "-u") == 0) {
+ if (++ii >= argc) {
+ std::cerr << "-u needs argument" << std::endl;
+ exit(1);
+ }
+ if (isdigit(argv[ii][0])) {
+ other_uid = atoi(argv[ii]);
+ } else {
+ struct passwd *p = getpwnam(argv[ii]);
+ if (!p) {
+ std::cerr << "Failed to get entry for " << argv[ii] << ", errno=" << errno << std::endl;
+ exit(1);
+ }
+ other_uid = p->pw_uid;
+ }
+ }
+ }
+ if (other_uid == 0) {
+ struct stat info;
+ if (stat(argv[0], &info) == 0) {
+ other_uid = info.st_uid;
+ }
+ }
+
+#ifdef __linux__
+ // Check whether our temporary directory is on a tmpfs volume.
+ struct statfs fsinfo;
+ statfs(tmpdir.c_str(), &fsinfo);
+ tmpdir_on_tmpfs = (fsinfo.f_type == TMPFS_MAGIC);
+#endif
+
+ testing::AddGlobalTestEnvironment(new SetupEnvironment());
+ int rc = RUN_ALL_TESTS();
+ ShowSkippedTests(std::cerr);
+ return rc;
+}
diff --git a/capsicum-test.cc b/capsicum-test.cc
new file mode 100644
index 000000000000..24b096ed877c
--- /dev/null
+++ b/capsicum-test.cc
@@ -0,0 +1,102 @@
+#include "capsicum-test.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+
+#include <map>
+#include <vector>
+#include <string>
+
+bool verbose = false;
+bool tmpdir_on_tmpfs = false;
+bool force_mt = false;
+bool force_nofork = false;
+uid_t other_uid = 0;
+
+namespace {
+std::map<std::string, std::string> tmp_paths;
+}
+
+const char *TmpFile(const char *p) {
+ std::string pathname(p);
+ if (tmp_paths.find(pathname) == tmp_paths.end()) {
+ std::string fullname = tmpdir + "/" + pathname;
+ tmp_paths[pathname] = fullname;
+ }
+ return tmp_paths[pathname].c_str();
+}
+
+char ProcessState(int pid) {
+#ifdef __linux__
+ // Open the process status file.
+ char s[1024];
+ snprintf(s, sizeof(s), "/proc/%d/status", pid);
+ FILE *f = fopen(s, "r");
+ if (f == NULL) return '\0';
+
+ // Read the file line by line looking for the state line.
+ const char *prompt = "State:\t";
+ while (!feof(f)) {
+ fgets(s, sizeof(s), f);
+ if (!strncmp(s, prompt, strlen(prompt))) {
+ fclose(f);
+ return s[strlen(prompt)];
+ }
+ }
+ fclose(f);
+ return '?';
+#endif
+#ifdef __FreeBSD__
+ char buffer[1024];
+ snprintf(buffer, sizeof(buffer), "ps -p %d -o state | grep -v STAT", pid);
+ sig_t original = signal(SIGCHLD, SIG_IGN);
+ FILE* cmd = popen(buffer, "r");
+ usleep(50000); // allow any pending SIGCHLD signals to arrive
+ signal(SIGCHLD, original);
+ int result = fgetc(cmd);
+ fclose(cmd);
+ // Map FreeBSD codes to Linux codes.
+ switch (result) {
+ case EOF:
+ return '\0';
+ case 'D': // disk wait
+ case 'R': // runnable
+ case 'S': // sleeping
+ case 'T': // stopped
+ case 'Z': // zombie
+ return result;
+ case 'W': // idle interrupt thread
+ return 'S';
+ case 'I': // idle
+ return 'S';
+ case 'L': // waiting to acquire lock
+ default:
+ return '?';
+ }
+#endif
+}
+
+typedef std::vector<std::string> TestList;
+typedef std::map<std::string, TestList*> SkippedTestMap;
+static SkippedTestMap skipped_tests;
+void TestSkipped(const char *testcase, const char *test, const std::string& reason) {
+ if (skipped_tests.find(reason) == skipped_tests.end()) {
+ skipped_tests[reason] = new TestList;
+ }
+ std::string testname(testcase);
+ testname += ".";
+ testname += test;
+ skipped_tests[reason]->push_back(testname);
+}
+
+void ShowSkippedTests(std::ostream& os) {
+ for (SkippedTestMap::iterator skiplist = skipped_tests.begin();
+ skiplist != skipped_tests.end(); ++skiplist) {
+ os << "Following tests were skipped because: " << skiplist->first << std::endl;
+ for (size_t ii = 0; ii < skiplist->second->size(); ++ii) {
+ const std::string& testname((*skiplist->second)[ii]);
+ os << " " << testname << std::endl;
+ }
+ }
+}
diff --git a/capsicum-test.h b/capsicum-test.h
new file mode 100644
index 000000000000..61d95711eec0
--- /dev/null
+++ b/capsicum-test.h
@@ -0,0 +1,260 @@
+/* -*- C++ -*- */
+#ifndef CAPSICUM_TEST_H
+#define CAPSICUM_TEST_H
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/resource.h>
+#include <signal.h>
+
+#include <ios>
+#include <ostream>
+
+#include "gtest/gtest.h"
+
+extern bool verbose;
+extern std::string tmpdir;
+extern bool tmpdir_on_tmpfs;
+extern bool force_mt;
+extern bool force_nofork;
+extern uid_t other_uid;
+
+static inline void *WaitingThreadFn(void *p) {
+ // Loop until cancelled
+ while (true) {
+ usleep(10000);
+ pthread_testcancel();
+ }
+ return NULL;
+}
+
+// If force_mt is set, run another thread in parallel with the test. This forces
+// the kernel into multi-threaded mode.
+template <typename T, typename Function>
+void MaybeRunWithThread(T *self, Function fn) {
+ pthread_t subthread;
+ if (force_mt) {
+ pthread_create(&subthread, NULL, WaitingThreadFn, NULL);
+ }
+ (self->*fn)();
+ if (force_mt) {
+ pthread_cancel(subthread);
+ pthread_join(subthread, NULL);
+ }
+}
+template <typename Function>
+void MaybeRunWithThread(Function fn) {
+ pthread_t subthread;
+ if (force_mt) {
+ pthread_create(&subthread, NULL, WaitingThreadFn, NULL);
+ }
+ (fn)();
+ if (force_mt) {
+ pthread_cancel(subthread);
+ pthread_join(subthread, NULL);
+ }
+}
+
+// Return the absolute path of a filename in the temp directory, `tmpdir`,
+// with the given pathname, e.g., "/tmp/<pathname>", if `tmpdir` was set to
+// "/tmp".
+const char *TmpFile(const char *pathname);
+
+// Run the given test function in a forked process, so that trapdoor
+// entry doesn't affect other tests, and watch out for hung processes.
+// Implemented as a macro to allow access to the test case instance's
+// HasFailure() method, which is reported as the forked process's
+// exit status.
+#define _RUN_FORKED(INNERCODE, TESTCASENAME, TESTNAME) \
+ pid_t pid = force_nofork ? 0 : fork(); \
+ if (pid == 0) { \
+ INNERCODE; \
+ if (!force_nofork) { \
+ exit(HasFailure()); \
+ } \
+ } else if (pid > 0) { \
+ int rc, status; \
+ int remaining_us = 10000000; \
+ while (remaining_us > 0) { \
+ status = 0; \
+ rc = waitpid(pid, &status, WNOHANG); \
+ if (rc != 0) break; \
+ remaining_us -= 10000; \
+ usleep(10000); \
+ } \
+ if (remaining_us <= 0) { \
+ fprintf(stderr, "Warning: killing unresponsive test " \
+ "%s.%s (pid %d)\n", \
+ TESTCASENAME, TESTNAME, pid); \
+ kill(pid, SIGKILL); \
+ ADD_FAILURE() << "Test hung"; \
+ } else if (rc < 0) { \
+ fprintf(stderr, "Warning: waitpid error %s (%d)\n", \
+ strerror(errno), errno); \
+ ADD_FAILURE() << "Failed to wait for child"; \
+ } else { \
+ int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; \
+ EXPECT_EQ(0, rc); \
+ } \
+ }
+#define _RUN_FORKED_MEM(THIS, TESTFN, TESTCASENAME, TESTNAME) \
+ _RUN_FORKED(MaybeRunWithThread(THIS, &TESTFN), TESTCASENAME, TESTNAME);
+#define _RUN_FORKED_FN(TESTFN, TESTCASENAME, TESTNAME) \
+ _RUN_FORKED(MaybeRunWithThread(&TESTFN), TESTCASENAME, TESTNAME);
+
+// Run a test case in a forked process, possibly cleaning up a
+// test file after completion
+#define FORK_TEST_ON(test_case_name, test_name, test_file) \
+ static void test_case_name##_##test_name##_ForkTest(); \
+ TEST(test_case_name, test_name ## Forked) { \
+ _RUN_FORKED_FN(test_case_name##_##test_name##_ForkTest, \
+ #test_case_name, #test_name); \
+ const char *filename = test_file; \
+ if (filename) unlink(filename); \
+ } \
+ static void test_case_name##_##test_name##_ForkTest()
+
+#define FORK_TEST(test_case_name, test_name) FORK_TEST_ON(test_case_name, test_name, NULL)
+
+// Run a test case fixture in a forked process, so that trapdoors don't
+// affect other tests.
+#define ICLASS_NAME(test_case_name, test_name) Forked##test_case_name##_##test_name
+#define FORK_TEST_F(test_case_name, test_name) \
+ class ICLASS_NAME(test_case_name, test_name) : public test_case_name { \
+ public: \
+ ICLASS_NAME(test_case_name, test_name)() {} \
+ void InnerTestBody(); \
+ }; \
+ TEST_F(ICLASS_NAME(test_case_name, test_name), _) { \
+ _RUN_FORKED_MEM(this, \
+ ICLASS_NAME(test_case_name, test_name)::InnerTestBody, \
+ #test_case_name, #test_name); \
+ } \
+ void ICLASS_NAME(test_case_name, test_name)::InnerTestBody()
+
+// Emit errno information on failure
+#define EXPECT_OK(v) EXPECT_LE(0, v) << " errno " << errno << " " << strerror(errno)
+
+// Expect a syscall to fail with the given error.
+#define EXPECT_SYSCALL_FAIL(E, C) \
+ do { \
+ EXPECT_GT(0, C); \
+ EXPECT_EQ(E, errno); \
+ } while (0)
+
+// Expect a syscall to fail with anything other than the given error.
+#define EXPECT_SYSCALL_FAIL_NOT(E, C) \
+ do { \
+ EXPECT_GT(0, C); \
+ EXPECT_NE(E, errno); \
+ } while (0)
+
+// Expect a void syscall to fail with anything other than the given error.
+#define EXPECT_VOID_SYSCALL_FAIL_NOT(E, C) \
+ do { \
+ errno = 0; \
+ C; \
+ EXPECT_NE(E, errno) << #C << " failed with ECAPMODE"; \
+ } while (0)
+
+// Expect a system call to fail due to path traversal; exact error
+// code is OS-specific.
+#ifdef O_BENEATH
+#define EXPECT_OPENAT_FAIL_TRAVERSAL(fd, path, flags) \
+ do { \
+ const int result = openat((fd), (path), (flags)); \
+ if (((flags) & O_BENEATH) == O_BENEATH) { \
+ EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_O_BENEATH, result); \
+ } else { \
+ EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_CAPABILITY, result); \
+ } \
+ } while (0)
+#else
+#define EXPECT_OPENAT_FAIL_TRAVERSAL(fd, path, flags) \
+ do { \
+ const int result = openat((fd), (path), (flags)); \
+ EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_CAPABILITY, result); \
+ } while (0)
+#endif
+
+// Expect a system call to fail with ECAPMODE.
+#define EXPECT_CAPMODE(C) EXPECT_SYSCALL_FAIL(ECAPMODE, C)
+
+// Expect a system call to fail, but not with ECAPMODE.
+#define EXPECT_FAIL_NOT_CAPMODE(C) EXPECT_SYSCALL_FAIL_NOT(ECAPMODE, C)
+#define EXPECT_FAIL_VOID_NOT_CAPMODE(C) EXPECT_VOID_SYSCALL_FAIL_NOT(ECAPMODE, C)
+
+// Expect a system call to fail with ENOTCAPABLE.
+#define EXPECT_NOTCAPABLE(C) EXPECT_SYSCALL_FAIL(ENOTCAPABLE, C)
+
+// Expect a system call to fail, but not with ENOTCAPABLE.
+#define EXPECT_FAIL_NOT_NOTCAPABLE(C) EXPECT_SYSCALL_FAIL_NOT(ENOTCAPABLE, C)
+
+// Expect a system call to fail with either ENOTCAPABLE or ECAPMODE.
+#define EXPECT_CAPFAIL(C) \
+ do { \
+ int rc = C; \
+ EXPECT_GT(0, rc); \
+ EXPECT_TRUE(errno == ECAPMODE || errno == ENOTCAPABLE) \
+ << #C << " did not fail with ECAPMODE/ENOTCAPABLE but " << errno; \
+ } while (0)
+
+// Ensure that 'rights' are a subset of 'max'.
+#define EXPECT_RIGHTS_IN(rights, max) \
+ EXPECT_TRUE(cap_rights_contains((max), (rights))) \
+ << "rights " << std::hex << *(rights) \
+ << " not a subset of " << std::hex << *(max)
+
+// Ensure rights are identical
+#define EXPECT_RIGHTS_EQ(a, b) \
+ do { \
+ EXPECT_RIGHTS_IN((a), (b)); \
+ EXPECT_RIGHTS_IN((b), (a)); \
+ } while (0)
+
+// Get the state of a process as a single character.
+// - 'D': disk wait
+// - 'R': runnable
+// - 'S': sleeping/idle
+// - 'T': stopped
+// - 'Z': zombie
+// On error, return either '?' or '\0'.
+char ProcessState(int pid);
+
+// Check process state reaches a particular expected state (or two).
+// Retries a few times to allow for timing issues.
+#define EXPECT_PID_REACHES_STATES(pid, expected1, expected2) { \
+ int counter = 5; \
+ char state; \
+ do { \
+ state = ProcessState(pid); \
+ if (state == expected1 || state == expected2) break; \
+ usleep(100000); \
+ } while (--counter > 0); \
+ EXPECT_TRUE(state == expected1 || state == expected2) \
+ << " pid " << pid << " in state " << state; \
+}
+
+#define EXPECT_PID_ALIVE(pid) EXPECT_PID_REACHES_STATES(pid, 'R', 'S')
+#define EXPECT_PID_DEAD(pid) EXPECT_PID_REACHES_STATES(pid, 'Z', '\0')
+#define EXPECT_PID_ZOMBIE(pid) EXPECT_PID_REACHES_STATES(pid, 'Z', 'Z');
+#define EXPECT_PID_GONE(pid) EXPECT_PID_REACHES_STATES(pid, '\0', '\0');
+
+void ShowSkippedTests(std::ostream& os);
+void TestSkipped(const char *testcase, const char *test, const std::string& reason);
+#define TEST_SKIPPED(reason) \
+ do { \
+ const ::testing::TestInfo* const info = ::testing::UnitTest::GetInstance()->current_test_info(); \
+ std::cerr << "Skipping " << info->test_case_name() << "::" << info->name() << " because: " << reason << std::endl; \
+ TestSkipped(info->test_case_name(), info->name(), reason); \
+ } while (0)
+
+// Mark a test that can only be run as root.
+#define REQUIRE_ROOT() \
+ if (getuid() != 0) { \
+ TEST_SKIPPED("requires root"); \
+ return; \
+ }
+
+#endif // CAPSICUM_TEST_H
diff --git a/capsicum.h b/capsicum.h
new file mode 100644
index 000000000000..c7704ea03cdc
--- /dev/null
+++ b/capsicum.h
@@ -0,0 +1,170 @@
+/*
+ * Minimal portability layer for Capsicum-related features.
+ */
+#ifndef __CAPSICUM_H__
+#define __CAPSICUM_H__
+
+#ifdef __FreeBSD__
+#include "capsicum-freebsd.h"
+#endif
+
+#ifdef __linux__
+#include "capsicum-linux.h"
+#endif
+
+/*
+ * CAP_ALL/CAP_NONE is a value in FreeBSD9.x Capsicum, but a functional macro
+ * in FreeBSD10.x Capsicum. Always use CAP_SET_ALL/CAP_SET_NONE instead.
+ */
+#ifndef CAP_SET_ALL
+#ifdef CAP_RIGHTS_VERSION
+#define CAP_SET_ALL(rights) CAP_ALL(rights)
+#else
+#define CAP_SET_ALL(rights) *(rights) = CAP_MASK_VALID
+#endif
+#endif
+
+#ifndef CAP_SET_NONE
+#ifdef CAP_RIGHTS_VERSION
+#define CAP_SET_NONE(rights) CAP_NONE(rights)
+#else
+#define CAP_SET_NONE(rights) *(rights) = 0
+#endif
+#endif
+
+
+/************************************************************
+ * Define new-style rights in terms of old-style rights if
+ * absent.
+ ************************************************************/
+#include "capsicum-rights.h"
+
+/*
+ * Cope with systems (e.g. FreeBSD 10.x) where CAP_RENAMEAT hasn't been split out.
+ * (src, dest): RENAMEAT, LINKAT => RENAMEAT_SOURCE, RENAMEAT_TARGET
+ */
+#ifndef CAP_RENAMEAT_SOURCE
+#define CAP_RENAMEAT_SOURCE CAP_RENAMEAT
+#endif
+#ifndef CAP_RENAMEAT_TARGET
+#define CAP_RENAMEAT_TARGET CAP_LINKAT
+#endif
+/*
+ * Cope with systems (e.g. FreeBSD 10.x) where CAP_RENAMEAT hasn't been split out.
+ * (src, dest): 0, LINKAT => LINKAT_SOURCE, LINKAT_TARGET
+ */
+#ifndef CAP_LINKAT_SOURCE
+#define CAP_LINKAT_SOURCE CAP_LOOKUP
+#endif
+#ifndef CAP_LINKAT_TARGET
+#define CAP_LINKAT_TARGET CAP_LINKAT
+#endif
+
+#ifdef CAP_PREAD
+/* Existence of CAP_PREAD implies new-style CAP_SEEK semantics */
+#define CAP_SEEK_ASWAS 0
+#else
+/* Old-style CAP_SEEK semantics */
+#define CAP_SEEK_ASWAS CAP_SEEK
+#define CAP_PREAD CAP_READ
+#define CAP_PWRITE CAP_WRITE
+#endif
+
+#ifndef CAP_MMAP_R
+#define CAP_MMAP_R (CAP_READ|CAP_MMAP)
+#define CAP_MMAP_W (CAP_WRITE|CAP_MMAP)
+#define CAP_MMAP_X (CAP_MAPEXEC|CAP_MMAP)
+#define CAP_MMAP_RW (CAP_MMAP_R|CAP_MMAP_W)
+#define CAP_MMAP_RX (CAP_MMAP_R|CAP_MMAP_X)
+#define CAP_MMAP_WX (CAP_MMAP_W|CAP_MMAP_X)
+#define CAP_MMAP_RWX (CAP_MMAP_R|CAP_MMAP_W|CAP_MMAP_X)
+#endif
+
+#ifndef CAP_MKFIFOAT
+#define CAP_MKFIFOAT CAP_MKFIFO
+#endif
+
+#ifndef CAP_MKNODAT
+#define CAP_MKNODAT CAP_MKFIFOAT
+#endif
+
+#ifndef CAP_MKDIRAT
+#define CAP_MKDIRAT CAP_MKDIR
+#endif
+
+#ifndef CAP_UNLINKAT
+#define CAP_UNLINKAT CAP_RMDIR
+#endif
+
+#ifndef CAP_SOCK_CLIENT
+#define CAP_SOCK_CLIENT \
+ (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
+ CAP_PEELOFF | CAP_READ | CAP_WRITE | CAP_SETSOCKOPT | CAP_SHUTDOWN)
+#endif
+
+#ifndef CAP_SOCK_SERVER
+#define CAP_SOCK_SERVER \
+ (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
+ CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_READ | CAP_WRITE | \
+ CAP_SETSOCKOPT | CAP_SHUTDOWN)
+#endif
+
+#ifndef CAP_EVENT
+#define CAP_EVENT CAP_POLL_EVENT
+#endif
+
+/************************************************************
+ * Define new-style API functions in terms of old-style API
+ * functions if absent.
+ ************************************************************/
+#ifndef HAVE_CAP_RIGHTS_GET
+/* Define cap_rights_get() in terms of old-style cap_getrights() */
+inline int cap_rights_get(int fd, cap_rights_t *rights) {
+ return cap_getrights(fd, rights);
+}
+#endif
+
+#ifndef HAVE_CAP_RIGHTS_LIMIT
+/* Define cap_rights_limit() in terms of old-style cap_new() and dup2() */
+#include <unistd.h>
+inline int cap_rights_limit(int fd, const cap_rights_t *rights) {
+ int cap = cap_new(fd, *rights);
+ if (cap < 0) return cap;
+ int rc = dup2(cap, fd);
+ if (rc < 0) return rc;
+ close(cap);
+ return rc;
+}
+#endif
+
+#include <stdio.h>
+#ifdef CAP_RIGHTS_VERSION
+/* New-style Capsicum API extras for debugging */
+static inline void cap_rights_describe(const cap_rights_t *rights, char *buffer) {
+ int ii;
+ for (ii = 0; ii < (CAP_RIGHTS_VERSION+2); ii++) {
+ int len = sprintf(buffer, "0x%016llx ", (unsigned long long)rights->cr_rights[ii]);
+ buffer += len;
+ }
+}
+
+#ifdef __cplusplus
+#include <iostream>
+#include <iomanip>
+inline std::ostream& operator<<(std::ostream& os, cap_rights_t rights) {
+ for (int ii = 0; ii < (CAP_RIGHTS_VERSION+2); ii++) {
+ os << std::hex << std::setw(16) << std::setfill('0') << (unsigned long long)rights.cr_rights[ii] << " ";
+ }
+ return os;
+}
+#endif
+
+#else
+
+static inline void cap_rights_describe(const cap_rights_t *rights, char *buffer) {
+ sprintf(buffer, "0x%016llx", (*rights));
+}
+
+#endif /* new/old style rights manipulation */
+
+#endif /*__CAPSICUM_H__*/
diff --git a/fcntl.cc b/fcntl.cc
new file mode 100644
index 000000000000..8e3d0e14c44c
--- /dev/null
+++ b/fcntl.cc
@@ -0,0 +1,411 @@
+// Test that fcntl works in capability mode.
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdint.h>
+
+#include <string>
+#include <map>
+
+#include "capsicum.h"
+#include "capsicum-test.h"
+#include "syscalls.h"
+
+// Ensure that fcntl() works consistently for both regular file descriptors and
+// capability-wrapped ones.
+FORK_TEST(Fcntl, Basic) {
+ cap_rights_t rights;
+ cap_rights_init(&rights, CAP_READ, CAP_FCNTL);
+
+ typedef std::map<std::string, int> FileMap;
+
+ // Open some files of different types, and wrap them in capabilities.
+ FileMap files;
+ files["file"] = open("/etc/passwd", O_RDONLY);
+ EXPECT_OK(files["file"]);
+ files["socket"] = socket(PF_LOCAL, SOCK_STREAM, 0);
+ EXPECT_OK(files["socket"]);
+ char shm_name[128];
+ sprintf(shm_name, "/capsicum-test-%d", getuid());
+ files["SHM"] = shm_open(shm_name, (O_CREAT|O_RDWR), 0600);
+ if ((files["SHM"] == -1) && errno == ENOSYS) {
+ // shm_open() is not implemented in user-mode Linux.
+ files.erase("SHM");
+ } else {
+ EXPECT_OK(files["SHM"]);
+ }
+
+ FileMap caps;
+ for (FileMap::iterator ii = files.begin(); ii != files.end(); ++ii) {
+ std::string key = ii->first + " cap";
+ caps[key] = dup(ii->second);
+ EXPECT_OK(cap_rights_limit(caps[key], &rights));
+ EXPECT_OK(caps[key]) << " on " << ii->first;
+ }
+
+ FileMap all(files);
+ all.insert(files.begin(), files.end());
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ // Ensure that we can fcntl() all the files that we opened above.
+ cap_rights_t r_ro;
+ cap_rights_init(&r_ro, CAP_READ);
+ for (FileMap::iterator ii = all.begin(); ii != all.end(); ++ii) {
+ EXPECT_OK(fcntl(ii->second, F_GETFL, 0)) << " on " << ii->first;
+ int cap = dup(ii->second);
+ EXPECT_OK(cap) << " on " << ii->first;
+ EXPECT_OK(cap_rights_limit(cap, &r_ro)) << " on " << ii->first;
+ EXPECT_EQ(-1, fcntl(cap, F_GETFL, 0)) << " on " << ii->first;
+ EXPECT_EQ(ENOTCAPABLE, errno) << " on " << ii->first;
+ close(cap);
+ }
+ for (FileMap::iterator ii = all.begin(); ii != all.end(); ++ii) {
+ close(ii->second);
+ }
+ shm_unlink(shm_name);
+}
+
+// Supported fcntl(2) operations:
+// FreeBSD10 FreeBSD9.1: Linux: Rights: Summary:
+// F_DUPFD F_DUPFD F_DUPFD NONE as dup(2)
+// F_DUPFD_CLOEXEC F_DUPFD_CLOEXEC NONE as dup(2) with close-on-exec
+// F_DUP2FD F_DUP2FD NONE as dup2(2)
+// F_DUP2FD_CLOEXEC NONE as dup2(2) with close-on-exec
+// F_GETFD F_GETFD F_GETFD NONE get close-on-exec flag
+// F_SETFD F_SETFD F_SETFD NONE set close-on-exec flag
+// * F_GETFL F_GETFL F_GETFL FCNTL get file status flag
+// * F_SETFL F_SETFL F_SETFL FCNTL set file status flag
+// * F_GETOWN F_GETOWN F_GETOWN FCNTL get pid receiving SIGIO/SIGURG
+// * F_SETOWN F_SETOWN F_SETOWN FCNTL set pid receiving SIGIO/SIGURG
+// * F_GETOWN_EX FCNTL get pid/thread receiving SIGIO/SIGURG
+// * F_SETOWN_EX FCNTL set pid/thread receiving SIGIO/SIGURG
+// F_GETLK F_GETLK F_GETLK FLOCK get lock info
+// F_SETLK F_SETLK F_SETLK FLOCK set lock info
+// F_SETLK_REMOTE FLOCK set lock info
+// F_SETLKW F_SETLKW F_SETLKW FLOCK set lock info (blocking)
+// F_READAHEAD F_READAHEAD NONE set or clear readahead amount
+// F_RDAHEAD F_RDAHEAD NONE set or clear readahead amount to 128KB
+// F_GETSIG POLL_EVENT+FSIGNAL get signal sent when I/O possible
+// F_SETSIG POLL_EVENT+FSIGNAL set signal sent when I/O possible
+// F_GETLEASE FLOCK+FSIGNAL get lease on file descriptor
+// F_SETLEASE FLOCK+FSIGNAL set new lease on file descriptor
+// F_NOTIFY NOTIFY generate signal on changes (dnotify)
+// F_GETPIPE_SZ GETSOCKOPT get pipe size
+// F_SETPIPE_SZ SETSOCKOPT set pipe size
+// F_GET_SEAL FSTAT get memfd seals
+// F_ADD_SEAL FCHMOD set memfd seal
+// If HAVE_CAP_FCNTLS_LIMIT is defined, then fcntl(2) operations that require
+// CAP_FCNTL (marked with * above) can be further limited with cap_fcntls_limit(2).
+namespace {
+#define FCNTL_NUM_RIGHTS 9
+cap_rights_t fcntl_rights[FCNTL_NUM_RIGHTS];
+void InitRights() {
+ cap_rights_init(&(fcntl_rights[0]), 0); // Later code assumes this is at [0]
+ cap_rights_init(&(fcntl_rights[1]), CAP_READ, CAP_WRITE);
+ cap_rights_init(&(fcntl_rights[2]), CAP_FCNTL);
+ cap_rights_init(&(fcntl_rights[3]), CAP_FLOCK);
+#ifdef CAP_FSIGNAL
+ cap_rights_init(&(fcntl_rights[4]), CAP_EVENT, CAP_FSIGNAL);
+ cap_rights_init(&(fcntl_rights[5]), CAP_FLOCK, CAP_FSIGNAL);
+#else
+ cap_rights_init(&(fcntl_rights[4]), 0);
+ cap_rights_init(&(fcntl_rights[5]), 0);
+#endif
+#ifdef CAP_NOTIFY
+ cap_rights_init(&(fcntl_rights[6]), CAP_NOTIFY);
+#else
+ cap_rights_init(&(fcntl_rights[6]), 0);
+#endif
+ cap_rights_init(&(fcntl_rights[7]), CAP_SETSOCKOPT);
+ cap_rights_init(&(fcntl_rights[8]), CAP_GETSOCKOPT);
+}
+
+int CheckFcntl(unsigned long long right, int caps[FCNTL_NUM_RIGHTS], int cmd, long arg, const char* context) {
+ SCOPED_TRACE(context);
+ cap_rights_t rights;
+ cap_rights_init(&rights, right);
+ int ok_index = -1;
+ for (int ii = 0; ii < FCNTL_NUM_RIGHTS; ++ii) {
+ if (cap_rights_contains(&(fcntl_rights[ii]), &rights)) {
+ if (ok_index == -1) ok_index = ii;
+ continue;
+ }
+ EXPECT_NOTCAPABLE(fcntl(caps[ii], cmd, arg));
+ }
+ EXPECT_NE(-1, ok_index);
+ int rc = fcntl(caps[ok_index], cmd, arg);
+ EXPECT_OK(rc);
+ return rc;
+}
+} // namespace
+
+#define CHECK_FCNTL(right, caps, cmd, arg) \
+ CheckFcntl(right, caps, cmd, arg, "fcntl(" #cmd ") expect " #right)
+
+TEST(Fcntl, Commands) {
+ InitRights();
+ int fd = open(TmpFile("cap_fcntl_cmds"), O_RDWR|O_CREAT, 0644);
+ EXPECT_OK(fd);
+ write(fd, "TEST", 4);
+ int sock = socket(PF_LOCAL, SOCK_STREAM, 0);
+ EXPECT_OK(sock);
+ int caps[FCNTL_NUM_RIGHTS];
+ int sock_caps[FCNTL_NUM_RIGHTS];
+ for (int ii = 0; ii < FCNTL_NUM_RIGHTS; ++ii) {
+ caps[ii] = dup(fd);
+ EXPECT_OK(caps[ii]);
+ EXPECT_OK(cap_rights_limit(caps[ii], &(fcntl_rights[ii])));
+ sock_caps[ii] = dup(sock);
+ EXPECT_OK(sock_caps[ii]);
+ EXPECT_OK(cap_rights_limit(sock_caps[ii], &(fcntl_rights[ii])));
+ }
+
+ // Check the things that need no rights against caps[0].
+ int newfd = fcntl(caps[0], F_DUPFD, 0);
+ EXPECT_OK(newfd);
+ // dup()'ed FD should have same rights.
+ cap_rights_t rights;
+ cap_rights_init(&rights, 0);
+ EXPECT_OK(cap_rights_get(newfd, &rights));
+ EXPECT_RIGHTS_EQ(&(fcntl_rights[0]), &rights);
+ close(newfd);
+#ifdef HAVE_F_DUP2FD
+ EXPECT_OK(fcntl(caps[0], F_DUP2FD, newfd));
+ // dup2()'ed FD should have same rights.
+ EXPECT_OK(cap_rights_get(newfd, &rights));
+ EXPECT_RIGHTS_EQ(&(fcntl_rights[0]), &rights);
+ close(newfd);
+#endif
+
+ EXPECT_OK(fcntl(caps[0], F_GETFD, 0));
+ EXPECT_OK(fcntl(caps[0], F_SETFD, 0));
+
+ // Check operations that need CAP_FCNTL.
+ int fd_flag = CHECK_FCNTL(CAP_FCNTL, caps, F_GETFL, 0);
+ EXPECT_EQ(0, CHECK_FCNTL(CAP_FCNTL, caps, F_SETFL, fd_flag));
+ int owner = CHECK_FCNTL(CAP_FCNTL, sock_caps, F_GETOWN, 0);
+ EXPECT_EQ(0, CHECK_FCNTL(CAP_FCNTL, sock_caps, F_SETOWN, owner));
+
+ // Check an operation needing CAP_FLOCK.
+ struct flock fl;
+ memset(&fl, 0, sizeof(fl));
+ fl.l_type = F_RDLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 1;
+ EXPECT_EQ(0, CHECK_FCNTL(CAP_FLOCK, caps, F_GETLK, (long)&fl));
+
+ for (int ii = 0; ii < FCNTL_NUM_RIGHTS; ++ii) {
+ close(sock_caps[ii]);
+ close(caps[ii]);
+ }
+ close(sock);
+ close(fd);
+ unlink(TmpFile("cap_fcntl_cmds"));
+}
+
+TEST(Fcntl, WriteLock) {
+ int fd = open(TmpFile("cap_fcntl_readlock"), O_RDWR|O_CREAT, 0644);
+ EXPECT_OK(fd);
+ write(fd, "TEST", 4);
+
+ int cap = dup(fd);
+ cap_rights_t rights;
+ cap_rights_init(&rights, CAP_FCNTL, CAP_READ, CAP_WRITE, CAP_FLOCK);
+ EXPECT_OK(cap_rights_limit(cap, &rights));
+
+ struct flock fl;
+ memset(&fl, 0, sizeof(fl));
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 1;
+ // Write-Lock
+ EXPECT_OK(fcntl(cap, F_SETLK, (long)&fl));
+
+ // Check write-locked (from another process).
+ pid_t child = fork();
+ if (child == 0) {
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 1;
+ EXPECT_OK(fcntl(fd, F_GETLK, (long)&fl));
+ EXPECT_NE(F_UNLCK, fl.l_type);
+ exit(HasFailure());
+ }
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+
+ // Unlock
+ fl.l_type = F_UNLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 1;
+ EXPECT_OK(fcntl(cap, F_SETLK, (long)&fl));
+
+ close(cap);
+ close(fd);
+ unlink(TmpFile("cap_fcntl_readlock"));
+}
+
+#ifdef HAVE_CAP_FCNTLS_LIMIT
+TEST(Fcntl, SubRightNormalFD) {
+ int fd = open(TmpFile("cap_fcntl_subrightnorm"), O_RDWR|O_CREAT, 0644);
+ EXPECT_OK(fd);
+
+ // Restrict the fcntl(2) subrights of a normal FD.
+ EXPECT_OK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL));
+ int fd_flag = fcntl(fd, F_GETFL, 0);
+ EXPECT_OK(fd_flag);
+ EXPECT_NOTCAPABLE(fcntl(fd, F_SETFL, fd_flag));
+
+ // Expect to have all capabilities.
+ cap_rights_t rights;
+ EXPECT_OK(cap_rights_get(fd, &rights));
+ cap_rights_t all;
+ CAP_SET_ALL(&all);
+ EXPECT_RIGHTS_EQ(&all, &rights);
+ cap_fcntl_t fcntls;
+ EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+ EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETFL, fcntls);
+
+ // Can't widen the subrights.
+ EXPECT_NOTCAPABLE(cap_fcntls_limit(fd, CAP_FCNTL_GETFL|CAP_FCNTL_SETFL));
+
+ close(fd);
+ unlink(TmpFile("cap_fcntl_subrightnorm"));
+}
+
+TEST(Fcntl, PreserveSubRights) {
+ int fd = open(TmpFile("cap_fcntl_subrightpreserve"), O_RDWR|O_CREAT, 0644);
+ EXPECT_OK(fd);
+
+ cap_rights_t rights;
+ cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FCNTL);
+ EXPECT_OK(cap_rights_limit(fd, &rights));
+ EXPECT_OK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL));
+
+ cap_rights_t cur_rights;
+ cap_fcntl_t fcntls;
+ EXPECT_OK(cap_rights_get(fd, &cur_rights));
+ EXPECT_RIGHTS_EQ(&rights, &cur_rights);
+ EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+ EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETFL, fcntls);
+
+ // Limiting the top-level rights leaves the subrights unaffected...
+ cap_rights_clear(&rights, CAP_READ);
+ EXPECT_OK(cap_rights_limit(fd, &rights));
+ EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+ EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETFL, fcntls);
+
+ // ... until we remove CAP_FCNTL.
+ cap_rights_clear(&rights, CAP_FCNTL);
+ EXPECT_OK(cap_rights_limit(fd, &rights));
+ EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+ EXPECT_EQ((cap_fcntl_t)0, fcntls);
+ EXPECT_EQ(-1, cap_fcntls_limit(fd, CAP_FCNTL_GETFL));
+
+ close(fd);
+ unlink(TmpFile("cap_fcntl_subrightpreserve"));
+}
+
+TEST(Fcntl, FLSubRights) {
+ int fd = open(TmpFile("cap_fcntl_subrights"), O_RDWR|O_CREAT, 0644);
+ EXPECT_OK(fd);
+ write(fd, "TEST", 4);
+ cap_rights_t rights;
+ cap_rights_init(&rights, CAP_FCNTL);
+ EXPECT_OK(cap_rights_limit(fd, &rights));
+
+ // Check operations that need CAP_FCNTL with subrights pristine => OK.
+ int fd_flag = fcntl(fd, F_GETFL, 0);
+ EXPECT_OK(fd_flag);
+ EXPECT_OK(fcntl(fd, F_SETFL, fd_flag));
+
+ // Check operations that need CAP_FCNTL with all subrights => OK.
+ EXPECT_OK(cap_fcntls_limit(fd, CAP_FCNTL_ALL));
+ fd_flag = fcntl(fd, F_GETFL, 0);
+ EXPECT_OK(fd_flag);
+ EXPECT_OK(fcntl(fd, F_SETFL, fd_flag));
+
+ // Check operations that need CAP_FCNTL with specific subrights.
+ int fd_get = dup(fd);
+ int fd_set = dup(fd);
+ EXPECT_OK(cap_fcntls_limit(fd_get, CAP_FCNTL_GETFL));
+ EXPECT_OK(cap_fcntls_limit(fd_set, CAP_FCNTL_SETFL));
+
+ fd_flag = fcntl(fd_get, F_GETFL, 0);
+ EXPECT_OK(fd_flag);
+ EXPECT_NOTCAPABLE(fcntl(fd_set, F_GETFL, 0));
+ EXPECT_OK(fcntl(fd_set, F_SETFL, fd_flag));
+ EXPECT_NOTCAPABLE(fcntl(fd_get, F_SETFL, fd_flag));
+ close(fd_get);
+ close(fd_set);
+
+ // Check operations that need CAP_FCNTL with no subrights => ENOTCAPABLE.
+ EXPECT_OK(cap_fcntls_limit(fd, 0));
+ EXPECT_NOTCAPABLE(fcntl(fd, F_GETFL, 0));
+ EXPECT_NOTCAPABLE(fcntl(fd, F_SETFL, fd_flag));
+
+ close(fd);
+ unlink(TmpFile("cap_fcntl_subrights"));
+}
+
+TEST(Fcntl, OWNSubRights) {
+ int sock = socket(PF_LOCAL, SOCK_STREAM, 0);
+ EXPECT_OK(sock);
+ cap_rights_t rights;
+ cap_rights_init(&rights, CAP_FCNTL);
+ EXPECT_OK(cap_rights_limit(sock, &rights));
+
+ // Check operations that need CAP_FCNTL with no subrights => OK.
+ int owner = fcntl(sock, F_GETOWN, 0);
+ EXPECT_OK(owner);
+ EXPECT_OK(fcntl(sock, F_SETOWN, owner));
+
+ // Check operations that need CAP_FCNTL with all subrights => OK.
+ EXPECT_OK(cap_fcntls_limit(sock, CAP_FCNTL_ALL));
+ owner = fcntl(sock, F_GETOWN, 0);
+ EXPECT_OK(owner);
+ EXPECT_OK(fcntl(sock, F_SETOWN, owner));
+
+ // Check operations that need CAP_FCNTL with specific subrights.
+ int sock_get = dup(sock);
+ int sock_set = dup(sock);
+ EXPECT_OK(cap_fcntls_limit(sock_get, CAP_FCNTL_GETOWN));
+ EXPECT_OK(cap_fcntls_limit(sock_set, CAP_FCNTL_SETOWN));
+ owner = fcntl(sock_get, F_GETOWN, 0);
+ EXPECT_OK(owner);
+ EXPECT_NOTCAPABLE(fcntl(sock_set, F_GETOWN, 0));
+ EXPECT_OK(fcntl(sock_set, F_SETOWN, owner));
+ EXPECT_NOTCAPABLE(fcntl(sock_get, F_SETOWN, owner));
+ // Also check we can retrieve the subrights.
+ cap_fcntl_t fcntls;
+ EXPECT_OK(cap_fcntls_get(sock_get, &fcntls));
+ EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETOWN, fcntls);
+ EXPECT_OK(cap_fcntls_get(sock_set, &fcntls));
+ EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_SETOWN, fcntls);
+ // And that we can't widen the subrights.
+ EXPECT_NOTCAPABLE(cap_fcntls_limit(sock_get, CAP_FCNTL_GETOWN|CAP_FCNTL_SETOWN));
+ EXPECT_NOTCAPABLE(cap_fcntls_limit(sock_set, CAP_FCNTL_GETOWN|CAP_FCNTL_SETOWN));
+ close(sock_get);
+ close(sock_set);
+
+ // Check operations that need CAP_FCNTL with no subrights => ENOTCAPABLE.
+ EXPECT_OK(cap_fcntls_limit(sock, 0));
+ EXPECT_NOTCAPABLE(fcntl(sock, F_GETOWN, 0));
+ EXPECT_NOTCAPABLE(fcntl(sock, F_SETOWN, owner));
+
+ close(sock);
+}
+#endif
diff --git a/fexecve.cc b/fexecve.cc
new file mode 100644
index 000000000000..d4971320a2b7
--- /dev/null
+++ b/fexecve.cc
@@ -0,0 +1,173 @@
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <limits.h>
+#include <stdlib.h>
+
+#include <sstream>
+
+#include "syscalls.h"
+#include "capsicum.h"
+#include "capsicum-test.h"
+
+// We need a program to exec(), but for fexecve() to work in capability
+// mode that program needs to be statically linked (otherwise ld.so will
+// attempt to traverse the filesystem to load (e.g.) /lib/libc.so and
+// fail).
+#define EXEC_PROG "./mini-me"
+#define EXEC_PROG_NOEXEC EXEC_PROG ".noexec"
+#define EXEC_PROG_SETUID EXEC_PROG ".setuid"
+
+// Arguments to use in execve() calls.
+static char* argv_pass[] = {(char*)EXEC_PROG, (char*)"--pass", NULL};
+static char* argv_fail[] = {(char*)EXEC_PROG, (char*)"--fail", NULL};
+static char* argv_checkroot[] = {(char*)EXEC_PROG, (char*)"--checkroot", NULL};
+static char* null_envp[] = {NULL};
+
+class Execve : public ::testing::Test {
+ public:
+ Execve() : exec_fd_(open(EXEC_PROG, O_RDONLY)) {
+ if (exec_fd_ < 0) {
+ fprintf(stderr, "Error! Failed to open %s\n", EXEC_PROG);
+ }
+ }
+ ~Execve() { if (exec_fd_ >= 0) close(exec_fd_); }
+protected:
+ int exec_fd_;
+};
+
+FORK_TEST_F(Execve, BasicFexecve) {
+ EXPECT_OK(fexecve_(exec_fd_, argv_pass, null_envp));
+ // Should not reach here, exec() takes over.
+ EXPECT_TRUE(!"fexecve() should never return");
+}
+
+FORK_TEST_F(Execve, InCapMode) {
+ EXPECT_OK(cap_enter());
+ EXPECT_OK(fexecve_(exec_fd_, argv_pass, null_envp));
+ // Should not reach here, exec() takes over.
+ EXPECT_TRUE(!"fexecve() should never return");
+}
+
+FORK_TEST_F(Execve, FailWithoutCap) {
+ EXPECT_OK(cap_enter());
+ int cap_fd = dup(exec_fd_);
+ EXPECT_OK(cap_fd);
+ cap_rights_t rights;
+ cap_rights_init(&rights, 0);
+ EXPECT_OK(cap_rights_limit(cap_fd, &rights));
+ EXPECT_EQ(-1, fexecve_(cap_fd, argv_fail, null_envp));
+ EXPECT_EQ(ENOTCAPABLE, errno);
+}
+
+FORK_TEST_F(Execve, SucceedWithCap) {
+ EXPECT_OK(cap_enter());
+ int cap_fd = dup(exec_fd_);
+ EXPECT_OK(cap_fd);
+ cap_rights_t rights;
+ // TODO(drysdale): would prefer that Linux Capsicum not need all of these
+ // rights -- just CAP_FEXECVE|CAP_READ or CAP_FEXECVE would be preferable.
+ cap_rights_init(&rights, CAP_FEXECVE, CAP_LOOKUP, CAP_READ);
+ EXPECT_OK(cap_rights_limit(cap_fd, &rights));
+ EXPECT_OK(fexecve_(cap_fd, argv_pass, null_envp));
+ // Should not reach here, exec() takes over.
+ EXPECT_TRUE(!"fexecve() should have succeeded");
+}
+
+FORK_TEST(Fexecve, ExecutePermissionCheck) {
+ int fd = open(EXEC_PROG_NOEXEC, O_RDONLY);
+ EXPECT_OK(fd);
+ if (fd >= 0) {
+ struct stat data;
+ EXPECT_OK(fstat(fd, &data));
+ EXPECT_EQ((mode_t)0, data.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH));
+ EXPECT_EQ(-1, fexecve_(fd, argv_fail, null_envp));
+ EXPECT_EQ(EACCES, errno);
+ close(fd);
+ }
+}
+
+FORK_TEST(Fexecve, SetuidIgnored) {
+ if (geteuid() == 0) {
+ TEST_SKIPPED("requires non-root");
+ return;
+ }
+ int fd = open(EXEC_PROG_SETUID, O_RDONLY);
+ EXPECT_OK(fd);
+ EXPECT_OK(cap_enter());
+ if (fd >= 0) {
+ struct stat data;
+ EXPECT_OK(fstat(fd, &data));
+ EXPECT_EQ((mode_t)S_ISUID, data.st_mode & S_ISUID);
+ EXPECT_OK(fexecve_(fd, argv_checkroot, null_envp));
+ // Should not reach here, exec() takes over.
+ EXPECT_TRUE(!"fexecve() should have succeeded");
+ close(fd);
+ }
+}
+
+FORK_TEST(Fexecve, ExecveFailure) {
+ EXPECT_OK(cap_enter());
+ EXPECT_EQ(-1, execve(argv_fail[0], argv_fail, null_envp));
+ EXPECT_EQ(ECAPMODE, errno);
+}
+
+FORK_TEST_ON(Fexecve, CapModeScriptFail, TmpFile("cap_sh_script")) {
+ // First, build an executable shell script
+ int fd = open(TmpFile("cap_sh_script"), O_RDWR|O_CREAT, 0755);
+ EXPECT_OK(fd);
+ const char* contents = "#!/bin/sh\nexit 99\n";
+ EXPECT_OK(write(fd, contents, strlen(contents)));
+ close(fd);
+
+ // Open the script file, with CAP_FEXECVE rights.
+ fd = open(TmpFile("cap_sh_script"), O_RDONLY);
+ cap_rights_t rights;
+ cap_rights_init(&rights, CAP_FEXECVE, CAP_READ, CAP_SEEK);
+ EXPECT_OK(cap_rights_limit(fd, &rights));
+
+ EXPECT_OK(cap_enter()); // Enter capability mode
+
+ // Attempt fexecve; should fail, because "/bin/sh" is inaccessible.
+ EXPECT_EQ(-1, fexecve_(fd, argv_pass, null_envp));
+}
+
+#ifdef HAVE_EXECVEAT
+TEST(Execveat, NoUpwardTraversal) {
+ char *abspath = realpath(EXEC_PROG, NULL);
+ char cwd[1024];
+ getcwd(cwd, sizeof(cwd));
+
+ int dfd = open(".", O_DIRECTORY|O_RDONLY);
+ pid_t child = fork();
+ if (child == 0) {
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+ // Can't execveat() an absolute path, even relative to a dfd.
+ EXPECT_SYSCALL_FAIL(ECAPMODE,
+ execveat(AT_FDCWD, abspath, argv_pass, null_envp, 0));
+ EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_CAPABILITY,
+ execveat(dfd, abspath, argv_pass, null_envp, 0));
+
+ // Can't execveat() a relative path ("../<dir>/./<exe>").
+ char *p = cwd + strlen(cwd);
+ while (*p != '/') p--;
+ char buffer[1024] = "../";
+ strcat(buffer, ++p);
+ strcat(buffer, "/");
+ strcat(buffer, EXEC_PROG);
+ EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_CAPABILITY,
+ execveat(dfd, buffer, argv_pass, null_envp, 0));
+ exit(HasFailure() ? 99 : 123);
+ }
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_TRUE(WIFEXITED(status)) << "0x" << std::hex << status;
+ EXPECT_EQ(123, WEXITSTATUS(status));
+ free(abspath);
+ close(dfd);
+}
+#endif
diff --git a/ioctl.cc b/ioctl.cc
new file mode 100644
index 000000000000..0570e6f514da
--- /dev/null
+++ b/ioctl.cc
@@ -0,0 +1,234 @@
+// Test that ioctl works in capability mode.
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+
+#include "capsicum.h"
+#include "capsicum-test.h"
+
+// Ensure that ioctl() works consistently for both regular file descriptors and
+// capability-wrapped ones.
+TEST(Ioctl, Basic) {
+ cap_rights_t rights_ioctl;
+ cap_rights_init(&rights_ioctl, CAP_IOCTL);
+ cap_rights_t rights_many;
+ cap_rights_init(&rights_many, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSTAT, CAP_FSYNC);
+
+ int fd = open("/etc/passwd", O_RDONLY);
+ EXPECT_OK(fd);
+ int fd_no = dup(fd);
+ EXPECT_OK(fd_no);
+ EXPECT_OK(cap_rights_limit(fd, &rights_ioctl));
+ EXPECT_OK(cap_rights_limit(fd_no, &rights_many));
+
+ // Check that CAP_IOCTL is required.
+ int bytes;
+ EXPECT_OK(ioctl(fd, FIONREAD, &bytes));
+ EXPECT_NOTCAPABLE(ioctl(fd_no, FIONREAD, &bytes));
+
+ int one = 1;
+ EXPECT_OK(ioctl(fd, FIOCLEX, &one));
+ EXPECT_NOTCAPABLE(ioctl(fd_no, FIOCLEX, &one));
+
+ close(fd);
+ close(fd_no);
+}
+
+#ifdef HAVE_CAP_IOCTLS_LIMIT
+TEST(Ioctl, SubRightNormalFD) {
+ int fd = open("/etc/passwd", O_RDONLY);
+ EXPECT_OK(fd);
+
+ // Restrict the ioctl(2) subrights of a normal FD.
+ cap_ioctl_t ioctl_nread = FIONREAD;
+ EXPECT_OK(cap_ioctls_limit(fd, &ioctl_nread, 1));
+ int bytes;
+ EXPECT_OK(ioctl(fd, FIONREAD, &bytes));
+ int one = 1;
+ EXPECT_NOTCAPABLE(ioctl(fd, FIOCLEX, &one));
+
+ // Expect to have all primary rights.
+ cap_rights_t rights;
+ EXPECT_OK(cap_rights_get(fd, &rights));
+ cap_rights_t all;
+ CAP_SET_ALL(&all);
+ EXPECT_RIGHTS_EQ(&all, &rights);
+ cap_ioctl_t ioctls[16];
+ memset(ioctls, 0, sizeof(ioctls));
+ ssize_t nioctls = cap_ioctls_get(fd, ioctls, 16);
+ EXPECT_OK(nioctls);
+ EXPECT_EQ(1, nioctls);
+ EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+
+ // Can't widen the subrights.
+ cap_ioctl_t both_ioctls[2] = {FIONREAD, FIOCLEX};
+ EXPECT_NOTCAPABLE(cap_ioctls_limit(fd, both_ioctls, 2));
+
+ close(fd);
+}
+
+TEST(Ioctl, PreserveSubRights) {
+ int fd = open("/etc/passwd", O_RDONLY);
+ EXPECT_OK(fd);
+ cap_rights_t rights;
+ cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_IOCTL);
+ EXPECT_OK(cap_rights_limit(fd, &rights));
+ cap_ioctl_t ioctl_nread = FIONREAD;
+ EXPECT_OK(cap_ioctls_limit(fd, &ioctl_nread, 1));
+
+ cap_rights_t cur_rights;
+ cap_ioctl_t ioctls[16];
+ ssize_t nioctls;
+ EXPECT_OK(cap_rights_get(fd, &cur_rights));
+ EXPECT_RIGHTS_EQ(&rights, &cur_rights);
+ nioctls = cap_ioctls_get(fd, ioctls, 16);
+ EXPECT_OK(nioctls);
+ EXPECT_EQ(1, nioctls);
+ EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+
+ // Limiting the top-level rights leaves the subrights unaffected...
+ cap_rights_clear(&rights, CAP_READ);
+ EXPECT_OK(cap_rights_limit(fd, &rights));
+ nioctls = cap_ioctls_get(fd, ioctls, 16);
+ EXPECT_OK(nioctls);
+ EXPECT_EQ(1, nioctls);
+ EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+
+ // ... until we remove CAP_IOCTL
+ cap_rights_clear(&rights, CAP_IOCTL);
+ EXPECT_OK(cap_rights_limit(fd, &rights));
+ nioctls = cap_ioctls_get(fd, ioctls, 16);
+ EXPECT_OK(nioctls);
+ EXPECT_EQ(0, nioctls);
+ EXPECT_EQ(-1, cap_ioctls_limit(fd, &ioctl_nread, 1));
+
+ close(fd);
+}
+
+TEST(Ioctl, SubRights) {
+ int fd = open("/etc/passwd", O_RDONLY);
+ EXPECT_OK(fd);
+
+ cap_ioctl_t ioctls[16];
+ ssize_t nioctls;
+ memset(ioctls, 0, sizeof(ioctls));
+ nioctls = cap_ioctls_get(fd, ioctls, 16);
+ EXPECT_OK(nioctls);
+ EXPECT_EQ(CAP_IOCTLS_ALL, nioctls);
+
+ cap_rights_t rights_ioctl;
+ cap_rights_init(&rights_ioctl, CAP_IOCTL);
+ EXPECT_OK(cap_rights_limit(fd, &rights_ioctl));
+
+ nioctls = cap_ioctls_get(fd, ioctls, 16);
+ EXPECT_OK(nioctls);
+ EXPECT_EQ(CAP_IOCTLS_ALL, nioctls);
+
+ // Check operations that need CAP_IOCTL with subrights pristine => OK.
+ int bytes;
+ EXPECT_OK(ioctl(fd, FIONREAD, &bytes));
+ int one = 1;
+ EXPECT_OK(ioctl(fd, FIOCLEX, &one));
+
+ // Check operations that need CAP_IOCTL with all relevant subrights => OK.
+ cap_ioctl_t both_ioctls[2] = {FIONREAD, FIOCLEX};
+ EXPECT_OK(cap_ioctls_limit(fd, both_ioctls, 2));
+ EXPECT_OK(ioctl(fd, FIONREAD, &bytes));
+ EXPECT_OK(ioctl(fd, FIOCLEX, &one));
+
+
+ // Check what happens if we ask for subrights but don't have the space for them.
+ cap_ioctl_t before = 0xBBBBBBBB;
+ cap_ioctl_t one_ioctl = 0;
+ cap_ioctl_t after = 0xAAAAAAAA;
+ nioctls = cap_ioctls_get(fd, &one_ioctl, 1);
+ EXPECT_EQ(2, nioctls);
+ EXPECT_EQ(0xBBBBBBBB, before);
+ EXPECT_TRUE(one_ioctl == FIONREAD || one_ioctl == FIOCLEX);
+ EXPECT_EQ(0xAAAAAAAA, after);
+
+ // Check operations that need CAP_IOCTL with particular subrights.
+ int fd_nread = dup(fd);
+ int fd_clex = dup(fd);
+ cap_ioctl_t ioctl_nread = FIONREAD;
+ cap_ioctl_t ioctl_clex = FIOCLEX;
+ EXPECT_OK(cap_ioctls_limit(fd_nread, &ioctl_nread, 1));
+ EXPECT_OK(cap_ioctls_limit(fd_clex, &ioctl_clex, 1));
+ EXPECT_OK(ioctl(fd_nread, FIONREAD, &bytes));
+ EXPECT_NOTCAPABLE(ioctl(fd_clex, FIONREAD, &bytes));
+ EXPECT_OK(ioctl(fd_clex, FIOCLEX, &one));
+ EXPECT_NOTCAPABLE(ioctl(fd_nread, FIOCLEX, &one));
+
+ // Also check we can retrieve the subrights.
+ memset(ioctls, 0, sizeof(ioctls));
+ nioctls = cap_ioctls_get(fd_nread, ioctls, 16);
+ EXPECT_OK(nioctls);
+ EXPECT_EQ(1, nioctls);
+ EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+ memset(ioctls, 0, sizeof(ioctls));
+ nioctls = cap_ioctls_get(fd_clex, ioctls, 16);
+ EXPECT_OK(nioctls);
+ EXPECT_EQ(1, nioctls);
+ EXPECT_EQ((cap_ioctl_t)FIOCLEX, ioctls[0]);
+ // And that we can't widen the subrights.
+ EXPECT_NOTCAPABLE(cap_ioctls_limit(fd_nread, both_ioctls, 2));
+ EXPECT_NOTCAPABLE(cap_ioctls_limit(fd_clex, both_ioctls, 2));
+ close(fd_nread);
+ close(fd_clex);
+
+ // Check operations that need CAP_IOCTL with no subrights => ENOTCAPABLE.
+ EXPECT_OK(cap_ioctls_limit(fd, NULL, 0));
+ EXPECT_NOTCAPABLE(ioctl(fd, FIONREAD, &bytes));
+ EXPECT_NOTCAPABLE(ioctl(fd, FIOCLEX, &one));
+
+ close(fd);
+}
+
+#ifdef CAP_IOCTLS_LIMIT_MAX
+TEST(Ioctl, TooManySubRights) {
+ int fd = open("/etc/passwd", O_RDONLY);
+ EXPECT_OK(fd);
+
+ cap_ioctl_t ioctls[CAP_IOCTLS_LIMIT_MAX + 1];
+ for (int ii = 0; ii <= CAP_IOCTLS_LIMIT_MAX; ii++) {
+ ioctls[ii] = ii + 1;
+ }
+
+ cap_rights_t rights_ioctl;
+ cap_rights_init(&rights_ioctl, CAP_IOCTL);
+ EXPECT_OK(cap_rights_limit(fd, &rights_ioctl));
+
+ // Can only limit to a certain number of ioctls
+ EXPECT_EQ(-1, cap_ioctls_limit(fd, ioctls, CAP_IOCTLS_LIMIT_MAX + 1));
+ EXPECT_EQ(EINVAL, errno);
+ EXPECT_OK(cap_ioctls_limit(fd, ioctls, CAP_IOCTLS_LIMIT_MAX));
+
+ close(fd);
+}
+#else
+TEST(Ioctl, ManySubRights) {
+ int fd = open("/etc/passwd", O_RDONLY);
+ EXPECT_OK(fd);
+
+ const int nioctls = 150000;
+ cap_ioctl_t* ioctls = (cap_ioctl_t*)calloc(nioctls, sizeof(cap_ioctl_t));
+ for (int ii = 0; ii < nioctls; ii++) {
+ ioctls[ii] = ii + 1;
+ }
+
+ cap_rights_t rights_ioctl;
+ cap_rights_init(&rights_ioctl, CAP_IOCTL);
+ EXPECT_OK(cap_rights_limit(fd, &rights_ioctl));
+
+ EXPECT_OK(cap_ioctls_limit(fd, ioctls, nioctls));
+ // Limit to a subset; if this takes a long time then there's an
+ // O(N^2) implementation of the ioctl list comparison.
+ EXPECT_OK(cap_ioctls_limit(fd, ioctls, nioctls - 1));
+
+ close(fd);
+}
+#endif
+
+#endif
diff --git a/linux.cc b/linux.cc
new file mode 100644
index 000000000000..dee1f99897f6
--- /dev/null
+++ b/linux.cc
@@ -0,0 +1,1503 @@
+// Tests of Linux-specific functionality
+#ifdef __linux__
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/timerfd.h>
+#include <sys/signalfd.h>
+#include <sys/eventfd.h>
+#include <sys/epoll.h>
+#include <sys/inotify.h>
+#include <sys/fanotify.h>
+#include <sys/mman.h>
+#include <sys/capability.h> // Requires e.g. libcap-dev package for POSIX.1e capabilities headers
+#include <linux/aio_abi.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <linux/version.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+TEST(Linux, TimerFD) {
+ int fd = timerfd_create(CLOCK_MONOTONIC, 0);
+
+ cap_rights_t r_ro;
+ cap_rights_init(&r_ro, CAP_READ);
+ cap_rights_t r_wo;
+ cap_rights_init(&r_wo, CAP_WRITE);
+ cap_rights_t r_rw;
+ cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+ cap_rights_t r_rwpoll;
+ cap_rights_init(&r_rwpoll, CAP_READ, CAP_WRITE, CAP_EVENT);
+
+ int cap_fd_ro = dup(fd);
+ EXPECT_OK(cap_fd_ro);
+ EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_ro));
+ int cap_fd_wo = dup(fd);
+ EXPECT_OK(cap_fd_wo);
+ EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_wo));
+ int cap_fd_rw = dup(fd);
+ EXPECT_OK(cap_fd_rw);
+ EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rw));
+ int cap_fd_all = dup(fd);
+ EXPECT_OK(cap_fd_all);
+ EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwpoll));
+
+ struct itimerspec old_ispec;
+ struct itimerspec ispec;
+ ispec.it_interval.tv_sec = 0;
+ ispec.it_interval.tv_nsec = 0;
+ ispec.it_value.tv_sec = 0;
+ ispec.it_value.tv_nsec = 100000000; // 100ms
+ EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_ro, 0, &ispec, NULL));
+ EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_wo, 0, &ispec, &old_ispec));
+ EXPECT_OK(timerfd_settime(cap_fd_wo, 0, &ispec, NULL));
+ EXPECT_OK(timerfd_settime(cap_fd_rw, 0, &ispec, NULL));
+ EXPECT_OK(timerfd_settime(cap_fd_all, 0, &ispec, NULL));
+
+ EXPECT_NOTCAPABLE(timerfd_gettime(cap_fd_wo, &old_ispec));
+ EXPECT_OK(timerfd_gettime(cap_fd_ro, &old_ispec));
+ EXPECT_OK(timerfd_gettime(cap_fd_rw, &old_ispec));
+ EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec));
+
+ // To be able to poll() for the timer pop, still need CAP_EVENT.
+ struct pollfd poll_fd;
+ for (int ii = 0; ii < 3; ii++) {
+ poll_fd.revents = 0;
+ poll_fd.events = POLLIN;
+ switch (ii) {
+ case 0: poll_fd.fd = cap_fd_ro; break;
+ case 1: poll_fd.fd = cap_fd_wo; break;
+ case 2: poll_fd.fd = cap_fd_rw; break;
+ }
+ // Poll immediately returns with POLLNVAL
+ EXPECT_OK(poll(&poll_fd, 1, 400));
+ EXPECT_EQ(0, (poll_fd.revents & POLLIN));
+ EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
+ }
+
+ poll_fd.fd = cap_fd_all;
+ EXPECT_OK(poll(&poll_fd, 1, 400));
+ EXPECT_NE(0, (poll_fd.revents & POLLIN));
+ EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));
+
+ EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec));
+ EXPECT_EQ(0, old_ispec.it_value.tv_sec);
+ EXPECT_EQ(0, old_ispec.it_value.tv_nsec);
+ EXPECT_EQ(0, old_ispec.it_interval.tv_sec);
+ EXPECT_EQ(0, old_ispec.it_interval.tv_nsec);
+
+ close(cap_fd_all);
+ close(cap_fd_rw);
+ close(cap_fd_wo);
+ close(cap_fd_ro);
+ close(fd);
+}
+
+FORK_TEST(Linux, SignalFD) {
+ if (force_mt) {
+ TEST_SKIPPED("multi-threaded run clashes with signals");
+ return;
+ }
+ pid_t me = getpid();
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGUSR1);
+
+ // Block signals before registering against a new signal FD.
+ EXPECT_OK(sigprocmask(SIG_BLOCK, &mask, NULL));
+ int fd = signalfd(-1, &mask, 0);
+ EXPECT_OK(fd);
+
+ cap_rights_t r_rs;
+ cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
+ cap_rights_t r_ws;
+ cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
+ cap_rights_t r_sig;
+ cap_rights_init(&r_sig, CAP_FSIGNAL);
+ cap_rights_t r_rssig;
+ cap_rights_init(&r_rssig, CAP_FSIGNAL, CAP_READ, CAP_SEEK);
+ cap_rights_t r_rssig_poll;
+ cap_rights_init(&r_rssig_poll, CAP_FSIGNAL, CAP_READ, CAP_SEEK, CAP_EVENT);
+
+ // Various capability variants.
+ int cap_fd_none = dup(fd);
+ EXPECT_OK(cap_fd_none);
+ EXPECT_OK(cap_rights_limit(cap_fd_none, &r_ws));
+ int cap_fd_read = dup(fd);
+ EXPECT_OK(cap_fd_read);
+ EXPECT_OK(cap_rights_limit(cap_fd_read, &r_rs));
+ int cap_fd_sig = dup(fd);
+ EXPECT_OK(cap_fd_sig);
+ EXPECT_OK(cap_rights_limit(cap_fd_sig, &r_sig));
+ int cap_fd_sig_read = dup(fd);
+ EXPECT_OK(cap_fd_sig_read);
+ EXPECT_OK(cap_rights_limit(cap_fd_sig_read, &r_rssig));
+ int cap_fd_all = dup(fd);
+ EXPECT_OK(cap_fd_all);
+ EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rssig_poll));
+
+ struct signalfd_siginfo fdsi;
+
+ // Need CAP_READ to read the signal information
+ kill(me, SIGUSR1);
+ EXPECT_NOTCAPABLE(read(cap_fd_none, &fdsi, sizeof(struct signalfd_siginfo)));
+ EXPECT_NOTCAPABLE(read(cap_fd_sig, &fdsi, sizeof(struct signalfd_siginfo)));
+ int len = read(cap_fd_read, &fdsi, sizeof(struct signalfd_siginfo));
+ EXPECT_OK(len);
+ EXPECT_EQ(sizeof(struct signalfd_siginfo), (size_t)len);
+ EXPECT_EQ(SIGUSR1, (int)fdsi.ssi_signo);
+
+ // Need CAP_FSIGNAL to modify the signal mask.
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGUSR1);
+ sigaddset(&mask, SIGUSR2);
+ EXPECT_OK(sigprocmask(SIG_BLOCK, &mask, NULL));
+ EXPECT_NOTCAPABLE(signalfd(cap_fd_none, &mask, 0));
+ EXPECT_NOTCAPABLE(signalfd(cap_fd_read, &mask, 0));
+ EXPECT_EQ(cap_fd_sig, signalfd(cap_fd_sig, &mask, 0));
+
+ // Need CAP_EVENT to get notification of a signal in poll(2).
+ kill(me, SIGUSR2);
+
+ struct pollfd poll_fd;
+ poll_fd.revents = 0;
+ poll_fd.events = POLLIN;
+ poll_fd.fd = cap_fd_sig_read;
+ EXPECT_OK(poll(&poll_fd, 1, 400));
+ EXPECT_EQ(0, (poll_fd.revents & POLLIN));
+ EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
+
+ poll_fd.fd = cap_fd_all;
+ EXPECT_OK(poll(&poll_fd, 1, 400));
+ EXPECT_NE(0, (poll_fd.revents & POLLIN));
+ EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));
+}
+
+TEST(Linux, EventFD) {
+ int fd = eventfd(0, 0);
+ EXPECT_OK(fd);
+
+ cap_rights_t r_rs;
+ cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
+ cap_rights_t r_ws;
+ cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
+ cap_rights_t r_rws;
+ cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
+ cap_rights_t r_rwspoll;
+ cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
+
+ int cap_ro = dup(fd);
+ EXPECT_OK(cap_ro);
+ EXPECT_OK(cap_rights_limit(cap_ro, &r_rs));
+ int cap_wo = dup(fd);
+ EXPECT_OK(cap_wo);
+ EXPECT_OK(cap_rights_limit(cap_wo, &r_ws));
+ int cap_rw = dup(fd);
+ EXPECT_OK(cap_rw);
+ EXPECT_OK(cap_rights_limit(cap_rw, &r_rws));
+ int cap_all = dup(fd);
+ EXPECT_OK(cap_all);
+ EXPECT_OK(cap_rights_limit(cap_all, &r_rwspoll));
+
+ pid_t child = fork();
+ if (child == 0) {
+ // Child: write counter to eventfd
+ uint64_t u = 42;
+ EXPECT_NOTCAPABLE(write(cap_ro, &u, sizeof(u)));
+ EXPECT_OK(write(cap_wo, &u, sizeof(u)));
+ exit(HasFailure());
+ }
+
+ sleep(1); // Allow child to write
+
+ struct pollfd poll_fd;
+ poll_fd.revents = 0;
+ poll_fd.events = POLLIN;
+ poll_fd.fd = cap_rw;
+ EXPECT_OK(poll(&poll_fd, 1, 400));
+ EXPECT_EQ(0, (poll_fd.revents & POLLIN));
+ EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
+
+ poll_fd.fd = cap_all;
+ EXPECT_OK(poll(&poll_fd, 1, 400));
+ EXPECT_NE(0, (poll_fd.revents & POLLIN));
+ EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));
+
+ uint64_t u;
+ EXPECT_NOTCAPABLE(read(cap_wo, &u, sizeof(u)));
+ EXPECT_OK(read(cap_ro, &u, sizeof(u)));
+ EXPECT_EQ(42, (int)u);
+
+ // Wait for the child.
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+
+ close(cap_all);
+ close(cap_rw);
+ close(cap_wo);
+ close(cap_ro);
+ close(fd);
+}
+
+FORK_TEST(Linux, epoll) {
+ int sock_fds[2];
+ EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds));
+ // Queue some data.
+ char buffer[4] = {1, 2, 3, 4};
+ EXPECT_OK(write(sock_fds[1], buffer, sizeof(buffer)));
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ int epoll_fd = epoll_create(1);
+ EXPECT_OK(epoll_fd);
+
+ cap_rights_t r_rs;
+ cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
+ cap_rights_t r_ws;
+ cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
+ cap_rights_t r_rws;
+ cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
+ cap_rights_t r_rwspoll;
+ cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
+ cap_rights_t r_epoll;
+ cap_rights_init(&r_epoll, CAP_EPOLL_CTL);
+
+ int cap_epoll_wo = dup(epoll_fd);
+ EXPECT_OK(cap_epoll_wo);
+ EXPECT_OK(cap_rights_limit(cap_epoll_wo, &r_ws));
+ int cap_epoll_ro = dup(epoll_fd);
+ EXPECT_OK(cap_epoll_ro);
+ EXPECT_OK(cap_rights_limit(cap_epoll_ro, &r_rs));
+ int cap_epoll_rw = dup(epoll_fd);
+ EXPECT_OK(cap_epoll_rw);
+ EXPECT_OK(cap_rights_limit(cap_epoll_rw, &r_rws));
+ int cap_epoll_poll = dup(epoll_fd);
+ EXPECT_OK(cap_epoll_poll);
+ EXPECT_OK(cap_rights_limit(cap_epoll_poll, &r_rwspoll));
+ int cap_epoll_ctl = dup(epoll_fd);
+ EXPECT_OK(cap_epoll_ctl);
+ EXPECT_OK(cap_rights_limit(cap_epoll_ctl, &r_epoll));
+
+ // Can only modify the FDs being monitored if the CAP_EPOLL_CTL right is present.
+ struct epoll_event eev;
+ memset(&eev, 0, sizeof(eev));
+ eev.events = EPOLLIN|EPOLLOUT|EPOLLPRI;
+ EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_ADD, sock_fds[0], &eev));
+ EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_ADD, sock_fds[0], &eev));
+ EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_ADD, sock_fds[0], &eev));
+ EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_ADD, sock_fds[0], &eev));
+ eev.events = EPOLLIN|EPOLLOUT;
+ EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_MOD, sock_fds[0], &eev));
+ EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_MOD, sock_fds[0], &eev));
+ EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_MOD, sock_fds[0], &eev));
+ EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_MOD, sock_fds[0], &eev));
+
+ // Running epoll_pwait(2) requires CAP_EVENT.
+ eev.events = 0;
+ EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_ro, &eev, 1, 100, NULL));
+ EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_wo, &eev, 1, 100, NULL));
+ EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_rw, &eev, 1, 100, NULL));
+ EXPECT_OK(epoll_pwait(cap_epoll_poll, &eev, 1, 100, NULL));
+ EXPECT_EQ(EPOLLIN, eev.events & EPOLLIN);
+
+ EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_DEL, sock_fds[0], &eev));
+ EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_DEL, sock_fds[0], &eev));
+ EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_DEL, sock_fds[0], &eev));
+ EXPECT_OK(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, sock_fds[0], &eev));
+
+ close(cap_epoll_ctl);
+ close(cap_epoll_poll);
+ close(cap_epoll_rw);
+ close(cap_epoll_ro);
+ close(cap_epoll_wo);
+ close(epoll_fd);
+ close(sock_fds[1]);
+ close(sock_fds[0]);
+}
+
+TEST(Linux, fstatat) {
+ int fd = open(TmpFile("cap_fstatat"), O_CREAT|O_RDWR, 0644);
+ EXPECT_OK(fd);
+ unsigned char buffer[] = {1, 2, 3, 4};
+ EXPECT_OK(write(fd, buffer, sizeof(buffer)));
+ cap_rights_t rights;
+ int cap_rf = dup(fd);
+ EXPECT_OK(cap_rf);
+ EXPECT_OK(cap_rights_limit(cap_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT)));
+ int cap_ro = dup(fd);
+ EXPECT_OK(cap_ro);
+ EXPECT_OK(cap_rights_limit(cap_ro, cap_rights_init(&rights, CAP_READ)));
+
+ struct stat info;
+ EXPECT_OK(fstatat(fd, "", &info, AT_EMPTY_PATH));
+ EXPECT_NOTCAPABLE(fstatat(cap_ro, "", &info, AT_EMPTY_PATH));
+ EXPECT_OK(fstatat(cap_rf, "", &info, AT_EMPTY_PATH));
+
+ close(cap_ro);
+ close(cap_rf);
+ close(fd);
+
+ int dir = open(tmpdir.c_str(), O_RDONLY);
+ EXPECT_OK(dir);
+ int dir_rf = dup(dir);
+ EXPECT_OK(dir_rf);
+ EXPECT_OK(cap_rights_limit(dir_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT)));
+ int dir_ro = dup(fd);
+ EXPECT_OK(dir_ro);
+ EXPECT_OK(cap_rights_limit(dir_ro, cap_rights_init(&rights, CAP_READ)));
+
+ EXPECT_OK(fstatat(dir, "cap_fstatat", &info, AT_EMPTY_PATH));
+ EXPECT_NOTCAPABLE(fstatat(dir_ro, "cap_fstatat", &info, AT_EMPTY_PATH));
+ EXPECT_OK(fstatat(dir_rf, "cap_fstatat", &info, AT_EMPTY_PATH));
+
+ close(dir_ro);
+ close(dir_rf);
+ close(dir);
+
+ unlink(TmpFile("cap_fstatat"));
+}
+
+// fanotify support may not be available at compile-time
+#ifdef __NR_fanotify_init
+TEST(Linux, fanotify) {
+ REQUIRE_ROOT();
+ int fa_fd = fanotify_init(FAN_CLASS_NOTIF, O_RDWR);
+ EXPECT_OK(fa_fd);
+ if (fa_fd < 0) return; // May not be enabled
+
+ cap_rights_t r_rs;
+ cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
+ cap_rights_t r_ws;
+ cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
+ cap_rights_t r_rws;
+ cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
+ cap_rights_t r_rwspoll;
+ cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
+ cap_rights_t r_rwsnotify;
+ cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY);
+ cap_rights_t r_rsl;
+ cap_rights_init(&r_rsl, CAP_READ, CAP_SEEK, CAP_LOOKUP);
+ cap_rights_t r_rslstat;
+ cap_rights_init(&r_rslstat, CAP_READ, CAP_SEEK, CAP_LOOKUP, CAP_FSTAT);
+ cap_rights_t r_rsstat;
+ cap_rights_init(&r_rsstat, CAP_READ, CAP_SEEK, CAP_FSTAT);
+
+ int cap_fd_ro = dup(fa_fd);
+ EXPECT_OK(cap_fd_ro);
+ EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs));
+ int cap_fd_wo = dup(fa_fd);
+ EXPECT_OK(cap_fd_wo);
+ EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws));
+ int cap_fd_rw = dup(fa_fd);
+ EXPECT_OK(cap_fd_rw);
+ EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws));
+ int cap_fd_poll = dup(fa_fd);
+ EXPECT_OK(cap_fd_poll);
+ EXPECT_OK(cap_rights_limit(cap_fd_poll, &r_rwspoll));
+ int cap_fd_not = dup(fa_fd);
+ EXPECT_OK(cap_fd_not);
+ EXPECT_OK(cap_rights_limit(cap_fd_not, &r_rwsnotify));
+
+ int rc = mkdir(TmpFile("cap_notify"), 0755);
+ EXPECT_TRUE(rc == 0 || errno == EEXIST);
+ int dfd = open(TmpFile("cap_notify"), O_RDONLY);
+ EXPECT_OK(dfd);
+ int fd = open(TmpFile("cap_notify/file"), O_CREAT|O_RDWR, 0644);
+ close(fd);
+ int cap_dfd = dup(dfd);
+ EXPECT_OK(cap_dfd);
+ EXPECT_OK(cap_rights_limit(cap_dfd, &r_rslstat));
+ EXPECT_OK(cap_dfd);
+ int cap_dfd_rs = dup(dfd);
+ EXPECT_OK(cap_dfd_rs);
+ EXPECT_OK(cap_rights_limit(cap_dfd_rs, &r_rs));
+ EXPECT_OK(cap_dfd_rs);
+ int cap_dfd_rsstat = dup(dfd);
+ EXPECT_OK(cap_dfd_rsstat);
+ EXPECT_OK(cap_rights_limit(cap_dfd_rsstat, &r_rsstat));
+ EXPECT_OK(cap_dfd_rsstat);
+ int cap_dfd_rsl = dup(dfd);
+ EXPECT_OK(cap_dfd_rsl);
+ EXPECT_OK(cap_rights_limit(cap_dfd_rsl, &r_rsl));
+ EXPECT_OK(cap_dfd_rsl);
+
+ // Need CAP_NOTIFY to change what's monitored.
+ EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_ro, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
+ EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_wo, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
+ EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_rw, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
+ EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
+
+ // Need CAP_FSTAT on the thing monitored.
+ EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rs, NULL));
+ EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rsstat, NULL));
+
+ // Too add monitoring of a file under a dfd, need CAP_LOOKUP|CAP_FSTAT on the dfd.
+ EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsstat, "file"));
+ EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsl, "file"));
+ EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd, "file"));
+
+ pid_t child = fork();
+ if (child == 0) {
+ // Child: Perform activity in the directory under notify.
+ sleep(1);
+ unlink(TmpFile("cap_notify/temp"));
+ int fd = open(TmpFile("cap_notify/temp"), O_CREAT|O_RDWR, 0644);
+ close(fd);
+ exit(0);
+ }
+
+ // Need CAP_EVENT to poll.
+ struct pollfd poll_fd;
+ poll_fd.revents = 0;
+ poll_fd.events = POLLIN;
+ poll_fd.fd = cap_fd_rw;
+ EXPECT_OK(poll(&poll_fd, 1, 1400));
+ EXPECT_EQ(0, (poll_fd.revents & POLLIN));
+ EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
+
+ poll_fd.fd = cap_fd_not;
+ EXPECT_OK(poll(&poll_fd, 1, 1400));
+ EXPECT_EQ(0, (poll_fd.revents & POLLIN));
+ EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
+
+ poll_fd.fd = cap_fd_poll;
+ EXPECT_OK(poll(&poll_fd, 1, 1400));
+ EXPECT_NE(0, (poll_fd.revents & POLLIN));
+ EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));
+
+ // Need CAP_READ to read.
+ struct fanotify_event_metadata ev;
+ memset(&ev, 0, sizeof(ev));
+ EXPECT_NOTCAPABLE(read(cap_fd_wo, &ev, sizeof(ev)));
+ rc = read(fa_fd, &ev, sizeof(ev));
+ EXPECT_OK(rc);
+ EXPECT_EQ((int)sizeof(struct fanotify_event_metadata), rc);
+ EXPECT_EQ(child, ev.pid);
+ EXPECT_NE(0, ev.fd);
+
+ // TODO(drysdale): reinstate if/when capsicum-linux propagates rights
+ // to fanotify-generated FDs.
+#ifdef OMIT
+ // fanotify(7) gives us a FD for the changed file. This should
+ // only have rights that are a subset of those for the original
+ // monitored directory file descriptor.
+ cap_rights_t rights;
+ CAP_SET_ALL(&rights);
+ EXPECT_OK(cap_rights_get(ev.fd, &rights));
+ EXPECT_RIGHTS_IN(&rights, &r_rslstat);
+#endif
+
+ // Wait for the child.
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+
+ close(cap_dfd_rsstat);
+ close(cap_dfd_rsl);
+ close(cap_dfd_rs);
+ close(cap_dfd);
+ close(dfd);
+ unlink(TmpFile("cap_notify/file"));
+ unlink(TmpFile("cap_notify/temp"));
+ rmdir(TmpFile("cap_notify"));
+ close(cap_fd_not);
+ close(cap_fd_poll);
+ close(cap_fd_rw);
+ close(cap_fd_wo);
+ close(cap_fd_ro);
+ close(fa_fd);
+}
+#endif
+
+TEST(Linux, inotify) {
+ int i_fd = inotify_init();
+ EXPECT_OK(i_fd);
+
+ cap_rights_t r_rs;
+ cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
+ cap_rights_t r_ws;
+ cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
+ cap_rights_t r_rws;
+ cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
+ cap_rights_t r_rwsnotify;
+ cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY);
+
+ int cap_fd_ro = dup(i_fd);
+ EXPECT_OK(cap_fd_ro);
+ EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs));
+ int cap_fd_wo = dup(i_fd);
+ EXPECT_OK(cap_fd_wo);
+ EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws));
+ int cap_fd_rw = dup(i_fd);
+ EXPECT_OK(cap_fd_rw);
+ EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws));
+ int cap_fd_all = dup(i_fd);
+ EXPECT_OK(cap_fd_all);
+ EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwsnotify));
+
+ int fd = open(TmpFile("cap_inotify"), O_CREAT|O_RDWR, 0644);
+ EXPECT_NOTCAPABLE(inotify_add_watch(cap_fd_rw, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY));
+ int wd = inotify_add_watch(i_fd, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY);
+ EXPECT_OK(wd);
+
+ unsigned char buffer[] = {1, 2, 3, 4};
+ EXPECT_OK(write(fd, buffer, sizeof(buffer)));
+
+ struct inotify_event iev;
+ memset(&iev, 0, sizeof(iev));
+ EXPECT_NOTCAPABLE(read(cap_fd_wo, &iev, sizeof(iev)));
+ int rc = read(cap_fd_ro, &iev, sizeof(iev));
+ EXPECT_OK(rc);
+ EXPECT_EQ((int)sizeof(iev), rc);
+ EXPECT_EQ(wd, iev.wd);
+
+ EXPECT_NOTCAPABLE(inotify_rm_watch(cap_fd_wo, wd));
+ EXPECT_OK(inotify_rm_watch(cap_fd_all, wd));
+
+ close(fd);
+ close(cap_fd_all);
+ close(cap_fd_rw);
+ close(cap_fd_wo);
+ close(cap_fd_ro);
+ close(i_fd);
+ unlink(TmpFile("cap_inotify"));
+}
+
+TEST(Linux, ArchChange) {
+ const char* prog_candidates[] = {"./mini-me.32", "./mini-me.x32", "./mini-me.64"};
+ const char* progs[] = {NULL, NULL, NULL};
+ char* argv_pass[] = {(char*)"to-come", (char*)"--capmode", NULL};
+ char* null_envp[] = {NULL};
+ int fds[3];
+ int count = 0;
+
+ for (int ii = 0; ii < 3; ii++) {
+ fds[count] = open(prog_candidates[ii], O_RDONLY);
+ if (fds[count] >= 0) {
+ progs[count] = prog_candidates[ii];
+ count++;
+ }
+ }
+ if (count == 0) {
+ TEST_SKIPPED("no different-architecture programs available");
+ return;
+ }
+
+ for (int ii = 0; ii < count; ii++) {
+ // Fork-and-exec a binary of this architecture.
+ pid_t child = fork();
+ if (child == 0) {
+ EXPECT_OK(cap_enter()); // Enter capability mode
+ if (verbose) fprintf(stderr, "[%d] call fexecve(%s, %s)\n",
+ getpid_(), progs[ii], argv_pass[1]);
+ argv_pass[0] = (char *)progs[ii];
+ int rc = fexecve_(fds[ii], argv_pass, null_envp);
+ fprintf(stderr, "fexecve(%s) returned %d errno %d\n", progs[ii], rc, errno);
+ exit(99); // Should not reach here.
+ }
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+ close(fds[ii]);
+ }
+}
+
+FORK_TEST(Linux, Namespace) {
+ REQUIRE_ROOT();
+ pid_t me = getpid_();
+
+ // Create a new UTS namespace.
+ EXPECT_OK(unshare(CLONE_NEWUTS));
+ // Open an FD to its symlink.
+ char buffer[256];
+ sprintf(buffer, "/proc/%d/ns/uts", me);
+ int ns_fd = open(buffer, O_RDONLY);
+
+ cap_rights_t r_rwlstat;
+ cap_rights_init(&r_rwlstat, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT);
+ cap_rights_t r_rwlstatns;
+ cap_rights_init(&r_rwlstatns, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT, CAP_SETNS);
+
+ int cap_fd = dup(ns_fd);
+ EXPECT_OK(cap_fd);
+ EXPECT_OK(cap_rights_limit(cap_fd, &r_rwlstat));
+ int cap_fd_setns = dup(ns_fd);
+ EXPECT_OK(cap_fd_setns);
+ EXPECT_OK(cap_rights_limit(cap_fd_setns, &r_rwlstatns));
+ EXPECT_NOTCAPABLE(setns(cap_fd, CLONE_NEWUTS));
+ EXPECT_OK(setns(cap_fd_setns, CLONE_NEWUTS));
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ // No setns(2) but unshare(2) is allowed.
+ EXPECT_CAPMODE(setns(ns_fd, CLONE_NEWUTS));
+ EXPECT_OK(unshare(CLONE_NEWUTS));
+}
+
+static void SendFD(int fd, int over) {
+ struct msghdr mh;
+ mh.msg_name = NULL; // No address needed
+ mh.msg_namelen = 0;
+ char buffer1[1024];
+ struct iovec iov[1];
+ iov[0].iov_base = buffer1;
+ iov[0].iov_len = sizeof(buffer1);
+ mh.msg_iov = iov;
+ mh.msg_iovlen = 1;
+ char buffer2[1024];
+ mh.msg_control = buffer2;
+ mh.msg_controllen = CMSG_LEN(sizeof(int));
+ struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh);
+ cmptr->cmsg_level = SOL_SOCKET;
+ cmptr->cmsg_type = SCM_RIGHTS;
+ cmptr->cmsg_len = CMSG_LEN(sizeof(int));
+ *(int *)CMSG_DATA(cmptr) = fd;
+ buffer1[0] = 0;
+ iov[0].iov_len = 1;
+ int rc = sendmsg(over, &mh, 0);
+ EXPECT_OK(rc);
+}
+
+static int ReceiveFD(int over) {
+ struct msghdr mh;
+ mh.msg_name = NULL; // No address needed
+ mh.msg_namelen = 0;
+ char buffer1[1024];
+ struct iovec iov[1];
+ iov[0].iov_base = buffer1;
+ iov[0].iov_len = sizeof(buffer1);
+ mh.msg_iov = iov;
+ mh.msg_iovlen = 1;
+ char buffer2[1024];
+ mh.msg_control = buffer2;
+ mh.msg_controllen = sizeof(buffer2);
+ int rc = recvmsg(over, &mh, 0);
+ EXPECT_OK(rc);
+ EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen);
+ struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh);
+ int fd = *(int*)CMSG_DATA(cmptr);
+ EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len);
+ cmptr = CMSG_NXTHDR(&mh, cmptr);
+ EXPECT_TRUE(cmptr == NULL);
+ return fd;
+}
+
+static int shared_pd = -1;
+static int shared_sock_fds[2];
+
+static int ChildFunc(void *arg) {
+ // This function is running in a new PID namespace, and so is pid 1.
+ if (verbose) fprintf(stderr, " ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid());
+ EXPECT_EQ(1, getpid_());
+ EXPECT_EQ(0, getppid());
+
+ // The shared process descriptor is outside our namespace, so we cannot
+ // get its pid.
+ if (verbose) fprintf(stderr, " ChildFunc: shared_pd=%d\n", shared_pd);
+ pid_t shared_child = -1;
+ EXPECT_OK(pdgetpid(shared_pd, &shared_child));
+ if (verbose) fprintf(stderr, " ChildFunc: corresponding pid=%d\n", shared_child);
+ EXPECT_EQ(0, shared_child);
+
+ // But we can pdkill() it even so.
+ if (verbose) fprintf(stderr, " ChildFunc: call pdkill(pd=%d)\n", shared_pd);
+ EXPECT_OK(pdkill(shared_pd, SIGINT));
+
+ int pd;
+ pid_t child = pdfork(&pd, 0);
+ EXPECT_OK(child);
+ if (child == 0) {
+ // Child: expect pid 2.
+ if (verbose) fprintf(stderr, " child of ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid());
+ EXPECT_EQ(2, getpid_());
+ EXPECT_EQ(1, getppid());
+ while (true) {
+ if (verbose) fprintf(stderr, " child of ChildFunc: \"I aten't dead\"\n");
+ sleep(1);
+ }
+ exit(0);
+ }
+ EXPECT_EQ(2, child);
+ EXPECT_PID_ALIVE(child);
+ if (verbose) fprintf(stderr, " ChildFunc: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n",
+ pd, child, ProcessState(child));
+
+ pid_t pid;
+ EXPECT_OK(pdgetpid(pd, &pid));
+ EXPECT_EQ(child, pid);
+
+ sleep(2);
+
+ // Send the process descriptor over UNIX domain socket back to parent.
+ SendFD(pd, shared_sock_fds[1]);
+
+ // Wait for death of (grand)child, killed by our parent.
+ if (verbose) fprintf(stderr, " ChildFunc: wait on pid=%d\n", child);
+ int status;
+ EXPECT_EQ(child, wait4(child, &status, __WALL, NULL));
+
+ if (verbose) fprintf(stderr, " ChildFunc: return 0\n");
+ return 0;
+}
+
+#define STACK_SIZE (1024 * 1024)
+static char child_stack[STACK_SIZE];
+
+// TODO(drysdale): fork into a user namespace first so REQUIRE_ROOT can be removed.
+TEST(Linux, PidNamespacePdFork) {
+ REQUIRE_ROOT();
+ // Pass process descriptors in both directions across a PID namespace boundary.
+ // pdfork() off a child before we start, holding its process descriptor in a global
+ // variable that's accessible to children.
+ pid_t firstborn = pdfork(&shared_pd, 0);
+ EXPECT_OK(firstborn);
+ if (firstborn == 0) {
+ while (true) {
+ if (verbose) fprintf(stderr, " Firstborn: \"I aten't dead\"\n");
+ sleep(1);
+ }
+ exit(0);
+ }
+ EXPECT_PID_ALIVE(firstborn);
+ if (verbose) fprintf(stderr, "Parent: pre-pdfork()ed pd=%d, pid=%d state='%c'\n",
+ shared_pd, firstborn, ProcessState(firstborn));
+ sleep(2);
+
+ // Prepare sockets to communicate with child process.
+ EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));
+
+ // Clone into a child process with a new pid namespace.
+ pid_t child = clone(ChildFunc, child_stack + STACK_SIZE,
+ CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
+ EXPECT_OK(child);
+ EXPECT_PID_ALIVE(child);
+ if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));
+
+ // Ensure the child runs. First thing it does is to kill our firstborn, using shared_pd.
+ sleep(1);
+ EXPECT_PID_DEAD(firstborn);
+
+ // But we can still retrieve firstborn's PID, as it's not been reaped yet.
+ pid_t child0;
+ EXPECT_OK(pdgetpid(shared_pd, &child0));
+ EXPECT_EQ(firstborn, child0);
+ if (verbose) fprintf(stderr, "Parent: check on firstborn: pdgetpid(pd=%d) -> child=%d state='%c'\n",
+ shared_pd, child0, ProcessState(child0));
+
+ // Now reap it.
+ int status;
+ EXPECT_EQ(firstborn, waitpid(firstborn, &status, __WALL));
+
+ // Get the process descriptor of the child-of-child via socket transfer.
+ int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
+
+ // Our notion of the pid associated with the grandchild is in the main PID namespace.
+ pid_t grandchild;
+ EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
+ EXPECT_NE(2, grandchild);
+ if (verbose) fprintf(stderr, "Parent: pre-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n",
+ grandchild_pd, grandchild, ProcessState(grandchild));
+ EXPECT_PID_ALIVE(grandchild);
+
+ // Kill the grandchild via the process descriptor.
+ EXPECT_OK(pdkill(grandchild_pd, SIGINT));
+ usleep(10000);
+ if (verbose) fprintf(stderr, "Parent: post-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n",
+ grandchild_pd, grandchild, ProcessState(grandchild));
+ EXPECT_PID_DEAD(grandchild);
+
+ sleep(2);
+
+ // Wait for the child.
+ EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
+ int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+
+ close(shared_sock_fds[0]);
+ close(shared_sock_fds[1]);
+ close(shared_pd);
+ close(grandchild_pd);
+}
+
+int NSInit(void *data) {
+ // This function is running in a new PID namespace, and so is pid 1.
+ if (verbose) fprintf(stderr, " NSInit: pid=%d, ppid=%d\n", getpid_(), getppid());
+ EXPECT_EQ(1, getpid_());
+ EXPECT_EQ(0, getppid());
+
+ int pd;
+ pid_t child = pdfork(&pd, 0);
+ EXPECT_OK(child);
+ if (child == 0) {
+ // Child: loop forever until terminated.
+ if (verbose) fprintf(stderr, " child of NSInit: pid=%d, ppid=%d\n", getpid_(), getppid());
+ while (true) {
+ if (verbose) fprintf(stderr, " child of NSInit: \"I aten't dead\"\n");
+ usleep(100000);
+ }
+ exit(0);
+ }
+ EXPECT_EQ(2, child);
+ EXPECT_PID_ALIVE(child);
+ if (verbose) fprintf(stderr, " NSInit: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n",
+ pd, child, ProcessState(child));
+ sleep(1);
+
+ // Send the process descriptor over UNIX domain socket back to parent.
+ SendFD(pd, shared_sock_fds[1]);
+ close(pd);
+
+ // Wait for a byte back in the other direction.
+ int value;
+ if (verbose) fprintf(stderr, " NSInit: block waiting for value\n");
+ read(shared_sock_fds[1], &value, sizeof(value));
+
+ if (verbose) fprintf(stderr, " NSInit: return 0\n");
+ return 0;
+}
+
+TEST(Linux, DeadNSInit) {
+ REQUIRE_ROOT();
+
+ // Prepare sockets to communicate with child process.
+ EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));
+
+ // Clone into a child process with a new pid namespace.
+ pid_t child = clone(NSInit, child_stack + STACK_SIZE,
+ CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
+ usleep(10000);
+ EXPECT_OK(child);
+ EXPECT_PID_ALIVE(child);
+ if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));
+
+ // Get the process descriptor of the child-of-child via socket transfer.
+ int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
+ pid_t grandchild;
+ EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
+ if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild));
+
+ // Send an int to the child to trigger its termination. Grandchild should also
+ // go, as its init process is gone.
+ int zero = 0;
+ if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n");
+ write(shared_sock_fds[0], &zero, sizeof(zero));
+ EXPECT_PID_ZOMBIE(child);
+ EXPECT_PID_GONE(grandchild);
+
+ // Wait for the child.
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
+ int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+ EXPECT_PID_GONE(child);
+
+ close(shared_sock_fds[0]);
+ close(shared_sock_fds[1]);
+ close(grandchild_pd);
+
+ if (verbose) {
+ fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child));
+ fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild));
+ }
+}
+
+TEST(Linux, DeadNSInit2) {
+ REQUIRE_ROOT();
+
+ // Prepare sockets to communicate with child process.
+ EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));
+
+ // Clone into a child process with a new pid namespace.
+ pid_t child = clone(NSInit, child_stack + STACK_SIZE,
+ CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
+ usleep(10000);
+ EXPECT_OK(child);
+ EXPECT_PID_ALIVE(child);
+ if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));
+
+ // Get the process descriptor of the child-of-child via socket transfer.
+ int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
+ pid_t grandchild;
+ EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
+ if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild));
+
+ // Kill the grandchild
+ EXPECT_OK(pdkill(grandchild_pd, SIGINT));
+ usleep(10000);
+ EXPECT_PID_ZOMBIE(grandchild);
+ // Close the process descriptor, so there are now no procdesc references to grandchild.
+ close(grandchild_pd);
+
+ // Send an int to the child to trigger its termination. Grandchild should also
+ // go, as its init process is gone.
+ int zero = 0;
+ if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n");
+ write(shared_sock_fds[0], &zero, sizeof(zero));
+ EXPECT_PID_ZOMBIE(child);
+ EXPECT_PID_GONE(grandchild);
+
+ // Wait for the child.
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
+ int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+
+ close(shared_sock_fds[0]);
+ close(shared_sock_fds[1]);
+
+ if (verbose) {
+ fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child));
+ fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild));
+ }
+}
+
+#ifdef __x86_64__
+FORK_TEST(Linux, CheckHighWord) {
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+ EXPECT_OK(rc);
+ EXPECT_EQ(1, rc); // no_new_privs = 1
+
+ // Set some of the high 32-bits of argument zero.
+ uint64_t big_cmd = PR_GET_NO_NEW_PRIVS | 0x100000000LL;
+ EXPECT_CAPMODE(syscall(__NR_prctl, big_cmd, 0, 0, 0, 0));
+}
+#endif
+
+FORK_TEST(Linux, PrctlOpenatBeneath) {
+ // Set no_new_privs = 1
+ EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+ EXPECT_OK(rc);
+ EXPECT_EQ(1, rc); // no_new_privs = 1
+
+ // Set openat-beneath mode
+ EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 1, 0, 0, 0));
+ rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
+ EXPECT_OK(rc);
+ EXPECT_EQ(1, rc); // openat_beneath = 1
+
+ // Clear openat-beneath mode
+ EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0));
+ rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
+ EXPECT_OK(rc);
+ EXPECT_EQ(0, rc); // openat_beneath = 0
+
+ EXPECT_OK(cap_enter()); // Enter capability mode
+
+ // Expect to be in openat_beneath mode
+ rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
+ EXPECT_OK(rc);
+ EXPECT_EQ(1, rc); // openat_beneath = 1
+
+ // Expect this to be immutable.
+ EXPECT_CAPMODE(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0));
+ rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
+ EXPECT_OK(rc);
+ EXPECT_EQ(1, rc); // openat_beneath = 1
+
+}
+
+FORK_TEST(Linux, NoNewPrivs) {
+ if (getuid() == 0) {
+ // If root, drop CAP_SYS_ADMIN POSIX.1e capability.
+ struct __user_cap_header_struct hdr;
+ hdr.version = _LINUX_CAPABILITY_VERSION_3;
+ hdr.pid = getpid_();
+ struct __user_cap_data_struct data[3];
+ EXPECT_OK(capget(&hdr, &data[0]));
+ data[0].effective &= ~(1 << CAP_SYS_ADMIN);
+ data[0].permitted &= ~(1 << CAP_SYS_ADMIN);
+ data[0].inheritable &= ~(1 << CAP_SYS_ADMIN);
+ EXPECT_OK(capset(&hdr, &data[0]));
+ }
+ int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+ EXPECT_OK(rc);
+ EXPECT_EQ(0, rc); // no_new_privs == 0
+
+ // Can't enter seccomp-bpf mode with no_new_privs == 0
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
+ };
+ struct sock_fprog bpf;
+ bpf.len = (sizeof(filter) / sizeof(filter[0]));
+ bpf.filter = filter;
+ rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EACCES, errno);
+
+ // Set no_new_privs = 1
+ EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+ EXPECT_OK(rc);
+ EXPECT_EQ(1, rc); // no_new_privs = 1
+
+ // Can now turn on seccomp mode
+ EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0));
+}
+
+/* Macros for BPF generation */
+#define BPF_RETURN_ERRNO(err) \
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO | (err & 0xFFFF))
+#define BPF_KILL_PROCESS \
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
+#define BPF_ALLOW \
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
+#define EXAMINE_SYSCALL \
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr))
+#define ALLOW_SYSCALL(name) \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
+ BPF_ALLOW
+#define KILL_SYSCALL(name) \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
+ BPF_KILL_PROCESS
+#define FAIL_SYSCALL(name, err) \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
+ BPF_RETURN_ERRNO(err)
+
+TEST(Linux, CapModeWithBPF) {
+ pid_t child = fork();
+ EXPECT_OK(child);
+ if (child == 0) {
+ int fd = open(TmpFile("cap_bpf_capmode"), O_CREAT|O_RDWR, 0644);
+ cap_rights_t rights;
+ cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC);
+ EXPECT_OK(cap_rights_limit(fd, &rights));
+
+ struct sock_filter filter[] = { EXAMINE_SYSCALL,
+ FAIL_SYSCALL(fchmod, ENOMEM),
+ FAIL_SYSCALL(fstat, ENOEXEC),
+ ALLOW_SYSCALL(close),
+ KILL_SYSCALL(fsync),
+ BPF_ALLOW };
+ struct sock_fprog bpf = {.len = (sizeof(filter) / sizeof(filter[0])),
+ .filter = filter};
+ // Set up seccomp-bpf first.
+ EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0));
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ // fchmod is allowed by Capsicum, but failed by BPF.
+ EXPECT_SYSCALL_FAIL(ENOMEM, fchmod(fd, 0644));
+ // open is allowed by BPF, but failed by Capsicum
+ EXPECT_SYSCALL_FAIL(ECAPMODE, open(TmpFile("cap_bpf_capmode"), O_RDONLY));
+ // fstat is failed by both BPF and Capsicum; tie-break is on errno
+ struct stat buf;
+ EXPECT_SYSCALL_FAIL(ENOEXEC, fstat(fd, &buf));
+ // fsync is allowed by Capsicum, but BPF's SIGSYS generation take precedence
+ fsync(fd); // terminate with unhandled SIGSYS
+ exit(0);
+ }
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_TRUE(WIFSIGNALED(status));
+ EXPECT_EQ(SIGSYS, WTERMSIG(status));
+ unlink(TmpFile("cap_bpf_capmode"));
+}
+
+TEST(Linux, AIO) {
+ int fd = open(TmpFile("cap_aio"), O_CREAT|O_RDWR, 0644);
+ EXPECT_OK(fd);
+
+ cap_rights_t r_rs;
+ cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
+ cap_rights_t r_ws;
+ cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
+ cap_rights_t r_rwssync;
+ cap_rights_init(&r_rwssync, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC);
+
+ int cap_ro = dup(fd);
+ EXPECT_OK(cap_ro);
+ EXPECT_OK(cap_rights_limit(cap_ro, &r_rs));
+ EXPECT_OK(cap_ro);
+ int cap_wo = dup(fd);
+ EXPECT_OK(cap_wo);
+ EXPECT_OK(cap_rights_limit(cap_wo, &r_ws));
+ EXPECT_OK(cap_wo);
+ int cap_all = dup(fd);
+ EXPECT_OK(cap_all);
+ EXPECT_OK(cap_rights_limit(cap_all, &r_rwssync));
+ EXPECT_OK(cap_all);
+
+ // Linux: io_setup, io_submit, io_getevents, io_cancel, io_destroy
+ aio_context_t ctx = 0;
+ EXPECT_OK(syscall(__NR_io_setup, 10, &ctx));
+
+ unsigned char buffer[32] = {1, 2, 3, 4};
+ struct iocb req;
+ memset(&req, 0, sizeof(req));
+ req.aio_reqprio = 0;
+ req.aio_fildes = fd;
+ uintptr_t bufaddr = (uintptr_t)buffer;
+ req.aio_buf = (__u64)bufaddr;
+ req.aio_nbytes = 4;
+ req.aio_offset = 0;
+ struct iocb* reqs[1] = {&req};
+
+ // Write operation
+ req.aio_lio_opcode = IOCB_CMD_PWRITE;
+ req.aio_fildes = cap_ro;
+ EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
+ req.aio_fildes = cap_wo;
+ EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs));
+
+ // Sync operation
+ req.aio_lio_opcode = IOCB_CMD_FSYNC;
+ EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
+ req.aio_lio_opcode = IOCB_CMD_FDSYNC;
+ EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
+ // Even with CAP_FSYNC, turns out fsync/fdsync aren't implemented
+ req.aio_fildes = cap_all;
+ EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
+ req.aio_lio_opcode = IOCB_CMD_FSYNC;
+ EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
+
+ // Read operation
+ req.aio_lio_opcode = IOCB_CMD_PREAD;
+ req.aio_fildes = cap_wo;
+ EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
+ req.aio_fildes = cap_ro;
+ EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs));
+
+ EXPECT_OK(syscall(__NR_io_destroy, ctx));
+
+ close(cap_all);
+ close(cap_wo);
+ close(cap_ro);
+ close(fd);
+ unlink(TmpFile("cap_aio"));
+}
+
+#ifndef KCMP_FILE
+#define KCMP_FILE 0
+#endif
+TEST(Linux, Kcmp) {
+ // This requires CONFIG_CHECKPOINT_RESTORE in kernel config.
+ int fd = open("/etc/passwd", O_RDONLY);
+ EXPECT_OK(fd);
+ pid_t parent = getpid_();
+
+ errno = 0;
+ int rc = syscall(__NR_kcmp, parent, parent, KCMP_FILE, fd, fd);
+ if (rc == -1 && errno == ENOSYS) {
+ TEST_SKIPPED("kcmp(2) gives -ENOSYS");
+ return;
+ }
+
+ pid_t child = fork();
+ if (child == 0) {
+ // Child: limit rights on FD.
+ child = getpid_();
+ EXPECT_OK(syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd));
+ cap_rights_t rights;
+ cap_rights_init(&rights, CAP_READ, CAP_WRITE);
+ EXPECT_OK(cap_rights_limit(fd, &rights));
+ // A capability wrapping a normal FD is different (from a kcmp(2) perspective)
+ // than the original file.
+ EXPECT_NE(0, syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd));
+ exit(HasFailure());
+ }
+ // Wait for the child.
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+
+ close(fd);
+}
+
+TEST(Linux, ProcFS) {
+ cap_rights_t rights;
+ cap_rights_init(&rights, CAP_READ, CAP_SEEK);
+ int fd = open("/etc/passwd", O_RDONLY);
+ EXPECT_OK(fd);
+ lseek(fd, 4, SEEK_SET);
+ int cap = dup(fd);
+ EXPECT_OK(cap);
+ EXPECT_OK(cap_rights_limit(cap, &rights));
+ pid_t me = getpid_();
+
+ char buffer[1024];
+ sprintf(buffer, "/proc/%d/fdinfo/%d", me, cap);
+ int procfd = open(buffer, O_RDONLY);
+ EXPECT_OK(procfd) << " failed to open " << buffer;
+ if (procfd < 0) return;
+ int proccap = dup(procfd);
+ EXPECT_OK(proccap);
+ EXPECT_OK(cap_rights_limit(proccap, &rights));
+
+ EXPECT_OK(read(proccap, buffer, sizeof(buffer)));
+ // The fdinfo should include the file pos of the underlying file
+ EXPECT_NE((char*)NULL, strstr(buffer, "pos:\t4"));
+ // ...and the rights of the Capsicum capability.
+ EXPECT_NE((char*)NULL, strstr(buffer, "rights:\t0x"));
+
+ close(procfd);
+ close(proccap);
+ close(cap);
+ close(fd);
+}
+
+FORK_TEST(Linux, ProcessClocks) {
+ pid_t self = getpid_();
+ pid_t child = fork();
+ EXPECT_OK(child);
+ if (child == 0) {
+ child = getpid_();
+ usleep(100000);
+ exit(0);
+ }
+
+ EXPECT_OK(cap_enter()); // Enter capability mode.
+
+ // Nefariously build a clock ID for the child's CPU time.
+ // This relies on knowledge of the internal layout of clock IDs.
+ clockid_t child_clock;
+ child_clock = ((~child) << 3) | 0x0;
+ struct timespec ts;
+ memset(&ts, 0, sizeof(ts));
+
+ // TODO(drysdale): Should not be possible to retrieve info about a
+ // different process, as the PID global namespace should be locked
+ // down.
+ EXPECT_OK(clock_gettime(child_clock, &ts));
+ if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(child=%d->0x%08x) is %ld.%09ld \n",
+ self, child, child_clock, (long)ts.tv_sec, (long)ts.tv_nsec);
+
+ child_clock = ((~1) << 3) | 0x0;
+ memset(&ts, 0, sizeof(ts));
+ EXPECT_OK(clock_gettime(child_clock, &ts));
+ if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(init=1->0x%08x) is %ld.%09ld \n",
+ self, child_clock, (long)ts.tv_sec, (long)ts.tv_nsec);
+
+ // Orphan the child.
+}
+
+TEST(Linux, SetLease) {
+ int fd_all = open(TmpFile("cap_lease"), O_CREAT|O_RDWR, 0644);
+ EXPECT_OK(fd_all);
+ int fd_rw = dup(fd_all);
+ EXPECT_OK(fd_rw);
+
+ cap_rights_t r_all;
+ cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_FLOCK, CAP_FSIGNAL);
+ EXPECT_OK(cap_rights_limit(fd_all, &r_all));
+
+ cap_rights_t r_rw;
+ cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+ EXPECT_OK(cap_rights_limit(fd_rw, &r_rw));
+
+ EXPECT_NOTCAPABLE(fcntl(fd_rw, F_SETLEASE, F_WRLCK));
+ EXPECT_NOTCAPABLE(fcntl(fd_rw, F_GETLEASE));
+
+ if (!tmpdir_on_tmpfs) { // tmpfs doesn't support leases
+ EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_WRLCK));
+ EXPECT_EQ(F_WRLCK, fcntl(fd_all, F_GETLEASE));
+
+ EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_UNLCK, 0));
+ EXPECT_EQ(F_UNLCK, fcntl(fd_all, F_GETLEASE));
+ }
+ close(fd_all);
+ close(fd_rw);
+ unlink(TmpFile("cap_lease"));
+}
+
+TEST(Linux, InvalidRightsSyscall) {
+ int fd = open(TmpFile("cap_invalid_rights"), O_RDONLY|O_CREAT, 0644);
+ EXPECT_OK(fd);
+
+ cap_rights_t rights;
+ cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FCHMOD, CAP_FSTAT);
+
+ // Use the raw syscall throughout.
+ EXPECT_EQ(0, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));
+
+ // Directly access the syscall, and find all unseemly manner of use for it.
+ // - Invalid flags
+ EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 1));
+ EXPECT_EQ(EINVAL, errno);
+ // - Specify an fcntl subright, but no CAP_FCNTL set
+ EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, CAP_FCNTL_GETFL, 0, NULL, 0));
+ EXPECT_EQ(EINVAL, errno);
+ // - Specify an ioctl subright, but no CAP_IOCTL set
+ unsigned int ioctl1 = 1;
+ EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, &ioctl1, 0));
+ EXPECT_EQ(EINVAL, errno);
+ // - N ioctls, but null pointer passed
+ EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, NULL, 0));
+ EXPECT_EQ(EINVAL, errno);
+ // - Invalid nioctls
+ EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, -2, NULL, 0));
+ EXPECT_EQ(EINVAL, errno);
+ // - Null primary rights
+ EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, NULL, 0, 0, NULL, 0));
+ EXPECT_EQ(EFAULT, errno);
+ // - Invalid index bitmask
+ rights.cr_rights[0] |= 3ULL << 57;
+ EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));
+ EXPECT_EQ(EINVAL, errno);
+ // - Invalid version
+ rights.cr_rights[0] |= 2ULL << 62;
+ EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ close(fd);
+ unlink(TmpFile("cap_invalid_rights"));
+}
+
+FORK_TEST_ON(Linux, OpenByHandleAt, TmpFile("cap_openbyhandle_testfile")) {
+ REQUIRE_ROOT();
+ int dir = open(tmpdir.c_str(), O_RDONLY);
+ EXPECT_OK(dir);
+ int fd = openat(dir, "cap_openbyhandle_testfile", O_RDWR|O_CREAT, 0644);
+ EXPECT_OK(fd);
+ const char* message = "Saved text";
+ EXPECT_OK(write(fd, message, strlen(message)));
+ close(fd);
+
+ struct file_handle* fhandle = (struct file_handle*)malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ fhandle->handle_bytes = MAX_HANDLE_SZ;
+ int mount_id;
+ EXPECT_OK(name_to_handle_at(dir, "cap_openbyhandle_testfile", fhandle, &mount_id, 0));
+
+ fd = open_by_handle_at(dir, fhandle, O_RDONLY);
+ EXPECT_OK(fd);
+ char buffer[200];
+ EXPECT_OK(read(fd, buffer, 199));
+ EXPECT_EQ(std::string(message), std::string(buffer));
+ close(fd);
+
+ // Cannot issue open_by_handle_at after entering capability mode.
+ cap_enter();
+ EXPECT_CAPMODE(open_by_handle_at(dir, fhandle, O_RDONLY));
+
+ close(dir);
+}
+
+int getrandom_(void *buf, size_t buflen, unsigned int flags) {
+#ifdef __NR_getrandom
+ return syscall(__NR_getrandom, buf, buflen, flags);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#include <linux/random.h> // Requires 3.17 kernel
+FORK_TEST(Linux, GetRandom) {
+ EXPECT_OK(cap_enter());
+ unsigned char buffer[1024];
+ unsigned char buffer2[1024];
+ EXPECT_OK(getrandom_(buffer, sizeof(buffer), GRND_NONBLOCK));
+ EXPECT_OK(getrandom_(buffer2, sizeof(buffer2), GRND_NONBLOCK));
+ EXPECT_NE(0, memcmp(buffer, buffer2, sizeof(buffer)));
+}
+#endif
+
+int memfd_create_(const char *name, unsigned int flags) {
+#ifdef __NR_memfd_create
+ return syscall(__NR_memfd_create, name, flags);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#include <linux/memfd.h> // Requires 3.17 kernel
+TEST(Linux, MemFDDeathTest) {
+ int memfd = memfd_create_("capsicum-test", MFD_ALLOW_SEALING);
+ if (memfd == -1 && errno == ENOSYS) {
+ TEST_SKIPPED("memfd_create(2) gives -ENOSYS");
+ return;
+ }
+ const int LEN = 16;
+ EXPECT_OK(ftruncate(memfd, LEN));
+ int memfd_ro = dup(memfd);
+ int memfd_rw = dup(memfd);
+ EXPECT_OK(memfd_ro);
+ EXPECT_OK(memfd_rw);
+ cap_rights_t rights;
+ EXPECT_OK(cap_rights_limit(memfd_ro, cap_rights_init(&rights, CAP_MMAP_R, CAP_FSTAT)));
+ EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW, CAP_FCHMOD)));
+
+ unsigned char *p_ro = (unsigned char *)mmap(NULL, LEN, PROT_READ, MAP_SHARED, memfd_ro, 0);
+ EXPECT_NE((unsigned char *)MAP_FAILED, p_ro);
+ unsigned char *p_rw = (unsigned char *)mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_rw, 0);
+ EXPECT_NE((unsigned char *)MAP_FAILED, p_rw);
+ EXPECT_EQ(MAP_FAILED,
+ mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_ro, 0));
+
+ *p_rw = 42;
+ EXPECT_EQ(42, *p_ro);
+ EXPECT_DEATH(*p_ro = 42, "");
+
+#ifndef F_ADD_SEALS
+ // Hack for when libc6 does not yet include the updated linux/fcntl.h from kernel 3.17
+#define _F_LINUX_SPECIFIC_BASE F_SETLEASE
+#define F_ADD_SEALS (_F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (_F_LINUX_SPECIFIC_BASE + 10)
+#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
+#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
+#define F_SEAL_GROW 0x0004 /* prevent file from growing */
+#define F_SEAL_WRITE 0x0008 /* prevent writes */
+#endif
+
+ // Reading the seal information requires CAP_FSTAT.
+ int seals = fcntl(memfd, F_GET_SEALS);
+ EXPECT_OK(seals);
+ if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals);
+ int seals_ro = fcntl(memfd_ro, F_GET_SEALS);
+ EXPECT_EQ(seals, seals_ro);
+ if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro);
+ int seals_rw = fcntl(memfd_rw, F_GET_SEALS);
+ EXPECT_NOTCAPABLE(seals_rw);
+
+ // Fail to seal as a writable mapping exists.
+ EXPECT_EQ(-1, fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
+ EXPECT_EQ(EBUSY, errno);
+ *p_rw = 42;
+
+ // Seal the rw version; need to unmap first.
+ munmap(p_rw, LEN);
+ munmap(p_ro, LEN);
+ EXPECT_OK(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
+
+ seals = fcntl(memfd, F_GET_SEALS);
+ EXPECT_OK(seals);
+ if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals);
+ seals_ro = fcntl(memfd_ro, F_GET_SEALS);
+ EXPECT_EQ(seals, seals_ro);
+ if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro);
+
+ // Remove the CAP_FCHMOD right, can no longer add seals.
+ EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW)));
+ EXPECT_NOTCAPABLE(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
+
+ close(memfd);
+ close(memfd_ro);
+ close(memfd_rw);
+}
+#endif
+
+#else
+void noop() {}
+#endif
diff --git a/makefile b/makefile
new file mode 100644
index 000000000000..c92caeb3bc10
--- /dev/null
+++ b/makefile
@@ -0,0 +1,36 @@
+all: capsicum-test smoketest mini-me mini-me.noexec mini-me.setuid $(EXTRA_PROGS)
+OBJECTS=capsicum-test-main.o capsicum-test.o capability-fd.o fexecve.o procdesc.o capmode.o fcntl.o ioctl.o openat.o sysctl.o select.o mqueue.o socket.o sctp.o capability-fd-pair.o linux.o overhead.o rename.o
+
+GTEST_DIR=gtest-1.8.1
+GTEST_INCS=-I$(GTEST_DIR)/include -I$(GTEST_DIR)
+GTEST_FLAGS=-DGTEST_USE_OWN_TR1_TUPLE=1 -DGTEST_HAS_TR1_TUPLE=1
+CXXFLAGS+=$(ARCHFLAG) -Wall -g $(GTEST_INCS) $(GTEST_FLAGS) --std=c++11
+CFLAGS+=$(ARCHFLAG) -Wall -g
+
+capsicum-test: $(OBJECTS) libgtest.a $(LOCAL_LIBS)
+ $(CXX) $(CXXFLAGS) -g -o $@ $(OBJECTS) libgtest.a -lpthread -lrt $(LIBSCTP) $(LIBCAPRIGHTS)
+
+# Small statically-linked program for fexecve tests
+# (needs to be statically linked so that execve()ing it
+# doesn't involve ld.so traversing the filesystem).
+mini-me: mini-me.c
+ $(CC) $(CFLAGS) -static -o $@ $<
+mini-me.noexec: mini-me
+ cp mini-me $@ && chmod -x $@
+mini-me.setuid: mini-me
+ rm -f $@ && cp mini-me $@&& sudo chown root $@ && sudo chmod u+s $@
+
+# Simple C test of Capsicum syscalls
+SMOKETEST_OBJECTS=smoketest.o
+smoketest: $(SMOKETEST_OBJECTS) $(LOCAL_LIBS)
+ $(CC) $(CFLAGS) -o $@ $(SMOKETEST_OBJECTS) $(LIBCAPRIGHTS)
+
+test: capsicum-test mini-me mini-me.noexec mini-me.setuid $(EXTRA_PROGS)
+ ./capsicum-test
+gtest-all.o:
+ $(CXX) $(ARCHFLAG) -I$(GTEST_DIR)/include -I$(GTEST_DIR) $(GTEST_FLAGS) -c ${GTEST_DIR}/src/gtest-all.cc
+libgtest.a: gtest-all.o
+ $(AR) -rv libgtest.a gtest-all.o
+
+clean:
+ rm -rf gtest-all.o libgtest.a capsicum-test mini-me mini-me.noexec smoketest $(SMOKETEST_OBJECTS) $(OBJECTS) $(LOCAL_CLEAN) $(EXTRA_PROGS)
diff --git a/mini-me.c b/mini-me.c
new file mode 100644
index 000000000000..be909cad4709
--- /dev/null
+++ b/mini-me.c
@@ -0,0 +1,38 @@
+#include <sys/types.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+int main(int argc, char* argv[]) {
+ if (argc == 2 && !strcmp(argv[1], "--pass")) {
+ fprintf(stderr,"[%d] %s immediately returning 0\n", getpid(), argv[0]);
+ return 0;
+ }
+
+ if (argc == 2 && !strcmp(argv[1], "--fail")) {
+ fprintf(stderr,"[%d] %s immediately returning 1\n", getpid(), argv[0]);
+ return 1;
+ }
+
+ if (argc == 2 && !strcmp(argv[1], "--checkroot")) {
+ int rc = (geteuid() == 0);
+ fprintf(stderr,"[uid:%d] %s immediately returning (geteuid() == 0) = %d\n", geteuid(), argv[0], rc);
+ return rc;
+ }
+
+ if (argc == 2 && !strcmp(argv[1], "--capmode")) {
+ /* Expect to already be in capability mode: check we can't open a file */
+ int rc = 0;
+
+ int fd = open("/etc/passwd", O_RDONLY);
+ if (fd > 0) {
+ fprintf(stderr,"[%d] %s unexpectedly able to open file\n", getpid(), argv[0]);
+ rc = 1;
+ }
+ fprintf(stderr,"[%d] %s --capmode returning %d\n", getpid(), argv[0], rc);
+ return rc;
+ }
+
+ return -1;
+}
diff --git a/mqueue.cc b/mqueue.cc
new file mode 100644
index 000000000000..b98523121fe0
--- /dev/null
+++ b/mqueue.cc
@@ -0,0 +1,100 @@
+// Tests for POSIX message queue functionality.
+
+#include <time.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <mqueue.h>
+
+#include <string>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+// Run a test case in a forked process, possibly cleaning up a
+// message after completion
+#define FORK_TEST_ON_MQ(test_case_name, test_name, test_mq) \
+ static void test_case_name##_##test_name##_ForkTest(); \
+ TEST(test_case_name, test_name ## Forked) { \
+ _RUN_FORKED_FN(test_case_name##_##test_name##_ForkTest, \
+ #test_case_name, #test_name); \
+ const char *mqname = test_mq; \
+ if (mqname) mq_unlink_(mqname); \
+ } \
+ static void test_case_name##_##test_name##_ForkTest()
+
+static bool invoked;
+void seen_it_done_it(int v) {
+ invoked = true;
+}
+
+FORK_TEST_ON_MQ(PosixMqueue, CapMode, "/cap_mq") {
+ int mq = mq_open_("/cap_mq", O_RDWR|O_CREAT, 0644, NULL);
+ // On FreeBSD, turn on message queue support with:
+ // - 'kldload mqueuefs'
+ // - 'options P1003_1B_MQUEUE' in kernel build config.
+ if (mq < 0 && errno == ENOSYS) {
+ TEST_SKIPPED("mq_open -> -ENOSYS");
+ return;
+ }
+ EXPECT_OK(mq);
+ cap_rights_t r_read;
+ cap_rights_init(&r_read, CAP_READ);
+ cap_rights_t r_write;
+ cap_rights_init(&r_write, CAP_WRITE);
+ cap_rights_t r_poll;
+ cap_rights_init(&r_poll, CAP_EVENT);
+
+ int cap_read_mq = dup(mq);
+ EXPECT_OK(cap_read_mq);
+ EXPECT_OK(cap_rights_limit(cap_read_mq, &r_read));
+ int cap_write_mq = dup(mq);
+ EXPECT_OK(cap_write_mq);
+ EXPECT_OK(cap_rights_limit(cap_write_mq, &r_write));
+ int cap_poll_mq = dup(mq);
+ EXPECT_OK(cap_poll_mq);
+ EXPECT_OK(cap_rights_limit(cap_poll_mq, &r_poll));
+ EXPECT_OK(mq_close_(mq));
+
+ signal(SIGUSR2, seen_it_done_it);
+
+ EXPECT_OK(cap_enter()); // Enter capability mode
+
+ // Can no longer access the message queue via the POSIX IPC namespace.
+ EXPECT_CAPMODE(mq_open_("/cap_mw", O_RDWR|O_CREAT, 0644, NULL));
+
+ struct sigevent se;
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = SIGUSR2;
+ EXPECT_OK(mq_notify_(cap_poll_mq, &se));
+ EXPECT_NOTCAPABLE(mq_notify_(cap_read_mq, &se));
+ EXPECT_NOTCAPABLE(mq_notify_(cap_write_mq, &se));
+
+ const unsigned int kPriority = 10;
+ const char* message = "xyzzy";
+ struct timespec ts;
+ ts.tv_sec = 1;
+ ts.tv_nsec = 0;
+ EXPECT_OK(mq_timedsend_(cap_write_mq, message, strlen(message) + 1, kPriority, &ts));
+ EXPECT_NOTCAPABLE(mq_timedsend_(cap_read_mq, message, strlen(message) + 1, kPriority, &ts));
+
+ sleep(1); // Give the notification a chance to arrive.
+ EXPECT_TRUE(invoked);
+
+ struct mq_attr mqa;
+ EXPECT_OK(mq_getattr_(cap_poll_mq, &mqa));
+ EXPECT_OK(mq_setattr_(cap_poll_mq, &mqa, NULL));
+ EXPECT_NOTCAPABLE(mq_getattr_(cap_write_mq, &mqa));
+
+ char* buffer = (char *)malloc(mqa.mq_msgsize);
+ unsigned int priority;
+ EXPECT_NOTCAPABLE(mq_timedreceive_(cap_write_mq, buffer, mqa.mq_msgsize, &priority, &ts));
+ EXPECT_OK(mq_timedreceive_(cap_read_mq, buffer, mqa.mq_msgsize, &priority, &ts));
+ EXPECT_EQ(std::string(message), std::string(buffer));
+ EXPECT_EQ(kPriority, priority);
+ free(buffer);
+
+ close(cap_read_mq);
+ close(cap_write_mq);
+ close(cap_poll_mq);
+}
diff --git a/openat.cc b/openat.cc
new file mode 100644
index 000000000000..c35630bd60aa
--- /dev/null
+++ b/openat.cc
@@ -0,0 +1,357 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+
+#include <string>
+
+#include "capsicum.h"
+#include "capsicum-test.h"
+#include "syscalls.h"
+
+// Check an open call works and close the resulting fd.
+#define EXPECT_OPEN_OK(f) do { \
+ int fd = f; \
+ EXPECT_OK(fd); \
+ close(fd); \
+ } while (0)
+
+static void CreateFile(const char *filename, const char *contents) {
+ int fd = open(filename, O_CREAT|O_RDWR, 0644);
+ EXPECT_OK(fd);
+ EXPECT_OK(write(fd, contents, strlen(contents)));
+ close(fd);
+}
+
+// Test openat(2) in a variety of sitations to ensure that it obeys Capsicum
+// "strict relative" rules:
+//
+// 1. Use strict relative lookups in capability mode or when operating
+// relative to a capability.
+// 2. When performing strict relative lookups, absolute paths (including
+// symlinks to absolute paths) are not allowed, nor are paths containing
+// '..' components.
+//
+// These rules apply when:
+// - the directory FD is a Capsicum capability
+// - the process is in capability mode
+// - the openat(2) operation includes the O_BENEATH flag.
+FORK_TEST(Openat, Relative) {
+ int etc = open("/etc/", O_RDONLY);
+ EXPECT_OK(etc);
+
+ cap_rights_t r_base;
+ cap_rights_init(&r_base, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_LOOKUP, CAP_FCNTL, CAP_IOCTL);
+ cap_rights_t r_ro;
+ cap_rights_init(&r_ro, CAP_READ);
+ cap_rights_t r_rl;
+ cap_rights_init(&r_rl, CAP_READ, CAP_LOOKUP);
+
+ int etc_cap = dup(etc);
+ EXPECT_OK(etc_cap);
+ EXPECT_OK(cap_rights_limit(etc_cap, &r_ro));
+ int etc_cap_ro = dup(etc);
+ EXPECT_OK(etc_cap_ro);
+ EXPECT_OK(cap_rights_limit(etc_cap_ro, &r_rl));
+ int etc_cap_base = dup(etc);
+ EXPECT_OK(etc_cap_base);
+ EXPECT_OK(cap_rights_limit(etc_cap_base, &r_base));
+#ifdef HAVE_CAP_FCNTLS_LIMIT
+ // Also limit fcntl(2) subrights.
+ EXPECT_OK(cap_fcntls_limit(etc_cap_base, CAP_FCNTL_GETFL));
+#endif
+#ifdef HAVE_CAP_IOCTLS_LIMIT
+ // Also limit ioctl(2) subrights.
+ cap_ioctl_t ioctl_nread = FIONREAD;
+ EXPECT_OK(cap_ioctls_limit(etc_cap_base, &ioctl_nread, 1));
+#endif
+
+ // openat(2) with regular file descriptors in non-capability mode
+ // Should Just Work (tm).
+ EXPECT_OPEN_OK(openat(etc, "/etc/passwd", O_RDONLY));
+ EXPECT_OPEN_OK(openat(AT_FDCWD, "/etc/passwd", O_RDONLY));
+ EXPECT_OPEN_OK(openat(etc, "passwd", O_RDONLY));
+ EXPECT_OPEN_OK(openat(etc, "../etc/passwd", O_RDONLY));
+
+ // Lookups relative to capabilities should be strictly relative.
+ // When not in capability mode, we don't actually require CAP_LOOKUP.
+ EXPECT_OPEN_OK(openat(etc_cap_ro, "passwd", O_RDONLY));
+ EXPECT_OPEN_OK(openat(etc_cap_base, "passwd", O_RDONLY));
+
+ // Performing openat(2) on a path with leading slash ignores
+ // the provided directory FD.
+ EXPECT_OPEN_OK(openat(etc_cap_ro, "/etc/passwd", O_RDONLY));
+ EXPECT_OPEN_OK(openat(etc_cap_base, "/etc/passwd", O_RDONLY));
+ // Relative lookups that go upward are not allowed.
+ EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_ro, "../etc/passwd", O_RDONLY);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_base, "../etc/passwd", O_RDONLY);
+
+ // A file opened relative to a capability should itself be a capability.
+ int fd = openat(etc_cap_base, "passwd", O_RDONLY);
+ EXPECT_OK(fd);
+ cap_rights_t rights;
+ EXPECT_OK(cap_rights_get(fd, &rights));
+ EXPECT_RIGHTS_IN(&rights, &r_base);
+#ifdef HAVE_CAP_FCNTLS_LIMIT
+ cap_fcntl_t fcntls;
+ EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+ EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETFL, fcntls);
+#endif
+#ifdef HAVE_CAP_IOCTLS_LIMIT
+ cap_ioctl_t ioctls[16];
+ ssize_t nioctls;
+ memset(ioctls, 0, sizeof(ioctls));
+ nioctls = cap_ioctls_get(fd, ioctls, 16);
+ EXPECT_OK(nioctls);
+ EXPECT_EQ(1, nioctls);
+ EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+#endif
+ close(fd);
+
+ // Enter capability mode; now ALL lookups are strictly relative.
+ EXPECT_OK(cap_enter());
+
+ // Relative lookups on regular files or capabilities with CAP_LOOKUP
+ // ought to succeed.
+ EXPECT_OPEN_OK(openat(etc, "passwd", O_RDONLY));
+ EXPECT_OPEN_OK(openat(etc_cap_ro, "passwd", O_RDONLY));
+ EXPECT_OPEN_OK(openat(etc_cap_base, "passwd", O_RDONLY));
+
+ // Lookup relative to capabilities without CAP_LOOKUP should fail.
+ EXPECT_NOTCAPABLE(openat(etc_cap, "passwd", O_RDONLY));
+
+ // Absolute lookups should fail.
+ EXPECT_CAPMODE(openat(AT_FDCWD, "/etc/passwd", O_RDONLY));
+ EXPECT_OPENAT_FAIL_TRAVERSAL(etc, "/etc/passwd", O_RDONLY);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_ro, "/etc/passwd", O_RDONLY);
+
+ // Lookups containing '..' should fail in capability mode.
+ EXPECT_OPENAT_FAIL_TRAVERSAL(etc, "../etc/passwd", O_RDONLY);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_ro, "../etc/passwd", O_RDONLY);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_base, "../etc/passwd", O_RDONLY);
+
+ fd = openat(etc, "passwd", O_RDONLY);
+ EXPECT_OK(fd);
+
+ // A file opened relative to a capability should itself be a capability.
+ fd = openat(etc_cap_base, "passwd", O_RDONLY);
+ EXPECT_OK(fd);
+ EXPECT_OK(cap_rights_get(fd, &rights));
+ EXPECT_RIGHTS_IN(&rights, &r_base);
+ close(fd);
+
+ fd = openat(etc_cap_ro, "passwd", O_RDONLY);
+ EXPECT_OK(fd);
+ EXPECT_OK(cap_rights_get(fd, &rights));
+ EXPECT_RIGHTS_IN(&rights, &r_rl);
+ close(fd);
+}
+
+#define TOPDIR "cap_topdir"
+#define SUBDIR_ABS TOPDIR "/subdir"
+class OpenatTest : public ::testing::Test {
+ public:
+ // Build a collection of files, subdirs and symlinks:
+ // /tmp/cap_topdir/
+ // /topfile
+ // /subdir/
+ // /subdir/bottomfile
+ // /symlink.samedir -> topfile
+ // /dsymlink.samedir -> ./
+ // /symlink.down -> subdir/bottomfile
+ // /dsymlink.down -> subdir/
+ // /symlink.absolute_in -> /tmp/cap_topdir/topfile
+ // /dsymlink.absolute_in -> /tmp/cap_topdir/
+ // /symlink.absolute_out -> /etc/passwd
+ // /dsymlink.absolute_out -> /etc/
+ // /symlink.relative_in -> ../../tmp/cap_topdir/topfile
+ // /dsymlink.relative_in -> ../../tmp/cap_topdir/
+ // /symlink.relative_out -> ../../etc/passwd
+ // /dsymlink.relative_out -> ../../etc/
+ // /subdir/symlink.up -> ../topfile
+ // /subdir/dsymlink.up -> ../
+ // (In practice, this is a little more complicated because tmpdir might
+ // not be "/tmp".)
+ OpenatTest() {
+ // Create a couple of nested directories
+ int rc = mkdir(TmpFile(TOPDIR), 0755);
+ EXPECT_OK(rc);
+ if (rc < 0) EXPECT_EQ(EEXIST, errno);
+ rc = mkdir(TmpFile(SUBDIR_ABS), 0755);
+ EXPECT_OK(rc);
+ if (rc < 0) EXPECT_EQ(EEXIST, errno);
+
+ // Figure out a path prefix (like "../..") that gets us to the root
+ // directory from TmpFile(TOPDIR).
+ const char *p = TmpFile(TOPDIR); // maybe "/tmp/somewhere/cap_topdir"
+ std::string dots2root = "..";
+ while (*p++ != '\0') {
+ if (*p == '/') {
+ dots2root += "/..";
+ }
+ }
+
+ // Create normal files in each.
+ CreateFile(TmpFile(TOPDIR "/topfile"), "Top-level file");
+ CreateFile(TmpFile(SUBDIR_ABS "/bottomfile"), "File in subdirectory");
+
+ // Create various symlinks to files.
+ EXPECT_OK(symlink("topfile", TmpFile(TOPDIR "/symlink.samedir")));
+ EXPECT_OK(symlink("subdir/bottomfile", TmpFile(TOPDIR "/symlink.down")));
+ EXPECT_OK(symlink(TmpFile(TOPDIR "/topfile"), TmpFile(TOPDIR "/symlink.absolute_in")));
+ EXPECT_OK(symlink("/etc/passwd", TmpFile(TOPDIR "/symlink.absolute_out")));
+ std::string dots2top = dots2root + TmpFile(TOPDIR "/topfile");
+ EXPECT_OK(symlink(dots2top.c_str(), TmpFile(TOPDIR "/symlink.relative_in")));
+ std::string dots2passwd = dots2root + "/etc/passwd";
+ EXPECT_OK(symlink(dots2passwd.c_str(), TmpFile(TOPDIR "/symlink.relative_out")));
+ EXPECT_OK(symlink("../topfile", TmpFile(SUBDIR_ABS "/symlink.up")));
+
+ // Create various symlinks to directories.
+ EXPECT_OK(symlink("./", TmpFile(TOPDIR "/dsymlink.samedir")));
+ EXPECT_OK(symlink("subdir/", TmpFile(TOPDIR "/dsymlink.down")));
+ EXPECT_OK(symlink(TmpFile(TOPDIR "/"), TmpFile(TOPDIR "/dsymlink.absolute_in")));
+ EXPECT_OK(symlink("/etc/", TmpFile(TOPDIR "/dsymlink.absolute_out")));
+ std::string dots2cwd = dots2root + tmpdir + "/";
+ EXPECT_OK(symlink(dots2cwd.c_str(), TmpFile(TOPDIR "/dsymlink.relative_in")));
+ std::string dots2etc = dots2root + "/etc/";
+ EXPECT_OK(symlink(dots2etc.c_str(), TmpFile(TOPDIR "/dsymlink.relative_out")));
+ EXPECT_OK(symlink("../", TmpFile(SUBDIR_ABS "/dsymlink.up")));
+
+ // Open directory FDs for those directories and for cwd.
+ dir_fd_ = open(TmpFile(TOPDIR), O_RDONLY);
+ EXPECT_OK(dir_fd_);
+ sub_fd_ = open(TmpFile(SUBDIR_ABS), O_RDONLY);
+ EXPECT_OK(sub_fd_);
+ cwd_ = openat(AT_FDCWD, ".", O_RDONLY);
+ EXPECT_OK(cwd_);
+ // Move into the directory for the test.
+ EXPECT_OK(fchdir(dir_fd_));
+ }
+ ~OpenatTest() {
+ fchdir(cwd_);
+ close(cwd_);
+ close(sub_fd_);
+ close(dir_fd_);
+ unlink(TmpFile(SUBDIR_ABS "/symlink.up"));
+ unlink(TmpFile(TOPDIR "/symlink.absolute_in"));
+ unlink(TmpFile(TOPDIR "/symlink.absolute_out"));
+ unlink(TmpFile(TOPDIR "/symlink.relative_in"));
+ unlink(TmpFile(TOPDIR "/symlink.relative_out"));
+ unlink(TmpFile(TOPDIR "/symlink.down"));
+ unlink(TmpFile(TOPDIR "/symlink.samedir"));
+ unlink(TmpFile(SUBDIR_ABS "/dsymlink.up"));
+ unlink(TmpFile(TOPDIR "/dsymlink.absolute_in"));
+ unlink(TmpFile(TOPDIR "/dsymlink.absolute_out"));
+ unlink(TmpFile(TOPDIR "/dsymlink.relative_in"));
+ unlink(TmpFile(TOPDIR "/dsymlink.relative_out"));
+ unlink(TmpFile(TOPDIR "/dsymlink.down"));
+ unlink(TmpFile(TOPDIR "/dsymlink.samedir"));
+ unlink(TmpFile(SUBDIR_ABS "/bottomfile"));
+ unlink(TmpFile(TOPDIR "/topfile"));
+ rmdir(TmpFile(SUBDIR_ABS));
+ rmdir(TmpFile(TOPDIR));
+ }
+
+ // Check openat(2) policing that is common across capabilities, capability mode and O_BENEATH.
+ void CheckPolicing(int oflag) {
+ // OK for normal access.
+ EXPECT_OPEN_OK(openat(dir_fd_, "topfile", O_RDONLY|oflag));
+ EXPECT_OPEN_OK(openat(dir_fd_, "subdir/bottomfile", O_RDONLY|oflag));
+ EXPECT_OPEN_OK(openat(sub_fd_, "bottomfile", O_RDONLY|oflag));
+ EXPECT_OPEN_OK(openat(sub_fd_, ".", O_RDONLY|oflag));
+
+ // Can't open paths with ".." in them.
+ EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "../topfile", O_RDONLY|oflag);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "../subdir/bottomfile", O_RDONLY|oflag);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "..", O_RDONLY|oflag);
+
+#ifdef HAVE_OPENAT_INTERMEDIATE_DOTDOT
+ // OK for dotdot lookups that don't escape the top directory
+ EXPECT_OPEN_OK(openat(dir_fd_, "subdir/../topfile", O_RDONLY|oflag));
+#endif
+
+ // Check that we can't escape the top directory by the cunning
+ // ruse of going via a subdirectory.
+ EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "subdir/../../etc/passwd", O_RDONLY|oflag);
+
+ // Should only be able to open symlinks that stay within the directory.
+ EXPECT_OPEN_OK(openat(dir_fd_, "symlink.samedir", O_RDONLY|oflag));
+ EXPECT_OPEN_OK(openat(dir_fd_, "symlink.down", O_RDONLY|oflag));
+ EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "symlink.absolute_in", O_RDONLY|oflag);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "symlink.absolute_out", O_RDONLY|oflag);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "symlink.relative_in", O_RDONLY|oflag);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "symlink.relative_out", O_RDONLY|oflag);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "symlink.up", O_RDONLY|oflag);
+
+ EXPECT_OPEN_OK(openat(dir_fd_, "dsymlink.samedir/topfile", O_RDONLY|oflag));
+ EXPECT_OPEN_OK(openat(dir_fd_, "dsymlink.down/bottomfile", O_RDONLY|oflag));
+ EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "dsymlink.absolute_in/topfile", O_RDONLY|oflag);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "dsymlink.absolute_out/passwd", O_RDONLY|oflag);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "dsymlink.relative_in/topfile", O_RDONLY|oflag);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "dsymlink.relative_out/passwd", O_RDONLY|oflag);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "dsymlink.up/topfile", O_RDONLY|oflag);
+
+ // Although recall that O_NOFOLLOW prevents symlink following in final component.
+ EXPECT_SYSCALL_FAIL(E_TOO_MANY_LINKS, openat(dir_fd_, "symlink.samedir", O_RDONLY|O_NOFOLLOW|oflag));
+ EXPECT_SYSCALL_FAIL(E_TOO_MANY_LINKS, openat(dir_fd_, "symlink.down", O_RDONLY|O_NOFOLLOW|oflag));
+ }
+
+ protected:
+ int dir_fd_;
+ int sub_fd_;
+ int cwd_;
+};
+
+TEST_F(OpenatTest, WithCapability) {
+ // Any kind of symlink can be opened relative to an ordinary directory FD.
+ EXPECT_OPEN_OK(openat(dir_fd_, "symlink.samedir", O_RDONLY));
+ EXPECT_OPEN_OK(openat(dir_fd_, "symlink.down", O_RDONLY));
+ EXPECT_OPEN_OK(openat(dir_fd_, "symlink.absolute_in", O_RDONLY));
+ EXPECT_OPEN_OK(openat(dir_fd_, "symlink.absolute_out", O_RDONLY));
+ EXPECT_OPEN_OK(openat(dir_fd_, "symlink.relative_in", O_RDONLY));
+ EXPECT_OPEN_OK(openat(dir_fd_, "symlink.relative_out", O_RDONLY));
+ EXPECT_OPEN_OK(openat(sub_fd_, "symlink.up", O_RDONLY));
+
+ // Now make both DFDs into Capsicum capabilities.
+ cap_rights_t r_rl;
+ cap_rights_init(&r_rl, CAP_READ, CAP_LOOKUP, CAP_FCHDIR);
+ EXPECT_OK(cap_rights_limit(dir_fd_, &r_rl));
+ EXPECT_OK(cap_rights_limit(sub_fd_, &r_rl));
+ CheckPolicing(0);
+ // Use of AT_FDCWD is independent of use of a capability.
+ // Can open paths starting with "/" against a capability dfd, because the dfd is ignored.
+}
+
+FORK_TEST_F(OpenatTest, InCapabilityMode) {
+ EXPECT_OK(cap_enter()); // Enter capability mode
+ CheckPolicing(0);
+
+ // Use of AT_FDCWD is banned in capability mode.
+ EXPECT_CAPMODE(openat(AT_FDCWD, "topfile", O_RDONLY));
+ EXPECT_CAPMODE(openat(AT_FDCWD, "subdir/bottomfile", O_RDONLY));
+ EXPECT_CAPMODE(openat(AT_FDCWD, "/etc/passwd", O_RDONLY));
+
+ // Can't open paths starting with "/" in capability mode.
+ EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "/etc/passwd", O_RDONLY);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "/etc/passwd", O_RDONLY);
+}
+
+#ifdef O_BENEATH
+TEST_F(OpenatTest, WithFlag) {
+ CheckPolicing(O_BENEATH);
+
+ // Check with AT_FDCWD.
+ EXPECT_OPEN_OK(openat(AT_FDCWD, "topfile", O_RDONLY|O_BENEATH));
+ EXPECT_OPEN_OK(openat(AT_FDCWD, "subdir/bottomfile", O_RDONLY|O_BENEATH));
+
+ // Can't open paths starting with "/" with O_BENEATH specified.
+ EXPECT_OPENAT_FAIL_TRAVERSAL(AT_FDCWD, "/etc/passwd", O_RDONLY|O_BENEATH);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "/etc/passwd", O_RDONLY|O_BENEATH);
+ EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "/etc/passwd", O_RDONLY|O_BENEATH);
+}
+
+FORK_TEST_F(OpenatTest, WithFlagInCapabilityMode) {
+ EXPECT_OK(cap_enter()); // Enter capability mode
+ CheckPolicing(O_BENEATH);
+}
+#endif
diff --git a/overhead.cc b/overhead.cc
new file mode 100644
index 000000000000..90d98895b04a
--- /dev/null
+++ b/overhead.cc
@@ -0,0 +1,45 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+#ifdef HAVE_SYSCALL
+double RepeatSyscall(int count, int nr, long arg1, long arg2, long arg3) {
+ const clock_t t0 = clock(); // or gettimeofday or whatever
+ for (int ii = 0; ii < count; ii++) {
+ syscall(nr, arg1, arg2, arg3);
+ }
+ const clock_t t1 = clock();
+ return (t1 - t0) / (double)CLOCKS_PER_SEC;
+}
+
+typedef int (*EntryFn)(void);
+
+double CompareSyscall(EntryFn entry_fn, int count, int nr,
+ long arg1, long arg2, long arg3) {
+ double bare = RepeatSyscall(count, nr, arg1, arg2, arg3);
+ EXPECT_OK(entry_fn());
+ double capmode = RepeatSyscall(count, nr, arg1, arg2, arg3);
+ if (verbose) fprintf(stderr, "%d iterations bare=%fs capmode=%fs ratio=%.2f%%\n",
+ count, bare, capmode, 100.0*capmode/bare);
+ if (bare==0.0) {
+ if (capmode==0.0) return 1.0;
+ return 999.0;
+ }
+ return capmode/bare;
+}
+
+FORK_TEST(Overhead, GetTid) {
+ EXPECT_GT(10, CompareSyscall(&cap_enter, 10000, __NR_gettid, 0, 0, 0));
+}
+FORK_TEST(Overhead, Seek) {
+ int fd = open("/etc/passwd", O_RDONLY);
+ EXPECT_GT(50, CompareSyscall(&cap_enter, 10000, __NR_lseek, fd, 0, SEEK_SET));
+ close(fd);
+}
+#endif
diff --git a/procdesc.cc b/procdesc.cc
new file mode 100644
index 000000000000..476dfe01ab71
--- /dev/null
+++ b/procdesc.cc
@@ -0,0 +1,977 @@
+// Tests for the process descriptor API for Linux.
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <iomanip>
+#include <map>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+#ifndef __WALL
+// Linux requires __WALL in order for waitpid(specific_pid,...) to
+// see and reap any specific pid. Define this to nothing for platforms
+// (FreeBSD) where it doesn't exist, to reduce macroing.
+#define __WALL 0
+#endif
+
+// TODO(drysdale): it would be nice to use proper synchronization between
+// processes, rather than synchronization-via-sleep; faster too.
+
+
+//------------------------------------------------
+// Utilities for the tests.
+
+static pid_t pdwait4_(int pd, int *status, int options, struct rusage *ru) {
+#ifdef HAVE_PDWAIT4
+ return pdwait4(pd, status, options, ru);
+#else
+ // Simulate pdwait4() with wait4(pdgetpid()); this won't work in capability mode.
+ pid_t pid = -1;
+ int rc = pdgetpid(pd, &pid);
+ if (rc < 0) {
+ return rc;
+ }
+ options |= __WALL;
+ return wait4(pid, status, options, ru);
+#endif
+}
+
+static void print_rusage(FILE *f, struct rusage *ru) {
+ fprintf(f, " User CPU time=%ld.%06ld\n", (long)ru->ru_utime.tv_sec, (long)ru->ru_utime.tv_usec);
+ fprintf(f, " System CPU time=%ld.%06ld\n", (long)ru->ru_stime.tv_sec, (long)ru->ru_stime.tv_usec);
+ fprintf(f, " Max RSS=%ld\n", ru->ru_maxrss);
+}
+
+static void print_stat(FILE *f, const struct stat *stat) {
+ fprintf(f,
+ "{ .st_dev=%ld, st_ino=%ld, st_mode=%04o, st_nlink=%ld, st_uid=%d, st_gid=%d,\n"
+ " .st_rdev=%ld, .st_size=%ld, st_blksize=%ld, .st_block=%ld,\n "
+#ifdef HAVE_STAT_BIRTHTIME
+ ".st_birthtime=%ld, "
+#endif
+ ".st_atime=%ld, .st_mtime=%ld, .st_ctime=%ld}\n",
+ (long)stat->st_dev, (long)stat->st_ino, stat->st_mode,
+ (long)stat->st_nlink, stat->st_uid, stat->st_gid,
+ (long)stat->st_rdev, (long)stat->st_size, (long)stat->st_blksize,
+ (long)stat->st_blocks,
+#ifdef HAVE_STAT_BIRTHTIME
+ (long)stat->st_birthtime,
+#endif
+ (long)stat->st_atime, (long)stat->st_mtime, (long)stat->st_ctime);
+}
+
+static std::map<int,bool> had_signal;
+static void handle_signal(int x) {
+ had_signal[x] = true;
+}
+
+// Check that the given child process terminates as expected.
+void CheckChildFinished(pid_t pid, bool signaled=false) {
+ // Wait for the child to finish.
+ int rc;
+ int status = 0;
+ do {
+ rc = waitpid(pid, &status, __WALL);
+ if (rc < 0) {
+ fprintf(stderr, "Warning: waitpid error %s (%d)\n", strerror(errno), errno);
+ ADD_FAILURE() << "Failed to wait for child";
+ break;
+ } else if (rc == pid) {
+ break;
+ }
+ } while (true);
+ EXPECT_EQ(pid, rc);
+ if (rc == pid) {
+ if (signaled) {
+ EXPECT_TRUE(WIFSIGNALED(status));
+ } else {
+ EXPECT_TRUE(WIFEXITED(status)) << std::hex << status;
+ EXPECT_EQ(0, WEXITSTATUS(status));
+ }
+ }
+}
+
+//------------------------------------------------
+// Basic tests of process descriptor functionality
+
+TEST(Pdfork, Simple) {
+ int pd = -1;
+ pid_t parent = getpid_();
+ int pid = pdfork(&pd, 0);
+ EXPECT_OK(pid);
+ if (pid == 0) {
+ // Child: check pid values.
+ EXPECT_EQ(-1, pd);
+ EXPECT_NE(parent, getpid_());
+ EXPECT_EQ(parent, getppid());
+ sleep(1);
+ exit(0);
+ }
+ usleep(100); // ensure the child has a chance to run
+ EXPECT_NE(-1, pd);
+ EXPECT_PID_ALIVE(pid);
+ int pid_got;
+ EXPECT_OK(pdgetpid(pd, &pid_got));
+ EXPECT_EQ(pid, pid_got);
+
+ // Wait long enough for the child to exit().
+ sleep(2);
+ EXPECT_PID_ZOMBIE(pid);
+
+ // Wait for the the child.
+ int status;
+ struct rusage ru;
+ memset(&ru, 0, sizeof(ru));
+ int waitrc = pdwait4_(pd, &status, 0, &ru);
+ EXPECT_EQ(pid, waitrc);
+ if (verbose) {
+ fprintf(stderr, "For pd %d pid %d:\n", pd, pid);
+ print_rusage(stderr, &ru);
+ }
+ EXPECT_PID_GONE(pid);
+
+ // Can only pdwait4(pd) once (as initial call reaps zombie).
+ memset(&ru, 0, sizeof(ru));
+ EXPECT_EQ(-1, pdwait4_(pd, &status, 0, &ru));
+ EXPECT_EQ(ECHILD, errno);
+
+ EXPECT_OK(close(pd));
+}
+
+TEST(Pdfork, InvalidFlag) {
+ int pd = -1;
+ int pid = pdfork(&pd, PD_DAEMON<<5);
+ if (pid == 0) {
+ exit(1);
+ }
+ EXPECT_EQ(-1, pid);
+ EXPECT_EQ(EINVAL, errno);
+ if (pid > 0) waitpid(pid, NULL, __WALL);
+}
+
+TEST(Pdfork, TimeCheck) {
+ time_t now = time(NULL); // seconds since epoch
+ EXPECT_NE(-1, now);
+ if (verbose) fprintf(stderr, "Calling pdfork around %ld\n", (long)(long)now);
+
+ int pd = -1;
+ pid_t pid = pdfork(&pd, 0);
+ EXPECT_OK(pid);
+ if (pid == 0) {
+ // Child: check we didn't get a valid process descriptor then exit.
+ EXPECT_EQ(-1, pdgetpid(pd, &pid));
+ EXPECT_EQ(EBADF, errno);
+ exit(HasFailure());
+ }
+
+#ifdef HAVE_PROCDESC_FSTAT
+ // Parent process. Ensure that [acm]times have been set correctly.
+ struct stat stat;
+ memset(&stat, 0, sizeof(stat));
+ EXPECT_OK(fstat(pd, &stat));
+ if (verbose) print_stat(stderr, &stat);
+
+#ifdef HAVE_STAT_BIRTHTIME
+ EXPECT_GE(now, stat.st_birthtime);
+ EXPECT_EQ(stat.st_birthtime, stat.st_atime);
+#endif
+ EXPECT_LT((now - stat.st_atime), 2);
+ EXPECT_EQ(stat.st_atime, stat.st_ctime);
+ EXPECT_EQ(stat.st_ctime, stat.st_mtime);
+#endif
+
+ // Wait for the child to finish.
+ pid_t pd_pid = -1;
+ EXPECT_OK(pdgetpid(pd, &pd_pid));
+ EXPECT_EQ(pid, pd_pid);
+ CheckChildFinished(pid);
+}
+
+TEST(Pdfork, UseDescriptor) {
+ int pd = -1;
+ pid_t pid = pdfork(&pd, 0);
+ EXPECT_OK(pid);
+ if (pid == 0) {
+ // Child: immediately exit
+ exit(0);
+ }
+ CheckChildFinished(pid);
+}
+
+TEST(Pdfork, NonProcessDescriptor) {
+ int fd = open("/etc/passwd", O_RDONLY);
+ EXPECT_OK(fd);
+ // pd*() operations should fail on a non-process descriptor.
+ EXPECT_EQ(-1, pdkill(fd, SIGUSR1));
+ int status;
+ EXPECT_EQ(-1, pdwait4_(fd, &status, 0, NULL));
+ pid_t pid;
+ EXPECT_EQ(-1, pdgetpid(fd, &pid));
+ close(fd);
+}
+
+static void *SubThreadMain(void *data) {
+ while (true) {
+ if (verbose) fprintf(stderr, " subthread: \"I aten't dead\"\n");
+ usleep(100000);
+ }
+ return NULL;
+}
+
+static void *ThreadMain(void *data) {
+ int pd;
+ pid_t child = pdfork(&pd, 0);
+ if (child == 0) {
+ // Child: start a subthread then loop
+ pthread_t child_subthread;
+ EXPECT_OK(pthread_create(&child_subthread, NULL, SubThreadMain, NULL));
+ while (true) {
+ if (verbose) fprintf(stderr, " pdforked process %d: \"I aten't dead\"\n", getpid());
+ usleep(100000);
+ }
+ exit(0);
+ }
+ if (verbose) fprintf(stderr, " thread generated pd %d\n", pd);
+ sleep(2);
+
+ // Pass the process descriptor back to the main thread.
+ return reinterpret_cast<void *>(pd);
+}
+
+TEST(Pdfork, FromThread) {
+ // Fire off a new thread to do all of the creation work.
+ pthread_t child_thread;
+ EXPECT_OK(pthread_create(&child_thread, NULL, ThreadMain, NULL));
+ void *data;
+ EXPECT_OK(pthread_join(child_thread, &data));
+ int pd = reinterpret_cast<intptr_t>(data);
+ if (verbose) fprintf(stderr, "retrieved pd %d from terminated thread\n", pd);
+
+ // Kill and reap.
+ pid_t pid;
+ EXPECT_OK(pdgetpid(pd, &pid));
+ EXPECT_OK(pdkill(pd, SIGKILL));
+ int status;
+ EXPECT_EQ(pid, pdwait4_(pd, &status, 0, NULL));
+ EXPECT_TRUE(WIFSIGNALED(status));
+}
+
+//------------------------------------------------
+// More complicated tests.
+
+
+// Test fixture that pdfork()s off a child process, which terminates
+// when it receives anything on a pipe.
+class PipePdforkBase : public ::testing::Test {
+ public:
+ PipePdforkBase(int pdfork_flags) : pd_(-1), pid_(-1) {
+ had_signal.clear();
+ int pipes[2];
+ EXPECT_OK(pipe(pipes));
+ pipe_ = pipes[1];
+ int parent = getpid_();
+ if (verbose) fprintf(stderr, "[%d] about to pdfork()\n", getpid_());
+ int rc = pdfork(&pd_, pdfork_flags);
+ EXPECT_OK(rc);
+ if (rc == 0) {
+ // Child process: blocking-read an int from the pipe then exit with that value.
+ EXPECT_NE(parent, getpid_());
+ EXPECT_EQ(parent, getppid());
+ if (verbose) fprintf(stderr, " [%d] child of %d waiting for value on pipe\n", getpid_(), getppid());
+ read(pipes[0], &rc, sizeof(rc));
+ if (verbose) fprintf(stderr, " [%d] got value %d on pipe, exiting\n", getpid_(), rc);
+ exit(rc);
+ }
+ pid_ = rc;
+ usleep(100); // ensure the child has a chance to run
+ }
+ ~PipePdforkBase() {
+ // Terminate by any means necessary.
+ if (pd_ > 0) {
+ pdkill(pd_, SIGKILL);
+ close(pd_);
+ }
+ if (pid_ > 0) {
+ kill(pid_, SIGKILL);
+ waitpid(pid_, NULL, __WALL|WNOHANG);
+ }
+ // Check signal expectations.
+ EXPECT_FALSE(had_signal[SIGCHLD]);
+ }
+ int TerminateChild() {
+ // Tell the child to exit.
+ int zero = 0;
+ if (verbose) fprintf(stderr, "[%d] write 0 to pipe\n", getpid_());
+ return write(pipe_, &zero, sizeof(zero));
+ }
+ protected:
+ int pd_;
+ int pipe_;
+ pid_t pid_;
+};
+
+class PipePdfork : public PipePdforkBase {
+ public:
+ PipePdfork() : PipePdforkBase(0) {}
+};
+
+class PipePdforkDaemon : public PipePdforkBase {
+ public:
+ PipePdforkDaemon() : PipePdforkBase(PD_DAEMON) {}
+};
+
+// Can we poll a process descriptor?
+TEST_F(PipePdfork, Poll) {
+ // Poll the process descriptor, nothing happening.
+ struct pollfd fdp;
+ fdp.fd = pd_;
+ fdp.events = POLLIN | POLLERR | POLLHUP;
+ fdp.revents = 0;
+ EXPECT_EQ(0, poll(&fdp, 1, 0));
+
+ TerminateChild();
+
+ // Poll again, should have activity on the process descriptor.
+ EXPECT_EQ(1, poll(&fdp, 1, 2000));
+ EXPECT_TRUE(fdp.revents & POLLHUP);
+
+ // Poll a third time, still have POLLHUP.
+ fdp.revents = 0;
+ EXPECT_EQ(1, poll(&fdp, 1, 0));
+ EXPECT_TRUE(fdp.revents & POLLHUP);
+}
+
+// Can multiple processes poll on the same descriptor?
+TEST_F(PipePdfork, PollMultiple) {
+ int child = fork();
+ EXPECT_OK(child);
+ if (child == 0) {
+ // Child: wait to give time for setup, then write to the pipe (which will
+ // induce exit of the pdfork()ed process) and exit.
+ sleep(1);
+ TerminateChild();
+ exit(0);
+ }
+ usleep(100); // ensure the child has a chance to run
+
+ // Fork again
+ int doppel = fork();
+ EXPECT_OK(doppel);
+ // We now have:
+ // pid A: main process, here
+ // |--pid B: pdfork()ed process, blocked on read()
+ // |--pid C: fork()ed process, in sleep(1) above
+ // +--pid D: doppel process, here
+
+ // Both A and D execute the following code.
+ // First, check no activity on the process descriptor yet.
+ struct pollfd fdp;
+ fdp.fd = pd_;
+ fdp.events = POLLIN | POLLERR | POLLHUP;
+ fdp.revents = 0;
+ EXPECT_EQ(0, poll(&fdp, 1, 0));
+
+ // Now, wait (indefinitely) for activity on the process descriptor.
+ // We expect:
+ // - pid C will finish its sleep, write to the pipe and exit
+ // - pid B will unblock from read(), and exit
+ // - this will generate an event on the process descriptor...
+ // - ...in both process A and process D.
+ EXPECT_EQ(1, poll(&fdp, 1, 2000));
+ EXPECT_TRUE(fdp.revents & POLLHUP);
+
+ if (doppel == 0) {
+ // Child: process D exits.
+ exit(0);
+ } else {
+ // Parent: wait on process D.
+ int rc = 0;
+ waitpid(doppel, &rc, __WALL);
+ EXPECT_TRUE(WIFEXITED(rc));
+ EXPECT_EQ(0, WEXITSTATUS(rc));
+ // Also wait on process B.
+ CheckChildFinished(child);
+ }
+}
+
+// Check that exit status/rusage for a dead pdfork()ed child can be retrieved
+// via any process descriptor, multiple times.
+TEST_F(PipePdfork, MultipleRetrieveExitStatus) {
+ EXPECT_PID_ALIVE(pid_);
+ int pd_copy = dup(pd_);
+ EXPECT_LT(0, TerminateChild());
+
+ int status;
+ struct rusage ru;
+ memset(&ru, 0, sizeof(ru));
+ int waitrc = pdwait4_(pd_copy, &status, 0, &ru);
+ EXPECT_EQ(pid_, waitrc);
+ if (verbose) {
+ fprintf(stderr, "For pd %d -> pid %d:\n", pd_, pid_);
+ print_rusage(stderr, &ru);
+ }
+ EXPECT_PID_GONE(pid_);
+
+#ifdef NOTYET
+ // Child has been reaped, so original process descriptor dangles but
+ // still has access to rusage information.
+ memset(&ru, 0, sizeof(ru));
+ EXPECT_EQ(0, pdwait4_(pd_, &status, 0, &ru));
+#endif
+ close(pd_copy);
+}
+
+TEST_F(PipePdfork, ChildExit) {
+ EXPECT_PID_ALIVE(pid_);
+ EXPECT_LT(0, TerminateChild());
+ EXPECT_PID_DEAD(pid_);
+
+ int status;
+ int rc = pdwait4_(pd_, &status, 0, NULL);
+ EXPECT_OK(rc);
+ EXPECT_EQ(pid_, rc);
+ pid_ = 0;
+}
+
+#ifdef HAVE_PROC_FDINFO
+TEST_F(PipePdfork, FdInfo) {
+ char buffer[1024];
+ sprintf(buffer, "/proc/%d/fdinfo/%d", getpid_(), pd_);
+ int procfd = open(buffer, O_RDONLY);
+ EXPECT_OK(procfd);
+
+ EXPECT_OK(read(procfd, buffer, sizeof(buffer)));
+ // The fdinfo should include the file pos of the underlying file
+ EXPECT_NE((char*)NULL, strstr(buffer, "pos:\t0")) << buffer;
+ // ...and the underlying pid
+ char pidline[256];
+ sprintf(pidline, "pid:\t%d", pid_);
+ EXPECT_NE((char*)NULL, strstr(buffer, pidline)) << buffer;
+ close(procfd);
+}
+#endif
+
+// Closing a normal process descriptor terminates the underlying process.
+TEST_F(PipePdfork, Close) {
+ sighandler_t original = signal(SIGCHLD, handle_signal);
+ EXPECT_PID_ALIVE(pid_);
+ int status;
+ EXPECT_EQ(0, waitpid(pid_, &status, __WALL|WNOHANG));
+
+ EXPECT_OK(close(pd_));
+ pd_ = -1;
+ EXPECT_FALSE(had_signal[SIGCHLD]);
+ EXPECT_PID_DEAD(pid_);
+
+#ifdef __FreeBSD__
+ EXPECT_EQ(-1, waitpid(pid_, NULL, __WALL));
+ EXPECT_EQ(errno, ECHILD);
+#else
+ // Having closed the process descriptor means that pdwait4(pd) now doesn't work.
+ int rc = pdwait4_(pd_, &status, 0, NULL);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EBADF, errno);
+
+ // Closing all process descriptors means the the child can only be reaped via pid.
+ EXPECT_EQ(pid_, waitpid(pid_, &status, __WALL|WNOHANG));
+#endif
+ signal(SIGCHLD, original);
+}
+
+TEST_F(PipePdfork, CloseLast) {
+ sighandler_t original = signal(SIGCHLD, handle_signal);
+ // Child should only die when last process descriptor is closed.
+ EXPECT_PID_ALIVE(pid_);
+ int pd_other = dup(pd_);
+
+ EXPECT_OK(close(pd_));
+ pd_ = -1;
+
+ EXPECT_PID_ALIVE(pid_);
+ int status;
+ EXPECT_EQ(0, waitpid(pid_, &status, __WALL|WNOHANG));
+
+ // Can no longer pdwait4() the closed process descriptor...
+ EXPECT_EQ(-1, pdwait4_(pd_, &status, WNOHANG, NULL));
+ EXPECT_EQ(EBADF, errno);
+ // ...but can pdwait4() the still-open process descriptor.
+ errno = 0;
+ EXPECT_EQ(0, pdwait4_(pd_other, &status, WNOHANG, NULL));
+ EXPECT_EQ(0, errno);
+
+ EXPECT_OK(close(pd_other));
+ EXPECT_PID_DEAD(pid_);
+
+ EXPECT_FALSE(had_signal[SIGCHLD]);
+ signal(SIGCHLD, original);
+}
+
+FORK_TEST(Pdfork, OtherUser) {
+ REQUIRE_ROOT();
+ int pd;
+ pid_t pid = pdfork(&pd, 0);
+ EXPECT_OK(pid);
+ if (pid == 0) {
+ // Child process: loop forever.
+ while (true) usleep(100000);
+ }
+ usleep(100);
+
+ // Now that the second process has been pdfork()ed, change euid.
+ setuid(other_uid);
+ if (verbose) fprintf(stderr, "uid=%d euid=%d\n", getuid(), geteuid());
+
+ // Fail to kill child with normal PID operation.
+ EXPECT_EQ(-1, kill(pid, SIGKILL));
+ EXPECT_EQ(EPERM, errno);
+ EXPECT_PID_ALIVE(pid);
+
+ // Succeed with pdkill though.
+ EXPECT_OK(pdkill(pd, SIGKILL));
+ EXPECT_PID_ZOMBIE(pid);
+
+ int status;
+ int rc = pdwait4_(pd, &status, WNOHANG, NULL);
+ EXPECT_OK(rc);
+ EXPECT_EQ(pid, rc);
+ EXPECT_TRUE(WIFSIGNALED(status));
+}
+
+TEST_F(PipePdfork, WaitPidThenPd) {
+ TerminateChild();
+ int status;
+ // If we waitpid(pid) first...
+ int rc = waitpid(pid_, &status, __WALL);
+ EXPECT_OK(rc);
+ EXPECT_EQ(pid_, rc);
+
+#ifdef NOTYET
+ // ...the zombie is reaped but we can still subsequently pdwait4(pd).
+ EXPECT_EQ(0, pdwait4_(pd_, &status, 0, NULL));
+#endif
+}
+
+TEST_F(PipePdfork, WaitPdThenPid) {
+ TerminateChild();
+ int status;
+ // If we pdwait4(pd) first...
+ int rc = pdwait4_(pd_, &status, 0, NULL);
+ EXPECT_OK(rc);
+ EXPECT_EQ(pid_, rc);
+
+ // ...the zombie is reaped and cannot subsequently waitpid(pid).
+ EXPECT_EQ(-1, waitpid(pid_, &status, __WALL));
+ EXPECT_EQ(ECHILD, errno);
+}
+
+// Setting PD_DAEMON prevents close() from killing the child.
+TEST_F(PipePdforkDaemon, Close) {
+ EXPECT_OK(close(pd_));
+ pd_ = -1;
+ EXPECT_PID_ALIVE(pid_);
+
+ // Can still explicitly kill it via the pid.
+ if (pid_ > 0) {
+ EXPECT_OK(kill(pid_, SIGKILL));
+ EXPECT_PID_DEAD(pid_);
+ }
+}
+
+static void TestPdkill(pid_t pid, int pd) {
+ EXPECT_PID_ALIVE(pid);
+ // SIGCONT is ignored by default.
+ EXPECT_OK(pdkill(pd, SIGCONT));
+ EXPECT_PID_ALIVE(pid);
+
+ // SIGINT isn't
+ EXPECT_OK(pdkill(pd, SIGINT));
+ EXPECT_PID_DEAD(pid);
+
+ // pdkill() on zombie is no-op.
+ errno = 0;
+ EXPECT_EQ(0, pdkill(pd, SIGINT));
+ EXPECT_EQ(0, errno);
+
+ // pdkill() on reaped process gives -ESRCH.
+ CheckChildFinished(pid, true);
+ EXPECT_EQ(-1, pdkill(pd, SIGINT));
+ EXPECT_EQ(ESRCH, errno);
+}
+
+TEST_F(PipePdfork, Pdkill) {
+ TestPdkill(pid_, pd_);
+}
+
+TEST_F(PipePdforkDaemon, Pdkill) {
+ TestPdkill(pid_, pd_);
+}
+
+TEST(Pdfork, PdkillOtherSignal) {
+ int pd = -1;
+ int pid = pdfork(&pd, 0);
+ EXPECT_OK(pid);
+ if (pid == 0) {
+ // Child: watch for SIGUSR1 forever.
+ had_signal.clear();
+ signal(SIGUSR1, handle_signal);
+ while (!had_signal[SIGUSR1]) {
+ usleep(100000);
+ }
+ exit(123);
+ }
+ sleep(1);
+
+ // Send an invalid signal.
+ EXPECT_EQ(-1, pdkill(pd, 0xFFFF));
+ EXPECT_EQ(EINVAL, errno);
+
+ // Send an expected SIGUSR1 to the pdfork()ed child.
+ EXPECT_PID_ALIVE(pid);
+ pdkill(pd, SIGUSR1);
+ EXPECT_PID_DEAD(pid);
+
+ // Child's exit status confirms whether it received the signal.
+ int status;
+ int rc = waitpid(pid, &status, __WALL);
+ EXPECT_OK(rc);
+ EXPECT_EQ(pid, rc);
+ EXPECT_TRUE(WIFEXITED(status)) << "0x" << std::hex << rc;
+ EXPECT_EQ(123, WEXITSTATUS(status));
+}
+
+pid_t PdforkParentDeath(int pdfork_flags) {
+ // Set up:
+ // pid A: main process, here
+ // +--pid B: fork()ed process, sleep(4)s then exits
+ // +--pid C: pdfork()ed process, looping forever
+ int sock_fds[2];
+ EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds));
+ if (verbose) fprintf(stderr, "[%d] parent about to fork()...\n", getpid_());
+ pid_t child = fork();
+ EXPECT_OK(child);
+ if (child == 0) {
+ int pd;
+ if (verbose) fprintf(stderr, " [%d] child about to pdfork()...\n", getpid_());
+ pid_t grandchild = pdfork(&pd, pdfork_flags);
+ if (grandchild == 0) {
+ while (true) {
+ if (verbose) fprintf(stderr, " [%d] grandchild: \"I aten't dead\"\n", getpid_());
+ sleep(1);
+ }
+ }
+ if (verbose) fprintf(stderr, " [%d] pdfork()ed grandchild %d, sending ID to parent\n", getpid_(), grandchild);
+ // send grandchild pid to parent
+ write(sock_fds[1], &grandchild, sizeof(grandchild));
+ sleep(4);
+ if (verbose) fprintf(stderr, " [%d] child terminating\n", getpid_());
+ exit(0);
+ }
+ if (verbose) fprintf(stderr, "[%d] fork()ed child is %d\n", getpid_(), child);
+ pid_t grandchild;
+ read(sock_fds[0], &grandchild, sizeof(grandchild));
+ if (verbose) fprintf(stderr, "[%d] receive grandchild id %d\n", getpid_(), grandchild);
+ EXPECT_PID_ALIVE(child);
+ EXPECT_PID_ALIVE(grandchild);
+ sleep(6);
+ // Child dies, closing its process descriptor for the grandchild.
+ EXPECT_PID_DEAD(child);
+ CheckChildFinished(child);
+ return grandchild;
+}
+
+TEST(Pdfork, Bagpuss) {
+ // "And of course when Bagpuss goes to sleep, all his friends go to sleep too"
+ pid_t grandchild = PdforkParentDeath(0);
+ // By default: child death => closed process descriptor => grandchild death.
+ EXPECT_PID_DEAD(grandchild);
+}
+
+TEST(Pdfork, BagpussDaemon) {
+ pid_t grandchild = PdforkParentDeath(PD_DAEMON);
+ // With PD_DAEMON: child death => closed process descriptor => no effect on grandchild.
+ EXPECT_PID_ALIVE(grandchild);
+ if (grandchild > 0) {
+ EXPECT_OK(kill(grandchild, SIGKILL));
+ }
+}
+
+// The exit of a pdfork()ed process should not generate SIGCHLD.
+TEST_F(PipePdfork, NoSigchld) {
+ had_signal.clear();
+ sighandler_t original = signal(SIGCHLD, handle_signal);
+ TerminateChild();
+ int rc = 0;
+ // Can waitpid() for the specific pid of the pdfork()ed child.
+ EXPECT_EQ(pid_, waitpid(pid_, &rc, __WALL));
+ EXPECT_TRUE(WIFEXITED(rc)) << "0x" << std::hex << rc;
+ EXPECT_FALSE(had_signal[SIGCHLD]);
+ signal(SIGCHLD, original);
+}
+
+// The exit of a pdfork()ed process whose process descriptors have
+// all been closed should generate SIGCHLD. The child process needs
+// PD_DAEMON to survive the closure of the process descriptors.
+TEST_F(PipePdforkDaemon, NoPDSigchld) {
+ had_signal.clear();
+ sighandler_t original = signal(SIGCHLD, handle_signal);
+
+ EXPECT_OK(close(pd_));
+ TerminateChild();
+#ifdef __FreeBSD__
+ EXPECT_EQ(-1, waitpid(pid_, NULL, __WALL));
+ EXPECT_EQ(errno, ECHILD);
+#else
+ int rc = 0;
+ // Can waitpid() for the specific pid of the pdfork()ed child.
+ EXPECT_EQ(pid_, waitpid(pid_, &rc, __WALL));
+ EXPECT_TRUE(WIFEXITED(rc)) << "0x" << std::hex << rc;
+#endif
+ EXPECT_FALSE(had_signal[SIGCHLD]);
+ signal(SIGCHLD, original);
+}
+
+#ifdef HAVE_PROCDESC_FSTAT
+TEST_F(PipePdfork, ModeBits) {
+ // Owner rwx bits indicate liveness of child
+ struct stat stat;
+ memset(&stat, 0, sizeof(stat));
+ EXPECT_OK(fstat(pd_, &stat));
+ if (verbose) print_stat(stderr, &stat);
+ EXPECT_EQ(S_IRWXU, (long)(stat.st_mode & S_IRWXU));
+
+ TerminateChild();
+ usleep(100000);
+
+ memset(&stat, 0, sizeof(stat));
+ EXPECT_OK(fstat(pd_, &stat));
+ if (verbose) print_stat(stderr, &stat);
+ EXPECT_EQ(0, (int)(stat.st_mode & S_IRWXU));
+}
+#endif
+
+TEST_F(PipePdfork, WildcardWait) {
+ // TODO(FreeBSD): make wildcard wait ignore pdfork()ed children
+ // https://bugs.freebsd.org/201054
+ TerminateChild();
+ sleep(1); // Ensure child is truly dead.
+
+ // Wildcard waitpid(-1) should not see the pdfork()ed child because
+ // there is still a process descriptor for it.
+ int rc;
+ EXPECT_EQ(-1, waitpid(-1, &rc, WNOHANG));
+ EXPECT_EQ(ECHILD, errno);
+
+ EXPECT_OK(close(pd_));
+ pd_ = -1;
+}
+
+FORK_TEST(Pdfork, Pdkill) {
+ had_signal.clear();
+ int pd;
+ pid_t pid = pdfork(&pd, 0);
+ EXPECT_OK(pid);
+
+ if (pid == 0) {
+ // Child: set a SIGINT handler and sleep.
+ had_signal.clear();
+ signal(SIGINT, handle_signal);
+ if (verbose) fprintf(stderr, "[%d] child about to sleep(10)\n", getpid_());
+ int left = sleep(10);
+ if (verbose) fprintf(stderr, "[%d] child slept, %d sec left, had[SIGINT]=%d\n",
+ getpid_(), left, had_signal[SIGINT]);
+ // Expect this sleep to be interrupted by the signal (and so left > 0).
+ exit(left == 0);
+ }
+
+ // Parent: get child's PID.
+ pid_t pd_pid;
+ EXPECT_OK(pdgetpid(pd, &pd_pid));
+ EXPECT_EQ(pid, pd_pid);
+
+ // Interrupt the child after a second.
+ sleep(1);
+ EXPECT_OK(pdkill(pd, SIGINT));
+
+ // Make sure the child finished properly (caught signal then exited).
+ CheckChildFinished(pid);
+}
+
+FORK_TEST(Pdfork, PdkillSignal) {
+ int pd;
+ pid_t pid = pdfork(&pd, 0);
+ EXPECT_OK(pid);
+
+ if (pid == 0) {
+ // Child: sleep. No SIGINT handler.
+ if (verbose) fprintf(stderr, "[%d] child about to sleep(10)\n", getpid_());
+ int left = sleep(10);
+ if (verbose) fprintf(stderr, "[%d] child slept, %d sec left\n", getpid_(), left);
+ exit(99);
+ }
+
+ // Kill the child (as it doesn't handle SIGINT).
+ sleep(1);
+ EXPECT_OK(pdkill(pd, SIGINT));
+
+ // Make sure the child finished properly (terminated by signal).
+ CheckChildFinished(pid, true);
+}
+
+//------------------------------------------------
+// Test interactions with other parts of Capsicum:
+// - capability mode
+// - capabilities
+
+FORK_TEST(Pdfork, DaemonUnrestricted) {
+ EXPECT_OK(cap_enter());
+ int fd;
+
+ // Capability mode leaves pdfork() available, with and without flag.
+ int rc;
+ rc = pdfork(&fd, PD_DAEMON);
+ EXPECT_OK(rc);
+ if (rc == 0) {
+ // Child: immediately terminate.
+ exit(0);
+ }
+
+ rc = pdfork(&fd, 0);
+ EXPECT_OK(rc);
+ if (rc == 0) {
+ // Child: immediately terminate.
+ exit(0);
+ }
+}
+
+TEST(Pdfork, MissingRights) {
+ pid_t parent = getpid_();
+ int pd = -1;
+ pid_t pid = pdfork(&pd, 0);
+ EXPECT_OK(pid);
+ if (pid == 0) {
+ // Child: loop forever.
+ EXPECT_NE(parent, getpid_());
+ while (true) sleep(1);
+ }
+ // Create two capabilities from the process descriptor.
+ cap_rights_t r_ro;
+ cap_rights_init(&r_ro, CAP_READ, CAP_LOOKUP);
+ int cap_incapable = dup(pd);
+ EXPECT_OK(cap_incapable);
+ EXPECT_OK(cap_rights_limit(cap_incapable, &r_ro));
+ cap_rights_t r_pdall;
+ cap_rights_init(&r_pdall, CAP_PDGETPID, CAP_PDWAIT, CAP_PDKILL);
+ int cap_capable = dup(pd);
+ EXPECT_OK(cap_capable);
+ EXPECT_OK(cap_rights_limit(cap_capable, &r_pdall));
+
+ pid_t other_pid;
+ EXPECT_NOTCAPABLE(pdgetpid(cap_incapable, &other_pid));
+ EXPECT_NOTCAPABLE(pdkill(cap_incapable, SIGINT));
+ int status;
+ EXPECT_NOTCAPABLE(pdwait4_(cap_incapable, &status, 0, NULL));
+
+ EXPECT_OK(pdgetpid(cap_capable, &other_pid));
+ EXPECT_EQ(pid, other_pid);
+ EXPECT_OK(pdkill(cap_capable, SIGINT));
+ int rc = pdwait4_(pd, &status, 0, NULL);
+ EXPECT_OK(rc);
+ EXPECT_EQ(pid, rc);
+}
+
+
+//------------------------------------------------
+// Passing process descriptors between processes.
+
+TEST_F(PipePdfork, PassProcessDescriptor) {
+ int sock_fds[2];
+ EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds));
+
+ struct msghdr mh;
+ mh.msg_name = NULL; // No address needed
+ mh.msg_namelen = 0;
+ char buffer1[1024];
+ struct iovec iov[1];
+ iov[0].iov_base = buffer1;
+ iov[0].iov_len = sizeof(buffer1);
+ mh.msg_iov = iov;
+ mh.msg_iovlen = 1;
+ char buffer2[1024];
+ mh.msg_control = buffer2;
+ mh.msg_controllen = sizeof(buffer2);
+ struct cmsghdr *cmptr;
+
+ if (verbose) fprintf(stderr, "[%d] about to fork()\n", getpid_());
+ pid_t child2 = fork();
+ if (child2 == 0) {
+ // Child: close our copy of the original process descriptor.
+ close(pd_);
+
+ // Child: wait to receive process descriptor over socket
+ if (verbose) fprintf(stderr, " [%d] child of %d waiting for process descriptor on socket\n", getpid_(), getppid());
+ int rc = recvmsg(sock_fds[0], &mh, 0);
+ EXPECT_OK(rc);
+ EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen);
+ cmptr = CMSG_FIRSTHDR(&mh);
+ int pd = *(int*)CMSG_DATA(cmptr);
+ EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len);
+ cmptr = CMSG_NXTHDR(&mh, cmptr);
+ EXPECT_TRUE(cmptr == NULL);
+ if (verbose) fprintf(stderr, " [%d] got process descriptor %d on socket\n", getpid_(), pd);
+
+ // Child: confirm we can do pd*() operations on the process descriptor
+ pid_t other;
+ EXPECT_OK(pdgetpid(pd, &other));
+ if (verbose) fprintf(stderr, " [%d] process descriptor %d is pid %d\n", getpid_(), pd, other);
+
+ sleep(2);
+ if (verbose) fprintf(stderr, " [%d] close process descriptor %d\n", getpid_(), pd);
+ close(pd);
+
+ // Last process descriptor closed, expect death
+ EXPECT_PID_DEAD(other);
+
+ exit(HasFailure());
+ }
+ usleep(1000); // Ensure subprocess runs
+
+ // Send the process descriptor over the pipe to the sub-process
+ mh.msg_controllen = CMSG_LEN(sizeof(int));
+ cmptr = CMSG_FIRSTHDR(&mh);
+ cmptr->cmsg_level = SOL_SOCKET;
+ cmptr->cmsg_type = SCM_RIGHTS;
+ cmptr->cmsg_len = CMSG_LEN(sizeof(int));
+ *(int *)CMSG_DATA(cmptr) = pd_;
+ buffer1[0] = 0;
+ iov[0].iov_len = 1;
+ sleep(1);
+ if (verbose) fprintf(stderr, "[%d] send process descriptor %d on socket\n", getpid_(), pd_);
+ int rc = sendmsg(sock_fds[1], &mh, 0);
+ EXPECT_OK(rc);
+
+ if (verbose) fprintf(stderr, "[%d] close process descriptor %d\n", getpid_(), pd_);
+ close(pd_); // Not last open process descriptor
+
+ // wait for child2
+ int status;
+ EXPECT_EQ(child2, waitpid(child2, &status, __WALL));
+ rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+
+ // confirm death all round
+ EXPECT_PID_DEAD(child2);
+ EXPECT_PID_DEAD(pid_);
+}
diff --git a/rename.cc b/rename.cc
new file mode 100644
index 000000000000..080db66756b8
--- /dev/null
+++ b/rename.cc
@@ -0,0 +1,49 @@
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include "./capsicum-test.h"
+
+// There was a Capsicum-related regression in FreeBSD renameat,
+// which affects certain cases independent of Capsicum or capability mode
+//
+// added to test the renameat syscall for the case that
+// - the "to" file already exists
+// - the "to" file is specified by an absolute path
+// - the "to" file descriptor is used
+// (this descriptor should be ignored if absolute path is provided)
+//
+// details at: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=222258
+
+
+const char * create_tmp_src(const char* filename) {
+ const char *src_path = TmpFile(filename);
+ int src_fd = open(src_path, O_CREAT|O_RDWR, 0644);
+ close(src_fd);
+ return src_path;
+}
+
+TEST(Rename, AbsDesignationSame) {
+ const char *src_path = create_tmp_src("rename_test");
+ EXPECT_OK(rename(src_path, src_path));
+ unlink(src_path);
+}
+
+TEST(RenameAt, AbsDesignationSame) {
+ const char *src_path = create_tmp_src("renameat_test");
+ const char *dir_path = TmpFile("renameat_test_dir");
+
+ EXPECT_OK(mkdir(dir_path, 0755));
+ // random temporary directory descriptor
+ int dfd = open(dir_path, O_DIRECTORY);
+
+ // Various rename from/to the same absolute path; in each case the source
+ // and dest directory FDs should be irrelevant.
+ EXPECT_OK(renameat(AT_FDCWD, src_path, AT_FDCWD, src_path));
+ EXPECT_OK(renameat(AT_FDCWD, src_path, dfd, src_path));
+ EXPECT_OK(renameat(dfd, src_path, AT_FDCWD, src_path));
+ EXPECT_OK(renameat(dfd, src_path, dfd, src_path));
+
+ close(dfd);
+ rmdir(dir_path);
+ unlink(src_path);
+}
diff --git a/sctp.cc b/sctp.cc
new file mode 100644
index 000000000000..5f0d169456a8
--- /dev/null
+++ b/sctp.cc
@@ -0,0 +1,212 @@
+// Tests of SCTP functionality
+// Requires: libsctp-dev package on Debian Linux, CONFIG_IP_SCTP in kernel config
+#ifdef HAVE_SCTP
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/sctp.h>
+#include <arpa/inet.h>
+#include <stdio.h>
+
+#include "syscalls.h"
+#include "capsicum.h"
+#include "capsicum-test.h"
+
+static cap_rights_t r_ro;
+static cap_rights_t r_wo;
+static cap_rights_t r_rw;
+static cap_rights_t r_all;
+static cap_rights_t r_all_nopeel;
+#define DO_PEELOFF 0x1A
+#define DO_TERM 0x1B
+
+static int SctpClient(int port, unsigned char byte) {
+ // Create sockets
+ int sock = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
+ EXPECT_OK(sock);
+ if (sock < 0) return sock;
+ int cap_sock_ro = dup(sock);
+ EXPECT_OK(cap_sock_ro);
+ EXPECT_OK(cap_rights_limit(cap_sock_ro, &r_rw));
+ int cap_sock_rw = dup(sock);
+ EXPECT_OK(cap_sock_rw);
+ EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+ int cap_sock_all = dup(sock);
+ EXPECT_OK(cap_sock_all);
+ EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+ close(sock);
+
+ // Send a message. Requires CAP_WRITE and CAP_CONNECT.
+ struct sockaddr_in serv_addr;
+ memset(&serv_addr, 0, sizeof(serv_addr));
+ serv_addr.sin_family = AF_INET;
+ serv_addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+ serv_addr.sin_port = htons(port);
+
+ EXPECT_NOTCAPABLE(sctp_sendmsg(cap_sock_ro, &byte, 1,
+ (struct sockaddr*)&serv_addr, sizeof(serv_addr),
+ 0, 0, 1, 0, 0));
+ EXPECT_NOTCAPABLE(sctp_sendmsg(cap_sock_rw, &byte, 1,
+ (struct sockaddr*)&serv_addr, sizeof(serv_addr),
+ 0, 0, 1, 0, 0));
+ if (verbose) fprintf(stderr, " [%d]sctp_sendmsg(%02x)\n", getpid_(), byte);
+ EXPECT_OK(sctp_sendmsg(cap_sock_all, &byte, 1,
+ (struct sockaddr*)&serv_addr, sizeof(serv_addr),
+ 0, 0, 1, 0, 0));
+ close(cap_sock_ro);
+ close(cap_sock_rw);
+ return cap_sock_all;
+}
+
+
+TEST(Sctp, Socket) {
+ int sock = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
+ EXPECT_OK(sock);
+ if (sock < 0) return;
+
+ cap_rights_init(&r_ro, CAP_READ);
+ cap_rights_init(&r_wo, CAP_WRITE);
+ cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+ cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER);
+ cap_rights_init(&r_all_nopeel, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER);
+ cap_rights_clear(&r_all_nopeel, CAP_PEELOFF);
+
+ int cap_sock_wo = dup(sock);
+ EXPECT_OK(cap_sock_wo);
+ EXPECT_OK(cap_rights_limit(cap_sock_wo, &r_wo));
+ int cap_sock_rw = dup(sock);
+ EXPECT_OK(cap_sock_rw);
+ EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+ int cap_sock_all = dup(sock);
+ EXPECT_OK(cap_sock_all);
+ EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+ int cap_sock_all_nopeel = dup(sock);
+ EXPECT_OK(cap_sock_all_nopeel);
+ EXPECT_OK(cap_rights_limit(cap_sock_all_nopeel, &r_all_nopeel));
+ close(sock);
+
+ struct sockaddr_in addr;
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(0);
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ socklen_t len = sizeof(addr);
+
+ // Can only bind the fully-capable socket.
+ EXPECT_NOTCAPABLE(bind(cap_sock_rw, (struct sockaddr *)&addr, len));
+ EXPECT_OK(bind(cap_sock_all, (struct sockaddr *)&addr, len));
+
+ EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr *)&addr, &len));
+ int port = ntohs(addr.sin_port);
+
+ // Now we know the port involved, fork off children to run clients.
+ pid_t child1 = fork();
+ if (child1 == 0) {
+ // Child process 1: wait for server setup
+ sleep(1);
+ // Send a message that triggers peeloff
+ int client_sock = SctpClient(port, DO_PEELOFF);
+ sleep(1);
+ close(client_sock);
+ exit(HasFailure());
+ }
+
+ pid_t child2 = fork();
+ if (child2 == 0) {
+ // Child process 2: wait for server setup
+ sleep(2);
+ // Send a message that triggers server exit
+ int client_sock = SctpClient(port, DO_TERM);
+ close(client_sock);
+ exit(HasFailure());
+ }
+
+ // Can only listen on the fully-capable socket.
+ EXPECT_NOTCAPABLE(listen(cap_sock_rw, 3));
+ EXPECT_OK(listen(cap_sock_all, 3));
+
+ // Can only do socket operations on the fully-capable socket.
+ len = sizeof(addr);
+ EXPECT_NOTCAPABLE(getsockname(cap_sock_rw, (struct sockaddr*)&addr, &len));
+
+ struct sctp_event_subscribe events;
+ memset(&events, 0, sizeof(events));
+ events.sctp_association_event = 1;
+ events.sctp_data_io_event = 1;
+ EXPECT_NOTCAPABLE(setsockopt(cap_sock_rw, IPPROTO_SCTP, SCTP_EVENTS, &events, sizeof(events)));
+ len = sizeof(events);
+ EXPECT_NOTCAPABLE(getsockopt(cap_sock_rw, IPPROTO_SCTP, SCTP_EVENTS, &events, &len));
+ memset(&events, 0, sizeof(events));
+ events.sctp_association_event = 1;
+ events.sctp_data_io_event = 1;
+ EXPECT_OK(setsockopt(cap_sock_all, IPPROTO_SCTP, SCTP_EVENTS, &events, sizeof(events)));
+ len = sizeof(events);
+ EXPECT_OK(getsockopt(cap_sock_all, IPPROTO_SCTP, SCTP_EVENTS, &events, &len));
+
+ len = sizeof(addr);
+ memset(&addr, 0, sizeof(addr));
+ EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr*)&addr, &len));
+ EXPECT_EQ(AF_INET, addr.sin_family);
+ EXPECT_EQ(htons(port), addr.sin_port);
+
+ struct sockaddr_in client_addr;
+ socklen_t addr_len = sizeof(client_addr);
+ unsigned char buffer[1024];
+ struct sctp_sndrcvinfo sri;
+ memset(&sri, 0, sizeof(sri));
+ int flags = 0;
+ EXPECT_NOTCAPABLE(sctp_recvmsg(cap_sock_wo, buffer, sizeof(buffer),
+ (struct sockaddr*)&client_addr, &addr_len,
+ &sri, &flags));
+ while (true) {
+ retry:
+ memset(&sri, 0, sizeof(sri));
+ int len = sctp_recvmsg(cap_sock_rw, buffer, sizeof(buffer),
+ (struct sockaddr*)&client_addr, &addr_len,
+ &sri, &flags);
+ if (len < 0 && errno == EAGAIN) goto retry;
+ EXPECT_OK(len);
+ if (len > 0) {
+ if (verbose) fprintf(stderr, "[%d]sctp_recvmsg(%02x..)", getpid_(), (unsigned)buffer[0]);
+ if (buffer[0] == DO_PEELOFF) {
+ if (verbose) fprintf(stderr, "..peeling off association %08lx\n", (long)sri.sinfo_assoc_id);
+ // Peel off the association. Needs CAP_PEELOFF.
+ int rc1 = sctp_peeloff(cap_sock_all_nopeel, sri.sinfo_assoc_id);
+ EXPECT_NOTCAPABLE(rc1);
+ int rc2 = sctp_peeloff(cap_sock_all, sri.sinfo_assoc_id);
+ EXPECT_OK(rc2);
+ int peeled = std::max(rc1, rc2);
+ if (peeled > 0) {
+#ifdef CAP_FROM_PEELOFF
+ // Peeled off FD should have same rights as original socket.
+ cap_rights_t rights;
+ EXPECT_OK(cap_rights_get(peeled, &rights));
+ EXPECT_RIGHTS_EQ(&r_all, &rights);
+#endif
+ close(peeled);
+ }
+ } else if (buffer[0] == DO_TERM) {
+ if (verbose) fprintf(stderr, "..terminating server\n");
+ break;
+ }
+ } else if (len < 0) {
+ break;
+ }
+ }
+
+ // Wait for the children.
+ int status;
+ int rc;
+ EXPECT_EQ(child1, waitpid(child1, &status, 0));
+ rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+ EXPECT_EQ(child2, waitpid(child2, &status, 0));
+ rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+
+ close(cap_sock_wo);
+ close(cap_sock_rw);
+ close(cap_sock_all);
+ close(cap_sock_all_nopeel);
+}
+#endif
diff --git a/select.cc b/select.cc
new file mode 100644
index 000000000000..3fa02c639f03
--- /dev/null
+++ b/select.cc
@@ -0,0 +1,142 @@
+#include <sys/select.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <poll.h>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+namespace {
+
+int AddFDToSet(fd_set* fset, int fd, int maxfd) {
+ FD_SET(fd, fset);
+ if (fd > maxfd) maxfd = fd;
+ return maxfd;
+}
+
+int InitFDSet(fd_set* fset, int *fds, int fdcount) {
+ FD_ZERO(fset);
+ int maxfd = -1;
+ for (int ii = 0; ii < fdcount; ii++) {
+ maxfd = AddFDToSet(fset, fds[ii], maxfd);
+ }
+ return maxfd;
+}
+
+} // namespace
+
+FORK_TEST_ON(Select, LotsOFileDescriptors, TmpFile("cap_select")) {
+ int fd = open(TmpFile("cap_select"), O_RDWR | O_CREAT, 0644);
+ EXPECT_OK(fd);
+ if (fd < 0) return;
+
+ // Create many POLL_EVENT capabilities.
+ const int kCapCount = 64;
+ int cap_fd[kCapCount];
+ cap_rights_t r_poll;
+ cap_rights_init(&r_poll, CAP_EVENT);
+ for (int ii = 0; ii < kCapCount; ii++) {
+ cap_fd[ii] = dup(fd);
+ EXPECT_OK(cap_fd[ii]);
+ EXPECT_OK(cap_rights_limit(cap_fd[ii], &r_poll));
+ }
+ cap_rights_t r_rw;
+ cap_rights_init(&r_rw, CAP_READ, CAP_WRITE, CAP_SEEK);
+ int cap_rw = dup(fd);
+ EXPECT_OK(cap_rw);
+ EXPECT_OK(cap_rights_limit(cap_rw, &r_rw));
+
+ EXPECT_OK(cap_enter()); // Enter capability mode
+
+ struct timeval tv;
+ tv.tv_sec = 0;
+ tv.tv_usec = 100;
+ // Add normal file descriptor and all CAP_EVENT capabilities
+ fd_set rset;
+ fd_set wset;
+ int maxfd = InitFDSet(&rset, cap_fd, kCapCount);
+ maxfd = AddFDToSet(&rset, fd, maxfd);
+ InitFDSet(&wset, cap_fd, kCapCount);
+ AddFDToSet(&rset, fd, 0);
+ int ret = select(maxfd+1, &rset, &wset, NULL, &tv);
+ EXPECT_OK(ret);
+
+ // Now also include the capability with no CAP_EVENT.
+ InitFDSet(&rset, cap_fd, kCapCount);
+ AddFDToSet(&rset, fd, maxfd);
+ maxfd = AddFDToSet(&rset, cap_rw, maxfd);
+ InitFDSet(&wset, cap_fd, kCapCount);
+ AddFDToSet(&wset, fd, maxfd);
+ AddFDToSet(&wset, cap_rw, maxfd);
+ ret = select(maxfd+1, &rset, &wset, NULL, &tv);
+ EXPECT_NOTCAPABLE(ret);
+
+#ifdef HAVE_PSELECT
+ // And again with pselect
+ struct timespec ts;
+ ts.tv_sec = 0;
+ ts.tv_nsec = 100000;
+ maxfd = InitFDSet(&rset, cap_fd, kCapCount);
+ maxfd = AddFDToSet(&rset, fd, maxfd);
+ InitFDSet(&wset, cap_fd, kCapCount);
+ AddFDToSet(&rset, fd, 0);
+ ret = pselect(maxfd+1, &rset, &wset, NULL, &ts, NULL);
+ EXPECT_OK(ret);
+
+ InitFDSet(&rset, cap_fd, kCapCount);
+ AddFDToSet(&rset, fd, maxfd);
+ maxfd = AddFDToSet(&rset, cap_rw, maxfd);
+ InitFDSet(&wset, cap_fd, kCapCount);
+ AddFDToSet(&wset, fd, maxfd);
+ AddFDToSet(&wset, cap_rw, maxfd);
+ ret = pselect(maxfd+1, &rset, &wset, NULL, &ts, NULL);
+ EXPECT_NOTCAPABLE(ret);
+#endif
+}
+
+FORK_TEST_ON(Poll, LotsOFileDescriptors, TmpFile("cap_poll")) {
+ int fd = open(TmpFile("cap_poll"), O_RDWR | O_CREAT, 0644);
+ EXPECT_OK(fd);
+ if (fd < 0) return;
+
+ // Create many POLL_EVENT capabilities.
+ const int kCapCount = 64;
+ struct pollfd cap_fd[kCapCount + 2];
+ cap_rights_t r_poll;
+ cap_rights_init(&r_poll, CAP_EVENT);
+ for (int ii = 0; ii < kCapCount; ii++) {
+ cap_fd[ii].fd = dup(fd);
+ EXPECT_OK(cap_fd[ii].fd);
+ EXPECT_OK(cap_rights_limit(cap_fd[ii].fd, &r_poll));
+ cap_fd[ii].events = POLLIN|POLLOUT;
+ }
+ cap_fd[kCapCount].fd = fd;
+ cap_fd[kCapCount].events = POLLIN|POLLOUT;
+ cap_rights_t r_rw;
+ cap_rights_init(&r_rw, CAP_READ, CAP_WRITE, CAP_SEEK);
+ int cap_rw = dup(fd);
+ EXPECT_OK(cap_rw);
+ EXPECT_OK(cap_rights_limit(cap_rw, &r_rw));
+ cap_fd[kCapCount + 1].fd = cap_rw;
+ cap_fd[kCapCount + 1].events = POLLIN|POLLOUT;
+
+ EXPECT_OK(cap_enter()); // Enter capability mode
+
+ EXPECT_OK(poll(cap_fd, kCapCount + 1, 10));
+ // Now also include the capability with no CAP_EVENT.
+ EXPECT_OK(poll(cap_fd, kCapCount + 2, 10));
+ EXPECT_NE(0, (cap_fd[kCapCount + 1].revents & POLLNVAL));
+
+#ifdef HAVE_PPOLL
+ // And again with ppoll
+ struct timespec ts;
+ ts.tv_sec = 0;
+ ts.tv_nsec = 100000;
+ EXPECT_OK(ppoll(cap_fd, kCapCount + 1, &ts, NULL));
+ // Now also include the capability with no CAP_EVENT.
+ EXPECT_OK(ppoll(cap_fd, kCapCount + 2, &ts, NULL));
+ EXPECT_NE(0, (cap_fd[kCapCount + 1].revents & POLLNVAL));
+#endif
+}
diff --git a/showrights b/showrights
new file mode 100755
index 000000000000..5af5441d297d
--- /dev/null
+++ b/showrights
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+import sys
+import re
+
+_values = { # 2-tuple => name
+ (0x0000000000000000, 0x0000000000000100) : 'TTYHOOK',
+ (0x0000000000000040, 0x0000000000000000) : 'CREATE',
+ (0x0000000200000000, 0x0000000000000000) : 'GETSOCKNAME',
+ (0x0000000000000000, 0x0000000000100000) : 'KQUEUE_CHANGE',
+ (0x0000000000000000, 0x0000000000004000) : 'EXTATTR_LIST',
+ (0x0000000000000080, 0x0000000000000000) : 'FEXECVE',
+ (0x0000001000000000, 0x0000000000000000) : 'PEELOFF',
+ (0x0000000000000000, 0x0000000000800000) : 'NOTIFY',
+ (0x0000000000000000, 0x0000000000001000) : 'EXTATTR_DELETE',
+ (0x0000000040000000, 0x0000000000000000) : 'BIND',
+ (0x0000000000000000, 0x0000000000002000) : 'EXTATTR_GET',
+ (0x0000000000008000, 0x0000000000000000) : 'FCNTL',
+ (0x0000000000000000, 0x0000000000400000) : 'EPOLL_CTL',
+ (0x0000000000000004, 0x0000000000000000) : 'SEEK_TELL',
+ (0x000000000000000c, 0x0000000000000000) : 'SEEK',
+ (0x0000004000000000, 0x0000000000000000) : 'SHUTDOWN',
+ (0x0000000000000000, 0x0000000000000080) : 'IOCTL',
+ (0x0000000000000000, 0x0000000000000020) : 'EVENT',
+ (0x0000000400000000, 0x0000000000000000) : 'GETSOCKOPT',
+ (0x0000000080000000, 0x0000000000000000) : 'CONNECT',
+ (0x0000000000000000, 0x0000000000200000) : 'FSIGNAL',
+ (0x0000000000000000, 0x0000000000008000) : 'EXTATTR_SET',
+ (0x0000000000100000, 0x0000000000000000) : 'FSTATFS',
+ (0x0000000000040000, 0x0000000000000000) : 'FSCK',
+ (0x0000000000000000, 0x0000000000000800) : 'PDKILL_FREEBSD',
+ (0x0000000000000000, 0x0000000000000004) : 'SEM_GETVALUE',
+ (0x0000000000000000, 0x0000000000080000) : 'ACL_SET',
+ (0x0000000000200000, 0x0000000000000000) : 'FUTIMES',
+ (0x0000000000000200, 0x0000000000000000) : 'FTRUNCATE',
+ (0x0000000000000000, 0x0000000000000001) : 'MAC_GET',
+ (0x0000000000020000, 0x0000000000000000) : 'FPATHCONF',
+ (0x0000002000000000, 0x0000000000000000) : 'SETSOCKOPT',
+ (0x0000000000002000, 0x0000000000000000) : 'FCHMOD',
+ (0x0000000000000000, 0x0000000002000000) : 'PERFMON',
+ (0x0000000000004000, 0x0000000000000000) : 'FCHOWN',
+ (0x0000000000000400, 0x0000000000000000) : 'LOOKUP',
+ (0x0000000000400400, 0x0000000000000000) : 'LINKAT_TARGET',
+ (0x0000000000800400, 0x0000000000000000) : 'MKDIRAT',
+ (0x0000000001000400, 0x0000000000000000) : 'MKFIFOAT',
+ (0x0000000002000400, 0x0000000000000000) : 'MKNODAT',
+ (0x0000000004000400, 0x0000000000000000) : 'RENAMEAT_SOURCE',
+ (0x0000000008000400, 0x0000000000000000) : 'SYMLINKAT',
+ (0x0000000010000400, 0x0000000000000000) : 'UNLINKAT',
+ (0x0000008000000400, 0x0000000000000000) : 'BINDAT',
+ (0x0000010000000400, 0x0000000000000000) : 'CONNECTAT',
+ (0x0000020000000400, 0x0000000000000000) : 'LINKAT_SOURCE',
+ (0x0000040000000400, 0x0000000000000000) : 'RENAMEAT_TARGET',
+ (0x0000000000000010, 0x0000000000000000) : 'MMAP',
+ (0x000000000000003c, 0x0000000000000000) : 'MMAP_X',
+ (0x0000000000000000, 0x0000000001000000) : 'SETNS',
+ (0x0000000000080000, 0x0000000000000000) : 'FSTAT',
+ (0x0000000000000001, 0x0000000000000000) : 'READ',
+ (0x0000000000000000, 0x0000000000000008) : 'SEM_POST',
+ (0x0000000000000000, 0x0000000000020000) : 'ACL_DELETE',
+ (0x0000000000001000, 0x0000000000000000) : 'FCHFLAGS',
+ (0x0000000800000000, 0x0000000000000000) : 'LISTEN',
+ (0x0000000100000000, 0x0000000000000000) : 'GETPEERNAME',
+ (0x0000000000000100, 0x0000000000000000) : 'FSYNC',
+ (0x0000000000000000, 0x0000000004000000) : 'BPF',
+ (0x0000000020000000, 0x0000000000000000) : 'ACCEPT',
+ (0x0000000000000800, 0x0000000000000000) : 'FCHDIR',
+ (0x0000000000000002, 0x0000000000000000) : 'WRITE',
+ (0x0000000000000000, 0x0000000000000010) : 'SEM_WAIT',
+ (0x0000000000000000, 0x0000000000000040) : 'KQUEUE_EVENT',
+ (0x0000000000000000, 0x0000000000000400) : 'PDWAIT',
+ (0x0000000000000000, 0x0000000000040000) : 'ACL_GET',
+ (0x0000000000010000, 0x0000000000000000) : 'FLOCK',
+ (0x0000000000000000, 0x0000000000010000) : 'ACL_CHECK',
+ (0x0000000000000000, 0x0000000000000002) : 'MAC_SET',
+ (0x0000000000000000, 0x0000000000000200) : 'PDGETPID_FREEBSD',
+}
+
+
+def _map_fdinfo(line):
+ RIGHTS_RE = re.compile(r'(?P<prefix>.*)rights:(?P<ws>\s+)0x(?P<v0>[0-9a-fA-F]+)\s+0x(?P<v1>[0-9a-fA-F]+)$')
+ m = RIGHTS_RE.match(line)
+ if m:
+ val0 = long(m.group('v0'), 16)
+ val0 = (val0 & ~(0x0200000000000000L))
+ val1 = long(m.group('v1'), 16)
+ val1 = (val1 & ~(0x0400000000000000L))
+ rights = []
+ for (right, name) in _values.items():
+ if ((right[0] == 0 or (val0 & right[0])) and
+ (right[1] == 0 or (val1 & right[1]))):
+ rights.append(name)
+ return "%srights:%s%s" % (m.group('prefix'), m.group('ws'), '|'.join(rights))
+ else:
+ return line.rstrip()
+
+if __name__ == "__main__":
+ infile = open(sys.argv[1], 'r') if len(sys.argv) > 1 else sys.stdin
+ for line in infile.readlines():
+ print _map_fdinfo(line)
diff --git a/smoketest.c b/smoketest.c
new file mode 100644
index 000000000000..86d15fe10510
--- /dev/null
+++ b/smoketest.c
@@ -0,0 +1,135 @@
+/* Small standalone test program to check the existence of Capsicum syscalls */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include "capsicum.h"
+
+#ifdef __linux__
+// glibc on Linux caches getpid() return value.
+int getpid_(void) { return syscall(__NR_getpid); }
+#else
+#define getpid_ getpid
+#endif
+
+static int seen_sigchld = 0;
+static void handle_signal(int x) {
+ fprintf(stderr, "[%d] received SIGCHLD\n", getpid_());
+ seen_sigchld = 1;
+}
+
+int main(int argc, char *argv[]) {
+ signal(SIGCHLD, handle_signal);
+ int lifetime = 4; /* seconds */
+ if (1 < argc) {
+ lifetime = atoi(argv[1]);
+ }
+
+ /* cap_rights_limit() available? */
+ cap_rights_t r_rws;
+ cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
+ int cap_fd = dup(STDOUT_FILENO);
+ int rc = cap_rights_limit(cap_fd, &r_rws);
+ fprintf(stderr, "[%d] cap_fd=%d\n", getpid_(), cap_fd);
+ if (rc < 0) fprintf(stderr, "*** cap_rights_limit() failed: errno=%d %s\n", errno, strerror(errno));
+
+ /* cap_rights_get() available? */
+ cap_rights_t rights;
+ cap_rights_init(&rights, 0);
+ rc = cap_rights_get(cap_fd, &rights);
+ char buffer[256];
+ cap_rights_describe(&rights, buffer);
+ fprintf(stderr, "[%d] cap_rights_get(cap_fd=%d) rc=%d rights=%s\n", getpid_(), cap_fd, rc, buffer);
+ if (rc < 0) fprintf(stderr, "*** cap_rights_get() failed: errno=%d %s\n", errno, strerror(errno));
+
+ /* fstat() policed? */
+ struct stat buf;
+ rc = fstat(cap_fd, &buf);
+ fprintf(stderr, "[%d] fstat(cap_fd=%d) rc=%d errno=%d\n", getpid_(), cap_fd, rc, errno);
+ if (rc != -1) fprintf(stderr, "*** fstat() unexpectedly succeeded\n");
+
+ /* pdfork() available? */
+ int pd = -1;
+ rc = pdfork(&pd, 0);
+ if (rc < 0) fprintf(stderr, "*** pdfork() failed: errno=%d %s\n", errno, strerror(errno));
+
+ if (rc == 0) { /* child */
+ int count = 0;
+ while (count < 20) {
+ fprintf(stderr, " [%d] child alive, parent is ppid=%d\n", getpid_(), getppid());
+ sleep(1);
+ }
+ fprintf(stderr, " [%d] child exit(0)\n", getpid_());
+ exit(0);
+ }
+ fprintf(stderr, "[%d] pdfork() rc=%d pd=%d\n", getpid_(), rc, pd);
+
+ /* pdgetpid() available? */
+ pid_t actual_pid = rc;
+ pid_t got_pid = -1;
+ rc = pdgetpid(pd, &got_pid);
+ if (rc < 0) fprintf(stderr, "*** pdgetpid(pd=%d) failed: errno=%d %s\n", pd, errno, strerror(errno));
+ fprintf(stderr, "[%d] pdgetpid(pd=%d)=%d, pdfork returned %d\n", getpid_(), pd, got_pid, actual_pid);
+
+ sleep(lifetime);
+
+ /* pdkill() available? */
+ rc = pdkill(pd, SIGKILL);
+ fprintf(stderr, "[%d] pdkill(pd=%d, SIGKILL) -> rc=%d\n", getpid_(), pd, rc);
+ if (rc < 0) fprintf(stderr, "*** pdkill() failed: errno=%d %s\n", errno, strerror(errno));
+ usleep(50000); /* Allow time for death and signals */
+
+ /* Death of a pdforked child should be invisible */
+ if (seen_sigchld) fprintf(stderr, "*** SIGCHLD emitted\n");
+ int status;
+ rc = wait4(-1, &status, WNOHANG, NULL);
+ if (rc > 0) fprintf(stderr, "*** wait4(-1, ...) unexpectedly found child %d\n", rc);
+
+ fprintf(stderr, "[%d] forking off a child process to check cap_enter()\n", getpid_());
+ pid_t child = fork();
+ if (child == 0) { /* child */
+ /* cap_getmode() / cap_enter() available? */
+ unsigned int cap_mode = -1;
+ rc = cap_getmode(&cap_mode);
+ fprintf(stderr, " [%d] cap_getmode() -> rc=%d, cap_mode=%d\n", getpid_(), rc, cap_mode);
+ if (rc < 0) fprintf(stderr, "*** cap_getmode() failed: errno=%d %s\n", errno, strerror(errno));
+
+ rc = cap_enter();
+ fprintf(stderr, " [%d] cap_enter() -> rc=%d\n", getpid_(), rc);
+ if (rc < 0) fprintf(stderr, "*** cap_enter() failed: errno=%d %s\n", errno, strerror(errno));
+
+ rc = cap_getmode(&cap_mode);
+ fprintf(stderr, " [%d] cap_getmode() -> rc=%d, cap_mode=%d\n", getpid_(), rc, cap_mode);
+ if (rc < 0) fprintf(stderr, "*** cap_getmode() failed: errno=%d %s\n", errno, strerror(errno));
+
+ /* open disallowed? */
+ rc = open("/etc/passwd", O_RDONLY);
+ fprintf(stderr, " [%d] open('/etc/passwd') -> rc=%d, errno=%d\n", getpid_(), rc, errno);
+ if (rc != -1) fprintf(stderr, "*** open() unexpectedly succeeded\n");
+#ifdef ECAPMODE
+ if (errno != ECAPMODE) fprintf(stderr, "*** open() failed with errno %d not ECAPMODE\n", errno);
+#endif
+ exit(0);
+ }
+ rc = wait4(child, &status, 0, NULL);
+ fprintf(stderr, "[%d] child %d exited with status %x\n", getpid_(), child, status);
+
+ /* fexecve() available? */
+ char* argv_pass[] = {(char*)"/bin/ls", "-l", "smoketest", NULL};
+ char* null_envp[] = {NULL};
+ int ls_bin = open("/bin/ls", O_RDONLY);
+ fprintf(stderr, "[%d] about to fexecve('/bin/ls', '-l', 'smoketest')\n", getpid_());
+ rc = fexecve(ls_bin, argv_pass, null_envp);
+ /* should never reach here */
+ fprintf(stderr, "*** fexecve(fd=%d) failed: rc=%d errno=%d %s\n", ls_bin, rc, errno, strerror(errno));
+
+ return 0;
+}
diff --git a/socket.cc b/socket.cc
new file mode 100644
index 000000000000..a80cd3ae5e7e
--- /dev/null
+++ b/socket.cc
@@ -0,0 +1,340 @@
+// Tests for socket functionality.
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "capsicum.h"
+#include "syscalls.h"
+#include "capsicum-test.h"
+
+TEST(Socket, UnixDomain) {
+ const char* socketName = TmpFile("capsicum-test.socket");
+ unlink(socketName);
+ cap_rights_t r_rw;
+ cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+ cap_rights_t r_all;
+ cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER);
+
+ pid_t child = fork();
+ if (child == 0) {
+ // Child process: wait for server setup
+ sleep(1);
+
+ // Create sockets
+ int sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ EXPECT_OK(sock);
+ if (sock < 0) return;
+
+ int cap_sock_rw = dup(sock);
+ EXPECT_OK(cap_sock_rw);
+ EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+ int cap_sock_all = dup(sock);
+ EXPECT_OK(cap_sock_all);
+ EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+ EXPECT_OK(close(sock));
+
+ // Connect socket
+ struct sockaddr_un un;
+ memset(&un, 0, sizeof(un));
+ un.sun_family = AF_UNIX;
+ strcpy(un.sun_path, socketName);
+ socklen_t len = sizeof(un);
+ EXPECT_NOTCAPABLE(connect_(cap_sock_rw, (struct sockaddr *)&un, len));
+ EXPECT_OK(connect_(cap_sock_all, (struct sockaddr *)&un, len));
+
+ exit(HasFailure());
+ }
+
+ int sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ EXPECT_OK(sock);
+ if (sock < 0) return;
+
+ int cap_sock_rw = dup(sock);
+ EXPECT_OK(cap_sock_rw);
+ EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+ int cap_sock_all = dup(sock);
+ EXPECT_OK(cap_sock_all);
+ EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+ EXPECT_OK(close(sock));
+
+ struct sockaddr_un un;
+ memset(&un, 0, sizeof(un));
+ un.sun_family = AF_UNIX;
+ strcpy(un.sun_path, socketName);
+ socklen_t len = (sizeof(un) - sizeof(un.sun_path) + strlen(un.sun_path));
+
+ // Can only bind the fully-capable socket.
+ EXPECT_NOTCAPABLE(bind_(cap_sock_rw, (struct sockaddr *)&un, len));
+ EXPECT_OK(bind_(cap_sock_all, (struct sockaddr *)&un, len));
+
+ // Can only listen on the fully-capable socket.
+ EXPECT_NOTCAPABLE(listen(cap_sock_rw, 3));
+ EXPECT_OK(listen(cap_sock_all, 3));
+
+ // Can only do socket operations on the fully-capable socket.
+ len = sizeof(un);
+ EXPECT_NOTCAPABLE(getsockname(cap_sock_rw, (struct sockaddr*)&un, &len));
+ int value = 0;
+ EXPECT_NOTCAPABLE(setsockopt(cap_sock_rw, SOL_SOCKET, SO_DEBUG, &value, sizeof(value)));
+ len = sizeof(value);
+ EXPECT_NOTCAPABLE(getsockopt(cap_sock_rw, SOL_SOCKET, SO_DEBUG, &value, &len));
+
+ len = sizeof(un);
+ memset(&un, 0, sizeof(un));
+ EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr*)&un, &len));
+ EXPECT_EQ(AF_UNIX, un.sun_family);
+ EXPECT_EQ(std::string(socketName), std::string(un.sun_path));
+ value = 0;
+ EXPECT_OK(setsockopt(cap_sock_all, SOL_SOCKET, SO_DEBUG, &value, sizeof(value)));
+ len = sizeof(value);
+ EXPECT_OK(getsockopt(cap_sock_all, SOL_SOCKET, SO_DEBUG, &value, &len));
+
+ // Accept the incoming connection
+ len = sizeof(un);
+ memset(&un, 0, sizeof(un));
+ EXPECT_NOTCAPABLE(accept(cap_sock_rw, (struct sockaddr *)&un, &len));
+ int conn_fd = accept(cap_sock_all, (struct sockaddr *)&un, &len);
+ EXPECT_OK(conn_fd);
+
+#ifdef CAP_FROM_ACCEPT
+ // New connection should also be a capability.
+ cap_rights_t rights;
+ cap_rights_init(&rights, 0);
+ EXPECT_OK(cap_rights_get(conn_fd, &rights));
+ EXPECT_RIGHTS_IN(&rights, &r_all);
+#endif
+
+ // Wait for the child.
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+
+ close(conn_fd);
+ close(cap_sock_rw);
+ close(cap_sock_all);
+ unlink(socketName);
+}
+
+TEST(Socket, TCP) {
+ int sock = socket(AF_INET, SOCK_STREAM, 0);
+ EXPECT_OK(sock);
+ if (sock < 0) return;
+
+ cap_rights_t r_rw;
+ cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+ cap_rights_t r_all;
+ cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER);
+
+ int cap_sock_rw = dup(sock);
+ EXPECT_OK(cap_sock_rw);
+ EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+ int cap_sock_all = dup(sock);
+ EXPECT_OK(cap_sock_all);
+ EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+ close(sock);
+
+ struct sockaddr_in addr;
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(0);
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ socklen_t len = sizeof(addr);
+
+ // Can only bind the fully-capable socket.
+ EXPECT_NOTCAPABLE(bind_(cap_sock_rw, (struct sockaddr *)&addr, len));
+ EXPECT_OK(bind_(cap_sock_all, (struct sockaddr *)&addr, len));
+
+ getsockname(cap_sock_all, (struct sockaddr *)&addr, &len);
+ int port = ntohs(addr.sin_port);
+
+ // Now we know the port involved, fork off a child.
+ pid_t child = fork();
+ if (child == 0) {
+ // Child process: wait for server setup
+ sleep(1);
+
+ // Create sockets
+ int sock = socket(AF_INET, SOCK_STREAM, 0);
+ EXPECT_OK(sock);
+ if (sock < 0) return;
+ int cap_sock_rw = dup(sock);
+ EXPECT_OK(cap_sock_rw);
+ EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+ int cap_sock_all = dup(sock);
+ EXPECT_OK(cap_sock_all);
+ EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+ close(sock);
+
+ // Connect socket
+ struct sockaddr_in addr;
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(port); // Pick unused port
+ addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+ socklen_t len = sizeof(addr);
+ EXPECT_NOTCAPABLE(connect_(cap_sock_rw, (struct sockaddr *)&addr, len));
+ EXPECT_OK(connect_(cap_sock_all, (struct sockaddr *)&addr, len));
+
+ exit(HasFailure());
+ }
+
+ // Can only listen on the fully-capable socket.
+ EXPECT_NOTCAPABLE(listen(cap_sock_rw, 3));
+ EXPECT_OK(listen(cap_sock_all, 3));
+
+ // Can only do socket operations on the fully-capable socket.
+ len = sizeof(addr);
+ EXPECT_NOTCAPABLE(getsockname(cap_sock_rw, (struct sockaddr*)&addr, &len));
+ int value = 1;
+ EXPECT_NOTCAPABLE(setsockopt(cap_sock_rw, SOL_SOCKET, SO_REUSEPORT, &value, sizeof(value)));
+ len = sizeof(value);
+ EXPECT_NOTCAPABLE(getsockopt(cap_sock_rw, SOL_SOCKET, SO_REUSEPORT, &value, &len));
+
+ len = sizeof(addr);
+ memset(&addr, 0, sizeof(addr));
+ EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr*)&addr, &len));
+ EXPECT_EQ(AF_INET, addr.sin_family);
+ EXPECT_EQ(htons(port), addr.sin_port);
+ value = 0;
+ EXPECT_OK(setsockopt(cap_sock_all, SOL_SOCKET, SO_REUSEPORT, &value, sizeof(value)));
+ len = sizeof(value);
+ EXPECT_OK(getsockopt(cap_sock_all, SOL_SOCKET, SO_REUSEPORT, &value, &len));
+
+ // Accept the incoming connection
+ len = sizeof(addr);
+ memset(&addr, 0, sizeof(addr));
+ EXPECT_NOTCAPABLE(accept(cap_sock_rw, (struct sockaddr *)&addr, &len));
+ int conn_fd = accept(cap_sock_all, (struct sockaddr *)&addr, &len);
+ EXPECT_OK(conn_fd);
+
+#ifdef CAP_FROM_ACCEPT
+ // New connection should also be a capability.
+ cap_rights_t rights;
+ cap_rights_init(&rights, 0);
+ EXPECT_OK(cap_rights_get(conn_fd, &rights));
+ EXPECT_RIGHTS_IN(&rights, &r_all);
+#endif
+
+ // Wait for the child.
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+
+ close(conn_fd);
+ close(cap_sock_rw);
+ close(cap_sock_all);
+}
+
+TEST(Socket, UDP) {
+ int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ EXPECT_OK(sock);
+ if (sock < 0) return;
+
+ cap_rights_t r_rw;
+ cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
+ cap_rights_t r_all;
+ cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER);
+ cap_rights_t r_connect;
+ cap_rights_init(&r_connect, CAP_READ, CAP_WRITE, CAP_CONNECT);
+
+ int cap_sock_rw = dup(sock);
+ EXPECT_OK(cap_sock_rw);
+ EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+ int cap_sock_all = dup(sock);
+ EXPECT_OK(cap_sock_all);
+ EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all));
+ close(sock);
+
+ struct sockaddr_in addr;
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(0);
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ socklen_t len = sizeof(addr);
+
+ // Can only bind the fully-capable socket.
+ EXPECT_NOTCAPABLE(bind_(cap_sock_rw, (struct sockaddr *)&addr, len));
+ EXPECT_OK(bind_(cap_sock_all, (struct sockaddr *)&addr, len));
+ getsockname(cap_sock_all, (struct sockaddr *)&addr, &len);
+ int port = ntohs(addr.sin_port);
+
+ // Can only do socket operations on the fully-capable socket.
+ len = sizeof(addr);
+ EXPECT_NOTCAPABLE(getsockname(cap_sock_rw, (struct sockaddr*)&addr, &len));
+ int value = 1;
+ EXPECT_NOTCAPABLE(setsockopt(cap_sock_rw, SOL_SOCKET, SO_REUSEPORT, &value, sizeof(value)));
+ len = sizeof(value);
+ EXPECT_NOTCAPABLE(getsockopt(cap_sock_rw, SOL_SOCKET, SO_REUSEPORT, &value, &len));
+
+ len = sizeof(addr);
+ memset(&addr, 0, sizeof(addr));
+ EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr*)&addr, &len));
+ EXPECT_EQ(AF_INET, addr.sin_family);
+ EXPECT_EQ(htons(port), addr.sin_port);
+ value = 1;
+ EXPECT_OK(setsockopt(cap_sock_all, SOL_SOCKET, SO_REUSEPORT, &value, sizeof(value)));
+ len = sizeof(value);
+ EXPECT_OK(getsockopt(cap_sock_all, SOL_SOCKET, SO_REUSEPORT, &value, &len));
+
+ pid_t child = fork();
+ if (child == 0) {
+ int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ EXPECT_OK(sock);
+ int cap_sock_rw = dup(sock);
+ EXPECT_OK(cap_sock_rw);
+ EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw));
+ int cap_sock_connect = dup(sock);
+ EXPECT_OK(cap_sock_connect);
+ EXPECT_OK(cap_rights_limit(cap_sock_connect, &r_connect));
+ close(sock);
+
+ // Can only sendmsg(2) to an address over a socket with CAP_CONNECT.
+ unsigned char buffer[256];
+ struct iovec iov;
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = buffer;
+ iov.iov_len = sizeof(buffer);
+
+ struct msghdr mh;
+ memset(&mh, 0, sizeof(mh));
+ mh.msg_iov = &iov;
+ mh.msg_iovlen = 1;
+
+ struct sockaddr_in addr;
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(port);
+ addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+ mh.msg_name = &addr;
+ mh.msg_namelen = sizeof(addr);
+
+ EXPECT_NOTCAPABLE(sendmsg(cap_sock_rw, &mh, 0));
+ EXPECT_OK(sendmsg(cap_sock_connect, &mh, 0));
+
+#ifdef HAVE_SEND_RECV_MMSG
+ struct mmsghdr mv;
+ memset(&mv, 0, sizeof(mv));
+ memcpy(&mv.msg_hdr, &mh, sizeof(struct msghdr));
+ EXPECT_NOTCAPABLE(sendmmsg(cap_sock_rw, &mv, 1, 0));
+ EXPECT_OK(sendmmsg(cap_sock_connect, &mv, 1, 0));
+#endif
+ close(cap_sock_rw);
+ close(cap_sock_connect);
+ exit(HasFailure());
+ }
+ // Wait for the child.
+ int status;
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+ EXPECT_EQ(0, rc);
+
+ close(cap_sock_rw);
+ close(cap_sock_all);
+}
diff --git a/syscalls.h b/syscalls.h
new file mode 100644
index 000000000000..c78576074ef2
--- /dev/null
+++ b/syscalls.h
@@ -0,0 +1,259 @@
+/*
+ * Minimal portability layer for system call differences between
+ * Capsicum OSes.
+ */
+#ifndef __SYSCALLS_H__
+#define __SYSCALLS_H__
+
+/************************************************************
+ * FreeBSD
+ ************************************************************/
+#ifdef __FreeBSD__
+
+/* Map umount2 (Linux) syscall to unmount (FreeBSD) syscall */
+#define umount2(T, F) unmount(T, F)
+
+/* Map sighandler_y (Linux) to sig_t (FreeBSD) */
+#define sighandler_t sig_t
+
+/* profil(2) has a first argument of char* */
+#define profil_arg1_t char
+
+/* FreeBSD has getdents(2) available */
+#include <sys/types.h>
+#include <dirent.h>
+inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
+ return getdents(fd, (char*)dirp, count);
+}
+#include <sys/mman.h>
+inline int mincore_(void *addr, size_t length, unsigned char *vec) {
+ return mincore(addr, length, (char*)vec);
+}
+#define getpid_ getpid
+
+/* Map Linux-style sendfile to FreeBSD sendfile */
+#include <sys/socket.h>
+#include <sys/uio.h>
+inline ssize_t sendfile_(int out_fd, int in_fd, off_t *offset, size_t count) {
+ return sendfile(in_fd, out_fd, *offset, count, NULL, offset, 0);
+}
+
+/* A sample mount(2) call */
+#include <sys/param.h>
+#include <sys/mount.h>
+inline int bogus_mount_() {
+ return mount("procfs", "/not_mounted", 0, NULL);
+}
+
+/* Mappings for extended attribute functions */
+#include <sys/extattr.h>
+inline ssize_t flistxattr_(int fd, char *list, size_t size) {
+ return extattr_list_fd(fd, EXTATTR_NAMESPACE_USER, list, size);
+}
+inline ssize_t fgetxattr_(int fd, const char *name, void *value, size_t size) {
+ return extattr_get_fd(fd, EXTATTR_NAMESPACE_USER, name, value, size);
+}
+inline int fsetxattr_(int fd, const char *name, const void *value, size_t size, int flags) {
+ return extattr_set_fd(fd, EXTATTR_NAMESPACE_USER, name, value, size);
+}
+inline int fremovexattr_(int fd, const char *name) {
+ return extattr_delete_fd(fd, EXTATTR_NAMESPACE_USER, name);
+}
+
+/* mq_* functions are wrappers in FreeBSD so go through to underlying syscalls */
+#include <sys/syscall.h>
+extern "C" {
+extern int __sys_kmq_notify(int, const struct sigevent *);
+extern int __sys_kmq_open(const char *, int, mode_t, const struct mq_attr *);
+extern int __sys_kmq_setattr(int, const struct mq_attr *__restrict, struct mq_attr *__restrict);
+extern ssize_t __sys_kmq_timedreceive(int, char *__restrict, size_t,
+ unsigned *__restrict, const struct timespec *__restrict);
+extern int __sys_kmq_timedsend(int, const char *, size_t, unsigned,
+ const struct timespec *);
+extern int __sys_kmq_unlink(const char *);
+}
+#define mq_notify_ __sys_kmq_notify
+#define mq_open_ __sys_kmq_open
+#define mq_setattr_ __sys_kmq_setattr
+#define mq_getattr_(A, B) __sys_kmq_setattr(A, NULL, B)
+#define mq_timedreceive_ __sys_kmq_timedreceive
+#define mq_timedsend_ __sys_kmq_timedsend
+#define mq_unlink_ __sys_kmq_unlink
+#define mq_close_ close
+#include <sys/ptrace.h>
+inline long ptrace_(int request, pid_t pid, void *addr, void *data) {
+ return ptrace(request, pid, (caddr_t)addr, static_cast<int>((long)data));
+}
+#define PTRACE_PEEKDATA_ PT_READ_D
+#define getegid_ getegid
+#define getgid_ getgid
+#define geteuid_ geteuid
+#define getuid_ getuid
+#define getgroups_ getgroups
+#define getrlimit_ getrlimit
+#define bind_ bind
+#define connect_ connect
+
+/* Features available */
+#if __FreeBSD_version >= 1000000
+#define HAVE_CHFLAGSAT
+#define HAVE_BINDAT
+#define HAVE_CONNECTAT
+#endif
+#define HAVE_CHFLAGS
+#define HAVE_GETFSSTAT
+#define HAVE_REVOKE
+#define HAVE_GETLOGIN
+#define HAVE_MKFIFOAT
+#define HAVE_SYSARCH
+#include <machine/sysarch.h>
+#define HAVE_STAT_BIRTHTIME
+#define HAVE_SYSCTL
+#define HAVE_FPATHCONF
+#define HAVE_F_DUP2FD
+#define HAVE_PSELECT
+#define HAVE_SCTP
+
+/* FreeBSD only allows root to call mlock[all]/munlock[all] */
+#define MLOCK_REQUIRES_ROOT 1
+/* FreeBSD effectively only allows root to call sched_setscheduler */
+#define SCHED_SETSCHEDULER_REQUIRES_ROOT 1
+
+#endif /* FreeBSD */
+
+/************************************************************
+ * Linux
+ ************************************************************/
+#ifdef __linux__
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+#include <sys/sendfile.h>
+#include <sys/statfs.h>
+#include <sys/xattr.h>
+#include <sys/mount.h>
+#include <linux/net.h>
+
+/* profil(2) has a first argument of unsigned short* */
+#define profil_arg1_t unsigned short
+
+static inline int getdents_(unsigned int fd, void *dirp, unsigned int count) {
+ return syscall(__NR_getdents, fd, dirp, count);
+}
+/* A sample mount(2) call */
+static inline int bogus_mount_() {
+ return mount("/dev/bogus", "/bogus", "debugfs", MS_RDONLY, "");
+}
+
+/* libc's getpid() wrapper caches the pid value, and doesn't invalidate
+ * the cached value on pdfork(), so directly syscall. */
+static inline pid_t getpid_() {
+ return syscall(__NR_getpid);
+}
+static inline int execveat(int fd, const char *path,
+ char *const argv[], char *const envp[], int flags) {
+ return syscall(__NR_execveat, fd, path, argv, envp, flags);
+}
+
+/*
+ * Linux glibc includes an fexecve() function, implemented via the /proc
+ * filesystem. Bypass this and go directly to the execveat(2) syscall.
+ */
+static inline int fexecve_(int fd, char *const argv[], char *const envp[]) {
+ return execveat(fd, "", argv, envp, AT_EMPTY_PATH);
+}
+/*
+ * Linux glibc attempts to be clever and intercepts various uid/gid functions.
+ * Bypass by calling the syscalls directly.
+ */
+static inline gid_t getegid_(void) { return syscall(__NR_getegid); }
+static inline gid_t getgid_(void) { return syscall(__NR_getgid); }
+static inline uid_t geteuid_(void) { return syscall(__NR_geteuid); }
+static inline uid_t getuid_(void) { return syscall(__NR_getuid); }
+static inline int getgroups_(int size, gid_t list[]) { return syscall(__NR_getgroups, size, list); }
+static inline int getrlimit_(int resource, struct rlimit *rlim) {
+ return syscall(__NR_getrlimit, resource, rlim);
+}
+
+/*
+ * Linux glibc for i386 consumes the errno returned from the raw socketcall(2) operation,
+ * so use the raw syscall for those operations that are disallowed in capability mode.
+ */
+#ifdef __NR_bind
+#define bind_ bind
+#else
+static inline int bind_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
+ unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen};
+ return syscall(__NR_socketcall, SYS_BIND, args);
+}
+#endif
+#ifdef __NR_connect
+#define connect_ connect
+#else
+static inline int connect_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
+ unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen};
+ return syscall(__NR_socketcall, SYS_CONNECT, args);
+}
+#endif
+
+#define mincore_ mincore
+#define sendfile_ sendfile
+#define flistxattr_ flistxattr
+#define fgetxattr_ fgetxattr
+#define fsetxattr_ fsetxattr
+#define fremovexattr_ fremovexattr
+#define mq_notify_ mq_notify
+#define mq_open_ mq_open
+#define mq_setattr_ mq_setattr
+#define mq_getattr_ mq_getattr
+#define mq_timedreceive_ mq_timedreceive
+#define mq_timedsend_ mq_timedsend
+#define mq_unlink_ mq_unlink
+#define mq_close_ mq_close
+#define ptrace_ ptrace
+#define PTRACE_PEEKDATA_ PTRACE_PEEKDATA
+
+/* Features available */
+#define HAVE_DUP3
+#define HAVE_PIPE2
+#include <sys/fsuid.h> /* for setfsgid()/setfsuid() */
+#define HAVE_SETFSUID
+#define HAVE_SETFSGID
+#define HAVE_READAHEAD
+#define HAVE_SEND_RECV_MMSG
+#define HAVE_SYNCFS
+#define HAVE_SYNC_FILE_RANGE
+#include <sys/uio.h> /* for vmsplice */
+#define HAVE_TEE
+#define HAVE_SPLICE
+#define HAVE_VMSPLICE
+#define HAVE_PSELECT
+#define HAVE_PPOLL
+#define HAVE_EXECVEAT
+#define HAVE_SYSCALL
+#define HAVE_MKNOD_REG
+#define HAVE_MKNOD_SOCKET
+/*
+ * O_BENEATH is arch-specific, via <asm/fcntl.h>; however we cannot include both that file
+ * and the normal <fcntl.h> as they have some clashing definitions. Bypass by directly
+ * defining O_BENEATH, using the current proposed x86 value. (This will therefore not
+ * work for non-x86, and may need changing in future if a different value gets merged.)
+ */
+#ifndef O_BENEATH
+#define O_BENEATH 040000000 /* no / or .. in openat path */
+#endif
+
+
+/* Linux allows anyone to call mlock[all]/munlock[all] */
+#define MLOCK_REQUIRES_ROOT 0
+/* Linux allows anyone to call sched_setscheduler */
+#define SCHED_SETSCHEDULER_REQUIRES_ROOT 1
+
+#endif /* Linux */
+
+#endif /*__SYSCALLS_H__*/
diff --git a/sysctl.cc b/sysctl.cc
new file mode 100644
index 000000000000..7cbd3e8cbbf8
--- /dev/null
+++ b/sysctl.cc
@@ -0,0 +1,15 @@
+#include "capsicum.h"
+#include "capsicum-test.h"
+
+#ifdef HAVE_SYSCTL
+#include <sys/sysctl.h>
+
+// Certain sysctls are permitted in capability mode, but most are not. Test
+// for the ones that should be, and try one or two that shouldn't.
+TEST(Sysctl, Capability) {
+ int oid[2] = {CTL_KERN, KERN_OSRELDATE};
+ int ii;
+ size_t len = sizeof(ii);
+ EXPECT_OK(sysctl(oid, 2, &ii, &len, NULL, 0));
+}
+#endif
diff --git a/waittest.c b/waittest.c
new file mode 100644
index 000000000000..b9bce92527fd
--- /dev/null
+++ b/waittest.c
@@ -0,0 +1,42 @@
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#ifdef __FreeBSD__
+#include <sys/procdesc.h>
+#endif
+
+#ifdef __linux__
+#include <sys/syscall.h>
+int pdfork(int *fd, int flags) {
+ return syscall(__NR_pdfork, fd, flags);
+}
+#endif
+
+int main() {
+ int procfd;
+ int rc = pdfork(&procfd, 0);
+ if (rc < 0) {
+ fprintf(stderr, "pdfork() failed rc=%d errno=%d %s\n", rc, errno, strerror(errno));
+ exit(1);
+ }
+ if (rc == 0) { // Child process
+ sleep(1);
+ exit(123);
+ }
+ fprintf(stderr, "pdfork()ed child pid=%ld procfd=%d\n", (long)rc, procfd);
+ sleep(2); // Allow child to complete
+ pid_t child = waitpid(-1, &rc, WNOHANG);
+ if (child == 0) {
+ fprintf(stderr, "waitpid(): no completed child found\n");
+ } else if (child < 0) {
+ fprintf(stderr, "waitpid(): failed errno=%d %s\n", errno, strerror(errno));
+ } else {
+ fprintf(stderr, "waitpid(): found completed child %ld\n", (long)child);
+ }
+ return 0;
+}