// Make sure assertions are not compiled out, we use them to codify
// invariants about this program and we want it to fail fast and
// loudly if they are violated.
//
// This has to happen *before* <assert.h> is included: the header
// decides whether assert() expands to nothing based on whether NDEBUG
// is defined at the point of inclusion.
#undef NDEBUG

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/xattr.h>
#include <fcntl.h>
#include <dirent.h>
#include <assert.h>
#include <errno.h>
#include <linux/capability.h>
#include <sys/prctl.h>
#include <limits.h>
#include <stdint.h>
#include <syscall.h>
#include <byteswap.h>
// Defines __BYTE_ORDER / __BIG_ENDIAN.  Included explicitly so that the
// LE32_TO_H selection below never silently compares two undefined
// macros (which the preprocessor would treat as 0 == 0).
#include <endian.h>

extern char **environ;

// The WRAPPER_DIR macro is supplied at compile time so that it cannot
// be changed at runtime
static char *wrapper_dir = WRAPPER_DIR;

// Wrapper debug variable name
static char *wrapper_debug = "WRAPPER_DEBUG";

#define CAP_SETPCAP 8

// Convert a 32-bit little-endian value (as stored in the
// security.capability xattr) to host byte order.
#if __BYTE_ORDER == __BIG_ENDIAN
#define LE32_TO_H(x) bswap_32(x)
#else
#define LE32_TO_H(x) (x)
#endif

// Read the number stored in /proc/sys/kernel/cap_last_cap into
// `*last_cap`.
//
// Returns 0 on success.  On failure a diagnostic is printed to stderr,
// `*last_cap` is left unmodified and a negative errno-style value is
// returned (never 0, never positive).
int get_last_cap(unsigned *last_cap) {
    static const char path[] = "/proc/sys/kernel/cap_last_cap";

    FILE *file = fopen(path, "r");
    if (file == NULL) {
        int saved_errno = errno;
        fprintf(stderr, "failed to open %s: %s\n", path, strerror(saved_errno));
        // Guarantee a negative result even if errno was not set.
        return saved_errno > 0 ? -saved_errno : -EIO;
    }

    // Parse into a local so the caller's variable is only written on
    // success, and capture errno before fclose() can overwrite it.
    unsigned value = 0;
    errno = 0;
    int res = fscanf(file, "%u", &value);
    int saved_errno = errno;

    // The stream is read-only, so there is nothing to flush and the
    // result of fclose() carries no useful information here.
    fclose(file);

    if (res != 1) {
        // Either a read error (EOF), an empty file, or content that is
        // not a number (0 conversions).  errno may legitimately still
        // be 0 in the latter cases, so substitute EINVAL to avoid
        // reporting success.
        if (saved_errno <= 0) {
            saved_errno = EINVAL;
        }
        fprintf(stderr, "could not read number from %s: %s\n", path, strerror(saved_errno));
        return -saved_errno;
    }

    *last_cap = value;
    return 0;
}

// Given the path to this program, fetch its configured capability set
// (as set by `setcap ... /path/to/file`) and raise those capabilities
// into the Ambient set.
static int make_caps_ambient(const char *self_path) { struct vfs_ns_cap_data data = {}; int r = getxattr(self_path, "security.capability", &data, sizeof(data)); if (r < 0) { if (errno == ENODATA) { // no capabilities set return 0; } fprintf(stderr, "cannot get capabilities for %s: %s", self_path, strerror(errno)); return 1; } size_t size; uint32_t version = LE32_TO_H(data.magic_etc) & VFS_CAP_REVISION_MASK; switch (version) { case VFS_CAP_REVISION_1: size = VFS_CAP_U32_1; break; case VFS_CAP_REVISION_2: case VFS_CAP_REVISION_3: size = VFS_CAP_U32_3; break; default: fprintf(stderr, "BUG! Unsupported capability version 0x%x on %s. Report to NixOS bugtracker\n", version, self_path); return 1; } const struct __user_cap_header_struct header = { .version = _LINUX_CAPABILITY_VERSION_3, .pid = getpid(), }; struct __user_cap_data_struct user_data[2] = {}; for (size_t i = 0; i < size; i++) { // merge inheritable & permitted into one user_data[i].permitted = user_data[i].inheritable = LE32_TO_H(data.data[i].inheritable) | LE32_TO_H(data.data[i].permitted); } if (syscall(SYS_capset, &header, &user_data) < 0) { fprintf(stderr, "failed to inherit capabilities: %s", strerror(errno)); return 1; } unsigned last_cap; r = get_last_cap(&last_cap); if (r < 0) { return 1; } uint64_t set = user_data[0].permitted | (uint64_t)user_data[1].permitted << 32; for (unsigned cap = 0; cap < last_cap; cap++) { if (!(set & (1ULL << cap))) { continue; } // Check for the cap_setpcap capability, we set this on the // wrapper so it can elevate the capabilities to the Ambient // set but we do not want to propagate it down into the // wrapped program. // // TODO: what happens if that's the behavior you want // though???? I'm preferring a strict vs. loose policy here. 
if (cap == CAP_SETPCAP) { if(getenv(wrapper_debug)) { fprintf(stderr, "cap_setpcap in set, skipping it\n"); } continue; } if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, (unsigned long) cap, 0, 0)) { fprintf(stderr, "cannot raise the capability %d into the ambient set: %s\n", cap, strerror(errno)); return 1; } if (getenv(wrapper_debug)) { fprintf(stderr, "raised %d into the ambient capability set\n", cap); } } return 0; } int readlink_malloc(const char *p, char **ret) { size_t l = FILENAME_MAX+1; int r; for (;;) { char *c = calloc(l, sizeof(char)); if (!c) { return -ENOMEM; } ssize_t n = readlink(p, c, l-1); if (n < 0) { r = -errno; free(c); return r; } if ((size_t) n < l-1) { c[n] = 0; *ret = c; return 0; } free(c); l *= 2; } } int main(int argc, char **argv) { char *self_path = NULL; int self_path_size = readlink_malloc("/proc/self/exe", &self_path); if (self_path_size < 0) { fprintf(stderr, "cannot readlink /proc/self/exe: %s", strerror(-self_path_size)); } // Make sure that we are being executed from the right location, // i.e., `safe_wrapper_dir'. This is to prevent someone from creating // hard link `X' from some other location, along with a false // `X.real' file, to allow arbitrary programs from being executed // with elevated capabilities. int len = strlen(wrapper_dir); if (len > 0 && '/' == wrapper_dir[len - 1]) --len; assert(!strncmp(self_path, wrapper_dir, len)); assert('/' == wrapper_dir[0]); assert('/' == self_path[len]); // Make *really* *really* sure that we were executed as // `self_path', and not, say, as some other setuid program. That // is, our effective uid/gid should match the uid/gid of // `self_path'. struct stat st; assert(lstat(self_path, &st) != -1); assert(!(st.st_mode & S_ISUID) || (st.st_uid == geteuid())); assert(!(st.st_mode & S_ISGID) || (st.st_gid == getegid())); // And, of course, we shouldn't be writable. assert(!(st.st_mode & (S_IWGRP | S_IWOTH))); // Read the path of the real (wrapped) program from <self>.real. 
char real_fn[PATH_MAX + 10]; int real_fn_size = snprintf(real_fn, sizeof(real_fn), "%s.real", self_path); assert(real_fn_size < sizeof(real_fn)); int fd_self = open(real_fn, O_RDONLY); assert(fd_self != -1); char source_prog[PATH_MAX]; len = read(fd_self, source_prog, PATH_MAX); assert(len != -1); assert(len < sizeof(source_prog)); assert(len > 0); source_prog[len] = 0; close(fd_self); // Read the capabilities set on the wrapper and raise them in to // the ambient set so the program we're wrapping receives the // capabilities too! if (make_caps_ambient(self_path) != 0) { free(self_path); return 1; } free(self_path); execve(source_prog, argv, environ); fprintf(stderr, "%s: cannot run `%s': %s\n", argv[0], source_prog, strerror(errno)); return 1; }