(root)/
man-db-2.12.0/
lib/
sandbox.c
       1  /*
       2   * sandbox.c: Process sandboxing
       3   *
       4   * Copyright (C) 2017 Colin Watson.
       5   *
       6   * This file is part of man-db.
       7   *
       8   * man-db is free software; you can redistribute it and/or modify it
       9   * under the terms of the GNU General Public License as published by
      10   * the Free Software Foundation; either version 2 of the License, or
      11   * (at your option) any later version.
      12   *
      13   * man-db is distributed in the hope that it will be useful, but
      14   * WITHOUT ANY WARRANTY; without even the implied warranty of
      15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      16   * GNU General Public License for more details.
      17   *
      18   * You should have received a copy of the GNU General Public License
      19   * along with man-db; if not, write to the Free Software Foundation,
      20   * Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
      21   *
      22   * Some of the syscall lists in this file come from systemd, whose
      23   * copyright/licensing statement is as follows.  Per LGPLv2.1 s. 3, I have
      24   * altered the original references to LGPLv2.1 to refer to GPLv2 instead.
      25   *
      26   * Copyright 2014 Lennart Poettering
      27   *
      28   * systemd is free software; you can redistribute it and/or modify it
      29   * under the terms of the GNU General Public License as published by
      30   * the Free Software Foundation; either version 2 of the License, or
      31   * (at your option) any later version.
      32   *
      33   * systemd is distributed in the hope that it will be useful, but
      34   * WITHOUT ANY WARRANTY; without even the implied warranty of
      35   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
      36   * General Public License for more details.
      37   *
      38   * You should have received a copy of the GNU General Public License
      39   * along with systemd; If not, see <https://www.gnu.org/licenses/>.
      40   */
      41  
      42  #ifdef HAVE_CONFIG_H
      43  #  include "config.h"
      44  #endif /* HAVE_CONFIG_H */
      45  
      46  #include <stdbool.h>
      47  #include <errno.h>
      48  #include <stdlib.h>
      49  #include <string.h>
      50  #include <sys/types.h>
      51  #include <sys/stat.h>
      52  #include <fcntl.h>
      53  #include <unistd.h>
      54  
      55  #ifdef HAVE_LIBSECCOMP
      56  #  include <sys/ioctl.h>
      57  #  include <sys/ipc.h>
      58  #  include <sys/mman.h>
      59  #  include <sys/prctl.h>
      60  #  include <sys/shm.h>
      61  #  include <sys/socket.h>
      62  #  include <termios.h>
      63  #  include <seccomp.h>
      64  #endif /* HAVE_LIBSECCOMP */
      65  
      66  #include "attribute.h"
      67  #include "xalloc.h"
      68  #include "xstrndup.h"
      69  
      70  #include "manconfig.h"
      71  
      72  #include "debug.h"
      73  #include "fatal.h"
      74  #include "sandbox.h"
      75  
      76  struct man_sandbox {
      77  #ifdef HAVE_LIBSECCOMP
      78  	scmp_filter_ctx ctx;
      79  	scmp_filter_ctx permissive_ctx;
      80  #else /* !HAVE_LIBSECCOMP */
      81  	char dummy;
      82  #endif /* HAVE_LIBSECCOMP */
      83  };
      84  
      85  #ifdef HAVE_LIBSECCOMP
      86  static bool seccomp_filter_unavailable = false;
      87  
      88  static void gripe_seccomp_filter_unavailable (void)
      89  {
      90  	debug ("seccomp filtering requires a kernel configured with "
      91  	       "CONFIG_SECCOMP_FILTER\n");
      92  }
      93  
      94  static bool search_ld_preload (const char *needle)
      95  {
      96  	const char *ld_preload_env;
      97  	static char *ld_preload_file = NULL;
      98  
      99  	ld_preload_env = getenv ("LD_PRELOAD");
     100  	if (ld_preload_env && strstr (ld_preload_env, needle) != NULL)
     101  		return true;
     102  
     103  	if (!ld_preload_file) {
     104  		int fd;
     105  		struct stat st;
     106  		char *mapped = NULL;
     107  
     108  		fd = open ("/etc/ld.so.preload", O_RDONLY);
     109  		if (fd >= 0 && fstat (fd, &st) >= 0 && st.st_size)
     110  			mapped = mmap (NULL, st.st_size, PROT_READ,
     111  				       MAP_PRIVATE | MAP_FILE, fd, 0);
     112  		if (mapped) {
     113  			ld_preload_file = xstrndup (mapped, st.st_size);
     114  			munmap (mapped, st.st_size);
     115  		} else
     116  			ld_preload_file = xstrdup ("");
     117  		if (fd >= 0)
     118  			close (fd);
     119  	}
     120  	/* This isn't very accurate: /etc/ld.so.preload may contain
     121  	 * comments.  On the other hand, glibc says "it should only be used
     122  	 * for emergencies and testing".  File a bug if this is a problem
     123  	 * for you.
     124  	 */
     125  	if (strstr (ld_preload_file, needle) != NULL)
     126  		return true;
     127  
     128  	return false;
     129  }
     130  
     131  /* Can we load a seccomp filter into this process?
     132   *
     133   * This guard allows us to call sandbox_load in code paths that may
     134   * conditionally do so again.
     135   */
     136  static bool can_load_seccomp (void)
     137  {
     138  	const char *man_disable_seccomp;
     139  	int seccomp_status;
     140  
     141  	if (seccomp_filter_unavailable) {
     142  		gripe_seccomp_filter_unavailable ();
     143  		return false;
     144  	}
     145  
     146  	man_disable_seccomp = getenv ("MAN_DISABLE_SECCOMP");
     147  	if (man_disable_seccomp && *man_disable_seccomp) {
     148  		debug ("seccomp filter disabled by user request\n");
     149  		return false;
     150  	}
     151  
     152  	/* Valgrind causes the child process to make some system calls we
     153  	 * don't want to allow in general, so disable seccomp when running
     154  	 * on Valgrind.
     155  	 *
     156  	 * The correct approach seems to be to either require valgrind.h at
     157  	 * build-time or copy valgrind.h into this project and then use the
     158  	 * RUNNING_ON_VALGRIND macro, but I'd really rather not add a
     159  	 * build-dependency for this or take a copy of a >6000-line header
     160  	 * file.  Since the goal of this is only to disable the seccomp
     161  	 * filter under Valgrind, this will do for now.
     162  	 */
     163  	if (search_ld_preload ("/vgpreload")) {
     164  		debug ("seccomp filter disabled while running under "
     165  		       "Valgrind\n");
     166  		return false;
     167  	}
     168  
     169  	seccomp_status = prctl (PR_GET_SECCOMP);
     170  
     171  	if (seccomp_status == 0)
     172  		return true;
     173  
     174  	if (seccomp_status == -1) {
     175  		if (errno == EINVAL)
     176  			debug ("running kernel does not support seccomp\n");
     177  		else
     178  			debug ("unknown error getting seccomp status: %s\n",
     179  			       strerror (errno));
     180  	} else if (seccomp_status == 2)
     181  		debug ("seccomp already enabled\n");
     182  	else
     183  		debug ("unknown return value from PR_GET_SECCOMP: %d\n",
     184  		       seccomp_status);
     185  	return false;
     186  }
     187  #endif /* HAVE_LIBSECCOMP */
     188  
     189  #ifdef HAVE_LIBSECCOMP
     190  
     191  #define SC_ALLOW(name) \
     192  	do { \
     193  		int nr = seccomp_syscall_resolve_name (name); \
     194  		if (nr == __NR_SCMP_ERROR) \
     195  			break; \
     196  		if (seccomp_rule_add (ctx, SCMP_ACT_ALLOW, nr, 0) < 0) \
     197  			fatal (errno, "can't add seccomp rule"); \
     198  	} while (0)
     199  
     200  #define SC_ALLOW_PERMISSIVE(name) \
     201  	do { \
     202  		if (permissive) \
     203  			SC_ALLOW (name); \
     204  	} while (0)
     205  
     206  #define SC_ALLOW_ARG_1(name, cmp1) \
     207  	do { \
     208  		int nr = seccomp_syscall_resolve_name (name); \
     209  		if (nr == __NR_SCMP_ERROR) \
     210  			break; \
     211  		if (seccomp_rule_add (ctx, SCMP_ACT_ALLOW, nr, 1, cmp1) < 0) \
     212  			fatal (errno, "can't add seccomp rule"); \
     213  	} while (0)
     214  
     215  #define SC_ALLOW_ARG_2(name, cmp1, cmp2) \
     216  	do { \
     217  		int nr = seccomp_syscall_resolve_name (name); \
     218  		if (nr == __NR_SCMP_ERROR) \
     219  			break; \
     220  		if (seccomp_rule_add (ctx, SCMP_ACT_ALLOW, nr, \
     221  				      2, cmp1, cmp2) < 0) \
     222  			fatal (errno, "can't add seccomp rule"); \
     223  	} while (0)
     224  
     225  /* Create a seccomp filter.
     226   *
     227   * If permissive is true, then the returned filter will allow limited file
     228   * creation (although not making executable files).  This obviously
     229   * constitutes less effective confinement, but it's necessary for some
     230   * subprocesses (such as groff) that need the ability to write to temporary
     231   * files.  Confining these further requires additional tools that can do
     232   * path-based filtering or similar, such as AppArmor.
     233   */
     234  static scmp_filter_ctx make_seccomp_filter (bool permissive)
     235  {
     236  	scmp_filter_ctx ctx;
     237  	mode_t mode_mask = S_ISUID | S_ISGID | S_IXUSR | S_IXGRP | S_IXOTH;
     238  	int create_mask = O_CREAT
     239  #ifdef O_TMPFILE
     240  		| O_TMPFILE
     241  #endif /* O_TMPFILE */
     242  		;
     243  
     244  	if (!can_load_seccomp ())
     245  		return NULL;
     246  
     247  	debug ("initialising seccomp filter (permissive: %d)\n",
     248  	       (int) permissive);
     249  	ctx = seccomp_init (SCMP_ACT_ERRNO (ENOSYS));
     250  	if (!ctx)
     251  		fatal (errno, "can't initialise seccomp filter");
     252  
     253  	/* Allow sibling architectures for x86, since people sometimes mix
     254  	 * and match architectures there for performance reasons.
     255  	 */
     256  	switch (seccomp_arch_native ()) {
     257  		case SCMP_ARCH_X86:
     258  			seccomp_arch_add (ctx, SCMP_ARCH_X86_64);
     259  			seccomp_arch_add (ctx, SCMP_ARCH_X32);
     260  			break;
     261  		case SCMP_ARCH_X86_64:
     262  			seccomp_arch_add (ctx, SCMP_ARCH_X86);
     263  			seccomp_arch_add (ctx, SCMP_ARCH_X32);
     264  			break;
     265  		case SCMP_ARCH_X32:
     266  			seccomp_arch_add (ctx, SCMP_ARCH_X86);
     267  			seccomp_arch_add (ctx, SCMP_ARCH_X86_64);
     268  			break;
     269  	}
     270  
     271  	/* This sandbox is intended to allow operations that might
     272  	 * reasonably be needed in simple data-transforming pipes: it should
     273  	 * allow the process to do most reasonable things to itself, to read
     274  	 * and write data from and to already-open file descriptors, to open
     275  	 * files in read-only mode, and to fork new processes with the same
     276  	 * restrictions.  (If permissive is true, then it should also allow
     277  	 * limited file creation; see the header comment above.)
     278  	 *
     279  	 * Since I currently know of no library with suitable syscall lists,
     280  	 * the syscall lists here are taken from
     281  	 * systemd:src/shared/seccomp-util.c, last updated from commit
     282  	 * ab9617a76624c43a26de7e94424088ae171ebfef (2023-08-07).
     283  	 */
     284  
     285  	/* systemd: SystemCallFilter=@default */
     286  	SC_ALLOW ("arch_prctl");
     287  	SC_ALLOW ("brk");
     288  	SC_ALLOW ("cacheflush");
     289  	SC_ALLOW ("clock_getres");
     290  	SC_ALLOW ("clock_getres_time64");
     291  	SC_ALLOW ("clock_gettime");
     292  	SC_ALLOW ("clock_gettime64");
     293  	SC_ALLOW ("clock_nanosleep");
     294  	SC_ALLOW ("clock_nanosleep_time64");
     295  	SC_ALLOW ("execve");
     296  	SC_ALLOW ("exit");
     297  	SC_ALLOW ("exit_group");
     298  	SC_ALLOW ("futex");
     299  	SC_ALLOW ("futex_time64");
     300  	SC_ALLOW ("futex_waitv");
     301  	SC_ALLOW ("get_robust_list");
     302  	SC_ALLOW ("get_thread_area");
     303  	SC_ALLOW ("getegid");
     304  	SC_ALLOW ("getegid32");
     305  	SC_ALLOW ("geteuid");
     306  	SC_ALLOW ("geteuid32");
     307  	SC_ALLOW ("getgid");
     308  	SC_ALLOW ("getgid32");
     309  	SC_ALLOW ("getgroups");
     310  	SC_ALLOW ("getgroups32");
     311  	SC_ALLOW ("getpgid");
     312  	SC_ALLOW ("getpgrp");
     313  	SC_ALLOW ("getpid");
     314  	SC_ALLOW ("getppid");
     315  	SC_ALLOW ("getrandom");
     316  	SC_ALLOW ("getresgid");
     317  	SC_ALLOW ("getresgid32");
     318  	SC_ALLOW ("getresuid");
     319  	SC_ALLOW ("getresuid32");
     320  	SC_ALLOW ("getrlimit");
     321  	SC_ALLOW ("getsid");
     322  	SC_ALLOW ("gettid");
     323  	SC_ALLOW ("gettimeofday");
     324  	SC_ALLOW ("getuid");
     325  	SC_ALLOW ("getuid32");
     326  	SC_ALLOW ("membarrier");
     327  	SC_ALLOW ("mmap");
     328  	SC_ALLOW ("mmap2");
     329  	SC_ALLOW ("mprotect");
     330  	SC_ALLOW ("munmap");
     331  	SC_ALLOW ("nanosleep");
     332  	SC_ALLOW ("pause");
     333  	SC_ALLOW ("prlimit64");
     334  	SC_ALLOW ("restart_syscall");
     335  	SC_ALLOW ("riscv_flush_icache");
     336  	SC_ALLOW ("riscv_hwprobe");
     337  	SC_ALLOW ("rseq");
     338  	SC_ALLOW ("rt_sigreturn");
     339  	SC_ALLOW ("sched_getaffinity");
     340  	SC_ALLOW ("sched_yield");
     341  	SC_ALLOW ("set_robust_list");
     342  	SC_ALLOW ("set_thread_area");
     343  	SC_ALLOW ("set_tid_address");
     344  	SC_ALLOW ("set_tls");
     345  	SC_ALLOW ("sigreturn");
     346  	SC_ALLOW ("time");
     347  	SC_ALLOW ("ugetrlimit");
     348  
     349  	/* systemd: SystemCallFilter=@basic-io */
     350  	SC_ALLOW ("_llseek");
     351  	SC_ALLOW ("close");
     352  	SC_ALLOW ("close_range");
     353  	SC_ALLOW ("dup");
     354  	SC_ALLOW ("dup2");
     355  	SC_ALLOW ("dup3");
     356  	SC_ALLOW ("lseek");
     357  	SC_ALLOW ("pread64");
     358  	SC_ALLOW ("preadv");
     359  	SC_ALLOW ("preadv2");
     360  	SC_ALLOW ("pwrite64");
     361  	SC_ALLOW ("pwritev");
     362  	SC_ALLOW ("pwritev2");
     363  	SC_ALLOW ("read");
     364  	SC_ALLOW ("readv");
     365  	SC_ALLOW ("write");
     366  	SC_ALLOW ("writev");
     367  
     368  	/* systemd: SystemCallFilter=@file-system (subset) */
     369  	SC_ALLOW ("access");
     370  	SC_ALLOW ("chdir");
     371  	if (permissive) {
     372  		SC_ALLOW_ARG_1 ("chmod",
     373  				SCMP_A1 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
     374  		SC_ALLOW_ARG_1 ("creat",
     375  				SCMP_A1 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
     376  	}
     377  	SC_ALLOW ("faccessat");
     378  	SC_ALLOW ("faccessat2");
     379  	SC_ALLOW ("fallocate");
     380  	SC_ALLOW ("fchdir");
     381  	if (permissive) {
     382  		SC_ALLOW_ARG_1 ("fchmod",
     383  				SCMP_A1 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
     384  		SC_ALLOW_ARG_1 ("fchmodat",
     385  				SCMP_A2 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
     386  	}
     387  	SC_ALLOW ("fcntl");
     388  	SC_ALLOW ("fcntl64");
     389  	SC_ALLOW ("fstat");
     390  	SC_ALLOW ("fstat64");
     391  	SC_ALLOW ("fstatat64");
     392  	SC_ALLOW ("fstatfs");
     393  	SC_ALLOW ("fstatfs64");
     394  	SC_ALLOW ("ftruncate");
     395  	SC_ALLOW ("ftruncate64");
     396  	SC_ALLOW_PERMISSIVE ("futimesat");
     397  	SC_ALLOW ("getcwd");
     398  	SC_ALLOW ("getdents");
     399  	SC_ALLOW ("getdents64");
     400  	SC_ALLOW_PERMISSIVE ("link");
     401  	SC_ALLOW_PERMISSIVE ("linkat");
     402  	SC_ALLOW ("lstat");
     403  	SC_ALLOW ("lstat64");
     404  	SC_ALLOW_PERMISSIVE ("mkdir");
     405  	SC_ALLOW_PERMISSIVE ("mkdirat");
     406  	SC_ALLOW ("newfstatat");
     407  	SC_ALLOW ("oldfstat");
     408  	SC_ALLOW ("oldlstat");
     409  	SC_ALLOW ("oldstat");
     410  	if (permissive) {
     411  		SC_ALLOW_ARG_2 ("open",
     412  				SCMP_A1 (SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
     413  				SCMP_A2 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
     414  		SC_ALLOW_ARG_2 ("openat",
     415  				SCMP_A2 (SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
     416  				SCMP_A3 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
     417  #ifdef O_TMPFILE
     418  		SC_ALLOW_ARG_2 ("open",
     419  				SCMP_A1 (SCMP_CMP_MASKED_EQ,
     420  					 O_TMPFILE, O_TMPFILE),
     421  				SCMP_A2 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
     422  		SC_ALLOW_ARG_2 ("openat",
     423  				SCMP_A2 (SCMP_CMP_MASKED_EQ,
     424  					 O_TMPFILE, O_TMPFILE),
     425  				SCMP_A3 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
     426  #endif /* O_TMPFILE */
     427  		SC_ALLOW_ARG_1 ("open",
     428  				SCMP_A1 (SCMP_CMP_MASKED_EQ, create_mask, 0));
     429  		SC_ALLOW_ARG_1 ("openat",
     430  				SCMP_A2 (SCMP_CMP_MASKED_EQ, create_mask, 0));
     431  	} else {
     432  		SC_ALLOW_ARG_1 ("open",
     433  				SCMP_A1 (SCMP_CMP_MASKED_EQ, O_ACCMODE,
     434  					 O_RDONLY));
     435  		SC_ALLOW_ARG_1 ("openat",
     436  				SCMP_A2 (SCMP_CMP_MASKED_EQ, O_ACCMODE,
     437  					 O_RDONLY));
     438  	}
     439  	SC_ALLOW ("readlink");
     440  	SC_ALLOW ("readlinkat");
     441  	SC_ALLOW_PERMISSIVE ("rename");
     442  	SC_ALLOW_PERMISSIVE ("renameat");
     443  	SC_ALLOW_PERMISSIVE ("renameat2");
     444  	SC_ALLOW_PERMISSIVE ("rmdir");
     445  	SC_ALLOW ("stat");
     446  	SC_ALLOW ("stat64");
     447  	SC_ALLOW ("statfs");
     448  	SC_ALLOW ("statfs64");
     449  	SC_ALLOW ("statx");
     450  	SC_ALLOW_PERMISSIVE ("symlink");
     451  	SC_ALLOW_PERMISSIVE ("symlinkat");
     452  	SC_ALLOW_PERMISSIVE ("truncate");
     453  	SC_ALLOW_PERMISSIVE ("truncateat");
     454  	SC_ALLOW_PERMISSIVE ("unlink");
     455  	SC_ALLOW_PERMISSIVE ("unlinkat");
     456  	SC_ALLOW_PERMISSIVE ("utime");
     457  	SC_ALLOW_PERMISSIVE ("utimensat");
     458  	SC_ALLOW_PERMISSIVE ("utimensat_time64");
     459  	SC_ALLOW_PERMISSIVE ("utimes");
     460  
     461  	/* systemd: SystemCallFilter=@io-event */
     462  	SC_ALLOW ("_newselect");
     463  	SC_ALLOW ("epoll_create");
     464  	SC_ALLOW ("epoll_create1");
     465  	SC_ALLOW ("epoll_ctl");
     466  	SC_ALLOW ("epoll_ctl_old");
     467  	SC_ALLOW ("epoll_pwait");
     468  	SC_ALLOW ("epoll_pwait2");
     469  	SC_ALLOW ("epoll_wait");
     470  	SC_ALLOW ("epoll_wait_old");
     471  	SC_ALLOW ("eventfd");
     472  	SC_ALLOW ("eventfd2");
     473  	SC_ALLOW ("poll");
     474  	SC_ALLOW ("ppoll");
     475  	SC_ALLOW ("ppoll_time64");
     476  	SC_ALLOW ("pselect6");
     477  	SC_ALLOW ("pselect6_time64");
     478  	SC_ALLOW ("select");
     479  
     480  	/* systemd: SystemCallFilter=@ipc (subset) */
     481  	SC_ALLOW ("pipe");
     482  	SC_ALLOW ("pipe2");
     483  
     484  	/* systemd: SystemCallFilter=@process (subset) */
     485  	SC_ALLOW ("capget");
     486  	SC_ALLOW ("clone");
     487  	SC_ALLOW ("clone3");
     488  	SC_ALLOW ("execveat");
     489  	SC_ALLOW ("fork");
     490  	SC_ALLOW ("getrusage");
     491  	SC_ALLOW ("pidfd_open");
     492  	SC_ALLOW ("pidfd_send_signal");
     493  	SC_ALLOW ("prctl");
     494  	SC_ALLOW ("vfork");
     495  	SC_ALLOW ("wait4");
     496  	SC_ALLOW ("waitid");
     497  	SC_ALLOW ("waitpid");
     498  
     499  	/* systemd: SystemCallFilter=@signal */
     500  	SC_ALLOW ("rt_sigaction");
     501  	SC_ALLOW ("rt_sigpending");
     502  	SC_ALLOW ("rt_sigprocmask");
     503  	SC_ALLOW ("rt_sigsuspend");
     504  	SC_ALLOW ("rt_sigtimedwait");
     505  	SC_ALLOW ("rt_sigtimedwait_time64");
     506  	SC_ALLOW ("sigaction");
     507  	SC_ALLOW ("sigaltstack");
     508  	SC_ALLOW ("signal");
     509  	SC_ALLOW ("signalfd");
     510  	SC_ALLOW ("signalfd4");
     511  	SC_ALLOW ("sigpending");
     512  	SC_ALLOW ("sigprocmask");
     513  	SC_ALLOW ("sigsuspend");
     514  
     515  	/* systemd: SystemCallFilter=@sync */
     516  	SC_ALLOW ("fdatasync");
     517  	SC_ALLOW ("fsync");
     518  	SC_ALLOW ("msync");
     519  	SC_ALLOW ("sync");
     520  	SC_ALLOW ("sync_file_range");
     521  	SC_ALLOW ("sync_file_range2");
     522  	SC_ALLOW ("syncfs");
     523  
     524  	/* systemd: SystemCallFilter=@system-service (subset) */
     525  	SC_ALLOW ("arm_fadvise64_64");
     526  	SC_ALLOW ("fadvise64");
     527  	SC_ALLOW ("fadvise64_64");
     528  	if (permissive)
     529  		SC_ALLOW ("ioctl");
     530  	else {
     531  		SC_ALLOW_ARG_1 ("ioctl", SCMP_A1 (SCMP_CMP_EQ, TCGETS));
     532  		SC_ALLOW_ARG_1 ("ioctl", SCMP_A1 (SCMP_CMP_EQ, TIOCGWINSZ));
     533  	}
     534  	SC_ALLOW ("madvise");
     535  	SC_ALLOW ("mremap");
     536  	SC_ALLOW ("sysinfo");
     537  	SC_ALLOW ("uname");
     538  
     539  	/* Extra syscalls not in any of systemd's sets. */
     540  	SC_ALLOW ("arm_fadvise64_64");
     541  	SC_ALLOW ("arm_sync_file_range");
     542  
     543  	/* Allow killing processes and threads.  This is unfortunate but
     544  	 * unavoidable: groff uses kill to explicitly pass on SIGPIPE to its
     545  	 * child processes, and we can't do any more sophisticated filtering
     546  	 * in seccomp.
     547  	 */
     548  	SC_ALLOW ("kill");
     549  	SC_ALLOW ("tgkill");
     550  
     551  	/* Allow some relatively harmless System V shared memory operations.
     552  	 * These seem to be popular among the sort of program that wants to
     553  	 * install itself in /etc/ld.so.preload or similar (e.g. antivirus
     554  	 * programs and VPNs).
     555  	 */
     556  	SC_ALLOW_ARG_1 ("shmat", SCMP_A2 (SCMP_CMP_EQ, SHM_RDONLY));
     557  	SC_ALLOW_ARG_1 ("shmctl", SCMP_A1 (SCMP_CMP_EQ, IPC_STAT));
     558  	SC_ALLOW ("shmdt");
     559  	SC_ALLOW ("shmget");
     560  
     561  	/* Some antivirus programs use an LD_PRELOAD wrapper that wants to
     562  	 * talk to a private daemon using a Unix-domain socket.  We really
     563  	 * don't want to allow these syscalls in general, but if such a
     564  	 * thing is in use we probably have no choice.
     565  	 *
     566  	 * Firebuild is a build accelerator that connects to its supervisor
     567  	 * using a Unix-domain socket.
     568  	 *
     569  	 * snoopy is an execve monitoring tool that may log messages to
     570  	 * /dev/log.
     571  	 */
     572  	if (search_ld_preload ("libesets_pac.so") ||
     573  	    search_ld_preload ("libfirebuild.so") ||
     574  	    search_ld_preload ("libscep_pac.so") ||
     575  	    search_ld_preload ("libsnoopy.so")) {
     576  		SC_ALLOW ("connect");
     577  		SC_ALLOW ("recvmsg");
     578  		SC_ALLOW ("sendmsg");
     579  		SC_ALLOW ("sendto");
     580  		SC_ALLOW ("setsockopt");
     581  		SC_ALLOW_ARG_1 ("socket", SCMP_A0 (SCMP_CMP_EQ, AF_UNIX));
     582  	}
     583  	/* ESET sends messages to a System V message queue. */
     584  	if (search_ld_preload ("libesets_pac.so") ||
     585  	    search_ld_preload ("libscep_pac.so")) {
     586  		SC_ALLOW_ARG_1 ("msgget", SCMP_A1 (SCMP_CMP_EQ, 0));
     587  		SC_ALLOW ("msgsnd");
     588  	}
     589  
     590  	return ctx;
     591  }
     592  
     593  #undef SC_ALLOW_ARG_2
     594  #undef SC_ALLOW_ARG_1
     595  #undef SC_ALLOW
     596  
     597  #endif /* HAVE_LIBSECCOMP */
     598  
     599  /* Create a sandbox for processing untrusted data.
     600   *
     601   * This only sets up data structures; the caller must call sandbox_load to
     602   * actually enter the sandbox.
     603   */
     604  man_sandbox *sandbox_init (void)
     605  {
     606  	man_sandbox *sandbox = XZALLOC (man_sandbox);
     607  
     608  #ifdef HAVE_LIBSECCOMP
     609  	sandbox->ctx = make_seccomp_filter (false);
     610  	sandbox->permissive_ctx = make_seccomp_filter (true);
     611  #else /* !HAVE_LIBSECCOMP */
     612  	sandbox->dummy = 0;
     613  #endif /* HAVE_LIBSECCOMP */
     614  
     615  	return sandbox;
     616  }
     617  
     618  #ifdef HAVE_LIBSECCOMP
     619  static void _sandbox_load (man_sandbox *sandbox, bool permissive) {
     620  	if (can_load_seccomp ()) {
     621  		scmp_filter_ctx ctx;
     622  
     623  		if (permissive)
     624  			ctx = sandbox->permissive_ctx;
     625  		else
     626  			ctx = sandbox->ctx;
     627  		if (!ctx)
     628  			return;
     629  		debug ("loading seccomp filter (permissive: %d)\n",
     630  		       (int) permissive);
     631  		if (seccomp_load (ctx) < 0) {
     632  			if (errno == EINVAL || errno == EFAULT) {
     633  				/* The kernel doesn't give us particularly
     634  				 * fine-grained errors.  EINVAL could in
     635  				 * theory be an invalid BPF program, but
     636  				 * it's much more likely that the running
     637  				 * kernel doesn't support seccomp filtering.
     638  				 * EFAULT normally means a programming
     639  				 * error, but it could also be returned here
     640  				 * by some versions of qemu-user
     641  				 * (https://bugs.launchpad.net/bugs/1726394).
     642  				 */
     643  				gripe_seccomp_filter_unavailable ();
     644  				/* Don't try this again. */
     645  				seccomp_filter_unavailable = true;
     646  			} else
     647  				fatal (errno, "can't load seccomp filter");
     648  		}
     649  	}
     650  }
     651  #else /* !HAVE_LIBSECCOMP */
     652  static void _sandbox_load (man_sandbox *sandbox MAYBE_UNUSED,
     653  			   bool permissive MAYBE_UNUSED)
     654  {
     655  }
     656  #endif /* HAVE_LIBSECCOMP */
     657  
     658  /* Enter a sandbox for processing untrusted data. */
     659  void sandbox_load (void *data)
     660  {
     661  	man_sandbox *sandbox = data;
     662  
     663  	_sandbox_load (sandbox, false);
     664  }
     665  
     666  /* Enter a sandbox for processing untrusted data, allowing limited file
     667   * creation.
     668   */
     669  void sandbox_load_permissive (void *data)
     670  {
     671  	man_sandbox *sandbox = data;
     672  
     673  	_sandbox_load (sandbox, true);
     674  }
     675  
     676  /* Free a sandbox for processing untrusted data. */
     677  void sandbox_free (void *data) {
     678  	man_sandbox *sandbox = data;
     679  
     680  #ifdef HAVE_LIBSECCOMP
     681  	if (sandbox->ctx)
     682  		seccomp_release (sandbox->ctx);
     683  	if (sandbox->permissive_ctx)
     684  		seccomp_release (sandbox->permissive_ctx);
     685  #endif /* HAVE_LIBSECCOMP */
     686  
     687  	free (sandbox);
     688  }