1 /*
2 * sandbox.c: Process sandboxing
3 *
4 * Copyright (C) 2017 Colin Watson.
5 *
6 * This file is part of man-db.
7 *
8 * man-db is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * man-db is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with man-db; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 *
22 * Some of the syscall lists in this file come from systemd, whose
23 * copyright/licensing statement is as follows. Per LGPLv2.1 s. 3, I have
24 * altered the original references to LGPLv2.1 to refer to GPLv2 instead.
25 *
26 * Copyright 2014 Lennart Poettering
27 *
28 * systemd is free software; you can redistribute it and/or modify it
29 * under the terms of the GNU General Public License as published by
30 * the Free Software Foundation; either version 2 of the License, or
31 * (at your option) any later version.
32 *
33 * systemd is distributed in the hope that it will be useful, but
34 * WITHOUT ANY WARRANTY; without even the implied warranty of
35 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
36 * General Public License for more details.
37 *
38 * You should have received a copy of the GNU General Public License
39 * along with systemd; If not, see <https://www.gnu.org/licenses/>.
40 */
41
42 #ifdef HAVE_CONFIG_H
43 # include "config.h"
44 #endif /* HAVE_CONFIG_H */
45
46 #include <stdbool.h>
47 #include <errno.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <sys/types.h>
51 #include <sys/stat.h>
52 #include <fcntl.h>
53 #include <unistd.h>
54
55 #ifdef HAVE_LIBSECCOMP
56 # include <sys/ioctl.h>
57 # include <sys/ipc.h>
58 # include <sys/mman.h>
59 # include <sys/prctl.h>
60 # include <sys/shm.h>
61 # include <sys/socket.h>
62 # include <termios.h>
63 # include <seccomp.h>
64 #endif /* HAVE_LIBSECCOMP */
65
66 #include "attribute.h"
67 #include "xalloc.h"
68 #include "xstrndup.h"
69
70 #include "manconfig.h"
71
72 #include "debug.h"
73 #include "fatal.h"
74 #include "sandbox.h"
75
/* State behind the man_sandbox handle used by the functions in this file.
 *
 * When built with libseccomp it holds one filter context per confinement
 * level, as produced by make_seccomp_filter; either pointer is NULL if
 * that function declined to build a filter.  Otherwise it only holds a
 * placeholder member so that the structure is not empty.
 */
struct man_sandbox {
#ifdef HAVE_LIBSECCOMP
	/* Filter built with permissive == false. */
	scmp_filter_ctx ctx;
	/* Filter built with permissive == true (limited file creation). */
	scmp_filter_ctx permissive_ctx;
#else /* !HAVE_LIBSECCOMP */
	/* Placeholder; set to zero by sandbox_init and never read here. */
	char dummy;
#endif /* HAVE_LIBSECCOMP */
};
84
85 #ifdef HAVE_LIBSECCOMP
/* Latched to true by _sandbox_load once loading a filter has failed with
 * EINVAL or EFAULT; can_load_seccomp then refuses all further attempts.
 */
static bool seccomp_filter_unavailable = false;

/* Emit the debug message explaining why seccomp filtering is not in use. */
static void gripe_seccomp_filter_unavailable (void)
{
	debug ("seccomp filtering requires a kernel configured "
	       "with CONFIG_SECCOMP_FILTER\n");
}
93
94 static bool search_ld_preload (const char *needle)
95 {
96 const char *ld_preload_env;
97 static char *ld_preload_file = NULL;
98
99 ld_preload_env = getenv ("LD_PRELOAD");
100 if (ld_preload_env && strstr (ld_preload_env, needle) != NULL)
101 return true;
102
103 if (!ld_preload_file) {
104 int fd;
105 struct stat st;
106 char *mapped = NULL;
107
108 fd = open ("/etc/ld.so.preload", O_RDONLY);
109 if (fd >= 0 && fstat (fd, &st) >= 0 && st.st_size)
110 mapped = mmap (NULL, st.st_size, PROT_READ,
111 MAP_PRIVATE | MAP_FILE, fd, 0);
112 if (mapped) {
113 ld_preload_file = xstrndup (mapped, st.st_size);
114 munmap (mapped, st.st_size);
115 } else
116 ld_preload_file = xstrdup ("");
117 if (fd >= 0)
118 close (fd);
119 }
120 /* This isn't very accurate: /etc/ld.so.preload may contain
121 * comments. On the other hand, glibc says "it should only be used
122 * for emergencies and testing". File a bug if this is a problem
123 * for you.
124 */
125 if (strstr (ld_preload_file, needle) != NULL)
126 return true;
127
128 return false;
129 }
130
131 /* Can we load a seccomp filter into this process?
132 *
133 * This guard allows us to call sandbox_load in code paths that may
134 * conditionally do so again.
135 */
136 static bool can_load_seccomp (void)
137 {
138 const char *man_disable_seccomp;
139 int seccomp_status;
140
141 if (seccomp_filter_unavailable) {
142 gripe_seccomp_filter_unavailable ();
143 return false;
144 }
145
146 man_disable_seccomp = getenv ("MAN_DISABLE_SECCOMP");
147 if (man_disable_seccomp && *man_disable_seccomp) {
148 debug ("seccomp filter disabled by user request\n");
149 return false;
150 }
151
152 /* Valgrind causes the child process to make some system calls we
153 * don't want to allow in general, so disable seccomp when running
154 * on Valgrind.
155 *
156 * The correct approach seems to be to either require valgrind.h at
157 * build-time or copy valgrind.h into this project and then use the
158 * RUNNING_ON_VALGRIND macro, but I'd really rather not add a
159 * build-dependency for this or take a copy of a >6000-line header
160 * file. Since the goal of this is only to disable the seccomp
161 * filter under Valgrind, this will do for now.
162 */
163 if (search_ld_preload ("/vgpreload")) {
164 debug ("seccomp filter disabled while running under "
165 "Valgrind\n");
166 return false;
167 }
168
169 seccomp_status = prctl (PR_GET_SECCOMP);
170
171 if (seccomp_status == 0)
172 return true;
173
174 if (seccomp_status == -1) {
175 if (errno == EINVAL)
176 debug ("running kernel does not support seccomp\n");
177 else
178 debug ("unknown error getting seccomp status: %s\n",
179 strerror (errno));
180 } else if (seccomp_status == 2)
181 debug ("seccomp already enabled\n");
182 else
183 debug ("unknown return value from PR_GET_SECCOMP: %d\n",
184 seccomp_status);
185 return false;
186 }
187 #endif /* HAVE_LIBSECCOMP */
188
189 #ifdef HAVE_LIBSECCOMP
190
/* Helper macros for building the seccomp filter.
 *
 * Each one expands to a statement that expects a local scmp_filter_ctx
 * named "ctx" at the point of use; SC_ALLOW_PERMISSIVE additionally
 * expects a local bool named "permissive".
 *
 * A syscall name that libseccomp cannot resolve (__NR_SCMP_ERROR) is
 * skipped silently.  Failing to add a rule is fatal.  seccomp_rule_add
 * reports failure as a negative errno code in its return value and is not
 * documented to set errno, so the negated return value is what gets
 * passed to fatal.
 */

/* Allow syscall NAME regardless of its arguments. */
#define SC_ALLOW(name) \
	do { \
		int nr = seccomp_syscall_resolve_name (name); \
		int sc_rc; \
		if (nr == __NR_SCMP_ERROR) \
			break; \
		sc_rc = seccomp_rule_add (ctx, SCMP_ACT_ALLOW, nr, 0); \
		if (sc_rc < 0) \
			fatal (-sc_rc, "can't add seccomp rule"); \
	} while (0)

/* Allow syscall NAME only when building the permissive filter. */
#define SC_ALLOW_PERMISSIVE(name) \
	do { \
		if (permissive) \
			SC_ALLOW (name); \
	} while (0)

/* Allow syscall NAME when its arguments satisfy the comparison CMP1. */
#define SC_ALLOW_ARG_1(name, cmp1) \
	do { \
		int nr = seccomp_syscall_resolve_name (name); \
		int sc_rc; \
		if (nr == __NR_SCMP_ERROR) \
			break; \
		sc_rc = seccomp_rule_add (ctx, SCMP_ACT_ALLOW, nr, 1, cmp1); \
		if (sc_rc < 0) \
			fatal (-sc_rc, "can't add seccomp rule"); \
	} while (0)

/* Allow syscall NAME when its arguments satisfy both CMP1 and CMP2. */
#define SC_ALLOW_ARG_2(name, cmp1, cmp2) \
	do { \
		int nr = seccomp_syscall_resolve_name (name); \
		int sc_rc; \
		if (nr == __NR_SCMP_ERROR) \
			break; \
		sc_rc = seccomp_rule_add (ctx, SCMP_ACT_ALLOW, nr, \
					  2, cmp1, cmp2); \
		if (sc_rc < 0) \
			fatal (-sc_rc, "can't add seccomp rule"); \
	} while (0)
224
225 /* Create a seccomp filter.
226 *
227 * If permissive is true, then the returned filter will allow limited file
228 * creation (although not making executable files). This obviously
229 * constitutes less effective confinement, but it's necessary for some
230 * subprocesses (such as groff) that need the ability to write to temporary
231 * files. Confining these further requires additional tools that can do
232 * path-based filtering or similar, such as AppArmor.
233 */
234 static scmp_filter_ctx make_seccomp_filter (bool permissive)
235 {
236 scmp_filter_ctx ctx;
237 mode_t mode_mask = S_ISUID | S_ISGID | S_IXUSR | S_IXGRP | S_IXOTH;
238 int create_mask = O_CREAT
239 #ifdef O_TMPFILE
240 | O_TMPFILE
241 #endif /* O_TMPFILE */
242 ;
243
244 if (!can_load_seccomp ())
245 return NULL;
246
247 debug ("initialising seccomp filter (permissive: %d)\n",
248 (int) permissive);
249 ctx = seccomp_init (SCMP_ACT_ERRNO (ENOSYS));
250 if (!ctx)
251 fatal (errno, "can't initialise seccomp filter");
252
253 /* Allow sibling architectures for x86, since people sometimes mix
254 * and match architectures there for performance reasons.
255 */
256 switch (seccomp_arch_native ()) {
257 case SCMP_ARCH_X86:
258 seccomp_arch_add (ctx, SCMP_ARCH_X86_64);
259 seccomp_arch_add (ctx, SCMP_ARCH_X32);
260 break;
261 case SCMP_ARCH_X86_64:
262 seccomp_arch_add (ctx, SCMP_ARCH_X86);
263 seccomp_arch_add (ctx, SCMP_ARCH_X32);
264 break;
265 case SCMP_ARCH_X32:
266 seccomp_arch_add (ctx, SCMP_ARCH_X86);
267 seccomp_arch_add (ctx, SCMP_ARCH_X86_64);
268 break;
269 }
270
271 /* This sandbox is intended to allow operations that might
272 * reasonably be needed in simple data-transforming pipes: it should
273 * allow the process to do most reasonable things to itself, to read
274 * and write data from and to already-open file descriptors, to open
275 * files in read-only mode, and to fork new processes with the same
276 * restrictions. (If permissive is true, then it should also allow
277 * limited file creation; see the header comment above.)
278 *
279 * Since I currently know of no library with suitable syscall lists,
280 * the syscall lists here are taken from
281 * systemd:src/shared/seccomp-util.c, last updated from commit
282 * ab9617a76624c43a26de7e94424088ae171ebfef (2023-08-07).
283 */
284
285 /* systemd: SystemCallFilter=@default */
286 SC_ALLOW ("arch_prctl");
287 SC_ALLOW ("brk");
288 SC_ALLOW ("cacheflush");
289 SC_ALLOW ("clock_getres");
290 SC_ALLOW ("clock_getres_time64");
291 SC_ALLOW ("clock_gettime");
292 SC_ALLOW ("clock_gettime64");
293 SC_ALLOW ("clock_nanosleep");
294 SC_ALLOW ("clock_nanosleep_time64");
295 SC_ALLOW ("execve");
296 SC_ALLOW ("exit");
297 SC_ALLOW ("exit_group");
298 SC_ALLOW ("futex");
299 SC_ALLOW ("futex_time64");
300 SC_ALLOW ("futex_waitv");
301 SC_ALLOW ("get_robust_list");
302 SC_ALLOW ("get_thread_area");
303 SC_ALLOW ("getegid");
304 SC_ALLOW ("getegid32");
305 SC_ALLOW ("geteuid");
306 SC_ALLOW ("geteuid32");
307 SC_ALLOW ("getgid");
308 SC_ALLOW ("getgid32");
309 SC_ALLOW ("getgroups");
310 SC_ALLOW ("getgroups32");
311 SC_ALLOW ("getpgid");
312 SC_ALLOW ("getpgrp");
313 SC_ALLOW ("getpid");
314 SC_ALLOW ("getppid");
315 SC_ALLOW ("getrandom");
316 SC_ALLOW ("getresgid");
317 SC_ALLOW ("getresgid32");
318 SC_ALLOW ("getresuid");
319 SC_ALLOW ("getresuid32");
320 SC_ALLOW ("getrlimit");
321 SC_ALLOW ("getsid");
322 SC_ALLOW ("gettid");
323 SC_ALLOW ("gettimeofday");
324 SC_ALLOW ("getuid");
325 SC_ALLOW ("getuid32");
326 SC_ALLOW ("membarrier");
327 SC_ALLOW ("mmap");
328 SC_ALLOW ("mmap2");
329 SC_ALLOW ("mprotect");
330 SC_ALLOW ("munmap");
331 SC_ALLOW ("nanosleep");
332 SC_ALLOW ("pause");
333 SC_ALLOW ("prlimit64");
334 SC_ALLOW ("restart_syscall");
335 SC_ALLOW ("riscv_flush_icache");
336 SC_ALLOW ("riscv_hwprobe");
337 SC_ALLOW ("rseq");
338 SC_ALLOW ("rt_sigreturn");
339 SC_ALLOW ("sched_getaffinity");
340 SC_ALLOW ("sched_yield");
341 SC_ALLOW ("set_robust_list");
342 SC_ALLOW ("set_thread_area");
343 SC_ALLOW ("set_tid_address");
344 SC_ALLOW ("set_tls");
345 SC_ALLOW ("sigreturn");
346 SC_ALLOW ("time");
347 SC_ALLOW ("ugetrlimit");
348
349 /* systemd: SystemCallFilter=@basic-io */
350 SC_ALLOW ("_llseek");
351 SC_ALLOW ("close");
352 SC_ALLOW ("close_range");
353 SC_ALLOW ("dup");
354 SC_ALLOW ("dup2");
355 SC_ALLOW ("dup3");
356 SC_ALLOW ("lseek");
357 SC_ALLOW ("pread64");
358 SC_ALLOW ("preadv");
359 SC_ALLOW ("preadv2");
360 SC_ALLOW ("pwrite64");
361 SC_ALLOW ("pwritev");
362 SC_ALLOW ("pwritev2");
363 SC_ALLOW ("read");
364 SC_ALLOW ("readv");
365 SC_ALLOW ("write");
366 SC_ALLOW ("writev");
367
368 /* systemd: SystemCallFilter=@file-system (subset) */
369 SC_ALLOW ("access");
370 SC_ALLOW ("chdir");
371 if (permissive) {
372 SC_ALLOW_ARG_1 ("chmod",
373 SCMP_A1 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
374 SC_ALLOW_ARG_1 ("creat",
375 SCMP_A1 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
376 }
377 SC_ALLOW ("faccessat");
378 SC_ALLOW ("faccessat2");
379 SC_ALLOW ("fallocate");
380 SC_ALLOW ("fchdir");
381 if (permissive) {
382 SC_ALLOW_ARG_1 ("fchmod",
383 SCMP_A1 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
384 SC_ALLOW_ARG_1 ("fchmodat",
385 SCMP_A2 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
386 }
387 SC_ALLOW ("fcntl");
388 SC_ALLOW ("fcntl64");
389 SC_ALLOW ("fstat");
390 SC_ALLOW ("fstat64");
391 SC_ALLOW ("fstatat64");
392 SC_ALLOW ("fstatfs");
393 SC_ALLOW ("fstatfs64");
394 SC_ALLOW ("ftruncate");
395 SC_ALLOW ("ftruncate64");
396 SC_ALLOW_PERMISSIVE ("futimesat");
397 SC_ALLOW ("getcwd");
398 SC_ALLOW ("getdents");
399 SC_ALLOW ("getdents64");
400 SC_ALLOW_PERMISSIVE ("link");
401 SC_ALLOW_PERMISSIVE ("linkat");
402 SC_ALLOW ("lstat");
403 SC_ALLOW ("lstat64");
404 SC_ALLOW_PERMISSIVE ("mkdir");
405 SC_ALLOW_PERMISSIVE ("mkdirat");
406 SC_ALLOW ("newfstatat");
407 SC_ALLOW ("oldfstat");
408 SC_ALLOW ("oldlstat");
409 SC_ALLOW ("oldstat");
410 if (permissive) {
411 SC_ALLOW_ARG_2 ("open",
412 SCMP_A1 (SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
413 SCMP_A2 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
414 SC_ALLOW_ARG_2 ("openat",
415 SCMP_A2 (SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
416 SCMP_A3 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
417 #ifdef O_TMPFILE
418 SC_ALLOW_ARG_2 ("open",
419 SCMP_A1 (SCMP_CMP_MASKED_EQ,
420 O_TMPFILE, O_TMPFILE),
421 SCMP_A2 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
422 SC_ALLOW_ARG_2 ("openat",
423 SCMP_A2 (SCMP_CMP_MASKED_EQ,
424 O_TMPFILE, O_TMPFILE),
425 SCMP_A3 (SCMP_CMP_MASKED_EQ, mode_mask, 0));
426 #endif /* O_TMPFILE */
427 SC_ALLOW_ARG_1 ("open",
428 SCMP_A1 (SCMP_CMP_MASKED_EQ, create_mask, 0));
429 SC_ALLOW_ARG_1 ("openat",
430 SCMP_A2 (SCMP_CMP_MASKED_EQ, create_mask, 0));
431 } else {
432 SC_ALLOW_ARG_1 ("open",
433 SCMP_A1 (SCMP_CMP_MASKED_EQ, O_ACCMODE,
434 O_RDONLY));
435 SC_ALLOW_ARG_1 ("openat",
436 SCMP_A2 (SCMP_CMP_MASKED_EQ, O_ACCMODE,
437 O_RDONLY));
438 }
439 SC_ALLOW ("readlink");
440 SC_ALLOW ("readlinkat");
441 SC_ALLOW_PERMISSIVE ("rename");
442 SC_ALLOW_PERMISSIVE ("renameat");
443 SC_ALLOW_PERMISSIVE ("renameat2");
444 SC_ALLOW_PERMISSIVE ("rmdir");
445 SC_ALLOW ("stat");
446 SC_ALLOW ("stat64");
447 SC_ALLOW ("statfs");
448 SC_ALLOW ("statfs64");
449 SC_ALLOW ("statx");
450 SC_ALLOW_PERMISSIVE ("symlink");
451 SC_ALLOW_PERMISSIVE ("symlinkat");
452 SC_ALLOW_PERMISSIVE ("truncate");
453 SC_ALLOW_PERMISSIVE ("truncateat");
454 SC_ALLOW_PERMISSIVE ("unlink");
455 SC_ALLOW_PERMISSIVE ("unlinkat");
456 SC_ALLOW_PERMISSIVE ("utime");
457 SC_ALLOW_PERMISSIVE ("utimensat");
458 SC_ALLOW_PERMISSIVE ("utimensat_time64");
459 SC_ALLOW_PERMISSIVE ("utimes");
460
461 /* systemd: SystemCallFilter=@io-event */
462 SC_ALLOW ("_newselect");
463 SC_ALLOW ("epoll_create");
464 SC_ALLOW ("epoll_create1");
465 SC_ALLOW ("epoll_ctl");
466 SC_ALLOW ("epoll_ctl_old");
467 SC_ALLOW ("epoll_pwait");
468 SC_ALLOW ("epoll_pwait2");
469 SC_ALLOW ("epoll_wait");
470 SC_ALLOW ("epoll_wait_old");
471 SC_ALLOW ("eventfd");
472 SC_ALLOW ("eventfd2");
473 SC_ALLOW ("poll");
474 SC_ALLOW ("ppoll");
475 SC_ALLOW ("ppoll_time64");
476 SC_ALLOW ("pselect6");
477 SC_ALLOW ("pselect6_time64");
478 SC_ALLOW ("select");
479
480 /* systemd: SystemCallFilter=@ipc (subset) */
481 SC_ALLOW ("pipe");
482 SC_ALLOW ("pipe2");
483
484 /* systemd: SystemCallFilter=@process (subset) */
485 SC_ALLOW ("capget");
486 SC_ALLOW ("clone");
487 SC_ALLOW ("clone3");
488 SC_ALLOW ("execveat");
489 SC_ALLOW ("fork");
490 SC_ALLOW ("getrusage");
491 SC_ALLOW ("pidfd_open");
492 SC_ALLOW ("pidfd_send_signal");
493 SC_ALLOW ("prctl");
494 SC_ALLOW ("vfork");
495 SC_ALLOW ("wait4");
496 SC_ALLOW ("waitid");
497 SC_ALLOW ("waitpid");
498
499 /* systemd: SystemCallFilter=@signal */
500 SC_ALLOW ("rt_sigaction");
501 SC_ALLOW ("rt_sigpending");
502 SC_ALLOW ("rt_sigprocmask");
503 SC_ALLOW ("rt_sigsuspend");
504 SC_ALLOW ("rt_sigtimedwait");
505 SC_ALLOW ("rt_sigtimedwait_time64");
506 SC_ALLOW ("sigaction");
507 SC_ALLOW ("sigaltstack");
508 SC_ALLOW ("signal");
509 SC_ALLOW ("signalfd");
510 SC_ALLOW ("signalfd4");
511 SC_ALLOW ("sigpending");
512 SC_ALLOW ("sigprocmask");
513 SC_ALLOW ("sigsuspend");
514
515 /* systemd: SystemCallFilter=@sync */
516 SC_ALLOW ("fdatasync");
517 SC_ALLOW ("fsync");
518 SC_ALLOW ("msync");
519 SC_ALLOW ("sync");
520 SC_ALLOW ("sync_file_range");
521 SC_ALLOW ("sync_file_range2");
522 SC_ALLOW ("syncfs");
523
524 /* systemd: SystemCallFilter=@system-service (subset) */
525 SC_ALLOW ("arm_fadvise64_64");
526 SC_ALLOW ("fadvise64");
527 SC_ALLOW ("fadvise64_64");
528 if (permissive)
529 SC_ALLOW ("ioctl");
530 else {
531 SC_ALLOW_ARG_1 ("ioctl", SCMP_A1 (SCMP_CMP_EQ, TCGETS));
532 SC_ALLOW_ARG_1 ("ioctl", SCMP_A1 (SCMP_CMP_EQ, TIOCGWINSZ));
533 }
534 SC_ALLOW ("madvise");
535 SC_ALLOW ("mremap");
536 SC_ALLOW ("sysinfo");
537 SC_ALLOW ("uname");
538
539 /* Extra syscalls not in any of systemd's sets. */
540 SC_ALLOW ("arm_fadvise64_64");
541 SC_ALLOW ("arm_sync_file_range");
542
543 /* Allow killing processes and threads. This is unfortunate but
544 * unavoidable: groff uses kill to explicitly pass on SIGPIPE to its
545 * child processes, and we can't do any more sophisticated filtering
546 * in seccomp.
547 */
548 SC_ALLOW ("kill");
549 SC_ALLOW ("tgkill");
550
551 /* Allow some relatively harmless System V shared memory operations.
552 * These seem to be popular among the sort of program that wants to
553 * install itself in /etc/ld.so.preload or similar (e.g. antivirus
554 * programs and VPNs).
555 */
556 SC_ALLOW_ARG_1 ("shmat", SCMP_A2 (SCMP_CMP_EQ, SHM_RDONLY));
557 SC_ALLOW_ARG_1 ("shmctl", SCMP_A1 (SCMP_CMP_EQ, IPC_STAT));
558 SC_ALLOW ("shmdt");
559 SC_ALLOW ("shmget");
560
561 /* Some antivirus programs use an LD_PRELOAD wrapper that wants to
562 * talk to a private daemon using a Unix-domain socket. We really
563 * don't want to allow these syscalls in general, but if such a
564 * thing is in use we probably have no choice.
565 *
566 * Firebuild is a build accelerator that connects to its supervisor
567 * using a Unix-domain socket.
568 *
569 * snoopy is an execve monitoring tool that may log messages to
570 * /dev/log.
571 */
572 if (search_ld_preload ("libesets_pac.so") ||
573 search_ld_preload ("libfirebuild.so") ||
574 search_ld_preload ("libscep_pac.so") ||
575 search_ld_preload ("libsnoopy.so")) {
576 SC_ALLOW ("connect");
577 SC_ALLOW ("recvmsg");
578 SC_ALLOW ("sendmsg");
579 SC_ALLOW ("sendto");
580 SC_ALLOW ("setsockopt");
581 SC_ALLOW_ARG_1 ("socket", SCMP_A0 (SCMP_CMP_EQ, AF_UNIX));
582 }
583 /* ESET sends messages to a System V message queue. */
584 if (search_ld_preload ("libesets_pac.so") ||
585 search_ld_preload ("libscep_pac.so")) {
586 SC_ALLOW_ARG_1 ("msgget", SCMP_A1 (SCMP_CMP_EQ, 0));
587 SC_ALLOW ("msgsnd");
588 }
589
590 return ctx;
591 }
592
/* The filter-building macros are only meant for make_seccomp_filter, so
 * undefine all four of them, including SC_ALLOW_PERMISSIVE.
 */
#undef SC_ALLOW_ARG_2
#undef SC_ALLOW_ARG_1
#undef SC_ALLOW_PERMISSIVE
#undef SC_ALLOW
596
597 #endif /* HAVE_LIBSECCOMP */
598
599 /* Create a sandbox for processing untrusted data.
600 *
601 * This only sets up data structures; the caller must call sandbox_load to
602 * actually enter the sandbox.
603 */
604 man_sandbox *sandbox_init (void)
605 {
606 man_sandbox *sandbox = XZALLOC (man_sandbox);
607
608 #ifdef HAVE_LIBSECCOMP
609 sandbox->ctx = make_seccomp_filter (false);
610 sandbox->permissive_ctx = make_seccomp_filter (true);
611 #else /* !HAVE_LIBSECCOMP */
612 sandbox->dummy = 0;
613 #endif /* HAVE_LIBSECCOMP */
614
615 return sandbox;
616 }
617
618 #ifdef HAVE_LIBSECCOMP
619 static void _sandbox_load (man_sandbox *sandbox, bool permissive) {
620 if (can_load_seccomp ()) {
621 scmp_filter_ctx ctx;
622
623 if (permissive)
624 ctx = sandbox->permissive_ctx;
625 else
626 ctx = sandbox->ctx;
627 if (!ctx)
628 return;
629 debug ("loading seccomp filter (permissive: %d)\n",
630 (int) permissive);
631 if (seccomp_load (ctx) < 0) {
632 if (errno == EINVAL || errno == EFAULT) {
633 /* The kernel doesn't give us particularly
634 * fine-grained errors. EINVAL could in
635 * theory be an invalid BPF program, but
636 * it's much more likely that the running
637 * kernel doesn't support seccomp filtering.
638 * EFAULT normally means a programming
639 * error, but it could also be returned here
640 * by some versions of qemu-user
641 * (https://bugs.launchpad.net/bugs/1726394).
642 */
643 gripe_seccomp_filter_unavailable ();
644 /* Don't try this again. */
645 seccomp_filter_unavailable = true;
646 } else
647 fatal (errno, "can't load seccomp filter");
648 }
649 }
650 }
651 #else /* !HAVE_LIBSECCOMP */
652 static void _sandbox_load (man_sandbox *sandbox MAYBE_UNUSED,
653 bool permissive MAYBE_UNUSED)
654 {
655 }
656 #endif /* HAVE_LIBSECCOMP */
657
/* Enter the strict sandbox for processing untrusted data.
 *
 * DATA is the man_sandbox pointer obtained from sandbox_init, passed as
 * void *.
 */
void sandbox_load (void *data)
{
	_sandbox_load (data, false);
}
665
/* Enter the permissive sandbox for processing untrusted data: like
 * sandbox_load, but limited file creation is allowed.
 *
 * DATA is the man_sandbox pointer obtained from sandbox_init, passed as
 * void *.
 */
void sandbox_load_permissive (void *data)
{
	_sandbox_load (data, true);
}
675
676 /* Free a sandbox for processing untrusted data. */
677 void sandbox_free (void *data) {
678 man_sandbox *sandbox = data;
679
680 #ifdef HAVE_LIBSECCOMP
681 if (sandbox->ctx)
682 seccomp_release (sandbox->ctx);
683 if (sandbox->permissive_ctx)
684 seccomp_release (sandbox->permissive_ctx);
685 #endif /* HAVE_LIBSECCOMP */
686
687 free (sandbox);
688 }