1 /*
2 * Copyright (c) 2018 Chen Jingpiao <chenjingpiao@gmail.com>
3 * Copyright (c) 2019 Paul Chaignon <paul.chaignon@gmail.com>
4 * Copyright (c) 2018-2023 The strace developers.
5 * All rights reserved.
6 *
7 * SPDX-License-Identifier: LGPL-2.1-or-later
8 */
9
10 #include "defs.h"
11
12 #include "ptrace.h"
13 #include <signal.h>
14 #include <sys/prctl.h>
15 #include <sys/wait.h>
16 #include <linux/filter.h>
17
18 #include "filter_seccomp.h"
19 #include "number_set.h"
20 #include "scno.h"
21
22 bool seccomp_filtering;
23 bool seccomp_before_sysentry;
24
25 #include <linux/seccomp.h>
26
27 #ifndef BPF_MAXINSNS
28 # define BPF_MAXINSNS 4096
29 #endif
30
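/*
 * Sentinel jump targets: while a per-personality block of the BPF program
 * is generated, jt/jf (and the k field of BPF_JA statements) are filled
 * with these placeholder values.  Once the length of the block is known,
 * replace_jmp_placeholders() rewrites them with the actual offsets to the
 * next block, to the block's RET_TRACE statement, or to its RET_ALLOW
 * statement.
 */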
31 #define JMP_PLACEHOLDER_NEXT ((unsigned char) -1)
32 #define JMP_PLACEHOLDER_TRACE ((unsigned char) -2)
33 #define JMP_PLACEHOLDER_ALLOW ((unsigned char) -3)
34
35 #define SET_BPF(filter, code, jt, jf, k) \
36 (*(filter) = (struct sock_filter) { code, jt, jf, k })
37
38 #define SET_BPF_STMT(filter, code, k) \
39 SET_BPF(filter, code, 0, 0, k)
40
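/* Note that SET_BPF_JUMP() ORs BPF_JMP into the code field itself. */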
41 #define SET_BPF_JUMP(filter, code, k, jt, jf) \
42 SET_BPF(filter, BPF_JMP | code, jt, jf, k)
43
44 typedef unsigned short (*filter_generator_t)(struct sock_filter *,
45 bool *overflow);
46 static unsigned short linear_filter_generator(struct sock_filter *,
47 bool *overflow);
48 static unsigned short binary_match_filter_generator(struct sock_filter *,
49 bool *overflow);
50 static filter_generator_t filter_generators[] = {
51 linear_filter_generator,
52 binary_match_filter_generator,
53 };
54
/*
 * Keep some margin in the filter buffers as programs larger than allowed
 * may be constructed before we discard them.
 */
59 static struct sock_filter
60 filters[ARRAY_SIZE(filter_generators)][2 * BPF_MAXINSNS];
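/*
 * bpf_prog.len == USHRT_MAX means that no usable program has been selected
 * yet; check_seccomp_filter_properties() replaces it with the length of the
 * shortest program that was generated without a jump offset overflow.
 */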
61 static struct sock_fprog bpf_prog = {
62 .len = USHRT_MAX,
63 .filter = NULL,
64 };
65
66 #ifdef HAVE_FORK
67
68 static void ATTRIBUTE_NORETURN
69 check_seccomp_order_do_child(void)
70 {
71 static const struct sock_filter filter[] = {
72 /* return (nr == __NR_gettid) ? RET_TRACE : RET_ALLOW; */
73 BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
74 offsetof(struct seccomp_data, nr)),
75 BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_gettid, 0, 1),
76 BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRACE),
77 BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW)
78 };
79 static const struct sock_fprog prog = {
80 .len = ARRAY_SIZE(filter),
81 .filter = (struct sock_filter *) filter
82 };
83
84 /* Get everything ready before PTRACE_TRACEME. */
85 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0)
		perror_func_msg_and_die("prctl(PR_SET_NO_NEW_PRIVS, 1)");
87 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
88 perror_func_msg_and_die("prctl(PR_SET_SECCOMP)");
89 int pid = getpid();
90
91 if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0) {
92 /* Exit with a nonzero exit status. */
93 perror_func_msg_and_die("PTRACE_TRACEME");
94 }
95
96 GCOV_DUMP;
97
98 kill(pid, SIGSTOP);
99 syscall(__NR_gettid);
100 _exit(0);
101 }
102
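/*
 * Expected sequence of stops in the tracer loop below, by step number:
 *   0: initial SIGSTOP raised by the child;
 *   1, 2: seccomp stop and syscall-entry stop for gettid, in either order
 *      depending on the kernel (which is exactly what is being probed);
 *   3: syscall-exit stop for gettid;
 *   4: syscall-entry stop for exit_group;
 *   5: child exit with status 0.
 */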
103 static int
104 check_seccomp_order_tracer(int pid)
105 {
106 for (unsigned int step = 0; ; ++step) {
107 int status;
108
109 for (;;) {
110 long rc = waitpid(pid, &status, 0);
111 if (rc < 0 && errno == EINTR)
112 continue;
113 if (rc == pid)
114 break;
115 /* Cannot happen. */
116 perror_func_msg("#%d: unexpected wait result %ld",
117 step, rc);
118 return pid;
119 }
120
121 if (WIFEXITED(status)) {
122 /* The tracee is no more. */
123 pid = 0;
124
125 int exitstatus = WEXITSTATUS(status);
126 if (step == 5 && exitstatus == 0) {
127 seccomp_filtering = true;
128 } else {
129 error_func_msg("#%d: unexpected exit status %u",
130 step, exitstatus);
131 }
132 break;
133 }
134
135 if (WIFSIGNALED(status)) {
136 /* The tracee is no more. */
137 pid = 0;
138
139 error_func_msg("#%d: unexpected signal %u",
140 step, WTERMSIG(status));
141 break;
142 }
143
144 if (!WIFSTOPPED(status)) {
145 /* Cannot happen. */
146 error_func_msg("#%d: unexpected wait status %#x",
147 step, status);
148 break;
149 }
150
151 unsigned int event = (unsigned int) status >> 16;
152
153 switch (WSTOPSIG(status)) {
154 case SIGSTOP:
155 if (step != 0) {
156 error_func_msg("#%d: unexpected signal stop",
157 step);
158 return pid;
159 }
160 if (ptrace(PTRACE_SETOPTIONS, pid, 0L,
161 PTRACE_O_TRACESYSGOOD|
162 PTRACE_O_TRACESECCOMP) < 0) {
163 perror_func_msg("PTRACE_SETOPTIONS");
164 return pid;
165 }
166 break;
167
168 case SIGTRAP:
169 if (event != PTRACE_EVENT_SECCOMP) {
170 error_func_msg("#%d: unexpected trap %#x",
171 step, event);
172 return pid;
173 }
174
175 switch (step) {
176 case 1: /* Seccomp stop before entering gettid. */
177 seccomp_before_sysentry = true;
178 break;
179 case 2: /* Seccomp stop after entering gettid. */
180 if (!seccomp_before_sysentry)
181 break;
182 ATTRIBUTE_FALLTHROUGH;
183 default:
184 error_func_msg("#%d: unexpected seccomp stop",
185 step);
186 return pid;
187 }
188 break;
189
190 case SIGTRAP | 0x80:
191 switch (step) {
192 case 3: /* Exiting gettid. */
193 case 4: /* Entering exit_group. */
194 break;
195 case 1: /* Entering gettid before seccomp stop. */
196 seccomp_before_sysentry = false;
197 break;
198 case 2: /* Entering gettid after seccomp stop. */
199 if (seccomp_before_sysentry)
200 break;
201 ATTRIBUTE_FALLTHROUGH;
202 default:
203 error_func_msg("#%d: unexpected syscall stop",
204 step);
205 return pid;
206 }
207 break;
208
209 default:
210 error_func_msg("#%d: unexpected stop signal %#x",
211 step, WSTOPSIG(status));
212 return pid;
213 }
214
215 if (ptrace(PTRACE_SYSCALL, pid, 0L, 0L) < 0) {
216 /* Cannot happen. */
217 perror_func_msg("#%d: PTRACE_SYSCALL", step);
218 break;
219 }
220 }
221
222 return pid;
223 }
224 #endif /* HAVE_FORK */
225
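/*
 * Figure out at runtime whether the kernel reports the seccomp stop before
 * or after the syscall-entry stop (the reporting order changed around
 * Linux 4.8) by tracing a child that performs a single filtered gettid()
 * call.
 */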
226 static void
227 check_seccomp_order(void)
228 {
229 seccomp_filtering = false;
230
	/* There is no fork on no-MMU systems, so the check cannot be run. */
232 #ifdef HAVE_FORK
233 int pid = fork();
234 if (pid < 0) {
235 perror_func_msg("fork");
236 return;
237 }
238
239 if (pid == 0)
240 check_seccomp_order_do_child();
241
242 pid = check_seccomp_order_tracer(pid);
243 if (pid) {
244 kill(pid, SIGKILL);
245 for (;;) {
246 long rc = waitpid(pid, NULL, 0);
247 if (rc < 0 && errno == EINTR)
248 continue;
249 break;
250 }
251 }
252 #endif /* HAVE_FORK */
253 }
254
255 static bool
256 traced_by_seccomp(unsigned int scno, unsigned int p)
257 {
258 unsigned int always_trace_flags =
259 TRACE_INDIRECT_SUBCALL | TRACE_SECCOMP_DEFAULT |
260 (stack_trace_enabled ? MEMORY_MAPPING_CHANGE : 0) |
261 (is_number_in_set(DECODE_PID_COMM, decode_pid_set) ?
262 COMM_CHANGE : 0);
263 return sysent_vec[p][scno].sys_flags & always_trace_flags ||
264 is_number_in_set_array(scno, trace_set, p);
265 }
266
267 static void
268 replace_jmp_placeholders(unsigned char *jmp_offset, unsigned char jmp_next,
269 unsigned char jmp_trace, unsigned char jmp_allow)
270 {
271 switch (*jmp_offset) {
272 case JMP_PLACEHOLDER_NEXT:
273 *jmp_offset = jmp_next;
274 break;
275 case JMP_PLACEHOLDER_TRACE:
276 *jmp_offset = jmp_trace;
277 break;
278 case JMP_PLACEHOLDER_ALLOW:
279 *jmp_offset = jmp_allow;
280 break;
281 default:
282 break;
283 }
284 }
285
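/*
 * Emit a match for the syscall number range [lower, upper).  For
 * illustration, bpf_syscalls_cmp(filter, 321, 324) produces:
 *	BPF_JUMP(BPF_JGE | BPF_K, 321, 0, 1)	skip next insn if nr < 321
 *	BPF_JUMP(BPF_JGE | BPF_K, 324, 0, T)	T: RET_TRACE placeholder
 * which implements "if (nr >= 321 && nr < 324) return SECCOMP_RET_TRACE;".
 */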
286 static unsigned short
287 bpf_syscalls_cmp(struct sock_filter *filter,
288 unsigned int lower, unsigned int upper)
289 {
290 if (lower + 1 == upper) {
291 /* if (nr == lower) return RET_TRACE; */
292 SET_BPF_JUMP(filter, BPF_JEQ | BPF_K, lower,
293 JMP_PLACEHOLDER_TRACE, 0);
294 return 1;
295 } else {
296 /* if (nr >= lower && nr < upper) return RET_TRACE; */
297 SET_BPF_JUMP(filter, BPF_JGE | BPF_K, lower, 0, 1);
298 SET_BPF_JUMP(filter + 1, BPF_JGE | BPF_K, upper, 0,
299 JMP_PLACEHOLDER_TRACE);
300 return 2;
301 }
302 }
303
304 static unsigned short
305 linear_filter_generator(struct sock_filter *filter, bool *overflow)
306 {
307 /*
308 * Generated program looks like:
309 * if (arch == AUDIT_ARCH_A && nr >= flag) {
310 * if (nr == 59)
311 * return SECCOMP_RET_TRACE;
312 * if (nr >= 321 && nr <= 323)
313 * return SECCOMP_RET_TRACE;
314 * ...
315 * return SECCOMP_RET_ALLOW;
316 * }
317 * if (arch == AUDIT_ARCH_A) {
318 * ...
319 * }
320 * if (arch == AUDIT_ARCH_B) {
321 * ...
322 * }
323 * return SECCOMP_RET_TRACE;
324 */
325 unsigned short pos = 0;
326
327 #if SUPPORTED_PERSONALITIES > 1
328 SET_BPF_STMT(&filter[pos++], BPF_LD | BPF_W | BPF_ABS,
329 offsetof(struct seccomp_data, arch));
330 #endif
331
332 /*
	 * Personalities are iterated in reverse order in the BPF program so
334 * that the x86 case is naturally handled. On x86, the first and third
335 * personalities have the same arch identifier. The third can be
336 * distinguished based on its associated syscall flag, so we check it
337 * first. The only drawback here is that the first personality is more
338 * common, which may make the BPF program slower to match syscalls on
339 * average.
340 */
341 for (int p = SUPPORTED_PERSONALITIES - 1; p >= 0; --p) {
342 unsigned int lower = UINT_MAX;
343 unsigned short start = pos, end;
344
345 #if SUPPORTED_PERSONALITIES > 1
346 /* if (arch != audit_arch_vec[p].arch) goto next; */
347 SET_BPF_JUMP(&filter[pos++], BPF_JEQ | BPF_K,
348 audit_arch_vec[p].arch, 0, JMP_PLACEHOLDER_NEXT);
349 #endif
350 SET_BPF_STMT(&filter[pos++], BPF_LD | BPF_W | BPF_ABS,
351 offsetof(struct seccomp_data, nr));
352
353 #if SUPPORTED_PERSONALITIES > 1
354 if (audit_arch_vec[p].flag) {
355 /* if (nr < audit_arch_vec[p].flag) goto next; */
356 SET_BPF_JUMP(&filter[pos++], BPF_JGE | BPF_K,
357 audit_arch_vec[p].flag, 2, 0);
358 SET_BPF_STMT(&filter[pos++], BPF_LD | BPF_W | BPF_ABS,
359 offsetof(struct seccomp_data, arch));
360 SET_BPF_JUMP(&filter[pos++], BPF_JA,
361 JMP_PLACEHOLDER_NEXT, 0, 0);
362 }
363 #endif
364
365 for (unsigned int i = 0; i < nsyscall_vec[p]; ++i) {
366 if (traced_by_seccomp(i, p)) {
367 if (lower == UINT_MAX)
368 lower = i;
369 continue;
370 }
371 if (lower == UINT_MAX)
372 continue;
373 pos += bpf_syscalls_cmp(filter + pos,
374 lower | audit_arch_vec[p].flag,
375 i | audit_arch_vec[p].flag);
376 lower = UINT_MAX;
377 }
378 if (lower != UINT_MAX)
379 pos += bpf_syscalls_cmp(filter + pos,
380 lower | audit_arch_vec[p].flag,
381 nsyscall_vec[p]
382 | audit_arch_vec[p].flag);
383 end = pos;
384
385 /* if (nr >= max_nr) return RET_TRACE; */
386 SET_BPF_JUMP(&filter[pos++], BPF_JGE | BPF_K,
387 nsyscall_vec[p] | audit_arch_vec[p].flag, 1, 0);
388
389 SET_BPF_STMT(&filter[pos++], BPF_RET | BPF_K,
390 SECCOMP_RET_ALLOW);
391 SET_BPF_STMT(&filter[pos++], BPF_RET | BPF_K,
392 SECCOMP_RET_TRACE);
393
394 /*
395 * Within generated BPF programs, the origin and destination of
396 * jumps are always in the same personality section. The
397 * largest jump is therefore the jump from the first
398 * instruction of the section to the last, to skip the
399 * personality and try to compare .arch to the next
400 * personality.
	 * If a personality section has more than 255 instructions, the
	 * jump offset will overflow.  Such a program is unlikely in
	 * practice, so we simply disable seccomp filtering in that case.
405 */
406 if (pos - start > UCHAR_MAX) {
407 *overflow = true;
408 return pos;
409 }
410
411 for (unsigned int i = start; i < end; ++i) {
412 if (BPF_CLASS(filter[i].code) != BPF_JMP)
413 continue;
414 unsigned char jmp_next = pos - i - 1;
415 unsigned char jmp_trace = pos - i - 2;
416 unsigned char jmp_allow = pos - i - 3;
417 replace_jmp_placeholders(&filter[i].jt, jmp_next,
418 jmp_trace, jmp_allow);
419 replace_jmp_placeholders(&filter[i].jf, jmp_next,
420 jmp_trace, jmp_allow);
421 if (BPF_OP(filter[i].code) == BPF_JA)
422 filter[i].k = (unsigned int) jmp_next;
423 }
424 }
425
426 #if SUPPORTED_PERSONALITIES > 1
427 /* Jumps conditioned on .arch default to this RET_TRACE. */
428 SET_BPF_STMT(&filter[pos++], BPF_RET | BPF_K, SECCOMP_RET_TRACE);
429 #endif
430
431 return pos;
432 }
433
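/*
 * The binary match generator splits each syscall table into groups of 32.
 * At run time the accumulator holds nr / 32 and the X register holds
 * 1 << (nr % 32); for each group this helper emits either a direct jump
 * (when the whole group is allowed or traced) or a BPF_JSET test of the
 * group's bit array against X.
 */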
434 static unsigned short
435 bpf_syscalls_match(struct sock_filter *filter, unsigned int bitarray,
436 unsigned int bitarray_idx)
437 {
438 if (!bitarray) {
		/* if (nr / 32 == bitarray_idx) return RET_ALLOW; */
440 SET_BPF_JUMP(filter, BPF_JMP | BPF_JEQ | BPF_K, bitarray_idx,
441 JMP_PLACEHOLDER_ALLOW, 0);
442 return 1;
443 }
444 if (bitarray == UINT_MAX) {
		/* if (nr / 32 == bitarray_idx) return RET_TRACE; */
446 SET_BPF_JUMP(filter, BPF_JMP | BPF_JEQ | BPF_K, bitarray_idx,
447 JMP_PLACEHOLDER_TRACE, 0);
448 return 1;
449 }
450 /*
	 * if (nr / 32 == bitarray_idx)
452 * return (X & bitarray) ? RET_TRACE : RET_ALLOW;
453 */
454 SET_BPF_JUMP(filter, BPF_JMP | BPF_JEQ | BPF_K, bitarray_idx,
455 0, 2);
456 SET_BPF_STMT(filter + 1, BPF_MISC | BPF_TXA, 0);
457 SET_BPF_JUMP(filter + 2, BPF_JMP | BPF_JSET | BPF_K, bitarray,
458 JMP_PLACEHOLDER_TRACE, JMP_PLACEHOLDER_ALLOW);
459 return 3;
460 }
461
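/*
 * A rough sketch of the generated program (illustrative only):
 *	if (arch == AUDIT_ARCH_A) {
 *		X = 1 << (nr % 32);
 *		A = nr / 32;
 *		if (A == 0) return (X & bitarray[0]) ? TRACE : ALLOW;
 *		if (A == 1) return (X & bitarray[1]) ? TRACE : ALLOW;
 *		...
 *		return SECCOMP_RET_ALLOW;
 *	}
 *	if (arch == AUDIT_ARCH_B) {
 *		...
 *	}
 *	return SECCOMP_RET_TRACE;
 */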
462 static unsigned short
463 binary_match_filter_generator(struct sock_filter *filter, bool *overflow)
464 {
465 unsigned short pos = 0;
466
467 #if SUPPORTED_PERSONALITIES > 1
468 SET_BPF_STMT(&filter[pos++], BPF_LD | BPF_W | BPF_ABS,
469 offsetof(struct seccomp_data, arch));
470 #endif
471
	/*
	 * Personalities are iterated in reverse order in the BPF program so
	 * that the x86 case is naturally handled.  On x86, the first and
	 * third personalities have the same arch identifier.  The third can
	 * be distinguished based on its associated bit mask, so we check it
	 * first.  The only drawback here is that the first personality is
	 * more common, which may make the BPF program slower to match
	 * syscalls on average.
	 */
478 for (int p = SUPPORTED_PERSONALITIES - 1;
479 p >= 0 && pos <= BPF_MAXINSNS;
480 --p) {
481 unsigned short start = pos, end;
482 unsigned int bitarray = 0;
483 unsigned int i;
484
485 #if SUPPORTED_PERSONALITIES > 1
486 SET_BPF_JUMP(&filter[pos++], BPF_JMP | BPF_JEQ | BPF_K,
487 audit_arch_vec[p].arch, 0, JMP_PLACEHOLDER_NEXT);
488 #endif
489 SET_BPF_STMT(&filter[pos++], BPF_LD | BPF_W | BPF_ABS,
490 offsetof(struct seccomp_data, nr));
491
492 #if SUPPORTED_PERSONALITIES > 1
493 if (audit_arch_vec[p].flag) {
494 SET_BPF_JUMP(&filter[pos++], BPF_JMP | BPF_JGE | BPF_K,
495 audit_arch_vec[p].flag, 2, 0);
496 SET_BPF_STMT(&filter[pos++], BPF_LD | BPF_W | BPF_ABS,
497 offsetof(struct seccomp_data, arch));
498 SET_BPF_JUMP(&filter[pos++], BPF_JMP | BPF_JA,
499 JMP_PLACEHOLDER_NEXT, 0, 0);
500
501 /* nr = nr & ~mask */
502 SET_BPF_STMT(&filter[pos++], BPF_ALU | BPF_AND | BPF_K,
503 ~audit_arch_vec[p].flag);
504 }
505 #endif
506
		/* X = 1 << (nr % 32) = 1 << (nr & 0x1F); */
508 SET_BPF_STMT(&filter[pos++], BPF_ALU | BPF_AND | BPF_K, 0x1F);
509 SET_BPF_STMT(&filter[pos++], BPF_MISC | BPF_TAX, 0);
510 SET_BPF_STMT(&filter[pos++], BPF_LD | BPF_IMM, 1);
511 SET_BPF_STMT(&filter[pos++], BPF_ALU | BPF_LSH | BPF_X, 0);
512 SET_BPF_STMT(&filter[pos++], BPF_MISC | BPF_TAX, 0);
513
		/* A = nr / 32 = nr >> 5; */
515 SET_BPF_STMT(&filter[pos++], BPF_LD | BPF_W | BPF_ABS,
516 offsetof(struct seccomp_data, nr));
517 if (audit_arch_vec[p].flag) {
518 /* nr = nr & ~mask */
519 SET_BPF_STMT(&filter[pos++], BPF_ALU | BPF_AND | BPF_K,
520 ~audit_arch_vec[p].flag);
521 }
522 SET_BPF_STMT(&filter[pos++], BPF_ALU | BPF_RSH | BPF_K, 5);
523
524 for (i = 0; i < nsyscall_vec[p] && pos <= BPF_MAXINSNS; ++i) {
525 if (traced_by_seccomp(i, p))
526 bitarray |= (1 << i % 32);
527 if (i % 32 == 31) {
528 pos += bpf_syscalls_match(filter + pos,
529 bitarray, i / 32);
530 bitarray = 0;
531 }
532 }
533 if (i % 32 != 0)
534 pos += bpf_syscalls_match(filter + pos, bitarray,
535 i / 32);
536
537 end = pos;
538
539 SET_BPF_STMT(&filter[pos++], BPF_RET | BPF_K,
540 SECCOMP_RET_ALLOW);
541 SET_BPF_STMT(&filter[pos++], BPF_RET | BPF_K,
542 SECCOMP_RET_TRACE);
543
544 if (pos - start > UCHAR_MAX) {
545 *overflow = true;
546 return pos;
547 }
548
549 for (unsigned int i = start; i < end; ++i) {
550 if (BPF_CLASS(filter[i].code) != BPF_JMP)
551 continue;
552 unsigned char jmp_next = pos - i - 1;
553 unsigned char jmp_trace = pos - i - 2;
554 unsigned char jmp_allow = pos - i - 3;
555 replace_jmp_placeholders(&filter[i].jt, jmp_next,
556 jmp_trace, jmp_allow);
557 replace_jmp_placeholders(&filter[i].jf, jmp_next,
558 jmp_trace, jmp_allow);
559 if (BPF_OP(filter[i].code) == BPF_JA)
560 filter[i].k = (unsigned int)jmp_next;
561 }
562 }
563
564 #if SUPPORTED_PERSONALITIES > 1
565 SET_BPF_STMT(&filter[pos++], BPF_RET | BPF_K, SECCOMP_RET_TRACE);
566 #endif
567
568 return pos;
569 }
570
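/*
 * Probe kernel support for seccomp filters and pick the BPF program to use.
 * prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL) fails with EINVAL when
 * the kernel does not know SECCOMP_MODE_FILTER, and with another error
 * (typically EFAULT) when it does; the shortest program generated without
 * a jump offset overflow is then selected, and rejected if it exceeds
 * BPF_MAXINSNS.
 */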
571 static void
572 check_seccomp_filter_properties(void)
573 {
574 int rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0);
575 seccomp_filtering = rc < 0 && errno != EINVAL;
576 if (!seccomp_filtering) {
577 debug_func_perror_msg("prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER)");
578 return;
579 }
580
581 for (unsigned int i = 0; i < ARRAY_SIZE(filter_generators); ++i) {
582 bool overflow = false;
583 unsigned short len = filter_generators[i](filters[i],
584 &overflow);
585 if (len < bpf_prog.len && !overflow) {
586 bpf_prog.len = len;
587 bpf_prog.filter = filters[i];
588 }
589 }
590 if (bpf_prog.len == USHRT_MAX) {
591 debug_msg("seccomp filter disabled due to jump offset "
592 "overflow");
593 seccomp_filtering = false;
594 } else if (bpf_prog.len > BPF_MAXINSNS) {
595 debug_msg("seccomp filter disabled due to BPF program "
596 "being oversized (%u > %d)", bpf_prog.len,
597 BPF_MAXINSNS);
598 seccomp_filtering = false;
599 }
600
601 if (seccomp_filtering)
602 check_seccomp_order();
603 }
604
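/*
 * Print a human-readable listing of the selected BPF program, one line per
 * instruction, when strace is run with -d.
 */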
605 static void
606 dump_seccomp_bpf(void)
607 {
608 const struct sock_filter *filter = bpf_prog.filter;
609 for (unsigned int i = 0; i < bpf_prog.len; ++i) {
610 switch (filter[i].code) {
611 case BPF_LD | BPF_W | BPF_ABS:
612 switch (filter[i].k) {
613 case offsetof(struct seccomp_data, arch):
614 error_msg("STMT(BPF_LDWABS, data->arch)");
615 break;
616 case offsetof(struct seccomp_data, nr):
617 error_msg("STMT(BPF_LDWABS, data->nr)");
618 break;
619 default:
620 error_msg("STMT(BPF_LDWABS, 0x%x)",
621 filter[i].k);
622 }
623 break;
		case BPF_LD | BPF_W | BPF_IMM:
625 error_msg("STMT(BPF_LDWIMM, 0x%x)", filter[i].k);
626 break;
627 case BPF_RET | BPF_K:
628 switch (filter[i].k) {
629 case SECCOMP_RET_TRACE:
630 error_msg("STMT(BPF_RET, SECCOMP_RET_TRACE)");
631 break;
632 case SECCOMP_RET_ALLOW:
633 error_msg("STMT(BPF_RET, SECCOMP_RET_ALLOW)");
634 break;
635 default:
636 error_msg("STMT(BPF_RET, 0x%x)", filter[i].k);
637 }
638 break;
639 case BPF_JMP | BPF_JEQ | BPF_K:
640 error_msg("JUMP(BPF_JEQ, %u, %u, %u)",
641 filter[i].jt, filter[i].jf,
642 filter[i].k);
643 break;
644 case BPF_JMP | BPF_JGE | BPF_K:
645 error_msg("JUMP(BPF_JGE, %u, %u, %u)",
646 filter[i].jt, filter[i].jf,
647 filter[i].k);
648 break;
		case BPF_JMP | BPF_JSET | BPF_K:
650 error_msg("JUMP(BPF_JSET, %u, %u, 0x%x)",
651 filter[i].jt, filter[i].jf,
652 filter[i].k);
653 break;
654 case BPF_JMP | BPF_JA:
655 error_msg("JUMP(BPF_JA, %u)", filter[i].k);
656 break;
		case BPF_ALU | BPF_RSH | BPF_K:
			error_msg("STMT(BPF_RSH, %u)", filter[i].k);
			break;
		case BPF_ALU | BPF_LSH | BPF_X:
			error_msg("STMT(BPF_LSH, X)");
			break;
		case BPF_ALU | BPF_AND | BPF_K:
			error_msg("STMT(BPF_AND, 0x%x)", filter[i].k);
			break;
		case BPF_MISC | BPF_TAX:
			error_msg("STMT(BPF_TAX)");
			break;
		case BPF_MISC | BPF_TXA:
670 error_msg("STMT(BPF_TXA)");
671 break;
672 default:
673 error_msg("STMT(0x%x, %u, %u, 0x%x)", filter[i].code,
674 filter[i].jt, filter[i].jf, filter[i].k);
675 }
676 }
677 }
678
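/*
 * Install the selected seccomp filter in the calling process.  EACCES means
 * the kernel requires either CAP_SYS_ADMIN or the no_new_privs bit for
 * SECCOMP_MODE_FILTER, so set PR_SET_NO_NEW_PRIVS and retry in that case.
 */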
679 void
680 init_seccomp_filter(void)
681 {
682 if (debug_flag)
683 dump_seccomp_bpf();
684
685 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf_prog) == 0)
686 return;
687
688 if (errno == EACCES) {
689 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0)
690 perror_func_msg_and_die("prctl(PR_SET_NO_NEW_PRIVS)");
691 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf_prog) == 0)
692 return;
693 }
694
695 perror_func_msg_and_die("prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER)");
696 }
697
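/*
 * Choose the ptrace restart request for a tracee stop.  PTRACE_SYSCALL is
 * only needed while the tracee is inside a syscall that the seccomp filter
 * traces, so that its syscall-exit stop is still delivered; otherwise
 * PTRACE_CONT is enough, as the next filtered syscall will raise a new
 * seccomp stop.
 */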
698 int
699 seccomp_filter_restart_operator(const struct tcb *tcp)
700 {
701 if (exiting(tcp) && tcp->scno < nsyscall_vec[current_personality]
702 && traced_by_seccomp(tcp->scno, current_personality))
703 return PTRACE_SYSCALL;
704 return PTRACE_CONT;
705 }
706
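/*
 * Entry point invoked when seccomp-assisted syscall filtering is requested
 * (e.g. with --seccomp-bpf): decide whether it can actually be used.
 */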
707 void
708 check_seccomp_filter(void)
709 {
710 /* Let's avoid enabling seccomp if all syscalls are traced. */
711 seccomp_filtering = !is_complete_set_array(trace_set, nsyscall_vec,
712 SUPPORTED_PERSONALITIES);
713 if (!seccomp_filtering) {
714 error_msg("Seccomp filter is requested "
715 "but there are no syscalls to filter. "
716 "See -e trace to filter syscalls.");
717 return;
718 }
719
720 check_seccomp_filter_properties();
721
722 if (!seccomp_filtering)
723 error_msg("seccomp filter is requested but unavailable");
724 }