1 /*
2 * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3 * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4 * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5 * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6 * Copyright (c) 1999-2023 The strace developers.
7 * All rights reserved.
8 *
9 * SPDX-License-Identifier: LGPL-2.1-or-later
10 */
11
12 #include "defs.h"
13 #include <stdarg.h>
14 #include <limits.h>
15 #include <fcntl.h>
16 #include "ptrace.h"
17 #include <signal.h>
18 #include <sys/resource.h>
19 #include <sys/stat.h>
20 #ifdef HAVE_PATHS_H
21 # include <paths.h>
22 #endif
23 #include <getopt.h>
24 #include <pwd.h>
25 #include <grp.h>
26 #include <dirent.h>
27 #include <locale.h>
28 #include <sys/utsname.h>
29 #include <sys/prctl.h>
30
31 #include "kill_save_errno.h"
32 #include "filter_seccomp.h"
33 #include "largefile_wrappers.h"
34 #include "mmap_cache.h"
35 #include "number_set.h"
36 #include "ptrace_syscall_info.h"
37 #include "scno.h"
38 #include "printsiginfo.h"
39 #include "strauss.h"
40 #include "trace_event.h"
41 #include "xstring.h"
42 #include "delay.h"
43 #include "wait.h"
44 #include "secontext.h"
45
/* Some libc headers do not declare these, so declare them ourselves. */
extern char *optarg;
extern int optind;
extern char **environ;
50
#ifdef ENABLE_STACKTRACE
/* When true, a stack trace is produced for every system call. */
bool stack_trace_enabled;
#endif

/* Invoke the tkill syscall directly through syscall(). */
#define my_tkill(tid_, sig_) syscall(__NR_tkill, (tid_), (sig_))

/*
 * Glue for systems without an MMU that cannot provide fork():
 * when HAVE_FORK is not defined, force NOMMU_SYSTEM on, and on
 * NOMMU systems map every fork() call to vfork().
 */
#ifndef HAVE_FORK
# undef NOMMU_SYSTEM
# define NOMMU_SYSTEM 1
#endif
#if NOMMU_SYSTEM
# define fork() vfork()
#endif
66
67 const unsigned int syscall_trap_sig = SIGTRAP | 0x80;
68
69 cflag_t cflag = CFLAG_NONE;
70 bool followfork;
71 bool output_separately;
72 unsigned int ptrace_setoptions = PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEEXEC
73 | PTRACE_O_TRACEEXIT;
74 static const struct xlat_data xflag_str[] = {
75 { HEXSTR_NONE, "none" },
76 { HEXSTR_NON_ASCII_CHARS, "non-ascii-chars" },
77 { HEXSTR_NON_ASCII, "non-ascii" },
78 { HEXSTR_ALL, "all" },
79 };
80 unsigned int xflag;
81 bool debug_flag;
82 bool Tflag;
83 int Tflag_scale = 1000;
84 int Tflag_width = 6;
85 bool iflag;
86 bool count_wallclock;
87 bool tracing_fds;
88 long long syscall_limit = -1;
89 static bool nflag;
90 static int tflag_scale = 1000000000;
91 static unsigned tflag_width = 0;
92 static const char *tflag_format = NULL;
93 static bool rflag;
94 static int rflag_scale = 1000;
95 static int rflag_width = 6;
96 static bool print_pid_pfx;
97
98 static unsigned int version_verbosity;
99
/*
 * Interruptibility modes (-I N); the numeric values are the ones
 * listed for -I in the usage text.
 */
enum {
	INTR_NOT_SET        = 0,
	/* No signals are blocked or ignored. */
	INTR_ANYWHERE       = 1,
	/* Fatal signals are blocked while decoding a syscall; the default. */
	INTR_WHILE_WAIT     = 2,
	/* Fatal signals are always blocked; default for '-o FILE PROG'. */
	INTR_NEVER          = 3,
	/* Fatal signals and SIGTSTP (^Z) are blocked; default with -D. */
	INTR_BLOCK_TSTP_TOO = 4,
	NUM_INTR_OPTS
};
static int opt_intr;
/* The signal mask is manipulated only while this mode is active: */
#define interactive (opt_intr == INTR_WHILE_WAIT)
112
113 enum {
114 DAEMONIZE_NONE = 0,
115 DAEMONIZE_GRANDCHILD = 1,
116 DAEMONIZE_NEW_PGROUP = 2,
117 DAEMONIZE_NEW_SESSION = 3,
118
119 DAEMONIZE_OPTS_GUARD__,
120 MAX_DAEMONIZE_OPTS = DAEMONIZE_OPTS_GUARD__ - 1
121 };
122 static const struct xlat_data daemonize_str[] = {
123 { DAEMONIZE_GRANDCHILD, "grandchild" },
124 { DAEMONIZE_NEW_PGROUP, "pgroup" },
125 { DAEMONIZE_NEW_PGROUP, "pgrp" },
126 { DAEMONIZE_NEW_SESSION, "session" },
127 };
128 /*
129 * daemonized_tracer supports -D option.
130 * With this option, strace forks twice.
131 * Unlike normal case, with -D *grandparent* process exec's,
132 * becoming a traced process. Child exits (this prevents traced process
133 * from having children it doesn't expect to have), and grandchild
134 * attaches to grandparent similarly to strace -p PID.
135 * This allows for more transparent interaction in cases
136 * when process and its parent are communicating via signals,
137 * wait() etc. Without -D, strace process gets lodged in between,
138 * disrupting parent<->child link.
139 */
140 static unsigned int daemonized_tracer;
141
142 static int post_attach_sigstop = TCB_IGNORE_ONE_SIGSTOP;
143 #define use_seize (post_attach_sigstop == 0)
144
145 static bool detach_on_execve;
146
147 static int exit_code;
148 static int strace_child;
149 static int strace_tracer_pid;
150
151 static const char *username;
152 static uid_t run_uid;
153 static gid_t run_gid;
154
155 unsigned int max_strlen = DEFAULT_STRLEN;
156 static int acolumn = DEFAULT_ACOLUMN;
157 static char *acolumn_spaces;
158
159 /* Default output style for xlat entities */
160 enum xlat_style xlat_verbosity = XLAT_STYLE_ABBREV;
161
162 static const char *outfname;
163 /* If -ff, points to stderr. Else, it's our common output log */
164 static FILE *shared_log;
165 static bool open_append;
166
167 struct tcb *printing_tcp;
168 static struct tcb *current_tcp;
169
170 struct tcb_wait_data {
171 enum trace_event te; /**< Event passed to dispatch_event() */
172 int status; /**< status, returned by wait4() */
173 unsigned long msg; /**< Value returned by PTRACE_GETEVENTMSG */
174 siginfo_t si; /**< siginfo, returned by PTRACE_GETSIGINFO */
175 };
176
177 static struct tcb **tcbtab;
178 static unsigned int nprocs;
179 static size_t tcbtabsize;
180
181 static struct tcb_wait_data *tcb_wait_tab;
182 static size_t tcb_wait_tab_size;
183
184
185 #ifndef HAVE_PROGRAM_INVOCATION_NAME
186 char *program_invocation_name;
187 #endif
188
189 char *argv0; /* override argv[0] on execve */
190
191 unsigned os_release; /* generated from uname()'s u.release */
192
193 static void detach(struct tcb *tcp);
194 static void cleanup(int sig);
195 static void interrupt(int sig);
196
197 #ifdef HAVE_SIG_ATOMIC_T
198 static volatile sig_atomic_t interrupted, restart_failed;
199 #else
200 static volatile int interrupted, restart_failed;
201 #endif
202
203 static sigset_t timer_set;
204 static void timer_sighandler(int);
205
#ifndef HAVE_STRERROR

# if !HAVE_DECL_SYS_ERRLIST
extern int sys_nerr;
extern char *sys_errlist[];
# endif

/*
 * Replacement strerror() for systems that lack one.
 *
 * Returns the sys_errlist entry for err_no when 1 <= err_no < sys_nerr.
 * For any other value it returns "Unknown error N", formatted into a
 * static buffer that is overwritten by the next such call.
 */
const char *
strerror(int err_no)
{
	static char unknown[sizeof("Unknown error %d") + sizeof(int)*3];

	if (err_no >= 1 && err_no < sys_nerr)
		return sys_errlist[err_no];

	xsprintf(unknown, "Unknown error %d", err_no);
	return unknown;
}

#endif /* !HAVE_STRERROR */
226
227 static void
228 print_version(unsigned int verbosity)
229 {
230 static const char features[] =
231 #ifdef ENABLE_STACKTRACE
232 " stack-trace=" USE_UNWINDER
233 #endif
234 #ifdef USE_DEMANGLE
235 " stack-demangle"
236 #endif
237 #if SUPPORTED_PERSONALITIES > 1
238 # if defined HAVE_M32_MPERS
239 " m32-mpers"
240 # else
241 " no-m32-mpers"
242 # endif
243 #endif /* SUPPORTED_PERSONALITIES > 1 */
244 #if SUPPORTED_PERSONALITIES > 2
245 # if defined HAVE_MX32_MPERS
246 " mx32-mpers"
247 # else
248 " no-mx32-mpers"
249 # endif
250 #endif /* SUPPORTED_PERSONALITIES > 2 */
251 #ifdef ENABLE_SECONTEXT
252 " secontext"
253 #endif
254 "";
255
256 printf("%s -- version %s\n"
257 "Copyright (c) 1991-%s The strace developers <%s>.\n"
258 "This is free software; see the source for copying conditions. There is NO\n"
259 "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n",
260 PACKAGE_NAME, PACKAGE_VERSION, COPYRIGHT_YEAR, PACKAGE_URL);
261 printf("\nOptional features enabled:%s\n",
262 features[0] ? features : " (none)");
263
264 /* Raise strauss awareness */
265 print_strauss(verbosity);
266 }
267
268 static void
269 usage(void)
270 {
271 #ifdef ENABLE_STACKTRACE
272 # define K_OPT "k"
273 #else
274 # define K_OPT ""
275 #endif
276 #ifdef ENABLE_SECONTEXT
277 # define SECONTEXT_OPT " [--secontext[=FORMAT]]\n"
278 # define SECONTEXT_E_QUAL ", secontext"
279 #else
280 # define SECONTEXT_OPT ""
281 # define SECONTEXT_E_QUAL ""
282 #endif
283
284 printf("\
285 Usage: strace [-ACdffhi" K_OPT "qqrtttTvVwxxyyzZ] [-I N] [-b execve] [-e EXPR]...\n\
286 [-a COLUMN] [-o FILE] [-s STRSIZE] [-X FORMAT] [-O OVERHEAD]\n\
287 [-S SORTBY] [-P PATH]... [-p PID]... [-U COLUMNS] [--seccomp-bpf]\n"\
288 SECONTEXT_OPT "\
289 { -p PID | [-DDD] [-E VAR=VAL]... [-u USERNAME] PROG [ARGS] }\n\
290 or: strace -c[dfwzZ] [-I N] [-b execve] [-e EXPR]... [-O OVERHEAD]\n\
291 [-S SORTBY] [-P PATH]... [-p PID]... [-U COLUMNS] [--seccomp-bpf]\n\
292 { -p PID | [-DDD] [-E VAR=VAL]... [-u USERNAME] PROG [ARGS] }\n\
293 \n\
294 General:\n\
295 -e EXPR a qualifying expression: OPTION=[!]all or OPTION=[!]VAL1[,VAL2]...\n\
296 options: trace, abbrev, verbose, raw, signal, read, write, fault,\n\
297 inject, status, quiet, kvm, decode-fds" SECONTEXT_E_QUAL "\n\
298 \n\
299 Startup:\n\
300 -E VAR=VAL, --env=VAR=VAL\n\
301 put VAR=VAL in the environment for command\n\
302 -E VAR, --env=VAR\n\
303 remove VAR from the environment for command\n\
304 -p PID, --attach=PID\n\
305 trace process with process id PID, may be repeated\n\
306 -u USERNAME, --user=USERNAME\n\
307 run command as USERNAME handling setuid and/or setgid\n\
308 --argv0=NAME set PROG argv[0] to NAME\n\
309 \n\
310 Tracing:\n\
311 -b execve, --detach-on=execve\n\
312 detach on execve syscall\n\
313 -D, --daemonize[=grandchild]\n\
314 run tracer process as a grandchild, not as a parent\n\
315 -DD, --daemonize=pgroup\n\
316 run tracer process in a separate process group\n\
317 -DDD, --daemonize=session\n\
318 run tracer process in a separate session\n\
319 -f, --follow-forks\n\
320 follow forks\n\
321 -ff, --follow-forks --output-separately\n\
322 follow forks with output into separate files\n\
323 -I INTERRUPTIBLE, --interruptible=INTERRUPTIBLE\n\
324 1, anywhere: no signals are blocked\n\
325 2, waiting: fatal signals are blocked while decoding syscall (default)\n\
326 3, never: fatal signals are always blocked (default if '-o FILE PROG')\n\
327 4, never_tstp: fatal signals and SIGTSTP (^Z) are always blocked\n\
328 (useful to make 'strace -o FILE PROG' not stop on ^Z)\n\
329 \n\
330 Filtering:\n\
331 -e trace=[!][?]{{SYSCALL|GROUP|all|/REGEX}[@64|@32|@x32]|none},\n\
332 --trace=[!][?]{{SYSCALL|GROUP|all|/REGEX}[@64|@32|@x32]|none}\n\
333 trace only specified syscalls.\n\
334 groups: %%clock, %%creds, %%desc, %%file, %%fstat, %%fstatfs %%ipc, %%lstat,\n\
335 %%memory, %%net, %%process, %%pure, %%signal, %%stat, %%%%stat,\n\
336 %%statfs, %%%%statfs\n\
337 -e signal=SET, --signal=SET\n\
338 trace only the specified set of signals\n\
339 print only the signals from SET\n\
340 -e status=SET, --status=SET\n\
341 print only system calls with the return statuses in SET\n\
342 statuses: successful, failed, unfinished, unavailable, detached\n\
343 -e trace-fds=SET, --trace-fds=SET\n\
344 trace operations on file descriptors from SET\n\
345 -P PATH, --trace-path=PATH\n\
346 trace accesses to PATH\n\
347 -z, --successful-only\n\
348 print only syscalls that returned without an error code\n\
349 -Z, --failed-only\n\
350 print only syscalls that returned with an error code\n\
351 \n\
352 Output format:\n\
353 -a COLUMN, --columns=COLUMN\n\
354 alignment COLUMN for printing syscall results (default %d)\n\
355 -e abbrev=SET, --abbrev=SET\n\
356 abbreviate output for the syscalls in SET\n\
357 -e verbose=SET, --verbose=SET\n\
358 dereference structures for the syscall in SET\n\
359 -e raw=SET, --raw=SET\n\
360 print undecoded arguments for the syscalls in SET\n\
361 -e read=SET, --read=SET\n\
362 dump the data read from the file descriptors in SET\n\
363 -e write=SET, --write=SET\n\
364 dump the data written to the file descriptors in SET\n\
365 -e quiet=SET, --quiet=SET\n\
366 suppress various informational messages\n\
367 messages: attach, exit, path-resolution, personality, thread-execve\n\
368 -e kvm=vcpu, --kvm=vcpu\n\
369 print exit reason of kvm vcpu\n\
370 -e decode-fds=SET, --decode-fds=SET\n\
371 what kinds of file descriptor information details to decode\n\
372 details: dev (device major/minor for block/char device files)\n\
373 path (file path),\n\
374 pidfd (associated PID for pidfds),\n\
375 socket (protocol-specific information for socket descriptors),\n\
376 signalfd (signal masks for signalfds)\n\
377 "
378 #ifdef ENABLE_SECONTEXT
379 "\
380 -e secontext=FORMAT, --secontext[=FORMAT]\n\
381 print SELinux contexts in square brackets\n\
382 formats: comma-separated list of all, full, mismatch, none\n\
383 all: equivalent to full,mismatch\n\
384 full: print the full context instead of the type only\n\
385 mismatch: print expected context when actual is not matching\n\
386 none: equivalent to not specifying the option at all\n\
387 "
388 #endif
389 "\
390 -i, --instruction-pointer\n\
391 print instruction pointer at time of syscall\n\
392 "
393 #ifdef ENABLE_STACKTRACE
394 "\
395 -k, --stack-traces\n\
396 obtain stack trace between each syscall\n\
397 "
398 #endif
399 "\
400 -n, --syscall-number\n\
401 print syscall number\n\
402 -o FILE, --output=FILE\n\
403 send trace output to FILE instead of stderr\n\
404 -A, --output-append-mode\n\
405 open the file provided in the -o option in append mode\n\
406 --output-separately\n\
407 output into separate files (by appending pid to file names)\n\
408 -q, --quiet=attach,personality\n\
409 suppress messages about attaching, detaching, etc.\n\
410 -qq, --quiet=attach,personality,exit\n\
411 suppress messages about process exit status as well.\n\
412 -qqq, --quiet=all\n\
413 suppress all suppressible messages.\n\
414 -r, --relative-timestamps[=PRECISION]\n\
415 print relative timestamp\n\
416 precision: one of s, ms, us, ns; default is microseconds\n\
417 -s STRSIZE, --string-limit=STRSIZE\n\
418 limit length of print strings to STRSIZE chars (default %d)\n\
419 --absolute-timestamps=[[format:]FORMAT[,[precision:]PRECISION]]\n\
420 set the format of absolute timestamps\n\
421 format: none, time, or unix; default is time\n\
422 precision: one of s, ms, us, ns; default is seconds\n\
423 -t, --absolute-timestamps[=time]\n\
424 print absolute timestamp\n\
425 -tt, --absolute-timestamps=[time,]us\n\
426 print absolute timestamp with usecs\n\
427 -ttt, --absolute-timestamps=unix,us\n\
428 print absolute UNIX time with usecs\n\
429 -T, --syscall-times[=PRECISION]\n\
430 print time spent in each syscall\n\
431 precision: one of s, ms, us, ns; default is microseconds\n\
432 -v, --no-abbrev\n\
433 verbose mode: print entities unabbreviated\n\
434 --strings-in-hex=non-ascii-chars\n\
435 use hex instead of octal in escape sequences\n\
436 -x, --strings-in-hex=non-ascii\n\
437 print non-ASCII strings in hex\n\
438 -xx, --strings-in-hex[=all]\n\
439 print all strings in hex\n\
440 -X FORMAT, --const-print-style=FORMAT\n\
441 set the FORMAT for printing of named constants and flags\n\
442 formats: raw, abbrev, verbose\n\
443 -y, --decode-fds[=path]\n\
444 print paths associated with file descriptor arguments\n\
445 -yy, --decode-fds=all\n\
446 print all available information associated with file\n\
447 descriptors in addition to paths\n\
448 --decode-pids=pidns\n\
449 print PIDs in strace's namespace, too\n\
450 -Y, --decode-pids=comm\n\
451 print command names associated with PIDs\n\
452 \n\
453 Statistics:\n\
454 -c, --summary-only\n\
455 count time, calls, and errors for each syscall and report\n\
456 summary\n\
457 -C, --summary like -c, but also print the regular output\n\
458 -O OVERHEAD[UNIT], --summary-syscall-overhead=OVERHEAD[UNIT]\n\
459 set overhead for tracing syscalls to OVERHEAD UNITs\n\
460 units: one of s, ms, us, ns; default is microseconds\n\
461 -S SORTBY, --summary-sort-by=SORTBY\n\
462 sort syscall counts by: time, min-time, max-time, avg-time,\n\
463 calls, errors, name, nothing (default %s)\n\
464 -U COLUMNS, --summary-columns=COLUMNS\n\
465 show specific columns in the summary report: comma-separated\n\
466 list of time-percent, total-time, min-time, max-time, \n\
467 avg-time, calls, errors, name\n\
468 (default time-percent,total-time,avg-time,calls,errors,name)\n\
469 -w, --summary-wall-clock\n\
470 summarise syscall latency (default is system time)\n\
471 \n\
472 Stop condition:\n\
473 --syscall-limit=LIMIT\n\
474 Detach all tracees after tracing LIMIT syscalls\n\
475 \n\
476 Tampering:\n\
477 -e inject=SET[:error=ERRNO|:retval=VALUE][:signal=SIG][:syscall=SYSCALL]\n\
478 [:delay_enter=DELAY][:delay_exit=DELAY]\n\
479 [:poke_enter=@argN=DATAN,@argM=DATAM...]\n\
480 [:poke_exit=@argN=DATAN,@argM=DATAM...]\n\
481 [:when=WHEN],\n\
482 --inject=SET[:error=ERRNO|:retval=VALUE][:signal=SIG][:syscall=SYSCALL]\n\
483 [:delay_enter=DELAY][:delay_exit=DELAY]\n\
484 [:poke_enter=@argN=DATAN,@argM=DATAM...]\n\
485 [:poke_exit=@argN=DATAN,@argM=DATAM...]\n\
486 [:when=WHEN],\n\
487 perform syscall tampering for the syscalls in SET\n\
488 delay: microseconds or NUMBER{s|ms|us|ns}\n\
489 when: FIRST[..LAST][+[STEP]]\n\
490 -e fault=SET[:error=ERRNO][:when=WHEN], --fault=SET[:error=ERRNO][:when=WHEN]\n\
491 synonym for -e inject with default ERRNO set to ENOSYS.\n\
492 \n\
493 Miscellaneous:\n\
494 -d, --debug enable debug output to stderr\n\
495 -h, --help print help message\n\
496 --seccomp-bpf enable seccomp-bpf filtering\n\
497 --tips[=[[id:]ID][,[format:]FORMAT]]\n\
498 show strace tips, tricks, and tweaks on exit\n\
499 id: non-negative integer or random; default is random\n\
500 format: none, compact, full; default is compact\n\
501 -V, --version print version\n\
502 "
503 /* ancient, no one should use it
504 -F -- attempt to follow vforks (deprecated, use -f)\n\
505 */
506 , DEFAULT_ACOLUMN, DEFAULT_STRLEN, DEFAULT_SORTBY);
507 exit(0);
508
509 #undef K_OPT
510 }
511
512 void ATTRIBUTE_NORETURN
513 die(void)
514 {
515 if (strace_tracer_pid == getpid()) {
516 cleanup(0);
517 exit(1);
518 }
519
520 _exit(1);
521 }
522
/*
 * Report an invalid option argument through error_msg_and_help().
 *
 * When lopt is non-NULL and names a long option, the message refers to
 * "--<name>"; otherwise it refers to the short option character opt.
 */
static void
error_opt_arg(int opt, const struct option *lopt, const char *arg)
{
	const bool is_long = lopt != NULL && lopt->name != NULL;

	if (!is_long)
		error_msg_and_help("invalid -%c argument: '%s'", opt, arg);
	else
		error_msg_and_help("invalid --%s argument: '%s'",
				   lopt->name, arg);
}
533
534 static int
535 ptrace_attach_or_seize(int pid, const char **ptrace_attach_cmd)
536 {
537 int r;
538 if (!use_seize)
539 return *ptrace_attach_cmd = "PTRACE_ATTACH",
540 ptrace(PTRACE_ATTACH, pid, 0L, 0L);
541 r = ptrace(PTRACE_SEIZE, pid, 0L, (unsigned long) ptrace_setoptions);
542 if (r)
543 return *ptrace_attach_cmd = "PTRACE_SEIZE", r;
544 r = ptrace(PTRACE_INTERRUPT, pid, 0L, 0L);
545 return *ptrace_attach_cmd = "PTRACE_INTERRUPT", r;
546 }
547
548 static const char *
549 ptrace_op_str(unsigned int op)
550 {
551 const char *str = xlookup(ptrace_cmds, op);
552 if (str)
553 return str;
554
555 static char buf[sizeof(op) * 3];
556 xsprintf(buf, "%u", op);
557 return buf;
558 }
559
560 /*
561 * Used when we want to unblock stopped traced process.
562 * Should be only used with PTRACE_CONT, PTRACE_DETACH and PTRACE_SYSCALL.
563 * Returns 0 on success or if error was ESRCH
564 * (presumably process was killed while we talk to it).
565 * Otherwise prints error message and returns -1.
566 */
567 static int
568 ptrace_restart(const unsigned int op, struct tcb *const tcp, unsigned int sig)
569 {
570 int err;
571
572 errno = 0;
573 ptrace(op, tcp->pid, 0L, (unsigned long) sig);
574 err = errno;
575 if (!err || err == ESRCH)
576 return 0;
577
578 /*
579 * Why curcol != 0? Otherwise sometimes we get this:
580 *
581 * 10252 kill(10253, SIGKILL) = 0
582 * <ptrace(SYSCALL,10252):No such process>10253 ...next decode...
583 *
584 * 10252 died after we retrieved syscall exit data,
585 * but before we tried to restart it. Log looks ugly.
586 */
587 if (current_tcp && current_tcp->curcol != 0) {
588 tprint_space();
589 tprintf_string("<Cannot restart pid %d with ptrace(%s): %s>",
590 tcp->pid, ptrace_op_str(op), strerror(err));
591 tprint_newline();
592 line_ended();
593 }
594 errno = err;
595 perror_msg("ptrace(%s,pid:%d,sig:%u)",
596 ptrace_op_str(op), tcp->pid, sig);
597 return -1;
598 }
599
/*
 * Make sure FD_CLOEXEC is set on fd.
 * Dies with a diagnostic if the descriptor flags cannot be read or written.
 */
static void
set_cloexec_flag(int fd)
{
	const int cur = fcntl_fd(fd, F_GETFD);

	if (cur < 0) {
		/*
		 * Can happen only if fd is bad, which would be a bug
		 * in the caller, so abort rather than propagate the error.
		 */
		perror_msg_and_die("fcntl(%d, F_GETFD)", fd);
	}

	const int wanted = cur | FD_CLOEXEC;

	/* Skip the second fcntl when the flag is already set. */
	if (wanted != cur && fcntl_fd(fd, F_SETFD, wanted)) /* never fails */
		perror_msg_and_die("fcntl(%d, F_SETFD, %#x)", fd, wanted);
}
622
/*
 * When strace is a setuid executable, the real and effective uids have
 * to be swapped before and after filesystem and process management
 * operations.
 *
 * Exchanges the real and effective user ids of the process; does nothing
 * when they are equal.  Dies with a diagnostic if setreuid() fails.
 */
static void
swap_uid(void)
{
	/* uid_t, not int: that is what geteuid()/getuid() return. */
	const uid_t euid = geteuid();
	const uid_t uid = getuid();

	if (euid != uid && setreuid(euid, uid) < 0) {
		perror_msg_and_die("setreuid");
	}
}
636
637 static FILE *
638 strace_fopen(const char *path)
639 {
640 FILE *fp;
641
642 swap_uid();
643 fp = fopen_stream(path, open_append ? "a" : "w");
644 if (!fp)
645 perror_msg_and_die("Can't fopen '%s'", path);
646 swap_uid();
647 set_cloexec_flag(fileno(fp));
648 return fp;
649 }
650
/* Pid of the child started by strace_popen(). */
static int popen_pid;

#ifndef _PATH_BSHELL
# define _PATH_BSHELL "/bin/sh"
#endif

/*
 * Standard popen(3) cannot be used here: the popen child has to be told
 * apart from the other processes we trace, and popen(3) does not export
 * its child's pid.
 *
 * Runs command through _PATH_BSHELL with its stdin connected to a pipe
 * and returns a stream writing to that pipe.  The child's pid is stored
 * in popen_pid.  Dies with a diagnostic on any failure.
 */
static FILE *
strace_popen(const char *command)
{
	int pipefd[2];

	swap_uid();
	if (pipe(pipefd) < 0)
		perror_msg_and_die("pipe");

	/* The write end must not leak into the exec'ed shell. */
	set_cloexec_flag(pipefd[1]); /* never fails */

	const int child = vfork();
	if (child < 0)
		perror_msg_and_die("vfork");

	if (child == 0) {
		/* child: make the read end its stdin, then exec the shell */
		close(pipefd[1]);
		if (pipefd[0] != 0) {
			/* dup2() onto fd 0 returns 0 on success */
			if (dup2(pipefd[0], 0))
				perror_msg_and_die("dup2");
			close(pipefd[0]);
		}
		execl(_PATH_BSHELL, "sh", "-c", command, NULL);
		perror_msg_and_die("Can't execute '%s'", _PATH_BSHELL);
	}

	/* parent: keep only the write end */
	popen_pid = child;
	close(pipefd[0]);
	swap_uid();

	FILE *const stream = fdopen(pipefd[1], "w");
	if (!stream)
		perror_msg_and_die("fdopen");
	return stream;
}
700
701 static void
702 outf_perror(const struct tcb * const tcp)
703 {
704 if (tcp->outf == stderr)
705 return;
706
707 /* This is ugly, but we don't store separate file names */
708 if (output_separately)
709 perror_msg("%s.%u", outfname, tcp->pid);
710 else
711 perror_msg("%s", outfname);
712 }
713
714 ATTRIBUTE_FORMAT((printf, 1, 0))
715 static void
716 tvprintf(const char *const fmt, va_list args)
717 {
718 if (current_tcp) {
719 int n = vfprintf(current_tcp->outf, fmt, args);
720 if (n < 0) {
721 /* very unlikely due to vfprintf buffering */
722 outf_perror(current_tcp);
723 } else
724 current_tcp->curcol += n;
725 }
726 }
727
/* printf-style output to the current tcb; see tvprintf(). */
void
tprintf_string(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	tvprintf(fmt, ap);
	va_end(ap);
}
736
737 #ifndef HAVE_FPUTS_UNLOCKED
738 # define fputs_unlocked fputs
739 #endif
740
741 void
742 tprints_string(const char *str)
743 {
744 if (current_tcp) {
745 int n = fputs_unlocked(str, current_tcp->outf);
746 if (n >= 0) {
747 current_tcp->curcol += strlen(str);
748 return;
749 }
750 /* very unlikely due to fputs_unlocked buffering */
751 outf_perror(current_tcp);
752 }
753 }
754
/*
 * Print str between tprint_comment_begin() and tprint_comment_end().
 * A NULL or empty str prints nothing at all.
 */
void
tprints_comment(const char *const str)
{
	if (!str || !*str)
		return;

	tprint_comment_begin();
	tprints_string(str);
	tprint_comment_end();
}
764
/*
 * printf-style variant of tprints_comment(): the formatted text is
 * printed between tprint_comment_begin() and tprint_comment_end().
 * A NULL or empty fmt prints nothing at all.
 */
void
tprintf_comment(const char *fmt, ...)
{
	if (fmt && *fmt) {
		va_list ap;

		va_start(ap, fmt);
		tprint_comment_begin();
		tvprintf(fmt, ap);
		tprint_comment_end();
		va_end(ap);
	}
}
778
779 static void
780 flush_tcp_output(const struct tcb *const tcp)
781 {
782 if (fflush(tcp->outf))
783 outf_perror(tcp);
784 }
785
786 void
787 line_ended(void)
788 {
789 if (current_tcp) {
790 current_tcp->curcol = 0;
791 flush_tcp_output(current_tcp);
792 }
793 if (printing_tcp) {
794 printing_tcp->curcol = 0;
795 printing_tcp = NULL;
796 }
797 }
798
799 static void
800 set_current_tcp(const struct tcb *tcp)
801 {
802 current_tcp = (struct tcb *) tcp;
803
804 /* Sync current_personality and stuff */
805 if (current_tcp)
806 set_personality(current_tcp->currpers);
807 }
808
809 static void
810 print_comm_str(const char *str, const size_t len)
811 {
812 if (!len)
813 return;
814 tprint_associated_info_begin();
815 print_quoted_string_ex(str, len,
816 QUOTE_OMIT_LEADING_TRAILING_QUOTES, "<>");
817 tprint_associated_info_end();
818 }
819
820 void
821 printleader(struct tcb *tcp)
822 {
823 /* If -ff, "previous tcb we printed" is always the same as current,
824 * because we have per-tcb output files.
825 */
826 if (output_separately)
827 printing_tcp = tcp;
828
829 if (printing_tcp) {
830 if (printing_tcp->curcol != 0 &&
831 (printing_tcp == tcp ||
832 (!output_separately &&
833 !printing_tcp->staged_output_data))) {
834 /*
835 * case 1: we have a shared log (i.e. not -ff), and last line
836 * wasn't finished (same or different tcb, doesn't matter).
837 * case 2: split log, we are the same tcb, but our last line
838 * didn't finish ("SIGKILL nuked us after syscall entry" etc).
839 */
840 set_current_tcp(printing_tcp);
841 tprint_space();
842 tprints_string("<unfinished ...>");
843 tprint_newline();
844 printing_tcp->curcol = 0;
845 }
846 }
847
848 printing_tcp = tcp;
849 set_current_tcp(tcp);
850 current_tcp->curcol = 0;
851
852 if (print_pid_pfx || (nprocs > 1 && !outfname)) {
853 size_t len = is_number_in_set(DECODE_PID_COMM, decode_pid_set)
854 ? strlen(tcp->comm) : 0;
855
856 if (print_pid_pfx) {
857 if (len)
858 tprintf_string("%u", tcp->pid);
859 else
860 tprintf_string("%-5u", tcp->pid);
861 } else {
862 tprint_attribute_begin();
863 tprintf_string("pid %5u", tcp->pid);
864 }
865
866 print_comm_str(tcp->comm, len);
867
868 if (!print_pid_pfx)
869 tprint_attribute_end();
870 tprint_space();
871 }
872
873 selinux_printpidcon(tcp);
874
875 if (tflag_format) {
876 struct timespec ts;
877 clock_gettime(CLOCK_REALTIME, &ts);
878
879 time_t local = ts.tv_sec;
880 char str[MAX(sizeof("HH:MM:SS"), sizeof(local) * 3)];
881 struct tm *tm = localtime(&local);
882
883 if (tm)
884 strftime(str, sizeof(str), tflag_format, tm);
885 else
886 xsprintf(str, "%lld", (long long) local);
887 if (tflag_width)
888 tprintf_string("%s.%0*ld ", str, tflag_width,
889 (long) ts.tv_nsec / tflag_scale);
890 else
891 tprintf_string("%s ", str);
892 }
893
894 if (rflag) {
895 struct timespec ts;
896 clock_gettime(CLOCK_MONOTONIC, &ts);
897
898 static struct timespec ots;
899 if (ots.tv_sec == 0)
900 ots = ts;
901
902 struct timespec dts;
903 ts_sub(&dts, &ts, &ots);
904 ots = ts;
905
906 tprintf_string("%s%6ld",
907 tflag_format ? "(+" : "", (long) dts.tv_sec);
908 if (rflag_width) {
909 tprintf_string(".%0*ld", rflag_width,
910 (long) dts.tv_nsec / rflag_scale);
911 }
912 tprints_string(tflag_format ? ") " : " ");
913 }
914
915 if (nflag)
916 print_syscall_number(tcp);
917
918 if (iflag)
919 print_instruction_pointer(tcp);
920 }
921
922 void
923 tabto(void)
924 {
925 if (current_tcp->curcol < acolumn)
926 tprints_string(acolumn_spaces + current_tcp->curcol);
927 }
928
929 /* Should be only called directly *after successful attach* to a tracee.
930 * Otherwise, "strace -oFILE -ff -p<nonexistant_pid>"
931 * may create bogus empty FILE.<nonexistant_pid>, and then die.
932 */
933 static void
934 after_successful_attach(struct tcb *tcp, const unsigned int flags)
935 {
936 tcp->flags |= TCB_ATTACHED | TCB_STARTUP | flags;
937 tcp->outf = shared_log; /* if not -ff mode, the same file is for all */
938 if (output_separately) {
939 char name[PATH_MAX];
940 xsprintf(name, "%s.%u", outfname, tcp->pid);
941 tcp->outf = strace_fopen(name);
942 }
943
944 #ifdef ENABLE_STACKTRACE
945 if (stack_trace_enabled)
946 unwind_tcb_init(tcp);
947 #endif
948 }
949
950 static void
951 expand_tcbtab(void)
952 {
953 /* Allocate some (more) TCBs (and expand the table).
954 We don't want to relocate the TCBs because our
955 callers have pointers and it would be a pain.
956 So tcbtab is a table of pointers. Since we never
957 free the TCBs, we allocate a single chunk of many. */
958 size_t old_tcbtabsize;
959 struct tcb *newtcbs;
960
961 old_tcbtabsize = tcbtabsize;
962
963 tcbtab = xgrowarray(tcbtab, &tcbtabsize, sizeof(tcbtab[0]));
964 newtcbs = xcalloc(tcbtabsize - old_tcbtabsize, sizeof(newtcbs[0]));
965
966 for (struct tcb **tcb_ptr = tcbtab + old_tcbtabsize;
967 tcb_ptr < tcbtab + tcbtabsize;
968 ++tcb_ptr, ++newtcbs)
969 *tcb_ptr = newtcbs;
970 }
971
/*
 * Remove every trailing '\n' from str in place.
 * Returns str.
 */
static char *
strip_trailing_newlines(char *str)
{
	char *end = str + strlen(str);

	while (end > str && end[-1] == '\n')
		--end;
	*end = '\0';

	return str;
}
983
/*
 * Load the first line of /proc/<pid>/comm, without its trailing newline,
 * into buf, which is buf_size bytes long.
 * buf is left empty when the file cannot be opened.
 */
static void
load_pid_comm(int pid, char *buf, size_t buf_size)
{
	static const char comm_path[] = "/proc/%d/comm";
	char procfile[sizeof(comm_path) + sizeof(int) * 3];

	buf[0] = '\0';
	xsprintf(procfile, comm_path, pid);

	FILE *const stream = fopen_stream(procfile, "r");
	if (!stream)
		return;

	if (fgets(buf, buf_size, stream) != NULL)
		strip_trailing_newlines(buf);
	fclose(stream);
}
1000
1001 void
1002 print_pid_comm(int pid)
1003 {
1004 char buf[PROC_COMM_LEN];
1005 load_pid_comm(pid, buf, sizeof(buf));
1006 print_comm_str(buf, strlen(buf));
1007 }
1008
1009 void
1010 maybe_load_task_comm(struct tcb *tcp)
1011 {
1012 if (!is_number_in_set(DECODE_PID_COMM, decode_pid_set))
1013 return;
1014
1015 load_pid_comm(get_proc_pid(tcp->pid), tcp->comm, sizeof(tcp->comm));
1016 }
1017
1018 static struct tcb *
1019 alloctcb(int pid)
1020 {
1021 if (nprocs == tcbtabsize)
1022 expand_tcbtab();
1023
1024 for (unsigned int i = 0; i < tcbtabsize; ++i) {
1025 struct tcb *tcp = tcbtab[i];
1026 if (!tcp->pid) {
1027 memset(tcp, 0, sizeof(*tcp));
1028 list_init(&tcp->wait_list);
1029 tcp->pid = pid;
1030 maybe_load_task_comm(tcp);
1031 #if SUPPORTED_PERSONALITIES > 1
1032 tcp->currpers = current_personality;
1033 #endif
1034 #ifdef ENABLE_SECONTEXT
1035 tcp->last_dirfd = AT_FDCWD;
1036 #endif
1037 nprocs++;
1038 debug_msg("new tcb for pid %d, active tcbs:%d",
1039 tcp->pid, nprocs);
1040 return tcp;
1041 }
1042 }
1043 error_msg_and_die("bug in alloctcb");
1044 }
1045
1046 void *
1047 get_tcb_priv_data(const struct tcb *tcp)
1048 {
1049 return tcp->_priv_data;
1050 }
1051
1052 int
1053 set_tcb_priv_data(struct tcb *tcp, void *const priv_data,
1054 void (*const free_priv_data)(void *))
1055 {
1056 if (tcp->_priv_data)
1057 return -1;
1058
1059 tcp->_free_priv_data = free_priv_data;
1060 tcp->_priv_data = priv_data;
1061
1062 return 0;
1063 }
1064
1065 void
1066 free_tcb_priv_data(struct tcb *tcp)
1067 {
1068 if (tcp->_priv_data) {
1069 if (tcp->_free_priv_data) {
1070 tcp->_free_priv_data(tcp->_priv_data);
1071 tcp->_free_priv_data = NULL;
1072 }
1073 tcp->_priv_data = NULL;
1074 }
1075 }
1076
1077 static void
1078 droptcb(struct tcb *tcp)
1079 {
1080 if (tcp->pid == 0)
1081 return;
1082
1083 if (cflag && debug_flag) {
1084 struct timespec dt;
1085
1086 ts_sub(&dt, &tcp->stime, &tcp->atime);
1087 debug_func_msg("pid %d: %.9f seconds of system time spent "
1088 "since attach", tcp->pid, ts_float(&dt));
1089 }
1090
1091 for (int p = 0; p < SUPPORTED_PERSONALITIES; ++p)
1092 free(tcp->inject_vec[p]);
1093
1094 free_tcb_priv_data(tcp);
1095
1096 #ifdef ENABLE_STACKTRACE
1097 if (stack_trace_enabled)
1098 unwind_tcb_fin(tcp);
1099 #endif
1100
1101 #ifdef HAVE_LINUX_KVM_H
1102 kvm_vcpu_info_free(tcp);
1103 #endif
1104
1105 if (tcp->mmap_cache)
1106 tcp->mmap_cache->free_fn(tcp, __func__);
1107
1108 nprocs--;
1109 debug_msg("dropped tcb for pid %d, %d remain", tcp->pid, nprocs);
1110
1111 if (tcp->outf) {
1112 bool publish = true;
1113 if (!is_complete_set(status_set, NUMBER_OF_STATUSES)) {
1114 publish = is_number_in_set(STATUS_DETACHED, status_set);
1115 strace_close_memstream(tcp, publish);
1116 }
1117
1118 if (output_separately) {
1119 if (tcp->curcol != 0 && publish)
1120 fprintf(tcp->outf, " <detached ...>\n");
1121 fclose(tcp->outf);
1122 } else {
1123 if (printing_tcp == tcp && tcp->curcol != 0 && publish)
1124 fprintf(tcp->outf, " <detached ...>\n");
1125 flush_tcp_output(tcp);
1126 }
1127 }
1128
1129 if (current_tcp == tcp)
1130 set_current_tcp(NULL);
1131 if (printing_tcp == tcp)
1132 printing_tcp = NULL;
1133
1134 list_remove(&tcp->wait_list);
1135
1136 memset(tcp, 0, sizeof(*tcp));
1137 }
1138
1139 /* Detach traced process.
1140 * Never call DETACH twice on the same process as both unattached and
1141 * attached-unstopped processes give the same ESRCH. For unattached process we
1142 * would SIGSTOP it and wait for its SIGSTOP notification forever.
1143 */
1144 static void
1145 detach(struct tcb *tcp)
1146 {
1147 int error;
1148 int status;
1149
1150 /*
1151 * Linux wrongly insists the child be stopped
1152 * before detaching. Arghh. We go through hoops
1153 * to make a clean break of things.
1154 */
1155
1156 if (!(tcp->flags & TCB_ATTACHED))
1157 goto drop;
1158
1159 /* We attached but possibly didn't see the expected SIGSTOP.
1160 * We must catch exactly one as otherwise the detached process
1161 * would be left stopped (process state T).
1162 */
1163 if (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)
1164 goto wait_loop;
1165
1166 error = ptrace(PTRACE_DETACH, tcp->pid, 0, 0);
1167 if (!error) {
1168 /* On a clear day, you can see forever. */
1169 goto drop;
1170 }
1171 if (errno != ESRCH) {
1172 /* Shouldn't happen. */
1173 perror_func_msg("ptrace(PTRACE_DETACH,%u)", tcp->pid);
1174 goto drop;
1175 }
1176 /* ESRCH: process is either not stopped or doesn't exist. */
1177 if (my_tkill(tcp->pid, 0) < 0) {
1178 if (errno != ESRCH)
1179 /* Shouldn't happen. */
1180 perror_func_msg("tkill(%u,0)", tcp->pid);
1181 /* else: process doesn't exist. */
1182 goto drop;
1183 }
1184 /* Process is not stopped, need to stop it. */
1185 if (use_seize) {
1186 /*
1187 * With SEIZE, tracee can be in group-stop already.
1188 * In this state sending it another SIGSTOP does nothing.
1189 * Need to use INTERRUPT.
1190 * Testcase: trying to ^C a "strace -p <stopped_process>".
1191 */
1192 error = ptrace(PTRACE_INTERRUPT, tcp->pid, 0, 0);
1193 if (!error)
1194 goto wait_loop;
1195 if (errno != ESRCH)
1196 perror_func_msg("ptrace(PTRACE_INTERRUPT,%u)", tcp->pid);
1197 } else {
1198 error = my_tkill(tcp->pid, SIGSTOP);
1199 if (!error)
1200 goto wait_loop;
1201 if (errno != ESRCH)
1202 perror_func_msg("tkill(%u,SIGSTOP)", tcp->pid);
1203 }
1204 /* Either process doesn't exist, or some weird error. */
1205 goto drop;
1206
1207 wait_loop:
1208 /* We end up here in three cases:
1209 * 1. We sent PTRACE_INTERRUPT (use_seize case)
1210 * 2. We sent SIGSTOP (!use_seize)
1211 * 3. Attach SIGSTOP was already pending (TCB_IGNORE_ONE_SIGSTOP set)
1212 */
1213 for (;;) {
1214 unsigned int sig;
1215 if (waitpid(tcp->pid, &status, __WALL) < 0) {
1216 if (errno == EINTR)
1217 continue;
1218 /*
1219 * if (errno == ECHILD) break;
1220 * ^^^ WRONG! We expect this PID to exist,
1221 * and want to emit a message otherwise:
1222 */
1223 perror_func_msg("waitpid(%u)", tcp->pid);
1224 break;
1225 }
1226 if (!WIFSTOPPED(status)) {
1227 /*
1228 * Tracee exited or was killed by signal.
1229 * We shouldn't normally reach this place:
1230 * we don't want to consume exit status.
1231 * Consider "strace -p PID" being ^C-ed:
1232 * we want merely to detach from PID.
1233 *
1234 * However, we _can_ end up here if tracee
1235 * was SIGKILLed.
1236 */
1237 break;
1238 }
1239 sig = WSTOPSIG(status);
1240 debug_msg("detach wait: event:%d sig:%d",
1241 (unsigned) status >> 16, sig);
1242 if (use_seize) {
1243 unsigned event = (unsigned)status >> 16;
1244 if (event == PTRACE_EVENT_STOP /*&& sig == SIGTRAP*/) {
1245 /*
1246 * sig == SIGTRAP: PTRACE_INTERRUPT stop.
1247 * sig == other: process was already stopped
1248 * with this stopping sig (see tests/detach-stopped).
1249 * Looks like re-injecting this sig is not necessary
1250 * in DETACH for the tracee to remain stopped.
1251 */
1252 sig = 0;
1253 }
1254 /*
1255 * PTRACE_INTERRUPT is not guaranteed to produce
1256 * the above event if other ptrace-stop is pending.
1257 * See tests/detach-sleeping testcase:
1258 * strace got SIGINT while tracee is sleeping.
1259 * We sent PTRACE_INTERRUPT.
1260 * We see syscall exit, not PTRACE_INTERRUPT stop.
1261 * We won't get PTRACE_INTERRUPT stop
1262 * if we would CONT now. Need to DETACH.
1263 */
1264 if (sig == syscall_trap_sig)
1265 sig = 0;
1266 /* else: not sure in which case we can be here.
1267 * Signal stop? Inject it while detaching.
1268 */
1269 ptrace_restart(PTRACE_DETACH, tcp, sig);
1270 break;
1271 }
1272 /* Note: this check has to be after use_seize check */
1273 /* (else, in use_seize case SIGSTOP will be mistreated) */
1274 if (sig == SIGSTOP) {
1275 /* Detach, suppressing SIGSTOP */
1276 ptrace_restart(PTRACE_DETACH, tcp, 0);
1277 break;
1278 }
1279 if (sig == syscall_trap_sig)
1280 sig = 0;
1281 /* Can't detach just yet, may need to wait for SIGSTOP */
1282 error = ptrace_restart(PTRACE_CONT, tcp, sig);
1283 if (error < 0) {
1284 /* Should not happen.
1285 * Note: ptrace_restart returns 0 on ESRCH, so it's not it.
1286 * ptrace_restart already emitted error message.
1287 */
1288 break;
1289 }
1290 }
1291
1292 drop:
1293 if (!is_number_in_set(QUIET_ATTACH, quiet_set)
1294 && (tcp->flags & TCB_ATTACHED))
1295 error_msg("Process %u detached", tcp->pid);
1296
1297 droptcb(tcp);
1298 }
1299
1300 static void
1301 process_opt_p_list(char *opt)
1302 {
1303 while (*opt) {
1304 /*
1305 * We accept -p PID,PID; -p "`pidof PROG`"; -p "`pgrep PROG`".
1306 * pidof uses space as delim, pgrep uses newline. :(
1307 */
1308 int pid;
1309 char *delim = opt + strcspn(opt, "\n\t ,");
1310 char c = *delim;
1311
1312 *delim = '\0';
1313 pid = string_to_uint(opt);
1314 if (pid <= 0) {
1315 error_msg_and_die("Invalid process id: '%s'", opt);
1316 }
1317 if (pid == strace_tracer_pid) {
1318 error_msg_and_die("I'm sorry, I can't let you do that, Dave.");
1319 }
1320 *delim = c;
1321 alloctcb(pid);
1322 if (c == '\0')
1323 break;
1324 opt = delim + 1;
1325 }
1326 }
1327
1328 static void
1329 attach_tcb(struct tcb *const tcp)
1330 {
1331 const char *ptrace_attach_cmd;
1332
1333 if (ptrace_attach_or_seize(tcp->pid, &ptrace_attach_cmd) < 0) {
1334 perror_msg("attach: ptrace(%s, %d)",
1335 ptrace_attach_cmd, tcp->pid);
1336 droptcb(tcp);
1337 return;
1338 }
1339
1340 after_successful_attach(tcp, TCB_GRABBED | post_attach_sigstop);
1341 debug_msg("attach to pid %d (main) succeeded", tcp->pid);
1342
1343 static const char task_path[] = "/proc/%d/task";
1344 char procdir[sizeof(task_path) + sizeof(int) * 3];
1345 DIR *dir;
1346 unsigned int ntid = 0, nerr = 0;
1347
1348 if (followfork && tcp->pid != strace_child &&
1349 xsprintf(procdir, task_path, get_proc_pid(tcp->pid)) > 0 &&
1350 (dir = opendir(procdir)) != NULL) {
1351 struct_dirent *de;
1352
1353 while ((de = read_dir(dir)) != NULL) {
1354 if (de->d_fileno == 0)
1355 continue;
1356
1357 int tid = string_to_uint(de->d_name);
1358 if (tid <= 0 || tid == tcp->pid)
1359 continue;
1360
1361 ++ntid;
1362 if (ptrace_attach_or_seize(tid, &ptrace_attach_cmd) < 0)
1363 {
1364 ++nerr;
1365 debug_perror_msg("attach: ptrace(%s, %d)",
1366 ptrace_attach_cmd, tid);
1367 continue;
1368 }
1369
1370 after_successful_attach(alloctcb(tid),
1371 TCB_GRABBED | post_attach_sigstop);
1372 debug_msg("attach to pid %d succeeded", tid);
1373 }
1374
1375 closedir(dir);
1376 }
1377
1378 if (!is_number_in_set(QUIET_ATTACH, quiet_set)) {
1379 if (ntid > nerr)
1380 error_msg("Process %u attached"
1381 " with %u threads",
1382 tcp->pid, ntid - nerr + 1);
1383 else
1384 error_msg("Process %u attached",
1385 tcp->pid);
1386 }
1387 }
1388
1389 static void
1390 startup_attach(void)
1391 {
1392 pid_t parent_pid = strace_tracer_pid;
1393 struct tcb *tcp;
1394
1395 if (daemonized_tracer) {
1396 pid_t pid = fork();
1397 if (pid < 0)
1398 perror_func_msg_and_die("fork");
1399
1400 if (pid) { /* parent */
1401 /*
1402 * Wait for grandchild to attach to straced process
1403 * (grandparent). Grandchild SIGKILLs us after it attached.
1404 * Grandparent's wait() is unblocked by our death,
1405 * it proceeds to exec the straced program.
1406 */
1407 pause();
1408 _exit(0); /* paranoia */
1409 }
1410 /* grandchild */
1411 /* We will be the tracer process. Remember our new pid: */
1412 strace_tracer_pid = getpid();
1413
1414 switch (daemonized_tracer) {
1415 case DAEMONIZE_NEW_PGROUP:
1416 /*
1417 * If -D is passed twice, create a new process group,
1418 * so we won't be killed by kill(0, ...).
1419 */
1420 if (setpgid(0, 0) < 0)
1421 perror_msg_and_die("Cannot create a new"
1422 " process group");
1423 break;
1424 case DAEMONIZE_NEW_SESSION:
1425 /*
1426 * If -D is passed thrice, create a new session,
1427 * so we won't be killed upon session termination.
1428 */
1429 if (setsid() < 0)
1430 perror_msg_and_die("Cannot create a new"
1431 " session");
1432 break;
1433 }
1434 }
1435
1436 for (unsigned int tcbi = 0; tcbi < tcbtabsize; ++tcbi) {
1437 tcp = tcbtab[tcbi];
1438
1439 if (!tcp->pid)
1440 continue;
1441
1442 /* Is this a process we should attach to, but not yet attached? */
1443 if (tcp->flags & TCB_ATTACHED)
1444 continue; /* no, we already attached it */
1445
1446 if (tcp->pid == parent_pid || tcp->pid == strace_tracer_pid) {
1447 errno = EPERM;
1448 perror_msg("attach: pid %d", tcp->pid);
1449 droptcb(tcp);
1450 continue;
1451 }
1452
1453 attach_tcb(tcp);
1454
1455 if (interrupted)
1456 return;
1457 } /* for each tcbtab[] */
1458
1459 if (daemonized_tracer) {
1460 /*
1461 * Make parent go away.
1462 * Also makes grandparent's wait() unblock.
1463 */
1464 kill(parent_pid, SIGKILL);
1465 strace_child = 0;
1466 }
1467 }
1468
1469 static void
1470 maybe_init_seccomp_filter(void)
1471 {
1472 debug_msg("seccomp filter %s",
1473 seccomp_filtering ? "enabled" : "disabled");
1474 if (seccomp_filtering)
1475 init_seccomp_filter();
1476 }
1477
1478 /* Stack-o-phobic exec helper, in the hope to work around
1479 * NOMMU + "daemonized tracer" difficulty.
1480 */
1481 struct exec_params {
1482 int fd_to_close;
1483 uid_t run_euid;
1484 gid_t run_egid;
1485 char **argv;
1486 char **env;
1487 char *pathname;
1488 struct sigaction child_sa;
1489 };
1490 static struct exec_params params_for_tracee;
1491
1492 static void ATTRIBUTE_NOINLINE ATTRIBUTE_NORETURN
1493 exec_or_die(void)
1494 {
1495 struct exec_params *params = ¶ms_for_tracee;
1496
1497 if (params->fd_to_close >= 0)
1498 close(params->fd_to_close);
1499
1500 if (!daemonized_tracer) {
1501 if (params->child_sa.sa_handler != SIG_DFL)
1502 sigaction(SIGCHLD, ¶ms->child_sa, NULL);
1503
1504 if (!use_seize && ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
1505 perror_msg_and_die("ptrace(PTRACE_TRACEME, ...)");
1506 }
1507
1508 if (username != NULL) {
1509 /*
1510 * It is important to set groups before we
1511 * lose privileges on setuid.
1512 */
1513 if (initgroups(username, run_gid) < 0)
1514 perror_msg_and_die("initgroups");
1515 if (setregid(run_gid, params->run_egid) < 0)
1516 perror_msg_and_die("setregid");
1517
1518 /*
1519 * If there is a seccomp filter to be installed, this should
1520 * be done before CAP_SYS_ADMIN is dropped by setreuid.
1521 */
1522 maybe_init_seccomp_filter();
1523
1524 if (setreuid(run_uid, params->run_euid) < 0)
1525 perror_msg_and_die("setreuid");
1526 }
1527
1528 if (!daemonized_tracer) {
1529 /*
1530 * Induce a ptrace stop. Tracer (our parent)
1531 * will resume us with PTRACE_SYSCALL and display
1532 * the immediately following execve syscall.
1533 * Can't do this on NOMMU systems, we are after
1534 * vfork: parent is blocked, stopping would deadlock.
1535 */
1536 if (!NOMMU_SYSTEM)
1537 kill(getpid(), SIGSTOP);
1538 } else {
1539 alarm(3);
1540 /* we depend on SIGCHLD set to SIG_DFL by init code */
1541 /* if it happens to be SIG_IGN'ed, wait won't block */
1542 while (wait(NULL) < 0 && errno == EINTR)
1543 ;
1544 alarm(0);
1545 if (params->child_sa.sa_handler != SIG_DFL)
1546 sigaction(SIGCHLD, ¶ms->child_sa, NULL);
1547 }
1548
1549 if (!username)
1550 maybe_init_seccomp_filter();
1551 execve(params->pathname, params->argv, params->env);
1552 perror_msg_and_die("exec");
1553 }
1554
/*
 * Create a placeholder descriptor: the read end of a pipe whose write
 * end is closed right away, with the close-on-exec flag set.
 * Reading from it yields EOF at once, writing to it fails with EBADF.
 * Dies if the pipe cannot be created.
 */
static int
open_dummy_desc(void)
{
	int pipefd[2];

	if (pipe(pipefd) != 0)
		perror_func_msg_and_die("pipe");

	const int read_end = pipefd[0];

	close(pipefd[1]);
	set_cloexec_flag(read_end);

	return read_end;
}
1572
/* placeholder fds status for stdin and stdout */
static bool fd_is_placeholder[2];

/*
 * Make sure descriptors 0, 1 and 2 are all open by plugging every
 * closed one with a placeholder descriptor.
 *
 * Which of stdin/stdout had to be plugged is recorded in
 * fd_is_placeholder for later use.
 */
static void
ensure_standard_fds_opened(void)
{
	for (;;) {
		const int fd = open_dummy_desc();

		if (fd > 2) {
			/* All standard descriptors were open: undo. */
			close(fd);
			return;
		}
		if (fd == 2)
			return;

		fd_is_placeholder[fd] = true;
	}
}
1597
1598 /*
1599 * Redirect stdin and stdout unless they have been opened earlier
1600 * by ensure_standard_fds_opened as placeholders.
1601 */
1602 static void
1603 redirect_standard_fds(void)
1604 {
1605 /*
1606 * It might be a good idea to redirect stderr as well,
1607 * but we sometimes need to print error messages.
1608 */
1609 for (int i = 0; i <= 1; ++i) {
1610 if (!fd_is_placeholder[i]) {
1611 close(i);
1612 open_dummy_desc();
1613 }
1614 }
1615 }
1616
1617 static void
1618 startup_child(char **argv, char **env)
1619 {
1620 strace_stat_t statbuf;
1621 const char *filename;
1622 size_t filename_len;
1623 char pathname[PATH_MAX];
1624 int pid;
1625 struct tcb *tcp;
1626
1627 filename = argv[0];
1628 filename_len = strlen(filename);
1629
1630 if (filename_len > sizeof(pathname) - 1) {
1631 errno = ENAMETOOLONG;
1632 perror_msg_and_die("exec");
1633 }
1634 if (strchr(filename, '/')) {
1635 strcpy(pathname, filename);
1636 }
1637 #ifdef USE_DEBUGGING_EXEC
1638 /*
1639 * Debuggers customarily check the current directory
1640 * first regardless of the path but doing that gives
1641 * security geeks a panic attack.
1642 */
1643 else if (stat_file(filename, &statbuf) == 0)
1644 strcpy(pathname, filename);
1645 #endif /* USE_DEBUGGING_EXEC */
1646 else {
1647 const char *path;
1648 size_t m, n, len;
1649
1650 for (path = getenv("PATH"); path && *path; path += m) {
1651 const char *colon = strchr(path, ':');
1652 if (colon) {
1653 n = colon - path;
1654 m = n + 1;
1655 } else
1656 m = n = strlen(path);
1657 if (n == 0) {
1658 if (!getcwd(pathname, PATH_MAX))
1659 continue;
1660 len = strlen(pathname);
1661 } else if (n > sizeof(pathname) - 1)
1662 continue;
1663 else {
1664 strncpy(pathname, path, n);
1665 len = n;
1666 }
1667 if (len && pathname[len - 1] != '/')
1668 pathname[len++] = '/';
1669 if (filename_len + len > sizeof(pathname) - 1)
1670 continue;
1671 strcpy(pathname + len, filename);
1672 if (stat_file(pathname, &statbuf) == 0 &&
1673 /* Accept only regular files
1674 with some execute bits set.
1675 XXX not perfect, might still fail */
1676 S_ISREG(statbuf.st_mode) &&
1677 (statbuf.st_mode & 0111))
1678 break;
1679 }
1680 if (!path || !*path)
1681 pathname[0] = '\0';
1682 }
1683 if (stat_file(pathname, &statbuf) < 0) {
1684 perror_msg_and_die("Can't stat '%s'", filename);
1685 }
1686
1687 params_for_tracee.fd_to_close = (shared_log != stderr) ? fileno(shared_log) : -1;
1688 params_for_tracee.run_euid = (statbuf.st_mode & S_ISUID) ? statbuf.st_uid : run_uid;
1689 params_for_tracee.run_egid = (statbuf.st_mode & S_ISGID) ? statbuf.st_gid : run_gid;
1690 params_for_tracee.argv = argv;
1691 if (argv0)
1692 params_for_tracee.argv[0] = argv0;
1693 params_for_tracee.env = env;
1694 /*
1695 * On NOMMU, can be safely freed only after execve in tracee.
1696 * It's hard to know when that happens, so we just leak it.
1697 */
1698 params_for_tracee.pathname = NOMMU_SYSTEM ? xstrdup(pathname) : pathname;
1699
1700 if (daemonized_tracer)
1701 prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY);
1702
1703 pid = fork();
1704 if (pid < 0)
1705 perror_func_msg_and_die("fork");
1706
1707 if ((pid != 0 && daemonized_tracer)
1708 || (pid == 0 && !daemonized_tracer)
1709 ) {
1710 /* We are to become the tracee. Two cases:
1711 * -D: we are parent
1712 * not -D: we are child
1713 */
1714 exec_or_die();
1715 }
1716
1717 /* We are the tracer */
1718
1719 if (!daemonized_tracer) {
1720 strace_child = pid;
1721 if (!use_seize) {
1722 /* child did PTRACE_TRACEME, nothing to do in parent */
1723 } else {
1724 if (!NOMMU_SYSTEM) {
1725 /* Wait until child stopped itself */
1726 int status;
1727 while (waitpid(pid, &status, WSTOPPED) < 0) {
1728 if (errno == EINTR)
1729 continue;
1730 perror_msg_and_die("waitpid");
1731 }
1732 if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP) {
1733 kill_save_errno(pid, SIGKILL);
1734 perror_msg_and_die("Unexpected wait status %#x",
1735 status);
1736 }
1737 }
1738 /* Else: NOMMU case, we have no way to sync.
1739 * Just attach to it as soon as possible.
1740 * This means that we may miss a few first syscalls...
1741 */
1742
1743 const char *ptrace_attach_cmd;
1744 if (ptrace_attach_or_seize(pid, &ptrace_attach_cmd)) {
1745 kill_save_errno(pid, SIGKILL);
1746 perror_msg_and_die("attach: ptrace(%s, %d)",
1747 ptrace_attach_cmd, pid);
1748 }
1749 if (!NOMMU_SYSTEM)
1750 kill(pid, SIGCONT);
1751 }
1752 tcp = alloctcb(pid);
1753 after_successful_attach(tcp, TCB_SKIP_DETACH_ON_FIRST_EXEC
1754 | (NOMMU_SYSTEM ? 0
1755 : (TCB_HIDE_LOG
1756 | post_attach_sigstop)));
1757 } else {
1758 /* With -D, we are *child* here, the tracee is our parent. */
1759 strace_child = strace_tracer_pid;
1760 strace_tracer_pid = getpid();
1761 tcp = alloctcb(strace_child);
1762 tcp->flags |= TCB_SKIP_DETACH_ON_FIRST_EXEC | TCB_HIDE_LOG;
1763 /*
1764 * Attaching will be done later, by startup_attach.
1765 * Note: we don't do after_successful_attach() here either!
1766 */
1767
1768 /* NOMMU BUG! -D mode is active, we (child) return,
1769 * and we will scribble over parent's stack!
1770 * When parent later unpauses, it segfaults.
1771 *
1772 * We work around it
1773 * (1) by declaring exec_or_die() NORETURN,
1774 * hopefully compiler will just jump to it
1775 * instead of call (won't push anything to stack),
1776 * (2) by trying very hard in exec_or_die()
1777 * to not use any stack,
1778 * (3) having a really big (PATH_MAX) stack object
1779 * in this function, which creates a "buffer" between
1780 * child's and parent's stack pointers.
1781 * This may save us if (1) and (2) failed
1782 * and compiler decided to use stack in exec_or_die() anyway
1783 * (happens on i386 because of stack parameter passing).
1784 *
1785 * A cleaner solution is to use makecontext + setcontext
1786 * to create a genuine separate stack and execute on it.
1787 */
1788 }
1789
1790 if (seccomp_filtering)
1791 tcp->flags |= TCB_SECCOMP_FILTER;
1792
1793 /*
1794 * A case where straced process is part of a pipe:
1795 * { sleep 1; yes | head -n99999; } | strace -o/dev/null sh -c 'exec <&-; sleep 9'
1796 * If strace won't close its fd#0, closing it in tracee is not enough:
1797 * the pipe is still open, it has a reader. Thus, "head" will not get its
1798 * SIGPIPE at once, on the first write.
1799 *
1800 * Preventing it by redirecting strace's stdin/out.
1801 * (Don't leave fds 0 and 1 closed, this is bad practice: future opens
1802 * will reuse them, unexpectedly making a newly opened object "stdin").
1803 */
1804 redirect_standard_fds();
1805 }
1806
1807 static void
1808 test_ptrace_seize(void)
1809 {
1810 int pid;
1811
1812 /* Need fork for test. NOMMU has no forks */
1813 if (NOMMU_SYSTEM) {
1814 post_attach_sigstop = 0; /* this sets use_seize to 1 */
1815 return;
1816 }
1817
1818 pid = fork();
1819 if (pid < 0)
1820 perror_func_msg_and_die("fork");
1821
1822 if (pid == 0) {
1823 pause();
1824 _exit(0);
1825 }
1826
1827 /* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap. After
1828 * attaching tracee continues to run unless a trap condition occurs.
1829 * PTRACE_SEIZE doesn't affect signal or group stop state.
1830 */
1831 if (ptrace(PTRACE_SEIZE, pid, 0, 0) == 0) {
1832 post_attach_sigstop = 0; /* this sets use_seize to 1 */
1833 } else {
1834 debug_msg("PTRACE_SEIZE doesn't work");
1835 }
1836
1837 kill(pid, SIGKILL);
1838
1839 while (1) {
1840 int status, tracee_pid;
1841
1842 errno = 0;
1843 tracee_pid = waitpid(pid, &status, 0);
1844 if (tracee_pid <= 0) {
1845 if (errno == EINTR)
1846 continue;
1847 perror_func_msg_and_die("unexpected wait result %d",
1848 tracee_pid);
1849 }
1850 if (WIFSIGNALED(status))
1851 return;
1852
1853 error_func_msg_and_die("unexpected wait status %#x", status);
1854 }
1855 }
1856
/*
 * Return the running kernel's release encoded in the same layout as
 * the kernel's KERNEL_VERSION(): (major << 16) | (minor << 8) | patch.
 * Dies if uname() fails.
 *
 * Every component is saturated at 255 so that it never spills into the
 * neighbouring byte: a release such as "4.14.300" must not be decoded
 * as if its minor number were different from 14.  Saturation also keeps
 * the accumulator from wrapping around on absurdly long digit runs.
 */
static unsigned int
get_os_release(void)
{
	struct utsname u;
	if (uname(&u) < 0)
		perror_msg_and_die("uname");
	/*
	 * u.release string consists of at most three parts
	 * and normally has this form: "3.2.9[-some-garbage]",
	 * "X.Y-something" means "X.Y.0".
	 */
	const char *p = u.release;
	unsigned int rel = 0;
	for (unsigned int parts = 0; parts < 3; ++parts) {
		unsigned int n = 0;
		for (; (*p >= '0') && (*p <= '9'); ++p) {
			/* Past 255 the value is clamped anyway. */
			if (n <= 255)
				n = n * 10 + (unsigned int) (*p - '0');
		}
		if (n > 255)
			n = 255;
		rel <<= 8;
		rel |= n;
		if (*p == '.')
			++p;
	}
	return rel;
}
1883
1884 static void
1885 set_sighandler(int signo, void (*sighandler)(int), struct sigaction *oldact)
1886 {
1887 const struct sigaction sa = { .sa_handler = sighandler };
1888 sigaction(signo, &sa, oldact);
1889 }
1890
1891 static int
1892 parse_interruptible_arg(const char *arg)
1893 {
1894 static const struct xlat_data intr_str[] = {
1895 { INTR_ANYWHERE, "anywhere" },
1896 { INTR_ANYWHERE, "always" },
1897 { INTR_WHILE_WAIT, "waiting" },
1898 { INTR_NEVER, "never" },
1899 { INTR_BLOCK_TSTP_TOO, "never_tstp" },
1900 };
1901
1902 const struct xlat_data *intr_arg = find_xlat_val(intr_str, arg);
1903
1904 return intr_arg ? (int) intr_arg->val
1905 : (int) string_to_uint_upto(arg, NUM_INTR_OPTS - 1);
1906 }
1907
1908 static int
1909 parse_ts_arg(const char *in_arg)
1910 {
1911 static const char format_pfx[] = "format:";
1912 static const char scale_pfx[] = "precision:";
1913
1914 enum {
1915 TOKEN_FORMAT = 1 << 0,
1916 TOKEN_SCALE = 1 << 1,
1917 } token_type;
1918 enum {
1919 FK_UNSET,
1920 FK_NONE,
1921 FK_TIME,
1922 FK_UNIX,
1923 } format_kind = FK_UNSET;
1924 int precision_width;
1925 int precision_scale = 0;
1926 char *arg = xstrdup(in_arg);
1927 char *saveptr = NULL;
1928
1929 for (const char *token = strtok_r(arg, ",", &saveptr);
1930 token; token = strtok_r(NULL, ",", &saveptr)) {
1931 token_type = TOKEN_FORMAT | TOKEN_SCALE;
1932
1933 if (!strncasecmp(token, format_pfx, sizeof(format_pfx) - 1)) {
1934 token += sizeof(format_pfx) - 1;
1935 token_type = TOKEN_FORMAT;
1936 } else if (!strncasecmp(token, scale_pfx,
1937 sizeof(scale_pfx) - 1)) {
1938 token += sizeof(scale_pfx) - 1;
1939 token_type = TOKEN_SCALE;
1940
1941 }
1942
1943 if (token_type & TOKEN_FORMAT) {
1944 if (!strcasecmp(token, "none")) {
1945 format_kind = FK_NONE;
1946 continue;
1947 } else if (!strcasecmp(token, "time")) {
1948 format_kind = FK_TIME;
1949 continue;
1950 } else if (!strcasecmp(token, "unix")) {
1951 format_kind = FK_UNIX;
1952 continue;
1953 }
1954 }
1955
1956 if (token_type & TOKEN_SCALE) {
1957 precision_scale =
1958 str2timescale_optarg(token, &precision_width);
1959
1960 if (precision_scale > 0)
1961 continue;
1962 }
1963
1964 free(arg);
1965 return -1;
1966 }
1967
1968 switch (format_kind) {
1969 case FK_UNSET:
1970 if (!tflag_format)
1971 tflag_format = "%T";
1972 break;
1973 case FK_NONE:
1974 tflag_format = NULL;
1975 break;
1976 case FK_TIME:
1977 tflag_format = "%T";
1978 break;
1979 case FK_UNIX:
1980 tflag_format = "%s";
1981 break;
1982 }
1983
1984 if (precision_scale > 0) {
1985 tflag_scale = precision_scale;
1986 tflag_width = precision_width;
1987 }
1988
1989 free(arg);
1990 return 0;
1991 }
1992
1993 static int
1994 parse_tips_arg(const char *in_arg)
1995 {
1996 static const char id_pfx[] = "id:";
1997 static const char fmt_pfx[] = "format:";
1998
1999 enum {
2000 TOKEN_ID = 1 << 0,
2001 TOKEN_FORMAT = 1 << 1,
2002 } token_type;
2003 int id = tip_id;
2004 enum tips_fmt fmt = show_tips == TIPS_NONE ? TIPS_COMPACT : show_tips;
2005 char *arg = xstrdup(in_arg);
2006 char *saveptr = NULL;
2007
2008 for (const char *token = strtok_r(arg, ",", &saveptr);
2009 token; token = strtok_r(NULL, ",", &saveptr)) {
2010 token_type = TOKEN_ID | TOKEN_FORMAT;
2011
2012 if (!strncasecmp(token, id_pfx, sizeof(id_pfx) - 1)) {
2013 token += sizeof(id_pfx) - 1;
2014 token_type = TOKEN_ID;
2015 } else if (!strncasecmp(token, fmt_pfx,
2016 sizeof(fmt_pfx) - 1)) {
2017 token += sizeof(fmt_pfx) - 1;
2018 token_type = TOKEN_FORMAT;
2019
2020 }
2021
2022 if (token_type & TOKEN_ID) {
2023 int ret;
2024
2025 if (!strcasecmp(token, "random")) {
2026 id = TIP_ID_RANDOM;
2027 continue;
2028 } else if ((ret = string_to_uint(token)) >= 0) {
2029 id = ret;
2030 continue;
2031 }
2032 }
2033
2034 if (token_type & TOKEN_FORMAT) {
2035 if (!strcasecmp(token, "none")) {
2036 fmt = TIPS_NONE;
2037 continue;
2038 } else if (!strcasecmp(token, "compact")) {
2039 fmt = TIPS_COMPACT;
2040 continue;
2041 } else if (!strcasecmp(token, "full")) {
2042 fmt = TIPS_FULL;
2043 continue;
2044 }
2045 }
2046
2047 free(arg);
2048 return -1;
2049 }
2050
2051 tip_id = id;
2052 show_tips = fmt;
2053
2054 free(arg);
2055 return 0;
2056 }
2057
/*
 * Drop every entry named var ("var=..." or a bare "var") from the first
 * *env_count elements of env, compacting the survivors towards the
 * front in their original order.  *env_count is lowered accordingly.
 */
static void
remove_from_env(char **env, size_t *env_count, const char *var)
{
	const size_t name_len = strlen(var);
	size_t kept = 0;

	debug_func_msg("Removing variable \"%s\" from the command environment",
		       var);

	for (size_t idx = 0; idx < *env_count; ++idx) {
		char *const entry = env[idx];
		const bool same_name = strncmp(entry, var, name_len) == 0
			&& (entry[name_len] == '=' || entry[name_len] == '\0');

		if (same_name) {
			debug_func_msg("Skipping entry %zu (\"%s\")",
				       idx, entry);
			continue;
		}

		/* Moving is needed only once something has been dropped. */
		if (kept < idx) {
			debug_func_msg("Copying entry %zu to %zu", idx, kept);
			env[kept] = entry;
		}
		++kept;
	}

	if (kept < *env_count) {
		debug_func_msg("Decreasing env count from %zu to %zu",
			       *env_count, kept);
		*env_count = kept;
	}
}
2087
/*
 * Store var (a "name=value" string whose name is its first len bytes)
 * in env: the first existing entry with the same name is replaced,
 * otherwise var is placed at index *env_count and *env_count grows by one.
 * The caller has to provide room for that extra element.
 */
static void
add_to_env(char **env, size_t *env_count, char *var, const size_t len)
{
	size_t slot = 0;

	while (slot < *env_count) {
		const char *const entry = env[slot];

		if (strncmp(entry, var, len) == 0
		    && (entry[len] == '=' || entry[len] == '\0'))
			break;
		++slot;
	}

	if (slot == *env_count) {
		debug_func_msg("Adding entry %zu"
			       ", key=\"%.*s\", var=\"%s\"",
			       slot, (int) len, var, var);
		*env_count += 1;
	} else {
		debug_func_msg("Replacing entry %zu (\"%s\")"
			       ", key=\"%.*s\", var=\"%s\"",
			       slot, env[slot], (int) len, var, var);
	}

	env[slot] = var;
}
2112
/*
 * Apply a single environment change: "name=value" sets the variable,
 * a string without '=' removes the variable of that name.
 */
static void
update_env(char **env, size_t *env_count, char *var)
{
	const char *const eq = strchr(var, '=');

	if (!eq) {
		remove_from_env(env, env_count, var);
		return;
	}

	add_to_env(env, env_count, var, eq - var);
}
2123
/*
 * Build the environment for the command: a copy of orig_env with
 * env_changes applied in order.
 *
 * When there are no changes orig_env itself is returned.  Otherwise the
 * result is a newly allocated NULL-terminated array whose elements point
 * to the original strings and to the strings in env_changes (nothing is
 * duplicated).  Dies if the required element count overflows size_t.
 */
static char **
make_env(char **orig_env, char *const *env_changes, size_t env_change_count)
{
	if (env_change_count == 0)
		return orig_env;

	/* Count the variables of the original environment. */
	size_t count = 0;
	if (orig_env) {
		while (orig_env[count])
			++count;
	}

	/* Worst case: every change adds a variable; plus the terminator. */
	const size_t capacity = count + env_change_count;

	if (capacity < count || capacity < env_change_count ||
	    capacity + 1 < capacity)
		error_msg_and_die("Cannot construct new environment: the sum "
				  "of old environment variable count (%zu) and "
				  "environment changes count (%zu) is too big",
				  count, env_change_count);

	char **const result = xallocarray(capacity + 1, sizeof(*result));

	if (count)
		memcpy(result, orig_env, count * sizeof(*orig_env));

	for (size_t i = 0; i < env_change_count; ++i)
		update_env(result, &count, env_changes[i]);

	result[count] = NULL;

	return result;
}
2160
2161 static void
2162 increase_version_verbosity(void)
2163 {
2164 if (version_verbosity < (STRAUSS_START_VERBOSITY + strauss_lines))
2165 version_verbosity++;
2166 }
2167
/* A growable array of path items. */
struct pathtrace {
	struct path_set_item *paths;	/* the items themselves */
	size_t size;			/* number of allocated items */
	size_t count;			/* number of items in use */
};
2173
2174 static void
2175 add_path_trace(struct pathtrace *pt, const char *path)
2176 {
2177 if (pt->count >= pt->size) {
2178 pt->paths = xgrowarray(pt->paths, &pt->size,
2179 sizeof(pt->paths[0]));
2180 }
2181
2182 pt->paths[pt->count].path = path;
2183 pt->count++;
2184 }
2185
2186 /*
2187 * Initialization part of main() was eating much stack (~0.5k),
2188 * which was unused after init.
2189 * We can reuse it if we move init code into a separate function.
2190 *
2191 * Don't want main() to inline us and defeat the reason
2192 * we have a separate function.
2193 */
2194 static void ATTRIBUTE_NOINLINE
2195 init(int argc, char *argv[])
2196 {
2197 static const char qflag_qual[] = "attach,personality";
2198 static const char qqflag_qual[] = "exit,attach,personality";
2199 static const char qqqflag_qual[] = "all";
2200 static const char yflag_qual[] = "path";
2201 static const char yyflag_qual[] = "all";
2202 static const char tflag_str[] = "format:time";
2203 static const char ttflag_str[] = "precision:us,format:time";
2204 static const char tttflag_str[] = "format:unix,precision:us";
2205 static const char secontext_qual[] = "!full,mismatch";
2206
2207 int c, i;
2208 int optF = 0, zflags = 0;
2209 int lopt_idx;
2210 int daemonized_tracer_long = DAEMONIZE_NONE;
2211 int xflag_long = -1;
2212 int qflag_short = 0;
2213 int followfork_short = 0;
2214 int yflag_short = 0;
2215 bool tflag_long_set = false;
2216 int tflag_short = 0;
2217 bool columns_set = false;
2218 bool sortby_set = false;
2219
2220 /*
2221 * We can initialise global_path_set only after tracing backend
2222 * initialisation, so we store pointers to all the paths from
2223 * command-line arguments during parsing in this array and then,
2224 * after the successful backend initialisation, iterate over it
2225 * in order to add them to global_path_set.
2226 */
2227 struct pathtrace pathtrace = { NULL };
2228
2229 /**
2230 * Storage for environment changes requested for command. They
2231 * are stored in a temporary array and not applied as is during
2232 * command line parsing for two reasons:
2233 * - putenv() changes environment of the tracer as well,
2234 * which is unacceptable.
2235 * - Environment changes have to be applied
2236 * in a tracing-backend-specific way.
2237 */
2238 char **env_changes = NULL;
2239 size_t env_change_size = 0;
2240 size_t env_change_count = 0;
2241
2242 if (!program_invocation_name || !*program_invocation_name) {
2243 static char name[] = "strace";
2244 program_invocation_name =
2245 (argc > 0 && argv[0] && *argv[0]) ? argv[0] : name;
2246 }
2247
2248 strace_tracer_pid = getpid();
2249
2250 os_release = get_os_release();
2251
2252 pidns_init();
2253
2254 shared_log = stderr;
2255 set_sortby(DEFAULT_SORTBY);
2256 set_personality(DEFAULT_PERSONALITY);
2257 qualify_trace("all");
2258 qualify_abbrev("all");
2259 qualify_verbose("all");
2260 #if DEFAULT_QUAL_FLAGS != (QUAL_TRACE | QUAL_ABBREV | QUAL_VERBOSE)
2261 # error Bug in DEFAULT_QUAL_FLAGS
2262 #endif
2263 qualify_status("all");
2264 qualify_quiet("none");
2265 qualify_decode_fd("none");
2266 qualify_signals("all");
2267 #ifdef ENABLE_SECONTEXT
2268 qualify_secontext("none");
2269 #endif
2270
2271 static const char optstring[] =
2272 "+a:Ab:cCdDe:E:fFhiI:kno:O:p:P:qrs:S:tTu:U:vVwxX:yYzZ";
2273
2274 enum {
2275 GETOPT_SECCOMP = 0x100,
2276 GETOPT_DAEMONIZE,
2277 GETOPT_HEX_STR,
2278 GETOPT_FOLLOWFORKS,
2279 GETOPT_OUTPUT_SEPARATELY,
2280 GETOPT_PIDNS_TRANSLATION,
2281 GETOPT_SYSCALL_LIMIT,
2282 GETOPT_TS,
2283 GETOPT_TIPS,
2284 GETOPT_ARGV0,
2285
2286 GETOPT_QUAL_TRACE,
2287 GETOPT_QUAL_TRACE_FD,
2288 GETOPT_QUAL_ABBREV,
2289 GETOPT_QUAL_VERBOSE,
2290 GETOPT_QUAL_RAW,
2291 GETOPT_QUAL_SIGNAL,
2292 GETOPT_QUAL_STATUS,
2293 GETOPT_QUAL_READ,
2294 GETOPT_QUAL_WRITE,
2295 GETOPT_QUAL_FAULT,
2296 GETOPT_QUAL_INJECT,
2297 GETOPT_QUAL_KVM,
2298 GETOPT_QUAL_QUIET,
2299 GETOPT_QUAL_DECODE_FD,
2300 GETOPT_QUAL_DECODE_PID,
2301 GETOPT_QUAL_SECONTEXT,
2302 };
2303 static const struct option longopts[] = {
2304 { "columns", required_argument, 0, 'a' },
2305 { "output-append-mode", no_argument, 0, 'A' },
2306 { "detach-on", required_argument, 0, 'b' },
2307 { "summary-only", no_argument, 0, 'c' },
2308 { "summary", no_argument, 0, 'C' },
2309 { "debug", no_argument, 0, 'd' },
2310 { "daemonize", optional_argument, 0, GETOPT_DAEMONIZE },
2311 { "daemonised", optional_argument, 0, GETOPT_DAEMONIZE },
2312 { "daemonized", optional_argument, 0, GETOPT_DAEMONIZE },
2313 { "env", required_argument, 0, 'E' },
2314 { "follow-forks", no_argument, 0, GETOPT_FOLLOWFORKS },
2315 { "output-separately", no_argument, 0,
2316 GETOPT_OUTPUT_SEPARATELY },
2317 { "help", no_argument, 0, 'h' },
2318 { "instruction-pointer", no_argument, 0, 'i' },
2319 { "interruptible", required_argument, 0, 'I' },
2320 { "stack-traces", no_argument, 0, 'k' },
2321 { "syscall-limit", required_argument, 0, GETOPT_SYSCALL_LIMIT },
2322 { "syscall-number", no_argument, 0, 'n' },
2323 { "output", required_argument, 0, 'o' },
2324 { "summary-syscall-overhead", required_argument, 0, 'O' },
2325 { "attach", required_argument, 0, 'p' },
2326 { "trace-path", required_argument, 0, 'P' },
2327 { "relative-timestamps", optional_argument, 0, 'r' },
2328 { "string-limit", required_argument, 0, 's' },
2329 { "summary-sort-by", required_argument, 0, 'S' },
2330 { "absolute-timestamps", optional_argument, 0, GETOPT_TS },
2331 { "timestamps", optional_argument, 0, GETOPT_TS },
2332 { "syscall-times", optional_argument, 0, 'T' },
2333 { "user", required_argument, 0, 'u' },
2334 { "summary-columns", required_argument, 0, 'U' },
2335 { "no-abbrev", no_argument, 0, 'v' },
2336 { "version", no_argument, 0, 'V' },
2337 { "summary-wall-clock", no_argument, 0, 'w' },
2338 { "strings-in-hex", optional_argument, 0, GETOPT_HEX_STR },
2339 { "const-print-style", required_argument, 0, 'X' },
2340 { "pidns-translation", no_argument , 0, GETOPT_PIDNS_TRANSLATION },
2341 { "successful-only", no_argument, 0, 'z' },
2342 { "failed-only", no_argument, 0, 'Z' },
2343 { "failing-only", no_argument, 0, 'Z' },
2344 { "seccomp-bpf", no_argument, 0, GETOPT_SECCOMP },
2345 { "tips", optional_argument, 0, GETOPT_TIPS },
2346 { "argv0", required_argument, 0, GETOPT_ARGV0 },
2347
2348 { "trace", required_argument, 0, GETOPT_QUAL_TRACE },
2349 { "trace-fds", required_argument, 0, GETOPT_QUAL_TRACE_FD },
2350 { "abbrev", required_argument, 0, GETOPT_QUAL_ABBREV },
2351 { "verbose", required_argument, 0, GETOPT_QUAL_VERBOSE },
2352 { "raw", required_argument, 0, GETOPT_QUAL_RAW },
2353 { "signals", required_argument, 0, GETOPT_QUAL_SIGNAL },
2354 { "status", required_argument, 0, GETOPT_QUAL_STATUS },
2355 { "read", required_argument, 0, GETOPT_QUAL_READ },
2356 { "write", required_argument, 0, GETOPT_QUAL_WRITE },
2357 { "fault", required_argument, 0, GETOPT_QUAL_FAULT },
2358 { "inject", required_argument, 0, GETOPT_QUAL_INJECT },
2359 { "kvm", required_argument, 0, GETOPT_QUAL_KVM },
2360 { "quiet", optional_argument, 0, GETOPT_QUAL_QUIET },
2361 { "silent", optional_argument, 0, GETOPT_QUAL_QUIET },
2362 { "silence", optional_argument, 0, GETOPT_QUAL_QUIET },
2363 { "decode-fds", optional_argument, 0, GETOPT_QUAL_DECODE_FD },
2364 { "decode-pids",required_argument, 0, GETOPT_QUAL_DECODE_PID },
2365 { "secontext", optional_argument, 0, GETOPT_QUAL_SECONTEXT },
2366
2367 { 0, 0, 0, 0 }
2368 };
2369
2370 lopt_idx = -1;
2371 while ((c = getopt_long(argc, argv, optstring, longopts, &lopt_idx)) != EOF) {
2372 const struct option *lopt = lopt_idx >= 0
2373 && (unsigned) lopt_idx < ARRAY_SIZE(longopts)
2374 ? longopts + lopt_idx : NULL;
2375 lopt_idx = -1;
2376
2377 switch (c) {
2378 case 'a':
2379 acolumn = string_to_uint(optarg);
2380 if (acolumn < 0)
2381 error_opt_arg(c, lopt, optarg);
2382 break;
2383 case 'A':
2384 open_append = true;
2385 break;
2386 case 'b':
2387 if (strcmp(optarg, "execve") != 0)
2388 error_msg_and_die("Syscall '%s' for -b isn't supported",
2389 optarg);
2390 detach_on_execve = 1;
2391 break;
2392 case 'c':
2393 if (cflag == CFLAG_BOTH) {
2394 error_msg_and_help("-c/--summary-only and "
2395 "-C/--summary are mutually "
2396 "exclusive");
2397 }
2398 cflag = CFLAG_ONLY_STATS;
2399 break;
2400 case 'C':
2401 if (cflag == CFLAG_ONLY_STATS) {
2402 error_msg_and_help("-c/--summary-only and "
2403 "-C/--summary are mutually "
2404 "exclusive");
2405 }
2406 cflag = CFLAG_BOTH;
2407 break;
2408 case 'd':
2409 debug_flag = 1;
2410 break;
2411 case 'D':
2412 daemonized_tracer++;
2413 break;
2414 case GETOPT_DAEMONIZE:
2415 daemonized_tracer_long =
2416 find_arg_val(optarg, daemonize_str,
2417 DAEMONIZE_GRANDCHILD,
2418 DAEMONIZE_NONE);
2419 if (daemonized_tracer_long <= DAEMONIZE_NONE)
2420 error_opt_arg(c, lopt, optarg);
2421 break;
2422 case 'e':
2423 qualify(optarg);
2424 break;
2425 case 'E':
2426 if (env_change_count >= env_change_size)
2427 env_changes = xgrowarray(env_changes,
2428 &env_change_size,
2429 sizeof(*env_changes));
2430
2431 env_changes[env_change_count++] = optarg;
2432 break;
2433 case 'f':
2434 followfork_short++;
2435 break;
2436 case GETOPT_FOLLOWFORKS:
2437 followfork = true;
2438 break;
2439 case GETOPT_OUTPUT_SEPARATELY:
2440 output_separately = true;
2441 break;
2442 case 'F':
2443 optF = 1;
2444 break;
2445 case 'h':
2446 usage();
2447 break;
2448 case 'i':
2449 iflag = 1;
2450 break;
2451 case 'I':
2452 opt_intr = parse_interruptible_arg(optarg);
2453 if (opt_intr <= 0)
2454 error_opt_arg(c, lopt, optarg);
2455 break;
2456 case 'k':
2457 #ifdef ENABLE_STACKTRACE
2458 stack_trace_enabled = true;
2459 #else
2460 error_msg_and_die("Stack traces (-k/--stack-traces "
2461 "option) are not supported by this "
2462 "build of strace");
2463 #endif
2464 break;
2465 case 'n':
2466 nflag = 1;
2467 break;
2468 case 'o':
2469 outfname = optarg;
2470 break;
2471 case 'O':
2472 if (set_overhead(optarg) < 0)
2473 error_opt_arg(c, lopt, optarg);
2474 break;
2475 case 'p':
2476 process_opt_p_list(optarg);
2477 break;
2478 case 'P':
2479 add_path_trace(&pathtrace, optarg);
2480 break;
2481 case 'q':
2482 qflag_short++;
2483 break;
2484 case 'r':
2485 rflag = 1;
2486 rflag_width = 6;
2487 rflag_scale = str2timescale_optarg(optarg,
2488 &rflag_width);
2489 if (rflag_scale < 0)
2490 error_opt_arg(c, lopt, optarg);
2491 break;
2492 case 's':
2493 i = string_to_uint(optarg);
2494 if (i < 0 || (unsigned int) i > -1U / 4)
2495 error_opt_arg(c, lopt, optarg);
2496 max_strlen = i;
2497 break;
2498 case 'S':
2499 set_sortby(optarg);
2500 sortby_set = true;
2501 break;
2502 case 't':
2503 tflag_short++;
2504 break;
2505 case GETOPT_TS:
2506 tflag_long_set = true;
2507 if (parse_ts_arg(optarg ?: tflag_str))
2508 error_opt_arg(c, lopt, optarg);
2509 break;
2510 case 'T':
2511 Tflag = 1;
2512 Tflag_width = 6;
2513 Tflag_scale = str2timescale_optarg(optarg,
2514 &Tflag_width);
2515 if (Tflag_scale < 0)
2516 error_opt_arg(c, lopt, optarg);
2517 break;
2518 case 'u':
2519 username = optarg;
2520 break;
2521 case 'U':
2522 columns_set = true;
2523 set_count_summary_columns(optarg);
2524 break;
2525 case 'v':
2526 qualify_abbrev("none");
2527 break;
2528 case 'V':
2529 increase_version_verbosity();
2530 break;
2531 case 'w':
2532 count_wallclock = 1;
2533 break;
2534 case 'x':
2535 xflag = MIN(xflag + 1, HEXSTR_ALL);
2536 break;
2537 case GETOPT_HEX_STR:
2538 xflag_long = find_arg_val(optarg, xflag_str,
2539 HEXSTR_ALL, -1);
2540 if (xflag_long < HEXSTR_NONE)
2541 error_opt_arg(c, lopt, optarg);
2542 break;
2543 case 'X':
2544 if (!strcmp(optarg, "raw"))
2545 xlat_verbosity = XLAT_STYLE_RAW;
2546 else if (!strcmp(optarg, "abbrev"))
2547 xlat_verbosity = XLAT_STYLE_ABBREV;
2548 else if (!strcmp(optarg, "verbose"))
2549 xlat_verbosity = XLAT_STYLE_VERBOSE;
2550 else
2551 error_opt_arg(c, lopt, optarg);
2552 break;
2553 case 'y':
2554 yflag_short++;
2555 break;
2556 case 'Y':
2557 qualify_decode_pid("comm");
2558 break;
2559 case GETOPT_PIDNS_TRANSLATION:
2560 qualify_decode_pid("pidns");
2561 break;
2562 case 'z':
2563 clear_number_set_array(status_set, 1);
2564 add_number_to_set(STATUS_SUCCESSFUL, status_set);
2565 zflags++;
2566 break;
2567 case 'Z':
2568 clear_number_set_array(status_set, 1);
2569 add_number_to_set(STATUS_FAILED, status_set);
2570 zflags++;
2571 break;
2572 case GETOPT_SECCOMP:
2573 seccomp_filtering = true;
2574 break;
2575 case GETOPT_SYSCALL_LIMIT:
2576 syscall_limit = string_to_ulonglong(optarg);
2577 if (syscall_limit <= 0)
2578 error_opt_arg(c, lopt, optarg);
2579 break;
2580 case GETOPT_TIPS:
2581 if (parse_tips_arg(optarg ?: ""))
2582 error_opt_arg(c, lopt, optarg);
2583 break;
2584 case GETOPT_ARGV0:
2585 argv0 = optarg;
2586 break;
2587 case GETOPT_QUAL_SECONTEXT:
2588 qualify_secontext(optarg ? optarg : secontext_qual);
2589 break;
2590 case GETOPT_QUAL_TRACE:
2591 qualify_trace(optarg);
2592 break;
2593 case GETOPT_QUAL_TRACE_FD:
2594 qualify_trace_fd(optarg);
2595 break;
2596 case GETOPT_QUAL_ABBREV:
2597 qualify_abbrev(optarg);
2598 break;
2599 case GETOPT_QUAL_VERBOSE:
2600 qualify_verbose(optarg);
2601 break;
2602 case GETOPT_QUAL_RAW:
2603 qualify_raw(optarg);
2604 break;
2605 case GETOPT_QUAL_SIGNAL:
2606 qualify_signals(optarg);
2607 break;
2608 case GETOPT_QUAL_STATUS:
2609 qualify_status(optarg);
2610 break;
2611 case GETOPT_QUAL_READ:
2612 qualify_read(optarg);
2613 break;
2614 case GETOPT_QUAL_WRITE:
2615 qualify_write(optarg);
2616 break;
2617 case GETOPT_QUAL_FAULT:
2618 qualify_fault(optarg);
2619 break;
2620 case GETOPT_QUAL_INJECT:
2621 qualify_inject(optarg);
2622 break;
2623 case GETOPT_QUAL_KVM:
2624 qualify_kvm(optarg);
2625 break;
2626 case GETOPT_QUAL_QUIET:
2627 qualify_quiet(optarg ?: qflag_qual);
2628 break;
2629 case GETOPT_QUAL_DECODE_FD:
2630 qualify_decode_fd(optarg ?: yflag_qual);
2631 break;
2632 case GETOPT_QUAL_DECODE_PID:
2633 qualify_decode_pid(optarg);
2634 break;
2635 default:
2636 error_msg_and_help(NULL);
2637 break;
2638 }
2639 }
2640
2641 if (version_verbosity) {
2642 print_version(version_verbosity);
2643 exit(0);
2644 }
2645
2646 argv += optind;
2647 argc -= optind;
2648
2649 if (argc < 0 || (!nprocs && !argc)) {
2650 if (show_tips != TIPS_NONE) {
2651 print_totd();
2652 exit(exit_code);
2653 }
2654 error_msg_and_help("must have PROG [ARGS] or -p PID");
2655 }
2656
2657 if (!argc && argv0)
2658 error_msg_and_help("PROG [ARGS] must be specified with --argv0");
2659
2660 if (daemonized_tracer_long) {
2661 if (daemonized_tracer) {
2662 error_msg_and_die("-D and --daemonize cannot"
2663 " be provided simultaneously");
2664 } else {
2665 daemonized_tracer = daemonized_tracer_long;
2666 }
2667 }
2668
2669 if (!argc && daemonized_tracer) {
2670 error_msg_and_help("PROG [ARGS] must be specified with "
2671 "-D/--daemonize");
2672 }
2673
2674 if (daemonized_tracer > (unsigned int) MAX_DAEMONIZE_OPTS)
2675 error_msg_and_help("Too many -D's (%u), maximum supported -D "
2676 "count is %d",
2677 daemonized_tracer, MAX_DAEMONIZE_OPTS);
2678
2679 if (tflag_short) {
2680 if (tflag_long_set) {
2681 error_msg_and_die("-t and --absolute-timestamps cannot"
2682 " be provided simultaneously");
2683 }
2684
2685 parse_ts_arg(tflag_short == 1 ? tflag_str :
2686 tflag_short == 2 ? ttflag_str : tttflag_str);
2687 }
2688
2689 if (xflag_long >= 0) {
2690 if (xflag) {
2691 error_msg_and_die("-x and --strings-in-hex cannot"
2692 " be provided simultaneously");
2693 } else {
2694 xflag = xflag_long;
2695 }
2696 }
2697
2698 if (yflag_short) {
2699 if (decode_fd_set_updated) {
2700 error_msg_and_die("-y and --decode-fds cannot"
2701 " be provided simultaneously");
2702 }
2703
2704 qualify_decode_fd(yflag_short == 1 ? yflag_qual : yyflag_qual);
2705 }
2706
2707 if (is_number_in_set(DECODE_PID_COMM, decode_pid_set)) {
2708 /*
2709 * If --decode-pids=comm option comes after -p, comm fields
2710 * of tcbs are not filled though tcbs are initialized.
2711 * We must fill the fields here.
2712 */
2713 for (unsigned int i = 0; i < tcbtabsize; ++i) {
2714 struct tcb *tcp = tcbtab[i];
2715 if (tcp->comm[0] == 0)
2716 maybe_load_task_comm(tcp);
2717 }
2718 }
2719
2720 if (seccomp_filtering && detach_on_execve) {
2721 error_msg("--seccomp-bpf is not enabled because"
2722 " it is not compatible with -b");
2723 seccomp_filtering = false;
2724 }
2725
2726 if (seccomp_filtering && syscall_limit > 0) {
2727 error_msg("--seccomp-bpf is not enabled because"
2728 " it is not compatible with --syscall-limit");
2729 seccomp_filtering = false;
2730 }
2731
2732 if (followfork_short) {
2733 if (followfork) {
2734 error_msg_and_die("-f and --follow-forks cannot"
2735 " be provided simultaneously");
2736 } else if (followfork_short >= 2 && output_separately) {
2737 error_msg_and_die("-ff and --output-separately cannot"
2738 " be provided simultaneously");
2739 } else {
2740 followfork = true;
2741 output_separately = followfork_short >= 2;
2742 }
2743 }
2744
2745 if (seccomp_filtering) {
2746 if (nprocs && (!argc || debug_flag))
2747 error_msg("--seccomp-bpf is not enabled for processes"
2748 " attached with -p");
2749 if (!followfork) {
2750 error_msg("--seccomp-bpf cannot be used without "
2751 "-f/--follow-forks, disabling");
2752 seccomp_filtering = false;
2753 }
2754 }
2755
2756 if (optF) {
2757 if (followfork) {
2758 error_msg("deprecated option -F ignored");
2759 } else {
2760 error_msg("option -F is deprecated, "
2761 "please use -f/--follow-forks instead");
2762 followfork = true;
2763 }
2764 }
2765
2766 if (output_separately && cflag) {
2767 error_msg_and_help("(-c/--summary-only or -C/--summary) and"
2768 " -ff/--output-separately"
2769 " are mutually exclusive");
2770 }
2771
2772 if (count_wallclock && !cflag) {
2773 error_msg_and_help("-w/--summary-wall-clock must be given with"
2774 " (-c/--summary-only or -C/--summary)");
2775 }
2776
2777 if (columns_set && !cflag) {
2778 error_msg_and_help("-U/--summary-columns must be given with"
2779 " (-c/--summary-only or -C/--summary)");
2780 }
2781
2782 if (sortby_set && !cflag) {
2783 error_msg("-S/--summary-sort-by has no effect without"
2784 " (-c/--summary-only or -C/--summary)");
2785 }
2786
2787 if (cflag == CFLAG_ONLY_STATS) {
2788 if (iflag)
2789 error_msg("-i/--instruction-pointer has no effect "
2790 "with -c/--summary-only");
2791 if (stack_trace_enabled)
2792 error_msg("-k/--stack-traces has no effect "
2793 "with -c/--summary-only");
2794 if (nflag)
2795 error_msg("-n/--syscall-number has no effect "
2796 "with -c/--summary-only");
2797 if (rflag)
2798 error_msg("-r/--relative-timestamps has no effect "
2799 "with -c/--summary-only");
2800 if (tflag_format)
2801 error_msg("-t/--absolute-timestamps has no effect "
2802 "with -c/--summary-only");
2803 if (Tflag)
2804 error_msg("-T/--syscall-times has no effect "
2805 "with -c/--summary-only");
2806 if (!number_set_array_is_empty(decode_fd_set, 0))
2807 error_msg("-y/--decode-fds has no effect "
2808 "with -c/--summary-only");
2809 #ifdef ENABLE_SECONTEXT
2810 if (!number_set_array_is_empty(secontext_set, 0))
2811 error_msg("--secontext has no effect with "
2812 "-c/--summary-only");
2813 #endif
2814 }
2815
2816 if (!outfname) {
2817 if (output_separately && !followfork)
2818 error_msg("--output-separately has no effect "
2819 "without -o/--output");
2820 if (open_append)
2821 error_msg("-A/--output-append-mode has no effect "
2822 "without -o/--output");
2823 }
2824
2825 #ifndef HAVE_OPEN_MEMSTREAM
2826 if (!is_complete_set(status_set, NUMBER_OF_STATUSES))
2827 error_msg_and_help("open_memstream is required to use -z, -Z, or -e status");
2828 #endif
2829
2830 if (zflags > 1)
2831 error_msg("Only the last of "
2832 "-z/--successful-only/-Z/--failed-only options will "
2833 "take effect. "
2834 "See status qualifier for more complex filters.");
2835
2836 for (size_t cnt = 0; cnt < pathtrace.count; ++cnt)
2837 pathtrace_select(pathtrace.paths[cnt].path);
2838 free(pathtrace.paths);
2839
2840 acolumn_spaces = xmalloc(acolumn + 1);
2841 memset(acolumn_spaces, ' ', acolumn);
2842 acolumn_spaces[acolumn] = '\0';
2843
2844 set_sighandler(SIGCHLD, SIG_DFL, ¶ms_for_tracee.child_sa);
2845
2846 #ifdef ENABLE_STACKTRACE
2847 if (stack_trace_enabled)
2848 unwind_init();
2849 #endif
2850
2851 /* See if they want to run as another user. */
2852 if (username != NULL) {
2853 struct passwd *pent;
2854
2855 if (getuid() != 0 || geteuid() != 0) {
2856 error_msg_and_die("You must be root to use "
2857 "the -u/--username option");
2858 }
2859 pent = getpwnam(username);
2860 if (pent == NULL) {
2861 error_msg_and_die("Cannot find user '%s'", username);
2862 }
2863 run_uid = pent->pw_uid;
2864 run_gid = pent->pw_gid;
2865 } else {
2866 run_uid = getuid();
2867 run_gid = getgid();
2868 }
2869
2870 if (followfork)
2871 ptrace_setoptions |= PTRACE_O_TRACECLONE |
2872 PTRACE_O_TRACEFORK |
2873 PTRACE_O_TRACEVFORK;
2874
2875 if (seccomp_filtering)
2876 check_seccomp_filter();
2877 if (seccomp_filtering)
2878 ptrace_setoptions |= PTRACE_O_TRACESECCOMP;
2879
2880 debug_msg("ptrace_setoptions = %#x", ptrace_setoptions);
2881 test_ptrace_seize();
2882 test_ptrace_get_syscall_info();
2883
2884 /*
2885 * Is something weird with our stdin and/or stdout -
2886 * for example, may they be not open? In this case,
2887 * ensure that none of the future opens uses them.
2888 *
2889 * This was seen in the wild when /proc/sys/kernel/core_pattern
2890 * was set to "|/bin/strace -o/tmp/LOG PROG":
2891 * kernel runs coredump helper with fd#0 open but fd#1 closed (!),
2892 * therefore LOG gets opened to fd#1, and fd#1 is closed by
2893 * "don't hold up stdin/out open" code soon after.
2894 */
2895 ensure_standard_fds_opened();
2896
2897 /* Check if they want to redirect the output. */
2898 if (outfname) {
2899 /* See if they want to pipe the output. */
2900 if (outfname[0] == '|' || outfname[0] == '!') {
2901 /*
2902 * We can't do the <outfname>.PID funny business
2903 * when using popen, so prohibit it.
2904 */
2905 if (output_separately)
2906 error_msg_and_help("piping the output and "
2907 "-ff/--output-separately "
2908 "are mutually exclusive");
2909 shared_log = strace_popen(outfname + 1);
2910 } else if (!output_separately) {
2911 shared_log = strace_fopen(outfname);
2912 } else if (strlen(outfname) >= PATH_MAX - sizeof(int) * 3) {
2913 errno = ENAMETOOLONG;
2914 perror_msg_and_die("%s", outfname);
2915 }
2916 } else {
2917 /* -ff without -o FILE is the same as single -f */
2918 output_separately = false;
2919 }
2920
2921 if (!outfname || outfname[0] == '|' || outfname[0] == '!') {
2922 setvbuf(shared_log, NULL, _IOLBF, 0);
2923 }
2924
2925 /*
2926 * argv[0] -pPID -oFILE Default interactive setting
2927 * yes * 0 INTR_WHILE_WAIT
2928 * no 1 0 INTR_WHILE_WAIT
2929 * yes * 1 INTR_NEVER
2930 * no 1 1 INTR_WHILE_WAIT
2931 */
2932
2933 if (daemonized_tracer && !opt_intr)
2934 opt_intr = INTR_BLOCK_TSTP_TOO;
2935 if (outfname && argc) {
2936 if (!opt_intr)
2937 opt_intr = INTR_NEVER;
2938 if (!qflag_short && !quiet_set_updated)
2939 qflag_short = 1;
2940 }
2941 if (!opt_intr)
2942 opt_intr = INTR_WHILE_WAIT;
2943
2944 if (qflag_short) {
2945 if (quiet_set_updated) {
2946 error_msg_and_die("-q and -e quiet/--quiet cannot"
2947 " be provided simultaneously");
2948 }
2949
2950 qualify_quiet(qflag_short == 1 ? qflag_qual :
2951 qflag_short == 2 ? qqflag_qual : qqqflag_qual);
2952 }
2953
2954 /*
2955 * startup_child() must be called before the signal handlers get
2956 * installed below as they are inherited into the spawned process.
2957 * Also we do not need to be protected by them as during interruption
2958 * in the startup_child() mode we kill the spawned process anyway.
2959 */
2960 if (argc) {
2961 char **new_environ = make_env(environ, env_changes,
2962 env_change_count);
2963 free(env_changes);
2964
2965 startup_child(argv, new_environ);
2966
2967 /*
2968 * On a NOMMU system, new_environ can be freed only after exec
2969 * in child, so we leak it in that case, similar to pathname
2970 * in startup_child().
2971 */
2972 if (new_environ != environ && !NOMMU_SYSTEM)
2973 free(new_environ);
2974 }
2975
2976 set_sighandler(SIGTTOU, SIG_IGN, NULL);
2977 set_sighandler(SIGTTIN, SIG_IGN, NULL);
2978 if (opt_intr != INTR_ANYWHERE) {
2979 if (opt_intr == INTR_BLOCK_TSTP_TOO)
2980 set_sighandler(SIGTSTP, SIG_IGN, NULL);
2981 /*
2982 * In interactive mode (if no -o OUTFILE, or -p PID is used),
2983 * fatal signals are handled asynchronously and acted
2984 * when waiting for process state changes.
2985 * In non-interactive mode these signals are ignored.
2986 */
2987 set_sighandler(SIGHUP, interactive ? interrupt : SIG_IGN, NULL);
2988 set_sighandler(SIGINT, interactive ? interrupt : SIG_IGN, NULL);
2989 set_sighandler(SIGQUIT, interactive ? interrupt : SIG_IGN, NULL);
2990 set_sighandler(SIGPIPE, interactive ? interrupt : SIG_IGN, NULL);
2991 set_sighandler(SIGTERM, interactive ? interrupt : SIG_IGN, NULL);
2992 }
2993
2994 sigemptyset(&timer_set);
2995 sigaddset(&timer_set, SIGALRM);
2996 sigprocmask(SIG_BLOCK, &timer_set, NULL);
2997 set_sighandler(SIGALRM, timer_sighandler, NULL);
2998
2999 if (nprocs != 0 || daemonized_tracer)
3000 startup_attach();
3001
3002 /* Do we want pids printed in our -o OUTFILE?
3003 * -ff: no (every pid has its own file); or
3004 * -f: yes (there can be more pids in the future); or
3005 * -p PID1,PID2: yes (there are already more than one pid)
3006 */
3007 print_pid_pfx = outfname && !output_separately &&
3008 ((followfork && !output_separately) || nprocs > 1);
3009 }
3010
3011 static struct tcb *
3012 pid2tcb(const int pid)
3013 {
3014 if (pid <= 0)
3015 return NULL;
3016
3017 #define PID2TCB_CACHE_SIZE 1024U
3018 #define PID2TCB_CACHE_MASK (PID2TCB_CACHE_SIZE - 1)
3019
3020 static struct tcb *pid2tcb_cache[PID2TCB_CACHE_SIZE];
3021 struct tcb **const ptcp = &pid2tcb_cache[pid & PID2TCB_CACHE_MASK];
3022 struct tcb *tcp = *ptcp;
3023
3024 if (tcp && tcp->pid == pid)
3025 return tcp;
3026
3027 for (unsigned int i = 0; i < tcbtabsize; ++i) {
3028 tcp = tcbtab[i];
3029 if (tcp->pid == pid)
3030 return *ptcp = tcp;
3031 }
3032
3033 return NULL;
3034 }
3035
3036 static void
3037 cleanup(int fatal_sig)
3038 {
3039 if (!fatal_sig)
3040 fatal_sig = SIGTERM;
3041
3042 for (unsigned int i = 0; i < tcbtabsize; ++i) {
3043 struct tcb *tcp = tcbtab[i];
3044 if (!tcp->pid)
3045 continue;
3046 debug_func_msg("looking at pid %u", tcp->pid);
3047 if (tcp->pid == strace_child) {
3048 kill(tcp->pid, SIGCONT);
3049 kill(tcp->pid, fatal_sig);
3050 }
3051 detach(tcp);
3052 }
3053 }
3054
3055 static void
3056 interrupt(int sig)
3057 {
3058 interrupted = sig;
3059 }
3060
/*
 * Report a wait status through error_msg() in human-readable form.
 *
 * pid    - pid the status was reported for.
 * status - raw wait status; bits 16 and up carry the ptrace event number.
 *
 * The message contains the raw status, the pid, a description of the
 * WIFSIGNALED/WIFEXITED/WIFSTOPPED state ("???" if none applies) and,
 * when the event number is non-zero, the name of the ptrace event.
 */
static void
print_debug_info(const int pid, int status)
{
	const unsigned int event = (unsigned int) status >> 16;
	char buf[sizeof("WIFEXITED,exitcode=%u") + sizeof(int)*3 /*paranoia:*/ + 16];
	char evbuf[sizeof(",EVENT_VFORK_DONE (%u)") + sizeof(int)*3 /*paranoia:*/ + 16];

	strcpy(buf, "???");
	if (WIFSIGNALED(status))
		xsprintf(buf, "WIFSIGNALED,%ssig=%s",
			 WCOREDUMP(status) ? "core," : "",
			 sprintsigname(WTERMSIG(status)));
	if (WIFEXITED(status))
		/* %u expects an unsigned argument. */
		xsprintf(buf, "WIFEXITED,exitcode=%u",
			 (unsigned int) WEXITSTATUS(status));
	if (WIFSTOPPED(status))
		xsprintf(buf, "WIFSTOPPED,sig=%s",
			 sprintsigname(WSTOPSIG(status)));
	evbuf[0] = '\0';
	if (event != 0) {
		static const char *const event_names[] = {
			[PTRACE_EVENT_CLONE] = "CLONE",
			[PTRACE_EVENT_FORK] = "FORK",
			[PTRACE_EVENT_VFORK] = "VFORK",
			[PTRACE_EVENT_VFORK_DONE] = "VFORK_DONE",
			[PTRACE_EVENT_EXEC] = "EXEC",
			[PTRACE_EVENT_EXIT] = "EXIT",
			[PTRACE_EVENT_SECCOMP] = "SECCOMP",
			/* [PTRACE_EVENT_STOP (=128)] would make biggish array */
		};
		const char *e = "??";
		if (event < ARRAY_SIZE(event_names)) {
			/*
			 * Slots not named by a designated initializer are
			 * NULL; never hand those to %s.
			 */
			if (event_names[event])
				e = event_names[event];
		} else if (event == PTRACE_EVENT_STOP) {
			e = "STOP";
		}
		xsprintf(evbuf, ",EVENT_%s (%u)", e, event);
	}
	/* %x and %u expect unsigned arguments; status and pid are int. */
	error_msg("[wait(0x%06x) = %u] %s%s",
		  (unsigned int) status, (unsigned int) pid, buf, evbuf);
}
3099
3100 static struct tcb *
3101 maybe_allocate_tcb(const int pid, int status)
3102 {
3103 if (!WIFSTOPPED(status)) {
3104 if (detach_on_execve && pid == strace_child) {
3105 /* example: strace -bexecve sh -c 'exec true' */
3106 strace_child = 0;
3107 return NULL;
3108 }
3109 if (!is_number_in_set(QUIET_EXIT, quiet_set)) {
3110 /*
3111 * This can happen if we inherited an unknown child.
3112 * Example: (sleep 1 & exec strace true)
3113 */
3114 error_msg("Exit of unknown pid %u ignored", pid);
3115 }
3116 return NULL;
3117 }
3118 if (followfork) {
3119 /* We assume it's a fork/vfork/clone child */
3120 struct tcb *tcp = alloctcb(pid);
3121 after_successful_attach(tcp, post_attach_sigstop);
3122 if (!is_number_in_set(QUIET_ATTACH, quiet_set))
3123 error_msg("Process %d attached", pid);
3124 return tcp;
3125 } else {
3126 /*
3127 * This can happen if a clone call misused CLONE_PTRACE itself.
3128 *
3129 * There used to be a dance around possible re-injection of
3130 * WSTOPSIG(status), but it was later removed as the only
3131 * observable stop here is the initial ptrace-stop.
3132 */
3133 ptrace(PTRACE_DETACH, pid, NULL, 0L);
3134 if (!is_number_in_set(QUIET_ATTACH, quiet_set))
3135 error_msg("Detached unknown pid %d", pid);
3136 return NULL;
3137 }
3138 }
3139
3140 /*
3141 * Under Linux, execve changes pid to thread leader's pid, and we see this
3142 * changed pid on EVENT_EXEC and later, execve sysexit. Leader "disappears"
3143 * without exit notification. Let user know that, drop leader's tcb, and fix
3144 * up pid in execve thread's tcb. Effectively, execve thread's tcb replaces
3145 * leader's tcb.
3146 *
3147 * BTW, leader is 'stuck undead' (doesn't report WIFEXITED on exit syscall)
3148 * in multi-threaded programs exactly in order to handle this case.
3149 */
3150 static struct tcb *
3151 maybe_switch_tcbs(struct tcb *tcp, const int pid)
3152 {
3153 /*
3154 * PTRACE_GETEVENTMSG returns old pid starting from Linux 3.0.
3155 * On 2.6 and earlier it can return garbage.
3156 */
3157 if (os_release < KERNEL_VERSION(3, 0, 0))
3158 return NULL;
3159
3160 const long old_pid = tcb_wait_tab[tcp->wait_data_idx].msg;
3161
3162 /* Avoid truncation in pid2tcb() param passing */
3163 if (old_pid <= 0 || old_pid == pid)
3164 return NULL;
3165 if ((unsigned long) old_pid > UINT_MAX)
3166 return NULL;
3167 struct tcb *execve_thread = pid2tcb(old_pid);
3168 /* It should be !NULL, but I feel paranoid */
3169 if (!execve_thread)
3170 return NULL;
3171
3172 if (execve_thread->curcol != 0) {
3173 /*
3174 * One case we are here is -ff, try
3175 * "strace -oLOG -ff test/threaded_execve".
3176 * Another case is demonstrated by
3177 * tests/maybe_switch_current_tcp.c
3178 */
3179 fprintf(execve_thread->outf, " <pid changed to %d ...>\n", pid);
3180 /*execve_thread->curcol = 0; - no need, see code below */
3181 }
3182 /* Swap output FILEs and memstream (needed for -ff) */
3183 FILE *fp = execve_thread->outf;
3184 execve_thread->outf = tcp->outf;
3185 tcp->outf = fp;
3186 if (execve_thread->staged_output_data || tcp->staged_output_data) {
3187 struct staged_output_data *staged_output_data;
3188
3189 staged_output_data = execve_thread->staged_output_data;
3190 execve_thread->staged_output_data = tcp->staged_output_data;
3191 tcp->staged_output_data = staged_output_data;
3192 }
3193
3194 /* And their column positions */
3195 execve_thread->curcol = tcp->curcol;
3196 tcp->curcol = 0;
3197 /* Drop leader, but close execve'd thread outfile (if -ff) */
3198 droptcb(tcp);
3199 /* Switch to the thread, reusing leader's outfile and pid */
3200 tcp = execve_thread;
3201 tcp->pid = pid;
3202 if (cflag != CFLAG_ONLY_STATS) {
3203 if (!is_number_in_set(QUIET_THREAD_EXECVE, quiet_set)) {
3204 printleader(tcp);
3205 tprintf_string("+++ superseded by execve in pid %lu +++",
3206 old_pid);
3207 tprint_newline();
3208 line_ended();
3209 }
3210 /*
3211 * Need to reopen memstream for thread
3212 * as we closed it in droptcb.
3213 */
3214 if (!is_complete_set(status_set, NUMBER_OF_STATUSES))
3215 strace_open_memstream(tcp);
3216 tcp->flags |= TCB_REPRINT;
3217 }
3218
3219 return tcp;
3220 }
3221
3222 static struct tcb *
3223 maybe_switch_current_tcp(void)
3224 {
3225 struct tcb *tcp = maybe_switch_tcbs(current_tcp, current_tcp->pid);
3226
3227 if (tcp)
3228 set_current_tcp(tcp);
3229
3230 return tcp;
3231 }
3232
3233 static void
3234 print_signalled(struct tcb *tcp, const int pid, int status)
3235 {
3236 if (pid == strace_child) {
3237 exit_code = 0x100 | WTERMSIG(status);
3238 strace_child = 0;
3239 }
3240
3241 if (cflag != CFLAG_ONLY_STATS
3242 && is_number_in_set(WTERMSIG(status), signal_set)) {
3243 printleader(tcp);
3244 tprintf_string("+++ killed by %s %s+++",
3245 sprintsigname(WTERMSIG(status)),
3246 WCOREDUMP(status) ? "(core dumped) " : "");
3247 tprint_newline();
3248 line_ended();
3249 }
3250 }
3251
3252 static void
3253 print_exited(struct tcb *tcp, const int pid, int status)
3254 {
3255 if (pid == strace_child) {
3256 exit_code = WEXITSTATUS(status);
3257 strace_child = 0;
3258 }
3259
3260 if (cflag != CFLAG_ONLY_STATS &&
3261 !is_number_in_set(QUIET_EXIT, quiet_set)) {
3262 printleader(tcp);
3263 tprintf_string("+++ exited with %d +++", WEXITSTATUS(status));
3264 tprint_newline();
3265 line_ended();
3266 }
3267 }
3268
3269 static void
3270 print_stopped(struct tcb *tcp, const siginfo_t *si, const unsigned int sig)
3271 {
3272 if (cflag != CFLAG_ONLY_STATS
3273 && !hide_log(tcp)
3274 && is_number_in_set(sig, signal_set)) {
3275 printleader(tcp);
3276 if (si) {
3277 tprintf_string("--- %s ", sprintsigname(sig));
3278 printsiginfo(tcp, si);
3279 tprints_string(" ---");
3280 } else
3281 tprintf_string("--- stopped by %s ---", sprintsigname(sig));
3282 tprint_newline();
3283 line_ended();
3284
3285 #ifdef ENABLE_STACKTRACE
3286 if (stack_trace_enabled)
3287 unwind_tcb_print(tcp);
3288 #endif
3289 }
3290 }
3291
3292 static void
3293 startup_tcb(struct tcb *tcp)
3294 {
3295 debug_msg("pid %d has TCB_STARTUP, initializing it", tcp->pid);
3296
3297 tcp->flags &= ~TCB_STARTUP;
3298
3299 if (!use_seize) {
3300 debug_msg("setting opts 0x%x on pid %d",
3301 ptrace_setoptions, tcp->pid);
3302 if (ptrace(PTRACE_SETOPTIONS, tcp->pid, NULL, ptrace_setoptions) < 0) {
3303 if (errno != ESRCH) {
3304 /* Should never happen, really */
3305 perror_msg_and_die("PTRACE_SETOPTIONS");
3306 }
3307 }
3308 }
3309
3310 if ((tcp->flags & TCB_GRABBED) && (get_scno(tcp) == 1))
3311 tcp->s_prev_ent = tcp->s_ent;
3312
3313 if (cflag) {
3314 tcp->atime = tcp->stime;
3315 }
3316 }
3317
3318 static void
3319 print_event_exit(struct tcb *tcp)
3320 {
3321 if (entering(tcp) || filtered(tcp) || hide_log(tcp)
3322 || cflag == CFLAG_ONLY_STATS) {
3323 return;
3324 }
3325
3326 if (!output_separately && printing_tcp && printing_tcp != tcp
3327 && printing_tcp->curcol != 0 && !printing_tcp->staged_output_data) {
3328 set_current_tcp(printing_tcp);
3329 tprint_space();
3330 tprints_string("<unfinished ...>");
3331 tprint_newline();
3332 flush_tcp_output(printing_tcp);
3333 printing_tcp->curcol = 0;
3334 set_current_tcp(tcp);
3335 }
3336
3337 print_syscall_resume(tcp);
3338
3339 if (!(tcp->sys_func_rval & RVAL_DECODED)) {
3340 /*
3341 * The decoder has probably decided to print something
3342 * on exiting syscall which is not going to happen.
3343 */
3344 tprint_space();
3345 tprints_string("<unfinished ...>");
3346 }
3347
3348 tprints_string(") ");
3349 tabto();
3350 tprint_sysret_begin();
3351 tprints_sysret_next("retval");
3352 tprint_sysret_pseudo_rval();
3353 tprint_sysret_end();
3354 tprint_newline();
3355 if (!is_complete_set(status_set, NUMBER_OF_STATUSES)) {
3356 bool publish = is_number_in_set(STATUS_UNFINISHED, status_set);
3357 strace_close_memstream(tcp, publish);
3358 }
3359 line_ended();
3360 }
3361
3362 static size_t
3363 trace_wait_data_size(struct tcb *tcp)
3364 {
3365 return sizeof(struct tcb_wait_data);
3366 }
3367
3368 static struct tcb_wait_data *
3369 init_trace_wait_data(void *p)
3370 {
3371 struct tcb_wait_data *wd = p;
3372
3373 memset(wd, 0, sizeof(*wd));
3374
3375 return wd;
3376 }
3377
/*
 * Duplicate *wd via xobjdup() and return the copy.
 * The copy is released with free_trace_wait_data().
 */
static struct tcb_wait_data *
copy_trace_wait_data(const struct tcb_wait_data *wd)
{
	struct tcb_wait_data *const dup = xobjdup(wd);

	return dup;
}
3383
/*
 * Release a wait data object with free().
 * A NULL wd is accepted and ignored, as free(NULL) is a no-op.
 */
static void
free_trace_wait_data(struct tcb_wait_data *wd)
{
	free(wd);
}
3389
3390 static void
3391 tcb_wait_tab_check_size(const size_t size)
3392 {
3393 while (size >= tcb_wait_tab_size) {
3394 tcb_wait_tab = xgrowarray(tcb_wait_tab,
3395 &tcb_wait_tab_size,
3396 sizeof(tcb_wait_tab[0]));
3397 }
3398 }
3399
3400 static const struct tcb_wait_data *
3401 next_event(void)
3402 {
3403 if (interrupted)
3404 return NULL;
3405
3406 if (syscall_limit == 0) {
3407 if (!is_number_in_set(QUIET_ATTACH, quiet_set))
3408 error_msg("System call limit has been reached, detaching tracees");
3409 strace_child = 0;
3410 return NULL;
3411 }
3412
3413 invalidate_umove_cache();
3414
3415 struct tcb *tcp = NULL;
3416 struct list_item *elem;
3417
3418 static EMPTY_LIST(pending_tcps);
3419 /* Handle the queued tcbs before waiting for new events. */
3420 if (!list_is_empty(&pending_tcps))
3421 goto next_event_get_tcp;
3422
3423 static struct tcb *extra_tcp;
3424 static size_t wait_extra_data_idx;
3425 /* Handle the extra tcb event. */
3426 if (extra_tcp) {
3427 tcp = extra_tcp;
3428 extra_tcp = NULL;
3429 tcp->wait_data_idx = wait_extra_data_idx;
3430
3431 debug_msg("dequeued extra event for pid %u", tcp->pid);
3432 goto next_event_exit;
3433 }
3434
3435 /*
3436 * Used to exit simply when nprocs hits zero, but in this testcase:
3437 * int main(void) { _exit(!!fork()); }
3438 * under strace -f, parent sometimes (rarely) manages
3439 * to exit before we see the first stop of the child,
3440 * and we are losing track of it:
3441 * 19923 clone(...) = 19924
3442 * 19923 exit_group(1) = ?
3443 * 19923 +++ exited with 1 +++
3444 * Exiting only when wait() returns ECHILD works better.
3445 */
3446 if (popen_pid != 0) {
3447 /* However, if -o|logger is in use, we can't do that.
3448 * Can work around that by double-forking the logger,
3449 * but that loses the ability to wait for its completion
3450 * on exit. Oh well...
3451 */
3452 if (nprocs == 0)
3453 return NULL;
3454 }
3455
3456 const bool unblock_delay_timer = is_delay_timer_armed();
3457
3458 /*
3459 * The window of opportunity to handle expirations
3460 * of the delay timer opens here.
3461 *
3462 * Unblock the signal handler for the delay timer
3463 * iff the delay timer is already created.
3464 */
3465 if (unblock_delay_timer)
3466 sigprocmask(SIG_UNBLOCK, &timer_set, NULL);
3467
3468 /*
3469 * If the delay timer has expired, then its expiration
3470 * has been handled already by the signal handler.
3471 *
3472 * If the delay timer expires during wait4(),
3473 * then the system call will be interrupted and
3474 * the expiration will be handled by the signal handler.
3475 */
3476 int status;
3477 struct rusage ru;
3478 int pid = wait4(-1, &status, __WALL, (cflag ? &ru : NULL));
3479 int wait_errno = errno;
3480
3481 /*
3482 * The window of opportunity to handle expirations
3483 * of the delay timer closes here.
3484 *
3485 * Block the signal handler for the delay timer
3486 * iff it was unblocked earlier.
3487 */
3488 if (unblock_delay_timer) {
3489 sigprocmask(SIG_BLOCK, &timer_set, NULL);
3490
3491 if (restart_failed)
3492 return NULL;
3493 }
3494
3495 size_t wait_tab_pos = 0;
3496 bool wait_nohang = false;
3497
3498 /*
3499 * Wait for new events until wait4() returns 0 (meaning that there's
3500 * nothing more to wait for for now), or a second event for some tcb
3501 * appears (which may happen if a tracee was SIGKILL'ed, for example).
3502 */
3503 for (;;) {
3504 struct tcb_wait_data *wd;
3505
3506 if (pid < 0) {
3507 if (wait_errno == EINTR)
3508 break;
3509 if (wait_nohang)
3510 break;
3511 if (nprocs == 0 && wait_errno == ECHILD)
3512 return NULL;
3513 /*
3514 * If nprocs > 0, ECHILD is not expected,
3515 * treat it as any other error here:
3516 */
3517 errno = wait_errno;
3518 perror_msg_and_die("wait4(__WALL)");
3519 }
3520
3521 if (!pid)
3522 break;
3523
3524 if (pid == popen_pid) {
3525 if (!WIFSTOPPED(status))
3526 popen_pid = 0;
3527 break;
3528 }
3529
3530 if (debug_flag)
3531 print_debug_info(pid, status);
3532
3533 /* Look up 'pid' in our table. */
3534 tcp = pid2tcb(pid);
3535
3536 if (!tcp) {
3537 tcp = maybe_allocate_tcb(pid, status);
3538 if (!tcp)
3539 goto next_event_wait_next;
3540 }
3541
3542 if (cflag) {
3543 tcp->stime.tv_sec = ru.ru_stime.tv_sec;
3544 tcp->stime.tv_nsec = ru.ru_stime.tv_usec * 1000;
3545 }
3546
3547 tcb_wait_tab_check_size(wait_tab_pos);
3548
3549 /* Initialise a new wait data structure. */
3550 wd = tcb_wait_tab + wait_tab_pos;
3551 init_trace_wait_data(wd);
3552 wd->status = status;
3553
3554 if (WIFSIGNALED(status)) {
3555 wd->te = TE_SIGNALLED;
3556 } else if (WIFEXITED(status)) {
3557 wd->te = TE_EXITED;
3558 } else {
3559 /*
3560 * As WCONTINUED flag has not been specified to wait4,
3561 * it cannot be WIFCONTINUED(status), so the only case
3562 * that remains is WIFSTOPPED(status).
3563 */
3564
3565 const unsigned int sig = WSTOPSIG(status);
3566 const unsigned int event = (unsigned int) status >> 16;
3567
3568 switch (event) {
3569 case 0:
3570 /*
3571 * Is this post-attach SIGSTOP?
3572 * Interestingly, the process may stop
3573 * with STOPSIG equal to some other signal
3574 * than SIGSTOP if we happened to attach
3575 * just before the process takes a signal.
3576 */
3577 if (sig == SIGSTOP &&
3578 (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)) {
3579 debug_func_msg("ignored SIGSTOP on "
3580 "pid %d", tcp->pid);
3581 tcp->flags &= ~TCB_IGNORE_ONE_SIGSTOP;
3582 wd->te = TE_RESTART;
3583 } else if (sig == syscall_trap_sig) {
3584 wd->te = TE_SYSCALL_STOP;
3585 } else {
3586 /*
3587 * True if tracee is stopped by signal
3588 * (as opposed to "tracee received
3589 * signal").
3590 * TODO: shouldn't we check for
3591 * errno == EINVAL too?
3592 * We can get ESRCH instead, you know...
3593 */
3594 bool stopped = ptrace(PTRACE_GETSIGINFO,
3595 pid, 0, &wd->si) < 0;
3596
3597 wd->te = stopped ? TE_GROUP_STOP
3598 : TE_SIGNAL_DELIVERY_STOP;
3599 }
3600 break;
3601 case PTRACE_EVENT_STOP:
3602 /*
3603 * PTRACE_INTERRUPT-stop or group-stop.
3604 * PTRACE_INTERRUPT-stop has sig == SIGTRAP here.
3605 */
3606 switch (sig) {
3607 case SIGSTOP:
3608 case SIGTSTP:
3609 case SIGTTIN:
3610 case SIGTTOU:
3611 wd->te = TE_GROUP_STOP;
3612 break;
3613 default:
3614 wd->te = TE_RESTART;
3615 }
3616 break;
3617 case PTRACE_EVENT_EXEC:
3618 /*
3619 * TODO: shouldn't we check for
3620 * errno == EINVAL here, too?
3621 * We can get ESRCH instead, you know...
3622 */
3623 if (ptrace(PTRACE_GETEVENTMSG, pid, NULL,
3624 &wd->msg) < 0)
3625 wd->msg = 0;
3626
3627 wd->te = TE_STOP_BEFORE_EXECVE;
3628 break;
3629 case PTRACE_EVENT_EXIT:
3630 wd->te = TE_STOP_BEFORE_EXIT;
3631 break;
3632 case PTRACE_EVENT_SECCOMP:
3633 wd->te = TE_SECCOMP;
3634 break;
3635 default:
3636 wd->te = TE_RESTART;
3637 }
3638 }
3639
3640 if (!wd->te)
3641 error_func_msg("Tracing event hasn't been determined "
3642 "for pid %d, status %0#x", pid, status);
3643
3644 if (!list_is_empty(&tcp->wait_list)) {
3645 wait_extra_data_idx = wait_tab_pos;
3646 extra_tcp = tcp;
3647 debug_func_msg("queued extra pid %d", tcp->pid);
3648 } else {
3649 tcp->wait_data_idx = wait_tab_pos;
3650 list_append(&pending_tcps, &tcp->wait_list);
3651 debug_func_msg("queued pid %d", tcp->pid);
3652 }
3653
3654 wait_tab_pos++;
3655
3656 if (extra_tcp)
3657 break;
3658
3659 next_event_wait_next:
3660 pid = wait4(-1, &status, __WALL | WNOHANG, (cflag ? &ru : NULL));
3661 wait_errno = errno;
3662 wait_nohang = true;
3663 }
3664
3665 next_event_get_tcp:
3666 elem = list_remove_head(&pending_tcps);
3667
3668 if (!elem) {
3669 tcb_wait_tab_check_size(0);
3670 memset(tcb_wait_tab, 0, sizeof(*tcb_wait_tab));
3671 tcb_wait_tab->te = TE_NEXT;
3672
3673 return tcb_wait_tab;
3674 } else {
3675 tcp = list_elem(elem, struct tcb, wait_list);
3676 debug_func_msg("dequeued pid %d", tcp->pid);
3677 }
3678
3679 next_event_exit:
3680 /* Is this the very first time we see this tracee stopped? */
3681 if (tcp->flags & TCB_STARTUP)
3682 startup_tcb(tcp);
3683
3684 clear_regs(tcp);
3685
3686 /* Set current output file */
3687 set_current_tcp(tcp);
3688
3689 return tcb_wait_tab + tcp->wait_data_idx;
3690 }
3691
/*
 * Process one syscall stop of tcp: the entering half when tcp is entering
 * a syscall, the exiting half otherwise.
 *
 * On entry, a decode result of 0 is returned immediately; a result of 1
 * leads to tracing the entry (which may set *sig); in the remaining cases
 * the decode result itself is passed to the finish step and returned.
 * On exit, a non-zero decode result leads to tracing the exit.
 */
static int
trace_syscall(struct tcb *tcp, unsigned int *sig)
{
	int res;

	if (!entering(tcp)) {
		struct timespec ts = { 0 };

		res = syscall_exiting_decode(tcp, &ts);
		if (res != 0)
			res = syscall_exiting_trace(tcp, &ts, res);
		syscall_exiting_finish(tcp);
		return res;
	}

	res = syscall_entering_decode(tcp);
	if (res == 0)
		return 0;
	if (res == 1)
		res = syscall_entering_trace(tcp, sig);
	syscall_entering_finish(tcp, res);
	return res;
}
3715
3716 /* Returns true iff the main trace loop has to continue. */
3717 static bool
3718 dispatch_event(const struct tcb_wait_data *wd)
3719 {
3720 unsigned int restart_op;
3721 unsigned int restart_sig = 0;
3722 enum trace_event te = wd ? wd->te : TE_BREAK;
3723 /*
3724 * Copy wd->status to a non-const variable to workaround glibc bugs
3725 * around union wait fixed by glibc commit glibc-2.24~391
3726 */
3727 int status = wd ? wd->status : 0;
3728
3729 if (current_tcp && has_seccomp_filter(current_tcp))
3730 restart_op = seccomp_filter_restart_operator(current_tcp);
3731 else
3732 restart_op = PTRACE_SYSCALL;
3733
3734 switch (te) {
3735 case TE_BREAK:
3736 return false;
3737
3738 case TE_NEXT:
3739 return true;
3740
3741 case TE_RESTART:
3742 break;
3743
3744 case TE_SECCOMP:
3745 if (!has_seccomp_filter(current_tcp)) {
3746 /*
3747 * We don't know if forks/clones have a seccomp filter
3748 * when they are created, but we can detect it when we
3749 * have a seccomp-stop.
3750 * In such a case, if !seccomp_before_sysentry, we have
3751 * already processed the syscall entry, so we avoid
3752 * processing it a second time.
3753 */
3754 current_tcp->flags |= TCB_SECCOMP_FILTER;
3755 restart_op = PTRACE_SYSCALL;
3756 break;
3757 }
3758
3759 if (seccomp_before_sysentry) {
3760 restart_op = PTRACE_SYSCALL;
3761 break;
3762 }
3763 ATTRIBUTE_FALLTHROUGH;
3764
3765 case TE_SYSCALL_STOP:
3766 if (trace_syscall(current_tcp, &restart_sig) < 0) {
3767 /*
3768 * ptrace() failed in trace_syscall().
3769 * Likely a result of process disappearing mid-flight.
3770 * Observed case: exit_group() or SIGKILL terminating
3771 * all processes in thread group.
3772 * We assume that ptrace error was caused by process death.
3773 * We used to detach(current_tcp) here, but since we no
3774 * longer implement "detach before death" policy/hack,
3775 * we can let this process to report its death to us
3776 * normally, via WIFEXITED or WIFSIGNALED wait status.
3777 */
3778 return true;
3779 }
3780 if (has_seccomp_filter(current_tcp)) {
3781 /*
3782 * Syscall and seccomp stops can happen in different
3783 * orders depending on kernel. strace tests this in
3784 * check_seccomp_order_tracer().
3785 *
3786 * Linux 3.5--4.7:
3787 * (seccomp-stop before syscall-entry-stop)
3788 * +--> seccomp-stop ->-PTRACE_SYSCALL->-+
3789 * | |
3790 * PTRACE_CONT syscall-entry-stop
3791 * | |
3792 * syscall-exit-stop <---PTRACE_SYSCALL-----<----+
3793 *
3794 * Linux 4.8+:
3795 * (seccomp-stop after syscall-entry-stop)
3796 * syscall-entry-stop
3797 *
3798 * +---->-----PTRACE_CONT---->----+
3799 * | |
3800 * syscall-exit-stop seccomp-stop
3801 * | |
3802 * +----<----PTRACE_SYSCALL---<---+
3803 *
3804 * Note in Linux 4.8+, we restart in PTRACE_CONT
3805 * after syscall-exit to skip the syscall-entry-stop.
3806 * The next seccomp-stop will be treated as a syscall
3807 * entry.
3808 *
3809 * The line below implements this behavior.
3810 * Note that exiting(current_tcp) actually marks
3811 * a syscall-entry-stop because the flag was inverted
3812 * in the above call to trace_syscall.
3813 */
3814 restart_op = exiting(current_tcp) ? PTRACE_SYSCALL : PTRACE_CONT;
3815 }
3816 break;
3817
3818 case TE_SIGNAL_DELIVERY_STOP:
3819 restart_sig = WSTOPSIG(status);
3820 print_stopped(current_tcp, &wd->si, restart_sig);
3821 break;
3822
3823 case TE_SIGNALLED:
3824 print_signalled(current_tcp, current_tcp->pid, status);
3825 droptcb(current_tcp);
3826 return true;
3827
3828 case TE_GROUP_STOP:
3829 restart_sig = WSTOPSIG(status);
3830 print_stopped(current_tcp, NULL, restart_sig);
3831 if (use_seize) {
3832 /*
3833 * This ends ptrace-stop, but does *not* end group-stop.
3834 * This makes stopping signals work properly on straced
3835 * process (that is, process really stops. It used to
3836 * continue to run).
3837 */
3838 restart_op = PTRACE_LISTEN;
3839 restart_sig = 0;
3840 }
3841 break;
3842
3843 case TE_EXITED:
3844 print_exited(current_tcp, current_tcp->pid, status);
3845 droptcb(current_tcp);
3846 return true;
3847
3848 case TE_STOP_BEFORE_EXECVE:
3849 /* The syscall succeeded, clear the flag. */
3850 current_tcp->flags &= ~TCB_CHECK_EXEC_SYSCALL;
3851 /*
3852 * Check that we are inside syscall now (next event after
3853 * PTRACE_EVENT_EXEC should be for syscall exiting). If it is
3854 * not the case, we might have a situation when we attach to a
3855 * process and the first thing we see is a PTRACE_EVENT_EXEC
3856 * and all the following syscall state tracking is screwed up
3857 * otherwise.
3858 */
3859 if (!maybe_switch_current_tcp() && entering(current_tcp)) {
3860 int ret;
3861
3862 error_msg("Stray PTRACE_EVENT_EXEC from pid %d"
3863 ", trying to recover...",
3864 current_tcp->pid);
3865
3866 current_tcp->flags |= TCB_RECOVERING;
3867 ret = trace_syscall(current_tcp, &restart_sig);
3868 current_tcp->flags &= ~TCB_RECOVERING;
3869
3870 if (ret < 0) {
3871 /* The reason is described in TE_SYSCALL_STOP */
3872 return true;
3873 }
3874 }
3875
3876 if (detach_on_execve) {
3877 if (current_tcp->flags & TCB_SKIP_DETACH_ON_FIRST_EXEC) {
3878 current_tcp->flags &= ~TCB_SKIP_DETACH_ON_FIRST_EXEC;
3879 } else {
3880 detach(current_tcp); /* do "-b execve" thingy */
3881 return true;
3882 }
3883 }
3884 break;
3885
3886 case TE_STOP_BEFORE_EXIT:
3887 print_event_exit(current_tcp);
3888 break;
3889 }
3890
3891 /* We handled quick cases, we are permitted to interrupt now. */
3892 if (interrupted)
3893 return false;
3894
3895 /* If the process is being delayed, do not ptrace_restart just yet */
3896 if (syscall_delayed(current_tcp)) {
3897 if (current_tcp->delayed_wait_data)
3898 error_func_msg("pid %d has delayed wait data set"
3899 " already", current_tcp->pid);
3900
3901 current_tcp->delayed_wait_data = copy_trace_wait_data(wd);
3902
3903 return true;
3904 }
3905
3906 if (ptrace_restart(restart_op, current_tcp, restart_sig) < 0) {
3907 /* Note: ptrace_restart emitted error message */
3908 exit_code = 1;
3909 return false;
3910 }
3911 return true;
3912 }
3913
3914 static bool
3915 restart_delayed_tcb(struct tcb *const tcp)
3916 {
3917 struct tcb_wait_data *wd = tcp->delayed_wait_data;
3918
3919 if (!wd) {
3920 error_func_msg("No delayed wait data found for pid %d",
3921 tcp->pid);
3922 wd = init_trace_wait_data(alloca(trace_wait_data_size(tcp)));
3923 }
3924
3925 wd->te = TE_RESTART;
3926
3927 debug_func_msg("pid %d", tcp->pid);
3928
3929 tcp->flags &= ~TCB_DELAYED;
3930
3931 struct tcb *const prev_tcp = current_tcp;
3932 current_tcp = tcp;
3933 bool ret = dispatch_event(wd);
3934 current_tcp = prev_tcp;
3935
3936 free_trace_wait_data(tcp->delayed_wait_data);
3937 tcp->delayed_wait_data = NULL;
3938
3939 return ret;
3940 }
3941
3942 static bool
3943 restart_delayed_tcbs(void)
3944 {
3945 struct tcb *tcp_next = NULL;
3946 struct timespec ts_now;
3947
3948 clock_gettime(CLOCK_MONOTONIC, &ts_now);
3949
3950 for (size_t i = 0; i < tcbtabsize; i++) {
3951 struct tcb *tcp = tcbtab[i];
3952
3953 if (tcp->pid && syscall_delayed(tcp)) {
3954 if (ts_cmp(&ts_now, &tcp->delay_expiration_time) > 0) {
3955 if (!restart_delayed_tcb(tcp))
3956 return false;
3957 } else {
3958 /* Check whether this tcb is the next. */
3959 if (!tcp_next ||
3960 ts_cmp(&tcp_next->delay_expiration_time,
3961 &tcp->delay_expiration_time) > 0) {
3962 tcp_next = tcp;
3963 }
3964 }
3965 }
3966 }
3967
3968 if (tcp_next)
3969 arm_delay_timer(tcp_next);
3970
3971 return true;
3972 }
3973
3974 /*
3975 * As this signal handler does a lot of work that is not suitable
3976 * for signal handlers, extra care must be taken to ensure that
3977 * it is enabled only in those places where it's safe.
3978 */
3979 static void
3980 timer_sighandler(int sig)
3981 {
3982 delay_timer_expired();
3983
3984 if (restart_failed)
3985 return;
3986
3987 int saved_errno = errno;
3988
3989 if (!restart_delayed_tcbs())
3990 restart_failed = 1;
3991
3992 errno = saved_errno;
3993 }
3994
3995 static void ATTRIBUTE_NORETURN
3996 terminate(void)
3997 {
3998 int sig = interrupted;
3999
4000 cleanup(sig);
4001 if (cflag)
4002 call_summary(shared_log);
4003 fflush(NULL);
4004 if (shared_log != stderr)
4005 fclose(shared_log);
4006 if (popen_pid) {
4007 while (waitpid(popen_pid, NULL, 0) < 0 && errno == EINTR)
4008 ;
4009 }
4010 if (sig) {
4011 exit_code = 0x100 | sig;
4012 }
4013 if (exit_code > 0xff) {
4014 /* Avoid potential core file clobbering. */
4015 struct_rlimit rlim = {0, 0};
4016 set_rlimit(RLIMIT_CORE, &rlim);
4017
4018 /* Child was killed by a signal, mimic that. */
4019 exit_code &= 0xff;
4020 signal(exit_code, SIG_DFL);
4021 print_totd();
4022 GCOV_DUMP;
4023 raise(exit_code);
4024
4025 /* Unblock the signal. */
4026 sigset_t mask;
4027 sigemptyset(&mask);
4028 sigaddset(&mask, exit_code);
4029 GCOV_DUMP;
4030 sigprocmask(SIG_UNBLOCK, &mask, NULL);
4031
4032 /* Paranoia - what if this signal is not fatal?
4033 Exit with 128 + signo then. */
4034 exit_code += 128;
4035 }
4036
4037 print_totd();
4038 exit(exit_code);
4039 }
4040
4041 int
4042 main(int argc, char *argv[])
4043 {
4044 setlocale(LC_ALL, "");
4045 init(argc, argv);
4046
4047 exit_code = !nprocs;
4048
4049 while (dispatch_event(next_event()))
4050 ;
4051 terminate();
4052 }