1 /*
2 * Copyright (c) 2020-2021 Ákos Uzonyi <uzonyi.akos@gmail.com>
3 * All rights reserved.
4 *
5 * SPDX-License-Identifier: LGPL-2.1-or-later
6 */
7
8 #include "defs.h"
9
10
11 #include <dirent.h>
12 #include <fcntl.h>
13 #include <stdint.h>
14 #include <string.h>
15 #include <unistd.h>
16
17 #include <asm/unistd.h>
18
19 #include <sys/ioctl.h>
20 #include <sys/param.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23
24 #include <linux/nsfs.h>
25 #include "largefile_wrappers.h"
26 #include "number_set.h"
27 #include "trie.h"
28 #include "xmalloc.h"
29 #include "xstring.h"
30
/**
 * Cache translating an ID inside a PID namespace to a /proc PID.
 * There is one top-level trie per ID type (the array is indexed by
 * enum pid_type).
 *
 * Key: PID NS ID (the inode number of the namespace file, as stored by
 *      get_ns_hierarchy())
 * Value: a trie:
 *	Key: a process PID in NS
 *	Value: the process's PID as present in /proc
 */
static struct trie *ns_pid_to_proc_pid[PT_COUNT];

/**
 * Cache of per-process namespace information.
 *
 * Key: Proc PID
 * Value: pointer to a calloc()ed struct proc_data, or 0 when there is
 *        no entry for that PID
 */
static struct trie *proc_data_cache;

/*
 * Becomes true once ioctl(NS_GET_PARENT) has failed with ENOTTY.
 * While set, get_ns_hierarchy() no longer tries to ascend to parent
 * namespaces and translate_pid() gives up on non-trivial translations.
 */
static bool ns_get_parent_enotty = false;
46
/*
 * Field prefixes handed to proc_status_get_id_list() in order to locate
 * the NS* record for each ID type.
 */
static const char tid_str[]  = "NSpid:\t";
static const char tgid_str[] = "NStgid:\t";
static const char pgid_str[] = "NSpgid:\t";
static const char sid_str[]  = "NSsid:\t";

/*
 * Lookup table from enum pid_type to the matching field prefix.
 * "size" is the prefix length without the terminating NUL byte.
 */
static const struct {
	const char *str;
	size_t size;
} id_strs[PT_COUNT] = {
	[PT_TID] =  { tid_str,  sizeof(tid_str) -  1 },
	[PT_TGID] = { tgid_str, sizeof(tgid_str) - 1 },
	[PT_PGID] = { pgid_str, sizeof(pgid_str) - 1 },
	[PT_SID] =  { sid_str,  sizeof(sid_str) -  1 },
};
61
62
/**
 * Limit on PID NS hierarchy depth, imposed since Linux 3.7.  NS traversal
 * is not possible before Linux 4.9, so we consider this limit pretty universal.
 */
#define MAX_NS_DEPTH 32

/* Width, in bits, of a PID NS ID; used as the key size of the NS tries. */
static const size_t ns_id_size = sizeof(unsigned int) * 8;
/*
 * 6 when pointers are 8 bytes wide, 5 otherwise, i.e. log2 of the pointer
 * width in bits.  Passed as item_size_lg for tries whose values are pointers.
 */
static const uint8_t ptr_sz_lg = (sizeof(void *) == 8 ? 6 : 5);

/*
 * pid_max holds the value read from /proc/sys/kernel/pid_max by pidns_init().
 * pid_max_size and pid_max_size_lg are derived from it there with ilog2_32()
 * and serve as key size and item_size_lg for the PID-keyed tries.
 * NOTE(review): presumably "bits needed for pid_max - 1" and the log2 of
 * that rounded up -- confirm against ilog2_32() and trie_create().
 */
static int pid_max;
static uint8_t pid_max_size, pid_max_size_lg;
74
/**
 * Namespace information cached for one process, keyed by its /proc PID
 * in proc_data_cache.
 */
struct proc_data {
	/* PID of the process as present in /proc */
	int proc_pid;
	/* Number of valid entries in ns_hierarchy */
	int ns_count;
	/*
	 * PID NS IDs as filled in by get_ns_hierarchy(): entry 0 is the
	 * process's own namespace, each following entry is the parent of
	 * the previous one.
	 */
	unsigned int ns_hierarchy[MAX_NS_DEPTH];
	/* Number of valid entries in id_hierarchy[type], per ID type */
	int id_count[PT_COUNT];
	/* IDs as filled in by get_id_list(), per ID type */
	int id_hierarchy[PT_COUNT][MAX_NS_DEPTH];
};
82
/**
 * Creates a trie that uses 4 bits for both node_key_bits and
 * data_block_key_bits, and aborts via error_msg_and_die() when the trie
 * cannot be created.
 *
 * With 4-bit keys the trie height is 32 / 4 = 8 and a node occupies
 * 8 byte * 2^4 = 128 bytes, which seems to be a good tradeoff between memory
 * usage and lookup time.  Larger nodes would be wasteful: PIDs can be
 * separated by large holes, so big nodes would mostly hold NULL pointers.
 *
 * @param key_size      Key size, forwarded to trie_create().
 * @param item_size_lg  Item size logarithm, forwarded to trie_create().
 * @param empty_value   Value reported for absent keys, forwarded to
 *                      trie_create().
 * @return The newly created trie.
 */
static struct trie *
create_trie_4(uint8_t key_size, uint8_t item_size_lg, uint64_t empty_value)
{
	struct trie *const trie =
		trie_create(key_size, item_size_lg, 4, 4, empty_value);

	if (trie == NULL)
		error_msg_and_die("creating trie failed");

	return trie;
}
101
102 void
103 pidns_init(void)
104 {
105 if (proc_data_cache)
106 return;
107
108 pid_max = INT_MAX;
109 if (read_int_from_file("/proc/sys/kernel/pid_max", &pid_max) < 0)
110 debug_func_perror_msg("reading /proc/sys/kernel/pid_max");
111 pid_max_size = ilog2_32(pid_max - 1) + 1;
112 pid_max_size_lg = ilog2_32(pid_max_size - 1) + 1;
113
114 for (int i = 0; i < PT_COUNT; i++)
115 ns_pid_to_proc_pid[i] = create_trie_4(ns_id_size, ptr_sz_lg, 0);
116
117 proc_data_cache = create_trie_4(pid_max_size, ptr_sz_lg, 0);
118 }
119
120 static void
121 put_proc_pid(unsigned int ns, int ns_pid, enum pid_type type, int proc_pid)
122 {
123 struct trie *b = (struct trie *) (uintptr_t) trie_get(ns_pid_to_proc_pid[type], ns);
124 if (!b) {
125 b = create_trie_4(pid_max_size, pid_max_size_lg, 0);
126 trie_set(ns_pid_to_proc_pid[type], ns, (uint64_t) (uintptr_t) b);
127 }
128 trie_set(b, ns_pid, proc_pid);
129 }
130
131 static int
132 get_cached_proc_pid(unsigned int ns, int ns_pid, enum pid_type type)
133 {
134 struct trie *b = (struct trie *) (uintptr_t)
135 trie_get(ns_pid_to_proc_pid[type], ns);
136 if (!b)
137 return 0;
138
139 return trie_get(b, ns_pid);
140 }
141
142 /**
143 * Returns a list of PID NS IDs for the specified PID.
144 *
145 * @param proc_pid PID (as present in /proc) to get information for.
146 * @param ns_buf Pointer to buffer that is able to contain at least
147 * ns_buf_size items.
148 * @return Amount of NS in list. 0 indicates error.
149 */
150 static size_t
151 get_ns_hierarchy(int proc_pid, unsigned int *ns_buf, size_t ns_buf_size)
152 {
153 char path[PATH_MAX + 1];
154 xsprintf(path, "/proc/%s/ns/pid", pid_to_str(proc_pid));
155
156 int fd = open_file(path, O_RDONLY);
157 if (fd < 0)
158 return 0;
159
160 size_t n = 0;
161 while (n < ns_buf_size) {
162 strace_stat_t st;
163 if (fstat_fd(fd, &st))
164 break;
165
166 ns_buf[n++] = st.st_ino;
167 if (n >= ns_buf_size)
168 break;
169
170 if (ns_get_parent_enotty)
171 break;
172
173 int parent_fd = ioctl(fd, NS_GET_PARENT);
174 if (parent_fd < 0) {
175 switch (errno) {
176 case EPERM:
177 break;
178
179 case ENOTTY:
180 ns_get_parent_enotty = true;
181 error_msg("NS_* ioctl commands are not "
182 "supported by the kernel");
183 break;
184
185 default:
186 perror_func_msg("ioctl(NS_GET_PARENT)");
187 break;
188 }
189
190 break;
191 }
192
193 close(fd);
194 fd = parent_fd;
195 }
196
197 close(fd);
198
199 return n;
200 }
201
202 /**
203 * Get list of IDs present in NS* proc status record. IDs are placed as they are
204 * stored in /proc (from top to bottom of NS hierarchy).
205 *
206 * @param proc_pid PID (as present in /proc) to get information for.
207 * @param id_buf Pointer to buffer that is able to contain at least
208 * MAX_NS_DEPTH items. Can be NULL.
209 * @param type Type of ID requested.
210 * @return Number of items stored in id_list. 0 indicates error.
211 */
212 static size_t
213 get_id_list(int proc_pid, int *id_buf, enum pid_type type)
214 {
215 return proc_status_get_id_list(proc_pid, id_buf, MAX_NS_DEPTH,
216 id_strs[type].str, id_strs[type].size);
217 }
218
219 /**
220 * Returns whether the /proc filesystem's PID namespace is the same as strace's.
221 */
222 static bool
223 is_proc_ours(void)
224 {
225 static int cached_val = -1;
226
227 if (cached_val < 0)
228 cached_val = get_id_list(0, NULL, PT_TID) <= 1;
229
230 return cached_val;
231 }
232
233 /**
234 * Returns the PID namespace of the tracee
235 */
236 static unsigned int
237 get_ns(struct tcb *tcp)
238 {
239 if (!tcp->pid_ns) {
240 int proc_pid = 0;
241 translate_pid(NULL, tcp->pid, PT_TID, &proc_pid);
242
243 if (proc_pid)
244 get_ns_hierarchy(proc_pid, &tcp->pid_ns, 1);
245 }
246
247 return tcp->pid_ns;
248 }
249
/**
 * Returns the PID namespace of strace.
 *
 * The lookup is attempted exactly once; its outcome (including a failed
 * lookup, which leaves 0) is kept in static storage for later calls.
 */
static unsigned int
get_our_ns(void)
{
	static bool looked_up = false;
	static unsigned int ns_id = 0;

	if (looked_up)
		return ns_id;

	get_ns_hierarchy(0, &ns_id, 1);
	looked_up = true;

	return ns_id;
}
266
267 /**
268 * Returns the cached proc_data struct associated with proc_pid.
269 * If none found, allocates a new proc_data.
270 */
271 static struct proc_data *
272 get_or_create_proc_data(int proc_pid)
273 {
274 struct proc_data *pd = (struct proc_data *) (uintptr_t)
275 trie_get(proc_data_cache, proc_pid);
276
277 if (!pd) {
278 pd = calloc(1, sizeof(*pd));
279 if (!pd)
280 return NULL;
281
282 pd->proc_pid = proc_pid;
283 trie_set(proc_data_cache, proc_pid, (uint64_t) (uintptr_t) pd);
284 }
285
286 return pd;
287 }
288
289 /**
290 * Updates the proc_data from /proc
291 * If the process does not exists, returns false, and frees the proc_data
292 */
293 static bool
294 update_proc_data(struct proc_data *pd, enum pid_type type)
295 {
296 pd->ns_count = get_ns_hierarchy(pd->proc_pid,
297 pd->ns_hierarchy, MAX_NS_DEPTH);
298 if (!pd->ns_count)
299 goto fail;
300
301 pd->id_count[type] = get_id_list(pd->proc_pid,
302 pd->id_hierarchy[type], type);
303 if (!pd->id_count[type])
304 goto fail;
305
306 return true;
307
308 fail:
309 trie_set(proc_data_cache, pd->proc_pid, (uint64_t) (uintptr_t) NULL);
310 free(pd);
311 return false;
312 }
313
/**
 * Parameters for id translation.
 *
 * The first three members are inputs; result_id and pd are outputs that
 * translate_id_proc_pid() resets on every call.  pd is additionally read as
 * an input by translate_id_proc_pid() when it is called with proc_pid 0.
 */
struct translate_id_params {
	/* The namespace to be translated from */
	unsigned int from_ns;
	/* The id to be translated */
	int from_id;
	/* The type of the id */
	enum pid_type type;

	/* The result (output); 0 means no translation was found */
	int result_id;
	/* The proc data of the process (output); NULL when nothing was found */
	struct proc_data *pd;
};
330
331 /**
332 * Translates an id to our namespace, given the proc_pid of the process,
333 * by reading files in /proc.
334 *
335 * @param tip The parameters
336 * @param proc_pid The proc pid of the process.
337 * If 0, use the cached values in tip->pd.
338 */
339 static void
340 translate_id_proc_pid(struct translate_id_params *tip, int proc_pid)
341 {
342 struct proc_data *pd = proc_pid ?
343 get_or_create_proc_data(proc_pid) :
344 tip->pd;
345
346 tip->result_id = 0;
347 tip->pd = NULL;
348
349 if (!pd)
350 return;
351
352 if (proc_pid && !update_proc_data(pd, tip->type))
353 return;
354
355 if (!pd->ns_count || pd->id_count[tip->type] < pd->ns_count)
356 return;
357
358 int *id_hierarchy = pd->id_hierarchy[tip->type];
359 int id_count = pd->id_count[tip->type];
360
361 for (int i = 0; i < pd->ns_count; i++) {
362 unsigned int ns = pd->ns_hierarchy[i];
363 int ns_id = id_hierarchy[id_count - i - 1];
364 int our_id = id_hierarchy[id_count - pd->ns_count];
365
366 if (ns != tip->from_ns)
367 continue;
368
369 if (ns_id != tip->from_id)
370 return;
371
372 tip->result_id = our_id;
373 tip->pd = pd;
374 return;
375 }
376 }
377
378 /**
379 * Translates an id to our namespace by reading all proc entries in a directory.
380 * The directory is either /proc or /proc/<pid>/task.
381 *
382 *
383 * @param tip The parameters
384 * @param path The path of the directory to be read.
385 * @param read_task_dir Whether recurse to "task" subdirectory.
386 */
387 static void
388 translate_id_dir(struct translate_id_params *tip, const char *path,
389 bool read_task_dir)
390 {
391 DIR *dir = opendir(path);
392 if (!dir) {
393 debug_func_perror_msg("opening dir: %s", path);
394 return;
395 }
396
397 while (!tip->result_id) {
398 errno = 0;
399 struct_dirent *entry = read_dir(dir);
400 if (!entry) {
401 if (errno)
402 perror_func_msg("readdir");
403
404 break;
405 }
406
407 if (entry->d_type != DT_DIR)
408 continue;
409
410 errno = 0;
411 long proc_pid = strtol(entry->d_name, NULL, 10);
412 if (proc_pid < 1 || proc_pid > INT_MAX || errno)
413 continue;
414
415 if (read_task_dir) {
416 char task_dir_path[PATH_MAX + 1];
417 xsprintf(task_dir_path, "/proc/%ld/task", proc_pid);
418 translate_id_dir(tip, task_dir_path, false);
419 }
420
421 if (tip->result_id)
422 break;
423
424 translate_id_proc_pid(tip, proc_pid);
425 }
426
427 closedir(dir);
428 }
429
430 /**
431 * Iterator function of the proc_data_cache for id translation.
432 * If the cache contains the id we are looking for, reads the corresponding
433 * directory in /proc, and if cache is valid, saves the result.
434 */
435 static void
436 proc_data_cache_iterator_fn(void* fn_data, uint64_t key, uint64_t val)
437 {
438 struct translate_id_params *tip = (struct translate_id_params *)fn_data;
439 struct proc_data *pd = (struct proc_data *) (uintptr_t) val;
440
441 if (!pd)
442 return;
443
444 /* Result already found in an earlier iteration */
445 if (tip->result_id)
446 return;
447
448 /* Translate from cache */
449 tip->pd = pd;
450 translate_id_proc_pid(tip, 0);
451 if (!tip->result_id)
452 return;
453
454 /* Now translate from actual data in /proc, to check cache validity */
455 translate_id_proc_pid(tip, pd->proc_pid);
456 }
457
458 int
459 translate_pid(struct tcb *tcp, int from_id, enum pid_type type,
460 int *proc_pid_ptr)
461 {
462 if (from_id <= 0 || type < 0 || type >= PT_COUNT)
463 return 0;
464
465 /* If translation is trivial */
466 if ((!tcp || get_ns(tcp) == get_our_ns()) &&
467 (!proc_pid_ptr || is_proc_ours())) {
468 if (proc_pid_ptr)
469 *proc_pid_ptr = from_id;
470
471 return from_id;
472 }
473
474 struct translate_id_params tip = {
475 .from_ns = tcp ? get_ns(tcp) : get_our_ns(),
476 .from_id = from_id,
477 .type = type,
478 .result_id = 0,
479 .pd = NULL,
480 };
481
482 if (!tip.from_ns)
483 return 0;
484
485 if (ns_get_parent_enotty)
486 return 0;
487
488 /* Look for a cached proc_pid for this (from_ns, from_id) pair */
489 int cached_proc_pid = get_cached_proc_pid(tip.from_ns, tip.from_id,
490 tip.type);
491 if (cached_proc_pid) {
492 translate_id_proc_pid(&tip, cached_proc_pid);
493 if (tip.result_id)
494 goto exit;
495 }
496
497 /* Iterate through the cache, find potential proc_data */
498 trie_iterate_keys(proc_data_cache, 0, pid_max - 1,
499 proc_data_cache_iterator_fn, &tip);
500 /* (proc_data_cache_iterator_fn takes care about updating proc_data) */
501 if (tip.result_id)
502 goto exit;
503
504 /* No cache helped, read all entries in /proc */
505 translate_id_dir(&tip, "/proc", true);
506
507 exit:
508 if (tip.pd) {
509 if (tip.pd->proc_pid)
510 put_proc_pid(tip.from_ns, tip.from_id, tip.type,
511 tip.pd->proc_pid);
512
513 if (proc_pid_ptr)
514 *proc_pid_ptr = tip.pd->proc_pid;
515 }
516
517 return tip.result_id;
518 }
519
520 int
521 get_proc_pid(int pid)
522 {
523 int proc_pid = 0;
524 translate_pid(NULL, pid, PT_TID, &proc_pid);
525 return proc_pid;
526 }
527
528 static void
529 printpid_translation(struct tcb *tcp, int pid, enum pid_type type)
530 {
531 bool print_ns_translation =
532 is_number_in_set(DECODE_PID_NS_TRANSLATION, decode_pid_set);
533 bool print_comm =
534 is_number_in_set(DECODE_PID_COMM, decode_pid_set) &&
535 (type == PT_TID || type == PT_TGID);
536
537 if (print_ns_translation || print_comm) {
538 int strace_pid = translate_pid(tcp, pid, type, NULL);
539 if (strace_pid) {
540 if (print_comm)
541 print_pid_comm(strace_pid);
542 if (print_ns_translation && strace_pid != pid)
543 tprintf_comment("%d in strace's PID NS",
544 strace_pid);
545 }
546 }
547 }
548
/**
 * Prints an id of the given type as a decimal value, followed by whatever
 * decorations printpid_translation() emits for it.
 *
 * @param tcp   Tracee whose PID namespace the id belongs to.
 * @param pid   The id to print.
 * @param type  Type of the id.
 */
void
printpid(struct tcb *tcp, int pid, enum pid_type type)
{
	PRINT_VAL_D(pid);
	printpid_translation(tcp, pid, type);
}
555
556 void
557 printpid_tgid_pgid(struct tcb *tcp, int pid)
558 {
559 PRINT_VAL_D(pid);
560 if (pid > 0)
561 printpid_translation(tcp, pid, PT_TGID);
562 else if (pid < -1)
563 printpid_translation(tcp, -pid, PT_PGID);
564 }