(root)/
strace-6.5/
src/
pidns.c
       1  /*
       2   * Copyright (c) 2020-2021 Ákos Uzonyi <uzonyi.akos@gmail.com>
       3   * All rights reserved.
       4   *
       5   * SPDX-License-Identifier: LGPL-2.1-or-later
       6   */
       7  
       8  #include "defs.h"
       9  
      10  
      11  #include <dirent.h>
      12  #include <fcntl.h>
      13  #include <stdint.h>
      14  #include <string.h>
      15  #include <unistd.h>
      16  
      17  #include <asm/unistd.h>
      18  
      19  #include <sys/ioctl.h>
      20  #include <sys/param.h>
      21  #include <sys/types.h>
      22  #include <sys/stat.h>
      23  
      24  #include <linux/nsfs.h>
      25  #include "largefile_wrappers.h"
      26  #include "number_set.h"
      27  #include "trie.h"
      28  #include "xmalloc.h"
      29  #include "xstring.h"
      30  
      31  /**
      32   * Key:   PID NS ID
      33   * Value: a trie:
      34   *           Key:   a process PID in NS
      35   *           Value: the process's PID as present in /proc
      36   */
      37  static struct trie *ns_pid_to_proc_pid[PT_COUNT];
      38  
      39  /**
      40   * Key:   Proc PID
      41   * Value: struct proc_data
      42   */
      43  static struct trie *proc_data_cache;
      44  
      45  static bool ns_get_parent_enotty = false;
      46  
      47  static const char tid_str[]  = "NSpid:\t";
      48  static const char tgid_str[] = "NStgid:\t";
      49  static const char pgid_str[] = "NSpgid:\t";
      50  static const char sid_str[]  = "NSsid:\t";
      51  
      52  static const struct {
      53  	const char *str;
      54  	size_t size;
      55  } id_strs[PT_COUNT] = {
      56  	[PT_TID] =  { tid_str,  sizeof(tid_str)  - 1 },
      57  	[PT_TGID] = { tgid_str, sizeof(tgid_str) - 1 },
      58  	[PT_PGID] = { pgid_str, sizeof(pgid_str) - 1 },
      59  	[PT_SID] =  { sid_str,  sizeof(sid_str)  - 1 },
      60  };
      61  
      62  
      63  /**
      64   * Limit on PID NS hierarchy depth, imposed since Linux 3.7. NS traversal
      65   * is not possible before Linux 4.9, so we consider this limit pretty universal.
      66   */
      67  #define MAX_NS_DEPTH 32
      68  
      69  static const size_t ns_id_size = sizeof(unsigned int) * 8;
      70  static const uint8_t ptr_sz_lg = (sizeof(void *) == 8 ? 6 : 5);
      71  
      72  static int pid_max;
      73  static uint8_t pid_max_size, pid_max_size_lg;
      74  
      75  struct proc_data {	/* Data read from /proc about one process; stored in proc_data_cache */
      76  	int proc_pid;	/* PID as present in /proc; also the key in proc_data_cache */
      77  	int ns_count;	/* Number of valid entries in ns_hierarchy; 0 if not read yet */
      78  	unsigned int ns_hierarchy[MAX_NS_DEPTH];	/* PID NS IDs: the process's own NS first, then each parent in turn */
      79  	int id_count[PT_COUNT];	/* Per ID type: number of valid entries in id_hierarchy[type] */
      80  	int id_hierarchy[PT_COUNT][MAX_NS_DEPTH];	/* Per ID type: IDs in the order stored in /proc (top of NS hierarchy first) */
      81  };
      82  
      83  /**
      84   * Helper function for creating a trie.
      85   *
      86   * For node_key_bits and data_block_key_bits 4 is used (so trie height is 32 / 4
      87   * = 8, and node sizes are 8 byte * 2^4 = 128 bytes), which seems to be a good
      88   * tradeoff between memory usage and lookup time. It should not be too large,
      89   * since there can be large holes between PIDs, and it would be just a waste of
      90   * memory having large nodes with lot of NULL pointers in them.
      91   */
      92  static struct trie *
      93  create_trie_4(uint8_t key_size, uint8_t item_size_lg, uint64_t empty_value)
      94  {
      95  	struct trie *t = trie_create(key_size, item_size_lg, 4, 4, empty_value);
      96  	if (!t)
      97  		error_msg_and_die("creating trie failed");
      98  
      99  	return t;
     100  }
     101  
     102  void
     103  pidns_init(void)
     104  {
     105  	if (proc_data_cache)
     106  		return;
     107  
     108  	pid_max = INT_MAX;
     109  	if (read_int_from_file("/proc/sys/kernel/pid_max", &pid_max) < 0)
     110  		debug_func_perror_msg("reading /proc/sys/kernel/pid_max");
     111  	pid_max_size = ilog2_32(pid_max - 1) + 1;
     112  	pid_max_size_lg = ilog2_32(pid_max_size - 1) + 1;
     113  
     114  	for (int i = 0; i < PT_COUNT; i++)
     115  		ns_pid_to_proc_pid[i] = create_trie_4(ns_id_size, ptr_sz_lg, 0);
     116  
     117  	proc_data_cache = create_trie_4(pid_max_size, ptr_sz_lg, 0);
     118  }
     119  
     120  static void
     121  put_proc_pid(unsigned int ns, int ns_pid, enum pid_type type, int proc_pid)
     122  {
     123  	struct trie *b = (struct trie *) (uintptr_t) trie_get(ns_pid_to_proc_pid[type], ns);
     124  	if (!b) {
     125  		b = create_trie_4(pid_max_size, pid_max_size_lg, 0);
     126  		trie_set(ns_pid_to_proc_pid[type], ns, (uint64_t) (uintptr_t) b);
     127  	}
     128  	trie_set(b, ns_pid, proc_pid);
     129  }
     130  
     131  static int
     132  get_cached_proc_pid(unsigned int ns, int ns_pid, enum pid_type type)
     133  {
     134  	struct trie *b = (struct trie *) (uintptr_t)
     135  		trie_get(ns_pid_to_proc_pid[type], ns);
     136  	if (!b)
     137  		return 0;
     138  
     139  	return trie_get(b, ns_pid);
     140  }
     141  
     142  /**
     143   * Returns a list of PID NS IDs for the specified PID.
     144   *
     145   * @param proc_pid PID (as present in /proc) to get information for.
     146   * @param ns_buf   Pointer to buffer that is able to contain at least
     147   *                 ns_buf_size items.
     148   * @return         Amount of NS in list. 0 indicates error.
     149   */
     150  static size_t
     151  get_ns_hierarchy(int proc_pid, unsigned int *ns_buf, size_t ns_buf_size)
     152  {
     153  	char path[PATH_MAX + 1];
     154  	xsprintf(path, "/proc/%s/ns/pid", pid_to_str(proc_pid));
     155  
     156  	int fd = open_file(path, O_RDONLY);
     157  	if (fd < 0)
     158  		return 0;
     159  
     160  	size_t n = 0;
     161  	while (n < ns_buf_size) {
     162  		strace_stat_t st;
     163  		if (fstat_fd(fd, &st))
     164  			break;
     165  
     166  		ns_buf[n++] = st.st_ino;
     167  		if (n >= ns_buf_size)
     168  			break;
     169  
     170  		if (ns_get_parent_enotty)
     171  			break;
     172  
     173  		int parent_fd = ioctl(fd, NS_GET_PARENT);
     174  		if (parent_fd < 0) {
     175  			switch (errno) {
     176  			case EPERM:
     177  				break;
     178  
     179  			case ENOTTY:
     180  				ns_get_parent_enotty = true;
     181  				error_msg("NS_* ioctl commands are not "
     182  					  "supported by the kernel");
     183  				break;
     184  
     185  			default:
     186  				perror_func_msg("ioctl(NS_GET_PARENT)");
     187  				break;
     188  			}
     189  
     190  			break;
     191  		}
     192  
     193  		close(fd);
     194  		fd = parent_fd;
     195  	}
     196  
     197  	close(fd);
     198  
     199  	return n;
     200  }
     201  
     202  /**
     203   * Get list of IDs present in NS* proc status record. IDs are placed as they are
     204   * stored in /proc (from top to bottom of NS hierarchy).
     205   *
     206   * @param proc_pid    PID (as present in /proc) to get information for.
     207   * @param id_buf      Pointer to buffer that is able to contain at least
     208   *                    MAX_NS_DEPTH items. Can be NULL.
     209   * @param type        Type of ID requested.
     210   * @return            Number of items stored in id_list. 0 indicates error.
     211   */
     212  static size_t
     213  get_id_list(int proc_pid, int *id_buf, enum pid_type type)
     214  {
     215  	return proc_status_get_id_list(proc_pid, id_buf, MAX_NS_DEPTH,
     216  				       id_strs[type].str, id_strs[type].size);
     217  }
     218  
     219  /**
     220   * Returns whether the /proc filesystem's PID namespace is the same as strace's.
     221   */
     222  static bool
     223  is_proc_ours(void)
     224  {
     225  	static int cached_val = -1;
     226  
     227  	if (cached_val < 0)
     228  		cached_val = get_id_list(0, NULL, PT_TID) <= 1;
     229  
     230  	return cached_val;
     231  }
     232  
     233  /**
     234   * Returns the PID namespace of the tracee
     235   */
     236  static unsigned int
     237  get_ns(struct tcb *tcp)
     238  {
     239  	if (!tcp->pid_ns) {
     240  		int proc_pid = 0;
     241  		translate_pid(NULL, tcp->pid, PT_TID, &proc_pid);
     242  
     243  		if (proc_pid)
     244  			get_ns_hierarchy(proc_pid, &tcp->pid_ns, 1);
     245  	}
     246  
     247  	return tcp->pid_ns;
     248  }
     249  
     250  /**
     251   * Returns the PID namespace of strace
     252   */
     253  static unsigned int
     254  get_our_ns(void)
     255  {
     256  	static unsigned int our_ns = 0;
     257  	static bool our_ns_initialised = false;
     258  
     259  	if (!our_ns_initialised) {
     260  		get_ns_hierarchy(0, &our_ns, 1);
     261  		our_ns_initialised = true;
     262  	}
     263  
     264  	return our_ns;
     265  }
     266  
     267  /**
     268   * Returns the cached proc_data struct associated with proc_pid.
     269   * If none found, allocates a new proc_data.
     270   */
     271  static struct proc_data *
     272  get_or_create_proc_data(int proc_pid)
     273  {
     274  	struct proc_data *pd = (struct proc_data *) (uintptr_t)
     275  		trie_get(proc_data_cache, proc_pid);
     276  
     277  	if (!pd) {
     278  		pd = calloc(1, sizeof(*pd));
     279  		if (!pd)
     280  			return NULL;
     281  
     282  		pd->proc_pid = proc_pid;
     283  		trie_set(proc_data_cache, proc_pid, (uint64_t) (uintptr_t) pd);
     284  	}
     285  
     286  	return pd;
     287  }
     288  
     289  /**
     290   * Updates the proc_data from /proc
     291   * If the process does not exists, returns false, and frees the proc_data
     292   */
     293  static bool
     294  update_proc_data(struct proc_data *pd, enum pid_type type)
     295  {
     296  	pd->ns_count = get_ns_hierarchy(pd->proc_pid,
     297  		pd->ns_hierarchy, MAX_NS_DEPTH);
     298  	if (!pd->ns_count)
     299  		goto fail;
     300  
     301  	pd->id_count[type] = get_id_list(pd->proc_pid,
     302  		pd->id_hierarchy[type], type);
     303  	if (!pd->id_count[type])
     304  		goto fail;
     305  
     306  	return true;
     307  
     308  fail:
     309  	trie_set(proc_data_cache, pd->proc_pid, (uint64_t) (uintptr_t) NULL);
     310  	free(pd);
     311  	return false;
     312  }
     313  
     314  /**
     315   * Parameters for id translation: the first three fields describe the id
           * to translate, the last two are filled in by translate_id_proc_pid().
     316   */
     317  struct translate_id_params {
     318  	/* The namespace to be translated from */
     319  	unsigned int from_ns;
     320  	/* The id to be translated */
     321  	int from_id;
     322  	/* The type of the id */
     323  	enum pid_type type;
     324  
     325  	/* The result (output): the id in our namespace, 0 if not found */
     326  	int result_id;
     327  	/* The proc data of the process (output), NULL if not found.
          	 * Also read as input when translating from cached data
          	 * (translate_id_proc_pid() called with proc_pid 0). */
     328  	struct proc_data *pd;
     329  };
     330  
     331  /**
     332   * Translates an id to our namespace, given the proc_pid of the process,
     333   * by reading files in /proc.
     334   *
     335   * @param tip      The parameters
     336   * @param proc_pid The proc pid of the process.
     337   *                 If 0, use the cached values in tip->pd.
     338   */
     339  static void
     340  translate_id_proc_pid(struct translate_id_params *tip, int proc_pid)
     341  {
     342  	struct proc_data *pd = proc_pid ?
     343  		get_or_create_proc_data(proc_pid) :
     344  		tip->pd;
     345  
     346  	tip->result_id = 0;
     347  	tip->pd = NULL;
     348  
     349  	if (!pd)
     350  		return;
     351  
     352  	if (proc_pid && !update_proc_data(pd, tip->type))
     353  		return;
     354  
     355  	if (!pd->ns_count || pd->id_count[tip->type] < pd->ns_count)
     356  		return;
     357  
     358  	int *id_hierarchy = pd->id_hierarchy[tip->type];
     359  	int id_count = pd->id_count[tip->type];
     360  
     361  	for (int i = 0; i < pd->ns_count; i++) {
     362  		unsigned int ns = pd->ns_hierarchy[i];
     363  		int ns_id = id_hierarchy[id_count - i - 1];
     364  		int our_id = id_hierarchy[id_count - pd->ns_count];
     365  
     366  		if (ns != tip->from_ns)
     367  			continue;
     368  
     369  		if (ns_id != tip->from_id)
     370  			return;
     371  
     372  		tip->result_id = our_id;
     373  		tip->pd = pd;
     374  		return;
     375  	}
     376  }
     377  
     378  /**
     379   * Translates an id to our namespace by reading all proc entries in a directory.
     380   * The directory is either /proc or /proc/<pid>/task.
     381   *
     382   *
     383   * @param tip            The parameters
     384   * @param path           The path of the directory to be read.
     385   * @param read_task_dir  Whether recurse to "task" subdirectory.
     386   */
     387  static void
     388  translate_id_dir(struct translate_id_params *tip, const char *path,
     389                   bool read_task_dir)
     390  {
     391  	DIR *dir = opendir(path);
     392  	if (!dir) {
     393  		debug_func_perror_msg("opening dir: %s", path);
     394  		return;
     395  	}
     396  
     397  	while (!tip->result_id) {
     398  		errno = 0;
     399  		struct_dirent *entry = read_dir(dir);
     400  		if (!entry) {
     401  			if (errno)
     402  				perror_func_msg("readdir");
     403  
     404  			break;
     405  		}
     406  
     407  		if (entry->d_type != DT_DIR)
     408  			continue;
     409  
     410  		errno = 0;
     411  		long proc_pid = strtol(entry->d_name, NULL, 10);
     412  		if (proc_pid < 1 || proc_pid > INT_MAX || errno)
     413  			continue;
     414  
     415  		if (read_task_dir) {
     416  			char task_dir_path[PATH_MAX + 1];
     417  			xsprintf(task_dir_path, "/proc/%ld/task", proc_pid);
     418  			translate_id_dir(tip, task_dir_path, false);
     419  		}
     420  
     421  		if (tip->result_id)
     422  			break;
     423  
     424  		translate_id_proc_pid(tip, proc_pid);
     425  	}
     426  
     427  	closedir(dir);
     428  }
     429  
     430  /**
     431   * Iterator function of the proc_data_cache for id translation.
     432   * If the cache contains the id we are looking for, reads the corresponding
     433   * directory in /proc, and if cache is valid, saves the result.
     434   */
     435  static void
     436  proc_data_cache_iterator_fn(void* fn_data, uint64_t key, uint64_t val)
     437  {
     438  	struct translate_id_params *tip = (struct translate_id_params *)fn_data;
     439  	struct proc_data *pd = (struct proc_data *) (uintptr_t) val;
     440  
     441  	if (!pd)
     442  		return;
     443  
     444  	/* Result already found in an earlier iteration */
     445  	if (tip->result_id)
     446  		return;
     447  
     448  	/* Translate from cache */
     449  	tip->pd = pd;
     450  	translate_id_proc_pid(tip, 0);
     451  	if (!tip->result_id)
     452  		return;
     453  
     454  	/* Now translate from actual data in /proc, to check cache validity */
     455  	translate_id_proc_pid(tip, pd->proc_pid);
     456  }
     457  
     458  int
     459  translate_pid(struct tcb *tcp, int from_id, enum pid_type type,
     460                int *proc_pid_ptr)
     461  {
     462  	if (from_id <= 0 || type < 0 || type >= PT_COUNT)
     463  		return 0;
     464  
     465  	/* If translation is trivial */
     466  	if ((!tcp || get_ns(tcp) == get_our_ns()) &&
     467  	    (!proc_pid_ptr || is_proc_ours())) {
     468  		if (proc_pid_ptr)
     469  			*proc_pid_ptr = from_id;
     470  
     471  		return from_id;
     472  	}
     473  
     474  	struct translate_id_params tip = {
     475  		.from_ns = tcp ? get_ns(tcp) : get_our_ns(),
     476  		.from_id = from_id,
     477  		.type = type,
     478  		.result_id = 0,
     479  		.pd = NULL,
     480  	};
     481  
     482  	if (!tip.from_ns)
     483  		return 0;
     484  
     485  	if (ns_get_parent_enotty)
     486  		return 0;
     487  
     488  	/* Look for a cached proc_pid for this (from_ns, from_id) pair */
     489  	int cached_proc_pid = get_cached_proc_pid(tip.from_ns, tip.from_id,
     490  		tip.type);
     491  	if (cached_proc_pid) {
     492  		translate_id_proc_pid(&tip, cached_proc_pid);
     493  		if (tip.result_id)
     494  			goto exit;
     495  	}
     496  
     497  	/* Iterate through the cache, find potential proc_data */
     498  	trie_iterate_keys(proc_data_cache, 0, pid_max - 1,
     499  		proc_data_cache_iterator_fn, &tip);
     500  	/* (proc_data_cache_iterator_fn takes care about updating proc_data) */
     501  	if (tip.result_id)
     502  		goto exit;
     503  
     504  	/* No cache helped, read all entries in /proc */
     505  	translate_id_dir(&tip, "/proc", true);
     506  
     507  exit:
     508  	if (tip.pd) {
     509  		if (tip.pd->proc_pid)
     510  			put_proc_pid(tip.from_ns, tip.from_id, tip.type,
     511  				tip.pd->proc_pid);
     512  
     513  		if (proc_pid_ptr)
     514  			*proc_pid_ptr = tip.pd->proc_pid;
     515  	}
     516  
     517  	return tip.result_id;
     518  }
     519  
     520  int
     521  get_proc_pid(int pid)
     522  {
     523  	int proc_pid = 0;
     524  	translate_pid(NULL, pid, PT_TID, &proc_pid);
     525  	return proc_pid;
     526  }
     527  
     528  static void
     529  printpid_translation(struct tcb *tcp, int pid, enum pid_type type)
     530  {
     531  	bool print_ns_translation =
     532  		is_number_in_set(DECODE_PID_NS_TRANSLATION, decode_pid_set);
     533  	bool print_comm =
     534  		is_number_in_set(DECODE_PID_COMM, decode_pid_set) &&
     535  		(type == PT_TID || type == PT_TGID);
     536  
     537  	if (print_ns_translation || print_comm) {
     538  		int strace_pid = translate_pid(tcp, pid, type, NULL);
     539  		if (strace_pid) {
     540  			if (print_comm)
     541  				print_pid_comm(strace_pid);
     542  			if (print_ns_translation && strace_pid != pid)
     543  				tprintf_comment("%d in strace's PID NS",
     544  						strace_pid);
     545  		}
     546  	}
     547  }
     548  
     549  void
     550  printpid(struct tcb *tcp, int pid, enum pid_type type)
     551  {
     552  	PRINT_VAL_D(pid);
     553  	printpid_translation(tcp, pid, type);
     554  }
     555  
     556  void
     557  printpid_tgid_pgid(struct tcb *tcp, int pid)
     558  {
     559  	PRINT_VAL_D(pid);
     560  	if (pid > 0)
     561  		printpid_translation(tcp,  pid, PT_TGID);
     562  	else if (pid < -1)
     563  		printpid_translation(tcp, -pid, PT_PGID);
     564  }