(root)/
util-linux-2.39/
libmount/
src/
hook_idmap.c
       1  /* SPDX-License-Identifier: LGPL-2.1-or-later */
       2  /*
       3   * This file is part of libmount from util-linux project.
       4   *
       5   * Copyright (C) 2022 Karel Zak <kzak@redhat.com>
       6   * Copyright (C) 2022 Christian Brauner (Microsoft) <brauner@kernel.org>
       7   *
       8   * libmount is free software; you can redistribute it and/or modify it
       9   * under the terms of the GNU Lesser General Public License as published by
      10   * the Free Software Foundation; either version 2.1 of the License, or
      11   * (at your option) any later version.
      12   *
      13   *
 * This is the implementation of the X-mount.idmap= mount option.
 *
 * Please see the comment in libmount/src/hooks.c to understand how hooks work.
      17   */
      18  #include <stdbool.h>
      19  #include <sys/socket.h>
      20  #include <sys/wait.h>
      21  #include <sys/ioctl.h>
      22  #include <sys/mount.h>
      23  #include <inttypes.h>
      24  
      25  #include "strutils.h"
      26  #include "all-io.h"
      27  #include "namespace.h"
      28  #include "mount-api-utils.h"
      29  
      30  #include "mountP.h"
      31  
      32  #ifdef HAVE_LINUX_NSFS_H
      33  # include <linux/nsfs.h>
      34  #endif
      35  
      36  #ifdef HAVE_MOUNTFD_API
      37  
      38  typedef enum idmap_type_t {
      39  	ID_TYPE_UID,	/* uidmap entry */
      40  	ID_TYPE_GID,	/* gidmap entry */
      41  	ID_TYPE_UIDGID,	/* uidmap and gidmap entry */
      42  } idmap_type_t;
      43  
      44  struct id_map {
      45  	idmap_type_t map_type;
      46  	uint32_t nsid;
      47  	uint32_t hostid;
      48  	uint32_t range;
      49  	struct list_head map_head;
      50  };
      51  
      52  struct hook_data {
      53  	int userns_fd;
      54  	struct list_head id_map;
      55  };
      56  
      57  static inline struct hook_data *new_hook_data(void)
      58  {
      59  	struct hook_data *hd = calloc(1, sizeof(*hd));
      60  
      61  	if (!hd)
      62  		return NULL;
      63  
      64  	INIT_LIST_HEAD(&hd->id_map);
      65  	hd->userns_fd = -1;
      66  	return hd;
      67  }
      68  
      69  static inline void free_hook_data(struct hook_data *hd)
      70  {
      71  	struct list_head *p, *pnext;
      72  	struct id_map *idmap;
      73  
      74  	if (!hd)
      75  		return;
      76  
      77  	if (hd->userns_fd >= 0) {
      78  		close(hd->userns_fd);
      79  		hd->userns_fd = -1;
      80  	}
      81  
      82  	list_for_each_safe(p, pnext, &hd->id_map) {
      83  		idmap = list_entry(p, struct id_map, map_head);
      84  		list_del(&idmap->map_head);
      85  		free(idmap);
      86  	}
      87  	INIT_LIST_HEAD(&hd->id_map);
      88  	free(hd);
      89  }
      90  
      91  static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf,
      92  			    size_t buf_size)
      93  {
      94  	int fd = -1, rc = -1, setgroups_fd = -1;
      95  	char path[PATH_MAX];
      96  
      97  	if (geteuid() != 0 && map_type == ID_TYPE_GID) {
      98  		snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
      99  
     100  		setgroups_fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY);
     101  		if (setgroups_fd < 0 && errno != ENOENT)
     102  			goto err;
     103  
     104  		if (setgroups_fd >= 0) {
     105  			rc = write_all(setgroups_fd, "deny\n", strlen("deny\n"));
     106  			if (rc)
     107  				goto err;
     108  		}
     109  	}
     110  
     111  	snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid,
     112  		 map_type == ID_TYPE_UID ? 'u' : 'g');
     113  
     114  	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY);
     115  	if (fd < 0)
     116  		goto err;
     117  
     118  	rc = write_all(fd, buf, buf_size);
     119  
     120  err:
     121  	if (fd >= 0)
     122  		close(fd);
     123  	if (setgroups_fd >= 0)
     124  		close(setgroups_fd);
     125  
     126  	return rc;
     127  }
     128  
     129  static int map_ids(struct list_head *idmap, pid_t pid)
     130  {
     131  	int fill, left;
     132  	char *pos;
     133  	int rc = 0;
     134  	char mapbuf[4096] = {};
     135  	struct list_head *p;
     136  
     137  	for (idmap_type_t type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
     138  		bool had_entry = false;
     139  
     140  		pos = mapbuf;
     141  		list_for_each(p, idmap) {
     142  			struct id_map *map = list_entry(p, struct id_map, map_head);
     143  
     144  			/*
     145  			 * If the map type is ID_TYPE_UIDGID we need to include
     146  			 * it in both gid- and uidmap.
     147  			 */
     148  			if (map->map_type != ID_TYPE_UIDGID && map->map_type != type)
     149  				continue;
     150  
     151  			had_entry = true;
     152  
     153  			left = sizeof(mapbuf) - (pos - mapbuf);
     154  			fill = snprintf(pos, left,
     155  					"%" PRIu32 " %" PRIu32 " %" PRIu32 "\n",
     156  					map->nsid, map->hostid, map->range);
     157  			/*
     158  			 * The kernel only takes <= 4k for writes to
     159  			 * /proc/<pid>/{g,u}id_map
     160  			 */
     161  			if (fill <= 0)
     162  				return errno = EINVAL, -1;
     163  
     164  			pos += fill;
     165  		}
     166  		if (!had_entry)
     167  			continue;
     168  
     169  		rc = write_id_mapping(type, pid, mapbuf, pos - mapbuf);
     170  		if (rc < 0)
     171  			return -1;
     172  
     173  		memset(mapbuf, 0, sizeof(mapbuf));
     174  	}
     175  
     176  	return 0;
     177  }
     178  
     179  static int wait_for_pid(pid_t pid)
     180  {
     181  	int status, rc;
     182  
     183  	do {
     184  		rc = waitpid(pid, &status, 0);
     185  	} while (rc < 0 && errno == EINTR);
     186  
     187  	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
     188  		return -1;
     189  
     190  	return 0;
     191  }
     192  
     193  static int get_userns_fd_from_idmap(struct list_head *idmap)
     194  {
     195  	int fd_userns = -1;
     196  	ssize_t rc = -1;
     197  	char c = '1';
     198  	pid_t pid;
     199  	int sock_fds[2];
     200  	char path[PATH_MAX];
     201  
     202  	rc = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, sock_fds);
     203  	if (rc < 0)
     204  		return -errno;
     205  
     206  	pid = fork();
     207  	if (pid < 0)
     208  		goto err_close_sock;
     209  
     210  	if (pid == 0) {
     211  		close(sock_fds[1]);
     212  
     213  		rc = unshare(CLONE_NEWUSER);
     214  		if (rc < 0)
     215  			_exit(EXIT_FAILURE);
     216  
     217  		/* Let parent know we're ready to have the idmapping written. */
     218  		rc = write_all(sock_fds[0], &c, 1);
     219  		if (rc)
     220  			_exit(EXIT_FAILURE);
     221  
     222  		/* Hang around until the parent has persisted our namespace. */
     223  		rc = read_all(sock_fds[0], &c, 1);
     224  		if (rc != 1)
     225  			_exit(EXIT_FAILURE);
     226  
     227  		close(sock_fds[0]);
     228  
     229  		_exit(EXIT_SUCCESS);
     230  	}
     231  	close(sock_fds[0]);
     232  	sock_fds[0] = -1;
     233  
     234  	/* Wait for child to set up a new namespace. */
     235  	rc = read_all(sock_fds[1], &c, 1);
     236  	if (rc != 1) {
     237  		kill(pid, SIGKILL);
     238  		goto err_wait;
     239  	}
     240  
     241  	rc = map_ids(idmap, pid);
     242  	if (rc < 0) {
     243  		kill(pid, SIGKILL);
     244  		goto err_wait;
     245  	}
     246  
     247  	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
     248  	fd_userns = open(path, O_RDONLY | O_CLOEXEC | O_NOCTTY);
     249  
     250  	/* Let child know we've persisted its namespace. */
     251  	(void)write_all(sock_fds[1], &c, 1);
     252  
     253  err_wait:
     254  	rc = wait_for_pid(pid);
     255  
     256  err_close_sock:
     257  	if (sock_fds[0] > 0)
     258  		close(sock_fds[0]);
     259  	close(sock_fds[1]);
     260  
     261  	if (rc < 0 && fd_userns >= 0) {
     262  		close(fd_userns);
     263  		fd_userns = -1;
     264  	}
     265  
     266  	return fd_userns;
     267  }
     268  
     269  static int open_userns(const char *path)
     270  {
     271  
     272  	int userns_fd;
     273  
     274  	userns_fd = open(path, O_RDONLY | O_CLOEXEC | O_NOCTTY);
     275  	if (userns_fd < 0)
     276  		return -1;
     277  
     278  #if defined(NS_GET_OWNER_UID)
     279  	/*
     280  	 * We use NS_GET_OWNER_UID to verify that this is a user namespace.
     281  	 * This is on a best-effort basis. If this isn't a userns then
     282  	 * mount_setattr() will tell us to go away later.
     283  	 */
     284  	if (ioctl(userns_fd, NS_GET_OWNER_UID, &(uid_t){-1}) < 0) {
     285  		close(userns_fd);
     286  		return -1;
     287  	}
     288  #endif
     289  	return userns_fd;
     290  }
     291  
     292  /*
     293   * Create an idmapped mount based on context target, unmounting the
     294   * non-idmapped target mount and attaching the detached idmapped mount target.
     295   */
     296  static int hook_mount_post(
     297  			struct libmnt_context *cxt,
     298  			const struct libmnt_hookset *hs,
     299  			void *data)
     300  {
     301  	struct hook_data *hd = (struct hook_data *) data;
     302  	struct mount_attr attr = {
     303  		.attr_set	= MOUNT_ATTR_IDMAP,
     304  		.userns_fd	= hd->userns_fd
     305  	};
     306  	const int recursive = mnt_optlist_is_recursive(cxt->optlist);
     307  	const char *target = mnt_fs_get_target(cxt->fs);
     308  	int fd_tree = -1;
     309  	int rc, is_private = 1;
     310  
     311  	assert(hd);
     312  	assert(target);
     313  	assert(hd->userns_fd >= 0);
     314  
     315  	DBG(HOOK, ul_debugobj(hs, " attaching namespace to %s", target));
     316  
     317  	/*
     318  	 * Once a mount has been attached to the filesystem it can't be
     319  	 * idmapped anymore. So create a new detached mount.
     320  	 */
     321  #ifdef USE_LIBMOUNT_MOUNTFD_SUPPORT
     322  	{
     323  		struct libmnt_sysapi *api = mnt_context_get_sysapi(cxt);
     324  
     325  		if (api && api->fd_tree >= 0) {
     326  			fd_tree = api->fd_tree;
     327  			is_private = 0;
     328  			DBG(HOOK, ul_debugobj(hs, " reuse tree FD"));
     329  		}
     330  	}
     331  #endif
     332  	if (fd_tree < 0)
     333  		fd_tree = open_tree(-1, target,
     334  			    OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC |
     335  			    (recursive ? AT_RECURSIVE : 0));
     336  	if (fd_tree < 0) {
     337  		DBG(HOOK, ul_debugobj(hs, " failed to open tree"));
     338  		return -MNT_ERR_IDMAP;
     339  	}
     340  
     341  	/* Attach the idmapping to the mount. */
     342  	rc = mount_setattr(fd_tree, "",
     343  			   AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
     344  			   &attr, sizeof(attr));
     345  	if (rc < 0) {
     346  		DBG(HOOK, ul_debugobj(hs, " failed to set attributes"));
     347  		goto done;
     348  	}
     349  
     350  	/* Attach the idmapped mount. */
     351  	if (is_private) {
     352  		/* Unmount the old, non-idmapped mount we just cloned and idmapped. */
     353  		umount2(target, MNT_DETACH);
     354  
     355  		rc = move_mount(fd_tree, "", -1, target, MOVE_MOUNT_F_EMPTY_PATH);
     356  		if (rc)
     357  			DBG(HOOK, ul_debugobj(hs, " failed to set move mount"));
     358  	}
     359  done:
     360  	if (is_private)
     361  		close(fd_tree);
     362  	if (rc < 0)
     363  		return -MNT_ERR_IDMAP;
     364  
     365  	return 0;
     366  }
     367  
     368  /*
     369   * Process X-mount.idmap= mount option
     370   */
     371  static int hook_prepare_options(
     372  			struct libmnt_context *cxt,
     373  			const struct libmnt_hookset *hs,
     374  			void *data __attribute__((__unused__)))
     375  {
     376  	struct hook_data *hd = NULL;
     377  	struct libmnt_optlist *ol;
     378  	struct libmnt_opt *opt;
     379  	int rc;
     380  	const char *value = NULL;
     381  	char *saveptr = NULL, *tok, *buf = NULL;
     382  
     383  	ol = mnt_context_get_optlist(cxt);
     384  	if (!ol)
     385  		return 0;
     386  
     387  	opt = mnt_optlist_get_named(ol, "X-mount.idmap", cxt->map_userspace);
     388  	if (!opt)
     389  		return 0;
     390  
     391  	value = mnt_opt_get_value(opt);
     392  	if (value)
     393  		value = skip_blank(value);
     394  	if (!value || !*value)
     395  		return errno = EINVAL, -MNT_ERR_MOUNTOPT;
     396  
     397  	hd = new_hook_data();
     398  	if (!hd)
     399  		return -ENOMEM;
     400  
     401  	/* Has the user given us a path to a user namespace? */
     402  	if (*value == '/') {
     403  		hd->userns_fd = open_userns(value);
     404  		if (hd->userns_fd < 0)
     405  			goto err;
     406  		goto done;
     407  	}
     408  
     409  	buf = strdup(value);
     410  	if (!buf)
     411  		goto err;
     412  
     413  	/*
     414  	 * This is an explicit ID-mapping list of the form:
     415  	 * [id-type]:id-mount:id-host:id-range [...]
     416  	 *
     417  	 * We split the list into separate ID-mapping entries. The individual
     418  	 * ID-mapping entries are separated by ' '.
     419  	 *
     420  	 * A long while ago I made the kernel support up to 340 individual
     421  	 * ID-mappings. So users have quite a bit of freedom here.
     422  	 */
     423  	for (tok = strtok_r(buf, " ", &saveptr); tok;
     424  	     tok = strtok_r(NULL, " ", &saveptr)) {
     425  		struct id_map *idmap;
     426  		idmap_type_t map_type;
     427  		uint32_t nsid = UINT_MAX, hostid = UINT_MAX, range = UINT_MAX;
     428  
     429  		if (startswith(tok, "b:")) {
     430  			/* b:id-mount:id-host:id-range */
     431  			map_type = ID_TYPE_UIDGID;
     432  			tok += 2;
     433  		} else if (startswith(tok, "g:")) {
     434  			/* g:id-mount:id-host:id-range */
     435  			map_type = ID_TYPE_GID;
     436  			tok += 2;
     437  		} else if (startswith(tok, "u:")) {
     438  			/* u:id-mount:id-host:id-range */
     439  			map_type = ID_TYPE_UID;
     440  			tok += 2;
     441  		} else {
     442  			/*
     443  			 * id-mount:id-host:id-range
     444  			 *
     445  			 * If the user didn't specify it explicitly then they
     446  			 * want this to be both a gid- and uidmap.
     447  			 */
     448  			map_type = ID_TYPE_UIDGID;
     449  		}
     450  
     451  		/* id-mount:id-host:id-range */
     452  		rc = sscanf(tok, "%" PRIu32 ":%" PRIu32 ":%" PRIu32, &nsid,
     453  			    &hostid, &range);
     454  		if (rc != 3)
     455  			goto err;
     456  
     457  		idmap = calloc(1, sizeof(*idmap));
     458  		if (!idmap)
     459  			goto err;
     460  
     461  		idmap->map_type = map_type;
     462  		idmap->nsid = nsid;
     463  		idmap->hostid = hostid;
     464  		idmap->range = range;
     465  		INIT_LIST_HEAD(&idmap->map_head);
     466  		list_add_tail(&idmap->map_head, &hd->id_map);
     467  	}
     468  
     469  	hd->userns_fd = get_userns_fd_from_idmap(&hd->id_map);
     470  	if (hd->userns_fd < 0)
     471  		goto err;
     472  
     473  done:
     474  	/* define post-mount hook to enter the namespace */
     475  	DBG(HOOK, ul_debugobj(hs, " wanted new user namespace"));
     476  	cxt->force_clone = 1; /* require OPEN_TREE_CLONE */
     477  	rc = mnt_context_append_hook(cxt, hs,
     478  				MNT_STAGE_MOUNT_POST,
     479  				hd, hook_mount_post);
     480  	if (rc < 0)
     481  		goto err;
     482  
     483  	free(buf);
     484  	return 0;
     485  
     486  err:
     487  	DBG(HOOK, ul_debugobj(hs, " failed to setup idmap"));
     488  	free_hook_data(hd);
     489  	free(buf);
     490  	return -MNT_ERR_MOUNTOPT;
     491  }
     492  
     493  
     494  /* de-initiallize this module */
     495  static int hookset_deinit(struct libmnt_context *cxt, const struct libmnt_hookset *hs)
     496  {
     497  	void *data;
     498  
     499  	DBG(HOOK, ul_debugobj(hs, "deinit '%s'", hs->name));
     500  
     501  	/* remove all our hooks and free hook data */
     502  	while (mnt_context_remove_hook(cxt, hs, 0, &data) == 0) {
     503  		if (data)
     504  			free_hook_data((struct hook_data *) data);
     505  		data = NULL;
     506  	}
     507  
     508  	return 0;
     509  }
     510  
     511  const struct libmnt_hookset hookset_idmap =
     512  {
     513  	.name = "__idmap",
     514  
     515  	.firststage = MNT_STAGE_PREP_OPTIONS,
     516  	.firstcall = hook_prepare_options,
     517  
     518  	.deinit = hookset_deinit
     519  };
     520  
     521  #endif /* HAVE_MOUNTFD_API */