1 /* Authors: Gregory P. Smith & Jeffrey Yasskin */
2 #ifndef Py_BUILD_CORE_BUILTIN
3 # define Py_BUILD_CORE_MODULE 1
4 #endif
5
6 #include "Python.h"
7 #include "pycore_fileutils.h"
8 #include "pycore_pystate.h"
9 #if defined(HAVE_PIPE2) && !defined(_GNU_SOURCE)
10 # define _GNU_SOURCE
11 #endif
12 #include <unistd.h>
13 #include <fcntl.h>
14 #ifdef HAVE_SYS_TYPES_H
15 #include <sys/types.h>
16 #endif
17 #if defined(HAVE_SYS_STAT_H)
18 #include <sys/stat.h>
19 #endif
20 #ifdef HAVE_SYS_SYSCALL_H
21 #include <sys/syscall.h>
22 #endif
23 #if defined(HAVE_SYS_RESOURCE_H)
24 #include <sys/resource.h>
25 #endif
26 #ifdef HAVE_DIRENT_H
27 #include <dirent.h>
28 #endif
29 #ifdef HAVE_GRP_H
30 #include <grp.h>
31 #endif /* HAVE_GRP_H */
32
33 #include "posixmodule.h"
34
35 #ifdef _Py_MEMORY_SANITIZER
36 # include <sanitizer/msan_interface.h>
37 #endif
38
39 #if defined(__ANDROID__) && __ANDROID_API__ < 21 && !defined(SYS_getdents64)
40 # include <sys/linux-syscalls.h>
41 # define SYS_getdents64 __NR_getdents64
42 #endif
43
44 #if defined(__linux__) && defined(HAVE_VFORK) && defined(HAVE_SIGNAL_H) && \
45 defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK)
46 /* If this is ever expanded to non-Linux platforms, verify what calls are
47 * allowed after vfork(). Ex: setsid() may be disallowed on macOS? */
48 # include <signal.h>
49 # define VFORK_USABLE 1
50 #endif
51
52 #if defined(__sun) && defined(__SVR4)
53 /* readdir64 is used to work around Solaris 9 bug 6395699. */
54 # define readdir readdir64
55 # define dirent dirent64
56 # if !defined(HAVE_DIRFD)
57 /* Some versions of Solaris lack dirfd(). */
58 # define dirfd(dirp) ((dirp)->dd_fd)
59 # define HAVE_DIRFD
60 # endif
61 #endif
62
/* Directory whose entries are named after the file descriptors open in this
 * process.  The _close_open_fds_* helpers below list it to find which fds
 * need closing. */
#if defined(__FreeBSD__) || (defined(__APPLE__) && defined(__MACH__)) || defined(__DragonFly__)
# define FD_DIR "/dev/fd"
#else
# define FD_DIR "/proc/self/fd"
#endif

/* Upper bound on the number of entries accepted in the extra_groups list:
 * the platform's NGROUPS_MAX when it is defined, otherwise 64. */
#ifdef NGROUPS_MAX
#define MAX_GROUPS NGROUPS_MAX
#else
#define MAX_GROUPS 64
#endif

/* Evaluate `call` once and jump to the enclosing function's `error` label if
 * it returned -1.  Only usable inside a function that defines that label. */
#define POSIX_CALL(call) do { if ((call) == -1) goto error; } while (0)
76
77 static struct PyModuleDef _posixsubprocessmodule;
78
79 /*[clinic input]
80 module _posixsubprocess
81 [clinic start generated code]*/
82 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c62211df27cf7334]*/
83
84 /*[python input]
85 class pid_t_converter(CConverter):
86 type = 'pid_t'
87 format_unit = '" _Py_PARSE_PID "'
88
89 def parse_arg(self, argname, displayname):
90 return """
91 {paramname} = PyLong_AsPid({argname});
92 if ({paramname} == -1 && PyErr_Occurred()) {{{{
93 goto exit;
94 }}}}
95 """.format(argname=argname, paramname=self.parser_name)
96 [python start generated code]*/
97 /*[python end generated code: output=da39a3ee5e6b4b0d input=5af1c116d56cbb5a]*/
98
99 #include "clinic/_posixsubprocess.c.h"
100
/* Convert an ASCII decimal string to a non-negative int without making any
 * libc call, so that it can be used between fork() and exec().
 *
 * Returns the parsed value on success.  Returns -1 if a non-digit character
 * is encountered or if the value does not fit in an int.  An empty string
 * parses as 0 (unchanged historical behavior).
 */
static int
_pos_int_from_ascii(const char *name)
{
    int num = 0;
    while (*name >= '0' && *name <= '9') {
        int digit = *name - '0';
        /* Test before accumulating: signed overflow is undefined behavior,
         * so it must be ruled out rather than detected afterwards. */
        if (num > (INT_MAX - digit) / 10) {
            return -1;  /* Too large to be an int. */
        }
        num = num * 10 + digit;
        ++name;
    }
    if (*name) {
        return -1;  /* Non digit found, not a number. */
    }
    return num;
}
114
115
116 #if defined(__FreeBSD__) || defined(__DragonFly__)
117 /* When /dev/fd isn't mounted it is often a static directory populated
118 * with 0 1 2 or entries for 0 .. 63 on FreeBSD, NetBSD, OpenBSD and DragonFlyBSD.
119 * NetBSD and OpenBSD have a /proc fs available (though not necessarily
120 * mounted) and do not have fdescfs for /dev/fd. MacOS X has a devfs
121 * that properly supports /dev/fd.
122 */
123 static int
124 _is_fdescfs_mounted_on_dev_fd(void)
125 {
126 struct stat dev_stat;
127 struct stat dev_fd_stat;
128 if (stat("/dev", &dev_stat) != 0)
129 return 0;
130 if (stat(FD_DIR, &dev_fd_stat) != 0)
131 return 0;
132 if (dev_stat.st_dev == dev_fd_stat.st_dev)
133 return 0; /* / == /dev == /dev/fd means it is static. #fail */
134 return 1;
135 }
136 #endif
137
138
139 /* Returns 1 if there is a problem with fd_sequence, 0 otherwise. */
140 static int
141 _sanity_check_python_fd_sequence(PyObject *fd_sequence)
142 {
143 Py_ssize_t seq_idx;
144 long prev_fd = -1;
145 for (seq_idx = 0; seq_idx < PyTuple_GET_SIZE(fd_sequence); ++seq_idx) {
146 PyObject* py_fd = PyTuple_GET_ITEM(fd_sequence, seq_idx);
147 long iter_fd;
148 if (!PyLong_Check(py_fd)) {
149 return 1;
150 }
151 iter_fd = PyLong_AsLong(py_fd);
152 if (iter_fd < 0 || iter_fd <= prev_fd || iter_fd > INT_MAX) {
153 /* Negative, overflow, unsorted, too big for a fd. */
154 return 1;
155 }
156 prev_fd = iter_fd;
157 }
158 return 0;
159 }
160
161
162 /* Is fd found in the sorted Python Sequence? */
163 static int
164 _is_fd_in_sorted_fd_sequence(int fd, int *fd_sequence,
165 Py_ssize_t fd_sequence_len)
166 {
167 /* Binary search. */
168 Py_ssize_t search_min = 0;
169 Py_ssize_t search_max = fd_sequence_len - 1;
170 if (search_max < 0)
171 return 0;
172 do {
173 long middle = (search_min + search_max) / 2;
174 long middle_fd = fd_sequence[middle];
175 if (fd == middle_fd)
176 return 1;
177 if (fd > middle_fd)
178 search_min = middle + 1;
179 else
180 search_max = middle - 1;
181 } while (search_min <= search_max);
182 return 0;
183 }
184
185 /*
186 * Do all the Python C API calls in the parent process to turn the pass_fds
187 * "py_fds_to_keep" tuple into a C array. The caller owns allocation and
188 * freeing of the array.
189 *
190 * On error an unknown number of array elements may have been filled in.
191 * A Python exception has been set when an error is returned.
192 *
193 * Returns: -1 on error, 0 on success.
194 */
195 static int
196 convert_fds_to_keep_to_c(PyObject *py_fds_to_keep, int *c_fds_to_keep)
197 {
198 Py_ssize_t i, len;
199
200 len = PyTuple_GET_SIZE(py_fds_to_keep);
201 for (i = 0; i < len; ++i) {
202 PyObject* fdobj = PyTuple_GET_ITEM(py_fds_to_keep, i);
203 long fd = PyLong_AsLong(fdobj);
204 if (fd == -1 && PyErr_Occurred()) {
205 return -1;
206 }
207 if (fd < 0 || fd > INT_MAX) {
208 PyErr_SetString(PyExc_ValueError,
209 "fd out of range in fds_to_keep.");
210 return -1;
211 }
212 c_fds_to_keep[i] = (int)fd;
213 }
214 return 0;
215 }
216
217
218 /* This function must be async-signal-safe as it is called from child_exec()
219 * after fork() or vfork().
220 */
221 static int
222 make_inheritable(int *c_fds_to_keep, Py_ssize_t len, int errpipe_write)
223 {
224 Py_ssize_t i;
225
226 for (i = 0; i < len; ++i) {
227 int fd = c_fds_to_keep[i];
228 if (fd == errpipe_write) {
229 /* errpipe_write is part of fds_to_keep. It must be closed at
230 exec(), but kept open in the child process until exec() is
231 called. */
232 continue;
233 }
234 if (_Py_set_inheritable_async_safe(fd, 1, NULL) < 0)
235 return -1;
236 }
237 return 0;
238 }
239
240
/* Get the maximum file descriptor that could be opened by this process.
 * This function is async signal safe for use between fork() and exec().
 *
 * Sources are tried in order and the first one that succeeds wins:
 * fcntl(F_MAXFD) on NetBSD, the RLIMIT_NOFILE hard limit on OpenBSD,
 * sysconf(_SC_OPEN_MAX) where available, and finally a constant 256.
 */
static long
safe_get_max_fd(void)
{
#if defined(__NetBSD__)
    {
        long netbsd_max = fcntl(0, F_MAXFD);
        if (netbsd_max >= 0) {
            return netbsd_max;
        }
    }
#endif
#if defined(HAVE_SYS_RESOURCE_H) && defined(__OpenBSD__)
    {
        struct rlimit limits;
        /* Not on the POSIX async signal safe functions list but likely
         * safe.  TODO - Someone should audit OpenBSD to make sure. */
        if (getrlimit(RLIMIT_NOFILE, &limits) >= 0) {
            return (long) limits.rlim_max;
        }
    }
#endif
#ifdef _SC_OPEN_MAX
    {
        long sysconf_max = sysconf(_SC_OPEN_MAX);
        if (sysconf_max != -1) {
            return sysconf_max;
        }
    }
#endif
    return 256;  /* Matches legacy Lib/subprocess.py behavior. */
}
267
268
269 /* Close all file descriptors in the given range except for those in
270 * fds_to_keep by invoking closer on each subrange.
271 *
272 * If end_fd == -1, it's guessed via safe_get_max_fd(), but it isn't
273 * possible to know for sure what the max fd to go up to is for
274 * processes with the capability of raising their maximum, or in case
275 * a process opened a high fd and then lowered its maximum.
276 */
277 static int
278 _close_range_except(int start_fd,
279 int end_fd,
280 int *fds_to_keep,
281 Py_ssize_t fds_to_keep_len,
282 int (*closer)(int, int))
283 {
284 if (end_fd == -1) {
285 end_fd = Py_MIN(safe_get_max_fd(), INT_MAX);
286 }
287 Py_ssize_t keep_seq_idx;
288 /* As fds_to_keep is sorted we can loop through the list closing
289 * fds in between any in the keep list falling within our range. */
290 for (keep_seq_idx = 0; keep_seq_idx < fds_to_keep_len; ++keep_seq_idx) {
291 int keep_fd = fds_to_keep[keep_seq_idx];
292 if (keep_fd < start_fd)
293 continue;
294 if (closer(start_fd, keep_fd - 1) != 0)
295 return -1;
296 start_fd = keep_fd + 1;
297 }
298 if (start_fd <= end_fd) {
299 if (closer(start_fd, end_fd) != 0)
300 return -1;
301 }
302 return 0;
303 }
304
305 #if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)
/* Record layout used to interpret the buffer filled in by the raw
 * getdents64 system call made in _close_open_fds_safe().
 *
 * It doesn't matter if d_name has room for NAME_MAX chars; we're using this
 * only to read a directory of short file descriptor number names.  The kernel
 * will return an error if we didn't give it enough space.  Highly Unlikely.
 * This structure is very old and stable: It will not change unless the kernel
 * chooses to break compatibility with all existing binaries.  Highly Unlikely.
 */
struct linux_dirent64 {
    unsigned long long d_ino;
    long long d_off;
    unsigned short d_reclen;  /* Length of this linux_dirent */
    unsigned char d_type;
    char d_name[256];  /* Filename (null-terminated) */
};
319
/* Closer callback for _close_range_except(): call close() on every fd in the
 * inclusive range [first, last], ignoring close() errors.
 *
 * An empty range (first > last) is a no-op.  Always returns 0.
 */
static int
_brute_force_closer(int first, int last)
{
    if (first > last) {
        return 0;
    }
    /* The exit test sits before the increment so that last == INT_MAX
     * terminates; a plain "fd <= last; fd++" loop would overflow there. */
    for (int fd = first; ; fd++) {
        /* Ignore errors */
        (void)close(fd);
        if (fd == last) {
            break;
        }
    }
    return 0;
}
329
330 /* Close all open file descriptors in the range from start_fd and higher
331 * Do not close any in the sorted fds_to_keep list.
332 *
333 * This version is async signal safe as it does not make any unsafe C library
334 * calls, malloc calls or handle any locks. It is _unfortunate_ to be forced
335 * to resort to making a kernel system call directly but this is the ONLY api
336 * available that does no harm. opendir/readdir/closedir perform memory
337 * allocation and locking so while they usually work they are not guaranteed
338 * to (especially if you have replaced your malloc implementation). A version
339 * of this function that uses those can be found in the _maybe_unsafe variant.
340 *
341 * This is Linux specific because that is all I am ready to test it on. It
342 * should be easy to add OS specific dirent or dirent64 structures and modify
343 * it with some cpp #define magic to work on other OSes as well if you want.
344 */
345 static void
346 _close_open_fds_safe(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
347 {
348 int fd_dir_fd;
349
350 fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);
351 if (fd_dir_fd == -1) {
352 /* No way to get a list of open fds. */
353 _close_range_except(start_fd, -1,
354 fds_to_keep, fds_to_keep_len,
355 _brute_force_closer);
356 return;
357 } else {
358 char buffer[sizeof(struct linux_dirent64)];
359 int bytes;
360 while ((bytes = syscall(SYS_getdents64, fd_dir_fd,
361 (struct linux_dirent64 *)buffer,
362 sizeof(buffer))) > 0) {
363 struct linux_dirent64 *entry;
364 int offset;
365 #ifdef _Py_MEMORY_SANITIZER
366 __msan_unpoison(buffer, bytes);
367 #endif
368 for (offset = 0; offset < bytes; offset += entry->d_reclen) {
369 int fd;
370 entry = (struct linux_dirent64 *)(buffer + offset);
371 if ((fd = _pos_int_from_ascii(entry->d_name)) < 0)
372 continue; /* Not a number. */
373 if (fd != fd_dir_fd && fd >= start_fd &&
374 !_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
375 fds_to_keep_len)) {
376 close(fd);
377 }
378 }
379 }
380 close(fd_dir_fd);
381 }
382 }
383
384 #define _close_open_fds_fallback _close_open_fds_safe
385
386 #else /* NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
387
/* Closer callback for _close_range_except(): hand the inclusive range
 * [first, last] to _Py_closerange().  Always returns 0, as no status from
 * _Py_closerange() is examined here. */
static int
_unsafe_closer(int first, int last)
{
    _Py_closerange(first, last);
    return 0;
}
394
395 /* Close all open file descriptors from start_fd and higher.
396 * Do not close any in the sorted fds_to_keep tuple.
397 *
398 * This function violates the strict use of async signal safe functions. :(
399 * It calls opendir(), readdir() and closedir(). Of these, the one most
400 * likely to ever cause a problem is opendir() as it performs an internal
401 * malloc(). Practically this should not be a problem. The Java VM makes the
402 * same calls between fork and exec in its own UNIXProcess_md.c implementation.
403 *
404 * readdir_r() is not used because it provides no benefit. It is typically
405 * implemented as readdir() followed by memcpy(). See also:
406 * http://womble.decadent.org.uk/readdir_r-advisory.html
407 */
408 static void
409 _close_open_fds_maybe_unsafe(int start_fd, int *fds_to_keep,
410 Py_ssize_t fds_to_keep_len)
411 {
412 DIR *proc_fd_dir;
413 #ifndef HAVE_DIRFD
414 while (_is_fd_in_sorted_fd_sequence(start_fd, fds_to_keep,
415 fds_to_keep_len)) {
416 ++start_fd;
417 }
418 /* Close our lowest fd before we call opendir so that it is likely to
419 * reuse that fd otherwise we might close opendir's file descriptor in
420 * our loop. This trick assumes that fd's are allocated on a lowest
421 * available basis. */
422 close(start_fd);
423 ++start_fd;
424 #endif
425
426 #if defined(__FreeBSD__) || defined(__DragonFly__)
427 if (!_is_fdescfs_mounted_on_dev_fd())
428 proc_fd_dir = NULL;
429 else
430 #endif
431 proc_fd_dir = opendir(FD_DIR);
432 if (!proc_fd_dir) {
433 /* No way to get a list of open fds. */
434 _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
435 _unsafe_closer);
436 } else {
437 struct dirent *dir_entry;
438 #ifdef HAVE_DIRFD
439 int fd_used_by_opendir = dirfd(proc_fd_dir);
440 #else
441 int fd_used_by_opendir = start_fd - 1;
442 #endif
443 errno = 0;
444 while ((dir_entry = readdir(proc_fd_dir))) {
445 int fd;
446 if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)
447 continue; /* Not a number. */
448 if (fd != fd_used_by_opendir && fd >= start_fd &&
449 !_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
450 fds_to_keep_len)) {
451 close(fd);
452 }
453 errno = 0;
454 }
455 if (errno) {
456 /* readdir error, revert behavior. Highly Unlikely. */
457 _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
458 _unsafe_closer);
459 }
460 closedir(proc_fd_dir);
461 }
462 }
463
464 #define _close_open_fds_fallback _close_open_fds_maybe_unsafe
465
466 #endif /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
467
468 /* We can use close_range() library function only if it's known to be
469 * async-signal-safe.
470 *
471 * On Linux, glibc explicitly documents it to be a thin wrapper over
472 * the system call, and other C libraries are likely to follow glibc.
473 */
474 #if defined(HAVE_CLOSE_RANGE) && \
475 (defined(__linux__) || defined(__FreeBSD__))
476 #define HAVE_ASYNC_SAFE_CLOSE_RANGE
477
/* Closer callback for _close_range_except(): close the inclusive range
 * [first, last] with a single close_range() call and return its result. */
static int
_close_range_closer(int first, int last)
{
    const int no_flags = 0;

    return close_range(first, last, no_flags);
}
483 #endif
484
485 static void
486 _close_open_fds(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
487 {
488 #ifdef HAVE_ASYNC_SAFE_CLOSE_RANGE
489 if (_close_range_except(
490 start_fd, INT_MAX, fds_to_keep, fds_to_keep_len,
491 _close_range_closer) == 0) {
492 return;
493 }
494 #endif
495 _close_open_fds_fallback(start_fd, fds_to_keep, fds_to_keep_len);
496 }
497
498 #ifdef VFORK_USABLE
499 /* Reset dispositions for all signals to SIG_DFL except for ignored
500 * signals. This way we ensure that no signal handlers can run
501 * after we unblock signals in a child created by vfork().
502 */
503 static void
504 reset_signal_handlers(const sigset_t *child_sigmask)
505 {
506 struct sigaction sa_dfl = {.sa_handler = SIG_DFL};
507 for (int sig = 1; sig < _NSIG; sig++) {
508 /* Dispositions for SIGKILL and SIGSTOP can't be changed. */
509 if (sig == SIGKILL || sig == SIGSTOP) {
510 continue;
511 }
512
513 /* There is no need to reset the disposition of signals that will
514 * remain blocked across execve() since the kernel will do it. */
515 if (sigismember(child_sigmask, sig) == 1) {
516 continue;
517 }
518
519 struct sigaction sa;
520 /* C libraries usually return EINVAL for signals used
521 * internally (e.g. for thread cancellation), so simply
522 * skip errors here. */
523 if (sigaction(sig, NULL, &sa) == -1) {
524 continue;
525 }
526
527 /* void *h works as these fields are both pointer types already. */
528 void *h = (sa.sa_flags & SA_SIGINFO ? (void *)sa.sa_sigaction :
529 (void *)sa.sa_handler);
530 if (h == SIG_IGN || h == SIG_DFL) {
531 continue;
532 }
533
534 /* This call can't reasonably fail, but if it does, terminating
535 * the child seems to be too harsh, so ignore errors. */
536 (void) sigaction(sig, &sa_dfl, NULL);
537 }
538 }
539 #endif /* VFORK_USABLE */
540
541
542 /*
543 * This function is code executed in the child process immediately after
544 * (v)fork to set things up and call exec().
545 *
546 * All of the code in this function must only use async-signal-safe functions,
547 * listed at `man 7 signal` or
548 * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html.
549 *
550 * This restriction is documented at
551 * http://www.opengroup.org/onlinepubs/009695399/functions/fork.html.
552 *
553 * If this function is called after vfork(), even more care must be taken.
554 * The lack of preparations that C libraries normally take on fork(),
555 * as well as sharing the address space with the parent, might make even
556 * async-signal-safe functions vfork-unsafe. In particular, on Linux,
557 * set*id() and setgroups() library functions must not be called, since
558 * they have to interact with the library-level thread list and send
559 * library-internal signals to implement per-process credentials semantics
560 * required by POSIX but not supported natively on Linux. Another reason to
561 * avoid this family of functions is that sharing an address space between
562 * processes running with different privileges is inherently insecure.
563 * See https://bugs.python.org/issue35823 for discussion and references.
564 *
565 * In some C libraries, setrlimit() has the same thread list/signalling
566 * behavior since resource limits were per-thread attributes before
567 * Linux 2.6.10. Musl, as of 1.2.1, is known to have this issue
568 * (https://www.openwall.com/lists/musl/2020/10/15/6).
569 *
570 * If vfork-unsafe functionality is desired after vfork(), consider using
571 * syscall() to obtain it.
572 */
573 Py_NO_INLINE static void
574 child_exec(char *const exec_array[],
575 char *const argv[],
576 char *const envp[],
577 const char *cwd,
578 int p2cread, int p2cwrite,
579 int c2pread, int c2pwrite,
580 int errread, int errwrite,
581 int errpipe_read, int errpipe_write,
582 int close_fds, int restore_signals,
583 int call_setsid, pid_t pgid_to_set,
584 gid_t gid,
585 Py_ssize_t extra_group_size, const gid_t *extra_groups,
586 uid_t uid, int child_umask,
587 const void *child_sigmask,
588 int *fds_to_keep, Py_ssize_t fds_to_keep_len,
589 PyObject *preexec_fn,
590 PyObject *preexec_fn_args_tuple)
591 {
592 int i, saved_errno, reached_preexec = 0;
593 PyObject *result;
594 const char* err_msg = "";
595 /* Buffer large enough to hold a hex integer. We can't malloc. */
596 char hex_errno[sizeof(saved_errno)*2+1];
597
598 if (make_inheritable(fds_to_keep, fds_to_keep_len, errpipe_write) < 0)
599 goto error;
600
601 /* Close parent's pipe ends. */
602 if (p2cwrite != -1)
603 POSIX_CALL(close(p2cwrite));
604 if (c2pread != -1)
605 POSIX_CALL(close(c2pread));
606 if (errread != -1)
607 POSIX_CALL(close(errread));
608 POSIX_CALL(close(errpipe_read));
609
610 /* When duping fds, if there arises a situation where one of the fds is
611 either 0, 1 or 2, it is possible that it is overwritten (#12607). */
612 if (c2pwrite == 0) {
613 POSIX_CALL(c2pwrite = dup(c2pwrite));
614 /* issue32270 */
615 if (_Py_set_inheritable_async_safe(c2pwrite, 0, NULL) < 0) {
616 goto error;
617 }
618 }
619 while (errwrite == 0 || errwrite == 1) {
620 POSIX_CALL(errwrite = dup(errwrite));
621 /* issue32270 */
622 if (_Py_set_inheritable_async_safe(errwrite, 0, NULL) < 0) {
623 goto error;
624 }
625 }
626
627 /* Dup fds for child.
628 dup2() removes the CLOEXEC flag but we must do it ourselves if dup2()
629 would be a no-op (issue #10806). */
630 if (p2cread == 0) {
631 if (_Py_set_inheritable_async_safe(p2cread, 1, NULL) < 0)
632 goto error;
633 }
634 else if (p2cread != -1)
635 POSIX_CALL(dup2(p2cread, 0)); /* stdin */
636
637 if (c2pwrite == 1) {
638 if (_Py_set_inheritable_async_safe(c2pwrite, 1, NULL) < 0)
639 goto error;
640 }
641 else if (c2pwrite != -1)
642 POSIX_CALL(dup2(c2pwrite, 1)); /* stdout */
643
644 if (errwrite == 2) {
645 if (_Py_set_inheritable_async_safe(errwrite, 1, NULL) < 0)
646 goto error;
647 }
648 else if (errwrite != -1)
649 POSIX_CALL(dup2(errwrite, 2)); /* stderr */
650
651 /* We no longer manually close p2cread, c2pwrite, and errwrite here as
652 * _close_open_fds takes care when it is not already non-inheritable. */
653
654 if (cwd)
655 POSIX_CALL(chdir(cwd));
656
657 if (child_umask >= 0)
658 umask(child_umask); /* umask() always succeeds. */
659
660 if (restore_signals)
661 _Py_RestoreSignals();
662
663 #ifdef VFORK_USABLE
664 if (child_sigmask) {
665 reset_signal_handlers(child_sigmask);
666 if ((errno = pthread_sigmask(SIG_SETMASK, child_sigmask, NULL))) {
667 goto error;
668 }
669 }
670 #endif
671
672 #ifdef HAVE_SETSID
673 if (call_setsid)
674 POSIX_CALL(setsid());
675 #endif
676
677 #ifdef HAVE_SETPGID
678 static_assert(_Py_IS_TYPE_SIGNED(pid_t), "pid_t is unsigned");
679 if (pgid_to_set >= 0) {
680 POSIX_CALL(setpgid(0, pgid_to_set));
681 }
682 #endif
683
684 #ifdef HAVE_SETGROUPS
685 if (extra_group_size > 0)
686 POSIX_CALL(setgroups(extra_group_size, extra_groups));
687 #endif /* HAVE_SETGROUPS */
688
689 #ifdef HAVE_SETREGID
690 if (gid != (gid_t)-1)
691 POSIX_CALL(setregid(gid, gid));
692 #endif /* HAVE_SETREGID */
693
694 #ifdef HAVE_SETREUID
695 if (uid != (uid_t)-1)
696 POSIX_CALL(setreuid(uid, uid));
697 #endif /* HAVE_SETREUID */
698
699
700 reached_preexec = 1;
701 if (preexec_fn != Py_None && preexec_fn_args_tuple) {
702 /* This is where the user has asked us to deadlock their program. */
703 result = PyObject_Call(preexec_fn, preexec_fn_args_tuple, NULL);
704 if (result == NULL) {
705 /* Stringifying the exception or traceback would involve
706 * memory allocation and thus potential for deadlock.
707 * We've already faced potential deadlock by calling back
708 * into Python in the first place, so it probably doesn't
709 * matter but we avoid it to minimize the possibility. */
710 err_msg = "Exception occurred in preexec_fn.";
711 errno = 0; /* We don't want to report an OSError. */
712 goto error;
713 }
714 /* Py_DECREF(result); - We're about to exec so why bother? */
715 }
716
717 /* close FDs after executing preexec_fn, which might open FDs */
718 if (close_fds) {
719 /* TODO HP-UX could use pstat_getproc() if anyone cares about it. */
720 _close_open_fds(3, fds_to_keep, fds_to_keep_len);
721 }
722
723 /* This loop matches the Lib/os.py _execvpe()'s PATH search when */
724 /* given the executable_list generated by Lib/subprocess.py. */
725 saved_errno = 0;
726 for (i = 0; exec_array[i] != NULL; ++i) {
727 const char *executable = exec_array[i];
728 if (envp) {
729 execve(executable, argv, envp);
730 } else {
731 execv(executable, argv);
732 }
733 if (errno != ENOENT && errno != ENOTDIR && saved_errno == 0) {
734 saved_errno = errno;
735 }
736 }
737 /* Report the first exec error, not the last. */
738 if (saved_errno)
739 errno = saved_errno;
740
741 error:
742 saved_errno = errno;
743 /* Report the posix error to our parent process. */
744 /* We ignore all write() return values as the total size of our writes is
745 less than PIPEBUF and we cannot do anything about an error anyways.
746 Use _Py_write_noraise() to retry write() if it is interrupted by a
747 signal (fails with EINTR). */
748 if (saved_errno) {
749 char *cur;
750 _Py_write_noraise(errpipe_write, "OSError:", 8);
751 cur = hex_errno + sizeof(hex_errno);
752 while (saved_errno != 0 && cur != hex_errno) {
753 *--cur = Py_hexdigits[saved_errno % 16];
754 saved_errno /= 16;
755 }
756 _Py_write_noraise(errpipe_write, cur, hex_errno + sizeof(hex_errno) - cur);
757 _Py_write_noraise(errpipe_write, ":", 1);
758 if (!reached_preexec) {
759 /* Indicate to the parent that the error happened before exec(). */
760 _Py_write_noraise(errpipe_write, "noexec", 6);
761 }
762 /* We can't call strerror(saved_errno). It is not async signal safe.
763 * The parent process will look the error message up. */
764 } else {
765 _Py_write_noraise(errpipe_write, "SubprocessError:0:", 18);
766 _Py_write_noraise(errpipe_write, err_msg, strlen(err_msg));
767 }
768 }
769
770
771 /* The main purpose of this wrapper function is to isolate vfork() from both
772 * subprocess_fork_exec() and child_exec(). A child process created via
773 * vfork() executes on the same stack as the parent process while the latter is
774 * suspended, so this function should not be inlined to avoid compiler bugs
775 * that might clobber data needed by the parent later. Additionally,
776 * child_exec() should not be inlined to avoid spurious -Wclobber warnings from
777 * GCC (see bpo-35823).
778 */
779 Py_NO_INLINE static pid_t
780 do_fork_exec(char *const exec_array[],
781 char *const argv[],
782 char *const envp[],
783 const char *cwd,
784 int p2cread, int p2cwrite,
785 int c2pread, int c2pwrite,
786 int errread, int errwrite,
787 int errpipe_read, int errpipe_write,
788 int close_fds, int restore_signals,
789 int call_setsid, pid_t pgid_to_set,
790 gid_t gid,
791 Py_ssize_t extra_group_size, const gid_t *extra_groups,
792 uid_t uid, int child_umask,
793 const void *child_sigmask,
794 int *fds_to_keep, Py_ssize_t fds_to_keep_len,
795 PyObject *preexec_fn,
796 PyObject *preexec_fn_args_tuple)
797 {
798
799 pid_t pid;
800
801 #ifdef VFORK_USABLE
802 PyThreadState *vfork_tstate_save;
803 if (child_sigmask) {
804 /* These are checked by our caller; verify them in debug builds. */
805 assert(uid == (uid_t)-1);
806 assert(gid == (gid_t)-1);
807 assert(extra_group_size < 0);
808 assert(preexec_fn == Py_None);
809
810 /* Drop the GIL so that other threads can continue execution while this
811 * thread in the parent remains blocked per vfork-semantics on the
812 * child's exec syscall outcome. Exec does filesystem access which
813 * can take an arbitrarily long time. This addresses GH-104372.
814 *
815 * The vfork'ed child still runs in our address space. Per POSIX it
816 * must be limited to nothing but exec, but the Linux implementation
817 * is a little more usable. See the child_exec() comment - The child
818 * MUST NOT re-acquire the GIL.
819 */
820 vfork_tstate_save = PyEval_SaveThread();
821 pid = vfork();
822 if (pid != 0) {
823 // Not in the child process, reacquire the GIL.
824 PyEval_RestoreThread(vfork_tstate_save);
825 }
826 if (pid == (pid_t)-1) {
827 /* If vfork() fails, fall back to using fork(). When it isn't
828 * allowed in a process by the kernel, vfork can return -1
829 * with errno EINVAL. https://bugs.python.org/issue47151. */
830 pid = fork();
831 }
832 } else
833 #endif
834 {
835 pid = fork();
836 }
837
838 if (pid != 0) {
839 // Parent process.
840 return pid;
841 }
842
843 /* Child process.
844 * See the comment above child_exec() for restrictions imposed on
845 * the code below.
846 */
847
848 if (preexec_fn != Py_None) {
849 /* We'll be calling back into Python later so we need to do this.
850 * This call may not be async-signal-safe but neither is calling
851 * back into Python. The user asked us to use hope as a strategy
852 * to avoid deadlock... */
853 PyOS_AfterFork_Child();
854 }
855
856 child_exec(exec_array, argv, envp, cwd,
857 p2cread, p2cwrite, c2pread, c2pwrite,
858 errread, errwrite, errpipe_read, errpipe_write,
859 close_fds, restore_signals, call_setsid, pgid_to_set,
860 gid, extra_group_size, extra_groups,
861 uid, child_umask, child_sigmask,
862 fds_to_keep, fds_to_keep_len,
863 preexec_fn, preexec_fn_args_tuple);
864 _exit(255);
865 return 0; /* Dead code to avoid a potential compiler warning. */
866 }
867
868 /*[clinic input]
869 _posixsubprocess.fork_exec as subprocess_fork_exec
870 args as process_args: object
871 executable_list: object
872 close_fds: bool
873 pass_fds as py_fds_to_keep: object(subclass_of='&PyTuple_Type')
874 cwd as cwd_obj: object
875 env as env_list: object
876 p2cread: int
877 p2cwrite: int
878 c2pread: int
879 c2pwrite: int
880 errread: int
881 errwrite: int
882 errpipe_read: int
883 errpipe_write: int
884 restore_signals: bool
885 call_setsid: bool
886 pgid_to_set: pid_t
887 gid as gid_object: object
888 extra_groups as extra_groups_packed: object
889 uid as uid_object: object
890 child_umask: int
891 preexec_fn: object
892 allow_vfork: bool
893 /
894
895 Spawn a fresh new child process.
896
897 Fork a child process, close parent file descriptors as appropriate in the
898 child and duplicate the few that are needed before calling exec() in the
899 child process.
900
901 If close_fds is True, close file descriptors 3 and higher, except those listed
902 in the sorted tuple pass_fds.
903
904 The preexec_fn, if supplied, will be called immediately before closing file
905 descriptors and exec.
906
907 WARNING: preexec_fn is NOT SAFE if your application uses threads.
908 It may trigger infrequent, difficult to debug deadlocks.
909
910 If an error occurs in the child process before the exec, it is
911 serialized and written to the errpipe_write fd per subprocess.py.
912
913 Returns: the child process's PID.
914
915 Raises: Only on an error in the parent process.
916 [clinic start generated code]*/
917
918 static PyObject *
919 subprocess_fork_exec_impl(PyObject *module, PyObject *process_args,
920 PyObject *executable_list, int close_fds,
921 PyObject *py_fds_to_keep, PyObject *cwd_obj,
922 PyObject *env_list, int p2cread, int p2cwrite,
923 int c2pread, int c2pwrite, int errread,
924 int errwrite, int errpipe_read, int errpipe_write,
925 int restore_signals, int call_setsid,
926 pid_t pgid_to_set, PyObject *gid_object,
927 PyObject *extra_groups_packed,
928 PyObject *uid_object, int child_umask,
929 PyObject *preexec_fn, int allow_vfork)
930 /*[clinic end generated code: output=7ee4f6ee5cf22b5b input=51757287ef266ffa]*/
931 {
932 PyObject *converted_args = NULL, *fast_args = NULL;
933 PyObject *preexec_fn_args_tuple = NULL;
934 gid_t *extra_groups = NULL;
935 PyObject *cwd_obj2 = NULL;
936 const char *cwd = NULL;
937 pid_t pid = -1;
938 int need_to_reenable_gc = 0;
939 char *const *argv = NULL, *const *envp = NULL;
940 Py_ssize_t extra_group_size = 0;
941 int need_after_fork = 0;
942 int saved_errno = 0;
943 int *c_fds_to_keep = NULL;
944 Py_ssize_t fds_to_keep_len = PyTuple_GET_SIZE(py_fds_to_keep);
945
946 PyInterpreterState *interp = PyInterpreterState_Get();
947 if ((preexec_fn != Py_None) && interp->finalizing) {
948 PyErr_SetString(PyExc_RuntimeError,
949 "preexec_fn not supported at interpreter shutdown");
950 return NULL;
951 }
952 if ((preexec_fn != Py_None) && (interp != PyInterpreterState_Main())) {
953 PyErr_SetString(PyExc_RuntimeError,
954 "preexec_fn not supported within subinterpreters");
955 return NULL;
956 }
957
958 if (close_fds && errpipe_write < 3) { /* precondition */
959 PyErr_SetString(PyExc_ValueError, "errpipe_write must be >= 3");
960 return NULL;
961 }
962 if (_sanity_check_python_fd_sequence(py_fds_to_keep)) {
963 PyErr_SetString(PyExc_ValueError, "bad value(s) in fds_to_keep");
964 return NULL;
965 }
966
967 /* We need to call gc.disable() when we'll be calling preexec_fn */
968 if (preexec_fn != Py_None) {
969 need_to_reenable_gc = PyGC_Disable();
970 }
971
972 char *const *exec_array = _PySequence_BytesToCharpArray(executable_list);
973 if (!exec_array)
974 goto cleanup;
975
976 /* Convert args and env into appropriate arguments for exec() */
977 /* These conversions are done in the parent process to avoid allocating
978 or freeing memory in the child process. */
979 if (process_args != Py_None) {
980 Py_ssize_t num_args;
981 /* Equivalent to: */
982 /* tuple(PyUnicode_FSConverter(arg) for arg in process_args) */
983 fast_args = PySequence_Fast(process_args, "argv must be a tuple");
984 if (fast_args == NULL)
985 goto cleanup;
986 num_args = PySequence_Fast_GET_SIZE(fast_args);
987 converted_args = PyTuple_New(num_args);
988 if (converted_args == NULL)
989 goto cleanup;
990 for (Py_ssize_t arg_num = 0; arg_num < num_args; ++arg_num) {
991 PyObject *borrowed_arg, *converted_arg;
992 if (PySequence_Fast_GET_SIZE(fast_args) != num_args) {
993 PyErr_SetString(PyExc_RuntimeError, "args changed during iteration");
994 goto cleanup;
995 }
996 borrowed_arg = PySequence_Fast_GET_ITEM(fast_args, arg_num);
997 if (PyUnicode_FSConverter(borrowed_arg, &converted_arg) == 0)
998 goto cleanup;
999 PyTuple_SET_ITEM(converted_args, arg_num, converted_arg);
1000 }
1001
1002 argv = _PySequence_BytesToCharpArray(converted_args);
1003 Py_CLEAR(converted_args);
1004 Py_CLEAR(fast_args);
1005 if (!argv)
1006 goto cleanup;
1007 }
1008
1009 if (env_list != Py_None) {
1010 envp = _PySequence_BytesToCharpArray(env_list);
1011 if (!envp)
1012 goto cleanup;
1013 }
1014
1015 if (cwd_obj != Py_None) {
1016 if (PyUnicode_FSConverter(cwd_obj, &cwd_obj2) == 0)
1017 goto cleanup;
1018 cwd = PyBytes_AsString(cwd_obj2);
1019 }
1020
1021 if (extra_groups_packed != Py_None) {
1022 #ifdef HAVE_SETGROUPS
1023 if (!PyList_Check(extra_groups_packed)) {
1024 PyErr_SetString(PyExc_TypeError,
1025 "setgroups argument must be a list");
1026 goto cleanup;
1027 }
1028 extra_group_size = PySequence_Size(extra_groups_packed);
1029
1030 if (extra_group_size < 0)
1031 goto cleanup;
1032
1033 if (extra_group_size > MAX_GROUPS) {
1034 PyErr_SetString(PyExc_ValueError, "too many extra_groups");
1035 goto cleanup;
1036 }
1037
1038 /* Deliberately keep extra_groups == NULL for extra_group_size == 0 */
1039 if (extra_group_size > 0) {
1040 extra_groups = PyMem_RawMalloc(extra_group_size * sizeof(gid_t));
1041 if (extra_groups == NULL) {
1042 PyErr_SetString(PyExc_MemoryError,
1043 "failed to allocate memory for group list");
1044 goto cleanup;
1045 }
1046 }
1047
1048 for (Py_ssize_t i = 0; i < extra_group_size; i++) {
1049 PyObject *elem;
1050 elem = PySequence_GetItem(extra_groups_packed, i);
1051 if (!elem)
1052 goto cleanup;
1053 if (!PyLong_Check(elem)) {
1054 PyErr_SetString(PyExc_TypeError,
1055 "extra_groups must be integers");
1056 Py_DECREF(elem);
1057 goto cleanup;
1058 } else {
1059 gid_t gid;
1060 if (!_Py_Gid_Converter(elem, &gid)) {
1061 Py_DECREF(elem);
1062 PyErr_SetString(PyExc_ValueError, "invalid group id");
1063 goto cleanup;
1064 }
1065 extra_groups[i] = gid;
1066 }
1067 Py_DECREF(elem);
1068 }
1069
1070 #else /* HAVE_SETGROUPS */
1071 PyErr_BadInternalCall();
1072 goto cleanup;
1073 #endif /* HAVE_SETGROUPS */
1074 }
1075
1076 gid_t gid = (gid_t)-1;
1077 if (gid_object != Py_None) {
1078 #ifdef HAVE_SETREGID
1079 if (!_Py_Gid_Converter(gid_object, &gid))
1080 goto cleanup;
1081
1082 #else /* HAVE_SETREGID */
1083 PyErr_BadInternalCall();
1084 goto cleanup;
#endif /* HAVE_SETREGID */
1086 }
1087
1088 uid_t uid = (uid_t)-1;
1089 if (uid_object != Py_None) {
1090 #ifdef HAVE_SETREUID
1091 if (!_Py_Uid_Converter(uid_object, &uid))
1092 goto cleanup;
1093
1094 #else /* HAVE_SETREUID */
1095 PyErr_BadInternalCall();
1096 goto cleanup;
1097 #endif /* HAVE_SETREUID */
1098 }
1099
1100 c_fds_to_keep = PyMem_Malloc(fds_to_keep_len * sizeof(int));
1101 if (c_fds_to_keep == NULL) {
1102 PyErr_SetString(PyExc_MemoryError, "failed to malloc c_fds_to_keep");
1103 goto cleanup;
1104 }
1105 if (convert_fds_to_keep_to_c(py_fds_to_keep, c_fds_to_keep) < 0) {
1106 goto cleanup;
1107 }
1108
1109 /* This must be the last thing done before fork() because we do not
1110 * want to call PyOS_BeforeFork() if there is any chance of another
1111 * error leading to the cleanup: code without calling fork(). */
1112 if (preexec_fn != Py_None) {
1113 preexec_fn_args_tuple = PyTuple_New(0);
1114 if (!preexec_fn_args_tuple)
1115 goto cleanup;
1116 PyOS_BeforeFork();
1117 need_after_fork = 1;
1118 }
1119
1120 /* NOTE: When old_sigmask is non-NULL, do_fork_exec() may use vfork(). */
1121 const void *old_sigmask = NULL;
1122 #ifdef VFORK_USABLE
1123 /* Use vfork() only if it's safe. See the comment above child_exec(). */
1124 sigset_t old_sigs;
1125 if (preexec_fn == Py_None && allow_vfork &&
1126 uid == (uid_t)-1 && gid == (gid_t)-1 && extra_group_size < 0) {
1127 /* Block all signals to ensure that no signal handlers are run in the
1128 * child process while it shares memory with us. Note that signals
1129 * used internally by C libraries won't be blocked by
1130 * pthread_sigmask(), but signal handlers installed by C libraries
1131 * normally service only signals originating from *within the process*,
1132 * so it should be sufficient to consider any library function that
1133 * might send such a signal to be vfork-unsafe and do not call it in
1134 * the child.
1135 */
1136 sigset_t all_sigs;
1137 sigfillset(&all_sigs);
1138 if ((saved_errno = pthread_sigmask(SIG_BLOCK, &all_sigs, &old_sigs))) {
1139 goto cleanup;
1140 }
1141 old_sigmask = &old_sigs;
1142 }
1143 #endif
1144
1145 pid = do_fork_exec(exec_array, argv, envp, cwd,
1146 p2cread, p2cwrite, c2pread, c2pwrite,
1147 errread, errwrite, errpipe_read, errpipe_write,
1148 close_fds, restore_signals, call_setsid, pgid_to_set,
1149 gid, extra_group_size, extra_groups,
1150 uid, child_umask, old_sigmask,
1151 c_fds_to_keep, fds_to_keep_len,
1152 preexec_fn, preexec_fn_args_tuple);
1153
1154 /* Parent (original) process */
1155 if (pid == (pid_t)-1) {
1156 /* Capture errno for the exception. */
1157 saved_errno = errno;
1158 }
1159
1160 #ifdef VFORK_USABLE
1161 if (old_sigmask) {
1162 /* vfork() semantics guarantees that the parent is blocked
1163 * until the child performs _exit() or execve(), so it is safe
1164 * to unblock signals once we're here.
1165 * Note that in environments where vfork() is implemented as fork(),
1166 * such as QEMU user-mode emulation, the parent won't be blocked,
1167 * but it won't share the address space with the child,
1168 * so it's still safe to unblock the signals.
1169 *
1170 * We don't handle errors here because this call can't fail
1171 * if valid arguments are given, and because there is no good
1172 * way for the caller to deal with a failure to restore
1173 * the thread signal mask. */
1174 (void) pthread_sigmask(SIG_SETMASK, old_sigmask, NULL);
1175 }
1176 #endif
1177
1178 if (need_after_fork)
1179 PyOS_AfterFork_Parent();
1180
1181 cleanup:
1182 if (c_fds_to_keep != NULL) {
1183 PyMem_Free(c_fds_to_keep);
1184 }
1185
1186 if (saved_errno != 0) {
1187 errno = saved_errno;
1188 /* We can't call this above as PyOS_AfterFork_Parent() calls back
1189 * into Python code which would see the unreturned error. */
1190 PyErr_SetFromErrno(PyExc_OSError);
1191 }
1192
1193 Py_XDECREF(preexec_fn_args_tuple);
1194 PyMem_RawFree(extra_groups);
1195 Py_XDECREF(cwd_obj2);
1196 if (envp)
1197 _Py_FreeCharPArray(envp);
1198 Py_XDECREF(converted_args);
1199 Py_XDECREF(fast_args);
1200 if (argv)
1201 _Py_FreeCharPArray(argv);
1202 if (exec_array)
1203 _Py_FreeCharPArray(exec_array);
1204
1205 if (need_to_reenable_gc) {
1206 PyGC_Enable();
1207 }
1208
1209 return pid == -1 ? NULL : PyLong_FromPid(pid);
1210 }
1211
1212 /* module level code ********************************************************/
1213
1214 PyDoc_STRVAR(module_doc,
1215 "A POSIX helper for the subprocess module.");
1216
1217 static PyMethodDef module_methods[] = {
1218 SUBPROCESS_FORK_EXEC_METHODDEF
1219 {NULL, NULL} /* sentinel */
1220 };
1221
1222 static PyModuleDef_Slot _posixsubprocess_slots[] = {
1223 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1224 {0, NULL}
1225 };
1226
1227 static struct PyModuleDef _posixsubprocessmodule = {
1228 PyModuleDef_HEAD_INIT,
1229 .m_name = "_posixsubprocess",
1230 .m_doc = module_doc,
1231 .m_size = 0,
1232 .m_methods = module_methods,
1233 .m_slots = _posixsubprocess_slots,
1234 };
1235
1236 PyMODINIT_FUNC
1237 PyInit__posixsubprocess(void)
1238 {
1239 return PyModuleDef_Init(&_posixsubprocessmodule);
1240 }