1 /*
2 * Copyright (c) 2019 Dmitry V. Levin <ldv@strace.io>
3 * Copyright (c) 2019-2023 The strace developers.
4 * All rights reserved.
5 *
6 * SPDX-License-Identifier: LGPL-2.1-or-later
7 */
8
9 #include "defs.h"
10
11 #include "kernel_time_types.h"
12 #define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H
13 #include <linux/io_uring.h>
14
15 #include "xlat/uring_enter_flags.h"
16 #include "xlat/uring_files_update_fds.h"
17 #include "xlat/uring_iowq_acct.h"
18 #include "xlat/uring_op_flags.h"
19 #include "xlat/uring_ops.h"
20 #include "xlat/uring_setup_features.h"
21 #include "xlat/uring_setup_flags.h"
22 #include "xlat/uring_sqe_flags.h"
23 #include "xlat/uring_register_opcodes.h"
24 #include "xlat/uring_register_rsrc_flags.h"
25 #include "xlat/uring_restriction_opcodes.h"
26
27 static void
28 print_io_sqring_offsets(const struct io_sqring_offsets *const p)
29 {
30 tprint_struct_begin();
31 PRINT_FIELD_U(*p, head);
32 tprint_struct_next();
33 PRINT_FIELD_U(*p, tail);
34 tprint_struct_next();
35 PRINT_FIELD_U(*p, ring_mask);
36 tprint_struct_next();
37 PRINT_FIELD_U(*p, ring_entries);
38 tprint_struct_next();
39 PRINT_FIELD_U(*p, flags);
40 tprint_struct_next();
41 PRINT_FIELD_U(*p, dropped);
42 tprint_struct_next();
43 PRINT_FIELD_U(*p, array);
44 if (p->resv1) {
45 tprint_struct_next();
46 PRINT_FIELD_X(*p, resv1);
47 }
48 tprint_struct_next();
49 PRINT_FIELD_X(*p, user_addr);
50 tprint_struct_end();
51 }
52
53 static void
54 print_io_cqring_offsets(const struct io_cqring_offsets *const p)
55 {
56 tprint_struct_begin();
57 PRINT_FIELD_U(*p, head);
58 tprint_struct_next();
59 PRINT_FIELD_U(*p, tail);
60 tprint_struct_next();
61 PRINT_FIELD_U(*p, ring_mask);
62 tprint_struct_next();
63 PRINT_FIELD_U(*p, ring_entries);
64 tprint_struct_next();
65 PRINT_FIELD_U(*p, overflow);
66 tprint_struct_next();
67 PRINT_FIELD_U(*p, cqes);
68 tprint_struct_next();
69 PRINT_FIELD_U(*p, flags);
70 if (p->resv1) {
71 tprint_struct_next();
72 PRINT_FIELD_X(*p, resv1);
73 }
74 tprint_struct_next();
75 PRINT_FIELD_X(*p, user_addr);
76 tprint_struct_end();
77 }
78
79 SYS_FUNC(io_uring_setup)
80 {
81 const uint32_t entries = tcp->u_arg[0];
82 const kernel_ulong_t params_addr = tcp->u_arg[1];
83 struct io_uring_params params;
84
85 if (entering(tcp)) {
86 /* entries */
87 PRINT_VAL_U(entries);
88 tprint_arg_next();
89
90 /* params */
91 if (umove_or_printaddr(tcp, params_addr, ¶ms))
92 return RVAL_DECODED | RVAL_FD;
93
94 tprint_struct_begin();
95 PRINT_FIELD_FLAGS(params, flags, uring_setup_flags,
96 "IORING_SETUP_???");
97 tprint_struct_next();
98 PRINT_FIELD_X(params, sq_thread_cpu);
99 tprint_struct_next();
100 PRINT_FIELD_U(params, sq_thread_idle);
101 if (params.flags & IORING_SETUP_ATTACH_WQ) {
102 tprint_struct_next();
103 PRINT_FIELD_FD(params, wq_fd, tcp);
104 }
105 if (!IS_ARRAY_ZERO(params.resv)) {
106 tprint_struct_next();
107 PRINT_FIELD_ARRAY(params, resv, tcp,
108 print_xint_array_member);
109 }
110 return 0;
111 }
112
113 /* exiting */
114 if (tfetch_mem(tcp, params_addr, sizeof(params), ¶ms)) {
115 tprint_struct_next();
116 PRINT_FIELD_U(params, sq_entries);
117 tprint_struct_next();
118 PRINT_FIELD_U(params, cq_entries);
119 tprint_struct_next();
120 PRINT_FIELD_FLAGS(params, features,
121 uring_setup_features,
122 "IORING_FEAT_???");
123 tprint_struct_next();
124 PRINT_FIELD_OBJ_PTR(params, sq_off,
125 print_io_sqring_offsets);
126 tprint_struct_next();
127 PRINT_FIELD_OBJ_PTR(params, cq_off,
128 print_io_cqring_offsets);
129 }
130 tprint_struct_end();
131
132 return RVAL_DECODED | RVAL_FD;
133 }
134
135 SYS_FUNC(io_uring_enter)
136 {
137 const int fd = tcp->u_arg[0];
138 const uint32_t to_submit = tcp->u_arg[1];
139 const uint32_t min_complete = tcp->u_arg[2];
140 const uint32_t flags = tcp->u_arg[3];
141 const kernel_ulong_t sigset_addr = tcp->u_arg[4];
142 const kernel_ulong_t sigset_size = tcp->u_arg[5];
143
144 /* fd */
145 printfd(tcp, fd);
146 tprint_arg_next();
147
148 /* to_submit */
149 PRINT_VAL_U(to_submit);
150 tprint_arg_next();
151
152 /* min_complete */
153 PRINT_VAL_U(min_complete);
154 tprint_arg_next();
155
156 /* flags */
157 printflags(uring_enter_flags, flags, "IORING_ENTER_???");
158 tprint_arg_next();
159
160 /* sigset */
161 print_sigset_addr_len(tcp, sigset_addr, sigset_size);
162 tprint_arg_next();
163
164 /* sigsetsize */
165 PRINT_VAL_U(sigset_size);
166
167 return RVAL_DECODED;
168 }
169
170 static bool
171 print_files_update_array_member(struct tcb *tcp, void *elem_buf,
172 size_t elem_size, void *data)
173 {
174 int fd = *(int *) elem_buf;
175
176 if (fd < -1)
177 printxval_d(uring_files_update_fds, fd, NULL);
178 else
179 printfd(tcp, fd);
180
181 return true;
182 }
183
184 static void
185 print_io_uring_files_update(struct tcb *tcp, const kernel_ulong_t addr,
186 const unsigned int nargs)
187 {
188 struct io_uring_files_update arg;
189 int buf;
190
191 if (umove_or_printaddr(tcp, addr, &arg))
192 return;
193
194 tprint_struct_begin();
195 PRINT_FIELD_U(arg, offset);
196 if (arg.resv) {
197 tprint_struct_next();
198 PRINT_FIELD_X(arg, resv);
199 }
200 tprint_struct_next();
201 tprints_field_name("fds");
202 print_big_u64_addr(arg.fds);
203 print_array(tcp, arg.fds, nargs, &buf, sizeof(buf),
204 tfetch_mem, print_files_update_array_member, NULL);
205 tprint_struct_end();
206 }
207
208 static bool
209 print_io_uring_probe_op(struct tcb *tcp, void *elem_buf, size_t elem_size,
210 void *data)
211 {
212 struct io_uring_probe_op *op = (struct io_uring_probe_op *) elem_buf;
213
214 tprint_struct_begin();
215 PRINT_FIELD_XVAL_U(*op, op, uring_ops, "IORING_OP_???");
216 if (op->resv) {
217 tprint_struct_next();
218 PRINT_FIELD_X(*op, resv);
219 }
220 tprint_struct_next();
221 PRINT_FIELD_FLAGS(*op, flags, uring_op_flags, "IO_URING_OP_???");
222 if (op->resv2) {
223 tprint_struct_next();
224 PRINT_FIELD_X(*op, resv2);
225 }
226 tprint_struct_end();
227
228 return true;
229 }
230
231 static int
232 print_io_uring_probe(struct tcb *tcp, const kernel_ulong_t addr,
233 const unsigned int nargs)
234 {
235 struct io_uring_probe *probe;
236 unsigned long printed = exiting(tcp) ? get_tcb_priv_ulong(tcp) : false;
237
238 if (exiting(tcp) && syserror(tcp)) {
239 if (!printed)
240 printaddr(addr);
241 return RVAL_DECODED;
242 }
243 if (nargs > 256) {
244 printaddr(addr);
245 return RVAL_DECODED;
246 }
247 if (printed)
248 tprint_value_changed();
249
250 /* Maximum size is 8 * 256 + 16, a bit over 4k */
251 size_t probe_sz = sizeof(probe->ops[0]) * nargs + sizeof(*probe);
252 probe = alloca(probe_sz);
253
254 /*
255 * So far, the operation doesn't use any data from the arg provided,
256 * but it checks that it is filled with zeroes.
257 */
258 if (umoven_or_printaddr(tcp, addr, probe_sz, probe))
259 return RVAL_DECODED;
260 if (entering(tcp) && is_filled((const char *) probe, 0, probe_sz))
261 return 0;
262 set_tcb_priv_ulong(tcp, true);
263
264 tprint_struct_begin();
265 PRINT_FIELD_XVAL_U(*probe, last_op, uring_ops, "IORING_OP_???");
266 tprint_struct_next();
267 PRINT_FIELD_U(*probe, ops_len);
268 if (probe->resv) {
269 tprint_struct_next();
270 PRINT_FIELD_X(*probe, resv);
271 }
272 if (!IS_ARRAY_ZERO(probe->resv2)) {
273 tprint_struct_next();
274 PRINT_FIELD_ARRAY(*probe, resv2, tcp,
275 print_xint_array_member);
276 }
277 tprint_struct_next();
278 PRINT_FIELD_OBJ_TCB_VAL(*probe, ops, tcp, print_local_array_ex,
279 entering(tcp) ? nargs : MIN(probe->ops_len, nargs),
280 sizeof(probe->ops[0]), print_io_uring_probe_op, NULL,
281 exiting(tcp) && (nargs < probe->ops_len)
282 ? PAF_ARRAY_TRUNCATED : 0,
283 NULL, NULL);
284 tprint_struct_end();
285
286 return 0;
287 }
288
289 static bool
290 print_io_uring_restriction(struct tcb *tcp, void *elem_buf, size_t elem_size,
291 void *data)
292 {
293 struct io_uring_restriction *r =
294 (struct io_uring_restriction *) elem_buf;
295 CHECK_TYPE_SIZE(*r, 16);
296 CHECK_TYPE_SIZE(r->resv2, 12);
297
298 tprint_struct_begin();
299 PRINT_FIELD_XVAL(*r, opcode, uring_restriction_opcodes,
300 "IORING_RESTRICTION_???");
301 switch (r->opcode) {
302 case IORING_RESTRICTION_REGISTER_OP:
303 tprint_struct_next();
304 PRINT_FIELD_XVAL(*r, register_op, uring_register_opcodes,
305 "IORING_REGISTER_???");
306 break;
307 case IORING_RESTRICTION_SQE_OP:
308 tprint_struct_next();
309 PRINT_FIELD_XVAL(*r, sqe_op, uring_ops, "IORING_OP_???");
310 break;
311 case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
312 case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
313 tprint_struct_next();
314 PRINT_FIELD_FLAGS(*r, sqe_flags, uring_sqe_flags, "IOSQE_???");
315 break;
316 default:
317 tprintf_comment("op: %#x", r->register_op);
318 }
319 if (r->resv) {
320 tprint_struct_next();
321 PRINT_FIELD_X(*r, resv);
322 }
323 if (!IS_ARRAY_ZERO(r->resv2)) {
324 tprint_struct_next();
325 PRINT_FIELD_ARRAY(*r, resv2, tcp, print_xint_array_member);
326 }
327 tprint_struct_end();
328
329 return true;
330 }
331
332 static void
333 print_io_uring_restrictions(struct tcb *tcp, const kernel_ulong_t addr,
334 const unsigned int nargs)
335 {
336 struct io_uring_restriction buf;
337 print_array(tcp, addr, nargs, &buf, sizeof(buf),
338 tfetch_mem, print_io_uring_restriction, NULL);
339 }
340
341 static void
342 print_io_uring_rsrc_data(struct tcb *tcp, const uint64_t data,
343 const unsigned int nr, const unsigned int opcode)
344 {
345 int fd_buf;
346
347 switch (opcode) {
348 case IORING_REGISTER_FILES2:
349 case IORING_REGISTER_BUFFERS2:
350 case IORING_REGISTER_BUFFERS_UPDATE:
351 case IORING_REGISTER_FILES_UPDATE2:
352 tprint_struct_next();
353 tprints_field_name("data");
354 print_big_u64_addr(data);
355 break;
356 }
357
358 switch (opcode) {
359 case IORING_REGISTER_FILES2:
360 print_array(tcp, data, nr, &fd_buf, sizeof(fd_buf),
361 tfetch_mem, print_fd_array_member, NULL);
362 break;
363 case IORING_REGISTER_FILES_UPDATE2:
364 print_array(tcp, data, nr, &fd_buf, sizeof(fd_buf),
365 tfetch_mem, print_files_update_array_member, NULL);
366 break;
367 case IORING_REGISTER_BUFFERS2:
368 case IORING_REGISTER_BUFFERS_UPDATE:
369 tprint_iov(tcp, nr, data, iov_decode_addr);
370 break;
371 }
372 }
373
374 static void
375 print_io_uring_rsrc_tags(struct tcb *tcp, const uint64_t tags,
376 const unsigned int nr)
377 {
378 uint64_t tag_buf;
379
380 tprint_struct_next();
381 tprints_field_name("tags");
382 print_big_u64_addr(tags);
383 print_array(tcp, tags, nr, &tag_buf, sizeof(tag_buf),
384 tfetch_mem, print_xint_array_member, NULL);
385 }
386
387 static void
388 print_io_uring_register_rsrc(struct tcb *tcp, const kernel_ulong_t addr,
389 const unsigned int size, const unsigned int opcode)
390 {
391 struct io_uring_rsrc_register arg;
392 CHECK_TYPE_SIZE(arg, 32);
393 CHECK_TYPE_SIZE(arg.resv2, sizeof(uint64_t));
394
395 if (size < 32) {
396 printaddr(addr);
397 return;
398 }
399
400 if (umove_or_printaddr(tcp, addr, &arg))
401 return;
402
403 tprint_struct_begin();
404 PRINT_FIELD_U(arg, nr);
405
406 tprint_struct_next();
407 PRINT_FIELD_FLAGS(arg, flags, uring_register_rsrc_flags,
408 "IORING_RSRC_REGISTER_???");
409
410 if (arg.resv2) {
411 tprint_struct_next();
412 PRINT_FIELD_X(arg, resv2);
413 }
414
415 print_io_uring_rsrc_data(tcp, arg.data, arg.nr, opcode);
416
417 print_io_uring_rsrc_tags(tcp, arg.tags, arg.nr);
418
419 if (size > sizeof(arg)) {
420 print_nonzero_bytes(tcp, tprint_struct_next, addr, sizeof(arg),
421 MIN(size, get_pagesize()), QUOTE_FORCE_HEX);
422 }
423
424 tprint_struct_end();
425 }
426
427 static void
428 print_io_uring_update_rsrc(struct tcb *tcp, const kernel_ulong_t addr,
429 const unsigned int size, const unsigned int opcode)
430 {
431 struct io_uring_rsrc_update2 arg;
432 CHECK_TYPE_SIZE(arg, 32);
433 CHECK_TYPE_SIZE(arg.resv, sizeof(uint32_t));
434 CHECK_TYPE_SIZE(arg.resv2, sizeof(uint32_t));
435
436 if (size < 32) {
437 printaddr(addr);
438 return;
439 }
440
441 if (umove_or_printaddr(tcp, addr, &arg))
442 return;
443
444 tprint_struct_begin();
445 PRINT_FIELD_U(arg, offset);
446
447 if (arg.resv) {
448 tprint_struct_next();
449 PRINT_FIELD_X(arg, resv);
450 }
451
452 print_io_uring_rsrc_data(tcp, arg.data, arg.nr, opcode);
453
454 print_io_uring_rsrc_tags(tcp, arg.tags, arg.nr);
455
456 tprint_struct_next();
457 PRINT_FIELD_U(arg, nr);
458
459 if (arg.resv2) {
460 tprint_struct_next();
461 PRINT_FIELD_X(arg, resv2);
462 }
463
464 if (size > sizeof(arg)) {
465 print_nonzero_bytes(tcp, tprint_struct_next, addr, sizeof(arg),
466 MIN(size, get_pagesize()), QUOTE_FORCE_HEX);
467 }
468
469 tprint_struct_end();
470 }
471
472 static int
473 print_io_uring_iowq_acct(struct tcb *tcp, const kernel_ulong_t addr,
474 const unsigned int nargs)
475 {
476 uint32_t val;
477 bool ret = print_array_ex(tcp, addr, nargs, &val, sizeof(val),
478 tfetch_mem, print_uint_array_member, NULL,
479 PAF_PRINT_INDICES | XLAT_STYLE_FMT_U,
480 uring_iowq_acct, "IO_WQ_???");
481
482 return ret ? 0 : RVAL_DECODED;
483 }
484
485 static bool
486 print_ringfd_register_array_member(struct tcb *tcp, void *buf,
487 size_t elem_size, void *data)
488 {
489 /* offset - offset to insert at or -1 for the first free place */
490 /* resv - reserved */
491 /* data - FD to register */
492 struct io_uring_rsrc_update *elem = buf;
493
494 tprint_struct_begin();
495 if (elem->offset == -1U)
496 PRINT_FIELD_D(*elem, offset);
497 else
498 PRINT_FIELD_U(*elem, offset);
499
500 if (elem->resv) {
501 tprint_struct_next();
502 PRINT_FIELD_X(*elem, resv);
503 }
504
505 tprint_struct_next();
506 PRINT_FIELD_FD(*elem, data, tcp);
507
508 tprint_struct_end();
509
510 return true;
511 }
512 static void
513 print_io_uring_ringfds_register(struct tcb *tcp, const kernel_ulong_t arg,
514 const unsigned int nargs)
515 {
516 struct io_uring_rsrc_update buf;
517 CHECK_TYPE_SIZE(buf, 16);
518 CHECK_TYPE_SIZE(buf.resv, sizeof(uint32_t));
519
520 print_array(tcp, arg, nargs, &buf, sizeof(buf),
521 tfetch_mem, print_ringfd_register_array_member, NULL);
522 }
523
524
525 static bool
526 print_ringfd_unregister_array_member(struct tcb *tcp, void *buf,
527 size_t elem_size, void *data)
528 {
529 /* offset - offset to unregister FD */
530 /* resv - reserved */
531 /* data - unused */
532 struct io_uring_rsrc_update *elem = buf;
533
534 tprint_struct_begin();
535 PRINT_FIELD_U(*elem, offset);
536
537 if (elem->resv) {
538 tprint_struct_next();
539 PRINT_FIELD_X(*elem, resv);
540 }
541
542 if (elem->data) {
543 tprint_struct_next();
544 PRINT_FIELD_X(*elem, data);
545 }
546
547 tprint_struct_end();
548
549 return true;
550 }
551 static void
552 print_io_uring_ringfds_unregister(struct tcb *tcp, const kernel_ulong_t arg,
553 const unsigned int nargs)
554 {
555 struct io_uring_rsrc_update buf;
556
557 print_array(tcp, arg, nargs, &buf, sizeof(buf),
558 tfetch_mem, print_ringfd_unregister_array_member, NULL);
559 }
560
561 static void
562 print_io_uring_buf_reg(struct tcb *tcp, const kernel_ulong_t addr)
563 {
564 struct io_uring_buf_reg arg;
565 CHECK_TYPE_SIZE(arg, 40);
566 CHECK_TYPE_SIZE(arg.flags, sizeof(uint16_t));
567 CHECK_TYPE_SIZE(arg.resv, sizeof(uint64_t) * 3);
568
569 if (umove_or_printaddr(tcp, addr, &arg))
570 return;
571
572 tprint_struct_begin();
573 PRINT_FIELD_ADDR64(arg, ring_addr);
574
575 tprint_struct_next();
576 PRINT_FIELD_U(arg, ring_entries);
577
578 tprint_struct_next();
579 PRINT_FIELD_U(arg, bgid);
580
581 tprint_struct_next();
582 PRINT_FIELD_X(arg, flags);
583
584 if (!IS_ARRAY_ZERO(arg.resv)) {
585 tprint_struct_next();
586 PRINT_FIELD_ARRAY(arg, resv, tcp, print_xint_array_member);
587 }
588
589 tprint_struct_end();
590 }
591
592 SYS_FUNC(io_uring_register)
593 {
594 const int fd = tcp->u_arg[0];
595 const unsigned int opcode = tcp->u_arg[1];
596 const kernel_ulong_t arg = tcp->u_arg[2];
597 const unsigned int nargs = tcp->u_arg[3];
598 int rc = RVAL_DECODED;
599 int buf;
600
601 if (entering(tcp)) {
602 /* fd */
603 printfd(tcp, fd);
604 tprint_arg_next();
605
606 /* opcode */
607 printxval(uring_register_opcodes, opcode,
608 "IORING_REGISTER_???");
609 tprint_arg_next();
610 }
611
612 /* arg */
613 switch (opcode) {
614 case IORING_REGISTER_BUFFERS:
615 tprint_iov(tcp, nargs, arg, iov_decode_addr);
616 break;
617 case IORING_REGISTER_FILES:
618 case IORING_REGISTER_EVENTFD:
619 case IORING_REGISTER_EVENTFD_ASYNC:
620 print_array(tcp, arg, nargs, &buf, sizeof(buf),
621 tfetch_mem, print_fd_array_member, NULL);
622 break;
623 case IORING_REGISTER_FILES_UPDATE:
624 print_io_uring_files_update(tcp, arg, nargs);
625 break;
626 case IORING_REGISTER_PROBE:
627 rc = print_io_uring_probe(tcp, arg, nargs);
628 break;
629 case IORING_REGISTER_RESTRICTIONS:
630 print_io_uring_restrictions(tcp, arg, nargs);
631 break;
632 case IORING_REGISTER_FILES2:
633 case IORING_REGISTER_BUFFERS2:
634 print_io_uring_register_rsrc(tcp, arg, nargs, opcode);
635 break;
636 case IORING_REGISTER_FILES_UPDATE2:
637 case IORING_REGISTER_BUFFERS_UPDATE:
638 print_io_uring_update_rsrc(tcp, arg, nargs, opcode);
639 break;
640 case IORING_REGISTER_IOWQ_AFF:
641 print_affinitylist(tcp, arg, nargs);
642 break;
643 case IORING_REGISTER_IOWQ_MAX_WORKERS:
644 rc = print_io_uring_iowq_acct(tcp, arg, nargs);
645 if (entering(tcp) && !rc)
646 tprint_value_changed();
647 break;
648 case IORING_REGISTER_RING_FDS:
649 print_io_uring_ringfds_register(tcp, arg, nargs);
650 break;
651 case IORING_UNREGISTER_RING_FDS:
652 print_io_uring_ringfds_unregister(tcp, arg, nargs);
653 break;
654 case IORING_REGISTER_PBUF_RING:
655 case IORING_UNREGISTER_PBUF_RING:
656 print_io_uring_buf_reg(tcp, arg);
657 break;
658 case IORING_UNREGISTER_BUFFERS:
659 case IORING_UNREGISTER_FILES:
660 case IORING_UNREGISTER_EVENTFD:
661 case IORING_REGISTER_PERSONALITY:
662 case IORING_UNREGISTER_PERSONALITY:
663 case IORING_REGISTER_ENABLE_RINGS:
664 case IORING_UNREGISTER_IOWQ_AFF:
665 default:
666 printaddr(arg);
667 break;
668 }
669
670 if (rc || exiting(tcp)) {
671 tprint_arg_next();
672 /* nr_args */
673 PRINT_VAL_U(nargs);
674 }
675
676 return rc;
677 }