1 /*
2 * Copyright (c) 2013 Ben Noordhuis <info@bnoordhuis.nl>
3 * Copyright (c) 2013-2015 Dmitry V. Levin <ldv@strace.io>
4 * Copyright (c) 2016 Eugene Syromyatnikov <evgsyr@gmail.com>
5 * Copyright (c) 2015-2023 The strace developers.
6 * All rights reserved.
7 *
8 * SPDX-License-Identifier: LGPL-2.1-or-later
9 */
10
11 #include "defs.h"
12
13 #include "perf_event_struct.h"
14
15 #include "xlat/hw_breakpoint_len.h"
16 #include "xlat/hw_breakpoint_type.h"
17 #include "xlat/perf_attr_size.h"
18 #include "xlat/perf_branch_sample_type.h"
19 #include "xlat/perf_event_open_flags.h"
20 #include "xlat/perf_event_read_format.h"
21 #include "xlat/perf_event_sample_format.h"
22 #include "xlat/perf_hw_cache_id.h"
23 #include "xlat/perf_hw_cache_op_id.h"
24 #include "xlat/perf_hw_cache_op_result_id.h"
25 #include "xlat/perf_hw_id.h"
26 #include "xlat/perf_sw_ids.h"
27 #include "xlat/perf_type_id.h"
28
/*
 * Local snapshot of a tracee's struct perf_event_attr, as stored in the tcb
 * private data by fetch_perf_event_attr() and consumed by
 * print_perf_event_attr().
 */
struct pea_desc {
	/* Heap-allocated copy of the attribute; released by free_pea_desc() */
	struct perf_event_attr *attr;
	/* Number of bytes that were requested from the tracee into *attr */
	uint32_t size;
};
33
34 static void
35 free_pea_desc(void *pea_desc_ptr)
36 {
37 struct pea_desc *desc = pea_desc_ptr;
38
39 free(desc->attr);
40 free(desc);
41 }
42
43 int
44 fetch_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
45 {
46 struct pea_desc *desc;
47 struct perf_event_attr *attr;
48 uint32_t size;
49
50 if (umove(tcp, addr + offsetof(struct perf_event_attr, size), &size)) {
51 printaddr(addr);
52 return 1;
53 }
54
55 if (size > sizeof(*attr))
56 size = sizeof(*attr);
57
58 if (!size)
59 size = PERF_ATTR_SIZE_VER0;
60
61 /*
62 * Kernel (rightfully) deems invalid attribute structures with size less
63 * than first published format size, and we do the same.
64 */
65 if (size < PERF_ATTR_SIZE_VER0) {
66 printaddr(addr);
67 return 1;
68 }
69
70 if (abbrev(tcp))
71 size = offsetof(struct perf_event_attr, wakeup_events);
72
73 /* Size should be multiple of 8, but kernel doesn't check for it */
74 /* size &= ~7; */
75
76 attr = xzalloc(sizeof(*attr));
77
78 if (umoven_or_printaddr(tcp, addr, size, attr)) {
79 free(attr);
80
81 return 1;
82 }
83
84 desc = xmalloc(sizeof(*desc));
85
86 desc->attr = attr;
87 desc->size = size;
88
89 set_tcb_priv_data(tcp, desc, free_pea_desc);
90
91 return 0;
92 }
93
/*
 * Print the struct perf_event_attr snapshot kept in the private data of tcp.
 *
 * The snapshot (attribute copy and fetched size) is taken from
 * get_tcb_priv_data(); it is dereferenced without a NULL check, so the caller
 * must have stored it beforehand (see fetch_perf_event_attr).  addr is only
 * used to re-read the size field from the tracee when the syscall failed with
 * E2BIG.
 *
 * In abbreviated mode printing stops after the bit flags.  Otherwise, fields
 * located after bp_addr/config1 are printed only while their offset lies
 * within the fetched size.  A "more data follows" marker is appended when the
 * size recorded in the structure exceeds what was fetched.
 */
void
print_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
{
	/*
	 * Human-readable descriptions indexed by the value of precise_ip.
	 * NOTE(review): the table has 4 entries, so precise_ip is presumably
	 * a 2-bit field; confirm against perf_event_struct.h.
	 */
	static const char *precise_ip_desc[] = {
		"arbitrary skid",
		"constant skid",
		"requested to have 0 skid",
		"must have 0 skid",
	};

	struct pea_desc *desc;
	struct perf_event_attr *attr;
	uint32_t size;
	uint32_t new_size;
	/* 0: size was not re-read; 1: re-read succeeded; -1: re-read failed */
	int use_new_size = 0;

	/*
	 * Amusingly, the kernel accepts structures with only a part of a field
	 * present, so the check is performed like this (instead of checking
	 * offsetofend against size) in order to print fields as the kernel
	 * sees them.  This should also work well on big-endian architectures.
	 */
#define STRACE_PERF_CHECK_FIELD(field_) \
	do { \
		if (offsetof(struct perf_event_attr, field_) >= size) \
			goto print_perf_event_attr_out; \
	} while (0)

	desc = get_tcb_priv_data(tcp);

	attr = desc->attr;
	size = desc->size;

	/* E2BIG is currently the only error expected to change the size field */
	if (tcp->u_error == E2BIG) {
		if (umove(tcp, addr + offsetof(struct perf_event_attr, size),
			  &new_size))
			use_new_size = -1;
		else
			use_new_size = 1;
	}

	tprint_struct_begin();
	PRINT_FIELD_XVAL(*attr, type, perf_type_id, "PERF_TYPE_???");
	tprint_struct_next();
	PRINT_FIELD_XVAL(*attr, size, perf_attr_size, "PERF_ATTR_SIZE_???");

	/* Show the size after the syscall next to the one seen on entry */
	if (use_new_size) {
		tprint_value_changed();

		if (use_new_size > 0)
			printxval(perf_attr_size, new_size,
				  "PERF_ATTR_SIZE_???");
		else
			tprint_unavailable();
	}

	/* The interpretation of config depends on the event type */
	switch (attr->type) {
	case PERF_TYPE_HARDWARE:
		/*
		 * EEEEEEEE000000AA
		 * EEEEEEEE - PMU type ID
		 * AA - perf_hw_id
		 */
		tprint_struct_next();
		tprints_field_name("config");
		tprint_flags_begin();
		/* Upper 32 bits are printed as "value << 32" when non-zero */
		if (attr->config >> 32) {
			tprint_shift_begin();
			PRINT_VAL_X(attr->config >> 32);
			tprint_shift();
			PRINT_VAL_U(32);
			tprint_shift_end();
			tprint_flags_or();
		}
		printxval(perf_hw_id, attr->config & PERF_HW_EVENT_MASK,
			  "PERF_COUNT_HW_???");
		tprint_flags_end();
		break;
	case PERF_TYPE_SOFTWARE:
		tprint_struct_next();
		PRINT_FIELD_XVAL(*attr, config, perf_sw_ids,
				 "PERF_COUNT_SW_???");
		break;
	case PERF_TYPE_TRACEPOINT:
		/*
		 * "The value to use in config can be obtained from under
		 * debugfs tracing/events/../../id if ftrace is enabled
		 * in the kernel."
		 */
		tprint_struct_next();
		PRINT_FIELD_U(*attr, config);
		break;
	case PERF_TYPE_HW_CACHE:
		/*
		 * EEEEEEEE00DDCCBB
		 * EEEEEEEE - PMU type ID
		 * BB - perf_hw_cache_id
		 * CC - perf_hw_cache_op_id
		 * DD - perf_hw_cache_op_result_id
		 */
		tprint_struct_next();
		tprints_field_name("config");
		tprint_flags_begin();
		/* Upper 32 bits are printed as "value << 32" when non-zero */
		if (attr->config >> 32){
			tprint_shift_begin();
			PRINT_VAL_X(attr->config >> 32);
			tprint_shift();
			PRINT_VAL_U(32);
			tprint_shift_end();
			tprint_flags_or();
		}
		/* Bits of the masked value above bit 23, printed raw if set */
		if ((attr->config & PERF_HW_EVENT_MASK) >> 24) {
			tprint_shift_begin();
			PRINT_VAL_X((attr->config & PERF_HW_EVENT_MASK) >> 24);
			tprint_shift();
			PRINT_VAL_U(24);
			tprint_shift_end();
			tprint_flags_or();
		}
		/* DD: bits 16..23 */
		tprint_shift_begin();
		printxval(perf_hw_cache_op_result_id,
			  (attr->config >> 16) & 0xFF,
			  "PERF_COUNT_HW_CACHE_RESULT_???");
		tprint_shift();
		PRINT_VAL_U(16);
		tprint_shift_end();

		/* CC: bits 8..15 */
		tprint_flags_or();
		tprint_shift_begin();
		printxval(perf_hw_cache_op_id, (attr->config >> 8) & 0xFF,
			  "PERF_COUNT_HW_CACHE_OP_???");
		tprint_shift();
		PRINT_VAL_U(8);
		tprint_shift_end();

		/* BB: bits 0..7 */
		tprint_flags_or();
		printxval(perf_hw_cache_id, attr->config & 0xFF,
			  "PERF_COUNT_HW_CACHE_???");
		tprint_flags_end();
		break;
	case PERF_TYPE_RAW:
		/*
		 * "If type is PERF_TYPE_RAW, then a custom "raw" config
		 * value is needed.  Most CPUs support events that are not
		 * covered by the "generalized" events.  These are
		 * implementation defined; see your CPU manual (for example the
		 * Intel Volume 3B documentation or the AMD BIOS and Kernel
		 * Developer Guide).  The libpfm4 library can be used to
		 * translate from the name in the architectural manuals
		 * to the raw hex value perf_event_open() expects in this
		 * field."
		 */
	case PERF_TYPE_BREAKPOINT:
		/*
		 * "If type is PERF_TYPE_BREAKPOINT, then leave config set
		 * to zero.  Its parameters are set in other places."
		 */
	default:
		/* Raw, breakpoint and unknown types: config is printed in hex */
		tprint_struct_next();
		PRINT_FIELD_X(*attr, config);
		break;
	}

	/* The freq flag selects which of the two field names is printed */
	if (attr->freq) {
		tprint_struct_next();
		PRINT_FIELD_U(*attr, sample_freq);
	} else {
		tprint_struct_next();
		PRINT_FIELD_U(*attr, sample_period);
	}

	tprint_struct_next();
	PRINT_FIELD_FLAGS(*attr, sample_type, perf_event_sample_format,
			  "PERF_SAMPLE_???");
	tprint_struct_next();
	PRINT_FIELD_FLAGS(*attr, read_format, perf_event_read_format,
			  "PERF_FORMAT_???");

	/*
	 * A shorthand for printing struct perf_event_attr bit flags:
	 * in abbreviated mode a flag is printed only when it is set.
	 */
#define STRACE_PERF_PRINT_FLAG(flag_) \
	do { \
		if (!abbrev(tcp) || attr->flag_) { \
			tprint_struct_next(); \
			PRINT_FIELD_U_CAST(*attr, flag_, unsigned int); \
		} \
	} while (0)

	STRACE_PERF_PRINT_FLAG(disabled);
	STRACE_PERF_PRINT_FLAG(inherit);
	STRACE_PERF_PRINT_FLAG(pinned);
	STRACE_PERF_PRINT_FLAG(exclusive);
	STRACE_PERF_PRINT_FLAG(exclude_user);
	STRACE_PERF_PRINT_FLAG(exclude_kernel);
	STRACE_PERF_PRINT_FLAG(exclude_hv);
	STRACE_PERF_PRINT_FLAG(exclude_idle);
	STRACE_PERF_PRINT_FLAG(mmap);
	STRACE_PERF_PRINT_FLAG(comm);
	STRACE_PERF_PRINT_FLAG(freq);
	STRACE_PERF_PRINT_FLAG(inherit_stat);
	STRACE_PERF_PRINT_FLAG(enable_on_exec);
	STRACE_PERF_PRINT_FLAG(task);
	STRACE_PERF_PRINT_FLAG(watermark);
	/* precise_ip is always printed, followed by its description */
	tprint_struct_next();
	PRINT_FIELD_U_CAST(*attr, precise_ip, unsigned int);
	tprints_comment(precise_ip_desc[attr->precise_ip]);
	STRACE_PERF_PRINT_FLAG(mmap_data);
	STRACE_PERF_PRINT_FLAG(sample_id_all);
	STRACE_PERF_PRINT_FLAG(exclude_host);
	STRACE_PERF_PRINT_FLAG(exclude_guest);
	STRACE_PERF_PRINT_FLAG(exclude_callchain_kernel);
	STRACE_PERF_PRINT_FLAG(exclude_callchain_user);
	STRACE_PERF_PRINT_FLAG(mmap2);
	STRACE_PERF_PRINT_FLAG(comm_exec);
	STRACE_PERF_PRINT_FLAG(use_clockid);
	STRACE_PERF_PRINT_FLAG(context_switch);
	STRACE_PERF_PRINT_FLAG(write_backward);
	STRACE_PERF_PRINT_FLAG(namespaces);
	STRACE_PERF_PRINT_FLAG(ksymbol);
	STRACE_PERF_PRINT_FLAG(bpf_event);
	STRACE_PERF_PRINT_FLAG(aux_output);
	STRACE_PERF_PRINT_FLAG(cgroup);
	STRACE_PERF_PRINT_FLAG(text_poke);
	STRACE_PERF_PRINT_FLAG(build_id);
	STRACE_PERF_PRINT_FLAG(inherit_thread);
	STRACE_PERF_PRINT_FLAG(remove_on_exec);
	STRACE_PERF_PRINT_FLAG(sigtrap);

	/*
	 * Print it only if it is non-zero, since it may contain flags we
	 * are not aware of.
	 */
	if (attr->__reserved_1) {
		tprint_struct_next();
		PRINT_FIELD_X_CAST(*attr, __reserved_1, uint64_t);
		tprints_comment("Bits 63..38");
	}

	/* Abbreviated output ends here; the remaining fields are skipped */
	if (abbrev(tcp))
		goto print_perf_event_attr_out;

	/* The watermark flag selects which of the two field names is printed */
	if (attr->watermark) {
		tprint_struct_next();
		PRINT_FIELD_U(*attr, wakeup_watermark);
	} else {
		tprint_struct_next();
		PRINT_FIELD_U(*attr, wakeup_events);
	}

	if (attr->type == PERF_TYPE_BREAKPOINT) {
		/* Any combination of R/W with X is deemed invalid */
		tprint_struct_next();
		PRINT_FIELD_XVAL(*attr, bp_type, hw_breakpoint_type,
				 (attr->bp_type <=
					 (HW_BREAKPOINT_X | HW_BREAKPOINT_RW))
						? "HW_BREAKPOINT_INVALID"
						: "HW_BREAKPOINT_???");
	}

	/* Printed as bp_addr for breakpoint events and as config1 otherwise */
	if (attr->type == PERF_TYPE_BREAKPOINT) {
		tprint_struct_next();
		PRINT_FIELD_X(*attr, bp_addr);
	} else {
		tprint_struct_next();
		PRINT_FIELD_X(*attr, config1);
	}

	/*
	 * Fields after bp_addr/config1 are optional and may not be present;
	 * each of them has to be checked against size before it is printed.
	 */

	STRACE_PERF_CHECK_FIELD(bp_len);
	/* Printed as bp_len for breakpoint events and as config2 otherwise */
	if (attr->type == PERF_TYPE_BREAKPOINT) {
		tprint_struct_next();
		PRINT_FIELD_U(*attr, bp_len);
	} else {
		tprint_struct_next();
		PRINT_FIELD_X(*attr, config2);
	}

	STRACE_PERF_CHECK_FIELD(branch_sample_type);
	/* Printed only when branch stack sampling is requested */
	if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
		tprint_struct_next();
		PRINT_FIELD_FLAGS(*attr, branch_sample_type,
				  perf_branch_sample_type,
				  "PERF_SAMPLE_BRANCH_???");
	}

	STRACE_PERF_CHECK_FIELD(sample_regs_user);
	/*
	 * "This bit mask defines the set of user CPU registers to dump on
	 * samples.  The layout of the register mask is architecture-specific
	 * and described in the kernel header
	 * arch/ARCH/include/uapi/asm/perf_regs.h."
	 */
	tprint_struct_next();
	PRINT_FIELD_X(*attr, sample_regs_user);

	STRACE_PERF_CHECK_FIELD(sample_stack_user);
	/*
	 * "size of the user stack to dump if PERF_SAMPLE_STACK_USER is
	 * specified."
	 */
	if (attr->sample_type & PERF_SAMPLE_STACK_USER) {
		tprint_struct_next();
		PRINT_FIELD_X(*attr, sample_stack_user);
	}

	/*
	 * clockid is looked at only when use_clockid is set; when it is
	 * clear, neither the size check nor the printing takes place.
	 */
	if (attr->use_clockid) {
		STRACE_PERF_CHECK_FIELD(clockid);
		tprint_struct_next();
		PRINT_FIELD_XVAL(*attr, clockid, clocknames, "CLOCK_???");
	}

	STRACE_PERF_CHECK_FIELD(sample_regs_intr);
	tprint_struct_next();
	PRINT_FIELD_X(*attr, sample_regs_intr);

	STRACE_PERF_CHECK_FIELD(aux_watermark);
	tprint_struct_next();
	PRINT_FIELD_U(*attr, aux_watermark);

	STRACE_PERF_CHECK_FIELD(sample_max_stack);
	tprint_struct_next();
	PRINT_FIELD_U(*attr, sample_max_stack);

	STRACE_PERF_CHECK_FIELD(__reserved_2);
	/* Reserved bytes are shown only as a comment, and only if non-zero */
	if (attr->__reserved_2)
		tprintf_comment("bytes 110..111: %#hx", attr->__reserved_2);

	STRACE_PERF_CHECK_FIELD(aux_sample_size);
	tprint_struct_next();
	PRINT_FIELD_U(*attr, aux_sample_size);

	STRACE_PERF_CHECK_FIELD(__reserved_3);
	/* Reserved bytes are shown only as a comment, and only if non-zero */
	if (attr->__reserved_3)
		tprintf_comment("bytes 116..119: %#x", attr->__reserved_3);

	STRACE_PERF_CHECK_FIELD(sig_data);
	tprint_struct_next();
	PRINT_FIELD_X(*attr, sig_data);

	STRACE_PERF_CHECK_FIELD(config3);
	tprint_struct_next();
	PRINT_FIELD_X(*attr, config3);

print_perf_event_attr_out:
	/*
	 * Indicate that the tracee's structure holds more than was decoded:
	 * either its size field exceeds the fetched size, or the size field
	 * is zero and fewer than PERF_ATTR_SIZE_VER0 bytes were fetched.
	 */
	if ((attr->size && (attr->size > size)) ||
	    (!attr->size && (size < PERF_ATTR_SIZE_VER0))) {
		tprint_struct_next();
		tprint_more_data_follows();
	}

	tprint_struct_end();
}
450
451 SYS_FUNC(perf_event_open)
452 {
453 /*
454 * We try to copy out the whole structure on entering in order to check
455 * size value on exiting. We do not check the rest of the fields because
456 * they shouldn't be changed, but copy the whole structure instead
457 * of just size field because they could.
458 */
459 if (entering(tcp)) {
460 /* attr */
461 if (!fetch_perf_event_attr(tcp, tcp->u_arg[0]))
462 return 0;
463 } else {
464 /* attr */
465 print_perf_event_attr(tcp, tcp->u_arg[0]);
466 }
467 tprint_arg_next();
468
469 /* pid */
470 PRINT_VAL_D((int) tcp->u_arg[1]);
471 tprint_arg_next();
472
473 /* cpu */
474 PRINT_VAL_D((int) tcp->u_arg[2]);
475 tprint_arg_next();
476
477 /* group_fd */
478 printfd(tcp, tcp->u_arg[3]);
479 tprint_arg_next();
480
481 /* flags */
482 printflags64(perf_event_open_flags, tcp->u_arg[4], "PERF_FLAG_???");
483
484 return RVAL_DECODED | RVAL_FD;
485 }