(root)/
strace-6.5/
src/
perf.c
       1  /*
       2   * Copyright (c) 2013 Ben Noordhuis <info@bnoordhuis.nl>
       3   * Copyright (c) 2013-2015 Dmitry V. Levin <ldv@strace.io>
       4   * Copyright (c) 2016 Eugene Syromyatnikov <evgsyr@gmail.com>
       5   * Copyright (c) 2015-2023 The strace developers.
       6   * All rights reserved.
       7   *
       8   * SPDX-License-Identifier: LGPL-2.1-or-later
       9   */
      10  
      11  #include "defs.h"
      12  
      13  #include "perf_event_struct.h"
      14  
      15  #include "xlat/hw_breakpoint_len.h"
      16  #include "xlat/hw_breakpoint_type.h"
      17  #include "xlat/perf_attr_size.h"
      18  #include "xlat/perf_branch_sample_type.h"
      19  #include "xlat/perf_event_open_flags.h"
      20  #include "xlat/perf_event_read_format.h"
      21  #include "xlat/perf_event_sample_format.h"
      22  #include "xlat/perf_hw_cache_id.h"
      23  #include "xlat/perf_hw_cache_op_id.h"
      24  #include "xlat/perf_hw_cache_op_result_id.h"
      25  #include "xlat/perf_hw_id.h"
      26  #include "xlat/perf_sw_ids.h"
      27  #include "xlat/perf_type_id.h"
      28  
/*
 * Snapshot of a tracee's struct perf_event_attr.  fetch_perf_event_attr()
 * fills one in and attaches it to the tcb as private data so that
 * print_perf_event_attr() can print it later; free_pea_desc() releases it.
 */
struct pea_desc {
	/* Heap-allocated, zero-initialised copy of the tracee's structure */
	struct perf_event_attr *attr;
	/* Number of leading bytes of *attr actually copied from the tracee */
	uint32_t size;
};
      33  
      34  static void
      35  free_pea_desc(void *pea_desc_ptr)
      36  {
      37  	struct pea_desc *desc = pea_desc_ptr;
      38  
      39  	free(desc->attr);
      40  	free(desc);
      41  }
      42  
      43  int
      44  fetch_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
      45  {
      46  	struct pea_desc *desc;
      47  	struct perf_event_attr *attr;
      48  	uint32_t size;
      49  
      50  	if (umove(tcp, addr + offsetof(struct perf_event_attr, size), &size)) {
      51  		printaddr(addr);
      52  		return 1;
      53  	}
      54  
      55  	if (size > sizeof(*attr))
      56  		size = sizeof(*attr);
      57  
      58  	if (!size)
      59  		size = PERF_ATTR_SIZE_VER0;
      60  
      61  	/*
      62  	 * Kernel (rightfully) deems invalid attribute structures with size less
      63  	 * than first published format size, and we do the same.
      64  	 */
      65  	if (size < PERF_ATTR_SIZE_VER0) {
      66  		printaddr(addr);
      67  		return 1;
      68  	}
      69  
      70  	if (abbrev(tcp))
      71  		size = offsetof(struct perf_event_attr, wakeup_events);
      72  
      73  	/* Size should be multiple of 8, but kernel doesn't check for it */
      74  	/* size &= ~7; */
      75  
      76  	attr = xzalloc(sizeof(*attr));
      77  
      78  	if (umoven_or_printaddr(tcp, addr, size, attr)) {
      79  		free(attr);
      80  
      81  		return 1;
      82  	}
      83  
      84  	desc = xmalloc(sizeof(*desc));
      85  
      86  	desc->attr = attr;
      87  	desc->size = size;
      88  
      89  	set_tcb_priv_data(tcp, desc, free_pea_desc);
      90  
      91  	return 0;
      92  }
      93  
      94  void
      95  print_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
      96  {
      97  	static const char *precise_ip_desc[] = {
      98  		"arbitrary skid",
      99  		"constant skid",
     100  		"requested to have 0 skid",
     101  		"must have 0 skid",
     102  	};
     103  
     104  	struct pea_desc *desc;
     105  	struct perf_event_attr *attr;
     106  	uint32_t size;
     107  	uint32_t new_size;
     108  	int use_new_size = 0;
     109  
     110  	/*
     111  	 * Amusingly, the kernel accepts structures with only part of the field
     112  	 * present, so we perform the check like this (instead of checking
     113  	 * offsetofend against size) in order to print fields as kernel sees
     114  	 * them.  This also should work great on big endian architectures.
     115  	 */
     116  #define STRACE_PERF_CHECK_FIELD(field_) \
     117  		do { \
     118  			if (offsetof(struct perf_event_attr, field_) >= size) \
     119  				goto print_perf_event_attr_out; \
     120  		} while (0)
     121  
     122  	desc = get_tcb_priv_data(tcp);
     123  
     124  	attr = desc->attr;
     125  	size = desc->size;
     126  
     127  	/* The only error which expected to change size field currently */
     128  	if (tcp->u_error == E2BIG) {
     129  		if (umove(tcp, addr + offsetof(struct perf_event_attr, size),
     130  		    &new_size))
     131  			use_new_size = -1;
     132  		else
     133  			use_new_size = 1;
     134  	}
     135  
     136  	tprint_struct_begin();
     137  	PRINT_FIELD_XVAL(*attr, type, perf_type_id, "PERF_TYPE_???");
     138  	tprint_struct_next();
     139  	PRINT_FIELD_XVAL(*attr, size, perf_attr_size, "PERF_ATTR_SIZE_???");
     140  
     141  	if (use_new_size) {
     142  		tprint_value_changed();
     143  
     144  		if (use_new_size > 0)
     145  			printxval(perf_attr_size, new_size,
     146  				  "PERF_ATTR_SIZE_???");
     147  		else
     148  			tprint_unavailable();
     149  	}
     150  
     151  	switch (attr->type) {
     152  	case PERF_TYPE_HARDWARE:
     153  		/*
     154  		 * EEEEEEEE000000AA
     155  		 * EEEEEEEE - PMU type ID
     156  		 * AA - perf_hw_id
     157  		 */
     158  		tprint_struct_next();
     159  		tprints_field_name("config");
     160  		tprint_flags_begin();
     161  		if (attr->config >> 32) {
     162  			tprint_shift_begin();
     163  			PRINT_VAL_X(attr->config >> 32);
     164  			tprint_shift();
     165  			PRINT_VAL_U(32);
     166  			tprint_shift_end();
     167  			tprint_flags_or();
     168  		}
     169  		printxval(perf_hw_id, attr->config & PERF_HW_EVENT_MASK,
     170  			   "PERF_COUNT_HW_???");
     171  		tprint_flags_end();
     172  		break;
     173  	case PERF_TYPE_SOFTWARE:
     174  		tprint_struct_next();
     175  		PRINT_FIELD_XVAL(*attr, config, perf_sw_ids,
     176  				 "PERF_COUNT_SW_???");
     177  		break;
     178  	case PERF_TYPE_TRACEPOINT:
     179  		/*
     180  		 * "The value to use in config can be obtained from under
     181  		 * debugfs tracing/events/../../id if ftrace is enabled
     182  		 * in the kernel."
     183  		 */
     184  		tprint_struct_next();
     185  		PRINT_FIELD_U(*attr, config);
     186  		break;
     187  	case PERF_TYPE_HW_CACHE:
     188  		/*
     189  		 * EEEEEEEE00DDCCBB
     190  		 * EEEEEEEE - PMU type ID
     191  		 * BB - perf_hw_cache_id
     192  		 * CC - perf_hw_cache_op_id
     193  		 * DD - perf_hw_cache_op_result_id
     194  		 */
     195  		tprint_struct_next();
     196  		tprints_field_name("config");
     197  		tprint_flags_begin();
     198  		if (attr->config >> 32){
     199  			tprint_shift_begin();
     200  			PRINT_VAL_X(attr->config >> 32);
     201  			tprint_shift();
     202  			PRINT_VAL_U(32);
     203  			tprint_shift_end();
     204  			tprint_flags_or();
     205  		}
     206  		if ((attr->config & PERF_HW_EVENT_MASK) >> 24) {
     207  			tprint_shift_begin();
     208  			PRINT_VAL_X((attr->config & PERF_HW_EVENT_MASK) >> 24);
     209  			tprint_shift();
     210  			PRINT_VAL_U(24);
     211  			tprint_shift_end();
     212  			tprint_flags_or();
     213  		}
     214  		tprint_shift_begin();
     215  		printxval(perf_hw_cache_op_result_id,
     216  			  (attr->config >> 16) & 0xFF,
     217  			  "PERF_COUNT_HW_CACHE_RESULT_???");
     218  		tprint_shift();
     219  		PRINT_VAL_U(16);
     220  		tprint_shift_end();
     221  
     222  		tprint_flags_or();
     223  		tprint_shift_begin();
     224  		printxval(perf_hw_cache_op_id, (attr->config >> 8) & 0xFF,
     225  			   "PERF_COUNT_HW_CACHE_OP_???");
     226  		tprint_shift();
     227  		PRINT_VAL_U(8);
     228  		tprint_shift_end();
     229  
     230  		tprint_flags_or();
     231  		printxval(perf_hw_cache_id, attr->config & 0xFF,
     232  			  "PERF_COUNT_HW_CACHE_???");
     233  		tprint_flags_end();
     234  		break;
     235  	case PERF_TYPE_RAW:
     236  		/*
     237  		 * "If type is PERF_TYPE_RAW, then a custom "raw" config
     238  		 * value is needed. Most CPUs support events that are not
     239  		 * covered by the "generalized" events. These are
     240  		 * implementation defined; see your CPU manual (for example the
     241  		 * Intel Volume 3B documentation or the AMD BIOS and Kernel
     242  		 * Developer Guide). The libpfm4 library can be used to
     243  		 * translate from the name in the architectural manuals
     244  		 * to the raw hex value perf_event_open() expects in this
     245  		 * field."
     246  		 */
     247  	case PERF_TYPE_BREAKPOINT:
     248  		/*
     249  		 * "If type is PERF_TYPE_BREAKPOINT, then leave config set
     250  		 * to zero. Its parameters are set in other places."
     251  		 */
     252  	default:
     253  		tprint_struct_next();
     254  		PRINT_FIELD_X(*attr, config);
     255  		break;
     256  	}
     257  
     258  	if (attr->freq) {
     259  		tprint_struct_next();
     260  		PRINT_FIELD_U(*attr, sample_freq);
     261  	} else {
     262  		tprint_struct_next();
     263  		PRINT_FIELD_U(*attr, sample_period);
     264  	}
     265  
     266  	tprint_struct_next();
     267  	PRINT_FIELD_FLAGS(*attr, sample_type, perf_event_sample_format,
     268  			  "PERF_SAMPLE_???");
     269  	tprint_struct_next();
     270  	PRINT_FIELD_FLAGS(*attr, read_format, perf_event_read_format,
     271  			  "PERF_FORMAT_???");
     272  
     273  	/*** A shorthand for printing struct perf_event_attr bit flags */
     274  #define STRACE_PERF_PRINT_FLAG(flag_) \
     275  	do { \
     276  		if (!abbrev(tcp) || attr->flag_) { \
     277  			tprint_struct_next(); \
     278  			PRINT_FIELD_U_CAST(*attr, flag_, unsigned int); \
     279  		}  \
     280  	} while (0)
     281  
     282  	STRACE_PERF_PRINT_FLAG(disabled);
     283  	STRACE_PERF_PRINT_FLAG(inherit);
     284  	STRACE_PERF_PRINT_FLAG(pinned);
     285  	STRACE_PERF_PRINT_FLAG(exclusive);
     286  	STRACE_PERF_PRINT_FLAG(exclude_user);
     287  	STRACE_PERF_PRINT_FLAG(exclude_kernel);
     288  	STRACE_PERF_PRINT_FLAG(exclude_hv);
     289  	STRACE_PERF_PRINT_FLAG(exclude_idle);
     290  	STRACE_PERF_PRINT_FLAG(mmap);
     291  	STRACE_PERF_PRINT_FLAG(comm);
     292  	STRACE_PERF_PRINT_FLAG(freq);
     293  	STRACE_PERF_PRINT_FLAG(inherit_stat);
     294  	STRACE_PERF_PRINT_FLAG(enable_on_exec);
     295  	STRACE_PERF_PRINT_FLAG(task);
     296  	STRACE_PERF_PRINT_FLAG(watermark);
     297  	tprint_struct_next();
     298  	PRINT_FIELD_U_CAST(*attr, precise_ip, unsigned int);
     299  	tprints_comment(precise_ip_desc[attr->precise_ip]);
     300  	STRACE_PERF_PRINT_FLAG(mmap_data);
     301  	STRACE_PERF_PRINT_FLAG(sample_id_all);
     302  	STRACE_PERF_PRINT_FLAG(exclude_host);
     303  	STRACE_PERF_PRINT_FLAG(exclude_guest);
     304  	STRACE_PERF_PRINT_FLAG(exclude_callchain_kernel);
     305  	STRACE_PERF_PRINT_FLAG(exclude_callchain_user);
     306  	STRACE_PERF_PRINT_FLAG(mmap2);
     307  	STRACE_PERF_PRINT_FLAG(comm_exec);
     308  	STRACE_PERF_PRINT_FLAG(use_clockid);
     309  	STRACE_PERF_PRINT_FLAG(context_switch);
     310  	STRACE_PERF_PRINT_FLAG(write_backward);
     311  	STRACE_PERF_PRINT_FLAG(namespaces);
     312  	STRACE_PERF_PRINT_FLAG(ksymbol);
     313  	STRACE_PERF_PRINT_FLAG(bpf_event);
     314  	STRACE_PERF_PRINT_FLAG(aux_output);
     315  	STRACE_PERF_PRINT_FLAG(cgroup);
     316  	STRACE_PERF_PRINT_FLAG(text_poke);
     317  	STRACE_PERF_PRINT_FLAG(build_id);
     318  	STRACE_PERF_PRINT_FLAG(inherit_thread);
     319  	STRACE_PERF_PRINT_FLAG(remove_on_exec);
     320  	STRACE_PERF_PRINT_FLAG(sigtrap);
     321  
     322  	/*
     323  	 * Print it only in case it is non-zero, since it may contain flags we
     324  	 * are not aware about.
     325  	 */
     326  	if (attr->__reserved_1) {
     327  		tprint_struct_next();
     328  		PRINT_FIELD_X_CAST(*attr, __reserved_1, uint64_t);
     329  		tprints_comment("Bits 63..38");
     330  	}
     331  
     332  	if (abbrev(tcp))
     333  		goto print_perf_event_attr_out;
     334  
     335  	if (attr->watermark) {
     336  		tprint_struct_next();
     337  		PRINT_FIELD_U(*attr, wakeup_watermark);
     338  	} else {
     339  		tprint_struct_next();
     340  		PRINT_FIELD_U(*attr, wakeup_events);
     341  	}
     342  
     343  	if (attr->type == PERF_TYPE_BREAKPOINT) {
     344  		/* Any combination of R/W with X is deemed invalid */
     345  		tprint_struct_next();
     346  		PRINT_FIELD_XVAL(*attr, bp_type, hw_breakpoint_type,
     347  				 (attr->bp_type <=
     348  					(HW_BREAKPOINT_X | HW_BREAKPOINT_RW))
     349  						? "HW_BREAKPOINT_INVALID"
     350  						: "HW_BREAKPOINT_???");
     351  	}
     352  
     353  	if (attr->type == PERF_TYPE_BREAKPOINT) {
     354  		tprint_struct_next();
     355  		PRINT_FIELD_X(*attr, bp_addr);
     356  	} else {
     357  		tprint_struct_next();
     358  		PRINT_FIELD_X(*attr, config1);
     359  	}
     360  
     361  	/*
     362  	 * Fields after bp_addr/config1 are optional and may not present; check
     363  	 * against size is needed.
     364  	 */
     365  
     366  	STRACE_PERF_CHECK_FIELD(bp_len);
     367  	if (attr->type == PERF_TYPE_BREAKPOINT) {
     368  		tprint_struct_next();
     369  		PRINT_FIELD_U(*attr, bp_len);
     370  	} else {
     371  		tprint_struct_next();
     372  		PRINT_FIELD_X(*attr, config2);
     373  	}
     374  
     375  	STRACE_PERF_CHECK_FIELD(branch_sample_type);
     376  	if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
     377  		tprint_struct_next();
     378  		PRINT_FIELD_FLAGS(*attr, branch_sample_type,
     379  				  perf_branch_sample_type,
     380  				  "PERF_SAMPLE_BRANCH_???");
     381  	}
     382  
     383  	STRACE_PERF_CHECK_FIELD(sample_regs_user);
     384  	/*
     385  	 * "This bit mask defines the set of user CPU registers to dump on
     386  	 * samples. The layout of the register mask is architecture-specific and
     387  	 * described in the kernel header
     388  	 * arch/ARCH/include/uapi/asm/perf_regs.h."
     389  	 */
     390  	tprint_struct_next();
     391  	PRINT_FIELD_X(*attr, sample_regs_user);
     392  
     393  	STRACE_PERF_CHECK_FIELD(sample_stack_user);
     394  	/*
     395  	 * "size of the user stack to dump if PERF_SAMPLE_STACK_USER is
     396  	 * specified."
     397  	 */
     398  	if (attr->sample_type & PERF_SAMPLE_STACK_USER) {
     399  		tprint_struct_next();
     400  		PRINT_FIELD_X(*attr, sample_stack_user);
     401  	}
     402  
     403  	if (attr->use_clockid) {
     404  		STRACE_PERF_CHECK_FIELD(clockid);
     405  		tprint_struct_next();
     406  		PRINT_FIELD_XVAL(*attr, clockid, clocknames, "CLOCK_???");
     407  	}
     408  
     409  	STRACE_PERF_CHECK_FIELD(sample_regs_intr);
     410  	tprint_struct_next();
     411  	PRINT_FIELD_X(*attr, sample_regs_intr);
     412  
     413  	STRACE_PERF_CHECK_FIELD(aux_watermark);
     414  	tprint_struct_next();
     415  	PRINT_FIELD_U(*attr, aux_watermark);
     416  
     417  	STRACE_PERF_CHECK_FIELD(sample_max_stack);
     418  	tprint_struct_next();
     419  	PRINT_FIELD_U(*attr, sample_max_stack);
     420  
     421  	STRACE_PERF_CHECK_FIELD(__reserved_2);
     422  	if (attr->__reserved_2)
     423  		tprintf_comment("bytes 110..111: %#hx", attr->__reserved_2);
     424  
     425  	STRACE_PERF_CHECK_FIELD(aux_sample_size);
     426  	tprint_struct_next();
     427  	PRINT_FIELD_U(*attr, aux_sample_size);
     428  
     429  	STRACE_PERF_CHECK_FIELD(__reserved_3);
     430  	if (attr->__reserved_3)
     431  		tprintf_comment("bytes 116..119: %#x", attr->__reserved_3);
     432  
     433  	STRACE_PERF_CHECK_FIELD(sig_data);
     434  	tprint_struct_next();
     435  	PRINT_FIELD_X(*attr, sig_data);
     436  
     437  	STRACE_PERF_CHECK_FIELD(config3);
     438  	tprint_struct_next();
     439  	PRINT_FIELD_X(*attr, config3);
     440  
     441  print_perf_event_attr_out:
     442  	if ((attr->size && (attr->size > size)) ||
     443  	    (!attr->size && (size < PERF_ATTR_SIZE_VER0))) {
     444  		tprint_struct_next();
     445  		tprint_more_data_follows();
     446  	}
     447  
     448  	tprint_struct_end();
     449  }
     450  
     451  SYS_FUNC(perf_event_open)
     452  {
     453  	/*
     454  	 * We try to copy out the whole structure on entering in order to check
     455  	 * size value on exiting. We do not check the rest of the fields because
     456  	 * they shouldn't be changed, but copy the whole structure instead
     457  	 * of just size field because they could.
     458  	 */
     459  	if (entering(tcp)) {
     460  		/* attr */
     461  		if (!fetch_perf_event_attr(tcp, tcp->u_arg[0]))
     462  			return 0;
     463  	} else {
     464  		/* attr */
     465  		print_perf_event_attr(tcp, tcp->u_arg[0]);
     466  	}
     467  	tprint_arg_next();
     468  
     469  	/* pid */
     470  	PRINT_VAL_D((int) tcp->u_arg[1]);
     471  	tprint_arg_next();
     472  
     473  	/* cpu */
     474  	PRINT_VAL_D((int) tcp->u_arg[2]);
     475  	tprint_arg_next();
     476  
     477  	/* group_fd */
     478  	printfd(tcp, tcp->u_arg[3]);
     479  	tprint_arg_next();
     480  
     481  	/* flags */
     482  	printflags64(perf_event_open_flags, tcp->u_arg[4], "PERF_FLAG_???");
     483  
     484  	return RVAL_DECODED | RVAL_FD;
     485  }