glibc-2.38/sysdeps/x86_64/dl-trampoline.h
/* PLT trampolines.  x86-64 version.
   Copyright (C) 2009-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#ifndef SECTION
# define SECTION(p)	p
#endif

	.section SECTION(.text),"ax",@progbits
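/* Note: this header is not assembled on its own.  In glibc 2.38 it is
   meant to be included (typically from sysdeps/x86_64/dl-trampoline.S)
   once per trampoline variant, with _dl_runtime_resolve and
   _dl_runtime_profile defined to variant-specific names and the
   USE_FXSAVE/USE_XSAVE/USE_XSAVEC and VEC_SIZE/VMOVA/VEC macros selecting
   how the vector/FP state is handled.  Roughly (sketch only; see
   dl-trampoline.S for the exact macro set):

	#define USE_FXSAVE
	#define _dl_runtime_resolve	_dl_runtime_resolve_fxsave
	#include "dl-trampoline.h"
*/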
#ifdef _dl_runtime_resolve

# undef REGISTER_SAVE_AREA
# undef LOCAL_STORAGE_AREA
# undef BASE

# if (STATE_SAVE_ALIGNMENT % 16) != 0
#  error STATE_SAVE_ALIGNMENT must be multiple of 16
# endif

# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
#  error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT
# endif

# if DL_RUNTIME_RESOLVE_REALIGN_STACK
/* Local stack area before jumping to function address: RBX.  */
#  define LOCAL_STORAGE_AREA	8
#  define BASE			rbx
#  ifdef USE_FXSAVE
/* Use fxsave to save XMM registers.  */
#   define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET)
#   if (REGISTER_SAVE_AREA % 16) != 0
#    error REGISTER_SAVE_AREA must be multiple of 16
#   endif
#  endif
# else
#  ifndef USE_FXSAVE
#   error USE_FXSAVE must be defined
#  endif
/* Use fxsave to save XMM registers.  */
#  define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET + 8)
/* Local stack area before jumping to function address:  All saved
   registers.  */
#  define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
#  define BASE			rsp
#  if (REGISTER_SAVE_AREA % 16) != 8
#   error REGISTER_SAVE_AREA must be odd multiple of 8
#  endif
# endif
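/* Two frame layouts are used.  With DL_RUNTIME_RESOLVE_REALIGN_STACK the
   incoming stack may be under-aligned for the state-save instruction, so
   %rbx (BASE) keeps the original %rsp while the stack is realigned; in the
   xsave/xsavec case the save-area size is only known at run time and
   REGISTER_SAVE_AREA stays undefined.  Without realignment the frame has
   the fixed size REGISTER_SAVE_AREA and everything is addressed from
   %rsp (BASE).  */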

	.globl _dl_runtime_resolve
	.hidden _dl_runtime_resolve
	.type _dl_runtime_resolve, @function
	.align 16
	cfi_startproc
_dl_runtime_resolve:
	cfi_adjust_cfa_offset(16) # Incorporate PLT
	_CET_ENDBR
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
#  if LOCAL_STORAGE_AREA != 8
#   error LOCAL_STORAGE_AREA must be 8
#  endif
	pushq %rbx			# pushq subtracts 8 from %rsp.
	cfi_adjust_cfa_offset(8)
	cfi_rel_offset(%rbx, 0)
	mov %RSP_LP, %RBX_LP
	cfi_def_cfa_register(%rbx)
	and $-STATE_SAVE_ALIGNMENT, %RSP_LP
# endif
# ifdef REGISTER_SAVE_AREA
	sub $REGISTER_SAVE_AREA, %RSP_LP
#  if !DL_RUNTIME_RESOLVE_REALIGN_STACK
	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
#  endif
# else
	# Allocate stack space of the required size to save the state.
#  if IS_IN (rtld)
	sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  else
	sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  endif
# endif
	# Preserve registers otherwise clobbered.
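	# These are the six integer argument registers (%rdi, %rsi, %rdx,
	# %rcx, %r8, %r9) plus %rax, whose low byte carries the number of
	# vector registers used by a variadic call per the psABI.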
	movq %rax, REGISTER_SAVE_RAX(%rsp)
	movq %rcx, REGISTER_SAVE_RCX(%rsp)
	movq %rdx, REGISTER_SAVE_RDX(%rsp)
	movq %rsi, REGISTER_SAVE_RSI(%rsp)
	movq %rdi, REGISTER_SAVE_RDI(%rsp)
	movq %r8, REGISTER_SAVE_R8(%rsp)
	movq %r9, REGISTER_SAVE_R9(%rsp)
# ifdef USE_FXSAVE
	fxsave STATE_SAVE_OFFSET(%rsp)
# else
	movl $STATE_SAVE_MASK, %eax
	xorl %edx, %edx
	# Clear the XSAVE Header.
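	# The 64-byte header at STATE_SAVE_OFFSET + 512 lives in freshly
	# allocated stack space, so it must not contain garbage: the later
	# xrstor requires the reserved header bytes to be zero.  xsavec
	# writes XSTATE_BV/XCOMP_BV itself, which is why those first two
	# quadwords are only cleared here for plain xsave.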
#  ifdef USE_XSAVE
	movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
#  endif
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
#  ifdef USE_XSAVE
	xsave STATE_SAVE_OFFSET(%rsp)
#  else
	xsavec STATE_SAVE_OFFSET(%rsp)
#  endif
# endif
	# Copy the args pushed by the PLT into registers.
	# %rdi: link_map, %rsi: reloc_index
	mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
	mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
	call _dl_fixup		# Call resolver.
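	# %r11 is neither an argument register nor callee-saved, so the
	# restores below leave it alone and it can carry the resolved
	# address to the final jump.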
	mov %RAX_LP, %R11_LP	# Save return value.
	# Get register content back.
# ifdef USE_FXSAVE
	fxrstor STATE_SAVE_OFFSET(%rsp)
# else
	movl $STATE_SAVE_MASK, %eax
	xorl %edx, %edx
	xrstor STATE_SAVE_OFFSET(%rsp)
# endif
	movq REGISTER_SAVE_R9(%rsp), %r9
	movq REGISTER_SAVE_R8(%rsp), %r8
	movq REGISTER_SAVE_RDI(%rsp), %rdi
	movq REGISTER_SAVE_RSI(%rsp), %rsi
	movq REGISTER_SAVE_RDX(%rsp), %rdx
	movq REGISTER_SAVE_RCX(%rsp), %rcx
	movq REGISTER_SAVE_RAX(%rsp), %rax
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
	mov %RBX_LP, %RSP_LP
	cfi_def_cfa_register(%rsp)
	movq (%rsp), %rbx
	cfi_restore(%rbx)
# endif
	# Adjust the stack (the PLT did 2 pushes).
	add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
	cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
	jmp *%r11		# Jump to function address.
	cfi_endproc
	.size _dl_runtime_resolve, .-_dl_runtime_resolve
#endif


#if !defined PROF && defined _dl_runtime_profile
# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
#  error LR_VECTOR_OFFSET must be multiple of VEC_SIZE
# endif

	.globl _dl_runtime_profile
	.hidden _dl_runtime_profile
	.type _dl_runtime_profile, @function
	.align 16
_dl_runtime_profile:
	cfi_startproc
	cfi_adjust_cfa_offset(16) # Incorporate PLT
	_CET_ENDBR
	/* The La_x86_64_regs data structure pointed to by the
	   fourth parameter must be VEC_SIZE-byte aligned.  This must
	   be explicitly enforced.  We have to set up a dynamically
	   sized stack frame.  %rbx points to the top half which
	   has a fixed size and preserves the original stack pointer.  */
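	/* Overall flow: the incoming argument registers are saved into an
	   La_x86_64_regs structure, _dl_profile_fixup resolves the symbol
	   and runs the la_pltenter audit hooks, and then control either
	   tail-jumps to the target, or, when an auditor requested a
	   framesize, the target is called from here so that
	   _dl_audit_pltexit can be given its return values.  */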

	sub $32, %RSP_LP	# Allocate the local storage.
	cfi_adjust_cfa_offset(32)
	movq %rbx, (%rsp)
	cfi_rel_offset(%rbx, 0)

	/* On the stack:
		56(%rbx)	parameter #1
		48(%rbx)	return address

		40(%rbx)	reloc index
		32(%rbx)	link_map

		24(%rbx)	La_x86_64_regs pointer
		16(%rbx)	framesize
		 8(%rbx)	rax
		  (%rbx)	rbx
	*/

	movq %rax, 8(%rsp)
	mov %RSP_LP, %RBX_LP
	cfi_def_cfa_register(%rbx)

	/* Actively align the La_x86_64_regs structure.  */
	and $-VEC_SIZE, %RSP_LP
	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
	   to detect if any xmm0-xmm7 registers are changed by audit
	   module.  */
	sub $(LR_SIZE + XMM_SIZE*8), %RSP_LP
	movq %rsp, 24(%rbx)

	/* Fill the La_x86_64_regs structure.  */
	movq %rdx, LR_RDX_OFFSET(%rsp)
	movq %r8,  LR_R8_OFFSET(%rsp)
	movq %r9,  LR_R9_OFFSET(%rsp)
	movq %rcx, LR_RCX_OFFSET(%rsp)
	movq %rsi, LR_RSI_OFFSET(%rsp)
	movq %rdi, LR_RDI_OFFSET(%rsp)
	movq %rbp, LR_RBP_OFFSET(%rsp)

	lea 48(%rbx), %RAX_LP
	movq %rax, LR_RSP_OFFSET(%rsp)

	/* We always store the XMM registers even if AVX is available.
	   This is to provide backward binary compatibility for existing
	   audit modules.  */
	VMOVA %xmm0, (LR_XMM_OFFSET + XMM_SIZE*0)(%rsp)
	VMOVA %xmm1, (LR_XMM_OFFSET + XMM_SIZE*1)(%rsp)
	VMOVA %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
	VMOVA %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
	VMOVA %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
	VMOVA %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
	VMOVA %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
	VMOVA %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

# ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
	VMOVA %VEC(0), (LR_VECTOR_OFFSET + VECTOR_SIZE*0)(%rsp)
	VMOVA %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE*1)(%rsp)
	VMOVA %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	VMOVA %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	VMOVA %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	VMOVA %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	VMOVA %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	VMOVA %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)

	/* Save xmm0-xmm7 registers to detect if any of them are
	   changed by audit module.  */
	vmovdqa %xmm0, (LR_SIZE + XMM_SIZE*0)(%rsp)
	vmovdqa %xmm1, (LR_SIZE + XMM_SIZE*1)(%rsp)
	vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
	vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
	vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
	vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
	vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
	vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
# endif

	mov %RSP_LP, %RCX_LP	# La_x86_64_regs pointer to %rcx.
	mov 48(%rbx), %RDX_LP	# Load return address if needed.
	mov 40(%rbx), %RSI_LP	# Copy the args pushed by the PLT into registers.
	mov 32(%rbx), %RDI_LP	# %rdi: link_map, %rsi: reloc_index
	lea 16(%rbx), %R8_LP	# Address of framesize
	call _dl_profile_fixup	# Call resolver.
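	# On return %rax holds the address to transfer control to, and
	# _dl_profile_fixup has stored the requested frame size through the
	# pointer passed in %r8; it is left negative when no la_pltexit
	# call is wanted.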

	mov %RAX_LP, %R11_LP	# Save return value.

	movq 8(%rbx), %rax	# Get back register content.
	movq LR_RDX_OFFSET(%rsp), %rdx
	movq  LR_R8_OFFSET(%rsp), %r8
	movq  LR_R9_OFFSET(%rsp), %r9

	VMOVA (LR_XMM_OFFSET + XMM_SIZE*0)(%rsp), %xmm0
	VMOVA (LR_XMM_OFFSET + XMM_SIZE*1)(%rsp), %xmm1
	VMOVA (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
	VMOVA (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
	VMOVA (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
	VMOVA (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
	VMOVA (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
	VMOVA (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7

# ifdef RESTORE_AVX
	/* Check if any xmm0-xmm7 registers are changed by audit
	   module.  */
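	/* The pattern below: vpmovmskb yields 0xffff in %esi iff all 16
	   bytes matched, and incw %si then wraps that to zero, setting ZF.
	   So "je 2f" means the auditor left the xmm register untouched and
	   the full-width vector saved above is reloaded; otherwise the
	   auditor's 128-bit value is kept and copied into the vector slot.  */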
	vpcmpeqb (LR_SIZE)(%rsp), %xmm0, %xmm8
	vpmovmskb %xmm8, %esi
	incw %si
	je 2f
	vmovdqa	%xmm0, (LR_VECTOR_OFFSET)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
	vmovdqa	%xmm0, (LR_XMM_OFFSET)(%rsp)

1:	vpcmpeqb (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
	vpmovmskb %xmm8, %esi
	incw %si
	je 2f
	vmovdqa	%xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
	vmovdqa	%xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)

1:	vpcmpeqb (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
	vpmovmskb %xmm8, %esi
	incw %si
	je 2f
	vmovdqa	%xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
	vmovdqa	%xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)

1:	vpcmpeqb (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
	vpmovmskb %xmm8, %esi
	incw %si
	je 2f
	vmovdqa	%xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
	vmovdqa	%xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)

1:	vpcmpeqb (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
	vpmovmskb %xmm8, %esi
	incw %si
	je 2f
	vmovdqa	%xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
	vmovdqa	%xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)

1:	vpcmpeqb (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
	vpmovmskb %xmm8, %esi
	incw %si
	je 2f
	vmovdqa	%xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
	vmovdqa	%xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)

1:	vpcmpeqb (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
	vpmovmskb %xmm8, %esi
	incw %si
	je 2f
	vmovdqa	%xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
	vmovdqa	%xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)

1:	vpcmpeqb (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
	vpmovmskb %xmm8, %esi
	incw %si
	je 2f
	vmovdqa	%xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
	vmovdqa	%xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

1:
# endif

	mov  16(%rbx), %RCX_LP	# Anything in framesize?
	test %RCX_LP, %RCX_LP
	jns 3f

	/* There's nothing in the frame size, so there
	   will be no call to _dl_audit_pltexit.  */

	/* Get back the register contents.  */
	movq LR_RCX_OFFSET(%rsp), %rcx
	movq LR_RSI_OFFSET(%rsp), %rsi
	movq LR_RDI_OFFSET(%rsp), %rdi

	mov %RBX_LP, %RSP_LP
	movq (%rsp), %rbx
	cfi_restore(%rbx)
	cfi_def_cfa_register(%rsp)

	add $48, %RSP_LP	# Adjust the stack to the return address
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	jmp *%r11		# Jump to function address.

3:
	cfi_adjust_cfa_offset(48)
	cfi_rel_offset(%rbx, 0)
	cfi_def_cfa_register(%rbx)

	/* At this point we need to prepare a new stack frame for the
	   function which is about to be called.  We copy the original
	   stack to a temporary buffer of the size specified by the
	   'framesize' returned from _dl_profile_fixup.  */

	lea LR_RSP_OFFSET(%rbx), %RSI_LP # stack
	add $8, %RCX_LP
	and $-16, %RCX_LP
	sub %RCX_LP, %RSP_LP
	mov %RSP_LP, %RDI_LP
	rep movsb

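	# rep movsb advanced %rdi by exactly the amount subtracted from
	# %rsp above, so %rdi now points at the La_x86_64_regs structure;
	# 24, 32 and 40 are the lr_rcx, lr_rsi and lr_rdi offsets.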
	movq 24(%rdi), %rcx	# Get back register content.
	movq 32(%rdi), %rsi
	movq 40(%rdi), %rdi

	call *%r11

	mov 24(%rbx), %RSP_LP	# Drop the copied stack content.

	/* Now we have to prepare the La_x86_64_retval structure for
	   _dl_audit_pltexit.  %rsp now points to the La_x86_64_regs
	   structure, so we just need to allocate sizeof(La_x86_64_retval)
	   bytes on the stack; the alignment has already been taken
	   care of.  */
# ifdef RESTORE_AVX
	/* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
	   registers to detect if xmm0/xmm1 registers are changed
	   by audit module.  Since rsp is aligned to VEC_SIZE, we
	   need to make sure that the address of La_x86_64_retval +
	   LRV_VECTOR0_OFFSET is aligned to VEC_SIZE.  */
#  define LRV_SPACE (LRV_SIZE + XMM_SIZE*2)
#  define LRV_MISALIGNED ((LRV_SIZE + LRV_VECTOR0_OFFSET) & (VEC_SIZE - 1))
#  if LRV_MISALIGNED == 0
	sub $LRV_SPACE, %RSP_LP
#  else
	sub $(LRV_SPACE + VEC_SIZE - LRV_MISALIGNED), %RSP_LP
#  endif
# else
	sub $LRV_SIZE, %RSP_LP	# sizeof(La_x86_64_retval)
# endif
	mov %RSP_LP, %RCX_LP	# La_x86_64_retval argument to %rcx.

	/* Fill in the La_x86_64_retval structure.  */
	movq %rax, LRV_RAX_OFFSET(%rcx)
	movq %rdx, LRV_RDX_OFFSET(%rcx)

	VMOVA %xmm0, LRV_XMM0_OFFSET(%rcx)
	VMOVA %xmm1, LRV_XMM1_OFFSET(%rcx)

# ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
	VMOVA %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
	VMOVA %VEC(1), LRV_VECTOR1_OFFSET(%rcx)

	/* Save xmm0/xmm1 registers to detect if they are changed
	   by audit module.  */
	vmovdqa %xmm0, (LRV_SIZE + XMM_SIZE*0)(%rcx)
	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE*1)(%rcx)
# endif

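	# fstpt stores the 80-bit %st(0) and pops, so this saves %st(0) and
	# %st(1); they are reloaded below in reverse order to preserve
	# long double (and _Complex long double) return values across the
	# _dl_audit_pltexit call.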
	fstpt LRV_ST0_OFFSET(%rcx)
	fstpt LRV_ST1_OFFSET(%rcx)

	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
	movq 40(%rbx), %rsi	# Copy the args pushed by the PLT into registers.
	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
	call _dl_audit_pltexit

	/* Restore return registers.  */
	movq LRV_RAX_OFFSET(%rsp), %rax
	movq LRV_RDX_OFFSET(%rsp), %rdx

	VMOVA LRV_XMM0_OFFSET(%rsp), %xmm0
	VMOVA LRV_XMM1_OFFSET(%rsp), %xmm1

# ifdef RESTORE_AVX
	/* Check if xmm0/xmm1 registers are changed by audit module.  */
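	/* Same compare/inc trick as above, with the branch inverted: if
	   the auditor modified the xmm value, keep it; otherwise reload
	   the full-width vector saved before the call.  */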
	vpcmpeqb (LRV_SIZE)(%rsp), %xmm0, %xmm2
	vpmovmskb %xmm2, %esi
	incw %si
	jne 1f
	VMOVA LRV_VECTOR0_OFFSET(%rsp), %VEC(0)

1:	vpcmpeqb (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
	vpmovmskb %xmm2, %esi
	incw %si
	jne 1f
	VMOVA LRV_VECTOR1_OFFSET(%rsp), %VEC(1)

1:
# endif

	fldt LRV_ST1_OFFSET(%rsp)
	fldt LRV_ST0_OFFSET(%rsp)

	mov %RBX_LP, %RSP_LP
	movq (%rsp), %rbx
	cfi_restore(%rbx)
	cfi_def_cfa_register(%rsp)

	add $48, %RSP_LP	# Adjust the stack to the return address
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	retq

	cfi_endproc
	.size _dl_runtime_profile, .-_dl_runtime_profile
#endif