(root)/
glibc-2.38/
sysdeps/
x86_64/
fpu/
svml_s_wrapper_impl.h
       1  /* Wrapper implementations of vector math functions.
       2     Copyright (C) 2014-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  /* SSE2 ISA version as wrapper to scalar.
             Calls \callee four times, once for each 32-bit lane of the vector
             passed in %xmm0, and returns the four results packed in %xmm0 in
             the same lane order.  The callee is presumably a scalar float
             routine that takes its argument and returns its result in the low
             lane of %xmm0 (not visible in this file; confirm at the use
             sites).  %rbx is saved and restored around the body.  The 16-byte
             stack slot first holds the input lanes; slots 0 and 1 are reused
             for results 0 and 1 once their inputs have been consumed.  */
      20  .macro WRAPPER_IMPL_SSE2 callee
      21  	push	%rbx	/* %ebx will carry result 2 across the last call.  */
      22  	cfi_adjust_cfa_offset (8)
      23  	cfi_rel_offset (%rbx, 0)
      24  	subq	$16, %rsp	/* Room for the four input lanes.  */
      25  	cfi_adjust_cfa_offset (16)
      26  	movaps	%xmm0, (%rsp)	/* Spill inputs; aligned store, so this assumes %rsp is 16-byte aligned here.  */
      27  	call	JUMPTARGET(\callee)	/* Lane 0: %xmm0 still holds the incoming vector.  */
      28  	movss	%xmm0, (%rsp)	/* Result 0 replaces input 0.  */
      29  	movss	4(%rsp), %xmm0	/* Input lane 1.  */
      30  	call	JUMPTARGET(\callee)
      31  	movss	%xmm0, 4(%rsp)	/* Result 1 replaces input 1.  */
      32  	movss	8(%rsp), %xmm0	/* Input lane 2.  */
      33  	call	JUMPTARGET(\callee)
      34  	movd	%xmm0, %ebx	/* Keep result 2 in %ebx over the next call.  */
      35  	movss	12(%rsp), %xmm0	/* Input lane 3.  */
      36  	call	JUMPTARGET(\callee)
      37  	movd	%ebx, %xmm1	/* %xmm1 lane 0 = result 2.  */
      38  	unpcklps %xmm0, %xmm1	/* %xmm1 low half = { result 2, result 3 }.  */
      39  	movsd	(%rsp), %xmm0	/* %xmm0 low half = { result 0, result 1 }.  */
      40  	unpcklpd %xmm1, %xmm0	/* %xmm0 = { r0, r1, r2, r3 }.  */
      41  	addq	$16, %rsp
      42  	cfi_adjust_cfa_offset (-16)
      43  	popq	%rbx
      44  	cfi_adjust_cfa_offset (-8)
      45  	cfi_restore (%rbx)
      46  	ret
      47  .endm
      48  
      49  /* 2 argument SSE2 ISA version as wrapper to scalar.
             Calls \callee four times, once for each pair of corresponding
             32-bit lanes of the vectors passed in %xmm0 (first argument) and
             %xmm1 (second argument), and returns the four results packed in
             %xmm0 in the same lane order.  The callee is presumably a scalar
             two-argument float routine using the low lanes of %xmm0/%xmm1 and
             returning in %xmm0 (not visible in this file; confirm at the use
             sites).  %rbx is saved and restored around the body.  Stack
             layout after the prologue: 0..15(%rsp) first-argument lanes
             (slots 0 and 1 are reused for results 0 and 1), 16..31(%rsp)
             second-argument lanes.  */
      50  .macro WRAPPER_IMPL_SSE2_ff callee
      51  	push	%rbx	/* %ebx will carry result 2 across the last call.  */
      52  	cfi_adjust_cfa_offset (8)
      53  	cfi_rel_offset (%rbx, 0)
      54  	subq	$32, %rsp	/* Room for both input vectors.  */
      55  	cfi_adjust_cfa_offset (32)	/* Must equal the 32 bytes reserved above, and the -32 in the epilogue, or unwinding sees a wrong CFA.  */
      56  	movaps	%xmm0, (%rsp)	/* Spill first-argument lanes; aligned store.  */
      57  	movaps	%xmm1, 16(%rsp)	/* Spill second-argument lanes; aligned store.  */
      58  	call	JUMPTARGET(\callee)	/* Lane 0: registers still hold the incoming vectors.  */
      59  	movss	20(%rsp), %xmm1	/* Second argument, lane 1.  */
      60  	movss	%xmm0, 0(%rsp)	/* Result 0 replaces first-argument lane 0.  */
      61  	movss	4(%rsp), %xmm0	/* First argument, lane 1.  */
      62  	call	JUMPTARGET(\callee)
      63  	movss	24(%rsp), %xmm1	/* Second argument, lane 2.  */
      64  	movss	%xmm0, 4(%rsp)	/* Result 1 replaces first-argument lane 1.  */
      65  	movss	8(%rsp), %xmm0	/* First argument, lane 2.  */
      66  	call	JUMPTARGET(\callee)
      67  	movss	28(%rsp), %xmm1	/* Second argument, lane 3.  */
      68  	movd	%xmm0, %ebx	/* Keep result 2 in %ebx over the next call.  */
      69  	movss	12(%rsp), %xmm0	/* First argument, lane 3.  */
      70  	call	JUMPTARGET(\callee)
      71  	/* merge 4x results into xmm0.  */
      72  	movd	%ebx, %xmm1	/* %xmm1 lane 0 = result 2.  */
      73  	unpcklps %xmm0, %xmm1	/* %xmm1 low half = { result 2, result 3 }.  */
      74  	movsd	(%rsp), %xmm0	/* %xmm0 low half = { result 0, result 1 }.  */
      75  	unpcklpd %xmm1, %xmm0	/* %xmm0 = { r0, r1, r2, r3 }.  */
      76  	addq	$32, %rsp
      77  	cfi_adjust_cfa_offset (-32)
      78  	popq	%rbx
      79  	cfi_adjust_cfa_offset (-8)
      80  	cfi_restore (%rbx)
      81  	ret
      82  .endm
      83  
      84  /* 3 argument SSE2 ISA version as wrapper to scalar.
             Calls \callee four times, once for each 32-bit lane of the vector
             passed in %xmm0.  Call i (i = 0..3) receives lane i in %xmm0 and
             the incoming %rdi and %rsi values advanced by 4*i bytes.
             Presumably %rdi/%rsi point to two 4-element float output arrays
             that the callee fills one element at a time (not visible in this
             file; confirm at the use sites).  No vector result is assembled:
             the macro returns straight after the fourth call.  %rbp and %rbx
             are saved and restored around the body.  */
      85  .macro WRAPPER_IMPL_SSE2_fFF callee
      86  	pushq	%rbp	/* %rbp will hold the first pointer across calls.  */
      87  	cfi_adjust_cfa_offset (8)
      88  	cfi_rel_offset (%rbp, 0)
      89  	pushq	%rbx	/* %rbx will hold the second pointer across calls.  */
      90  	cfi_adjust_cfa_offset (8)
      91  	cfi_rel_offset (%rbx, 0)
      92  	movq	%rdi, %rbp	/* Base of first pointer argument.  */
      93  	movq	%rsi, %rbx	/* Base of second pointer argument.  */
      94  	subq	$24, %rsp	/* 16 bytes for the input lanes; the other 8 are presumably alignment padding after the two pushes.  */
      95  	cfi_adjust_cfa_offset (24)
      96  	movaps	%xmm0, (%rsp)	/* Spill inputs; aligned store, so this assumes %rsp is 16-byte aligned here.  */
      97  	call	JUMPTARGET(\callee)	/* Lane 0: %xmm0, %rdi and %rsi are still the incoming values.  */
      98  	movss	4(%rsp), %xmm0	/* Input lane 1.  */
      99  	leaq	4(%rbp), %rdi	/* First pointer + 4 bytes.  */
     100  	leaq	4(%rbx), %rsi	/* Second pointer + 4 bytes.  */
     101  	call	JUMPTARGET(\callee)
     102  	movss	8(%rsp), %xmm0	/* Input lane 2.  */
     103  	leaq	8(%rbp), %rdi	/* First pointer + 8 bytes.  */
     104  	leaq	8(%rbx), %rsi	/* Second pointer + 8 bytes.  */
     105  	call	JUMPTARGET(\callee)
     106  	movss	12(%rsp), %xmm0	/* Input lane 3.  */
     107  	leaq	12(%rbp), %rdi	/* First pointer + 12 bytes.  */
     108  	leaq	12(%rbx), %rsi	/* Second pointer + 12 bytes.  */
     109  	call	JUMPTARGET(\callee)
     110  	addq	$24, %rsp
     111  	cfi_adjust_cfa_offset (-24)
     112  	popq	%rbx
     113  	cfi_adjust_cfa_offset (-8)
     114  	cfi_restore (%rbx)
     115  	popq	%rbp
     116  	cfi_adjust_cfa_offset (-8)
     117  	cfi_restore (%rbp)
     118  	ret
     119  .endm
     120  
     121  #include "svml_sd_wrapper_impl.h"