1  #ifndef INCLUDED_ARGS_H
       2  #define INCLUDED_ARGS_H
       3  
#include <immintrin.h>
#include <stdlib.h>
#include <string.h>
       6  
       7  /* Assertion macro.  */
       8  #define assert(test) if (!(test)) abort()
       9  
      10  #ifdef __GNUC__
      11  #define ATTRIBUTE_UNUSED __attribute__((__unused__))
      12  #else
      13  #define ATTRIBUTE_UNUSED
      14  #endif
      15  
      16  /* This defines the calling sequences for integers and floats.  */
      17  #define I0 rdi
      18  #define I1 rsi
      19  #define I2 rdx
      20  #define I3 rcx
      21  #define I4 r8
      22  #define I5 r9
      23  #define F0 zmm0
      24  #define F1 zmm1
      25  #define F2 zmm2
      26  #define F3 zmm3
      27  #define F4 zmm4
      28  #define F5 zmm5
      29  #define F6 zmm6
      30  #define F7 zmm7
      31  
      32  typedef union {
      33    __bf16 ___bf16[32];
      34    float _float[16];
      35    double _double[8];
      36    long long _longlong[8];
      37    int _int[16];
      38    unsigned long long _ulonglong[8];
      39    __m64 _m64[8];
      40    __m128 _m128[4];
      41    __m256 _m256[2];
      42    __m512 _m512[1];
      43    __m512bf16 _m512bf16[1];
      44  } ZMM_T;
      45  
      46  typedef union {
      47    float _float;
      48    double _double;
      49    long double _ldouble;
      50    unsigned long long _ulonglong[2];
      51  } X87_T;
      52  extern void (*callthis)(void);
      53  extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
      54  extern ZMM_T zmm_regs[32];
      55  extern X87_T x87_regs[8];
      56  extern volatile unsigned long long volatile_var;
      57  extern void snapshot (void);
      58  extern void snapshot_ret (void);
      59  #define WRAP_CALL(N) \
      60    (callthis = (void (*)()) (N), (typeof (&N)) snapshot)
      61  #define WRAP_RET(N) \
      62    (callthis = (void (*)()) (N), (typeof (&N)) snapshot_ret)
      63  
      64  /* Clear all integer registers.  */
      65  #define clear_int_hardware_registers \
      66    asm __volatile__ ("xor %%rax, %%rax\n\t" \
      67  		    "xor %%rbx, %%rbx\n\t" \
      68  		    "xor %%rcx, %%rcx\n\t" \
      69  		    "xor %%rdx, %%rdx\n\t" \
      70  		    "xor %%rsi, %%rsi\n\t" \
      71  		    "xor %%rdi, %%rdi\n\t" \
      72  		    "xor %%r8, %%r8\n\t" \
      73  		    "xor %%r9, %%r9\n\t" \
      74  		    "xor %%r10, %%r10\n\t" \
      75  		    "xor %%r11, %%r11\n\t" \
      76  		    "xor %%r12, %%r12\n\t" \
      77  		    "xor %%r13, %%r13\n\t" \
      78  		    "xor %%r14, %%r14\n\t" \
      79  		    "xor %%r15, %%r15\n\t" \
      80  		    ::: "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", \
      81  		    "r9", "r10", "r11", "r12", "r13", "r14", "r15");
      82  
      83  /* This is the list of registers available for passing arguments. Not all of
      84     these are used or even really available.  */
      85  struct IntegerRegisters
      86  {
      87    unsigned long long rax, rbx, rcx, rdx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
      88  };
      89  struct FloatRegisters
      90  {
      91    double mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
      92    long double st0, st1, st2, st3, st4, st5, st6, st7;
      93    ZMM_T zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7, zmm8, zmm9,
      94          zmm10, zmm11, zmm12, zmm13, zmm14, zmm15, zmm16, zmm17, zmm18,
      95  	zmm19, zmm20, zmm21, zmm22, zmm23, zmm24, zmm25, zmm26, zmm27,
      96  	zmm28, zmm29, zmm30, zmm31;
      97  };
      98  
      99  /* Implemented in scalarargs.c  */
     100  extern struct IntegerRegisters iregs;
     101  extern struct FloatRegisters fregs;
     102  extern unsigned int num_iregs, num_fregs;
     103  
     104  /* Clear register struct.  */
     105  #define clear_struct_registers \
     106    rax = rbx = rcx = rdx = rdi = rsi = rbp = rsp \
     107      = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = 0; \
     108    memset (&iregs, 0, sizeof (iregs)); \
     109    memset (&fregs, 0, sizeof (fregs)); \
     110    memset (zmm_regs, 0, sizeof (zmm_regs)); \
     111    memset (x87_regs, 0, sizeof (x87_regs));
     112  
     113  /* Clear both hardware and register structs for integers.  */
     114  #define clear_int_registers \
     115    clear_struct_registers \
     116    clear_int_hardware_registers
     117  
     118  #define check_vector_arguments(T,O) do { \
     119    assert (num_fregs <= 0 \
     120  	  || memcmp (((char *) &fregs.zmm0) + (O), \
     121  		     &zmm_regs[0], \
     122  		     sizeof (__ ## T) - (O)) == 0); \
     123    assert (num_fregs <= 1 \
     124  	  || memcmp (((char *) &fregs.zmm1) + (O), \
     125  		     &zmm_regs[1], \
     126  		     sizeof (__ ## T) - (O)) == 0); \
     127    assert (num_fregs <= 2 \
     128  	  || memcmp (((char *) &fregs.zmm2) + (O), \
     129  		     &zmm_regs[2], \
     130  		     sizeof (__ ## T) - (O)) == 0); \
     131    assert (num_fregs <= 3 \
     132  	  || memcmp (((char *) &fregs.zmm3) + (O), \
     133  		     &zmm_regs[3], \
     134  		     sizeof (__ ## T) - (O)) == 0); \
     135    assert (num_fregs <= 4 \
     136  	  || memcmp (((char *) &fregs.zmm4) + (O), \
     137  		     &zmm_regs[4], \
     138  		     sizeof (__ ## T) - (O)) == 0); \
     139    assert (num_fregs <= 5 \
     140  	  || memcmp (((char *) &fregs.zmm5) + (O), \
     141  		     &zmm_regs[5], \
     142  		     sizeof (__ ## T) - (O)) == 0); \
     143    assert (num_fregs <= 6 \
     144  	  || memcmp (((char *) &fregs.zmm6) + (O), \
     145  		     &zmm_regs[6], \
     146  		     sizeof (__ ## T) - (O)) == 0); \
     147    assert (num_fregs <= 7 \
     148  	  || memcmp (((char *) &fregs.zmm7) + (O), \
     149  		     &zmm_regs[7], \
     150  		     sizeof (__ ## T) - (O)) == 0); \
     151    } while (0)
     152  
     153  #define check_m512_arguments check_vector_arguments(m512, 0)
     154  
     155  #endif /* INCLUDED_ARGS_H  */