(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
spill_to_mask-1.c
       1  /* { dg-do compile } */
       2  /* { dg-options "-O2 -march=skylake-avx512 -DDTYPE32" } */
       3  
       4  typedef unsigned long long u64;
       5  typedef unsigned int u32;
       6  typedef unsigned short u16;
       7  typedef unsigned char u8;
       8  
       9  #ifdef DTYPE32
      10  typedef u32 DTYPE;
      11  #define byteswap byteswapu32
      12  #endif
      13  
      14  #ifdef DTYPE16
      15  typedef u16 DTYPE;
      16  #define byteswap byteswapu16
      17  #endif
      18  
      19  #ifdef DTYPE8
      20  typedef u16 DTYPE;
      21  #define byteswap byteswapu8
      22  #endif
      23  
      24  #ifdef DTYPE64
      25  typedef u16 DTYPE;
      26  #define byteswap byteswapu64
      27  #endif
      28  
      29  #define R(x,n) ( (x >> n) | (x << (32 - n)))
      30  
      31  #define S0(x) (R(x, 2) ^ R(x,13) ^ R(x,22))
      32  #define S1(x) (R(x, 6) ^ R(x,11) ^ R(x,25))
      33  
      34  #define TT(a,b,c,d,e,f,g,h,x,K)                 \
      35  {                                                        \
      36      tmp1 = h + S1(e) + (g ^ (e & (f ^ g))) + K + x;                \
      37      tmp2 = S0(a) + ((a & b) | (c & (a | b)));                           \
      38      h  = tmp1 + tmp2;                                    \
      39      d += tmp1;                                           \
      40  }
      41  
      42  static inline u32 byteswapu32(u32 x)
      43  {
      44    x = (x & 0x0000FFFF) << 16 | (x & 0xFFFF0000) >> 16;
      45    x = (x & 0x00FF00FF) << 8 | (x & 0xFF00FF00) >> 8;  
      46    return x;
      47  }
      48  
      49  static inline u16 byteswapu16(u16 x)
      50  {
      51    x = (x & 0x00FF) << 8 | (x & 0xFF00) >> 8;  
      52    return x;
      53  }
      54  
      55  static inline u8 byteswapu8(u8 x)
      56  {
      57    return x;
      58  }
      59  
      60  static inline u64 byteswapu64(u64 x)
      61  {
      62    x = ((u64)(byteswapu32 (x & 0x00000000FFFFFFFF))) << 32 | byteswapu32((x & 0xFFFFFFFF00000000) >> 32);
      63    return x;
      64  }
      65  
      66  void foo (DTYPE in[16], DTYPE out[8], const DTYPE C[16])
      67  {
      68      DTYPE tmp1 = 0, tmp2 = 0, a, b, c, d, e, f, g, h;
      69      DTYPE w0, w1, w2, w3, w4, w5, w6, w7,
      70  	w8, w9, w10, w11, w12, w13, w14, w15;
      71      w0  = byteswap(in[0]);
      72      w1  = byteswap(in[1]);
      73      w2  = byteswap(in[2]);
      74      w3  = byteswap(in[3]);
      75      w4  = byteswap(in[4]);
      76      w5  = byteswap(in[5]);
      77      w6  = byteswap(in[6]);
      78      w7  = byteswap(in[7]);
      79      w8  = byteswap(in[8]);
      80      w9  = byteswap(in[9]);
      81      w10 = byteswap(in[10]);
      82      w11 = byteswap(in[11]);
      83      w12 = byteswap(in[12]);
      84      w13 = byteswap(in[13]);
      85      w14 = byteswap(in[14]);
      86      w15 = byteswap(in[15]);
      87      a = out[0];
      88      b = out[1];
      89      c = out[2];
      90      d = out[3];
      91      e = out[4];
      92      f = out[5];
      93      g = out[6];
      94      h = out[7];
      95  
      96      TT(a, b, c, d, e, f, g, h,  w0, C[0]);
      97      TT(h, a, b, c, d, e, f, g,  w1, C[1]);
      98      TT(g, h, a, b, c, d, e, f,  w2, C[2]);
      99      TT(f, g, h, a, b, c, d, e,  w3, C[3]);
     100      TT(e, f, g, h, a, b, c, d,  w4, C[4]);
     101      TT(d, e, f, g, h, a, b, c,  w5, C[5]);
     102      TT(c, d, e, f, g, h, a, b,  w6, C[6]);
     103      TT(b, c, d, e, f, g, h, a,  w7, C[7]);
     104      TT(a, b, c, d, e, f, g, h,  w8, C[8]);
     105      TT(h, a, b, c, d, e, f, g,  w9, C[9]);
     106      TT(g, h, a, b, c, d, e, f, w10, C[10]);
     107      TT(f, g, h, a, b, c, d, e, w11, C[11]);
     108      TT(e, f, g, h, a, b, c, d, w12, C[12]);
     109      TT(d, e, f, g, h, a, b, c, w13, C[13]);
     110      TT(c, d, e, f, g, h, a, b, w14, C[14]);
     111      TT(b, c, d, e, f, g, h, a, w15, C[15]);
     112  
     113      out[0] += a;
     114      out[1] += b;
     115      out[2] += c;
     116      out[3] += d;
     117      out[4] += e;
     118      out[5] += f;
     119      out[6] += g;
     120      out[7] += h;
     121  }
     122  
     123  /* { dg-final { scan-assembler "kmovd" { xfail *-*-* } } } */
     124  /* { dg-final { scan-assembler-not "knot" } } */
     125  /* { dg-final { scan-assembler-not "kxor" } } */
     126  /* { dg-final { scan-assembler-not "kor" } } */
     127  /* { dg-final { scan-assembler-not "kandn" } } */