(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
pr106010-6c.c
       1  /* { dg-do run } */
       2  /* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256 -fdump-tree-slp-details" } */
       3  /* { dg-require-effective-target avx512fp16 } */
       4  /* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 \}} 2 "slp2" } }  */
       5  /* { dg-final { scan-tree-dump-times "basic block part vectorized using (?:32|64) byte vectors" 1 "slp2" } } */
       6  
#include <stdlib.h>
#include <string.h>
       8  
/* Forward declaration of the runtime check defined later in this file.
   DO_TEST is defined to name it before the harness header is included.  */
static void do_test (void);
#define DO_TEST do_test
/* NOTE(review): presumably selects the AVX512FP16 variant of the checks
   provided by avx512-check.h -- confirm against that header.  */
#define AVX512FP16
#include "avx512-check.h"
      13  
/* Copy the 16 _Complex _Float16 elements of B into A in reverse element
   order, i.e. a[i] = b[15 - i] for i = 0 .. 15.

   B is __restrict-qualified.  The function is marked noipa so that
   interprocedural optimizations (including inlining into the caller)
   are not applied to it.

   The copies are intentionally written as 16 separate straight-line
   assignments: the dg-final directives at the top of this file scan the
   "slp2" dump for a VEC_PERM_EXPR whose selector reverses the lanes in
   pairs ({ 14, 15, 12, 13, ... }), consistent with each complex element
   occupying two _Float16 lanes.  Do not fold these statements into a
   loop or reorder them.  */
void
__attribute__((noipa))
foo_ph (_Complex _Float16* a, _Complex _Float16* __restrict b)
{
  a[0] = b[15];
  a[1] = b[14];
  a[2] = b[13];
  a[3] = b[12];
  a[4] = b[11];
  a[5] = b[10];
  a[6] = b[9];
  a[7] = b[8];
  a[8] = b[7];
  a[9] = b[6];
  a[10] = b[5];
  a[11] = b[4];
  a[12] = b[3];
  a[13] = b[2];
  a[14] = b[1];
  a[15] = b[0];
}
      35  
      36  void
      37  do_test (void)
      38  {
      39    _Complex _Float16* ph_src = (_Complex _Float16*) malloc (64);
      40    _Complex _Float16* ph_dst = (_Complex _Float16*) malloc (64);
      41    _Complex _Float16* ph_exp = (_Complex _Float16*) malloc (64);
      42    char* p = (char* ) malloc (64);
      43    char* q = (char* ) malloc (64);
      44  
      45    __builtin_memset (ph_dst, 0, 64);
      46  
      47    for (int i = 0; i != 64; i++)
      48      p[i] = i;
      49  
      50    __builtin_memcpy (ph_src, p, 64);
      51  
      52    for (int i = 0; i != 4; i++)
      53      {
      54        q[i] = i + 60;
      55        q[i + 4] = i + 56;
      56        q[i + 8] = i + 52;
      57        q[i + 12] = i + 48;
      58        q[i + 16] = i + 44;
      59        q[i + 20] = i + 40;
      60        q[i + 24] = i + 36;
      61        q[i + 28] = i + 32;
      62        q[i + 32] = i + 28;
      63        q[i + 36] = i + 24;
      64        q[i + 40] = i + 20;
      65        q[i + 44] = i + 16;
      66        q[i + 48] = i + 12;
      67        q[i + 52] = i + 8;
      68        q[i + 56] = i + 4;
      69        q[i + 60] = i;
      70      }
      71  
      72    __builtin_memcpy (ph_exp, q, 64);
      73  
      74    foo_ph (ph_dst, ph_src);
      75    
      76    if (__builtin_memcmp (ph_dst, ph_exp, 64) != 0)
      77      __builtin_abort ();
      78  
      79    return;
      80  }