(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
pr106010-3c.c
       1  /* { dg-do run } */
       2  /* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256 -fdump-tree-slp-details" } */
       3  /* { dg-require-effective-target avx512fp16 } */
       4  /* { dg-final { scan-tree-dump-times "basic block part vectorized using (?:32|64) byte vectors" 1 "slp2" } }*/
       5  /* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 2, 3, 0, 1, 8, 9, 6, 7, 14, 15, 12, 13, 4, 5, 10, 11 \}} 1 "slp2" } }  */
       6  
       7  #include <string.h>
       8  
       9  static void do_test (void);
      10  #define DO_TEST do_test
      11  #define AVX512FP16
      12  #include "avx512-check.h"
      13  
/* Copy eight _Complex _Float16 elements from B to A in a fixed shuffled
   order: A[0..7] receive B[1], B[0], B[4], B[3], B[7], B[6], B[2], B[5].

   This is the code the dg-final directives at the top of the file inspect.
   Each complex element occupies two half-precision lanes (element k is
   lanes 2k and 2k+1), so the element order above corresponds to the lane
   selector { 2, 3, 0, 1, 8, 9, 6, 7, 14, 15, 12, 13, 4, 5, 10, 11 } that
   the VEC_PERM_EXPR scan looks for.  Keep the statements as they are:
   changing their form may change what the scans match.

   The noipa attribute disables interprocedural optimization between this
   function and its callers, so it stays a separate out-of-line function
   when do_test calls it.  */
void
__attribute__((noipa))
foo_ph (_Complex _Float16* a, _Complex _Float16* __restrict b)
{
  a[0] = b[1];
  a[1] = b[0];
  a[2] = b[4];
  a[3] = b[3];
  a[4] = b[7];
  a[5] = b[6];
  a[6] = b[2];
  a[7] = b[5];
}
      27  
      28  void
      29  do_test (void)
      30  {
      31    _Complex _Float16* ph_src = (_Complex _Float16*) malloc (32);
      32    _Complex _Float16* ph_dst = (_Complex _Float16*) malloc (32);
      33    _Complex _Float16* ph_exp = (_Complex _Float16*) malloc (32);
      34    char* p = (char* ) malloc (32);
      35    char* q = (char* ) malloc (32);
      36  
      37    __builtin_memset (ph_dst, 0, 32);
      38  
      39    for (int i = 0; i != 32; i++)
      40      p[i] = i;
      41    __builtin_memcpy (ph_src, p, 32);
      42  
      43    for (int i = 0; i != 4; i++)
      44      {
      45        p[i] = i + 4;
      46        p[i + 4] = i;
      47        p[i + 8] = i + 16;
      48        p[i + 12] = i + 12;
      49        p[i + 16] = i + 28;
      50        p[i + 20] = i + 24;
      51        p[i + 24] = i + 8;
      52        p[i + 28] = i + 20;
      53        q[i] = i + 28;
      54        q[i + 4] = i + 24;
      55        q[i + 8] = i + 20;
      56        q[i + 12] = i + 16;
      57        q[i + 16] = i + 12;
      58        q[i + 20] = i + 8;
      59        q[i + 24] = i + 4;
      60        q[i + 28] = i;
      61      }
      62    __builtin_memcpy (ph_exp, p, 32);
      63  
      64    foo_ph (ph_dst, ph_src);
      65    if (__builtin_memcmp (ph_dst, ph_exp, 32) != 0)
      66      __builtin_abort ();
      67  
      68    return;
      69  }