1  /* { dg-do run } */
       2  /* { dg-options "-O2 -mavx2 -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256" } */
       3  /* { dg-require-effective-target avx2 } */
       4  
       5  #include "avx2-check.h"
       6  #include <string.h>
       7  #include "pr106010-6a.c"
       8  
       9  void
      10  avx2_test (void)
      11  {
      12    _Complex double* pd_src = (_Complex double*) malloc (64);
      13    _Complex double* pd_dst = (_Complex double*) malloc (64);
      14    _Complex double* pd_exp = (_Complex double*) malloc (64);
      15    _Complex float* ps_src = (_Complex float*) malloc (64);
      16    _Complex float* ps_dst = (_Complex float*) malloc (64);
      17    _Complex float* ps_exp = (_Complex float*) malloc (64);
      18    _Complex long long* epi64_src = (_Complex long long*) malloc (64);
      19    _Complex long long* epi64_dst = (_Complex long long*) malloc (64);
      20    _Complex long long* epi64_exp = (_Complex long long*) malloc (64);
      21    _Complex int* epi32_src = (_Complex int*) malloc (64);
      22    _Complex int* epi32_dst = (_Complex int*) malloc (64);
      23    _Complex int* epi32_exp = (_Complex int*) malloc (64);
      24    _Complex short* epi16_src = (_Complex short*) malloc (64);
      25    _Complex short* epi16_dst = (_Complex short*) malloc (64);
      26    _Complex short* epi16_exp = (_Complex short*) malloc (64);
      27    _Complex char* epi8_src = (_Complex char*) malloc (64);
      28    _Complex char* epi8_dst = (_Complex char*) malloc (64);
      29    _Complex char* epi8_exp = (_Complex char*) malloc (64);
      30    char* p = (char* ) malloc (64);
      31    char* q = (char* ) malloc (64);
      32  
      33    __builtin_memset (pd_dst, 0, 64);
      34    __builtin_memset (ps_dst, 0, 64);
      35    __builtin_memset (epi64_dst, 0, 64);
      36    __builtin_memset (epi32_dst, 0, 64);
      37    __builtin_memset (epi16_dst, 0, 64);
      38    __builtin_memset (epi8_dst, 0, 64);
      39  
      40    for (int i = 0; i != 64; i++)
      41      p[i] = i;
      42  
      43    __builtin_memcpy (pd_src, p, 64);
      44    __builtin_memcpy (ps_src, p, 64);
      45    __builtin_memcpy (epi64_src, p, 64);
      46    __builtin_memcpy (epi32_src, p, 64);
      47    __builtin_memcpy (epi16_src, p, 64);
      48    __builtin_memcpy (epi8_src, p, 64);
      49  
      50  
      51    for (int i = 0; i != 16; i++)
      52      {
      53        q[i] = i + 48;
      54        q[i + 16] = i + 32;
      55        q[i + 32] = i + 16;
      56        q[i + 48] = i;
      57      }
      58   
      59    __builtin_memcpy (pd_exp, q, 64);
      60    __builtin_memcpy (epi64_exp, q, 64);
      61  
      62     for (int i = 0; i != 8; i++)
      63      {
      64        q[i] = i + 56;
      65        q[i + 8] = i + 48;
      66        q[i + 16] = i + 40;
      67        q[i + 24] = i + 32;
      68        q[i + 32] = i + 24;
      69        q[i + 40] = i + 16;
      70        q[i + 48] = i + 8;
      71        q[i + 56] = i;
      72      }
      73  
      74    __builtin_memcpy (ps_exp, q, 64);
      75    __builtin_memcpy (epi32_exp, q, 64);
      76  
      77    for (int i = 0; i != 4; i++)
      78      {
      79        q[i] = i + 60;
      80        q[i + 4] = i + 56;
      81        q[i + 8] = i + 52;
      82        q[i + 12] = i + 48;
      83        q[i + 16] = i + 44;
      84        q[i + 20] = i + 40;
      85        q[i + 24] = i + 36;
      86        q[i + 28] = i + 32;
      87        q[i + 32] = i + 28;
      88        q[i + 36] = i + 24;
      89        q[i + 40] = i + 20;
      90        q[i + 44] = i + 16;
      91        q[i + 48] = i + 12;
      92        q[i + 52] = i + 8;
      93        q[i + 56] = i + 4;
      94        q[i + 60] = i;
      95      }
      96  
      97    __builtin_memcpy (epi16_exp, q, 64);
      98  
      99    for (int i = 0; i != 2; i++)
     100      {
     101        q[i] = i + 62;
     102        q[i + 2] = i + 60;
     103        q[i + 4] = i + 58;
     104        q[i + 6] = i + 56;
     105        q[i + 8] = i + 54;
     106        q[i + 10] = i + 52;
     107        q[i + 12] = i + 50;
     108        q[i + 14] = i + 48;
     109        q[i + 16] = i + 46;
     110        q[i + 18] = i + 44;
     111        q[i + 20] = i + 42;
     112        q[i + 22] = i + 40;
     113        q[i + 24] = i + 38;
     114        q[i + 26] = i + 36;
     115        q[i + 28] = i + 34;
     116        q[i + 30] = i + 32;
     117        q[i + 32] = i + 30;
     118        q[i + 34] = i + 28;
     119        q[i + 36] = i + 26;
     120        q[i + 38] = i + 24;
     121        q[i + 40] = i + 22;
     122        q[i + 42] = i + 20;
     123        q[i + 44] = i + 18;
     124        q[i + 46] = i + 16;
     125        q[i + 48] = i + 14;
     126        q[i + 50] = i + 12;
     127        q[i + 52] = i + 10;
     128        q[i + 54] = i + 8;
     129        q[i + 56] = i + 6;
     130        q[i + 58] = i + 4;
     131        q[i + 60] = i + 2;
     132        q[i + 62] = i;
     133      }
     134    __builtin_memcpy (epi8_exp, q, 64);
     135  
     136    foo_pd (pd_dst, pd_src);
     137    foo_ps (ps_dst, ps_src);
     138    foo_epi64 (epi64_dst, epi64_src);
     139    foo_epi32 (epi32_dst, epi32_src);
     140    foo_epi16 (epi16_dst, epi16_src);
     141    foo_epi8 (epi8_dst, epi8_src);
     142  
     143    if (__builtin_memcmp (pd_dst, pd_exp, 64) != 0)
     144      __builtin_abort ();
     145    if (__builtin_memcmp (ps_dst, ps_exp, 64) != 0)
     146      __builtin_abort ();
     147    if (__builtin_memcmp (epi64_dst, epi64_exp, 64) != 0)
     148      __builtin_abort ();
     149    if (__builtin_memcmp (epi32_dst, epi32_exp, 64) != 0)
     150      __builtin_abort ();
     151    if (__builtin_memcmp (epi16_dst, epi16_exp, 64) != 0)
     152      __builtin_abort ();
     153    if (__builtin_memcmp (epi8_dst, epi8_exp, 64) != 0)
     154      __builtin_abort ();
     155  
     156    return;
     157  }