(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.dg/
vect/
slp-perm-4.c
       1  /* { dg-require-effective-target vect_int } */
       2  /* { dg-require-effective-target vect_perm } */
       3  
       4  #include <stdarg.h>
       5  #include "tree-vect.h"
       6  
       7  #define M00 100
          /* M<r><c> is the constant factor that input c of a group of five
             (a..e in foo, c = 0..4) contributes to output r of that group; foo
             and the reference computation in main both use the full 5x5 table.
             The defines are grouped by c: M00 above opens the c = 0 group, whose
             entries multiply input a.  */
       8  #define M10 216
       9  #define M20 23
      10  #define M30 237
      11  #define M40 437
      12  
      13  #define M01 1322  /* c = 1: factors applied to input b.  */
      14  #define M11 13
      15  #define M21 27271
      16  #define M31 2280
      17  #define M41 284
      18  
      19  #define M02 74  /* c = 2: factors applied to input c.  */
      20  #define M12 191
      21  #define M22 500
      22  #define M32 111
      23  #define M42 1114
      24  
      25  #define M03 134  /* c = 3: factors applied to input d.  */
      26  #define M13 117
      27  #define M23 11
      28  #define M33 771
      29  #define M43 71
      30  
      31  #define M04 334  /* c = 4: factors applied to input e.  */
      32  #define M14 147
      33  #define M24 115
      34  #define M34 7716
      35  #define M44 16
      36  
          /* N is the element count of the input, output and expected-result
             arrays in main; foo processes N / 5 groups of five.  It is fixed at
             20 (the size main's precomputed result table is written for) unless
             VECTOR_BITS exceeds 128, in which case it is VECTOR_BITS * 5 / 32.
             NOTE(review): VECTOR_BITS is not defined in this file; presumably it
             comes from tree-vect.h and is the target vector width in bits, so
             that N / 5 equals the number of 32-bit lanes -- confirm.  */
      37  #if VECTOR_BITS > 128
      38  #define N (VECTOR_BITS * 5 / 32)
      39  #else
      40  #define N 20
      41  #endif
      42  
      43  void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput)
      44  {
            /* Transform N / 5 consecutive groups of five unsigned ints.

               pInput   source elements; 5 * (N / 5) of them are read in order.
               pOutput  destination; 5 * (N / 5) elements are written in order.

               For each group with inputs a..e, output r (r = 0..4) is
               M<r>0 * a + M<r>1 * b + M<r>2 * c + M<r>3 * d + M<r>4 * e, computed
               in unsigned int arithmetic.  Both pointers are __restrict__
               qualified, so callers must pass non-overlapping buffers.

               NOTE(review): the dg-final directives at the end of this file scan
               the vect dump for exactly one vectorized loop and for SLP
               vectorization; presumably this is the loop they are about, so its
               statement shape should not be altered casually.  */
      45    unsigned int i, a, b, c, d, e;
      46  
      47    for (i = 0; i < N / 5; i++)
      48      {
                 /* Load one group of five inputs; each read advances pInput.  */
      49         a = *pInput++;
      50         b = *pInput++;
      51         c = *pInput++;
      52         d = *pInput++;
      53         e = *pInput++;
      54  
                 /* Store the group's five outputs; every output depends on all
                    five inputs, using row M<r>0..M<r>4 of the coefficient table.  */
      55         *pOutput++ = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
      56         *pOutput++ = M10 * a + M11 * b + M12 * c + M13 * d + M14 * e;
      57         *pOutput++ = M20 * a + M21 * b + M22 * c + M23 * d + M24 * e;
      58         *pOutput++ = M30 * a + M31 * b + M32 * c + M33 * d + M34 * e;
      59         *pOutput++ = M40 * a + M41 * b + M42 * c + M43 * d + M44 * e;
      60      }
      61  }
      62  
      63  int main (int argc, const char* argv[])
      64  {
            /* Test driver: fills input with i % 256, obtains the expected results,
               runs foo on the input, and calls abort on the first output element
               that differs from the expectation.  Returns 0 when every element
               matches.  argc and argv are not used.  */
      65    unsigned int input[N], output[N], i;
      66  
            /* NOTE(review): check_vect is declared outside this file (presumably
               in tree-vect.h); what it checks is not visible here.  */
      67    check_vect ();
      68  
      69    for (i = 0; i < N; i++)
      70      {
      71        input[i] = i%256;
      72        output[i] = 0;
                /* Empty asm with a "memory" clobber.  Presumably it keeps this
                   setup loop from being vectorized so that the "vectorized 1
                   loops" scan counts only the loop in foo -- confirm.  */
      73        asm volatile ("" ::: "memory");
      74      }
      75  
          /* With N == 20 the inputs are 0..19 and the expected outputs are spelled
             out as constants; for any other N they are recomputed below with the
             same formulas foo uses.  */
      76  #if N == 20
      77    unsigned int check_results[N]
      78      = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399,
      79      22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619};
      80  #else
            /* Declared volatile here, unlike the constant table above; presumably
               this, like the asm below, keeps the reference loop scalar -- confirm.  */
      81    volatile unsigned int check_results[N];
      82  
      83    for (i = 0; i < N / 5; i++)
      84      {
      85        unsigned int a = input[i * 5];
      86        unsigned int b = input[i * 5 + 1];
      87        unsigned int c = input[i * 5 + 2];
      88        unsigned int d = input[i * 5 + 3];
      89        unsigned int e = input[i * 5 + 4];
      90  
      91        check_results[i * 5] = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
      92        check_results[i * 5 + 1] = (M10 * a + M11 * b + M12 * c
      93  				  + M13 * d + M14 * e);
      94        check_results[i * 5 + 2] = (M20 * a + M21 * b + M22 * c
      95  				  + M23 * d + M24 * e);
      96        check_results[i * 5 + 3] = (M30 * a + M31 * b + M32 * c
      97  				  + M33 * d + M34 * e);
      98        check_results[i * 5 + 4] = (M40 * a + M41 * b + M42 * c
      99  				  + M43 * d + M44 * e);
     100        asm volatile ("");
     101      }
     102  #endif
     103  
     104    foo (input, output);
     105  
            /* Compare every produced element with its expected value.  */
     106    for (i = 0; i < N; i++)
     107      if (output[i] != check_results[i])
     108        abort ();
     109  
     110    return 0;
     111  }
     112  
     113  /* Currently interleaving is not supported for a group-size of 5.  */
     114  
     115  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
     116  /* { dg-final { scan-tree-dump-times "gaps requires scalar epilogue loop" 0 "vect" } } */
     117  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */