(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.dg/
vect/
slp-21.c
       1  /* { dg-require-effective-target vect_int } */
       2  
       3  #include <stdarg.h>
       4  #include "tree-vect.h"
       5  
       6  #define N 128 
       7  /* Runs three grouped load/store loops on IN, checking OUT and OUT2 after each one; aborts on a mismatch, returns 0 otherwise.  */
       8  int
       9  main1 ()
      10  {
      11    unsigned short i;
      12    unsigned short out[N*8], out2[N*8], b0, b1, b2, b3, b4, a0, a1, a2, a3, b5;
      13    unsigned short in[N*8];
      14    /* Fill IN with its own indices, 0 .. N*8-1.  */
      15    for (i = 0; i < N*8; i++)
      16      {
      17        in[i] = i;
      18      }
      19    /* Each loop below handles N groups of four, so only the first N*4 elements of IN, OUT and OUT2 are used.  */
      20    /* Different operations in both cases (OUT: *,+,+,*; OUT2: *,+,+,+) - vectorization with interleaving.  */
      21    for (i = 0; i < N; i++)
      22      {
      23        a0 = in[i*4];
      24        a1 = in[i*4 + 1];
      25        a2 = in[i*4 + 2];
      26        a3 = in[i*4 + 3];
      27  
      28        b0 = a0 * 8;
      29        b1 = a1 + 7;
      30        b2 = a2 + 6;
      31        b3 = a3 * 5;
      32        /* b4 and b5 take the place of b2 and b3 in the OUT2 group.  */
      33        b4 = a2 + 4;
      34        b5 = a3 + 3;
      35  
      36        out[i*4] = b0;
      37        out[i*4 + 1] = b1;
      38        out[i*4 + 2] = b2;
      39        out[i*4 + 3] = b3;
      40  
      41        out2[i*4] = b0;
      42        out2[i*4 + 1] = b1;
      43        out2[i*4 + 2] = b4;
      44        out2[i*4 + 3] = b5;
      45      }
      46  
      47    /* check results: recompute the expected values from IN and abort on any mismatch.  */
      48    for (i = 0; i < N; i++)
      49      {
      50        a0 = in[i*4];
      51        a1 = in[i*4 + 1];
      52        a2 = in[i*4 + 2];
      53        a3 = in[i*4 + 3];
      54  
      55        b0 = a0 * 8;
      56        b1 = a1 + 7;
      57        b2 = a2 + 6;
      58        b3 = a3 * 5;
      59  
      60        b4 = a2 + 4;
      61        b5 = a3 + 3;
      62  
      63        if (out[i*4] != b0 
      64           || out[i*4 + 1] != b1
      65           || out[i*4 + 2] != b2
      66           || out[i*4 + 3] != b3)
      67  	abort ();
      68  
      69        if (out2[i*4] != b0  
      70           || out2[i*4 + 1] != b1
      71           || out2[i*4 + 2] != b4
      72           || out2[i*4 + 3] != b5)
      73          abort ();
      74      }
      75  
      76    /* Different operations in the first case only (OUT: +,+,+,*; OUT2: +,+,+,+) - vectorization with interleaving.  */
      77    for (i = 0; i < N; i++)
      78      {
      79        a0 = in[i*4];
      80        a1 = in[i*4 + 1];
      81        a2 = in[i*4 + 2];
      82        a3 = in[i*4 + 3];
      83  
      84        b0 = a0 + 8;
      85        b1 = a1 + 7;
      86        b2 = a2 + 6;
      87        b3 = a3 * 5;
      88  
      89        b4 = a2 + 4;
      90        b5 = a3 + 3;
      91  
      92        out[i*4] = b0;
      93        out[i*4 + 1] = b1;
      94        out[i*4 + 2] = b2;
      95        out[i*4 + 3] = b3;
      96  
      97        out2[i*4] = b0;
      98        out2[i*4 + 1] = b1;
      99        out2[i*4 + 2] = b4;
     100        out2[i*4 + 3] = b5;
     101      }
     102  
     103    /* check results: recompute the expected values from IN and abort on any mismatch.  */
     104    for (i = 0; i < N; i++)
     105      {
     106        a0 = in[i*4];
     107        a1 = in[i*4 + 1];
     108        a2 = in[i*4 + 2];
     109        a3 = in[i*4 + 3];
     110  
     111        b0 = a0 + 8;
     112        b1 = a1 + 7;
     113        b2 = a2 + 6;
     114        b3 = a3 * 5;
     115  
     116        b4 = a2 + 4;
     117        b5 = a3 + 3;
     118  
     119        if (out[i*4] != b0
     120           || out[i*4 + 1] != b1
     121           || out[i*4 + 2] != b2
     122           || out[i*4 + 3] != b3)
     123          abort ();
     124  
     125        if (out2[i*4] != b0
     126           || out2[i*4 + 1] != b1
     127           || out2[i*4 + 2] != b4
     128           || out2[i*4 + 3] != b5)
     129          abort ();
     130      }
     131  
     132  
     133    /* Different operations in the second case only (OUT: +,+,+,+; OUT2: +,+,*,+) - vectorization with interleaving.  */
     134    for (i = 0; i < N; i++) 
     135      { 
     136        a0 = in[i*4];
     137        a1 = in[i*4 + 1];
     138        a2 = in[i*4 + 2];
     139        a3 = in[i*4 + 3];
     140  
     141        b0 = a0 + 8;
     142        b1 = a1 + 7;
     143        b2 = a2 + 6;
     144        b3 = a3 + 5;
     145  
     146        b4 = a2 * 4;
     147        b5 = a3 + 3;
     148  
     149        out[i*4] = b0;
     150        out[i*4 + 1] = b1;
     151        out[i*4 + 2] = b2;
     152        out[i*4 + 3] = b3;
     153  
     154        out2[i*4] = b0;
     155        out2[i*4 + 1] = b1;
     156        out2[i*4 + 2] = b4;
     157        out2[i*4 + 3] = b5;
     158      }
     159  
     160    /* check results: recompute the expected values from IN and abort on any mismatch.  */
     161    for (i = 0; i < N; i++)
     162      {
     163        a0 = in[i*4];
     164        a1 = in[i*4 + 1];
     165        a2 = in[i*4 + 2];
     166        a3 = in[i*4 + 3];
     167  
     168        b0 = a0 + 8;
     169        b1 = a1 + 7;
     170        b2 = a2 + 6;
     171        b3 = a3 + 5;
     172  
     173        b4 = a2 * 4;
     174        b5 = a3 + 3;
     175  
     176        if (out[i*4] != b0
     177           || out[i*4 + 1] != b1
     178           || out[i*4 + 2] != b2
     179           || out[i*4 + 3] != b3)
     180          abort ();
     181  
     182        if (out2[i*4] != b0
     183           || out2[i*4 + 1] != b1
     184           || out2[i*4 + 2] != b4
     185           || out2[i*4 + 3] != b5)
     186          abort ();
     187      }
     188  
     189  
     190    return 0;
     191  }
     192  /* Test entry point: calls check_vect, then main1, and returns 0.  */
     193  int main (void)
     194  {
     195    check_vect ();  /* Not defined in this file; presumably provided by "tree-vect.h" - confirm there.  */
     196  
     197    main1 ();  /* Return value is ignored; main1 reports a failure by calling abort ().  */
     198  
     199    return 0;
     200  }
     201  
     202  /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect"  { target { vect_strided4 || vect_extract_even_odd } } } } */
     203  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target  { ! { vect_strided4 || vect_extract_even_odd } } } } } */
     204  /* Some targets can vectorize the second of the three main loops using
     205     hybrid SLP.  For 128-bit vectors, the required 4->3 permutations are:
     206  
     207     { 0, 1, 2, 4, 5, 6, 8, 9 }
     208     { 2, 4, 5, 6, 8, 9, 10, 12 }
     209     { 5, 6, 8, 9, 10, 12, 13, 14 }
     210  
     211     Not all vect_perm targets support that, and it's a bit too specific to have
     212     its own effective-target selector, so we just test targets directly.  */
     213  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { powerpc64*-*-* s390*-*-* } } } } */
     214  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided4 && { ! { powerpc64*-*-* s390*-*-* } } } } } } */
     215  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect"  { target { ! { vect_strided4 } } } } } */
     216