(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.dg/
vect/
slp-reduc-7.c
       1  /* { dg-require-effective-target vect_int } */
       2  
       3  #include <stdarg.h>
       4  #include "tree-vect.h"
       5  
       6  #define N 32
       7  
          /* Input data for the reductions.  Each array holds the same 16-value
             pattern twice: ub[i] == 3 * (i % 16) and uc[i] == i % 16, so
             ub[i] - uc[i] == 2 * (i % 16) for every i in [0, N).  */
       8  unsigned int ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
       9      0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
      10  unsigned int uc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
      11      0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
      12  
      13  /* Vectorization of reduction using loop-aware SLP (with two copies).

             main1 performs eight unsigned sum reductions in a single loop.  For
             each I in [0, n) and each K in 0..7 it adds
             ub[8*I + K] - uc[8*I + K] to udiffK, so every iteration reads one
             group of eight consecutive elements from each array.  The caller
             must keep 8*n within the N elements of ub and uc.

             The accumulators start at 5, 10, 20, 30, 45, 50, 60 and 70.  After
             the loop each udiffK is compared with the caller-supplied resK;
             abort is called on any mismatch, otherwise 0 is returned.

             NOTE(review): noinline keeps main1 out of line; presumably this is
             so the loop is compiled with a trip count that is not a known
             constant -- confirm before changing.  */
      14  
      15  __attribute__ ((noinline))
      16  int main1 (int n, int res0, int res1, int res2, int res3,
      17  	   int res4, int res5, int res6, int res7)
      18  {
      19    int i;
      20    unsigned int udiff0 = 5, udiff1 = 10, udiff2 = 20, udiff3 = 30;
      21    unsigned int udiff4 = 45, udiff5 = 50, udiff6 = 60, udiff7 = 70;
      22  
            /* The reduction statements are written from element 7 down to
               element 0, i.e. in the reverse of memory order.
               NOTE(review): this ordering looks deliberate for the SLP and
               VEC_PERM_EXPR scans at the end of the file -- keep it as is.  */
      23    for (i = 0; i < n; i++) {
      24      udiff7 += (ub[8*i + 7] - uc[8*i + 7]);
      25      udiff6 += (ub[8*i + 6] - uc[8*i + 6]);
      26      udiff5 += (ub[8*i + 5] - uc[8*i + 5]);
      27      udiff4 += (ub[8*i + 4] - uc[8*i + 4]);
      28      udiff3 += (ub[8*i + 3] - uc[8*i + 3]);
      29      udiff2 += (ub[8*i + 2] - uc[8*i + 2]);
      30      udiff1 += (ub[8*i + 1] - uc[8*i + 1]);
      31      udiff0 += (ub[8*i] - uc[8*i]);
      32    }
      33  
      34    /* Check results.  Each resK is an int and is converted to unsigned
               int for the comparison with the matching accumulator.  */
      35    if (udiff0 != res0
      36        || udiff1 != res1
      37        || udiff2 != res2
      38        || udiff3 != res3
      39        || udiff4 != res4
      40        || udiff5 != res5
      41        || udiff6 != res6
      42        || udiff7 != res7)
      43      abort ();
      44  
      45    return 0;
      46  }
      47  
          /* Test driver.  Calls check_vect (declared outside this file,
             presumably in tree-vect.h, and presumably a check that the target
             can execute vector code -- confirm), then runs main1 twice with
             different trip counts.

             The expected values follow from the initial accumulator values in
             main1 plus the sums of ub[j] - uc[j] == 2 * (j % 16):
               n == N/8 == 4:      udiffK gains 8*K + 32  (37, 50, ..., 158);
               n == N/8 - 1 == 3:  udiffK gains 6*K + 16  (21, 32, ..., 128).
             main1 aborts on a mismatch, so reaching the final return means
             both runs produced the expected sums.  */
      48  int main (void)
      49  {
      50    check_vect ();
      51  
      52    main1 (N/8, 37, 50, 68, 86, 109, 122, 140, 158);
      53    main1 (N/8 - 1, 21, 32, 48, 64, 85, 96, 112, 128);
      54    return 0;
      55  }
      56  
      57  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_add } } } */
      58  /* For variable-length SVE, the number of scalar statements in the
      59     reduction exceeds the number of elements in a 128-bit granule.  */
      60  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_no_int_add || { aarch64_sve && vect_variable_length } } } } } */
      61  /* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" { xfail { aarch64_sve && vect_variable_length } } } } */