1  /* { dg-require-effective-target vect_int } */
       2  /* { dg-additional-options "--param=vect-epilogues-nomask=0" } */
       3  
       4  #include <stdarg.h>
       5  #include "tree-vect.h"
       6  
       7  #define N 64
       8  
       9  #define DOT1 21834 
      10  #define DOT2 21876
      11  
      12  unsigned short X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
      13  unsigned short Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
      14  
/* short->short->int dot product.
   Not detected as a dot-product pattern.
   Requires support for non-widening multiplication and widening-summation.
   Vectorized with loop-aware SLP.

   Computes two interleaved reductions over the global arrays X and Y:
   the products of the even-indexed elements are summed into *RESULT1
   and the products of the odd-indexed elements into *RESULT2.  LEN is
   the number of element pairs, so indices 0 .. 2*LEN-1 of X and Y are
   read.  Always returns 0; the sums are delivered only through the
   two pointers.  */
__attribute__ ((noinline)) unsigned int
foo1(int len, int *result1, int *result2) 
{
  int i;
  /* The accumulators start at 10 and 20 rather than 0; the expected
     values DOT1 and DOT2 include these seeds.  */
  unsigned int res1 = 10, res2 = 20;
  /* Each product is stored in a 16-bit temporary before being added,
     so it is truncated to unsigned short and only the summation
     widens to unsigned int.  */
  unsigned short prod;

  for (i=0; i<len; i++) {
    /* Even-indexed pair feeds the first reduction.  */
    prod = X[2*i] * Y[2*i];
    res1 += prod;
    /* Odd-indexed pair feeds the second reduction.  */
    prod = X[2*i+1] * Y[2*i+1];
    res2 += prod;
  }

  *result1 = res1;
  *result2 = res2;

  return 0;
}
      38  
      39  int main (void)
      40  {
      41    unsigned int dot1, dot2;
      42    unsigned short i;
      43  
      44    check_vect ();
      45  
      46    for (i=0; i<N; i++) {
      47      X[i] = i;
      48      Y[i] = 64-i;
      49      asm volatile ("" ::: "memory");
      50    }
      51  
      52    foo1 (N/2, &dot1, &dot2);
      53  
      54    if (dot1 != DOT1 || dot2 != DOT2)
      55      abort ();
      56  
      57    return 0;
      58  }
      59  
/* The asm barrier in main's initialization loop is expected to keep that loop from being vectorized, so only the loop in foo1 is counted below.  */
      61  /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
      62  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_short_mult && { vect_widen_sum_hi_to_si  && vect_unpack } } } } } */ 
      63  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_widen_sum_hi_to_si_pattern || { ! { vect_short_mult && { vect_widen_sum_hi_to_si  && vect_unpack } } } } } } } */
      64  /* Check we can elide permutes if SLP vectorizing the reduction.  */
      65  /* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" { xfail { { { vect_widen_sum_hi_to_si_pattern || { ! vect_unpack } } && { ! vect_load_lanes } } && { vect_short_mult && { vect_widen_sum_hi_to_si  && vect_unpack } } } } } } */