(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.dg/
vect/
pr97832-3.c
       1  /* { dg-do compile } */
       2  /* { dg-additional-options "-Ofast" } */
       3  /* { dg-require-effective-target vect_double } */
       4  
       5  void foo(double* restrict y, const double* restrict x0, const double* restrict x1, int clen)
       6  {
            /* Compile-only kernel for the vectorizer: the dg-final scans at the
               end of this file expect the "vect" dump to report that the loop
               below was vectorized using SLP and contains only SLP stmts.

               Data is accessed in blocks of 8 doubles; within a block, elements
               +0..+3 are loaded into the *_re variables and elements +4..+7
               into the *_im variables.

               y      - updated in place through y0 (= y) and y1 (= y + clen2).
               x0, x1 - only read.
               clen   - determines xi and the loop bound clen2.

               NOTE(review): the exact shape and order of the statements is
               presumably what this test exercises -- confirm before
               restructuring anything here.  */
       7    int xi = clen & 2;  /* Bit 1 of clen, so xi is either 0 or 2.  */
            /* Loop-invariant coefficients f00, f10, f01, f11: the *_re parts come
               from x0/x1 at offsets xi and xi+1, the *_im parts from the
               elements four positions later.  */
       8    double f00_re = x0[0+xi+0];
       9    double f10_re = x1[0+xi+0];
      10    double f01_re = x0[0+xi+1];
      11    double f11_re = x1[0+xi+1];
      12    double f00_im = x0[4+xi+0];
      13    double f10_im = x1[4+xi+0];
      14    double f01_im = x0[4+xi+1];
      15    double f11_im = x1[4+xi+1];
            /* clen2 is both the loop bound and the offset of the second output
               row y1 inside y.  */
      16    int clen2 = (clen+xi) * 2;
      17    double* y0 = &y[0];
      18    double* y1 = &y[clen2];
            /* Per the GCC manual an unroll count of 0 blocks unrolling of this
               outer loop; c advances by one 8-double block per iteration.  */
      19    #pragma GCC unroll 0
      20    for (int c = 0; c < clen2; c += 8) {
      21      // y0[c] = y0[c] - x0[c]*conj(f00) - x1[c]*conj(f10);
      22      // y1[c] = y1[c] - x0[c]*conj(f01) - x1[c]*conj(f11);
              /* Request unrolling of the 4-iteration inner loop by a factor of
                 4; k selects one of the four re/im pairs of the current block.  */
      23      #pragma GCC unroll 4
      24      for (int k = 0; k < 4; ++k) {
      25        double x0_re = x0[c+0+k];
      26        double x0_im = x0[c+4+k];
      27        double y0_re = y0[c+0+k];
      28        double y0_im = y0[c+4+k];
      29        double y1_re = y1[c+0+k];
      30        double y1_im = y1[c+4+k];
                /* Subtract x0*conj(f00) from y0 and x0*conj(f01) from y1, written
                   out separately for the real and imaginary parts.  */
      31        y0_re = y0_re - x0_re * f00_re - x0_im * f00_im;
      32        y0_im = y0_im + x0_re * f00_im - x0_im * f00_re;
      33        y1_re = y1_re - x0_re * f01_re - x0_im * f01_im;
      34        y1_im = y1_im + x0_re * f01_im - x0_im * f01_re;
      35        double x1_re = x1[c+0+k];
      36        double x1_im = x1[c+4+k];
                /* Same update using x1 with the f10 (for y0) and f11 (for y1)
                   coefficients.  */
      37        y0_re = y0_re - x1_re * f10_re - x1_im * f10_im;
      38        y0_im = y0_im + x1_re * f10_im - x1_im * f10_re;
      39        y1_re = y1_re - x1_re * f11_re - x1_im * f11_im;
      40        y1_im = y1_im + x1_re * f11_im - x1_im * f11_re;
                /* Store the results back to the positions they were loaded
                   from.  */
      41        y0[c+0+k] = y0_re;
      42        y0[c+4+k] = y0_im;
      43        y1[c+0+k] = y1_re;
      44        y1[c+4+k] = y1_im;
      45      }
      46    }
      47  }
      48  
      49  /* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
      50  /* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */