1  /* { dg-do compile } */
       2  /* { dg-additional-options "-O3" } */
       3  /* { dg-require-effective-target vect_double } */
       4  
       5  void
       6  gemm (const double* __restrict__ A, const double* __restrict__ B,
       7        double* __restrict__ C)
       8  {
       9    unsigned int l_m = 0;
      10    unsigned int l_n = 0;
      11    unsigned int l_k = 0;
      12  
      13    for ( l_n = 0; l_n < 9; l_n++ ) {
      14      /* Use -O3 so this loop is unrolled completely early.  */
      15      for ( l_m = 0; l_m < 10; l_m++ ) { C[(l_n*10)+l_m] = 0.0; }
      16      for ( l_k = 0; l_k < 17; l_k++ ) {
      17        /* Use -O3 so this loop is unrolled completely early.  */
      18        for ( l_m = 0; l_m < 10; l_m++ ) {
      19          C[(l_n*10)+l_m] += A[(l_k*20)+l_m] * B[(l_n*20)+l_k];
      20        }
      21      }
      22    }
      23  }
      24  
      25  /* Exact scanning is difficult but we expect all loads and stores
      26     and computations to be vectorized.  */
      27  /* { dg-final { scan-tree-dump "optimized: basic block" "slp1" } } */