/* { dg-do compile } */
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-options "-mdejagnu-cpu=power8 -O3" } */
/* { dg-final { scan-assembler "lxvd2x" } } */
/* { dg-final { scan-assembler "stxvd2x" } } */
/* { dg-final { scan-assembler "xxspltw" } } */

/* Currently the analyze_swaps phase cannot optimize this loop because
   of the presence of an UNSPEC_VSX_CVDPSPN.  At such time as this is
   handled, we need to add a 'scan-assembler-not "xxpermdi"' directive to
   this test.  */
      #include <altivec.h>
      13  void abort();
      14  
      15  #define N 4096
      16  #define M 10000000
      17  vector float ca[N][4] = {0};
      18  vector float cb[N][4] = {0};
      19  vector float cc[N][4] = {0};
      20  
      21  __attribute__((noinline)) void foo ()
      22  {
      23    int i;
      24    for (i = 0; i < N; i++) {
      25      cc[i][0] = vec_mul(vec_splats(cb[i][0][0]), ca[i][0]);
      26      cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][1]), ca[i][1]);
      27      cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][2]), ca[i][2]);
      28      cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][3]), ca[i][3]);
      29  
      30      cc[i][1] = vec_mul(vec_splats(cb[i][1][0]), ca[i][0]);
      31      cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][1]), ca[i][1]);
      32      cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][2]), ca[i][2]);
      33      cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][3]), ca[i][3]);
      34      
      35      cc[i][2] = vec_mul(vec_splats(cb[i][2][0]), ca[i][0]);
      36      cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][1]), ca[i][1]);
      37      cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][2]), ca[i][2]);
      38      cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][3]), ca[i][3]);
      39      
      40      cc[i][3] = vec_mul(vec_splats(cb[i][3][0]), ca[i][0]);
      41      cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][1]), ca[i][1]);
      42      cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][2]), ca[i][2]);
      43      cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][3]), ca[i][3]);
      44    }
      45  }
      46  
      /* Entry point: calls foo once and returns 0.  The call gives the noinline
         kernel a caller; the test itself is compile-only (dg-do compile).  */
      int main (void)
      {
        foo ();
        return 0;
      }