/* Test that the compiler properly vectorizes floating point multiply and add
   instructions into vfmaddps on FMA4 systems.  */
       3  
       4  /* { dg-do compile { target { ! ia32 } } } */
       5  /* { dg-options "-O2 -mfma4 -ftree-vectorize -mtune=generic -mno-fma" } */
       6  
       7  extern void exit (int);
       8  
/* GCC vector types, 16 bytes wide: __m128 has float elements and __m128d
   has double elements.  __may_alias__ exempts accesses made through these
   types from type-based alias analysis.  */
typedef float     __m128  __attribute__ ((__vector_size__ (16), __may_alias__));
typedef double    __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
      11  
/* Element count of the float and double arrays in the unions below, and
   the trip count of every loop in this file.  */
#define SIZE 10240
      13  
/* File-scope operands shared by all kernels: a is the destination, and
   b, c and d are the sources.  The float view (f) and the double view (d)
   occupy the same storage, so the float and double kernels overwrite each
   other's results in a.  */
union {
  /* f_align and d_align are never accessed in this file; presumably they
     exist only to give the union vector alignment -- TODO confirm.  */
  __m128 f_align;
  __m128d d_align;
  float f[SIZE];
  double d[SIZE];
} a, b, c, d;
      20  
/* Single-precision multiply-add kernel: for each i in [0, SIZE) store
   b.f[i] * c.f[i] + d.f[i] into a.f[i].  Works only on the file-scope
   unions; takes no arguments and returns nothing.  Presumably the loop the
   "vfmaddps" scan-assembler check at the end of the file is aimed at.  */
void
flt_mul_add (void)
{
  int i;

  /* Plain counted loop over unit-stride arrays, left in its simplest form
     for the vectorizer.  */
  for (i = 0; i < SIZE; i++)
    a.f[i] = (b.f[i] * c.f[i]) + d.f[i];
}
      29  
/* Double-precision multiply-add kernel: for each i in [0, SIZE) store
   b.d[i] * c.d[i] + d.d[i] into a.d[i].  Works only on the file-scope
   unions; takes no arguments and returns nothing.  Presumably the loop the
   "vfmaddpd" scan-assembler check at the end of the file is aimed at.  */
void
dbl_mul_add (void)
{
  int i;

  /* In d.d[i] the first d is the union object and the second is its double
     array member.  */
  for (i = 0; i < SIZE; i++)
    a.d[i] = (b.d[i] * c.d[i]) + d.d[i];
}
      38  
/* Single-precision multiply-subtract kernel: for each i in [0, SIZE) store
   b.f[i] * c.f[i] - d.f[i] into a.f[i].  Works only on the file-scope
   unions; takes no arguments and returns nothing.  Presumably the loop the
   "vfmsubps" scan-assembler check at the end of the file is aimed at.  */
void
flt_mul_sub (void)
{
  int i;

  /* The addend is subtracted from the product, not the other way round.  */
  for (i = 0; i < SIZE; i++)
    a.f[i] = (b.f[i] * c.f[i]) - d.f[i];
}
      47  
/* Double-precision multiply-subtract kernel: for each i in [0, SIZE) store
   b.d[i] * c.d[i] - d.d[i] into a.d[i].  Works only on the file-scope
   unions; takes no arguments and returns nothing.  Presumably the loop the
   "vfmsubpd" scan-assembler check at the end of the file is aimed at.  */
void
dbl_mul_sub (void)
{
  int i;

  /* The addend is subtracted from the product, not the other way round.  */
  for (i = 0; i < SIZE; i++)
    a.d[i] = (b.d[i] * c.d[i]) - d.d[i];
}
      56  
/* Single-precision negated-multiply-add kernel: for each i in [0, SIZE)
   store -(b.f[i] * c.f[i]) + d.f[i] into a.f[i].  Works only on the
   file-scope unions; takes no arguments and returns nothing.  Presumably
   the loop the "vfnmaddps" scan-assembler check at the end of the file is
   aimed at.  */
void
flt_neg_mul_add (void)
{
  int i;

  /* Only the product is negated; the addend keeps its sign.  */
  for (i = 0; i < SIZE; i++)
    a.f[i] = (-(b.f[i] * c.f[i])) + d.f[i];
}
      65  
/* Double-precision negated-multiply-add kernel: for each i in [0, SIZE)
   store -(b.d[i] * c.d[i]) + d.d[i] into a.d[i].  Works only on the
   file-scope unions; takes no arguments and returns nothing.  Presumably
   the loop the "vfnmaddpd" scan-assembler check at the end of the file is
   aimed at.  */
void
dbl_neg_mul_add (void)
{
  int i;

  /* Only the product is negated; the addend keeps its sign.  */
  for (i = 0; i < SIZE; i++)
    a.d[i] = (-(b.d[i] * c.d[i])) + d.d[i];
}
      74  
/* Invokes each of the six kernels once, the float variants first and then
   the double variants, and terminates through exit (0) rather than by
   returning.  The dg-do directive at the top of the file requests
   compilation only, so the harness checks the generated assembly instead
   of any result computed here.  */
int main ()
{
  /* Single-precision kernels.  */
  flt_mul_add ();
  flt_mul_sub ();
  flt_neg_mul_add ();

  /* Double-precision kernels; they write into the same storage of a.  */
  dbl_mul_add ();
  dbl_mul_sub ();
  dbl_neg_mul_add ();
  exit (0);
}
      86  
      87  /* { dg-final { scan-assembler "vfmaddps" } } */
      88  /* { dg-final { scan-assembler "vfmaddpd" } } */
      89  /* { dg-final { scan-assembler "vfmsubps" } } */
      90  /* { dg-final { scan-assembler "vfmsubpd" } } */
      91  /* { dg-final { scan-assembler "vfnmaddps" } } */
      92  /* { dg-final { scan-assembler "vfnmaddpd" } } */