1  /* { dg-do compile }  */
       2  /* { dg-require-effective-target arm_v8_2a_fp16_neon_ok }  */
       3  /* { dg-options "-O2 -ffast-math" }  */
       4  /* { dg-add-options arm_v8_2a_fp16_neon }  */
       5  
       6  /* Test compiler use of FP16 FMA/FMS instructions with -ffast-math.  */
       7  
       8  #include <arm_neon.h>
       9  
      10  float16x4_t
      11  test_vfma_1 (float16x4_t a, float16x4_t b, float16x4_t c)
      12  {
      13    return vadd_f16 (vmul_f16 (a, b), c);
      14  }
      15  
      16  float16x4_t
      17  test_vfma_2 (float16x4_t a, float16x4_t b, float16x4_t c)
      18  {
      19    return vsub_f16 (vmul_f16 (a, b), vneg_f16 (c));
      20  }
      21  
      22  float16x4_t
      23  test_vfma_3 (float16x4_t a, float16x4_t b, float16x4_t c)
      24  {
      25    return vsub_f16 (vmul_f16 (vneg_f16 (a), vneg_f16 (b)), vneg_f16 (c));
      26  }
      27  
      28  float16x4_t
      29  test_vfma_4 (float16x4_t a, float16x4_t b, float16x4_t c)
      30  {
      31    return vsub_f16 (vmul_f16 (a, b), vneg_f16 (c));
      32  }
      33  /* { dg-final { scan-assembler-times {vfma\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 4 } }  */
      34  
      35  float16x8_t
      36  test_vfmaq_1 (float16x8_t a, float16x8_t b, float16x8_t c)
      37  {
      38    return vaddq_f16 (vmulq_f16 (a, b), c);
      39  }
      40  
      41  float16x8_t
      42  test_vfmaq_2 (float16x8_t a, float16x8_t b, float16x8_t c)
      43  {
      44    return vsubq_f16 (vmulq_f16 (a, b), vnegq_f16 (c));
      45  }
      46  
      47  float16x8_t
      48  test_vfmaq_3 (float16x8_t a, float16x8_t b, float16x8_t c)
      49  {
      50    return vsubq_f16 (vmulq_f16 (vnegq_f16 (a), vnegq_f16 (b)), vnegq_f16 (c));
      51  }
      52  
      53  float16x8_t
      54  test_vfmaq_4 (float16x8_t a, float16x8_t b, float16x8_t c)
      55  {
      56    return vsubq_f16 (vmulq_f16 (a, b), vnegq_f16 (c));
      57  }
      58  /* { dg-final { scan-assembler-times {vfma\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 4 } }  */
      59  
      60  float16x4_t
      61  test_vfms_1 (float16x4_t a, float16x4_t b, float16x4_t c)
      62  {
      63    return vsub_f16 (c, vmul_f16 (a, b));
      64  }
      65  
      66  float16x4_t
      67  test_vfms_2 (float16x4_t a, float16x4_t b, float16x4_t c)
      68  {
      69    return vsub_f16 (a, vmul_f16 (b, c));
      70  }
      71  
      72  float16x4_t
      73  test_vfms_3 (float16x4_t a, float16x4_t b, float16x4_t c)
      74  {
      75    return vadd_f16 (vmul_f16 (vneg_f16 (a), b), c);
      76  }
      77  
      78  float16x4_t
      79  test_vfms_4 (float16x4_t a, float16x4_t b, float16x4_t c)
      80  {
      81    return vadd_f16 (vmul_f16 (a, vneg_f16 (b)), c);
      82  }
      83  /* { dg-final { scan-assembler-times {vfms\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 4 } } */
      84  
      85  float16x8_t
      86  test_vfmsq_1 (float16x8_t a, float16x8_t b, float16x8_t c)
      87  {
      88    return vsubq_f16 (c, vmulq_f16 (a, b));
      89  }
      90  
      91  float16x8_t
      92  test_vfmsq_2 (float16x8_t a, float16x8_t b, float16x8_t c)
      93  {
      94    return vsubq_f16 (a, vmulq_f16 (b, c));
      95  }
      96  
      97  float16x8_t
      98  test_vfmsq_3 (float16x8_t a, float16x8_t b, float16x8_t c)
      99  {
     100    return vaddq_f16 (vmulq_f16 (vnegq_f16 (a), b), c);
     101  }
     102  
     103  float16x8_t
     104  test_vfmsq_4 (float16x8_t a, float16x8_t b, float16x8_t c)
     105  {
     106    return vaddq_f16 (vmulq_f16 (a, vnegq_f16 (b)), c);
     107  }
     108  /* { dg-final { scan-assembler-times {vfms\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 4 } } */