1  /* { dg-do compile } */
       2  /* { dg-options "-O3 -fno-vect-cost-model" } */
       3  
       4  /* Check that we vectorize to a full 128-bit vector for _Float16 and __fp16
       5     types.  */
       6  
       7  /* Enable ARMv8.2-A+fp16 so we have access to the vector instructions.  */
       8  #pragma GCC target ("arch=armv8.2-a+fp16")
       9  
      10  _Float16
      11  sum_Float16 (_Float16 *__restrict__ __attribute__ ((__aligned__ (16))) a,
      12  	     _Float16 *__restrict__ __attribute__ ((__aligned__ (16))) b,
      13  	     _Float16 *__restrict__ __attribute__ ((__aligned__ (16))) c)
      14  {
      15    for (int i = 0; i < 256; i++)
      16      a[i] = b[i] + c[i];
      17  }
      18  
      19  _Float16
      20  sum_fp16 (__fp16 *__restrict__ __attribute__ ((__aligned__ (16))) a,
      21  	  __fp16 *__restrict__ __attribute__ ((__aligned__ (16))) b,
      22  	  __fp16 *__restrict__ __attribute__ ((__aligned__ (16))) c)
      23  {
      24    for (int i = 0; i < 256; i++)
      25      a[i] = b[i] + c[i];
      26  }
      27  
      28  /* Two FADD operations on "8h" data widths, one from sum_Float16, one from
      29     sum_fp16.  */
      30  /* { dg-final { scan-assembler-times "fadd\tv\[0-9\]\+.8h" 2 } } */