1  /* { dg-do assemble { target { aarch64*-*-* } } } */
       2  /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
       3  /* { dg-add-options arm_v8_2a_bf16_neon } */
       4  /* { dg-additional-options "-save-temps" } */
       5  /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
       6  
       7  #include <arm_neon.h>
       8  
       9  /*
      10  **test_bfmlalb:
      11  **      bfmlalb	v0.4s, v1.8h, v2.8h
      12  **      ret
      13  */
      14  float32x4_t test_bfmlalb (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
      15  {
            /* Forward R, A and B unchanged to the non-lane intrinsic and return
               its result.  The expected-assembly comment above matches a single
               bfmlalb on v0.4s, v1.8h, v2.8h followed by ret.  */
      16    return vbfmlalbq_f32 (r, a, b);
      17  }
      18  
      19  /*
      20  **test_bfmlalt:
      21  **      bfmlalt	v0.4s, v1.8h, v2.8h
      22  **      ret
      23  */
      24  float32x4_t test_bfmlalt (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
      25  {
            /* Forward R, A and B unchanged to the non-lane intrinsic and return
               its result.  The expected-assembly comment above matches a single
               bfmlalt on v0.4s, v1.8h, v2.8h followed by ret.  */
      26    return vbfmlaltq_f32 (r, a, b);
      27  }
      28  
      29  /*
      30  **test_bfmlalb_lane:
      31  **      bfmlalb	v0.4s, v1.8h, v2.h[0]
      32  **      ret
      33  */
      34  float32x4_t test_bfmlalb_lane (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
      35  {
            /* Call the _lane form with B as a bfloat16x4_t and the constant 0 as
               the final argument, returning the result.  The expected-assembly
               comment above matches a single bfmlalb whose last operand is the
               indexed element v2.h[0], followed by ret.  */
      36    return vbfmlalbq_lane_f32 (r, a, b, 0);
      37  }
      38  
      39  /*
      40  **test_bfmlalt_lane:
      41  **      bfmlalt	v0.4s, v1.8h, v2.h[2]
      42  **      ret
      43  */
      44  float32x4_t test_bfmlalt_lane (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
      45  {
            /* Call the _lane form with B as a bfloat16x4_t and the constant 2 as
               the final argument, returning the result.  The expected-assembly
               comment above matches a single bfmlalt whose last operand is the
               indexed element v2.h[2], followed by ret.  */
      46    return vbfmlaltq_lane_f32 (r, a, b, 2);
      47  }
      48  
      49  /*
      50  **test_bfmlalb_laneq:
      51  **      bfmlalb	v0.4s, v1.8h, v2.h[4]
      52  **      ret
      53  */
      54  float32x4_t test_bfmlalb_laneq (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
      55  {
            /* Call the _laneq form with B as a bfloat16x8_t and the constant 4 as
               the final argument, returning the result.  The expected-assembly
               comment above matches a single bfmlalb whose last operand is the
               indexed element v2.h[4], followed by ret.  */
      56    return vbfmlalbq_laneq_f32 (r, a, b, 4);
      57  }
      58  
      59  /*
      60  **test_bfmlalt_laneq:
      61  **      bfmlalt	v0.4s, v1.8h, v2.h[7]
      62  **      ret
      63  */
      64  float32x4_t test_bfmlalt_laneq (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
      65  {
            /* Call the _laneq form with B as a bfloat16x8_t and the constant 7 as
               the final argument, returning the result.  The expected-assembly
               comment above matches a single bfmlalt whose last operand is the
               indexed element v2.h[7], followed by ret.
               NOTE(review): 7 is presumably the highest valid index for a
               bfloat16x8_t operand -- confirm against the ACLE.  */
      66    return vbfmlaltq_laneq_f32 (r, a, b, 7);
      67  }