(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
aarch64/
advsimd-intrinsics/
bfdot-1.c
       1  /* { dg-do assemble { target { aarch64*-*-* } } } */
       2  /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
       3  /* { dg-add-options arm_v8_2a_bf16_neon }  */
       4  /* { dg-additional-options "-save-temps" } */
       5  /* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
       6  /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
       7  
       8  #include <arm_neon.h>
       9  
      10  /*
      11  **ufoo:
      12  **	bfdot	v0.2s, (v1.4h, v2.4h|v2.4h, v1.4h)
      13  **	ret
      14  */
      15  float32x2_t ufoo(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y)
      16  {
      17    return vbfdot_f32 (r, x, y);
      18  }
      19  
      20  /*
      21  **ufooq:
      22  **	bfdot	v0.4s, (v1.8h, v2.8h|v2.8h, v1.8h)
      23  **	ret
      24  */
      25  float32x4_t ufooq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y)
      26  {
      27    return vbfdotq_f32 (r, x, y);
      28  }
      29  
      30  /*
      31  **ufoo_lane:
      32  **	bfdot	v0.2s, v1.4h, v2.2h\[0\]
      33  **	ret
      34  */
      35  float32x2_t ufoo_lane(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y)
      36  {
      37    return vbfdot_lane_f32 (r, x, y, 0);
      38  }
      39  
      40  /*
      41  **ufooq_laneq:
      42  **	bfdot	v0.4s, v1.8h, v2.2h\[2\]
      43  **	ret
      44  */
      45  float32x4_t ufooq_laneq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y)
      46  {
      47    return vbfdotq_laneq_f32 (r, x, y, 2);
      48  }
      49  
      50  /*
      51  **ufoo_laneq:
      52  **	bfdot	v0.2s, v1.4h, v2.2h\[3\]
      53  **	ret
      54  */
      55  float32x2_t ufoo_laneq(float32x2_t r, bfloat16x4_t x, bfloat16x8_t y)
      56  {
      57    return vbfdot_laneq_f32 (r, x, y, 3);
      58  }
      59  
      60  /*
      61  **ufooq_lane:
      62  **	bfdot	v0.4s, v1.8h, v2.2h\[1\]
      63  **	ret
      64  */
      65  float32x4_t ufooq_lane(float32x4_t r, bfloat16x8_t x, bfloat16x4_t y)
      66  {
      67    return vbfdotq_lane_f32 (r, x, y, 1);
      68  }
      69  
      70  /*
      71  **ufoo_untied:
      72  **	mov	v0.8b, v1.8b
      73  **	bfdot	v0.2s, (v2.4h, v3.4h|v3.4h, v2.4h)
      74  **	ret
      75  */
      76  float32x2_t ufoo_untied(float32x4_t unused, float32x2_t r, bfloat16x4_t x, bfloat16x4_t y)
      77  {
      78    return vbfdot_f32 (r, x, y);
      79  }
      80  
      81  /*
      82  **ufooq_lane_untied:
      83  **	mov	v0.16b, v1.16b
      84  **	bfdot	v0.4s, v2.8h, v3.2h\[1\]
      85  **	ret
      86  */
      87  float32x4_t ufooq_lane_untied(float32x4_t unused, float32x4_t r, bfloat16x8_t x, bfloat16x4_t y)
      88  {
      89    return vbfdotq_lane_f32 (r, x, y, 1);
      90  }
      91