(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
aarch64/
advsimd-intrinsics/
bfdot-2.c
       1  /* { dg-do assemble { target { aarch64*-*-* } } } */
       2  /* { dg-require-effective-target stdint_types_mbig_endian } */
       3  /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
       4  /* { dg-add-options arm_v8_2a_bf16_neon }  */
       5  /* { dg-additional-options "-mbig-endian --save-temps" } */
       6  /* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
       7  /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
       8  
       9  #include <arm_neon.h>
      10  
      11  /*
      12  **ufoo:
      13  **	bfdot	v0.2s, (v1.4h, v2.4h|v2.4h, v1.4h)
      14  **	ret
      15  */
      16  float32x2_t ufoo(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y)
      17  {
            /* Vector-by-vector form, 2s/4h arrangement: forwards R, X and Y
               unchanged to vbfdot_f32 and returns its result.  The pattern
               preceding this function expects the whole body to be a single
               bfdot writing v0.2s followed by ret, and accepts the v1.4h and
               v2.4h sources in either order.  */
      18    return vbfdot_f32 (r, x, y);
      19  }
      20  
      21  /*
      22  **ufooq:
      23  **	bfdot	v0.4s, (v1.8h, v2.8h|v2.8h, v1.8h)
      24  **	ret
      25  */
      26  float32x4_t ufooq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y)
      27  {
            /* Vector-by-vector form, 4s/8h arrangement: forwards R, X and Y
               unchanged to vbfdotq_f32 and returns its result.  The pattern
               preceding this function expects a single bfdot writing v0.4s
               followed by ret, with the v1.8h and v2.8h sources allowed in
               either order.  */
      28    return vbfdotq_f32 (r, x, y);
      29  }
      30  
      31  /*
      32  **ufoo_lane:
      33  **	bfdot	v0.2s, v1.4h, v2.2h\[0\]
      34  **	ret
      35  */
      36  float32x2_t ufoo_lane(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y)
      37  {
            /* Indexed form where the indexed operand Y is a bfloat16x4_t:
               passes the constant lane 0 to vbfdot_lane_f32.  The expected
               body is a single bfdot whose last operand is v2.2h[0], i.e. the
               same index as written here, followed by ret.  */
      38    return vbfdot_lane_f32 (r, x, y, 0);
      39  }
      40  
      41  /*
      42  **ufooq_laneq:
      43  **	bfdot	v0.4s, v1.8h, v2.2h\[2\]
      44  **	ret
      45  */
      46  float32x4_t ufooq_laneq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y)
      47  {
            /* Indexed form where both the result and the indexed operand Y
               use the wider (float32x4_t / bfloat16x8_t) types: passes the
               constant lane 2 to vbfdotq_laneq_f32.  The expected bfdot
               selects v2.2h[2]; the index in the instruction is the one
               written here even though the file is built with -mbig-endian.  */
      48    return vbfdotq_laneq_f32 (r, x, y, 2);
      49  }
      50  
      51  /*
      52  **ufoo_laneq:
      53  **	bfdot	v0.2s, v1.4h, v2.2h\[3\]
      54  **	ret
      55  */
      56  float32x2_t ufoo_laneq(float32x2_t r, bfloat16x4_t x, bfloat16x8_t y)
      57  {
            /* Mixed-width indexed form: R and X use the narrower
               float32x2_t / bfloat16x4_t types while the indexed operand Y is
               a bfloat16x8_t.  Passes the constant lane 3 to
               vbfdot_laneq_f32; the expected bfdot writes v0.2s and selects
               v2.2h[3].  */
      58    return vbfdot_laneq_f32 (r, x, y, 3);
      59  }
      60  
      61  /*
      62  **ufooq_lane:
      63  **	bfdot	v0.4s, v1.8h, v2.2h\[1\]
      64  **	ret
      65  */
      66  float32x4_t ufooq_lane(float32x4_t r, bfloat16x8_t x, bfloat16x4_t y)
      67  {
            /* Mixed-width indexed form: R and X use the wider
               float32x4_t / bfloat16x8_t types while the indexed operand Y is
               a bfloat16x4_t.  Passes the constant lane 1 to
               vbfdotq_lane_f32; the expected bfdot writes v0.4s and selects
               v2.2h[1].  */
      68    return vbfdotq_lane_f32 (r, x, y, 1);
      69  }
      70  
      71  /*
      72  **ufoo_untied:
      73  **	mov	v0.8b, v1.8b
      74  **	bfdot	v0.2s, (v2.4h, v3.4h|v3.4h, v2.4h)
      75  **	ret
      76  */
      77  float32x2_t ufoo_untied(float32x4_t unused, float32x2_t r, bfloat16x4_t x, bfloat16x4_t y)
      78  {
            /* Same call as ufoo, but with a leading parameter UNUSED that is
               never read, so R, X and Y are the second to fourth arguments.
               The expected body first copies v1 into v0 (mov v0.8b, v1.8b)
               and then issues the bfdot on v2.4h and v3.4h in either order.
               NOTE(review): presumably UNUSED occupies v0 under the AArch64
               calling convention, so the accumulator does not start out in
               the result register -- confirm against the PCS.  */
      79    return vbfdot_f32 (r, x, y);
      80  }
      81  
      82  /*
      83  **ufooq_lane_untied:
      84  **	mov	v0.16b, v1.16b
      85  **	bfdot	v0.4s, v2.8h, v3.2h\[1\]
      86  **	ret
      87  */
      88  float32x4_t ufooq_lane_untied(float32x4_t unused, float32x4_t r, bfloat16x8_t x, bfloat16x4_t y)
      89  {
            /* Same call as ufooq_lane (lane 1 of a bfloat16x4_t Y), but with
               a leading parameter UNUSED that is never read, so R, X and Y
               are the second to fourth arguments.  The expected body first
               copies v1 into v0 (mov v0.16b, v1.16b) and then issues
               bfdot v0.4s, v2.8h, v3.2h[1].
               NOTE(review): presumably UNUSED occupies v0 under the AArch64
               calling convention, so the accumulator does not start out in
               the result register -- confirm against the PCS.  */
      90    return vbfdotq_lane_f32 (r, x, y, 1);
      91  }
      92