(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
arm/
simd/
bf16_dot_1.c
       1  /* { dg-do assemble } */
       2  /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
       3  /* { dg-options "-save-temps -O2" } */
       4  /* { dg-add-options arm_v8_2a_bf16_neon } */
       5  
       6  #include "arm_neon.h"
       7  
       8  /* BF16 DOT without lane.  */
       9  float32x2_t
      10  test_vbfdot_f32 (float32x2_t r, bfloat16x4_t a, bfloat16x4_t b)
      11  {
      12    /* Non-lane form on D registers: expect vdot.bf16 d, d, d.  */
      13    return vbfdot_f32 (r, a, b);
      14  }
      15  
      16  float32x4_t
      17  test_vbfdotq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
      18  {
      19    /* Non-lane form on Q registers: expect vdot.bf16 q, q, q.  */
      20    return vbfdotq_f32 (r, a, b);
      21  }
      22  
      23  /* 64-bit BF16 DOT with lane.  */
      24  float32x2_t
      25  test_vbfdot_lane_f32_0 (float32x2_t r, bfloat16x4_t a, bfloat16x4_t b)
      26  {
      27    /* Lane index 0 into bfloat16x4_t b: expect vdot.bf16 d, d, d[0].  */
      28    return vbfdot_lane_f32 (r, a, b, 0);
      29  }
      30  
      31  float32x2_t
      32  test_vbfdot_lane_f32_1 (float32x2_t r, bfloat16x4_t a, bfloat16x4_t b)
      33  {
      34    /* Lane index 1 into bfloat16x4_t b: expect vdot.bf16 d, d, d[1].  */
      35    return vbfdot_lane_f32 (r, a, b, 1);
      36  }
      37  
      38  float32x2_t
      39  test_vbfdot_laneq_f32_0 (float32x2_t r, bfloat16x4_t a, bfloat16x8_t b)
      40  {
      41    /* Lane index 0 into bfloat16x8_t b: expect vdot.bf16 d, d, d[0].  */
      42    return vbfdot_laneq_f32 (r, a, b, 0);
      43  }
      44  
      45  float32x2_t
      46  test_vbfdot_laneq_f32_1 (float32x2_t r, bfloat16x4_t a, bfloat16x8_t b)
      47  {
      48    /* Lane index 1 into bfloat16x8_t b: expect vdot.bf16 d, d, d[1].  */
      49    return vbfdot_laneq_f32 (r, a, b, 1);
      50  }
      51  
      52  float32x2_t
      53  test_vbfdot_laneq_f32_2 (float32x2_t r, bfloat16x4_t a, bfloat16x8_t b)
      54  {
      55    /* Lane index 2: expect vdot.bf16 d, d, d[0] (presumably b's high D half).  */
      56    return vbfdot_laneq_f32 (r, a, b, 2);
      57  }
      58  
      59  float32x2_t
      60  test_vbfdot_laneq_f32_3 (float32x2_t r, bfloat16x4_t a, bfloat16x8_t b)
      61  {
      62    /* Lane index 3: expect vdot.bf16 d, d, d[1] (presumably b's high D half).  */
      63    return vbfdot_laneq_f32 (r, a, b, 3);
      64  }
      65  
      66  /* 128-bit BF16 DOT with lane.  */
      67  float32x4_t
      68  test_vbfdotq_lane_f32_0 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
      69  {
      70    /* Lane index 0 into bfloat16x4_t b: expect vdot.bf16 q, q, d[0].  */
      71    return vbfdotq_lane_f32 (r, a, b, 0);
      72  }
      73  
      74  float32x4_t
      75  test_vbfdotq_lane_f32_1 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
      76  {
      77    /* Lane index 1 into bfloat16x4_t b: expect vdot.bf16 q, q, d[1].  */
      78    return vbfdotq_lane_f32 (r, a, b, 1);
      79  }
      80  
      81  float32x4_t
      82  test_vbfdotq_laneq_f32_0 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
      83  {
      84    /* Lane index 0 into bfloat16x8_t b: expect vdot.bf16 q, q, d[0].  */
      85    return vbfdotq_laneq_f32 (r, a, b, 0);
      86  }
      87  
      88  float32x4_t
      89  test_vbfdotq_laneq_f32_3 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
      90  {
      91    /* Lane index 3: expect vdot.bf16 q, q, d[1] (presumably b's high D half).  */
      92    return vbfdotq_laneq_f32 (r, a, b, 3);
      93  }
      94  
      95  /* { dg-final { scan-assembler-times {\tvdot.bf16\td[0-9]+, d[0-9]+, d[0-9]+\n} 1 } } */
      96  /* { dg-final { scan-assembler-times {\tvdot.bf16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 1 } } */
      97  /* { dg-final { scan-assembler-times {\tvdot.bf16\td[0-9]+, d[0-9]+, d[0-9]+\[0\]\n} 3 } } */
      98  /* { dg-final { scan-assembler-times {\tvdot.bf16\td[0-9]+, d[0-9]+, d[0-9]+\[1\]\n} 3 } } */
      99  /* { dg-final { scan-assembler-times {\tvdot.bf16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\]\n} 2 } } */
     100  /* { dg-final { scan-assembler-times {\tvdot.bf16\tq[0-9]+, q[0-9]+, d[0-9]+\[1\]\n} 2 } } */