/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_v8_2a_bf16_neon }  */
/* { dg-additional-options "-save-temps" } */
       6  
       7  #include <arm_neon.h>
       8  
       9  float32x2_t test_vcreate (float32x2_t r, uint64_t a, uint64_t b)
      10  {
      11    bfloat16x4_t _a = vcreate_bf16(a);
      12    bfloat16x4_t _b = vcreate_bf16(b);
      13  
      14    return vbfdot_f32 (r, _a, _b);
      15  }
/* { dg-final { scan-assembler {bfdot\tv[0-9]+.2s, v[0-9]+.4h, v[0-9]+.4h} } } */
      17  
      18  bfloat16x4_t test_vset_lane_bf16 (bfloat16_t a, bfloat16x4_t b)
      19  {
      20    return vset_lane_bf16 (a, b, 3);
      21  }
      22  
      23  bfloat16x8_t test_vsetq_lane_bf16 (bfloat16_t a, bfloat16x8_t b)
      24  {
      25    return vsetq_lane_bf16 (a, b, 7);
      26  }
/* { dg-final { scan-assembler-times "ins\\t" 2 } } */
      28  
      29  bfloat16x4_t vdup_test (bfloat16_t a)
      30  {
      31    return vdup_n_bf16 (a);
      32  }
/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.4h, v\[0-9\]+.h\\\[0\\\]" } } */
      34  
      35  bfloat16x8_t vdupq_test (bfloat16_t a)
      36  {
      37    return vdupq_n_bf16 (a);
      38  }
      39  
      40  bfloat16x8_t test_vdupq_lane_bf16 (bfloat16x4_t a)
      41  {
      42    return vdupq_lane_bf16 (a, 1);
      43  }
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+.h\\\[0\\\]" 2 } } */
      45  
      46  bfloat16_t test_vget_lane_bf16 (bfloat16x4_t a)
      47  {
      48    return vget_lane_bf16 (a, 1);
      49  }
/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */
      51  
      52  bfloat16x4_t test_vdup_lane_bf16 (bfloat16x4_t a)
      53  {
      54    return vdup_lane_bf16 (a, 1);
      55  }
/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" } } */
      57  
      58  bfloat16x4_t test_vdup_laneq_bf16 (bfloat16x8_t a)
      59  {
      60    return vdup_laneq_bf16 (a, 7);
      61  }
/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[7\\\]" } } */
      63  
      64  bfloat16x8_t test_vdupq_laneq_bf16 (bfloat16x8_t a)
      65  {
      66    return vdupq_laneq_bf16 (a, 5);
      67  }
/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[5\\\]" } } */
      69  
      70  bfloat16_t test_vduph_lane_bf16 (bfloat16x4_t a)
      71  {
      72    return vduph_lane_bf16 (a, 3);
      73  }
/* { dg-final { scan-assembler "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[3\\\]" } } */
      75  
      76  bfloat16_t test_vgetq_lane_bf16 (bfloat16x8_t a)
      77  {
      78    return vgetq_lane_bf16 (a, 7);
      79  }
      80  
      81  bfloat16_t test_vduph_laneq_bf16 (bfloat16x8_t a)
      82  {
      83    return vduph_laneq_bf16 (a, 7);
      84  }
/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[7\\\]" 2 } } */