1  /* { dg-do assemble { target { aarch64*-*-* } } } */
       2  /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
       3  /* { dg-add-options arm_v8_2a_bf16_neon }  */
       4  /* { dg-additional-options "-O2  --save-temps" } */
       5  
       6  #include <arm_neon.h>
       7  
       8  bfloat16x4x2_t
       9  test_vld2_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x2_t b)
      10  {
      11    return vld2_lane_bf16 (ptr, b, 2);
      12  }
      13  
      14  bfloat16x8x2_t
      15  test_vld2q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x2_t b)
      16  {
      17    return vld2q_lane_bf16 (ptr, b, 2);
      18  }
      19  
      20  /* { dg-final { scan-assembler-times "ld2\\t{v\[0-9\]+.h - v\[0-9\]+.h}\\\[2\\\], \\\[x0\\\]" 2 } } */
      21  
      22  bfloat16x4x3_t
      23  test_vld3_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x3_t b)
      24  {
      25    return vld3_lane_bf16 (ptr, b, 2);
      26  }
      27  
      28  bfloat16x8x3_t
      29  test_vld3q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x3_t b)
      30  {
      31    return vld3q_lane_bf16 (ptr, b, 2);
      32  }
      33  
      34  /* { dg-final { scan-assembler-times "ld3\t{v\[0-9\]+.h - v\[0-9\]+.h}\\\[2\\\], \\\[x0\\\]" 2 } } */
      35  
      36  bfloat16x4x4_t
      37  test_vld4_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x4_t b)
      38  {
      39    return vld4_lane_bf16 (ptr, b, 2);
      40  }
      41  
      42  bfloat16x8x4_t
      43  test_vld4q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x4_t b)
      44  {
      45    return vld4q_lane_bf16 (ptr, b, 2);
      46  }
      47  
      48  /* { dg-final { scan-assembler-times "ld4\t{v\[0-9\]+.h - v\[0-9\]+.h}\\\[2\\\], \\\[x0\\\]" 2 } } */