1  /* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
       2  /* { dg-require-effective-target aarch64_asm_bf16_ok }  */
       3  /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
       4  
       5  #include "test_sve_acle.h"
       6  
       7  /*
       8  ** cvt_bf16_f32_m_tied1:
       9  **	bfcvt	z0\.h, p0/m, z4\.s
      10  **	ret
      11  */
          /* _m form, "tied1" case: z0 is passed as the first argument and is
             also the variable the result is assigned to.  The pattern above
             expects a lone BFCVT writing z0.h from z4.s under p0/m, with no
             register move in front of it.  The two expressions use the fully
             suffixed svcvt_bf16_f32_m and the shorter svcvt_bf16_m spelling
             with identical operands.
             NOTE(review): TEST_DUAL_Z lives in test_sve_acle.h (not visible
             here); presumably both spellings are checked against the same
             expected body -- confirm in that header.  */
      12  TEST_DUAL_Z (cvt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t,
      13  	     z0 = svcvt_bf16_f32_m (z0, p0, z4),
      14  	     z0 = svcvt_bf16_m (z0, p0, z4))
      15  
      16  /*
      17  ** cvt_bf16_f32_m_tied2:
      18  **	mov	(z[0-9]+)\.d, z0\.d
      19  **	movprfx	z0, z4
      20  **	bfcvt	z0\.h, p0/m, \1\.s
      21  **	ret
      22  */
          /* _m form, "tied2" case: here z0 is the last argument (the value
             being converted) while z4 is the first argument.  The pattern
             above expects z0 to be copied to some other z register (captured
             as \1), then MOVPRFX z0, z4, then a BFCVT into z0.h that reads
             the captured register.
             NOTE(review): the result is assigned to z0_res, a name supplied
             by TEST_DUAL_Z_REV in test_sve_acle.h (not visible here);
             presumably it is the variable returned in z0 -- confirm.  */
      23  TEST_DUAL_Z_REV (cvt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t,
      24  		 z0_res = svcvt_bf16_f32_m (z4, p0, z0),
      25  		 z0_res = svcvt_bf16_m (z4, p0, z0))
      26  
      27  /*
      28  ** cvt_bf16_f32_m_untied:
      29  **	movprfx	z0, z1
      30  **	bfcvt	z0\.h, p0/m, z4\.s
      31  **	ret
      32  */
          /* _m form, "untied" case: the first argument is z1, which is neither
             the converted value (z4) nor the assigned variable (z0).  The
             pattern above expects MOVPRFX z0, z1 followed by a BFCVT into
             z0.h from z4.s under p0/m.  Both the svcvt_bf16_f32_m and
             svcvt_bf16_m spellings are given the same operands.  */
      33  TEST_DUAL_Z (cvt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t,
      34  	     z0 = svcvt_bf16_f32_m (z1, p0, z4),
      35  	     z0 = svcvt_bf16_m (z1, p0, z4))
      36  
      37  /*
      38  ** cvt_bf16_f32_z_tied1:
      39  **	mov	(z[0-9]+)\.d, z0\.d
      40  **	movprfx	z0\.s, p0/z, \1\.s
      41  **	bfcvt	z0\.h, p0/m, \1\.s
      42  **	ret
      43  */
          /* _z form, "tied1" case: the intrinsic takes only the predicate and
             the value to convert, and that value is z0.  The pattern above
             expects z0 to be copied to another z register (captured as \1),
             a predicated MOVPRFX z0.s, p0/z from that copy, and then a BFCVT
             into z0.h reading the same copy.
             NOTE(review): z0_res comes from TEST_DUAL_Z_REV in
             test_sve_acle.h (not visible here) -- confirm its binding.  */
      44  TEST_DUAL_Z_REV (cvt_bf16_f32_z_tied1, svbfloat16_t, svfloat32_t,
      45  		 z0_res = svcvt_bf16_f32_z (p0, z0),
      46  		 z0_res = svcvt_bf16_z (p0, z0))
      47  
      48  /*
      49  ** cvt_bf16_f32_z_untied:
      50  **	movprfx	z0\.s, p0/z, z4\.s
      51  **	bfcvt	z0\.h, p0/m, z4\.s
      52  **	ret
      53  */
          /* _z form, "untied" case: the converted value is z4 and the result
             is assigned to z0.  The pattern above expects a predicated
             MOVPRFX z0.s, p0/z, z4.s directly followed by a BFCVT into z0.h
             from z4.s; unlike the tied1 pattern there is no preliminary MOV
             into a scratch register.  */
      54  TEST_DUAL_Z (cvt_bf16_f32_z_untied, svbfloat16_t, svfloat32_t,
      55  	     z0 = svcvt_bf16_f32_z (p0, z4),
      56  	     z0 = svcvt_bf16_z (p0, z4))
      57  
      58  /*
      59  ** cvt_bf16_f32_x_tied1:
      60  **	bfcvt	z0\.h, p0/m, z0\.s
      61  **	ret
      62  */
          /* _x form, "tied1" case: the converted value is z0.  The pattern
             above expects a single BFCVT whose source (z0.s) and destination
             (z0.h) are the same register, with no MOV or MOVPRFX before it.
             NOTE(review): z0_res comes from TEST_DUAL_Z_REV in
             test_sve_acle.h (not visible here) -- confirm its binding.  */
      63  TEST_DUAL_Z_REV (cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
      64  		 z0_res = svcvt_bf16_f32_x (p0, z0),
      65  		 z0_res = svcvt_bf16_x (p0, z0))
      66  
      67  /*
      68  ** cvt_bf16_f32_x_untied:
      69  **	movprfx	z0, z4
      70  **	bfcvt	z0\.h, p0/m, z4\.s
      71  **	ret
      72  */
          /* _x form, "untied" case: the converted value is z4 and the result
             is assigned to z0.  The pattern above expects an unpredicated
             MOVPRFX z0, z4 followed by a BFCVT into z0.h from z4.s under
             p0/m.
             NOTE(review): the MOVPRFX line is part of the required output, so
             a compiler that emits only the BFCVT would fail this test --
             confirm this matches the code generation being targeted.  */
      73  TEST_DUAL_Z (cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
      74  	     z0 = svcvt_bf16_f32_x (p0, z4),
      75  	     z0 = svcvt_bf16_x (p0, z4))
      76  
      77  /*
      78  ** ptrue_cvt_bf16_f32_x_tied1:
      79  **	...
      80  **	ptrue	p[0-9]+\.b[^\n]*
      81  **	...
      82  **	ret
      83  */
          /* _x form with svptrue_b32 () as the predicate instead of p0, and z0
             as the converted value.  The pattern above is deliberately loose:
             it only requires a PTRUE on a .b-suffixed predicate register to
             appear somewhere before the RET, and does not name the BFCVT
             itself.  The "..." lines are check-function-bodies wildcards
             (see the GCC testsuite documentation).  */
      84  TEST_DUAL_Z_REV (ptrue_cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
      85  		 z0_res = svcvt_bf16_f32_x (svptrue_b32 (), z0),
      86  		 z0_res = svcvt_bf16_x (svptrue_b32 (), z0))
      87  
      88  /*
      89  ** ptrue_cvt_bf16_f32_x_untied:
      90  **	...
      91  **	ptrue	p[0-9]+\.b[^\n]*
      92  **	...
      93  **	ret
      94  */
          /* _x form with svptrue_b32 () as the predicate instead of p0, with z4
             as the converted value and z0 as the assigned variable.  As in
             the tied1 variant, the pattern above only requires a PTRUE on a
             .b-suffixed predicate register somewhere before the RET; the
             surrounding "..." lines are check-function-bodies wildcards and
             the BFCVT itself is not matched.  */
      95  TEST_DUAL_Z (ptrue_cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
      96  	     z0 = svcvt_bf16_f32_x (svptrue_b32 (), z4),
      97  	     z0 = svcvt_bf16_x (svptrue_b32 (), z4))