1  /* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
       2  /* { dg-require-effective-target aarch64_asm_bf16_ok }  */
       3  /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
       4  
       5  #include "test_sve_acle.h"
       6  
       7  /*
       8  ** cvtnt_bf16_f32_m_tied1:
       9  **	bfcvtnt	z0\.h, p0/m, z4\.s
      10  **	ret
      11  */
          /* Tied case for the _m form: the first argument and the result are
             both z0, and the pattern above accepts only a single BFCVTNT into
             z0 followed by RET.  The second expression is the same call
             spelled without the _f32 suffix.  TEST_DUAL_Z is defined outside
             this file (presumably in test_sve_acle.h).  */
      12  TEST_DUAL_Z (cvtnt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t,
      13  	     z0 = svcvtnt_bf16_f32_m (z0, p0, z4),
      14  	     z0 = svcvtnt_bf16_m (z0, p0, z4))
      15  
      16  /* Bad RA choice: no preferred output sequence.  */
          /* Reversed-operand case for the _m form: z4 is the first argument
             and z0 is the third, with the result assigned to z0_res.  No
             expected-body pattern for this name appears in this part of the
             file.  TEST_DUAL_Z_REV is defined outside this file (presumably
             in test_sve_acle.h).  */
      17  TEST_DUAL_Z_REV (cvtnt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t,
      18  		 z0_res = svcvtnt_bf16_f32_m (z4, p0, z0),
      19  		 z0_res = svcvtnt_bf16_m (z4, p0, z0))
      20  
      21  /*
      22  ** cvtnt_bf16_f32_m_untied:
      23  ** (
      24  **	mov	z0\.d, z1\.d
      25  **	bfcvtnt	z0\.h, p0/m, z4\.s
      26  ** |
      27  **	bfcvtnt	z1\.h, p0/m, z4\.s
      28  **	mov	z0\.d, z1\.d
      29  ** )
      30  **	ret
      31  */
          /* Untied case for the _m form: the first argument is z1 while the
             result is z0.  The pattern above accepts either ordering: copy z1
             to z0 and convert into z0, or convert into z1 and then copy the
             result to z0.  */
      32  TEST_DUAL_Z (cvtnt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t,
      33  	     z0 = svcvtnt_bf16_f32_m (z1, p0, z4),
      34  	     z0 = svcvtnt_bf16_m (z1, p0, z4))
      35  
      36  /*
      37  ** cvtnt_bf16_f32_x_tied1:
      38  **	bfcvtnt	z0\.h, p0/m, z4\.s
      39  **	ret
      40  */
          /* Tied case for the _x form: the first argument and the result are
             both z0.  The pattern above still expects the p0/m spelling of
             BFCVTNT, as a single instruction followed by RET.  The second
             expression is the same call spelled without the _f32 suffix.  */
      41  TEST_DUAL_Z (cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
      42  	     z0 = svcvtnt_bf16_f32_x (z0, p0, z4),
      43  	     z0 = svcvtnt_bf16_x (z0, p0, z4))
      44  
      45  /* Bad RA choice: no preferred output sequence.  */
          /* Reversed-operand case for the _x form: z4 is the first argument
             and z0 is the third, with the result assigned to z0_res.  No
             expected-body pattern for this name appears in this part of the
             file.  */
      46  TEST_DUAL_Z_REV (cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t,
      47  		 z0_res = svcvtnt_bf16_f32_x (z4, p0, z0),
      48  		 z0_res = svcvtnt_bf16_x (z4, p0, z0))
      49  
      50  /*
      51  ** cvtnt_bf16_f32_x_untied:
      52  ** (
      53  **	mov	z0\.d, z1\.d
      54  **	bfcvtnt	z0\.h, p0/m, z4\.s
      55  ** |
      56  **	bfcvtnt	z1\.h, p0/m, z4\.s
      57  **	mov	z0\.d, z1\.d
      58  ** )
      59  **	ret
      60  */
          /* Untied case for the _x form: the first argument is z1 while the
             result is z0.  The pattern above accepts either ordering: copy z1
             to z0 and convert into z0, or convert into z1 and then copy the
             result to z0.  */
      61  TEST_DUAL_Z (cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
      62  	     z0 = svcvtnt_bf16_f32_x (z1, p0, z4),
      63  	     z0 = svcvtnt_bf16_x (z1, p0, z4))
      64  
      65  /*
      66  ** ptrue_cvtnt_bf16_f32_x_tied1:
      67  **	...
      68  **	ptrue	p[0-9]+\.b[^\n]*
      69  **	...
      70  **	ret
      71  */
          /* Same call shape as the tied _x case, but the predicate is
             svptrue_b32 () instead of p0.  The pattern above only requires a
             "ptrue p<N>.b" line somewhere before RET; the "..." lines are
             check-function-bodies wildcards, so the rest of the body
             (including the BFCVTNT itself) is not constrained here.  */
      72  TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
      73  	     z0 = svcvtnt_bf16_f32_x (z0, svptrue_b32 (), z4),
      74  	     z0 = svcvtnt_bf16_x (z0, svptrue_b32 (), z4))
      75  
      76  /* Bad RA choice: no preferred output sequence.  */
          /* Reversed-operand case with an svptrue_b32 () predicate: z4 is the
             first argument and z0 is the third, with the result assigned to
             z0_res.  No expected-body pattern for this name appears in this
             part of the file.  */
      77  TEST_DUAL_Z_REV (ptrue_cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t,
      78  		 z0_res = svcvtnt_bf16_f32_x (z4, svptrue_b32 (), z0),
      79  		 z0_res = svcvtnt_bf16_x (z4, svptrue_b32 (), z0))
      80  
      81  /*
      82  ** ptrue_cvtnt_bf16_f32_x_untied:
      83  **	...
      84  **	ptrue	p[0-9]+\.b[^\n]*
      85  **	...
      86  **	ret
      87  */
          /* Untied call shape (first argument z1, result z0) with an
             svptrue_b32 () predicate instead of p0.  The pattern above only
             requires a "ptrue p<N>.b" line somewhere before RET; the "..."
             lines are check-function-bodies wildcards, so neither the move
             nor the BFCVTNT is constrained here.  */
      88  TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
      89  	     z0 = svcvtnt_bf16_f32_x (z1, svptrue_b32 (), z4),
      90  	     z0 = svcvtnt_bf16_x (z1, svptrue_b32 (), z4))