1  /* { dg-do compile } */
       2  /* { dg-skip-if "Require optimisation to compile DCE tests" { *-*-* } { "-O0" } { "" } } */
       3  /* { dg-require-effective-target arm_v8m_main_cde_ok } */
       4  /* { dg-add-options arm_v8m_main_cde } */
       5  /* { dg-final { check-function-bodies "**" "" } } */
       6  
       7  /* These are the scalar intrinsics.
       8  uint32_t __arm_cx1(int coproc, uint32_t imm);
       9  uint32_t __arm_cx1a(int coproc, uint32_t acc, uint32_t imm);
      10  uint32_t __arm_cx2(int coproc, uint32_t n, uint32_t imm);
      11  uint32_t __arm_cx2a(int coproc, uint32_t acc, uint32_t n, uint32_t imm);
      12  uint32_t __arm_cx3(int coproc, uint32_t n, uint32_t m, uint32_t imm);
      13  uint32_t __arm_cx3a(int coproc, uint32_t acc, uint32_t n, uint32_t m, uint32_t imm);
      14  
      15  uint64_t __arm_cx1d(int coproc, uint32_t imm);
      16  uint64_t __arm_cx1da(int coproc, uint64_t acc, uint32_t imm);
      17  uint64_t __arm_cx2d(int coproc, uint32_t n, uint32_t imm);
      18  uint64_t __arm_cx2da(int coproc, uint64_t acc, uint32_t n, uint32_t imm);
      19  uint64_t __arm_cx3d(int coproc, uint32_t n, uint32_t m, uint32_t imm);
      20  uint64_t __arm_cx3da(int coproc, uint64_t acc, uint32_t n, uint32_t m, uint32_t imm); */
      21  
      22  #include "arm_cde.h"
      23  
      24  #define TEST_CDE_SCALAR_INTRINSIC(name, accum_type, arguments) \
      25    accum_type test_cde_##name (__attribute__ ((unused)) uint32_t n, \
      26  			      __attribute__ ((unused)) uint32_t m) \
      27    {  \
      28       accum_type accum = 0; \
      29       accum += __arm_##name  arguments;  \
      30       return accum;  \
      31    }
      32  
      33  /* Basic test that we produce the assembly as expected.  */
      34  /*
      35  ** test_cde_cx1:
      36  **	cx1	p0, r0, #33
      37  **	bx	lr
      38  */
      39  TEST_CDE_SCALAR_INTRINSIC (cx1, uint32_t, (0, 33)) /* Defines test_cde_cx1: coproc 0, imm 33.  */
      40  
      41  /*
      42  ** test_cde_cx1a:
      43  **	movs	r0, #0
      44  **	cx1a	p0, r0, #33
      45  **	bx	lr
      46  */
      47  TEST_CDE_SCALAR_INTRINSIC (cx1a, uint32_t, (0, accum, 33)) /* Defines test_cde_cx1a: coproc 0, acc is the zeroed accum, imm 33.  */
      48  
      49  /*
      50  ** test_cde_cx2:
      51  **	cx2	p0, r0, r0, #33
      52  **	bx	lr
      53  */
      54  TEST_CDE_SCALAR_INTRINSIC (cx2, uint32_t, (0, n, 33)) /* Defines test_cde_cx2: coproc 0, operand n, imm 33.  */
      55  
      56  /*
      57  ** test_cde_cx2a:
      58  **	movs	(r[0-9]+), #0
      59  **	cx2a	p0, \1, r0, #33
      60  **	mov	r0, \1
      61  **	bx	lr
      62  */
      63  TEST_CDE_SCALAR_INTRINSIC (cx2a, uint32_t, (0, accum, n, 33)) /* Defines test_cde_cx2a: coproc 0, acc is the zeroed accum, operand n, imm 33.  */
      64  
      65  /*
      66  ** test_cde_cx3:
      67  **	cx3	p0, r0, r0, r1, #33
      68  **	bx	lr
      69  */
      70  TEST_CDE_SCALAR_INTRINSIC (cx3, uint32_t, (0, n, m, 33)) /* Defines test_cde_cx3: coproc 0, operands n and m, imm 33.  */
      71  
      72  /*
      73  ** test_cde_cx3a:
      74  **	movs	(r[0-9]+), #0
      75  **	cx3a	p0, \1, r0, r1, #33
      76  **	mov	r0, \1
      77  **	bx	lr
      78  */
      79  TEST_CDE_SCALAR_INTRINSIC (cx3a, uint32_t, (0, accum, n, m, 33)) /* Defines test_cde_cx3a: coproc 0, acc is the zeroed accum, operands n and m, imm 33.  */
      80  
      81  /*
      82  ** test_cde_cx1d:
      83  **	cx1d	p0, r0, r1, #33
      84  **	bx	lr
      85  */
      86  TEST_CDE_SCALAR_INTRINSIC (cx1d, uint64_t, (0, 33)) /* Defines test_cde_cx1d (64-bit result): coproc 0, imm 33.  */
      87  
      88  /*
      89  ** test_cde_cx1da:
      90  **	movs	r0, #0
      91  **	movs	r1, #0
      92  **	cx1da	p0, r0, r1, #33
      93  **	bx	lr
      94  */
      95  TEST_CDE_SCALAR_INTRINSIC (cx1da, uint64_t, (0, accum, 33)) /* Defines test_cde_cx1da (64-bit result): coproc 0, acc is the zeroed accum, imm 33.  */
      96  
      97  /*
      98  ** test_cde_cx2d:
      99  **	cx2d	p0, r0, r1, r0, #33
     100  **	bx	lr
     101  */
     102  TEST_CDE_SCALAR_INTRINSIC (cx2d, uint64_t, (0, n, 33)) /* Defines test_cde_cx2d (64-bit result): coproc 0, operand n, imm 33.  */
     103  
     104  /* This particular function gets optimised by the compiler in two different
     105     ways depending on the optimisation level.  So does test_cde_cx3da.  That's
     106     why we have two different regexes in each of these function body checks.  */
     107  /*
     108  ** test_cde_cx2da:
     109  ** (
     110  **	mov	(r[0-9]+), r0
     111  **	movs	r0, #0
     112  **	movs	r1, #0
     113  **	cx2da	p0, r0, r1, \1, #33
     114  ** |
     115  **	movs	(r[0-9]+), #0
     116  **	movs	(r[0-9]+), #0
     117  **	cx2da	p0, \2, \3, r0, #33
     118  **	mov	r0, \2
     119  **	mov	r1, \3
     120  ** )
     121  **	bx	lr
     122  */
     123  TEST_CDE_SCALAR_INTRINSIC (cx2da, uint64_t, (0, accum, n, 33)) /* Defines test_cde_cx2da (64-bit result): coproc 0, acc is the zeroed accum, operand n, imm 33.  */
     124  
     125  /*
     126  ** test_cde_cx3d:
     127  **	cx3d	p0, r0, r1, r0, r1, #33
     128  **	bx	lr
     129  */
     130  TEST_CDE_SCALAR_INTRINSIC (cx3d, uint64_t, (0, n, m, 33)) /* Defines test_cde_cx3d (64-bit result): coproc 0, operands n and m, imm 33.  */
     131  
     132  /*
     133  ** test_cde_cx3da:
     134  **	...
     135  ** (
     136  **	movs	(r[0-9]+), #0
     137  **	movs	(r[0-9]+), #0
     138  **	cx3da	p0, \1, \2, r0, r1, #33
     139  **	mov	r0, \1
     140  **	mov	r1, \2
     141  ** |
     142  **	movs	r0, #0
     143  **	movs	r1, #0
     144  **	cx3da	p0, r0, r1, r[0-9]+, r[0-9]+, #33
     145  ** )
     146  **	...
     147  **	bx	lr
     148  */
     149  TEST_CDE_SCALAR_INTRINSIC (cx3da, uint64_t, (0, accum, n, m, 33)) /* Defines test_cde_cx3da (64-bit result): coproc 0, acc is the zeroed accum, operands n and m, imm 33.  */
     150  
     151  
     152  
     153  /* Ensure the intrinsic calls in this function are DCE'd out after optimisation.
     154     This is expected because the ACLE specification describes these intrinsics
     155     as stateless and pure.  */
     156  /*
     157  ** test_cde_dce:
     158  **	bx	lr
     159  */
     160  void test_cde_dce (uint32_t n, uint32_t m)
     161  {
     162    uint64_t accum = 0;
     163    __arm_cx1   (0, 33);
     164    __arm_cx1a  (0, accum, 33);
     165    __arm_cx2   (0, n, 33);
     166    __arm_cx2a  (0, accum, n, 33);
     167    __arm_cx3   (0, n, m, 33);
     168    __arm_cx3a  (0, accum, n, m, 33);
     169    __arm_cx1d   (0, 33);
     170    __arm_cx1da  (0, accum, 33);
     171    __arm_cx2d   (0, n, 33);
     172    __arm_cx2da  (0, accum, n, 33);
     173    __arm_cx3d   (0, n, m, 33);
     174    __arm_cx3da  (0, accum, n, m, 33);
     175  }
     176  
     177  /* Checking this function allows constants with symbolic names.
     178     This test must be run under some level of optimisation.
     179     The actual check we perform is that the function is provided something that,
     180     at the point of expansion, is an immediate.  That check is not as strict as
     181     having something that is an immediate directly.
     182  
     183     Since we've already checked these intrinsics generate code in the manner we
     184     expect (above), here we just check that all the instructions we expect are
     185     there.  To ensure the instructions are from these functions we use different
     186     constants and search for those specifically with `scan-assembler-times`.  */
     187  
     188  /* Checking this function allows constants with symbolic names.  */
     189  uint32_t test_cde2 (uint32_t n, uint32_t m)
     190  {
     191    int coproc = 6;
     192    uint32_t imm = 30;
     193    uint32_t accum = 0;
     194    accum += __arm_cx1   (coproc, imm);
     195    accum += __arm_cx1a  (coproc, accum, imm);
     196    accum += __arm_cx2   (coproc, n, imm);
     197    accum += __arm_cx2a  (coproc, accum, n, imm);
     198    accum += __arm_cx3   (coproc, n, m, imm);
     199    accum += __arm_cx3a  (coproc, accum, n, m, imm);
     200    return accum;
     201  }
     202  
     203  /* Checking this function allows constants with symbolic names.  */
     204  uint64_t test_cdedi2 (uint32_t n, uint32_t m)
     205  {
     206    int coproc = 6;
     207    uint32_t imm = 30;
     208    uint64_t accum = 0;
     209    accum += __arm_cx1d   (coproc, imm);
     210    accum += __arm_cx1da  (coproc, accum, imm);
     211    accum += __arm_cx2d   (coproc, n, imm);
     212    accum += __arm_cx2da  (coproc, accum, n, imm);
     213    accum += __arm_cx3d   (coproc, n, m, imm);
     214    accum += __arm_cx3da  (coproc, accum, n, m, imm);
     215    return accum;
     216  }
     217  
     218  /* { dg-final { scan-assembler-times "cx1\\tp6" 1 } } */
     219  /* { dg-final { scan-assembler-times "cx2\\tp6" 1 } } */
     220  /* { dg-final { scan-assembler-times "cx3\\tp6" 1 } } */
     221  /* { dg-final { scan-assembler-times "cx1a\\tp6" 1 } } */
     222  /* { dg-final { scan-assembler-times "cx2a\\tp6" 1 } } */
     223  /* { dg-final { scan-assembler-times "cx3a\\tp6" 1 } } */
     224  /* { dg-final { scan-assembler-times "cx1d\\tp6" 1 } } */
     225  /* { dg-final { scan-assembler-times "cx2d\\tp6" 1 } } */
     226  /* { dg-final { scan-assembler-times "cx3d\\tp6" 1 } } */
     227  /* { dg-final { scan-assembler-times "cx1da\\tp6" 1 } } */
     228  /* { dg-final { scan-assembler-times "cx2da\\tp6" 1 } } */
     229  /* { dg-final { scan-assembler-times "cx3da\\tp6" 1 } } */