1  /* { dg-do assemble { target { arm*-*-* } } } */
       2  /* { dg-require-effective-target arm_hard_ok } */
       3  /* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
       4  /* { dg-add-options arm_v8_2a_i8mm }  */
       5  /* { dg-additional-options "-O -save-temps -mfloat-abi=hard -mfpu=auto" } */
       6  /* { dg-final { check-function-bodies "**" "" } } */
       7  
       8  #include <arm_neon.h>
       9  
      10  /* Unsigned-Signed Dot Product instructions.  */
      11  
      12  /*
      13  **usfoo:
      14  **	...
      15  **	vusdot\.s8	d0, d1, d2
      16  **	bx	lr
      17  */
      18  int32x2_t usfoo (int32x2_t r, uint8x8_t x, int8x8_t y)
      19  {
      20    return vusdot_s32 (r, x, y);
      21  }
      22  
      23  /*
      24  **usfooq:
      25  **	...
      26  **	vusdot\.s8	q0, q1, q2
      27  **	bx	lr
      28  */
      29  int32x4_t usfooq (int32x4_t r, uint8x16_t x, int8x16_t y)
      30  {
      31    return vusdotq_s32 (r, x, y);
      32  }
      33  
      34  /*
      35  **usfoo_lane:
      36  **	...
      37  **	vusdot\.s8	d0, d1, d2\[0\]
      38  **	bx	lr
      39  */
      40  int32x2_t usfoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
      41  {
      42    return vusdot_lane_s32 (r, x, y, 0);
      43  }
      44  
      45  /*
      46  **usfooq_lane:
      47  **	...
      48  **	vusdot\.s8	q0, q1, d4\[1\]
      49  **	bx	lr
      50  */
      51  int32x4_t usfooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
      52  {
      53    return vusdotq_lane_s32 (r, x, y, 1);
      54  }
      55  
      56  /* Signed-Unsigned Dot Product instructions.  */
      57  
      58  /*
      59  **sfoo_lane:
      60  **	...
      61  **	vsudot\.u8	d0, d1, d2\[0\]
      62  **	bx	lr
      63  */
      64  int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
      65  {
      66    return vsudot_lane_s32 (r, x, y, 0);
      67  }
      68  
      69  /*
      70  **sfooq_lane:
      71  **	...
      72  **	vsudot\.u8	q0, q1, d4\[1\]
      73  **	bx	lr
      74  */
      75  int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
      76  {
      77    return vsudotq_lane_s32 (r, x, y, 1);
      78  }
      79  
/* Unsigned-Signed Dot Product instructions (laneq variants).  */

/*
      81  **usfoo_laneq:
      82  **	...
      83  **	vusdot\.s8	d0, d1, d3\[0\]
      84  **	bx	lr
      85  */
      86  int32x2_t usfoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y)
      87  {
      88    return vusdot_laneq_s32 (r, x, y, 2);
      89  }
      90  
      91  /*
      92  **usfooq_laneq:
      93  **	...
      94  **	vusdot\.s8	q0, q1, d5\[1\]
      95  **	bx	lr
      96  */
      97  int32x4_t usfooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y)
      98  {
      99    return vusdotq_laneq_s32 (r, x, y, 3);
     100  }
     101  
/* Signed-Unsigned Dot Product instructions (laneq variants).  */
     103  
     104  /*
     105  **sfoo_laneq:
     106  **	...
     107  **	vsudot\.u8	d0, d1, d3\[0\]
     108  **	bx	lr
     109  */
     110  int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y)
     111  {
     112    return vsudot_laneq_s32 (r, x, y, 2);
     113  }
     114  
     115  /*
     116  **sfooq_laneq:
     117  **	...
     118  **	vsudot\.u8	q0, q1, d5\[1\]
     119  **	bx	lr
     120  */
     121  int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
     122  {
     123    return vsudotq_laneq_s32 (r, x, y, 3);
     124  }
     125  
     126  /*
     127  **usfoo_untied:
     128  **	...
     129  **	vusdot\.s8	d1, d2, d3
     130  **	vmov	d0, d1  @ v2si
     131  **	bx	lr
     132  */
     133  int32x2_t usfoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
     134  {
     135    return vusdot_s32 (r, x, y);
     136  }
     137  
     138  /*
     139  **usfoo_lane_untied:
     140  **	...
**	vusdot\.s8	d1, d2, d3\[0\]
     142  **	vmov	d0, d1  @ v2si
     143  **	bx	lr
     144  */
     145  int32x2_t usfoo_lane_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
     146  {
     147    return vusdot_lane_s32 (r, x, y, 0);
     148  }
     149