(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
arm/
mve/
intrinsics/
mve_vldr_z.c
       1  /* { dg-require-effective-target arm_v8_1m_mve_ok } */
       2  /* { dg-add-options arm_v8_1m_mve } */
       3  /* { dg-additional-options "-O2" } */
       4  
       5  #include "arm_mve.h"
       /* Reduced test input for the compiler (built with the MVE options and
          -O2 requested by the dg- directives in this file).  Every local
          except ROW_LOOP_CNT is read without being initialized and the outer
          loop never exits; this looks deliberate for a reduced test case, so
          do not "tidy" it -- TODO confirm against the originating bug report.
          The inner loop loads through an int8_t pointer into an int16x8_t
          with vldrbq_z_s16 and passes the result to vmladavaq_p_s16.
          BIAS is not referenced.  */
       6  void
       7  foo (uint16_t row_len, const int32_t *bias, int8_t *out)
       8  {
       9    int i_out_ch;
      10    for (;;)
      11     {
      12       int8_t *ip_c3;
      13       int32_t acc_3;
      14       int32_t row_loop_cnt = row_len;
               /* RES is built from ACC_3 here, before the inner loop assigns
                  ACC_3, so those assignments do not change RES.  */
      15       int32x4_t res = {acc_3};
      16       uint32x4_t scatter_offset;
      17       int i_row_loop;
               /* No init clause: I_ROW_LOOP is used exactly as declared above.  */
      18       for (; i_row_loop < row_loop_cnt; i_row_loop++)
      19        {
      20  	mve_pred16_t p;  /* Predicate is never assigned.  */
      21  	int16x8_t r0;
      22  	int16x8_t c3 = vldrbq_z_s16(ip_c3, p);  /* Presumably a predicated widening byte load (ACLE naming) -- confirm in arm_mve.h.  */
      23  	acc_3 = vmladavaq_p_s16(acc_3, r0, c3, p);
      24        }
               /* Scatter-store of RES based at &out[i_out_ch]; I_OUT_CH and
                  SCATTER_OFFSET are never assigned.  */
      25       vstrbq_scatter_offset_s32(&out[i_out_ch], scatter_offset, res);
      26     }
      27  }
      28  
      /* Reduced test input for the compiler (built with the MVE options and
         -O2 requested by the dg- directives in this file).  Every local
         except ROW_LOOP_CNT is read without being initialized and the outer
         loop never exits; this looks deliberate for a reduced test case, so
         do not "tidy" it -- TODO confirm against the originating bug report.
         The inner loop loads through an int8_t pointer into an int32x4_t
         with vldrbq_z_s32 and passes the result to vmladavaq_p_s32.
         BIAS is not referenced.  */
      29  void
      30  foo1 (uint16_t row_len, const int32_t *bias, int8_t *out)
      31  {
      32    int i_out_ch;
      33    for (;;)
      34     {
      35       int8_t *ip_c3;
      36       int32_t acc_3;
      37       int32_t row_loop_cnt = row_len;
      38       int i_row_loop;
               /* RES is built from ACC_3 here, before the inner loop assigns
                  ACC_3, so those assignments do not change RES.  */
      39       int32x4_t res = {acc_3};
      40       uint32x4_t scatter_offset;
               /* No init clause: I_ROW_LOOP is used exactly as declared above.  */
      41       for (; i_row_loop < row_loop_cnt; i_row_loop++)
      42        {
      43  	mve_pred16_t p;  /* Predicate is never assigned.  */
      44  	int32x4_t r0;
      45  	int32x4_t c3 = vldrbq_z_s32(ip_c3, p);  /* Presumably a predicated widening byte load (ACLE naming) -- confirm in arm_mve.h.  */
      46  	acc_3 = vmladavaq_p_s32(acc_3, r0, c3, p);
      47        }
               /* Scatter-store of RES based at &out[i_out_ch]; I_OUT_CH and
                  SCATTER_OFFSET are never assigned.  */
      48       vstrbq_scatter_offset_s32(&out[i_out_ch], scatter_offset, res);
      49     }
      50  }
      51  
      /* Reduced test input for the compiler (built with the MVE options and
         -O2 requested by the dg- directives in this file).  Every local
         except ROW_LOOP_CNT is read without being initialized and the outer
         loop never exits; this looks deliberate for a reduced test case, so
         do not "tidy" it -- TODO confirm against the originating bug report.
         The inner loop loads through an int16_t pointer into an int32x4_t
         with vldrhq_z_s32 and passes the result to vmladavaq_p_s32.
         BIAS is not referenced.  */
      52  void
      53  foo2 (uint16_t row_len, const int32_t *bias, int8_t *out)
      54  {
      55    int i_out_ch;
      56    for (;;)
      57     {
      58       int16_t *ip_c3;
      59       int32_t acc_3;
      60       int32_t row_loop_cnt = row_len;
      61       int i_row_loop;
               /* RES is built from ACC_3 here, before the inner loop assigns
                  ACC_3, so those assignments do not change RES.  */
      62       int32x4_t res = {acc_3};
      63       uint32x4_t scatter_offset;
               /* No init clause: I_ROW_LOOP is used exactly as declared above.  */
      64       for (; i_row_loop < row_loop_cnt; i_row_loop++)
      65        {
      66  	mve_pred16_t p;  /* Predicate is never assigned.  */
      67  	int32x4_t r0;
      68  	int32x4_t c3 = vldrhq_z_s32(ip_c3, p);  /* Presumably a predicated widening halfword load (ACLE naming) -- confirm in arm_mve.h.  */
      69  	acc_3 = vmladavaq_p_s32(acc_3, r0, c3, p);
      70        }
               /* Scatter-store of RES based at &out[i_out_ch]; I_OUT_CH and
                  SCATTER_OFFSET are never assigned.  */
      71       vstrbq_scatter_offset_s32(&out[i_out_ch], scatter_offset, res);
      72     }
      73  }