(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
aarch64/
pr98772.c
       1  /* { dg-do run } */
       2  /* { dg-options "-O3 -save-temps" } */
       3  
       4  #pragma GCC target "+nosve"
       5  
       6  #include <stdint.h>
       7  #include <string.h>
       8  
       9  #define DSIZE 16
      10  #define PIXSIZE 64
      11  
      /* Widening-add kernel.  Fills d[0..15] as a 4x4 block: for each of the
         4 rows it stores pix1[x] + pix2[x] for x in 0..3 into 16-bit elements,
         then advances both byte pointers by 16 to reach the next row.
         This copy is compiled with the file's -O3 options.
         NOTE(review): the "uaddl\tv" dg-final scan at the end of the file
         presumably targets the code generated for this function, so the loop
         shape is deliberately left untouched.  */
      12  extern void
      13  wplus (uint16_t *d, uint8_t *restrict pix1, uint8_t *restrict pix2 )
      14  {
      15      for (int y = 0; y < 4; y++ )
      16      {
      17  	for (int x = 0; x < 4; x++ )
      18  	    d[x + y*4] = pix1[x] + pix2[x];
          /* Only the first 4 bytes of each 16-byte input row are read.  */
      19  	pix1 += 16;
      20  	pix2 += 16;
      21      }
      22  }
      /* Unoptimized reference for the widening add: computes the same 4x4
         block of 16-bit sums as wplus, but is built with optimize (0).
         D receives 16 results; PIX1 and PIX2 are read 4 bytes per row, with
         rows 16 bytes apart.  */
      extern void __attribute__((optimize (0)))
      wplus_no_opt (uint16_t *d, uint8_t *restrict pix1, uint8_t *restrict pix2 )
      {
          int row = 0;

          while (row < 4)
            {
              /* Start of the current output row inside D.  */
              uint16_t *out = d + row * 4;

              for (int col = 0; col != 4; ++col)
                out[col] = pix1[col] + pix2[col];

              /* Step both inputs to the next 16-byte row.  */
              pix1 += 16;
              pix2 += 16;
              ++row;
            }
      }
      34  
      /* Widening-subtract kernel.  Fills d[0..15] as a 4x4 block: for each of
         the 4 rows it stores pix1[x] - pix2[x] for x in 0..3 into 16-bit
         elements, then advances both byte pointers by 16 to reach the next
         row.  This copy is compiled with the file's -O3 options.
         NOTE(review): the "usubl\tv" dg-final scan at the end of the file
         presumably targets the code generated for this function, so the loop
         shape is deliberately left untouched.  */
      35  extern void
      36  wminus (uint16_t *d, uint8_t *restrict pix1, uint8_t *restrict pix2 )
      37  {
      38      for (int y = 0; y < 4; y++ )
      39      {
      40  	for (int x = 0; x < 4; x++ )
      41  	    d[x + y*4] = pix1[x] - pix2[x];
          /* Only the first 4 bytes of each 16-byte input row are read.  */
      42  	pix1 += 16;
      43  	pix2 += 16;
      44      }
      45  }
      /* Unoptimized reference for the widening subtract: computes the same
         4x4 block of 16-bit differences as wminus, but is built with
         optimize (0).  D receives 16 results; PIX1 and PIX2 are read 4 bytes
         per row, with rows 16 bytes apart.  */
      extern void __attribute__((optimize (0)))
      wminus_no_opt (uint16_t *d, uint8_t *restrict pix1, uint8_t *restrict pix2 )
      {
          int row = 0;

          while (row < 4)
            {
              /* Start of the current output row inside D.  */
              uint16_t *out = d + row * 4;

              for (int col = 0; col != 4; ++col)
                out[col] = pix1[col] - pix2[col];

              /* Step both inputs to the next 16-byte row.  */
              pix1 += 16;
              pix2 += 16;
              ++row;
            }
      }
      57  
      /* Widening-multiply kernel.  Fills d[0..15] as a 4x4 block: for each of
         the 4 rows it stores pix1[x] * pix2[x] for x in 0..3 into 16-bit
         elements, then advances both byte pointers by 16 to reach the next
         row.  This copy is compiled with the file's -O3 options.
         NOTE(review): the "umull\tv" dg-final scan at the end of the file
         presumably targets the code generated for this function, so the loop
         shape is deliberately left untouched.  */
      58  extern void
      59  wmult (uint16_t *d, uint8_t *restrict pix1, uint8_t *restrict pix2 )
      60  {
      61      for (int y = 0; y < 4; y++ )
      62      {
      63  	for (int x = 0; x < 4; x++ )
      64  	    d[x + y*4] = pix1[x] * pix2[x];
          /* Only the first 4 bytes of each 16-byte input row are read.  */
      65  	pix1 += 16;
      66  	pix2 += 16;
      67      }
      68  }
      /* Unoptimized reference for the widening multiply: computes the same
         4x4 block of 16-bit products as wmult, but is built with
         optimize (0).  D receives 16 results; PIX1 and PIX2 are read 4 bytes
         per row, with rows 16 bytes apart.  */
      extern void __attribute__((optimize (0)))
      wmult_no_opt (uint16_t *d, uint8_t *restrict pix1, uint8_t *restrict pix2 )
      {
          int row = 0;

          while (row < 4)
            {
              /* Start of the current output row inside D.  */
              uint16_t *out = d + row * 4;

              for (int col = 0; col != 4; ++col)
                out[col] = pix1[col] * pix2[col];

              /* Step both inputs to the next 16-byte row.  */
              pix1 += 16;
              pix2 += 16;
              ++row;
            }
      }
      80  
      /* Widening left-shift kernel.  Fills d[0..15] as a 4x4 block: for each
         of the 4 rows it stores pix1[x] << 8 for x in 0..3 into 16-bit
         elements, then advances the byte pointer by 16 to reach the next row.
         This copy is compiled with the file's -O3 options.
         NOTE(review): the "shl\tv" dg-final scan at the end of the file
         presumably targets the code generated for this function, so the loop
         shape is deliberately left untouched.  */
      81  extern void
      82  wlshift (uint16_t *d, uint8_t *restrict pix1)
      83  
      84  {
      85      for (int y = 0; y < 4; y++ )
      86      {
      87  	for (int x = 0; x < 4; x++ )
      88  	    d[x + y*4] = pix1[x] << 8;
          /* Only the first 4 bytes of each 16-byte input row are read.  */
      89  	pix1 += 16;
      90      }
      91  }
      /* Unoptimized reference for the widening left shift: computes the same
         4x4 block of 16-bit values (each input byte shifted left by 8) as
         wlshift, but is built with optimize (0).  D receives 16 results; PIX1
         is read 4 bytes per row, with rows 16 bytes apart.  */
      extern void __attribute__((optimize (0)))
      wlshift_no_opt (uint16_t *d, uint8_t *restrict pix1)
      {
          int row = 0;

          while (row < 4)
            {
              /* Start of the current output row inside D.  */
              uint16_t *out = d + row * 4;

              for (int col = 0; col != 4; ++col)
                out[col] = pix1[col] << 8;

              /* Step the input to the next 16-byte row.  */
              pix1 += 16;
              ++row;
            }
      }
     103  
     104  void __attribute__((optimize (0)))
     105  init_arrays (uint16_t *d_a, uint16_t *d_b, uint8_t *pix1, uint8_t *pix2)
     106  {
     107    for (int i = 0; i < DSIZE; i++)
     108    {
     109      d_a[i] = (1074 * i)%17;
     110      d_b[i] = (1074 * i)%17;
     111    }
     112    for (int i = 0; i < PIXSIZE; i++)
     113    {
     114      pix1[i] = (1024 * i)%17;
     115      pix2[i] = (1024 * i)%17;
     116    }
     117  }
     118  
     119  /* Don't optimize main so we don't get confused over where the vector
     120     instructions are generated. */
     121  __attribute__((optimize (0)))
     122  int main ()
     123  {
     124    uint16_t d_a[DSIZE];
     125    uint16_t d_b[DSIZE];
     126    uint8_t pix1[PIXSIZE];
     127    uint8_t pix2[PIXSIZE];
     128  
     129    init_arrays (d_a, d_b, pix1, pix2);
     130    wplus (d_a, pix1, pix2);
     131    wplus_no_opt (d_b, pix1, pix2);
     132    if (memcmp (d_a,d_b, DSIZE) != 0)
     133      return 1;
     134  
     135    init_arrays (d_a, d_b, pix1, pix2);
     136    wminus (d_a, pix1, pix2);
     137    wminus_no_opt (d_b, pix1, pix2);
     138    if (memcmp (d_a,d_b, DSIZE) != 0)
     139      return 2;
     140  
     141    init_arrays (d_a, d_b, pix1, pix2);
     142    wmult (d_a, pix1, pix2);
     143    wmult_no_opt (d_b, pix1, pix2);
     144    if (memcmp (d_a,d_b, DSIZE) != 0)
     145      return 3;
     146  
     147    init_arrays (d_a, d_b, pix1, pix2);
     148    wlshift (d_a, pix1);
     149    wlshift_no_opt (d_b, pix1);
     150    if (memcmp (d_a,d_b, DSIZE) != 0)
     151      return 4;
     152  
     153  }
     154  
     155  /* { dg-final { scan-assembler-times "uaddl\\tv" 2 } } */
     156  /* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */
     157  /* { dg-final { scan-assembler-times "umull\\tv" 2 } } */
     158  /* { dg-final { scan-assembler-times "shl\\tv" 2 } } */