(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
aarch64/
vect_saddl_1.c
       1  /* { dg-do run } */
       2  /* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model -fno-ipa-icf" } */
       3  
       4  #pragma GCC target "+nosve"
       5  
       6  typedef signed char S8_t;
       7  typedef signed short S16_t;
       8  typedef signed int S32_t;
       9  typedef signed long long S64_t;
      10  
      11  typedef signed char *__restrict__ pS8_t;
      12  typedef signed short *__restrict__ pS16_t;
      13  typedef signed int *__restrict__ pS32_t;
      14  typedef signed long long *__restrict__ pS64_t;
      15  
      16  typedef unsigned char U8_t;
      17  typedef unsigned short U16_t;
      18  typedef unsigned int U32_t;
      19  typedef unsigned long long U64_t;
      20  
      21  typedef unsigned char *__restrict__ pU8_t;
      22  typedef unsigned short *__restrict__ pU16_t;
      23  typedef unsigned int *__restrict__ pU32_t;
      24  typedef unsigned long long *__restrict__ pU64_t;
      25  
      26  extern void abort ();
      27  
      28  void
      29  test_addl_S64_S32_4 (pS64_t a, pS32_t b, pS32_t c)
      30  {
      31    int i;
      32    for (i = 0; i < 4; i++)
      33      a[i] = (S64_t) b[i] + (S64_t) c[i];
      34  }
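/* Expected for the function above.  Not checked here: these patterns are
   counted by the scan-assembler-times checks that follow
   test_subl_S64_S32_4_neg0.
   "saddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s"
   "saddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s"  */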
      37  
      38  /* a = -b + c => a = c - b */
      39  void
      40  test_addl_S64_S32_4_neg0 (pS64_t a, pS32_t b, pS32_t c)
      41  {
      42    int i;
      43    for (i = 0; i < 4; i++)
      44      a[i] = -(S64_t) b[i] + (S64_t) c[i];
      45  }
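/* Expected for the function above.  Not checked here: these patterns are
   counted by the scan-assembler-times checks that follow
   test_subl_S64_S32_4_neg2.
   "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s"
   "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s"  */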
      48  
      49  /* a = b + -c => a = b - c */
      50  void
      51  test_addl_S64_S32_4_neg1 (pS64_t a, pS32_t b, pS32_t c)
      52  {
      53    int i;
      54    for (i = 0; i < 4; i++)
      55      a[i] = (S64_t) b[i] + -(S64_t) c[i];
      56  }
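/* Expected for the function above.  Not checked here: these patterns are
   counted by the scan-assembler-times checks that follow
   test_subl_S64_S32_4_neg2.
   "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s"
   "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s"  */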
      59  
      60  void
      61  test_addl_S32_S16_8 (pS32_t a, pS16_t b, pS16_t c)
      62  {
      63    int i;
      64    for (i = 0; i < 8; i++)
      65      a[i] = (S32_t) b[i] + (S32_t) c[i];
      66  }
      67  /* { dg-final { scan-assembler "saddl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
      68  /* { dg-final { scan-assembler "saddl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
      69  
      70  void
      71  test_addl_S16_S8_16 (pS16_t a, pS8_t b, pS8_t c)
      72  {
      73    int i;
      74    for (i = 0; i < 16; i++)
      75      a[i] = (S16_t) b[i] + (S16_t) c[i];
      76  }
      77  /* { dg-final { scan-assembler "saddl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
      78  /* { dg-final { scan-assembler "saddl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
      79  
      80  void
      81  test_addl_U64_U32_4 (pU64_t a, pU32_t b, pU32_t c)
      82  {
      83    int i;
      84    for (i = 0; i < 4; i++)
      85      a[i] = (U64_t) b[i] + (U64_t) c[i];
      86  }
      87  /* { dg-final { scan-assembler "uaddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" } } */
      88  /* { dg-final { scan-assembler "uaddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" } } */
      89  
      90  void
      91  test_addl_U32_U16_8 (pU32_t a, pU16_t b, pU16_t c)
      92  {
      93    int i;
      94    for (i = 0; i < 8; i++)
      95      a[i] = (U32_t) b[i] + (U32_t) c[i];
      96  }
      97  /* { dg-final { scan-assembler "uaddl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
      98  /* { dg-final { scan-assembler "uaddl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
      99  
     100  void
     101  test_addl_U16_U8_16 (pU16_t a, pU8_t b, pU8_t c)
     102  {
     103    int i;
     104    for (i = 0; i < 16; i++)
     105      a[i] = (U16_t) b[i] + (U16_t) c[i];
     106  }
     107  /* { dg-final { scan-assembler "uaddl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
     108  /* { dg-final { scan-assembler "uaddl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
     109  
     110  void
     111  test_subl_S64_S32_4 (pS64_t a, pS32_t b, pS32_t c)
     112  {
     113    int i;
     114    for (i = 0; i < 4; i++)
     115      a[i] = (S64_t) b[i] - (S64_t) c[i];
     116  }
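/* Expected for the function above.  Not checked here: these patterns are
   counted by the scan-assembler-times checks that follow
   test_subl_S64_S32_4_neg2.
   "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s"
   "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s"  */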
     119  
     120  /* a = b - -c => a = b + c */
     121  void
     122  test_subl_S64_S32_4_neg0 (pS64_t a, pS32_t b, pS32_t c)
     123  {
     124    int i;
     125    for (i = 0; i < 4; i++)
     126      a[i] = (S64_t) b[i] - -(S64_t) c[i];
     127  }
     128  /* { dg-final { scan-assembler-times "saddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" 2 } } */
     129  /* { dg-final { scan-assembler-times "saddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" 2 } } */
     130  
     131  /* a = -b - -c => a = c - b */
     132  void
     133  test_subl_S64_S32_4_neg1 (pS64_t a, pS32_t b, pS32_t c)
     134  {
     135    int i;
     136    for (i = 0; i < 4; i++)
     137      a[i] = -(S64_t) b[i] - -(S64_t) c[i];
     138  }
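/* Expected for the function above.  Not checked here: these patterns are
   counted by the scan-assembler-times checks that follow
   test_subl_S64_S32_4_neg2.
   "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s"
   "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s"  */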
     141  
     142  /* a = -(b - c) => a = c - b */
     143  void
     144  test_subl_S64_S32_4_neg2 (pS64_t a, pS32_t b, pS32_t c)
     145  {
     146    int i;
     147    for (i = 0; i < 4; i++)
     148      a[i] = -((S64_t) b[i] - (S64_t) c[i]);
     149  }
     150  /* { dg-final { scan-assembler-times "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" 5 } } */
     151  /* { dg-final { scan-assembler-times "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" 5 } } */
     152  
     153  void
     154  test_subl_S32_S16_8 (pS32_t a, pS16_t b, pS16_t c)
     155  {
     156    int i;
     157    for (i = 0; i < 8; i++)
     158      a[i] = (S32_t) b[i] - (S32_t) c[i];
     159  }
     160  /* { dg-final { scan-assembler "ssubl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
     161  /* { dg-final { scan-assembler "ssubl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
     162  
     163  void
     164  test_subl_S16_S8_16 (pS16_t a, pS8_t b, pS8_t c)
     165  {
     166    int i;
     167    for (i = 0; i < 16; i++)
     168      a[i] = (S16_t) b[i] - (S16_t) c[i];
     169  }
     170  /* { dg-final { scan-assembler "ssubl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
     171  /* { dg-final { scan-assembler "ssubl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
     172  
     173  void
     174  test_subl_U64_U32_4 (pU64_t a, pU32_t b, pU32_t c)
     175  {
     176    int i;
     177    for (i = 0; i < 4; i++)
     178      a[i] = (U64_t) b[i] - (U64_t) c[i];
     179  }
     180  /* { dg-final { scan-assembler "usubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" } } */
     181  /* { dg-final { scan-assembler "usubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" } } */
     182  
     183  void
     184  test_subl_U32_U16_8 (pU32_t a, pU16_t b, pU16_t c)
     185  {
     186    int i;
     187    for (i = 0; i < 8; i++)
     188      a[i] = (U32_t) b[i] - (U32_t) c[i];
     189  }
     190  /* { dg-final { scan-assembler "usubl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
     191  /* { dg-final { scan-assembler "usubl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
     192  
     193  void
     194  test_subl_U16_U8_16 (pU16_t a, pU8_t b, pU8_t c)
     195  {
     196    int i;
     197    for (i = 0; i < 16; i++)
     198      a[i] = (U16_t) b[i] - (U16_t) c[i];
     199  }
     200  /* { dg-final { scan-assembler "usubl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
     201  /* { dg-final { scan-assembler "usubl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
     202  
     203  /* input values */
     204  
     205  S64_t S64_ta[4];
     206  S32_t S32_tb[4] = { 0, 1, 2, 3 };
     207  S32_t S32_tc[4] = { 2, 2, -2, -2 };
     208  
     209  S32_t S32_ta[8];
     210  S16_t S16_tb[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
     211  S16_t S16_tc[8] = { 2, 2, -2, -2, 2, 2, -2, -2 };
     212  
     213  S16_t S16_ta[16];
     214  S8_t S8_tb[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
     215  S8_t S8_tc[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
     216  
     217  /* expected output */
     218  
     219  S64_t addl_rS64[] = { 2, 3, 0, 1 };
     220  S64_t neg_r[] = { 2, 1, -4, -5 };
     221  S32_t addl_rS32[] = { 2, 3, 0, 1, 6, 7, 4, 5 };
     222  S16_t addl_rS16[] = { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 };
     223  S64_t subl_rS64[] = { -2, -1, 4, 5 };
     224  S32_t subl_rS32[] = { -2, -1, 4, 5, 2, 3, 8, 9 };
     225  S16_t subl_rS16[] =
     226    { -2, -1, 4, 5, 2, 3, 8, 9, 6, 7, 12, 13, 10, 11, 16, 17 };
     227  U64_t addl_rU64[] = { 2, 3, 0x100000000, 0x100000001 };
     228  U32_t addl_rU32[] = { 2, 3, 0x10000, 0x10001, 6, 7, 0x10004, 0x10005 };
     229  U16_t addl_rU16[] =
     230  {
     231    0x0002, 0x0003, 0x0100, 0x0101, 0x0006, 0x0007, 0x0104, 0x0105,
     232    0x000a, 0x000b, 0x0108, 0x0109, 0x000e, 0x000f, 0x010c, 0x010d
     233  };
     234  U64_t subl_rU64[] =
     235  {
     236    0xfffffffffffffffe, 0xffffffffffffffff,
     237    0xffffffff00000004, 0xffffffff00000005
     238  };
     239  U32_t subl_rU32[] =
     240  {
     241    0xfffffffe, 0xffffffff, 0xffff0004, 0xffff0005,
     242    0x00000002, 0x00000003, 0xffff0008, 0xffff0009
     243  };
     244  U16_t subl_rU16[] =
     245  {
     246    0xfffe, 0xffff, 0xff04, 0xff05, 0x0002, 0x0003, 0xff08, 0xff09,
     247    0x0006, 0x0007, 0xff0c, 0xff0d, 0x000a, 0x000b, 0xff10, 0xff11
     248  };
     249  
     250  #define CHECK(T,N,AS,US)                                 \
     251  do                                                       \
     252    {                                                      \
     253      for (i = 0; i < N; i++)                              \
     254        if ((US##T##_t)S##T##_ta[i] != AS##_##r##US##T[i]) \
     255          abort();                                         \
     256    }                                                      \
     257  while (0)
     258  
     259  #define NCHECK(RES)                           \
     260  do                                            \
     261    {                                           \
     262      for (i = 0; i < 4; i++)                   \
     263        if (S64_ta[i] != RES[i])                \
     264          abort ();                             \
     265    }                                           \
     266  while (0)
     267  
     268  #define SCHECK(T,N,AS) CHECK(T,N,AS,S)
     269  #define UCHECK(T,N,AS) CHECK(T,N,AS,U)
     270  
     271  int
     272  main ()
     273  {
     274    int i;
     275  
     276    test_addl_S64_S32_4 (S64_ta, S32_tb, S32_tc);
     277    SCHECK (64, 4, addl);
     278    test_addl_S32_S16_8 (S32_ta, S16_tb, S16_tc);
     279    SCHECK (32, 8, addl);
     280    test_addl_S16_S8_16 (S16_ta, S8_tb, S8_tc);
     281    SCHECK (16, 16, addl);
     282    test_subl_S64_S32_4 (S64_ta, S32_tb, S32_tc);
     283    SCHECK (64, 4, subl);
     284    test_subl_S32_S16_8 (S32_ta, S16_tb, S16_tc);
     285    SCHECK (32, 8, subl);
     286    test_subl_S16_S8_16 (S16_ta, S8_tb, S8_tc);
     287    SCHECK (16, 16, subl);
     288  
     289    test_addl_U64_U32_4 (S64_ta, S32_tb, S32_tc);
     290    UCHECK (64, 4, addl);
     291    test_addl_U32_U16_8 (S32_ta, S16_tb, S16_tc);
     292    UCHECK (32, 8, addl);
     293    test_addl_U16_U8_16 (S16_ta, S8_tb, S8_tc);
     294    UCHECK (16, 16, addl);
     295    test_subl_U64_U32_4 (S64_ta, S32_tb, S32_tc);
     296    UCHECK (64, 4, subl);
     297    test_subl_U32_U16_8 (S32_ta, S16_tb, S16_tc);
     298    UCHECK (32, 8, subl);
     299    test_subl_U16_U8_16 (S16_ta, S8_tb, S8_tc);
     300    UCHECK (16, 16, subl);
     301  
     302    test_addl_S64_S32_4_neg0 (S64_ta, S32_tb, S32_tc);
     303    NCHECK (neg_r);
     304    test_addl_S64_S32_4_neg1 (S64_ta, S32_tb, S32_tc);
     305    NCHECK (subl_rS64);
     306    test_subl_S64_S32_4_neg0 (S64_ta, S32_tb, S32_tc);
     307    NCHECK (addl_rS64);
     308    test_subl_S64_S32_4_neg1 (S64_ta, S32_tb, S32_tc);
     309    NCHECK (neg_r);
     310    test_subl_S64_S32_4_neg2 (S64_ta, S32_tb, S32_tc);
     311    NCHECK (neg_r);
     312  
     313    return 0;
     314  }
     315  
     316