(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
aarch64/
vect_smlal_1.c
       1  /* { dg-do run } */
       2  /* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model -fno-ipa-icf" } */
       3  
       4  #pragma GCC target "+nosve"
       5  
       6  typedef signed char S8_t;
       7  typedef signed short S16_t;
       8  typedef signed int S32_t;
       9  typedef signed long long S64_t;
      10  typedef signed char *__restrict__ pS8_t;
      11  typedef signed short *__restrict__ pS16_t;
      12  typedef signed int *__restrict__ pS32_t;
      13  typedef signed long long *__restrict__ pS64_t;
      14  typedef unsigned char U8_t;
      15  typedef unsigned short U16_t;
      16  typedef unsigned int U32_t;
      17  typedef unsigned long long U64_t;
      18  typedef unsigned char *__restrict__ pU8_t;
      19  typedef unsigned short *__restrict__ pU16_t;
      20  typedef unsigned int *__restrict__ pU32_t;
      21  typedef unsigned long long *__restrict__ pU64_t;
      22  
      23  extern void abort ();
      24  
      25  void
      26  test_addS64_tS32_t4 (pS64_t a, pS32_t b, pS32_t c)
      27  {
      28    int i;
      29    for (i = 0; i < 4; i++)
      30      a[i] += (S64_t) b[i] * (S64_t) c[i];
      31  }
      32  
      33  /* { dg-final { scan-assembler "smlal\tv\[0-9\]+\.2d" } } */
      34  /* { dg-final { scan-assembler "smlal2\tv\[0-9\]+\.2d" } } */
      35  
      36  void
      37  test_addS32_tS16_t8 (pS32_t a, pS16_t b, pS16_t c)
      38  {
      39    int i;
      40    for (i = 0; i < 8; i++)
      41      a[i] += (S32_t) b[i] * (S32_t) c[i];
      42  }
      43  
      44  /* { dg-final { scan-assembler "smlal\tv\[0-9\]+\.4s" } } */
      45  /* { dg-final { scan-assembler "smlal2\tv\[0-9\]+\.4s" } } */
      46  
      47  void
      48  test_addS16_tS8_t16 (pS16_t a, pS8_t b, pS8_t c)
      49  {
      50    int i;
      51    for (i = 0; i < 16; i++)
      52      a[i] += (S16_t) b[i] * (S16_t) c[i];
      53  }
      54  
      55  void
      56  test_addS16_tS8_t16_neg0 (pS16_t a, pS8_t b, pS8_t c)
      57  {
      58    int i;
      59    for (i = 0; i < 16; i++)
      60      a[i] += (S16_t) -b[i] * (S16_t) -c[i];
      61  }
      62  
      63  void
      64  test_addS16_tS8_t16_neg1 (pS16_t a, pS8_t b, pS8_t c)
      65  {
      66    int i;
      67    for (i = 0; i < 16; i++)
      68      a[i] -= (S16_t) b[i] * (S16_t) -c[i];
      69  }
      70  
      71  void
      72  test_addS16_tS8_t16_neg2 (pS16_t a, pS8_t b, pS8_t c)
      73  {
      74    int i;
      75    for (i = 0; i < 16; i++)
      76      a[i] -= (S16_t) -b[i] * (S16_t) c[i];
      77  }
      78  
      79  /* { dg-final { scan-assembler-times "smlal\tv\[0-9\]+\.8h" 4 } } */
      80  /* { dg-final { scan-assembler-times "smlal2\tv\[0-9\]+\.8h" 4 } } */
      81  
      82  void
      83  test_subS64_tS32_t4 (pS64_t a, pS32_t b, pS32_t c)
      84  {
      85    int i;
      86    for (i = 0; i < 4; i++)
      87      a[i] -= (S64_t) b[i] * (S64_t) c[i];
      88  }
      89  
      90  /* { dg-final { scan-assembler "smlsl\tv\[0-9\]+\.2d" } } */
      91  /* { dg-final { scan-assembler "smlsl2\tv\[0-9\]+\.2d" } } */
      92  
      93  void
      94  test_subS32_tS16_t8 (pS32_t a, pS16_t b, pS16_t c)
      95  {
      96    int i;
      97    for (i = 0; i < 8; i++)
      98      a[i] -= (S32_t) b[i] * (S32_t) c[i];
      99  }
     100  
     101  /* { dg-final { scan-assembler "smlsl\tv\[0-9\]+\.4s" } } */
     102  /* { dg-final { scan-assembler "smlsl2\tv\[0-9\]+\.4s" } } */
     103  
     104  void
     105  test_subS16_tS8_t16 (pS16_t a, pS8_t b, pS8_t c)
     106  {
     107    int i;
     108    for (i = 0; i < 16; i++)
     109      a[i] -= (S16_t) b[i] * (S16_t) c[i];
     110  }
     111  
     112  void
     113  test_subS16_tS8_t16_neg0 (pS16_t a, pS8_t b, pS8_t c)
     114  {
     115    int i;
     116    for (i = 0; i < 16; i++)
     117      a[i] += (S16_t) -b[i] * (S16_t) c[i];
     118  }
     119  
     120  void
     121  test_subS16_tS8_t16_neg1 (pS16_t a, pS8_t b, pS8_t c)
     122  {
     123    int i;
     124    for (i = 0; i < 16; i++)
     125      a[i] += (S16_t) b[i] * (S16_t) -c[i];
     126  }
     127  
     128  void
     129  test_subS16_tS8_t16_neg2 (pS16_t a, pS8_t b, pS8_t c)
     130  {
     131    int i;
     132    for (i = 0; i < 16; i++)
     133      a[i] += -((S16_t) b[i] * (S16_t) c[i]);
     134  }
     135  
     136  void
     137  test_subS16_tS8_t16_neg3 (pS16_t a, pS8_t b, pS8_t c)
     138  {
     139    int i;
     140    for (i = 0; i < 16; i++)
     141      a[i] -= (S16_t) -b[i] * (S16_t) -c[i];
     142  }
     143  
     144  /* { dg-final { scan-assembler-times "smlsl\tv\[0-9\]+\.8h" 5 } } */
     145  /* { dg-final { scan-assembler-times "smlsl2\tv\[0-9\]+\.8h" 5 } } */
     146  
     147  void
     148  test_addU64_tU32_t4 (pU64_t a, pU32_t b, pU32_t c)
     149  {
     150    int i;
     151    for (i = 0; i < 4; i++)
     152      a[i] += (U64_t) b[i] * (U64_t) c[i];
     153  }
     154  
     155  /* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.2d" } } */
     156  /* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.2d" } } */
     157  
     158  void
     159  test_addU32_tU16_t8 (pU32_t a, pU16_t b, pU16_t c)
     160  {
     161    int i;
     162    for (i = 0; i < 8; i++)
     163      a[i] += (U32_t) b[i] * (U32_t) c[i];
     164  }
     165  
     166  /* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.4s" } } */
     167  /* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.4s" } } */
     168  
     169  void
     170  test_addU16_tU8_t16 (pU16_t a, pU8_t b, pU8_t c)
     171  {
     172    int i;
     173    for (i = 0; i < 16; i++)
     174      a[i] += (U16_t) b[i] * (U16_t) c[i];
     175  }
     176  
     177  /* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.8h" } } */
     178  /* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.8h" } } */
     179  
     180  void
     181  test_subU64_tU32_t4 (pU64_t a, pU32_t b, pU32_t c)
     182  {
     183    int i;
     184    for (i = 0; i < 4; i++)
     185      a[i] -= (U64_t) b[i] * (U64_t) c[i];
     186  }
     187  
     188  /* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.2d" } } */
     189  /* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.2d" } } */
     190  
     191  void
     192  test_subU32_tU16_t8 (pU32_t a, pU16_t b, pU16_t c)
     193  {
     194    int i;
     195    for (i = 0; i < 8; i++)
     196      a[i] -= (U32_t) b[i] * (U32_t) c[i];
     197  }
     198  
     199  /* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.4s" } } */
     200  /* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.4s" } } */
     201  
     202  void
     203  test_subU16_tU8_t16 (pU16_t a, pU8_t b, pU8_t c)
     204  {
     205    int i;
     206    for (i = 0; i < 16; i++)
     207      a[i] -= (U16_t) b[i] * (U16_t) c[i];
     208  }
     209  
     210  /* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.8h" } } */
     211  /* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.8h" } } */
     212  
     213  
     214  S64_t add_rS64[4] = { 6, 7, -4, -3 };
     215  S32_t add_rS32[8] = { 6, 7, -4, -3, 10, 11, 0, 1 };
     216  S16_t add_rS16[16] =
     217   { 6, 7, -4, -3, 10, 11, 0, 1, 14, 15, 4, 5, 18, 19, 8, 9 };
     218  
     219  S64_t sub_rS64[4] = { 0, 1, 2, 3 };
     220  S32_t sub_rS32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
     221  S16_t sub_rS16[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
     222  
     223  U64_t add_rU64[4] = { 0x6, 0x7, 0x2fffffffc, 0x2fffffffd };
     224  
     225  U32_t add_rU32[8] =
     226  {
     227    0x6, 0x7, 0x2fffc, 0x2fffd,
     228    0xa, 0xb, 0x30000, 0x30001
     229  };
     230  
     231  U16_t add_rU16[16] =
     232  {
     233    0x6, 0x7, 0x2fc, 0x2fd, 0xa, 0xb, 0x300, 0x301,
     234    0xe, 0xf, 0x304, 0x305, 0x12, 0x13, 0x308, 0x309
     235  };
     236  
     237  U64_t sub_rU64[4] = { 0, 1, 2, 3 };
     238  U32_t sub_rU32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
     239  U16_t sub_rU16[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
     240  
     241  S8_t neg_r[16] = { -6, -5, 8, 9, -2, -1, 12, 13, 2, 3, 16, 17, 6, 7, 20, 21 };
     242  
     243  S64_t S64_ta[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
     244  S32_t S32_tb[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
     245  S32_t S32_tc[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
     246  
     247  S32_t S32_ta[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
     248  S16_t S16_tb[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
     249  S16_t S16_tc[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
     250  
     251  S16_t S16_ta[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
     252  S8_t S8_tb[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
     253  S8_t S8_tc[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
     254  
     255  
     256  #define CHECK(T,N,AS,US)                      \
     257  do                                            \
     258    {                                           \
     259      for (i = 0; i < N; i++)                   \
     260        if (S##T##_ta[i] != AS##_r##US##T[i])   \
     261          abort ();                             \
     262    }                                           \
     263  while (0)
     264  
     265  #define SCHECK(T,N,AS) CHECK(T,N,AS,S)
     266  #define UCHECK(T,N,AS) CHECK(T,N,AS,U)
     267  
     268  #define NCHECK(RES)                           \
     269  do                                            \
     270    {                                           \
     271      for (i = 0; i < 16; i++)                  \
     272        if (S16_ta[i] != RES[i])                \
     273          abort ();                             \
     274    }                                           \
     275  while (0)
     276  
     277  
     278  int
     279  main ()
     280  {
     281    int i;
     282  
     283    test_addS64_tS32_t4 (S64_ta, S32_tb, S32_tc);
     284    SCHECK (64, 4, add);
     285    test_addS32_tS16_t8 (S32_ta, S16_tb, S16_tc);
     286    SCHECK (32, 8, add);
     287    test_addS16_tS8_t16 (S16_ta, S8_tb, S8_tc);
     288    SCHECK (16, 16, add);
     289    test_subS64_tS32_t4 (S64_ta, S32_tb, S32_tc);
     290    SCHECK (64, 4, sub);
     291    test_subS32_tS16_t8 (S32_ta, S16_tb, S16_tc);
     292    SCHECK (32, 8, sub);
     293    test_subS16_tS8_t16 (S16_ta, S8_tb, S8_tc);
     294    SCHECK (16, 16, sub);
     295  
     296    test_addU64_tU32_t4 (S64_ta, S32_tb, S32_tc);
     297    UCHECK (64, 4, add);
     298    test_addU32_tU16_t8 (S32_ta, S16_tb, S16_tc);
     299    UCHECK (32, 8, add);
     300    test_addU16_tU8_t16 (S16_ta, S8_tb, S8_tc);
     301    UCHECK (16, 16, add);
     302    test_subU64_tU32_t4 (S64_ta, S32_tb, S32_tc);
     303    UCHECK (64, 4, sub);
     304    test_subU32_tU16_t8 (S32_ta, S16_tb, S16_tc);
     305    UCHECK (32, 8, sub);
     306    test_subU16_tU8_t16 (S16_ta, S8_tb, S8_tc);
     307    UCHECK (16, 16, sub);
     308  
     309    test_addS16_tS8_t16_neg0 (S16_ta, S8_tb, S8_tc);
     310    NCHECK (add_rS16);
     311    test_subS16_tS8_t16_neg0 (S16_ta, S8_tb, S8_tc);
     312    NCHECK (sub_rS16);
     313    test_addS16_tS8_t16_neg1 (S16_ta, S8_tb, S8_tc);
     314    NCHECK (add_rS16);
     315    test_subS16_tS8_t16_neg1 (S16_ta, S8_tb, S8_tc);
     316    NCHECK (sub_rS16);
     317    test_addS16_tS8_t16_neg2 (S16_ta, S8_tb, S8_tc);
     318    NCHECK (add_rS16);
     319    test_subS16_tS8_t16_neg2 (S16_ta, S8_tb, S8_tc);
     320    NCHECK (sub_rS16);
     321    test_subS16_tS8_t16_neg3 (S16_ta, S8_tb, S8_tc);
     322    NCHECK (neg_r);
     323  
     324    return 0;
     325  }
     326