1  /* { dg-do compile } */
       2  /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
       3  
       4  #include <stdint.h>
       5  
       6  #define MULTHI(TYPE, BIGGER, RND)                     \
       7  TYPE __attribute__ ((noinline, noclone))              \
       8  mulhs_##TYPE##_##RND (TYPE *restrict x,               \
       9          TYPE *restrict y, TYPE *restrict z, int n)    \
      10  {                                                     \
      11    for (int i = 0; i < n; i++)                         \
      12    {                                                   \
      13      z[i] = ((((BIGGER)x[i] * (BIGGER)y[i]) >>         \
      14              (sizeof(BIGGER)*8/2-2)) + RND) >> 1;      \
      15    }                                                   \
      16  }
      17  
      18  MULTHI (int8_t, int16_t, 0)
      19  MULTHI (int16_t, int32_t, 0)
      20  MULTHI (int32_t, int64_t, 0)
      21  
      22  MULTHI (uint8_t, uint16_t, 0)
      23  MULTHI (uint16_t, uint32_t, 0)
      24  MULTHI (uint32_t, uint64_t, 0)
      25  
      26  MULTHI (int8_t, int16_t, 1)
      27  MULTHI (int16_t, int32_t, 1)
      28  MULTHI (int32_t, int64_t, 1)
      29  
      30  MULTHI (uint8_t, uint16_t, 1)
      31  MULTHI (uint16_t, uint32_t, 1)
      32  MULTHI (uint32_t, uint64_t, 1)
      33  
      34  /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 12 "vect" } } */
      35  
      36  /* { dg-final { scan-assembler-times {\tsmullb\tz[0-9]+\.h, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
      37  /* { dg-final { scan-assembler-times {\tsmullt\tz[0-9]+\.h, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
      38  /* { dg-final { scan-assembler-times {\tsmullb\tz[0-9]+\.s, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
      39  /* { dg-final { scan-assembler-times {\tsmullt\tz[0-9]+\.s, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
      40  /* { dg-final { scan-assembler-times {\tsmullb\tz[0-9]+\.d, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
      41  /* { dg-final { scan-assembler-times {\tsmullt\tz[0-9]+\.d, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
      42  
      43  /* { dg-final { scan-assembler-times {\tshrnb\tz[0-9]+\.b, z[0-9]+\.h, #7\n} 2 } } */
      44  /* { dg-final { scan-assembler-times {\tshrnt\tz[0-9]+\.b, z[0-9]+\.h, #7\n} 2 } } */
      45  /* { dg-final { scan-assembler-times {\tshrnb\tz[0-9]+\.h, z[0-9]+\.s, #15\n} 2 } } */
      46  /* { dg-final { scan-assembler-times {\tshrnt\tz[0-9]+\.h, z[0-9]+\.s, #15\n} 2 } } */
      47  /* { dg-final { scan-assembler-times {\tshrnb\tz[0-9]+\.s, z[0-9]+\.d, #31\n} 2 } } */
      48  /* { dg-final { scan-assembler-times {\tshrnt\tz[0-9]+\.s, z[0-9]+\.d, #31\n} 2 } } */
      49  
      50  /* { dg-final { scan-assembler-times {\tumullb\tz[0-9]+\.h, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
      51  /* { dg-final { scan-assembler-times {\tumullt\tz[0-9]+\.h, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
      52  /* { dg-final { scan-assembler-times {\tumullb\tz[0-9]+\.s, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
      53  /* { dg-final { scan-assembler-times {\tumullt\tz[0-9]+\.s, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
      54  /* { dg-final { scan-assembler-times {\tumullb\tz[0-9]+\.d, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
      55  /* { dg-final { scan-assembler-times {\tumullt\tz[0-9]+\.d, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
      56  
      57  /* { dg-final { scan-assembler-times {\trshrnb\tz[0-9]+\.b, z[0-9]+\.h, #7\n} 2 } } */
      58  /* { dg-final { scan-assembler-times {\trshrnt\tz[0-9]+\.b, z[0-9]+\.h, #7\n} 2 } } */
      59  /* { dg-final { scan-assembler-times {\trshrnb\tz[0-9]+\.h, z[0-9]+\.s, #15\n} 2 } } */
      60  /* { dg-final { scan-assembler-times {\trshrnt\tz[0-9]+\.h, z[0-9]+\.s, #15\n} 2 } } */
      61  /* { dg-final { scan-assembler-times {\trshrnb\tz[0-9]+\.s, z[0-9]+\.d, #31\n} 2 } } */
      62  /* { dg-final { scan-assembler-times {\trshrnt\tz[0-9]+\.s, z[0-9]+\.d, #31\n} 2 } } */
      63