1  /* { dg-do run } */
       2  /* { dg-require-effective-target sse2 } */
       3  /* { dg-options "-O3 -msse2" } */
       4  
       5  #ifndef CHECK_H
       6  #define CHECK_H "sse2-check.h"
       7  #endif
       8  
       9  #ifndef TEST
      10  #define TEST sse2_test
      11  #endif
      12  
      13  #include CHECK_H
      14  
      15  #include <stdlib.h>
      16  
      17  #define N 512
      18  static short a1[N], a2[N], a3[N];
      19  static unsigned short b1[N], b2[N], b3[N];
      20  static int c1[N], c2[N], c3[N];
      21  static unsigned int d1[N], d2[N], d3[N];
      22  static long long e1[N], e2[N], e3[N];
      23  static unsigned long long g1[N], g2[N], g3[N];
      24  
      25  __attribute__((noinline, noclone)) void
      26  f1 (void)
      27  {
      28    int i;
      29    for (i = 0; i < N; ++i)
      30      a1[i] = a2[i] * a3[i];
      31  }
      32  
      33  __attribute__((noinline, noclone)) void
      34  f2 (void)
      35  {
      36    int i;
      37    for (i = 0; i < N; ++i)
      38      b1[i] = b2[i] * b3[i];
      39  }
      40  
      41  __attribute__((noinline, noclone)) void
      42  f3 (void)
      43  {
      44    int i;
      45    for (i = 0; i < N; ++i)
      46      c1[i] = c2[i] * c3[i];
      47  }
      48  
      49  __attribute__((noinline, noclone)) void
      50  f4 (void)
      51  {
      52    int i;
      53    for (i = 0; i < N; ++i)
      54      d1[i] = d2[i] * d3[i];
      55  }
      56  
      57  __attribute__((noinline, noclone)) void
      58  f5 (void)
      59  {
      60    int i;
      61    for (i = 0; i < N; ++i)
      62      e1[i] = e2[i] * e3[i];
      63  }
      64  
      65  __attribute__((noinline, noclone)) void
      66  f6 (void)
      67  {
      68    int i;
      69    for (i = 0; i < N; ++i)
      70      g1[i] = g2[i] * g3[i];
      71  }
      72  
      73  __attribute__((noinline, noclone)) void
      74  f7 (void)
      75  {
      76    int i;
      77    for (i = 0; i < N; ++i)
      78      c1[i] = a2[i] * a3[i];
      79  }
      80  
      81  __attribute__((noinline, noclone)) void
      82  f8 (void)
      83  {
      84    int i;
      85    for (i = 0; i < N; ++i)
      86      d1[i] = (unsigned int) b2[i] * b3[i];
      87  }
      88  
      89  __attribute__((noinline, noclone)) void
      90  f9 (void)
      91  {
      92    int i;
      93    for (i = 0; i < N; ++i)
      94      e1[i] = (long long) c2[i] * (long long) c3[i];
      95  }
      96  
      97  __attribute__((noinline, noclone)) void
      98  f10 (void)
      99  {
     100    int i;
     101    for (i = 0; i < N; ++i)
     102      g1[i] = (unsigned long long) d2[i] * (unsigned long long) d3[i];
     103  }
     104  
     105  __attribute__((noinline, noclone)) int
     106  f11 (void)
     107  {
     108    int i, r = 0;
     109    for (i = 0; i < N; ++i)
     110      r += a2[i] * a3[i];
     111    return r;
     112  }
     113  
     114  __attribute__((noinline, noclone)) unsigned int
     115  f12 (void)
     116  {
     117    int i;
     118    unsigned r = 0;
     119    for (i = 0; i < N; ++i)
     120      r += (unsigned int) b2[i] * b3[i];
     121    return r;
     122  }
     123  
     124  __attribute__((noinline, noclone)) long long
     125  f13 (void)
     126  {
     127    int i;
     128    long long r = 0;
     129    for (i = 0; i < N; ++i)
     130      r += (long long) c2[i] * (long long) c3[i];
     131    return r;
     132  }
     133  
     134  __attribute__((noinline, noclone)) unsigned long long
     135  f14 (void)
     136  {
     137    int i;
     138    unsigned long long r = 0;
     139    for (i = 0; i < N; ++i)
     140      r += (unsigned long long) d2[i] * (unsigned long long) d3[i];
     141    return r;
     142  }
     143  
     144  static void
     145  TEST (void)
     146  {
     147    int i;
     148    int s1 = 0;
     149    unsigned int s2 = 0;
     150    long long s3 = 0;
     151    unsigned long long s4 = 0;
     152    for (i = 0; i < N; ++i)
     153      {
     154        asm volatile ("" : : "r" (&s1) : "memory");
     155        asm volatile ("" : : "r" (&s2) : "memory");
     156        asm volatile ("" : : "r" (&s3) : "memory");
     157        asm volatile ("" : : "r" (&s4) : "memory");
     158        b2[i] = (int) rand ();
     159        b3[i] = (int) rand ();
     160        a2[i] = b2[i];
     161        a3[i] = b3[i];
     162        d2[i] = (((int) rand ()) << 16) | b2[i];
     163        d3[i] = (((int) rand ()) << 16) | b3[i];
     164        c2[i] = d2[i];
     165        c3[i] = d3[i];
     166        s1 += a2[i] * a3[i];
     167        s2 += (unsigned int) b2[i] * b3[i];
     168        s3 += (long long) c2[i] * (long long) c3[i];
     169        s4 += (unsigned long long) d2[i] * (unsigned long long) d3[i];
     170      }
     171    f1 ();
     172    f2 ();
     173    f3 ();
     174    f4 ();
     175    f5 ();
     176    f6 ();
     177    for (i = 0; i < N; ++i)
     178      {
     179        if (a1[i] != (short) (a2[i] * a3[i]))
     180  	abort ();
     181        if (b1[i] != (unsigned short) (b2[i] * b3[i]))
     182  	abort ();
     183        if (c1[i] != c2[i] * c3[i])
     184  	abort ();
     185        if (d1[i] != d2[i] * d3[i])
     186  	abort ();
     187        if (e1[i] != e2[i] * e3[i])
     188  	abort ();
     189        if (g1[i] != g2[i] * g3[i])
     190  	abort ();
     191      }
     192    f7 ();
     193    f8 ();
     194    f9 ();
     195    f10 ();
     196    for (i = 0; i < N; ++i)
     197      {
     198        if (c1[i] != a2[i] * a3[i])
     199  	abort ();
     200        if (d1[i] != b2[i] * b3[i])
     201  	abort ();
     202        if (e1[i] != (long long) c2[i] * (long long) c3[i])
     203  	abort ();
     204        if (g1[i] != (unsigned long long) d2[i] * (unsigned long long) d3[i])
     205  	abort ();
     206      }
     207    if (f11 () != s1 || f12 () != s2 || f13 () != s3 || f14 () != s4)
     208      abort ();
     209  }