1  /* { dg-do run } */
       2  /* { dg-require-effective-target sse4 } */
       3  /* { dg-options "-O2 -msse4.1" } */
       4  
       5  #include "sse4_1-check.h"
       6  
       7  #include <smmintrin.h>
       8  #include "math_m_pi.h"
       9  #include <string.h>
      10  
      11  #define NUM 64
      12  
      13  static void
      14  init_round (float *src)
      15  {
      16    int i, sign = 1;
      17    float f = rand ();
      18  
      19    for (i = 0; i < NUM; i++)
      20      {
      21        src[i] = (i + 1)* f * M_PI * sign;
      22        if (i < (NUM / 2))
      23  	{
      24            if ((i % 6) == 0)
      25  	    f = f * src[i];
      26          }
      27        else if (i == (NUM / 2))
      28  	f = rand ();
      29        else if ((i % 6) == 0)
      30  	f = 1 / (f * (i + 1) * src[i] * M_PI *sign);
      31        sign = -sign;
      32      }
      33  }
      34  
      35  static float
      36  do_round (float f, int type)
      37  {
      38    unsigned short saved_cw, new_cw, clr_mask;
      39    float ret;
      40  
      41    if ((type & 4))
      42      {
      43        type = 0;
      44        clr_mask = 0xFFFF;
      45      }
      46    else
      47      {
      48        type = 0x003F | ((type & 3) << 10);
      49        clr_mask = ~0x0C3F;
      50      }
      51  
      52    __asm__ ("fnstcw %0" : "=m" (saved_cw));
      53  
      54    new_cw = saved_cw & clr_mask;
      55    new_cw |= type;
      56  
      57    __asm__ ("fldcw %2\n\t"
      58  	   "frndint\n\t"
      59  	   "fldcw %3" : "=t" (ret)
      60  		      : "0" (f), "m" (new_cw), "m" (saved_cw));
      61    return ret;
      62  }
      63  
      64  static void
      65  sse4_1_test (void)
      66  {
      67    int i, j;
      68    float f;
      69    union
      70      {
      71        __m128 x[NUM / 4];
      72        float f[NUM];
      73      } dst, src;
      74  
      75    init_round (src.f);
      76    memset (&dst, 0, NUM * sizeof(float));
      77  
      78    for (i = 0; i < NUM / 4 ; i++)
      79      dst.x[i] =  _mm_round_ss (dst.x[i], src.x[i], _MM_FROUND_RINT);
      80  
      81    for (i = 0; i < NUM; i += 4)
      82      {
      83        for (j = 0; j < 3; j++)
      84  	if (dst.f[i + j + 1] != 0.0)
      85  	  abort ();
      86  
      87        f = do_round (src.f[i], 0x04);
      88        if (f != dst.f[i])
      89  	abort ();
      90      }
      91  
      92    for (i = 0; i < NUM / 4 ; i++)
      93      dst.x[i] =  _mm_round_ss (dst.x[i], src.x[i], _MM_FROUND_NEARBYINT);
      94  
      95    for (i = 0; i < NUM; i += 4)
      96      {
      97        for (j = 0; j < 3; j++)
      98  	if (dst.f[i + j + 1] != 0.0)
      99  	  abort ();
     100  
     101        f = do_round (src.f[i], 0x0c);
     102        if (f != dst.f[i])
     103  	abort ();
     104      }
     105  }