1  #include <smmintrin.h>
       2  #include "math_m_pi.h"
       3  
       4  #define NUM 64
       5  
       6  static void
       7  init_round (FP_T *src)
       8  {
       9    int i, sign = 1;
      10    FP_T f = rand ();
      11  
      12    for (i = 0; i < NUM; i++)
      13      {
      14        src[i] = (i + 1)* f * M_PI * sign;
      15        if (i < (NUM / 2))
      16  	{
      17            if ((i % 6) == 0)
      18  	    f = f * src[i];
      19          }
      20        else if (i == (NUM / 2))
      21  	f = rand ();
      22        else if ((i % 6) == 0)
      23  	f = 1 / (f * (i + 1) * src[i] * M_PI *sign);
      24        sign = -sign;
      25      }
      26  }
      27  
      28  static FP_T
      29  do_round (FP_T f, int type)
      30  {
      31    unsigned short saved_cw, new_cw, clr_mask;
      32    FP_T ret;
      33  
      34    if ((type & 4))
      35      {
      36        type = 0;
      37        clr_mask = 0xFFFF;
      38      }
      39    else
      40      {
      41        type = 0x003F | ((type & 3) << 10);
      42        clr_mask = ~0x0C3F;
      43      }
      44  
      45    __asm__ ("fnstcw %0" : "=m" (saved_cw));
      46  
      47    new_cw = saved_cw & clr_mask;
      48    new_cw |= type;
      49  
      50    __asm__ ("fldcw %2\n\t"
      51  	   "frndint\n\t"
      52  	   "fldcw %3" : "=t" (ret)
      53  		      : "0" (f), "m" (new_cw), "m" (saved_cw));
      54    return ret;
      55  }
      56  
      57  static void
      58  sse4_1_test (void)
      59  {
      60    int i;
      61    FP_T f;
      62    union
      63      {
      64        VEC_T x[NUM / LOOP_INCREMENT];
      65        FP_T f[NUM];
      66      } dst, src;
      67  
      68    init_round (src.f);
      69  
      70    for (i = 0; i < NUM / LOOP_INCREMENT; i++)
      71      dst.x[i] =  ROUND_INTRIN (src.x[i], ROUND_MODE);
      72  
      73    for (i = 0; i < NUM; i += CHECK_LOOP_INCREMENT)
      74      {
      75        f = do_round (src.f[i], CHECK_ROUND_MODE);
      76       if (f != dst.f[i])
      77         abort ();
      78      }
      79  
      80    if (_MM_FROUND_TO_NEAREST_INT != 0x00
      81        || _MM_FROUND_TO_NEG_INF != 0x01
      82        || _MM_FROUND_TO_POS_INF != 0x02
      83        || _MM_FROUND_TO_ZERO != 0x03
      84        || _MM_FROUND_CUR_DIRECTION != 0x04
      85        || _MM_FROUND_RAISE_EXC != 0x00
      86        || _MM_FROUND_NO_EXC != 0x08
      87        || _MM_FROUND_NINT != 0x00
      88        || _MM_FROUND_FLOOR != 0x01
      89        || _MM_FROUND_CEIL != 0x02
      90        || _MM_FROUND_TRUNC != 0x03
      91        || _MM_FROUND_RINT != 0x04
      92        || _MM_FROUND_NEARBYINT != 0x0C)
      93      abort ();
      94  }