1  #include <nmmintrin.h>
       2  #include <string.h>
       3  
       4  #define CFLAG 0x00000001
       5  #define ZFLAG 0x00000002
       6  #define SFLAG 0x00000004
       7  #define OFLAG 0x00000008
       8  #define AFLAG 0x00000010
       9  #define PFLAG 0x00000020
      10  
      11  #define PCMPSTR_EQ(X, Y, RES) \
      12    {							\
      13      int __size = (sizeof (*X) ^ 3) * 8;			\
      14      int __i, __j;					\
      15      for (__i = 0; __i < __size; __i++)			\
      16        for (__j = 0; __j < __size; __j++)		\
      17          RES[__j][__i] = (X[__i] == Y[__j]);		\
      18    }
      19  
      20  #define PCMPSTR_RNG(X, Y, RES) \
      21    {							\
      22      int __size = (sizeof (*X) ^ 3) * 8;			\
      23      int __i, __j;					\
      24      for (__j = 0; __j < __size; __j++)			\
      25        for (__i = 0; __i < __size - 1; __i += 2)		\
      26  	{						\
      27  	  RES[__j][__i] = (Y[__j] >= X[__i]);		\
      28  	  RES[__j][__i+1] = (Y[__j] <= X[__i + 1]);	\
      29  	}						\
      30    }
      31  
      32  static void
      33  override_invalid (unsigned char res[16][16], int la, int lb,
      34  		  const int mode, int dim)
      35  {
      36    int i, j;
      37  
      38    for (j = 0; j < dim; j++)
      39      for (i = 0; i < dim; i++)
      40        if (i < la && j >= lb)
      41  	res[j][i] = 0;
      42        else if (i >= la)
      43  	switch ((mode & 0x0C))
      44  	  {
      45  	  case _SIDD_CMP_EQUAL_ANY:
      46  	  case _SIDD_CMP_RANGES:
      47  	    res[j][i] = 0;
      48  	    break;
      49  	  case _SIDD_CMP_EQUAL_EACH:
      50  	    res[j][i] = (j >= lb) ? 1: 0;
      51  	    break;
      52  	  case _SIDD_CMP_EQUAL_ORDERED:
      53  	    res[j][i] = 1;
      54  	    break;
      55            }
      56  }
      57  
      58  static void  
      59  calc_matrix (__m128i a, int la, __m128i b, int lb, const int mode,
      60  	     unsigned char res[16][16])
      61  {
      62    union
      63      {
      64        __m128i x;
      65        signed char sc[16];
      66        unsigned char uc[16];
      67        signed short ss[8];
      68        unsigned short us[8];
      69      } d, s;
      70  
      71    d.x = a;
      72    s.x = b;
      73  
      74    switch ((mode & 3))
      75      {
      76      case _SIDD_UBYTE_OPS:
      77        if ((mode & 0x0C) == _SIDD_CMP_RANGES)
      78  	{
      79  	  PCMPSTR_RNG (d.uc, s.uc, res);
      80  	}
      81        else
      82  	{
      83  	  PCMPSTR_EQ (d.uc, s.uc, res);
      84  	}
      85        break;
      86      case _SIDD_UWORD_OPS:
      87        if ((mode & 0x0C) == _SIDD_CMP_RANGES)
      88  	{
      89  	  PCMPSTR_RNG (d.us, s.us, res);
      90  	}
      91        else
      92  	{
      93  	  PCMPSTR_EQ (d.us, s.us, res);
      94  	}
      95        break;
      96      case _SIDD_SBYTE_OPS:
      97        if ((mode & 0x0C) == _SIDD_CMP_RANGES)
      98  	{
      99  	  PCMPSTR_RNG (d.sc, s.sc, res);
     100  	}
     101        else
     102  	{
     103  	  PCMPSTR_EQ (d.sc, s.sc, res);
     104  	}
     105        break;
     106      case _SIDD_SWORD_OPS:
     107        if ((mode & 0x0C) == _SIDD_CMP_RANGES)
     108  	{
     109  	  PCMPSTR_RNG (d.ss, s.ss, res);
     110  	}
     111        else
     112  	{
     113  	  PCMPSTR_EQ (d.ss, s.ss, res);
     114  	}
     115        break;
     116      }
     117  
     118    override_invalid (res, la, lb, mode, (mode & 1) == 0 ? 16 : 8);
     119  }
     120  
     121  static int 
     122  calc_res (__m128i a, int la, __m128i b, int lb, const int mode)
     123  {
     124    unsigned char mtx[16][16];
     125    int i, j, k, dim, res = 0;
     126  
     127    memset (mtx, 0, sizeof (mtx));
     128  
     129    dim = (mode & 1) == 0 ? 16 : 8;
     130  
     131    if (la < 0)
     132      la = -la;
     133  
     134    if (lb < 0)
     135      lb = -lb;
     136  
     137    if (la > dim)
     138      la = dim;
     139   
     140    if (lb > dim)
     141      lb = dim;
     142  
     143    calc_matrix (a, la, b, lb, mode, mtx);
     144  
     145    switch ((mode & 0x0C))
     146      {
     147      case _SIDD_CMP_EQUAL_ANY:
     148        for (i = 0; i < dim; i++)
     149  	for (j = 0; j < dim; j++)
     150  	  if (mtx[i][j])
     151  	    res |= (1 << i);
     152        break;
     153  
     154       case _SIDD_CMP_RANGES:
     155        for (i = 0; i < dim; i += 2)
     156  	for(j = 0; j < dim; j++)
     157  	  if (mtx[j][i] && mtx[j][i+1])
     158  	    res |= (1 << j);
     159        break;
     160  
     161       case _SIDD_CMP_EQUAL_EACH:
     162        for(i = 0; i < dim; i++)
     163  	if (mtx[i][i])
     164  	  res |= (1 << i);
     165        break;
     166  
     167       case _SIDD_CMP_EQUAL_ORDERED:
     168        for(i = 0; i < dim; i++)
     169  	{
     170  	  unsigned char val = 1;
     171  
     172  	  for (j = 0, k = i; j < dim - i && k < dim; j++, k++)
     173  	    val &= mtx[k][j];
     174  	  
     175  	  if (val)
     176  	    res |= (1 << i);
     177  	  else
     178  	    res &= ~(1 << i);
     179  	}
     180        break;
     181      }
     182  
     183    switch ((mode & 0x30))
     184      {
     185      case _SIDD_POSITIVE_POLARITY:
     186      case _SIDD_MASKED_POSITIVE_POLARITY:
     187        break;
     188  
     189      case _SIDD_NEGATIVE_POLARITY:
     190        res ^= -1;
     191        break;
     192  
     193      case _SIDD_MASKED_NEGATIVE_POLARITY:
     194        for (i = 0; i < lb; i++)
     195  	if (res & (1 << i))
     196  	  res &= ~(1 << i);
     197  	else
     198  	  res |= (1 << i);
     199        break;
     200      }
     201  
     202    return res & ((dim == 8) ? 0xFF : 0xFFFF);
     203  }
     204  
     205  static int
     206  cmp_flags (__m128i a, int la, __m128i b, int lb,
     207  	   int mode, int res2, int is_implicit)
     208  {
     209    int i;
     210    int flags = 0;
     211    int is_bytes_mode = (mode & 1) == 0;
     212    union
     213      {
     214        __m128i x;
     215        unsigned char uc[16];
     216        unsigned short us[8];
     217      } d, s;
     218  
     219    d.x = a;
     220    s.x = b;
     221  
     222    /* CF: reset if (RES2 == 0), set otherwise.  */
     223    if (res2 != 0)
     224      flags |= CFLAG;
     225  
     226    if (is_implicit)
     227      {
     228        /* ZF: set if any byte/word of src xmm operand is null, reset
     229  	 otherwise.
     230  	 SF: set if any byte/word of dst xmm operand is null, reset
     231  	 otherwise.  */
     232  
     233        if (is_bytes_mode)
     234  	{
     235  	  for (i = 0; i < 16; i++)
     236  	    {
     237  	      if (s.uc[i] == 0)
     238  		flags |= ZFLAG;
     239  	      if (d.uc[i] == 0)
     240  		flags |= SFLAG;
     241              }
     242  	}
     243        else
     244  	{
     245  	  for (i = 0; i < 8; i++)
     246  	    {
     247  	      if (s.us[i] == 0)
     248  		flags |= ZFLAG;
     249  	      if (d.us[i] == 0)
     250  		flags |= SFLAG;
     251              }
     252          }
     253      }
     254    else
     255      {
     256        /* ZF: set if abs value of EDX/RDX < 16 (8), reset otherwise.
     257  	 SF: set if abs value of EAX/RAX < 16 (8), reset otherwise.  */
     258        int max_ind = is_bytes_mode ? 16 : 8;
     259  
     260        if (la < 0)
     261  	la = -la;
     262        if (lb < 0)
     263  	lb = -lb;
     264  
     265        if (lb < max_ind)
     266  	flags |= ZFLAG;
     267        if (la < max_ind)
     268  	flags |= SFLAG;
     269      }
     270  
     271    /* OF: equal to RES2[0].  */
     272    if ((res2 & 0x1))
     273      flags |= OFLAG;
     274  
     275    /* AF: Reset.
     276       PF: Reset.  */
     277    return flags;
     278  }
     279  
     280  static int
     281  cmp_indexed (__m128i a, int la, __m128i b, int lb,
     282  	     const int mode, int *res2)
     283  {
     284    int i, ndx;
     285    int dim = (mode & 1) == 0 ? 16 : 8;
     286    int r2;
     287    
     288    r2 = calc_res (a, la, b, lb, mode);
     289  
     290    ndx = dim;
     291    if ((mode & 0x40))
     292      {
     293        for (i = dim - 1; i >= 0; i--)
     294  	if (r2 & (1 << i))
     295  	  {
     296  	    ndx = i;
     297  	    break;
     298  	  }
     299      }
     300    else
     301      {
     302        for (i = 0; i < dim; i++)
     303  	if ((r2 & (1 << i)))
     304  	  {
     305  	    ndx = i;
     306  	    break;
     307  	  }
     308      }
     309  
     310     *res2 = r2;
     311     return ndx;
     312  }
     313  
     314  static __m128i 
     315  cmp_masked (__m128i a, int la, __m128i b, int lb,
     316  	    const int mode, int *res2)
     317  {
     318    union
     319      {
     320        __m128i x;
     321        char c[16];
     322        short s[8];
     323      } ret;
     324    int i;
     325    int dim = (mode & 1) == 0 ? 16 : 8;
     326    union
     327      {
     328        int i;
     329        char c[4];
     330        short s[2];
     331      } r2;
     332  
     333    r2.i = calc_res (a, la, b, lb, mode);
     334  
     335    memset (&ret, 0, sizeof (ret));
     336  
     337    if (mode & 0x40)
     338      {
     339        for (i = 0; i < dim; i++)
     340  	if (dim == 8)
     341  	  ret.s [i] = (r2.i & (1 << i)) ? -1 : 0;
     342  	else
     343  	  ret.c [i] = (r2.i & (1 << i)) ? -1 : 0;
     344      }
     345    else
     346      {
     347        if (dim == 16)
     348  	ret.s[0] = r2.s[0];
     349        else
     350  	ret.c[0] = r2.c[0];
     351      }
     352  
     353     *res2 = r2.i;
     354  
     355     return ret.x;
     356  }
     357  
     358  static int 
     359  calc_str_len (__m128i a, const int mode)
     360  {
     361    union
     362      {
     363        __m128i x;
     364        char c[16];
     365        short s[8];
     366      } s;
     367    int i;
     368    int dim  = (mode & 1) == 0 ? 16 : 8;
     369  
     370    s.x = a;
     371  
     372    if ((mode & 1))
     373      {
     374        for (i = 0; i < dim; i++)
     375  	if (s.s[i] == 0)
     376  	  break;
     377      }
     378    else
     379      {
     380        for (i = 0; i < dim; i++)
     381         if (s.c[i] == 0)
     382  	 break;
     383      }
     384  
     385    return i;
     386  }
     387  
     388  static inline int
     389  cmp_ei (__m128i *a, int la, __m128i *b, int lb,
     390  	const int mode, int *flags)
     391  {
     392    int res2;
     393    int index = cmp_indexed (*a, la, *b, lb, mode, &res2);
     394  
     395    if (flags != NULL)
     396      *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);
     397  
     398    return index;
     399  }
     400  
     401  static inline int
     402  cmp_ii (__m128i *a, __m128i *b, const int mode, int *flags)
     403  {
     404    int la, lb;
     405    int res2;
     406    int index;
     407  
     408    la = calc_str_len (*a, mode);
     409    lb = calc_str_len (*b, mode);
     410  
     411    index = cmp_indexed (*a, la, *b, lb, mode, &res2);
     412  
     413    if (flags != NULL) 
     414      *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);
     415  
     416    return index;
     417  }
     418  
     419  static inline __m128i
     420  cmp_em (__m128i *a, int la, __m128i *b, int lb,
     421  	const int mode, int *flags )
     422  {
     423    int res2;
     424    __m128i mask = cmp_masked (*a, la, *b, lb, mode, &res2);
     425  
     426    if (flags != NULL)
     427      *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);
     428  
     429    return mask;
     430  }
     431  
     432  static inline __m128i
     433  cmp_im (__m128i *a, __m128i *b, const int mode, int *flags)
     434  {
     435    int la, lb;
     436    int res2;
     437    __m128i mask;
     438  
     439    la = calc_str_len (*a, mode);
     440    lb = calc_str_len (*b, mode);
     441  
     442    mask = cmp_masked (*a, la, *b, lb, mode, &res2);
     443    if (flags != NULL)
     444      *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);
     445  
     446    return mask;
     447  }