(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
sse4_1-mpsadbw.c
       1  /* { dg-do run } */
       2  /* { dg-require-effective-target sse4 } */
       3  /* { dg-options "-O2 -msse4.1" } */
       4  
       5  #ifndef CHECK_H
       6  #define CHECK_H "sse4_1-check.h"
       7  #endif
       8  
       9  #ifndef TEST
      10  #define TEST sse4_1_test
      11  #endif
      12  
      13  #include CHECK_H
      14  
      15  #include <smmintrin.h>
      16  #include <string.h>
      17  
      18  #define msk0 0xC0  /* compute_mpsadbw decodes bits 2:0 = 000: v1 offset 0, v2 bytes 0-3.  */
      19  #define msk1 0x01  /* bits 2:0 = 001: v1 offset 0, v2 bytes 4-7.  */
      20  #define msk2 0xF2  /* bits 2:0 = 010: v1 offset 0, v2 bytes 8-11.  */
      21  #define msk3 0x03  /* bits 2:0 = 011: v1 offset 0, v2 bytes 12-15.  */
      22  #define msk4 0x84  /* bits 2:0 = 100: v1 offset 4, v2 bytes 0-3.  */
      23  #define msk5 0x05  /* bits 2:0 = 101: v1 offset 4, v2 bytes 4-7.  */
      24  #define msk6 0xE6  /* bits 2:0 = 110: v1 offset 4, v2 bytes 8-11.  */
      25  #define msk7 0x67  /* bits 2:0 = 111: v1 offset 4, v2 bytes 12-15; bits above bit 2 are not read by compute_mpsadbw.  */
      26  
      27  static __m128i
      28  compute_mpsadbw (unsigned char *v1, unsigned char *v2, int mask)
      29  {
      30    union
      31      {
      32        __m128i x;
      33        unsigned short s[8];
      34      } ret;
      35    unsigned char s[4];
      36    int i, j;
      37    int offs1, offs2;
      38  
      39    offs2 = 4 * (mask & 3);
      40    for (i = 0; i < 4; i++)
      41      s[i] = v2[offs2 + i];
      42  
      43    offs1 = 4 * ((mask & 4) >> 2);
      44    for (j = 0; j < 8; j++)
      45      {
      46        ret.s[j] = 0;
      47        for (i = 0; i < 4; i++)
      48  	ret.s[j] += abs (v1[offs1 + j + i] - s[i]);
      49      }
      50  
      51    return ret.x;
      52  }
      53  
      54  static void
      55  TEST (void)
      56  {
      57    union
      58      {
      59        __m128i x;
      60        unsigned int i[4];
      61        unsigned char c[16];
      62      } val1, val2, val3 [8];
      63    __m128i res[8], tmp;
      64    unsigned char masks[8];
      65    int i;
      66  
      67    val1.i[0] = 0x35251505;
      68    val1.i[1] = 0x75655545;
      69    val1.i[2] = 0xB5A59585;
      70    val1.i[3] = 0xF5E5D5C5;
      71  
      72    val2.i[0] = 0x31211101;
      73    val2.i[1] = 0x71615141;
      74    val2.i[2] = 0xB1A19181;
      75    val2.i[3] = 0xF1E1D1C1;
      76  
      77    for (i=0; i < 8; i++)
      78      switch (i % 3)
      79        {
      80        case 1:
      81  	val3[i].i[0] = 0xF1E1D1C1;
      82  	val3[i].i[1] = 0xB1A19181;
      83  	val3[i].i[2] = 0x71615141;
      84  	val3[i].i[3] = 0x31211101;
      85  	break;
      86        default:
      87  	val3[i].x = val2.x;
      88  	break;
      89        }
      90  
      91    /* Check mpsadbw imm8, xmm, xmm.  */
      92    res[0] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk0);
      93    res[1] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk1);
      94    res[2] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk2);
      95    res[3] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk3);
      96    res[4] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk4);
      97    res[5] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk5);
      98    res[6] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk6);
      99    res[7] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk7);
     100  
     101    masks[0] = msk0;
     102    masks[1] = msk1;
     103    masks[2] = msk2;
     104    masks[3] = msk3;
     105    masks[4] = msk4;
     106    masks[5] = msk5;
     107    masks[6] = msk6;
     108    masks[7] = msk7;
     109  
     110    for (i=0; i < 8; i++)
     111      {
     112        tmp = compute_mpsadbw (val1.c, val2.c, masks[i]);
     113        if (memcmp (&tmp, &res[i], sizeof (tmp)))
     114  	abort ();
     115      }
     116      
     117    /* Check mpsadbw imm8, m128, xmm.  */
     118    for (i=0; i < 8; i++)
     119      {
     120        res[i] = _mm_mpsadbw_epu8 (val1.x, val3[i].x, msk4);
     121        masks[i] = msk4;
     122      }
     123  
     124    for (i=0; i < 8; i++)
     125      {
     126        tmp = compute_mpsadbw (val1.c, val3[i].c, masks[i]);
     127        if (memcmp (&tmp, &res[i], sizeof (tmp)))
     128  	abort ();
     129      }
     130  }