(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
sse4_1-dpps-1.c
       1  /* { dg-do run } */
       2  /* { dg-require-effective-target sse4 } */
       3  /* { dg-options "-O2 -msse4.1" } */
       4  
       5  #ifndef CHECK_H
       6  #define CHECK_H "sse4_1-check.h"
       7  #endif
       8  
       9  #ifndef TEST
      10  #define TEST sse4_1_test
      11  #endif
      12  
      13  #include CHECK_H
      14  
      15  #include <smmintrin.h>
      16  
      17  #define lmskN  0x00
      18  #define lmsk0  0x01
      19  #define lmsk1  0x02
      20  #define lmsk2  0x04
      21  #define lmsk3  0x08
      22  #define lmsk01 0x03
      23  #define lmsk02 0x05
      24  #define lmsk03 0x09
      25  #define lmsk12 0x06
      26  #define lmsk13 0x0A
      27  #define lmsk23 0x0C
      28  #define lmskA  0x0F
      29  
      30  #define hmskN  0x00
      31  #define hmskA  0xF0
      32  #define hmsk0  0x10
      33  #define hmsk1  0x20
      34  #define hmsk2  0x40
      35  #define hmsk3  0x80
      36  #define hmsk01 0x30
      37  #define hmsk02 0x50
      38  #define hmsk03 0x90
      39  #define hmsk12 0x60
      40  #define hmsk13 0xA0
      41  #define hmsk23 0xC0
      42  
      43  #ifndef HIMASK
      44  #define HIMASK hmskA
      45  #endif
      46  
      47  static void
      48  TEST (void)
      49  {
      50    union
      51      {
      52        __m128 x;
      53        float f[4];
      54      } val1, val2, res[16];
      55    int masks[16];
      56    int i, j;
      57  
      58    val1.f[0] = 2.;
      59    val1.f[1] = 3.;
      60    val1.f[2] = 4.;
      61    val1.f[3] = 5.;
      62  
      63    val2.f[0] = 10.;
      64    val2.f[1] = 100.;
      65    val2.f[2] = 1000.;
      66    val2.f[3] = 10000.;
      67  
      68    res[0].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk0); 
      69    res[1].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk1); 
      70    res[2].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk2); 
      71    res[3].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk3); 
      72    res[4].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk01); 
      73    res[5].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk02); 
      74    res[6].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk03); 
      75    res[7].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk12); 
      76    res[8].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk13); 
      77    res[9].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk23); 
      78    res[10].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk0)); 
      79    res[11].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk1)); 
      80    res[12].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk2)); 
      81    res[13].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk3)); 
      82    res[14].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskN); 
      83    res[15].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskA); 
      84  
      85    masks[0] = HIMASK | lmsk0; 
      86    masks[1] = HIMASK | lmsk1; 
      87    masks[2] = HIMASK | lmsk2; 
      88    masks[3] = HIMASK | lmsk3; 
      89    masks[4] = HIMASK | lmsk01; 
      90    masks[5] = HIMASK | lmsk02; 
      91    masks[6] = HIMASK | lmsk03; 
      92    masks[7] = HIMASK | lmsk12; 
      93    masks[8] = HIMASK | lmsk13; 
      94    masks[9] = HIMASK | lmsk23; 
      95    masks[10] = HIMASK | (0x0F & ~lmsk0); 
      96    masks[11] = HIMASK | (0x0F & ~lmsk1); 
      97    masks[12] = HIMASK | (0x0F & ~lmsk2); 
      98    masks[13] = HIMASK | (0x0F & ~lmsk3); 
      99    masks[14] = HIMASK | lmskN; 
     100    masks[15] = HIMASK | lmskA; 
     101  
     102    for (i = 0; i <= 15; i++)
     103      {
     104        float tmp = 0.;
     105  
     106        for (j = 0; j < 4; j++)
     107  	if ((HIMASK & (0x10 << j)))
     108  	  tmp += val1.f[j] * val2.f[j];
     109  
     110        for (j = 0; j < 4; j++)
     111  	if ((masks[i] & (1 << j)) && res[i].f[j] != tmp)
     112  	  abort ();
     113     }
     114  }