(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
sse4_1-insertps-4.c
       1  /* { dg-do run } */
       2  /* { dg-require-effective-target sse4 } */
       3  /* { dg-options "-O2 -msse4.1" } */
       4  
       5  #ifndef CHECK_H
       6  #define CHECK_H "sse4_1-check.h"
       7  #endif
       8  
       9  #ifndef TEST
      10  #define TEST sse4_1_test
      11  #endif
      12  
      13  #include CHECK_H
      14  
      15  #include <smmintrin.h>
      16  #include <string.h>
      17  
      18  #define msk0 0x41
      19  #define msk1 0x90
      20  #define msk2 0xe9
      21  #define msk3 0x70
      22  
      23  #define msk4 0xFC
      24  #define msk5 0x05
      25  #define msk6 0x0A
      26  #define msk7 0x0F
      27  
      28  union
      29    {
      30      __m128 x;
      31      float f[4];
      32    } val1;
      33  
      34  static void
      35  TEST (void)
      36  {
      37    union
      38      {
      39        __m128 x;
      40        float f[4];
      41      } res[8], val2, tmp;
      42    int masks[8];
      43    int i, j;
      44  
      45    val2.f[0] = 55.0;
      46    val2.f[1] = 55.0;
      47    val2.f[2] = 55.0;
      48    val2.f[3] = 55.0;
      49  
      50    val1.f[0] = 1.;
      51    val1.f[1] = 2.;
      52    val1.f[2] = 3.;
      53    val1.f[3] = 4.;
      54  
      55    asm volatile ("" : "+m" (val1));
      56    res[0].x = _mm_insert_ps (val2.x, val1.x, msk0);
      57    asm volatile ("" : "+m" (val1));
      58    res[1].x = _mm_insert_ps (val2.x, val1.x, msk1);
      59    asm volatile ("" : "+m" (val1));
      60    res[2].x = _mm_insert_ps (val2.x, val1.x, msk2);
      61    asm volatile ("" : "+m" (val1));
      62    res[3].x = _mm_insert_ps (val2.x, val1.x, msk3);
      63  
      64    masks[0] = msk0;
      65    masks[1] = msk1;
      66    masks[2] = msk2;
      67    masks[3] = msk3;
      68  
      69    for (i = 0; i < 4; i++)
      70      {
      71        asm volatile ("" : "+m" (val1));
      72        res[i + 4].x = _mm_insert_ps (val2.x, val1.x, msk4);
      73      }
      74  
      75    masks[4] = msk4;
      76    masks[5] = msk4;
      77    masks[6] = msk4;
      78    masks[7] = msk4;
      79  
      80    for (i=0; i < 8; i++)
      81      {
      82        tmp = val2;
      83        tmp.f[(masks[i] & 0x30) >> 4] = val1.f[(masks[i] & 0xC0) >> 6];
      84  
      85        for (j = 0; j < 4; j++)
      86  	if (masks[i] & (0x1 << j))
      87  	  tmp.f[j] = 0.f;
      88  
      89        if (memcmp (&res[i], &tmp, sizeof (tmp)))
      90  	abort ();
      91      }
      92  }