1  /* { dg-do run } */
       2  /* { dg-require-effective-target xop } */
       3  /* { dg-options "-O2 -mxop" } */
       4  
       5  #include "xop-check.h"
       6  
       7  #include <x86intrin.h>
       8  #include <string.h>
       9  
      10  #define NUM 10
      11  
      12  union
      13  {
      14    __m128i x[NUM];
      15    unsigned char  ssi[NUM * 16];
      16    unsigned short si[NUM * 8];
      17    unsigned int li[NUM * 4];
      18    unsigned long long  lli[NUM * 2];
      19  } dst, res, src1;
      20  
      21  static void
      22  init_byte ()
      23  {
      24    int i;
      25    for (i=0; i < NUM * 16; i++)
      26      src1.ssi[i] = i;
      27  }
      28  
      29  static void
      30  init_word ()
      31  {
      32    int i;
      33    for (i=0; i < NUM * 8; i++)
      34      src1.si[i] = i;
      35  }
      36  
      37  static void
      38  init_dword ()
      39  {
      40    int i;
      41    for (i=0; i < NUM * 4; i++)
      42      src1.li[i] = i;
      43  }
      44  
      45  static int 
      46  check_byte2word ()
      47  {
      48    int i, j, s, t, check_fails = 0;
      49    for (i = 0; i < NUM * 16; i = i + 16)
      50      {
      51        for (j = 0; j < 8; j++)
      52  	{
      53  	  t = i + (2 * j);
      54  	  s = (i / 2) + j;
      55  	  res.si[s] = src1.ssi[t] + src1.ssi[t + 1] ;
      56  	  if (res.si[s] != dst.si[s]) 
      57  	    check_fails++;	
      58  	}
      59      }
      60    return check_fails;
      61  }
      62  
      63  static int 
      64  check_byte2dword ()
      65  {
      66    int i, j, s, t, check_fails = 0;
      67    for (i = 0; i < NUM * 16; i = i + 16)
      68      {
      69        for (j = 0; j < 4; j++)
      70  	{
      71  	  t = i + (4 * j);
      72  	  s = (i / 4) + j;
      73  	  res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2]
      74  	              + src1.ssi[t + 3]); 
      75  	  if (res.li[s] != dst.li[s]) 
      76  	    check_fails++;
      77  	}
      78      }
      79    return check_fails;
      80  }
      81  
      82  static int
      83  check_byte2qword ()
      84  {
      85    int i, j, s, t, check_fails = 0;
      86    for (i = 0; i < NUM * 16; i = i + 16)
      87      {
      88        for (j = 0; j < 2; j++)
      89  	{
      90  	  t = i + (8 * j);
      91  	  s = (i / 8) + j;
      92  	  res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] 
      93  		       + src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5])
      94  	               + (src1.ssi[t + 6] + src1.ssi[t + 7])); 
      95  	  if (res.lli[s] != dst.lli[s]) 
      96  	    check_fails++;
      97  	}
      98      }
      99    return check_fails;
     100  }
     101  
     102  static int
     103  check_word2dword ()
     104  {
     105    int i, j, s, t, check_fails = 0;
     106    for (i = 0; i < NUM * 8; i = i + 8)
     107      {
     108        for (j = 0; j < 4; j++)
     109  	{
     110  	  t = i + (2 * j);
     111  	  s = (i / 2) + j;
     112  	  res.li[s] = src1.si[t] + src1.si[t + 1] ;
     113  	  if (res.li[s] != dst.li[s]) 
     114  	    check_fails++;	
     115  	}
     116      }
     117    return check_fails;
     118  }
     119  
     120  static int 
     121  check_word2qword ()
     122  {
     123    int i, j, s, t, check_fails = 0;
     124    for (i = 0; i < NUM * 8; i = i + 8)
     125      {
     126        for (j = 0; j < 2; j++)
     127  	{
     128  	  t = i + (4 * j);
     129  	  s = (i / 4) + j;
     130  	  res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2]
     131  	               + src1.si[t + 3]); 
     132  	  if (res.lli[s] != dst.lli[s]) 
     133  	    check_fails++;
     134  	}
     135      }
     136    return check_fails;
     137  }
     138  
     139  static int
     140  check_dword2qword ()
     141  {
     142    int i, j, s, t, check_fails = 0;
     143    for (i = 0; i < NUM * 4; i = i + 4)
     144      {
     145        for (j = 0; j < 2; j++)
     146  	{
     147  	  t = i + (2 * j);
     148  	  s = (i / 2) + j;
     149  	  res.lli[s] = src1.li[t] + src1.li[t + 1] ;
     150  	  if (res.lli[s] != dst.lli[s]) 
     151  	    check_fails++;	
     152  	}
     153      }
     154    return check_fails;
     155  }
     156  
     157  static void
     158  xop_test (void)
     159  {
     160    int i;
     161    
     162    /* Check haddubw */
     163    init_byte ();
     164    
     165    for (i = 0; i < NUM; i++)
     166      dst.x[i] = _mm_haddw_epu8 (src1.x[i]);
     167    
     168    if (check_byte2word())
     169    abort ();
     170    
     171    /* Check haddubd */
     172    for (i = 0; i < NUM; i++)
     173      dst.x[i] = _mm_haddd_epu8 (src1.x[i]);
     174    
     175    if (check_byte2dword())
     176      abort (); 
     177    
     178    /* Check haddubq */
     179    for (i = 0; i < NUM; i++)
     180      dst.x[i] = _mm_haddq_epu8 (src1.x[i]);
     181    
     182    if (check_byte2qword())
     183      abort ();
     184  
     185    /* Check hadduwd */
     186    init_word ();
     187  
     188    for (i = 0; i < NUM; i++)
     189      dst.x[i] = _mm_haddd_epu16 (src1.x[i]);
     190    
     191    if (check_word2dword())
     192      abort (); 
     193     
     194    /* Check haddbuwq */
     195    for (i = 0; i < NUM; i++)
     196      dst.x[i] = _mm_haddq_epu16 (src1.x[i]);
     197    
     198    if (check_word2qword())
     199      abort ();
     200   
     201    /* Check hadudq */
     202    init_dword ();
     203    
     204    for (i = 0; i < NUM; i++)
     205      dst.x[i] = _mm_haddq_epu32 (src1.x[i]);
     206    
     207    if (check_dword2qword())
     208      abort ();
     209  }