1  /* { dg-do run } */
       2  /* { dg-require-effective-target xop } */
       3  /* { dg-options "-O2 -mxop" } */
       4  
       5  #include "xop-check.h"
       6  
       7  #include <x86intrin.h>
       8  #include <string.h>
       9  
      10  #define NUM 10
      11  
      12  union
      13  {
      14    __m128i x[NUM];
      15    signed char ssi[NUM * 16];
      16    short si[NUM * 8];
      17    int li[NUM * 4];
      18    long long lli[NUM * 2];
      19  } dst, res, src1;
      20  
      21  static void
      22  init_sbyte ()
      23  {
      24    int i;
      25    for (i=0; i < NUM * 16; i++)
      26      src1.ssi[i] = i;
      27  }
      28  
      29  static void
      30  init_sword ()
      31  {
      32    int i;
      33    for (i=0; i < NUM * 8; i++)
      34      src1.si[i] = i;
      35  }
      36  
      37  static void
      38  init_sdword ()
      39  {
      40    int i;
      41    for (i=0; i < NUM * 4; i++)
      42      src1.li[i] = i;
      43  }
      44  
      45  static int 
      46  check_sbyte2word ()
      47  {
      48    int i, j, s, t, check_fails = 0;
      49    for (i = 0; i < NUM * 16; i = i + 16)
      50      {
      51        for (j = 0; j < 8; j++)
      52  	{
      53  	  t = i + (2 * j);
      54  	  s = (i / 2) + j;
      55  	  res.si[s] = src1.ssi[t] + src1.ssi[t + 1] ;
      56  	  if (res.si[s] != dst.si[s]) 
      57  	    check_fails++;	
      58  	}
      59      }
      60    return check_fails;
      61  }
      62  
      63  static int 
      64  check_sbyte2dword ()
      65  {
      66    int i, j, s, t, check_fails = 0;
      67    for (i = 0; i < NUM * 16; i = i + 16)
      68      {
      69        for (j = 0; j < 4; j++)
      70  	{
      71  	  t = i + (4 * j);
      72  	  s = (i / 4) + j;
      73  	  res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2]
      74  	              + src1.ssi[t + 3]); 
      75  	  if (res.li[s] != dst.li[s]) 
      76  	    check_fails++;
      77  	}
      78      }
      79    return check_fails;
      80  }
      81  
      82  static int
      83  check_sbyte2qword ()
      84  {
      85    int i, j, s, t, check_fails = 0;
      86    for (i = 0; i < NUM * 16; i = i + 16)
      87      {
      88        for (j = 0; j < 2; j++)
      89  	{
      90  	  t = i + (8 * j);
      91  	  s = (i / 8) + j;
      92  	  res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] 
      93  		       + src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5])
      94  	               + (src1.ssi[t + 6] + src1.ssi[t + 7])); 
      95  	  if (res.lli[s] != dst.lli[s]) 
      96  	    check_fails++;
      97  	}
      98      }
      99    return check_fails;
     100  }
     101  
     102  static int
     103  check_sword2dword ()
     104  {
     105    int i, j, s, t, check_fails = 0;
     106    for (i = 0; i < NUM * 8; i = i + 8)
     107      {
     108        for (j = 0; j < 4; j++)
     109  	{
     110  	  t = i + (2 * j);
     111  	  s = (i / 2) + j;
     112  	  res.li[s] = src1.si[t] + src1.si[t + 1] ;
     113  	  if (res.li[s] != dst.li[s]) 
     114  	    check_fails++;	
     115  	}
     116      }
     117    return check_fails;
     118  }
     119  
     120  static int 
     121  check_sword2qword ()
     122  {
     123    int i, j, s, t, check_fails = 0;
     124    for (i = 0; i < NUM * 8; i = i + 8)
     125      {
     126        for (j = 0; j < 2; j++)
     127  	{
     128  	  t = i + (4 * j);
     129  	  s = (i / 4) + j;
     130  	  res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2]
     131  	               + src1.si[t + 3]); 
     132  	  if (res.lli[s] != dst.lli[s]) 
     133  	    check_fails++;
     134  	}
     135      }
     136    return check_fails;
     137  }
     138  
     139  static int
     140  check_dword2qword ()
     141  {
     142    int i, j, s, t, check_fails = 0;
     143    for (i = 0; i < NUM * 4; i = i + 4)
     144      {
     145        for (j = 0; j < 2; j++)
     146  	{
     147  	  t = i + (2 * j);
     148  	  s = (i / 2) + j;
     149  	  res.lli[s] = src1.li[t] + src1.li[t + 1] ;
     150  	  if (res.lli[s] != dst.lli[s]) 
     151  	    check_fails++;	
     152  	}
     153      }
     154    return check_fails;
     155  }
     156  
     157  static void
     158  xop_test (void)
     159  {
     160    int i;
     161  
     162    init_sbyte ();
     163    
     164    for (i = 0; i < NUM; i++)
     165      dst.x[i] = _mm_haddw_epi8 (src1.x[i]);
     166    
     167    if (check_sbyte2word())
     168      abort ();
     169  
     170    for (i = 0; i < NUM; i++)
     171      dst.x[i] = _mm_haddd_epi8 (src1.x[i]);
     172    
     173    if (check_sbyte2dword())
     174      abort (); 
     175  
     176    for (i = 0; i < NUM; i++)
     177      dst.x[i] = _mm_haddq_epi8 (src1.x[i]);
     178    
     179    if (check_sbyte2qword())
     180      abort ();
     181  
     182    init_sword ();
     183  
     184    for (i = 0; i < NUM; i++)
     185      dst.x[i] = _mm_haddd_epi16 (src1.x[i]);
     186    
     187    if (check_sword2dword())
     188      abort (); 
     189  
     190    for (i = 0; i < NUM; i++)
     191      dst.x[i] = _mm_haddq_epi16 (src1.x[i]);
     192    
     193    if (check_sword2qword())
     194      abort ();
     195  
     196    init_sdword ();
     197  
     198    for (i = 0; i < NUM; i++)
     199      dst.x[i] = _mm_haddq_epi32 (src1.x[i]);
     200    
     201    if (check_dword2qword())
     202      abort ();
     203  }