1  /* { dg-do run } */
       2  /* { dg-require-effective-target ssse3 } */
       3  /* { dg-options "-O2 -fno-strict-aliasing -mssse3" } */
       4  
       5  #ifndef CHECK_H
       6  #define CHECK_H "ssse3-check.h"
       7  #endif
       8  
       9  #ifndef TEST
      10  #define TEST ssse3_test
      11  #endif
      12  
      13  #include CHECK_H
      14  
      15  #include "ssse3-vals.h"
      16  
      17  #include <tmmintrin.h>
      18  
      19  /* Test the 64-bit form */
      20  static void
      21  ssse3_test_phsubsw (int *i1, int *i2, int *r)
      22  {
      23    __m64 t1 = *(__m64 *) i1;
      24    __m64 t2 = *(__m64 *) i2;
      25  
      26    *(__m64 *) r = _mm_hsubs_pi16 (t1, t2);
      27  
      28    _mm_empty ();
      29  }
      30  
      31  /* Test the 128-bit form */
      32  static void
      33  ssse3_test_phsubsw128 (int *i1, int *i2, int *r)
      34  {
      35    /* Assumes incoming pointers are 16-byte aligned */
      36    __m128i t1 = *(__m128i *) i1;
      37    __m128i t2 = *(__m128i *) i2;
      38    *(__m128i *) r = _mm_hsubs_epi16 (t1, t2);
      39  }
      40  
      41  static short
      42  signed_saturate_to_word (int x)
      43  {
      44    if (x > (int )0x7fff)
      45      return 0x7fff;
      46  
      47    if (x < (int) 0xffff8000)
      48      return 0x8000;
      49  
      50    return (short)x;
      51  }
      52  
      53  /* Routine to manually compute the results */
      54  static void
      55  compute_correct_result (int *i1, int *i2, int *r)
      56  {
      57    short *s1 = (short *) i1;
      58    short *s2 = (short *) i2;
      59    short *sout = (short *) r;
      60    int i;
      61  
      62    for (i = 0; i < 4; i++)
      63      sout[i] = signed_saturate_to_word (s1[2 * i] - s1[2 * i + 1]);
      64  
      65    for (i = 0; i < 4; i++)
      66      sout[i + 4] = signed_saturate_to_word (s2[2 * i] - s2[2 * i + 1]);
      67  }
      68  
      69  static void
      70  TEST (void)
      71  {
      72    int i;
      73    int r [4] __attribute__ ((aligned(16)));
      74    int ck [4];
      75    int fail = 0;
      76  
      77    for (i = 0; i < 256; i += 8)
      78      {
      79        /* Manually compute the result */
      80        compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
      81  
      82        /* Run the 64-bit tests */
      83        ssse3_test_phsubsw (&vals[i + 0], &vals[i + 2], &r[0]);
      84        ssse3_test_phsubsw (&vals[i + 4], &vals[i + 6], &r[2]);
      85        fail += chk_128 (ck, r);
      86  
      87        /* Run the 128-bit tests */
      88        ssse3_test_phsubsw128 (&vals[i + 0], &vals[i + 4], r);
      89        fail += chk_128 (ck, r);
      90      }
      91  
      92    if (fail != 0)
      93      abort ();
      94  }