1  /* { dg-do run } */
       2  /* { dg-require-effective-target avx2 } */
       3  /* { dg-options "-O2 -mavx2" } */
       4  
       5  #include <string.h>
       6  #include "avx2-check.h"
       7  
/* Immediate operands used by avx2_test, both as the imm8 argument of
   _mm256_mpsadbw_epu8 and as the MASK argument of compute_mpsadbw.
   compute_mpsadbw only looks at bits 5:0 (bits 2:0 for the low 16-byte
   half, bits 5:3 for the high half); several of the values below also
   set bits 6 and 7, which the reference model does not read.  */
#define msk0 0xC0
#define msk1 0x01
#define msk2 0xF2
#define msk3 0x03
#define msk4 0x84
#define msk5 0x05
#define msk6 0xE6
#define msk7 0x67
      16  
      17  
      18  static void
      19  compute_mpsadbw (int *i1, int *i2, int mask, int *r)
      20  {
      21    unsigned char s[4];
      22    int i, j;
      23    int offs1, offs2;
      24    unsigned char *v1 = (char *) i1;
      25    unsigned char *v2 = (char *) i2;
      26    unsigned short *ret = (unsigned short *) r;
      27  
      28    memset (ret, 0, 32);
      29  
      30    /* Lower part */
      31    offs2 = 4 * (mask & 3);
      32    for (i = 0; i < 4; i++)
      33      s[i] = v2[offs2 + i];
      34  
      35    offs1 = 4 * ((mask & 4) >> 2);
      36    for (j = 0; j < 8; j++)
      37      for (i = 0; i < 4; i++)
      38        ret[j] += abs (v1[offs1 + j + i] - s[i]);
      39  
      40    /* Higher part */
      41    offs2 = 4 * ((mask >> 3) & 3) + 16;
      42    for (i = 0; i < 4; i++)
      43      s[i] = v2[offs2 + i];
      44  
      45    offs1 = 4 * ((mask & 0x20) >> 5) + 16;
      46    for (j = 0; j < 8; j++)
      47      for (i = 0; i < 4; i++)
      48        ret[j + 8] += abs (v1[offs1 + j + i] - s[i]);
      49  }
      50  
      51  static void
      52  avx2_test (void)
      53  {
      54    union256i_d val1, val2, val3[8], res[8];
      55    int tmp[8];
      56    unsigned char masks[8];
      57    int i, j;
      58  
      59    val1.a[0] = 0x35251505;
      60    val1.a[1] = 0x75655545;
      61    val1.a[2] = 0xB5A59585;
      62    val1.a[3] = 0xF5E5D5C5;
      63  
      64    val1.a[4] = 0x35251505;
      65    val1.a[5] = 0x75655545;
      66    val1.a[6] = 0xB5A59585;
      67    val1.a[7] = 0xF5E5D5C5;
      68  
      69    val2.a[0] = 0x31211101;
      70    val2.a[1] = 0x71615141;
      71    val2.a[2] = 0xB1A19181;
      72    val2.a[3] = 0xF1E1D1C1;
      73  
      74    val2.a[4] = 0x31211101;
      75    val2.a[5] = 0x71615141;
      76    val2.a[6] = 0xB1A19181;
      77    val2.a[7] = 0xF1E1D1C1;
      78  
      79    for (i = 0; i < 8; i++)
      80      switch (i % 3)
      81        {
      82        case 1:
      83  	val3[i].a[0] = 0xF1E1D1C1;
      84  	val3[i].a[1] = 0xB1A19181;
      85  	val3[i].a[2] = 0x71615141;
      86  	val3[i].a[3] = 0x31211101;
      87  	break;
      88        default:
      89  	val3[i].x = val2.x;
      90  	break;
      91        }
      92  
      93    /* Check mpsadbw imm8, ymm, ymm.  */
      94    res[0].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk0);
      95    res[1].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk1);
      96    res[2].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk2);
      97    res[3].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk3);
      98    res[4].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk4);
      99    res[5].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk5);
     100    res[6].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk6);
     101    res[7].x = _mm256_mpsadbw_epu8 (val1.x, val2.x, msk7);
     102  
     103    masks[0] = msk0;
     104    masks[1] = msk1;
     105    masks[2] = msk2;
     106    masks[3] = msk3;
     107    masks[4] = msk4;
     108    masks[5] = msk5;
     109    masks[6] = msk6;
     110    masks[7] = msk7;
     111  
     112    for (i = 0; i < 8; i++)
     113      {
     114        compute_mpsadbw (val1.a, val2.a, masks[i], tmp);
     115        if (check_union256i_d (res[i], tmp))
     116  	abort ();
     117      }
     118  
     119    /* Check mpsadbw imm8, m256, ymm.  */
     120    for (i = 0; i < 8; i++)
     121      {
     122        res[i].x = _mm256_mpsadbw_epu8 (val1.x, val3[i].x, msk4);
     123        masks[i] = msk4;
     124      }
     125  
     126    for (i = 0; i < 8; i++)
     127      {
     128        compute_mpsadbw (val1.a, val3[i].a, masks[i], tmp);
     129        if (check_union256i_d (res[i], tmp))
     130  	abort ();
     131      }
     132  }