1  /* { dg-do compile } */
       2  /* { dg-additional-options "-march=core-avx2 -fdump-tree-slp-details" } */
       3  
       4  #define abs(x) (x) < 0 ? -(x) : (x) /* Expansion lacks outer parentheses; use only as a complete expression.  */
       5  int
       6  foo (int* diff)
       7  {
            /* Kernel for a compile-only SLP testcase (see the dg- directives
               at the top and bottom of this file).  Runs four add/subtract
               passes over diff[0..15] through the scratch arrays m[] and d[],
               copies diff[16..31] to diff[32..47], and returns the sum of
               abs (d[k]) for k = 0..15.  Indices 0..47 of DIFF are accessed.
               NOTE(review): the name `satd' suggests a 4x4 Hadamard/SATD
               kernel -- not verified here.
               NOTE(review): the dump scans presumably depend on the exact
               statement shapes below; confirm before restructuring.  */
       8    int k, satd = 0, m[16], d[16];
       9    
              /* Pass 1 (diff -> m), for j = 0..3:
                 m[j]   = diff[j]   + diff[j+12],  m[j+12] = diff[j]   - diff[j+12],
                 m[j+4] = diff[j+4] + diff[j+8],   m[j+8]  = diff[j+4] - diff[j+8].  */
      10      m[ 0] = diff[ 0] + diff[12];
      11      m[ 4] = diff[ 4] + diff[ 8];
      12      m[ 8] = diff[ 4] - diff[ 8];
      13      m[12] = diff[ 0] - diff[12];
      14      m[ 1] = diff[ 1] + diff[13];
      15      m[ 5] = diff[ 5] + diff[ 9];
      16      m[ 9] = diff[ 5] - diff[ 9];
      17      m[13] = diff[ 1] - diff[13];
      18      m[ 2] = diff[ 2] + diff[14];
      19      m[ 6] = diff[ 6] + diff[10];
      20      m[10] = diff[ 6] - diff[10];
      21      m[14] = diff[ 2] - diff[14];
      22      m[ 3] = diff[ 3] + diff[15];
      23      m[ 7] = diff[ 7] + diff[11];
      24      m[11] = diff[ 7] - diff[11];
      25      m[15] = diff[ 3] - diff[15];
      26      
              /* Pass 2 (m -> d), for j = 0..3:
                 d[j]   = m[j]   + m[j+4],   d[j+8]  = m[j]    - m[j+4],
                 d[j+4] = m[j+8] + m[j+12],  d[j+12] = m[j+12] - m[j+8].  */
      27      d[ 0] = m[ 0] + m[ 4];
      28      d[ 8] = m[ 0] - m[ 4];
      29      d[ 4] = m[ 8] + m[12];
      30      d[12] = m[12] - m[ 8];
      31      d[ 1] = m[ 1] + m[ 5];
      32      d[ 9] = m[ 1] - m[ 5];
      33      d[ 5] = m[ 9] + m[13];
      34      d[13] = m[13] - m[ 9];
      35      d[ 2] = m[ 2] + m[ 6];
      36      d[10] = m[ 2] - m[ 6];
      37      d[ 6] = m[10] + m[14];
      38      d[14] = m[14] - m[10];
      39      d[ 3] = m[ 3] + m[ 7];
      40      d[11] = m[ 3] - m[ 7];
      41      d[ 7] = m[11] + m[15];
      42      d[15] = m[15] - m[11];
      43      
              /* Pass 3 (d -> m), within each group of four, r = 0, 4, 8, 12:
                 m[r]   = d[r]   + d[r+3],  m[r+3] = d[r]   - d[r+3],
                 m[r+1] = d[r+1] + d[r+2],  m[r+2] = d[r+1] - d[r+2].  */
      44      m[ 0] = d[ 0] + d[ 3];
      45      m[ 1] = d[ 1] + d[ 2];
      46      m[ 2] = d[ 1] - d[ 2];
      47      m[ 3] = d[ 0] - d[ 3];
      48      m[ 4] = d[ 4] + d[ 7];
      49      m[ 5] = d[ 5] + d[ 6];
      50      m[ 6] = d[ 5] - d[ 6];
      51      m[ 7] = d[ 4] - d[ 7];
      52      m[ 8] = d[ 8] + d[11];
      53      m[ 9] = d[ 9] + d[10];
      54      m[10] = d[ 9] - d[10];
      55      m[11] = d[ 8] - d[11];
      56      m[12] = d[12] + d[15];
      57      m[13] = d[13] + d[14];
      58      m[14] = d[13] - d[14];
      59      m[15] = d[12] - d[15];
      60      
              /* Pass 4 (m -> d), within each group of four, r = 0, 4, 8, 12:
                 d[r]   = m[r]   + m[r+1],  d[r+1] = m[r]   - m[r+1],
                 d[r+2] = m[r+2] + m[r+3],  d[r+3] = m[r+3] - m[r+2].  */
      61      d[ 0] = m[ 0] + m[ 1];
      62      d[ 1] = m[ 0] - m[ 1];
      63      d[ 2] = m[ 2] + m[ 3];
      64      d[ 3] = m[ 3] - m[ 2];
      65      d[ 4] = m[ 4] + m[ 5];
      66      d[ 5] = m[ 4] - m[ 5];
      67      d[ 6] = m[ 6] + m[ 7];
      68      d[ 7] = m[ 7] - m[ 6];
      69      d[ 8] = m[ 8] + m[ 9];
      70      d[ 9] = m[ 8] - m[ 9];
      71      d[10] = m[10] + m[11];
      72      d[11] = m[11] - m[10];
      73      d[12] = m[12] + m[13];
      74      d[13] = m[12] - m[13];
      75      d[14] = m[14] + m[15];
      76      d[15] = m[15] - m[14];
              /* Element-wise copy of 16 ints, diff[16..31] -> diff[32..47];
                 the source and destination index ranges do not overlap.  */
      77      /* The following obviously profitable part should not make
      78         the former unprofitable one profitable.  */
      79      diff[16 + 16] = diff[16];
      80      diff[17 + 16] = diff[17];
      81      diff[18 + 16] = diff[18];
      82      diff[19 + 16] = diff[19];
      83      diff[20 + 16] = diff[20];
      84      diff[21 + 16] = diff[21];
      85      diff[22 + 16] = diff[22];
      86      diff[23 + 16] = diff[23];
      87      diff[24 + 16] = diff[24];
      88      diff[25 + 16] = diff[25];
      89      diff[26 + 16] = diff[26];
      90      diff[27 + 16] = diff[27];
      91      diff[28 + 16] = diff[28];
      92      diff[29 + 16] = diff[29];
      93      diff[30 + 16] = diff[30];
      94      diff[31 + 16] = diff[31];
              /* Reduce: add the absolute value of every d[k] into satd.  The
                 abs macro expands to an unparenthesized conditional, which is
                 fine here because it forms the entire right-hand side of +=.  */
      95      for (k=0; k<16; k++)
      96        satd += abs(d[k]);
      97    return satd;
      98  }
      99  
     100  /* { dg-final { scan-tree-dump "vectorization is not profitable" "slp1" } } */
     101  /* { dg-final { scan-tree-dump-times "Vectorizing SLP tree" 1 "slp1" } } */