1  /* Make sure that the reverse permute patterns are optimized
       2     correctly.  */
       3  /* { dg-do run { target { s390*-*-* } } } */
       4  /* { dg-options "-O2 -march=z15 -mzarch -fno-unroll-loops -save-temps" } */
       5  
       6  /* { dg-final { scan-assembler-times "vsterg\t" 2 } } */
       7  /* { dg-final { scan-assembler-times "vsterf\t" 2 } } */
       8  /* { dg-final { scan-assembler-times "vstbrq\t" 1 } } */
       9  /* { dg-final { scan-assembler-times "vperm\t" 0 } } */
      10  
      11  #include <assert.h>
      12  
      13  __attribute__((noipa))
      14  void reversec (char *restrict a, char *restrict b, int n)
      15  {
      16    for (int i = 0; i < n; i += 16)
      17      {
      18        a[i + 0] = b[i + 15];
      19        a[i + 1] = b[i + 14];
      20        a[i + 2] = b[i + 13];
      21        a[i + 3] = b[i + 12];
      22        a[i + 4] = b[i + 11];
      23        a[i + 5] = b[i + 10];
      24        a[i + 6] = b[i + 9];
      25        a[i + 7] = b[i + 8];
      26        a[i + 8] = b[i + 7];
      27        a[i + 9] = b[i + 6];
      28        a[i + 10] = b[i + 5];
      29        a[i + 11] = b[i + 4];
      30        a[i + 12] = b[i + 3];
      31        a[i + 13] = b[i + 2];
      32        a[i + 14] = b[i + 1];
      33        a[i + 15] = b[i + 0];
      34      }
      35  }
      36  
      37  __attribute__((noipa))
      38  void reversel (long long *restrict a, long long *restrict b, int n)
      39  {
      40    for (int i = 0; i < n; i += 2)
      41      {
      42        a[i + 1] = b[i + 0];
      43        a[i + 0] = b[i + 1];
      44      }
      45  }
      46  
      47  __attribute__((noipa))
      48  void reversed (double *restrict a, double *restrict b, int n)
      49  {
      50    for (int i = 0; i < n; i += 2)
      51      {
      52        a[i + 1] = b[i + 0];
      53        a[i + 0] = b[i + 1];
      54      }
      55  }
      56  
      57  __attribute__((noipa))
      58  void reversei (unsigned int *restrict a, unsigned int *restrict b, int n)
      59  {
      60    for (int i = 0; i < n; i += 4)
      61      {
      62        a[i + 3] = b[i + 0];
      63        a[i + 2] = b[i + 1];
      64        a[i + 1] = b[i + 2];
      65        a[i + 0] = b[i + 3];
      66      }
      67  }
      68  
      69  __attribute__((noipa))
      70  void reversef (float *restrict a, float *restrict b, int n)
      71  {
      72    for (int i = 0; i < n; i += 4)
      73      {
      74        a[i + 3] = b[i + 0];
      75        a[i + 2] = b[i + 1];
      76        a[i + 1] = b[i + 2];
      77        a[i + 0] = b[i + 3];
      78      }
      79  }
      80  
      81  int main()
      82  {
      83    const int n = 1024;
      84    char c[n], c2[n];
      85    unsigned int u[n], u2[n];
      86    long long l[n], l2[n];
      87    double d[n], d2[n];
      88    float f[n], f2[n];
      89  
      90    for (int i = 0; i < n; i++)
      91      {
      92        c[i] = i;
      93        u[i] = i;
      94        l[i] = i;
      95        d[i] = i;
      96        f[i] = i;
      97        c2[i] = i;
      98        u2[i] = i;
      99        l2[i] = i;
     100        d2[i] = i;
     101        f2[i] = i;
     102      }
     103  
     104    reversec (c2, c, n);
     105    reversei (u2, u, n);
     106    reversel (l2, l, n);
     107    reversed (d2, d, n);
     108    reversef (f2, f, n);
     109  
     110    for (int i = 0; i < n - 16; i++)
     111      {
     112        assert (c[i] == c2[i / (16 / sizeof (c[0])) * (16 / sizeof (c[0])) + 16 / sizeof (c[0]) - 1 - i % (16 / sizeof (c[0]))]);
     113        assert (u[i] == u2[i / (16 / sizeof (u[0])) * (16 / sizeof (u[0])) + 16 / sizeof (u[0]) - 1 - i % (16 / sizeof (u[0]))]);
     114        assert (l[i] == l2[i / (16 / sizeof (l[0])) * (16 / sizeof (l[0])) + 16 / sizeof (l[0]) - 1 - i % (16 / sizeof (l[0]))]);
     115        assert (d[i] == d2[i / (16 / sizeof (d[0])) * (16 / sizeof (d[0])) + 16 / sizeof (d[0]) - 1 - i % (16 / sizeof (d[0]))]);
     116        assert (f[i] == f2[i / (16 / sizeof (f[0])) * (16 / sizeof (f[0])) + 16 / sizeof (f[0]) - 1 - i % (16 / sizeof (f[0]))]);
     117      }
     118  }