(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
powerpc/
pr71805.c
       1  /* { dg-require-effective-target p9vector_hw } */
       2  /* { dg-options "-mdejagnu-cpu=power9 -O3 --param tree-reassoc-width=1" } */
       3  
       4  /* Originally from gcc.dg/vect/pr45752.c.  */
       5  #include <stdarg.h>
       6  
/* Hand-written prototypes for abort and exit, used instead of including
   <stdlib.h>.  The extern "C" wrapper is only emitted when the file is
   compiled as C++, so the two names keep C linkage in that case.  Only
   abort is called in the code visible in this file.  */
#ifdef __cplusplus
extern "C" {
#endif
extern void abort (void);
extern void exit (int);
#ifdef __cplusplus
}
#endif
      15  
/* Coefficients of the 5x5 matrix applied by foo.  In M<r><c>, r selects
   the output element within a group of five and c selects the input
   element (a..e in foo) that the coefficient multiplies.  The groups
   below are listed column by column: each group holds the five
   coefficients applied to one input element.  */

/* Column 0: coefficients multiplying the first input of a group.  */
#define M00 100
#define M10 216
#define M20 23
#define M30 237
#define M40 437

/* Column 1: coefficients multiplying the second input of a group.  */
#define M01 1322
#define M11 13
#define M21 27271
#define M31 2280
#define M41 284

/* Column 2: coefficients multiplying the third input of a group.  */
#define M02 74
#define M12 191
#define M22 500
#define M32 111
#define M42 1114

/* Column 3: coefficients multiplying the fourth input of a group.  */
#define M03 134
#define M13 117
#define M23 11
#define M33 771
#define M43 71

/* Column 4: coefficients multiplying the fifth input of a group.  */
#define M04 334
#define M14 147
#define M24 115
#define M34 7716
#define M44 16

/* Number of elements in each test array; foo processes N / 5 groups of
   five elements per stream.  */
#define N 20
      47  
      48  void foo (unsigned int *__restrict__ pInput,
      49            unsigned int *__restrict__ pOutput,
      50            unsigned int *__restrict__ pInput2,
      51            unsigned int *__restrict__ pOutput2)
      52  {
      53    unsigned int i, a, b, c, d, e;
      54  
      55    for (i = 0; i < N / 5; i++)
      56      {
      57         a = *pInput++;
      58         b = *pInput++;
      59         c = *pInput++;
      60         d = *pInput++;
      61         e = *pInput++;
      62  
      63         *pOutput++ = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
      64         *pOutput++ = M10 * a + M11 * b + M12 * c + M13 * d + M14 * e;
      65         *pOutput++ = M20 * a + M21 * b + M22 * c + M23 * d + M24 * e;
      66         *pOutput++ = M30 * a + M31 * b + M32 * c + M33 * d + M34 * e;
      67         *pOutput++ = M40 * a + M41 * b + M42 * c + M43 * d + M44 * e;
      68  
      69  
      70         a = *pInput2++;
      71         b = *pInput2++;
      72         c = *pInput2++;
      73         d = *pInput2++;
      74         e = *pInput2++;
      75  
      76         *pOutput2++ = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
      77         *pOutput2++ = M10 * a + M11 * b + M12 * c + M13 * d + M14 * e;
      78         *pOutput2++ = M20 * a + M21 * b + M22 * c + M23 * d + M24 * e;
      79         *pOutput2++ = M30 * a + M31 * b + M32 * c + M33 * d + M34 * e;
      80         *pOutput2++ = M40 * a + M41 * b + M42 * c + M43 * d + M44 * e;
      81  
      82      }
      83  }
      84  
      85  int main (int argc, const char* argv[])
      86  {
      87    unsigned int input[N], output[N], i, input2[N], output2[N];
      88    unsigned int check_results[N]
      89      = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399, 
      90      22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619 };
      91    unsigned int check_results2[N]
      92      = {7136, 2702, 84604, 57909, 6633, 16956, 6122, 224204, 113484, 16243, 
      93      26776, 9542, 363804, 169059, 25853, 36596, 12962, 503404, 224634, 35463 };
      94  
      95    for (i = 0; i < N; i++)
      96      {
      97        input[i] = i%256;
      98        input2[i] = i + 2;
      99        output[i] = 0;
     100        output2[i] = 0;
     101        __asm__ volatile ("");
     102      }
     103  
     104    foo (input, output, input2, output2);
     105  
     106    for (i = 0; i < N; i++)
     107      if (output[i] != check_results[i]
     108          || output2[i] != check_results2[i])
     109        abort ();
     110  
     111    return 0;
     112  }