/* { dg-require-effective-target p9vector_hw } */
/* { dg-options "-mdejagnu-cpu=power9 -O3 --param tree-reassoc-width=1" } */

/* Originally from gcc.dg/vect/pr45752.c. */
#include <stdarg.h>

/* abort and exit are declared by hand here.  The extern "C" wrapper
   keeps both declarations at C linkage when this file is compiled as
   C++.  */
#ifdef __cplusplus
extern "C" {
#endif
extern void abort (void);
extern void exit (int);
#ifdef __cplusplus
}
#endif

/* Coefficients of a 5x5 matrix.  MRC is the factor applied to input
   element C when foo computes output element R, so each group of five
   defines below holds one column of the matrix.  */
#define M00 100
#define M10 216
#define M20 23
#define M30 237
#define M40 437

#define M01 1322
#define M11 13
#define M21 27271
#define M31 2280
#define M41 284

#define M02 74
#define M12 191
#define M22 500
#define M32 111
#define M42 1114

#define M03 134
#define M13 117
#define M23 11
#define M33 771
#define M43 71

#define M04 334
#define M14 147
#define M24 115
#define M34 7716
#define M44 16

/* Number of elements in each input and output array.  foo walks the
   arrays five elements at a time, so N must be a multiple of 5 for
   every output element to be written.  */
#define N 20

/* For each of the N / 5 consecutive groups of five elements read from
   PINPUT, store the five dot products of that group with the rows of
   the matrix above to POUTPUT.  The same computation is applied to
   PINPUT2 / POUTPUT2 inside the same loop iteration.  All arithmetic is
   done in unsigned int.

   NOTE(review): this file is a compiler test, so the exact statement
   shape (pointer post-increments, operand order) presumably matters to
   the optimization being exercised and is left as it was.  */
void
foo (unsigned int *__restrict__ pInput,
     unsigned int *__restrict__ pOutput,
     unsigned int *__restrict__ pInput2,
     unsigned int *__restrict__ pOutput2)
{
  unsigned int i, a, b, c, d, e;

  for (i = 0; i < N / 5; i++)
    {
      /* First stream: load one group of five inputs ...  */
      a = *pInput++;
      b = *pInput++;
      c = *pInput++;
      d = *pInput++;
      e = *pInput++;

      /* ... and emit the five matrix-row dot products.  */
      *pOutput++ = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
      *pOutput++ = M10 * a + M11 * b + M12 * c + M13 * d + M14 * e;
      *pOutput++ = M20 * a + M21 * b + M22 * c + M23 * d + M24 * e;
      *pOutput++ = M30 * a + M31 * b + M32 * c + M33 * d + M34 * e;
      *pOutput++ = M40 * a + M41 * b + M42 * c + M43 * d + M44 * e;


      /* Second stream: same computation, reusing the scalar
	 temporaries.  */
      a = *pInput2++;
      b = *pInput2++;
      c = *pInput2++;
      d = *pInput2++;
      e = *pInput2++;

      *pOutput2++ = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
      *pOutput2++ = M10 * a + M11 * b + M12 * c + M13 * d + M14 * e;
      *pOutput2++ = M20 * a + M21 * b + M22 * c + M23 * d + M24 * e;
      *pOutput2++ = M30 * a + M31 * b + M32 * c + M33 * d + M34 * e;
      *pOutput2++ = M40 * a + M41 * b + M42 * c + M43 * d + M44 * e;

    }
}

85 int main (int argc, const char* argv[])
86 {
87 unsigned int input[N], output[N], i, input2[N], output2[N];
88 unsigned int check_results[N]
89 = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399,
90 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619 };
91 unsigned int check_results2[N]
92 = {7136, 2702, 84604, 57909, 6633, 16956, 6122, 224204, 113484, 16243,
93 26776, 9542, 363804, 169059, 25853, 36596, 12962, 503404, 224634, 35463 };
94
95 for (i = 0; i < N; i++)
96 {
97 input[i] = i%256;
98 input2[i] = i + 2;
99 output[i] = 0;
100 output2[i] = 0;
101 __asm__ volatile ("");
102 }
103
104 foo (input, output, input2, output2);
105
106 for (i = 0; i < N; i++)
107 if (output[i] != check_results[i]
108 || output2[i] != check_results2[i])
109 abort ();
110
111 return 0;
112 }