1 /* PR tree-optimization/71488 */
2 /* { dg-require-effective-target vect_int } */
3 /* { dg-require-effective-target vect_pack_trunc } */
4 /* { dg-additional-options "-msse4" { target sse4_runtime } } */
5
6 #include "tree-vect.h"
7
/* Two file-scope integers, zero-initialized by default.
   NOTE(review): no read or write of these globals is visible in this
   file -- confirm before removing.  */
int i1;
int i2;
9
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) > (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   All three arrays hold int elements.  */
void __attribute__((noclone,noinline))
fn1 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) > (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
18
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) > (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   P3 points to short elements while P1 and P2 point to int.  */
void __attribute__((noclone,noinline))
fn2 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) > (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
27
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) > (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   P3 points to long long elements while P1 and P2 point to int.  */
void __attribute__((noclone,noinline))
fn3 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) > (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
36
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) >= (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   All three arrays hold int elements.  */
void __attribute__((noclone,noinline))
fn4 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) >= (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
45
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) >= (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   P3 points to short elements while P1 and P2 point to int.  */
void __attribute__((noclone,noinline))
fn5 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) >= (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
54
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) >= (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   P3 points to long long elements while P1 and P2 point to int.  */
void __attribute__((noclone,noinline))
fn6 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) >= (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
63
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) < (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   All three arrays hold int elements.  */
void __attribute__((noclone,noinline))
fn7 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) < (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
72
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) < (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   P3 points to short elements while P1 and P2 point to int.  */
void __attribute__((noclone,noinline))
fn8 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) < (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
81
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) < (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   P3 points to long long elements while P1 and P2 point to int.  */
void __attribute__((noclone,noinline))
fn9 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) < (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
90
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) <= (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   All three arrays hold int elements.  */
void __attribute__((noclone,noinline))
fn10 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) <= (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
99
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) <= (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   P3 points to short elements while P1 and P2 point to int.  */
void __attribute__((noclone,noinline))
fn11 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) <= (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
108
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) <= (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   P3 points to long long elements while P1 and P2 point to int.  */
void __attribute__((noclone,noinline))
fn12 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) <= (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
117
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) == (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   All three arrays hold int elements.  */
void __attribute__((noclone,noinline))
fn13 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) == (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
126
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) == (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   P3 points to short elements while P1 and P2 point to int.  */
void __attribute__((noclone,noinline))
fn14 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) == (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
135
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) == (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   P3 points to long long elements while P1 and P2 point to int.  */
void __attribute__((noclone,noinline))
fn15 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) == (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
144
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) != (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   All three arrays hold int elements.  */
void __attribute__((noclone,noinline))
fn16 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) != (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
153
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) != (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   P3 points to short elements while P1 and P2 point to int.  */
void __attribute__((noclone,noinline))
fn17 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) != (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
162
/* For every index below SIZE, store into P1 the value
   ((P2[i] == 0) != (unsigned) (P3[i] == 0)) + (P2[i] == 0).
   P3 points to long long elements while P1 and P2 point to int.  */
void __attribute__((noclone,noinline))
fn18 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
  for (int idx = 0; idx < size; idx++)
    {
      p1[idx] = ((p2[idx] == 0) != (unsigned)(p3[idx] == 0)) + (p2[idx] == 0);
    }
}
171
/* Return 1 when I1 equals I2, otherwise 0.  */
int
eq (int i1, int i2)
{
  return i1 == i2;
}
/* Return 1 when I1 differs from I2, otherwise 0.  */
int
ne (int i1, int i2)
{
  return i1 != i2;
}
/* Return 1 when I1 is strictly less than I2, otherwise 0.  */
int
lt (int i1, int i2)
{
  return i1 < i2;
}
/* Return 1 when I1 is less than or equal to I2, otherwise 0.  */
int
le (int i1, int i2)
{
  return i1 <= i2;
}
/* Return 1 when I1 is strictly greater than I2, otherwise 0.  */
int
gt (int i1, int i2)
{
  return i1 > i2;
}
/* Return 1 when I1 is greater than or equal to I2, otherwise 0.  */
int
ge (int i1, int i2)
{
  return i1 >= i2;
}
178
/* Pointer to a function that takes two ints and returns an int.  */
typedef int (*cmp_fn) (int lhs, int rhs);
180
181 void
182 check (int *p, cmp_fn fn)
183 {
184 int i;
185
186 for (i = 0; i < 32; i++)
187 {
188 int t1 = ((i % 4) > 1) == 0;
189 int t2 = (i % 2) == 0;
190 int res = fn (t1, t2) + t1;
191 if (p[i] != res)
192 __builtin_abort ();
193 }
194 }
195
196 int
197 main (int argc, char **argv)
198 {
199 int i1[32], i2[32], res[32];
200 short s2[32];
201 long long l2[32];
202 int i;
203
204 check_vect ();
205
206 for (i = 0; i < 32; i++)
207 {
208 l2[i] = i2[i] = s2[i] = i % 2;
209 i1[i] = (i % 4) > 1;
210 asm ("":::"memory");
211 }
212
213 fn1 (res, i1, i2, 32);
214 check (res, gt);
215 fn2 (res, i1, s2, 32);
216 check (res, gt);
217 fn3 (res, i1, l2, 32);
218 check (res, gt);
219
220 fn4 (res, i1, i2, 32);
221 check (res, ge);
222 fn5 (res, i1, s2, 32);
223 check (res, ge);
224 fn6 (res, i1, l2, 32);
225 check (res, ge);
226
227 fn7 (res, i1, i2, 32);
228 check (res, lt);
229 fn8 (res, i1, s2, 32);
230 check (res, lt);
231 fn9 (res, i1, l2, 32);
232 check (res, lt);
233
234 fn10 (res, i1, i2, 32);
235 check (res, le);
236 fn11 (res, i1, s2, 32);
237 check (res, le);
238 fn12 (res, i1, l2, 32);
239 check (res, le);
240
241 fn13 (res, i1, i2, 32);
242 check (res, eq);
243 fn14 (res, i1, s2, 32);
244 check (res, eq);
245 fn15 (res, i1, l2, 32);
246 check (res, eq);
247
248 fn16 (res, i1, i2, 32);
249 check (res, ne);
250 fn17 (res, i1, s2, 32);
251 check (res, ne);
252 fn18 (res, i1, l2, 32);
253 check (res, ne);
254 }
255
256 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 18 "vect" { target sse4_runtime } } } */