1 /* { dg-do run } */
2 /* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
3 /* { dg-require-effective-target p8vector_hw } */
4
5 #ifndef CHECK_H
6 #define CHECK_H "ssse3-check.h"
7 #endif
8
9 #ifndef TEST
10 #define TEST ssse3_test
11 #endif
12
13 #include CHECK_H
14
15 #include "ssse3-vals.h"
16
17 #include <tmmintrin.h>
18 #include <string.h>
19
20 #ifndef __AVX__
/* Test the 64-bit form */
/* Emulate a variable shift count for _mm_alignr_pi8.  The intrinsic's
   third argument must be a compile-time integer constant, so a runtime
   IMM cannot be passed straight through; instead dispatch on IMM via a
   switch whose cases each pass a literal.  Stores the result of
   aligning *I1:*I2 right by IMM bytes into *R.  Counts of 16 or more
   (the default case) shift everything out and yield zero.  */
static void
ssse3_test_palignr (__m64 *i1, __m64 *i2, unsigned int imm, __m64 *r)
{
  switch (imm)
    {
    case 0:
      *r = _mm_alignr_pi8 (*i1, *i2, 0);
      break;
    case 1:
      *r = _mm_alignr_pi8 (*i1, *i2, 1);
      break;
    case 2:
      *r = _mm_alignr_pi8 (*i1, *i2, 2);
      break;
    case 3:
      *r = _mm_alignr_pi8 (*i1, *i2, 3);
      break;
    case 4:
      *r = _mm_alignr_pi8 (*i1, *i2, 4);
      break;
    case 5:
      *r = _mm_alignr_pi8 (*i1, *i2, 5);
      break;
    case 6:
      *r = _mm_alignr_pi8 (*i1, *i2, 6);
      break;
    case 7:
      *r = _mm_alignr_pi8 (*i1, *i2, 7);
      break;
    case 8:
      *r = _mm_alignr_pi8 (*i1, *i2, 8);
      break;
    case 9:
      *r = _mm_alignr_pi8 (*i1, *i2, 9);
      break;
    case 10:
      *r = _mm_alignr_pi8 (*i1, *i2, 10);
      break;
    case 11:
      *r = _mm_alignr_pi8 (*i1, *i2, 11);
      break;
    case 12:
      *r = _mm_alignr_pi8 (*i1, *i2, 12);
      break;
    case 13:
      *r = _mm_alignr_pi8 (*i1, *i2, 13);
      break;
    case 14:
      *r = _mm_alignr_pi8 (*i1, *i2, 14);
      break;
    case 15:
      *r = _mm_alignr_pi8 (*i1, *i2, 15);
      break;
    default:
      /* Any count >= 16 produces an all-zero result, so one literal
	 (16) covers every remaining IMM the driver passes.  */
      *r = _mm_alignr_pi8 (*i1, *i2, 16);
      break;
    }

  /* Clear MMX state so subsequent floating-point code is unaffected.  */
  _mm_empty();
}
82 #endif
83
/* Test the 128-bit form */
/* Emulate a variable shift count for _mm_alignr_epi8.  As with the
   64-bit form, the shift count must be a compile-time constant, so
   dispatch on IMM through a switch of literal immediates.  Stores the
   result of aligning *I1:*I2 right by IMM bytes into *R.  Counts of
   32 or more (the default case) yield zero.  */
static void
ssse3_test_palignr128 (__m128i *i1, __m128i *i2, unsigned int imm, __m128i *r)
{
  /* Assumes incoming pointers are 16-byte aligned */

  switch (imm)
    {
    case 0:
      *r = _mm_alignr_epi8 (*i1, *i2, 0);
      break;
    case 1:
      *r = _mm_alignr_epi8 (*i1, *i2, 1);
      break;
    case 2:
      *r = _mm_alignr_epi8 (*i1, *i2, 2);
      break;
    case 3:
      *r = _mm_alignr_epi8 (*i1, *i2, 3);
      break;
    case 4:
      *r = _mm_alignr_epi8 (*i1, *i2, 4);
      break;
    case 5:
      *r = _mm_alignr_epi8 (*i1, *i2, 5);
      break;
    case 6:
      *r = _mm_alignr_epi8 (*i1, *i2, 6);
      break;
    case 7:
      *r = _mm_alignr_epi8 (*i1, *i2, 7);
      break;
    case 8:
      *r = _mm_alignr_epi8 (*i1, *i2, 8);
      break;
    case 9:
      *r = _mm_alignr_epi8 (*i1, *i2, 9);
      break;
    case 10:
      *r = _mm_alignr_epi8 (*i1, *i2, 10);
      break;
    case 11:
      *r = _mm_alignr_epi8 (*i1, *i2, 11);
      break;
    case 12:
      *r = _mm_alignr_epi8 (*i1, *i2, 12);
      break;
    case 13:
      *r = _mm_alignr_epi8 (*i1, *i2, 13);
      break;
    case 14:
      *r = _mm_alignr_epi8 (*i1, *i2, 14);
      break;
    case 15:
      *r = _mm_alignr_epi8 (*i1, *i2, 15);
      break;
    case 16:
      *r = _mm_alignr_epi8 (*i1, *i2, 16);
      break;
    case 17:
      *r = _mm_alignr_epi8 (*i1, *i2, 17);
      break;
    case 18:
      *r = _mm_alignr_epi8 (*i1, *i2, 18);
      break;
    case 19:
      *r = _mm_alignr_epi8 (*i1, *i2, 19);
      break;
    case 20:
      *r = _mm_alignr_epi8 (*i1, *i2, 20);
      break;
    case 21:
      *r = _mm_alignr_epi8 (*i1, *i2, 21);
      break;
    case 22:
      *r = _mm_alignr_epi8 (*i1, *i2, 22);
      break;
    case 23:
      *r = _mm_alignr_epi8 (*i1, *i2, 23);
      break;
    case 24:
      *r = _mm_alignr_epi8 (*i1, *i2, 24);
      break;
    case 25:
      *r = _mm_alignr_epi8 (*i1, *i2, 25);
      break;
    case 26:
      *r = _mm_alignr_epi8 (*i1, *i2, 26);
      break;
    case 27:
      *r = _mm_alignr_epi8 (*i1, *i2, 27);
      break;
    case 28:
      *r = _mm_alignr_epi8 (*i1, *i2, 28);
      break;
    case 29:
      *r = _mm_alignr_epi8 (*i1, *i2, 29);
      break;
    case 30:
      *r = _mm_alignr_epi8 (*i1, *i2, 30);
      break;
    case 31:
      *r = _mm_alignr_epi8 (*i1, *i2, 31);
      break;
    default:
      /* Any count >= 32 produces an all-zero result, so one literal
	 (32) covers every remaining IMM the driver passes.  */
      *r = _mm_alignr_epi8 (*i1, *i2, 32);
      break;
    }
}
193
/* Reference model for the 128-bit palignr: concatenate the 16 bytes of
   I2 (low) and I1 (high) into a 32-byte buffer, then copy 16 bytes
   starting at offset IMM into R.  Bytes sourced from beyond the
   concatenation (IMM >= 32, or IMM + index >= 32) are zero.  */
static void
compute_correct_result_128 (signed char *i1, signed char *i2, unsigned int imm,
			    signed char *r)
{
  signed char concat[32];
  int j;

  /* Low half from i2, high half from i1, matching palignr's operand
     order.  */
  memcpy (concat, i2, 16);
  memcpy (concat + 16, i1, 16);

  for (j = 0; j < 16; j++)
    r[j] = (imm < 32 && imm + j < 32) ? concat[imm + j] : 0;
}
211
212 #ifndef __AVX__
/* Reference model for the 64-bit palignr applied independently to each
   8-byte half of the 16-byte inputs: for each half, concatenate i2's
   half (low) and i1's half (high) into a 16-byte buffer and copy the
   8 bytes starting at offset IMM; out-of-range bytes (IMM >= 16, or
   IMM + index >= 16) are zero.  */
static void
compute_correct_result_64 (signed char *i1, signed char *i2, unsigned int imm,
			   signed char *r)
{
  signed char concat[16];
  int half, j;

  /* The two halves are processed identically, just at offsets 0 and 8
     into the input/output arrays.  */
  for (half = 0; half < 2; half++)
    {
      int base = half * 8;

      memcpy (&concat[0], i2 + base, 8);
      memcpy (&concat[8], i1 + base, 8);

      for (j = 0; j < 8; j++)
	r[base + j] = (imm < 16 && imm + j < 16) ? concat[imm + j] : 0;
    }
}
240 #endif
241
242 static void
243 TEST (void)
244 {
245 int i;
246 union data r __attribute__ ((aligned(16)));
247 union data ck;
248 unsigned int imm;
249 int fail = 0;
250
251 for (i = 0; i < ARRAY_SIZE (vals) - 1; i++)
252 for (imm = 0; imm < 100; imm++)
253 {
254 #ifndef __AVX__
255 /* Manually compute the result */
256 compute_correct_result_64 (&vals[i + 0].b[0],
257 &vals[i + 1].b[0], imm, &ck.b[0]);
258
259 /* Run the 64-bit tests */
260 ssse3_test_palignr (&vals[i + 0].ll[0],
261 &vals[i + 1].ll[0], imm, &r.ll[0]);
262 ssse3_test_palignr (&vals[i + 0].ll[1],
263 &vals[i + 1].ll[1], imm, &r.ll[1]);
264 fail += chk_128 (ck.m[0], r.m[0]);
265 #endif
266
267 /* Recompute the results for 128-bits */
268 compute_correct_result_128 (&vals[i + 0].b[0],
269 &vals[i + 1].b[0], imm, &ck.b[0]);
270
271 /* Run the 128-bit tests */
272 ssse3_test_palignr128 (&vals[i + 0].m[0],
273 &vals[i + 1].m[0], imm, &r.m[0]);
274 fail += chk_128 (ck.m[0], r.m[0]);
275 }
276
277 if (fail != 0)
278 abort ();
279 }