1 /* { dg-do compile } */
2 /* { dg-require-effective-target arm_neon_ok } */
3 /* { dg-require-effective-target arm_thumb2_ok } */
4 /* { dg-options "-O2 -mthumb" } */
5 /* { dg-add-options arm_neon } */
6 /* { dg-prune-output "switch .* conflicts with" } */
7
8 #include <arm_neon.h>
9 #include <stddef.h>
10
/* Fill LENGTH bytes starting at DST with the value of C converted to
   unsigned char, and return the original DST pointer.

   The definition uses old-style (K&R) parameter declarations.  The work is
   split into phases: a byte loop for very short requests, a byte prologue
   that aligns DST to 4 bytes, an optional 128-bit vector phase for blocks
   of at least 16 bytes, a 32-bit word tail, and a final byte tail.

   Note: DST is a `void *' that is advanced with `++' and `+='; this relies
   on the GNU C extension that treats arithmetic on `void *' as byte-sized.  */
11 void *
12 memset (DST, C, LENGTH)
13 void *DST;
14 int C;
15 size_t LENGTH;
16 {
/* Remember the start address: DST itself is advanced below.  */
17 void* DST0 = DST;
/* Only the low byte of C is stored.  */
18 unsigned char C_BYTE = C;
19
20
/* Short request (fewer than 4 bytes), hinted to the compiler as the
   expected case: plain byte loop.  DST has not been modified yet, so
   returning it is the same as returning DST0.  */
21 if (__builtin_expect(LENGTH < 4, 1)) {
22 size_t i = 0;
23 while (i < LENGTH) {
24 ((char*)DST)[i] = C_BYTE;
25 i++;
26 }
27 return DST;
28 }
29
/* One past the last byte to be written.  */
30 const char* DST_end = (char*)DST + LENGTH;
31
32
/* Store single bytes until DST is 4-byte aligned.  LENGTH is at least 4
   here, and at most 3 bytes are needed, so this cannot run past DST_end.  */
33 while ((uintptr_t)DST % 4 != 0) {
34 *(char*) (DST++) = C_BYTE;
35 }
36
37
/* The fill byte replicated into all four bytes of a 32-bit word.  */
38 uint32_t C_SHORTWORD = (uint32_t)(unsigned char)(C_BYTE) * 0x01010101;
39
40
/* Vector phase, taken only when at least 16 bytes remain (hinted as the
   unexpected case).  */
41 if (__builtin_expect(DST_end - (char*)DST >= 16, 0)) {
/* Store 32-bit words until DST is 16-byte aligned.  DST is already
   4-byte aligned, so at most 3 words (12 bytes) are written, which fits
   in the 16 or more bytes that remain.  */
42 while ((uintptr_t)DST % 16 != 0) {
43 *((uint32_t*)((char*)(DST) + (0))) = C_SHORTWORD;
44 DST += 4;
45 }
46
47
/* 128-bit vector with the fill byte duplicated into all 16 lanes.  */
48 uint8x16_t C_WORD = vdupq_n_u8(C_BYTE);
49
50
51
52
53
/* `i' is the byte offset from the (now 16-byte aligned) DST; LENGTH is
   reused as the number of bytes still to be written.  */
54 size_t i = 0;
55 LENGTH = DST_end - (char*)DST;
/* 256 bytes per iteration: sixteen 16-byte vector stores.  */
56 while (i + 16 * 16 <= LENGTH) {
57 *((uint8x16_t*)((char*)(DST) + (i))) = C_WORD;
58 *((uint8x16_t*)((char*)(DST) + (i + 16 * 1))) = C_WORD;
59 *((uint8x16_t*)((char*)(DST) + (i + 16 * 2))) = C_WORD;
60 *((uint8x16_t*)((char*)(DST) + (i + 16 * 3))) = C_WORD;
61 *((uint8x16_t*)((char*)(DST) + (i + 16 * 4))) = C_WORD;
62 *((uint8x16_t*)((char*)(DST) + (i + 16 * 5))) = C_WORD;
63 *((uint8x16_t*)((char*)(DST) + (i + 16 * 6))) = C_WORD;
64 *((uint8x16_t*)((char*)(DST) + (i + 16 * 7))) = C_WORD;
65 *((uint8x16_t*)((char*)(DST) + (i + 16 * 8))) = C_WORD;
66 *((uint8x16_t*)((char*)(DST) + (i + 16 * 9))) = C_WORD;
67 *((uint8x16_t*)((char*)(DST) + (i + 16 * 10))) = C_WORD;
68 *((uint8x16_t*)((char*)(DST) + (i + 16 * 11))) = C_WORD;
69 *((uint8x16_t*)((char*)(DST) + (i + 16 * 12))) = C_WORD;
70 *((uint8x16_t*)((char*)(DST) + (i + 16 * 13))) = C_WORD;
71 *((uint8x16_t*)((char*)(DST) + (i + 16 * 14))) = C_WORD;
72 *((uint8x16_t*)((char*)(DST) + (i + 16 * 15))) = C_WORD;
73 i += 16 * 16;
74 }
/* 64 bytes per iteration: four 16-byte vector stores.  */
75 while (i + 16 * 4 <= LENGTH) {
76 *((uint8x16_t*)((char*)(DST) + (i))) = C_WORD;
77 *((uint8x16_t*)((char*)(DST) + (i + 16 * 1))) = C_WORD;
78 *((uint8x16_t*)((char*)(DST) + (i + 16 * 2))) = C_WORD;
79 *((uint8x16_t*)((char*)(DST) + (i + 16 * 3))) = C_WORD;
80 i += 16 * 4;
81 }
/* Remaining whole 16-byte vectors, one store per iteration.  */
82 while (i + 16 <= LENGTH) {
83 *((uint8x16_t*)((char*)(DST) + (i))) = C_WORD;
84 i += 16;
85 }
/* Advance DST past everything the vector loops wrote.  */
86 DST += i;
87 }
88
/* Word tail: 32-bit stores while at least 4 bytes remain.  DST is still
   4-byte aligned here, since every step above advanced it by a multiple
   of 4 after the initial alignment.  */
89 while (4 <= DST_end - (char*)DST) {
90 *((uint32_t*)((char*)(DST) + (0))) = C_SHORTWORD;
91 DST += 4;
92 }
93
94
/* Byte tail: whatever is left (fewer than 4 bytes).  */
95 while ((char*)DST < DST_end) {
96 *((char*)DST) = C_BYTE;
97 DST++;
98 }
99
/* Return the start of the filled region, not the advanced DST.  */
100 return DST0;
101 }