1 /* Test of u8_prev() function.
2 Copyright (C) 2010-2023 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 /* Written by Bruno Haible <bruno@clisp.org>, 2010. */
18
19 #include <config.h>
20
21 #include "unistr.h"
22
23 #include "macros.h"
24
25 static int
26 check (const uint8_t *input, size_t input_length, ucs4_t *puc)
27 {
28 ucs4_t uc;
29
30 /* Test recognition when at the beginning of the string. */
31 if (u8_prev (&uc, input + input_length, input) != input)
32 return 1;
33
34 /* Test recognition when preceded by a 1-unit character. */
35 {
36 uint8_t buf[100];
37 uint8_t *ptr;
38 size_t i;
39 ucs4_t uc1;
40
41 ptr = buf;
42 *ptr++ = 'x';
43 for (i = 0; i < input_length; i++)
44 ptr[i] = input[i];
45
46 if (u8_prev (&uc1, ptr + input_length, buf) != ptr)
47 return 2;
48 if (uc1 != uc)
49 return 3;
50 }
51
52 /* Test recognition when preceded by a 2-unit character. */
53 {
54 uint8_t buf[100];
55 uint8_t *ptr;
56 size_t i;
57 ucs4_t uc1;
58
59 ptr = buf;
60 *ptr++ = 0xC3;
61 *ptr++ = 0x97;
62 for (i = 0; i < input_length; i++)
63 ptr[i] = input[i];
64
65 if (u8_prev (&uc1, ptr + input_length, buf) != ptr)
66 return 4;
67 if (uc1 != uc)
68 return 5;
69 }
70
71 /* Test recognition when preceded by a 3-unit character. */
72 {
73 uint8_t buf[100];
74 uint8_t *ptr;
75 size_t i;
76 ucs4_t uc1;
77
78 ptr = buf;
79 *ptr++ = 0xE2;
80 *ptr++ = 0x84;
81 *ptr++ = 0x82;
82 for (i = 0; i < input_length; i++)
83 ptr[i] = input[i];
84
85 if (u8_prev (&uc1, ptr + input_length, buf) != ptr)
86 return 6;
87 if (uc1 != uc)
88 return 7;
89 }
90
91 /* Test recognition when preceded by a 4-unit character. */
92 {
93 uint8_t buf[100];
94 uint8_t *ptr;
95 size_t i;
96 ucs4_t uc1;
97
98 ptr = buf;
99 *ptr++ = 0xF0;
100 *ptr++ = 0x9D;
101 *ptr++ = 0x94;
102 *ptr++ = 0x9E;
103 for (i = 0; i < input_length; i++)
104 ptr[i] = input[i];
105
106 if (u8_prev (&uc1, ptr + input_length, buf) != ptr)
107 return 8;
108 if (uc1 != uc)
109 return 9;
110 }
111
112 *puc = uc;
113 return 0;
114 }
115
116 static int
117 check_invalid (const uint8_t *input, size_t input_length)
118 {
119 ucs4_t uc;
120
121 /* Test recognition when at the beginning of the string. */
122 uc = 0xBADFACE;
123 if (u8_prev (&uc, input + input_length, input) != NULL)
124 return 1;
125 if (uc != 0xBADFACE)
126 return 2;
127
128 /* Test recognition when preceded by a 1-unit character. */
129 {
130 uint8_t buf[100];
131 uint8_t *ptr;
132 size_t i;
133
134 ptr = buf;
135 *ptr++ = 'x';
136 for (i = 0; i < input_length; i++)
137 ptr[i] = input[i];
138
139 uc = 0xBADFACE;
140 if (u8_prev (&uc, ptr + input_length, buf) != NULL)
141 return 3;
142 if (uc != 0xBADFACE)
143 return 4;
144 }
145
146 /* Test recognition when preceded by a 2-unit character. */
147 {
148 uint8_t buf[100];
149 uint8_t *ptr;
150 size_t i;
151
152 ptr = buf;
153 *ptr++ = 0xC3;
154 *ptr++ = 0x97;
155 for (i = 0; i < input_length; i++)
156 ptr[i] = input[i];
157
158 uc = 0xBADFACE;
159 if (u8_prev (&uc, ptr + input_length, buf) != NULL)
160 return 5;
161 if (uc != 0xBADFACE)
162 return 6;
163 }
164
165 /* Test recognition when preceded by a 3-unit character. */
166 {
167 uint8_t buf[100];
168 uint8_t *ptr;
169 size_t i;
170
171 ptr = buf;
172 *ptr++ = 0xE2;
173 *ptr++ = 0x84;
174 *ptr++ = 0x82;
175 for (i = 0; i < input_length; i++)
176 ptr[i] = input[i];
177
178 uc = 0xBADFACE;
179 if (u8_prev (&uc, ptr + input_length, buf) != NULL)
180 return 7;
181 if (uc != 0xBADFACE)
182 return 8;
183 }
184
185 /* Test recognition when preceded by a 4-unit character. */
186 {
187 uint8_t buf[100];
188 uint8_t *ptr;
189 size_t i;
190
191 ptr = buf;
192 *ptr++ = 0xF0;
193 *ptr++ = 0x9D;
194 *ptr++ = 0x94;
195 *ptr++ = 0x9E;
196 for (i = 0; i < input_length; i++)
197 ptr[i] = input[i];
198
199 uc = 0xBADFACE;
200 if (u8_prev (&uc, ptr + input_length, buf) != NULL)
201 return 9;
202 if (uc != 0xBADFACE)
203 return 10;
204 }
205
206 return 0;
207 }
208
209 int
210 main ()
211 {
212 ucs4_t uc;
213
214 /* Test ISO 646 unit input. */
215 {
216 ucs4_t c;
217 uint8_t buf[1];
218
219 for (c = 0; c < 0x80; c++)
220 {
221 buf[0] = c;
222 uc = 0xBADFACE;
223 ASSERT (check (buf, 1, &uc) == 0);
224 ASSERT (uc == c);
225 }
226 }
227
228 /* Test 2-byte character input. */
229 {
230 static const uint8_t input[] = { 0xC3, 0x97 };
231 uc = 0xBADFACE;
232 ASSERT (check (input, SIZEOF (input), &uc) == 0);
233 ASSERT (uc == 0x00D7);
234 }
235
236 /* Test 3-byte character input. */
237 {
238 static const uint8_t input[] = { 0xE2, 0x82, 0xAC };
239 uc = 0xBADFACE;
240 ASSERT (check (input, SIZEOF (input), &uc) == 0);
241 ASSERT (uc == 0x20AC);
242 }
243
244 /* Test 4-byte character input. */
245 {
246 static const uint8_t input[] = { 0xF4, 0x8F, 0xBF, 0xBD };
247 uc = 0xBADFACE;
248 ASSERT (check (input, SIZEOF (input), &uc) == 0);
249 ASSERT (uc == 0x10FFFD);
250 }
251
252 /* Test incomplete/invalid 1-byte input. */
253 {
254 static const uint8_t input[] = { 0xC1 };
255 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
256 }
257 {
258 static const uint8_t input[] = { 0xC3 };
259 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
260 }
261 {
262 static const uint8_t input[] = { 0xE2 };
263 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
264 }
265 {
266 static const uint8_t input[] = { 0xF4 };
267 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
268 }
269 {
270 static const uint8_t input[] = { 0xFE };
271 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
272 }
273
274 /* Test incomplete/invalid 2-byte input. */
275 {
276 static const uint8_t input[] = { 0xE0, 0x9F };
277 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
278 }
279 {
280 static const uint8_t input[] = { 0xE2, 0x82 };
281 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
282 }
283 {
284 static const uint8_t input[] = { 0xE2, 0xD0 };
285 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
286 }
287 {
288 static const uint8_t input[] = { 0xF0, 0x8F };
289 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
290 }
291 {
292 static const uint8_t input[] = { 0xF3, 0x8F };
293 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
294 }
295 {
296 static const uint8_t input[] = { 0xF3, 0xD0 };
297 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
298 }
299
300 /* Test incomplete/invalid 3-byte input. */
301 {
302 static const uint8_t input[] = { 0xF3, 0x8F, 0xBF };
303 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
304 }
305 {
306 static const uint8_t input[] = { 0xF3, 0xE4, 0xBF };
307 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
308 }
309 {
310 static const uint8_t input[] = { 0xF3, 0x8F, 0xD0 };
311 ASSERT (check_invalid (input, SIZEOF (input)) == 0);
312 }
313
314 return 0;
315 }