/* Test of conversion of string to wide string.
   Copyright (C) 2008-2023 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Bruno Haible <bruno@clisp.org>, 2008.  */

19 #include <config.h>
20
21 #include <wchar.h>
22
23 #include "signature.h"
24 SIGNATURE_CHECK (mbsrtowcs, size_t, (wchar_t *, char const **, size_t,
25 mbstate_t *));
26
27 #include <locale.h>
28 #include <stdio.h>
29 #include <string.h>
30
31 #include "macros.h"
32
/* Exercise mbsrtowcs.

   The NUL-input checks always run.  The locale-specific checks are
   selected by the first character of argv[1]:
     '1'  locale encoding is ISO-8859-1 or ISO-8859-15
     '2'  locale encoding is UTF-8
     '3'  locale encoding is EUC-JP
     '4'  locale encoding is GB18030
     '5'  C or POSIX locale
   The selected check runs twice: once with a small destination limit that
   stops the conversion early (unlimited == 0) and once with a limit of
   BUFSIZE, which is large enough for the whole string (unlimited == 1).

   All expectations are checked with ASSERT (from "macros.h").

   Returns 0 when a recognized selector was given.  Returns 1 when
   setlocale fails, when no selector argument is present, or when the
   selector is not recognized.  */
int
main (int argc, char *argv[])
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale (LC_ALL, "") == NULL)
    return 1;

  /* Test NUL byte input.  */
  {
    const char *src;

    memset (&state, '\0', sizeof (mbstate_t));

    /* NULL destination, limit 0.  */
    src = "";
    ret = mbsrtowcs (NULL, &src, 0, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));

    /* NULL destination, limit 1.  */
    src = "";
    ret = mbsrtowcs (NULL, &src, 1, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));

    /* Real destination, limit 0: the sentinel in wc must stay untouched.  */
    wc = (wchar_t) 0xBADFACE;
    src = "";
    ret = mbsrtowcs (&wc, &src, 0, &state);
    ASSERT (ret == 0);
    ASSERT (wc == (wchar_t) 0xBADFACE);
    ASSERT (mbsinit (&state));

    /* Real destination, limit 1: the terminating null wide character is
       stored but not counted in the return value.  */
    wc = (wchar_t) 0xBADFACE;
    src = "";
    ret = mbsrtowcs (&wc, &src, 1, &state);
    ASSERT (ret == 0);
    ASSERT (wc == 0);
    ASSERT (mbsinit (&state));
  }

#ifdef __ANDROID__
  /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the
     "C" locale.  Furthermore, when you attempt to set the "C" or "POSIX"
     locale via setlocale(), what you get is a "C" locale with UTF-8 encoding,
     that is, effectively the "C.UTF-8" locale.  */
  if (argc > 1 && strcmp (argv[1], "5") == 0 && MB_CUR_MAX > 1)
    argv[1] = "2";
#endif

  if (argc > 1)
    {
      int unlimited;

      for (unlimited = 0; unlimited < 2; unlimited++)
        {
          #define BUFSIZE 10
          wchar_t buf[BUFSIZE];
          const char *src;
          mbstate_t temp_state;

          /* Fill the output buffer with a sentinel, so that the checks
             below can tell which elements were written.  */
          {
            size_t i;
            for (i = 0; i < BUFSIZE; i++)
              buf[i] = (wchar_t) 0xBADFACE;
          }

          switch (argv[1][0])
            {
            case '1':
              /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
              {
                char input[] = "B\374\337er"; /* "Büßer" */
                memset (&state, '\0', sizeof (mbstate_t));

                /* Consume 'B', then clear it in the buffer.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == 'B');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                /* Consume 'ü', then clear it in the buffer.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wctob (wc) == (unsigned char) '\374');
                ASSERT (mbsinit (&state));
                input[1] = '\0';

                /* NULL destination: the full remaining length ("ßer") is
                   expected regardless of the limit, and src is not
                   advanced.  A copy of the state is passed, so that state
                   itself is still available for the real conversion.  */
                src = input + 2;
                temp_state = state;
                ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 1, &temp_state);
                ASSERT (ret == 3);
                ASSERT (src == input + 2);
                ASSERT (mbsinit (&state));

                /* Real conversion of "ßer", limited to 1 wide character
                   in the first pass.  */
                src = input + 2;
                ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 1, &state);
                ASSERT (ret == (unlimited ? 3 : 1));
                ASSERT (src == (unlimited ? NULL : input + 3));
                ASSERT (wctob (buf[0]) == (unsigned char) '\337');
                if (unlimited)
                  {
                    ASSERT (buf[1] == 'e');
                    ASSERT (buf[2] == 'r');
                    ASSERT (buf[3] == 0);
                    ASSERT (buf[4] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[1] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '2':
              /* Locale encoding is UTF-8.  */
              {
                char input[] = "B\303\274\303\237er"; /* "Büßer" */
                memset (&state, '\0', sizeof (mbstate_t));

                /* Consume 'B', then clear it in the buffer.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == 'B');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                /* Feed only the first byte of 'ü': the character is
                   incomplete, so state leaves the initial state.  The byte
                   is then cleared in the buffer, so it is kept in state
                   only.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 1, &state);
                ASSERT (ret == (size_t)(-2));
                ASSERT (wc == (wchar_t) 0xBADFACE);
                ASSERT (!mbsinit (&state));
                input[1] = '\0';

                /* NULL destination: 4 wide characters ("üßer") are
                   expected regardless of the limit, and src is not
                   advanced.  A copy of the state is passed, so that state
                   itself keeps the pending byte.  */
                src = input + 2;
                temp_state = state;
                ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
                ASSERT (ret == 4);
                ASSERT (src == input + 2);
                ASSERT (!mbsinit (&state));

                /* Real conversion, continuing from the pending byte in
                   state; limited to 2 wide characters in the first pass.  */
                src = input + 2;
                ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 2, &state);
                ASSERT (ret == (unlimited ? 4 : 2));
                ASSERT (src == (unlimited ? NULL : input + 5));
                ASSERT (wctob (buf[0]) == EOF);
                ASSERT (wctob (buf[1]) == EOF);
                if (unlimited)
                  {
                    ASSERT (buf[2] == 'e');
                    ASSERT (buf[3] == 'r');
                    ASSERT (buf[4] == 0);
                    ASSERT (buf[5] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[2] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '3':
              /* Locale encoding is EUC-JP.  */
              {
                char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
                memset (&state, '\0', sizeof (mbstate_t));

                /* Consume '<', then clear it in the buffer.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == '<');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                /* Consume the complete two-byte character '日', then clear
                   both bytes in the buffer.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 2, &state);
                ASSERT (ret == 2);
                ASSERT (wctob (wc) == EOF);
                ASSERT (mbsinit (&state));
                input[1] = '\0';
                input[2] = '\0';

                /* Feed only the first byte of '本': the character is
                   incomplete, so state leaves the initial state.  The byte
                   is then cleared in the buffer, so it is kept in state
                   only.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 3, 1, &state);
                ASSERT (ret == (size_t)(-2));
                ASSERT (wc == (wchar_t) 0xBADFACE);
                ASSERT (!mbsinit (&state));
                input[3] = '\0';

                /* NULL destination: 3 wide characters ("本語>") are
                   expected regardless of the limit, and src is not
                   advanced.  A copy of the state is passed, so that state
                   itself keeps the pending byte.  */
                src = input + 4;
                temp_state = state;
                ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
                ASSERT (ret == 3);
                ASSERT (src == input + 4);
                ASSERT (!mbsinit (&state));

                /* Real conversion, continuing from the pending byte in
                   state; limited to 2 wide characters in the first pass.  */
                src = input + 4;
                ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 2, &state);
                ASSERT (ret == (unlimited ? 3 : 2));
                ASSERT (src == (unlimited ? NULL : input + 7));
                ASSERT (wctob (buf[0]) == EOF);
                ASSERT (wctob (buf[1]) == EOF);
                if (unlimited)
                  {
                    ASSERT (buf[2] == '>');
                    ASSERT (buf[3] == 0);
                    ASSERT (buf[4] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[2] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '4':
              /* Locale encoding is GB18030.  */
              {
                char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
                memset (&state, '\0', sizeof (mbstate_t));

                /* Consume 'B', then clear it in the buffer.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == 'B');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                /* Feed only the first byte of 'ü': the character is
                   incomplete, so state leaves the initial state.  The byte
                   is then cleared in the buffer, so it is kept in state
                   only.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 1, &state);
                ASSERT (ret == (size_t)(-2));
                ASSERT (wc == (wchar_t) 0xBADFACE);
                ASSERT (!mbsinit (&state));
                input[1] = '\0';

                /* NULL destination: 4 wide characters ("üßer") are
                   expected regardless of the limit, and src is not
                   advanced.  A copy of the state is passed, so that state
                   itself keeps the pending byte.  */
                src = input + 2;
                temp_state = state;
                ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
                ASSERT (ret == 4);
                ASSERT (src == input + 2);
                ASSERT (!mbsinit (&state));

                /* Real conversion, continuing from the pending byte in
                   state; limited to 2 wide characters in the first pass
                   (the rest of the two-byte 'ü' plus the four-byte 'ß').  */
                src = input + 2;
                ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 2, &state);
                ASSERT (ret == (unlimited ? 4 : 2));
                ASSERT (src == (unlimited ? NULL : input + 7));
                ASSERT (wctob (buf[0]) == EOF);
                ASSERT (wctob (buf[1]) == EOF);
                if (unlimited)
                  {
                    ASSERT (buf[2] == 'e');
                    ASSERT (buf[3] == 'r');
                    ASSERT (buf[4] == 0);
                    ASSERT (buf[5] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[2] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '5':
              /* C or POSIX locale.  */
              {
                char input[] = "n/a";
                memset (&state, '\0', sizeof (mbstate_t));

                /* NULL destination: the full length is expected regardless
                   of the limit, and src is not advanced.  */
                src = input;
                temp_state = state;
                ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 1, &temp_state);
                ASSERT (ret == 3);
                ASSERT (src == input);
                ASSERT (mbsinit (&state));

                /* Real conversion, limited to 1 wide character in the
                   first pass.  */
                src = input;
                ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 1, &state);
                ASSERT (ret == (unlimited ? 3 : 1));
                ASSERT (src == (unlimited ? NULL : input + 1));
                ASSERT (buf[0] == 'n');
                if (unlimited)
                  {
                    ASSERT (buf[1] == '/');
                    ASSERT (buf[2] == 'a');
                    ASSERT (buf[3] == 0);
                    ASSERT (buf[4] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[1] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              {
                int c;
                char input[2];

                memset (&state, '\0', sizeof (mbstate_t));
                /* We are testing all nonnull bytes, each as a one-byte
                   string.  */
                for (c = 1; c < 0x100; c++)
                  {
                    input[0] = c;
                    input[1] = '\0';

                    src = input;
                    ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 1, &state);
                    ASSERT (ret == 1);
                    ASSERT (src == input);
                    ASSERT (mbsinit (&state));

                    buf[0] = buf[1] = (wchar_t) 0xBADFACE;
                    src = input;
                    ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 1, &state);
                    /* POSIX:2018 says: "In the POSIX locale an [EILSEQ] error
                       cannot occur since all byte values are valid characters."  */
                    ASSERT (ret == 1);
                    ASSERT (src == (unlimited ? NULL : input + 1));
                    if (c < 0x80)
                      /* c is an ASCII character.  */
                      ASSERT (buf[0] == c);
                    else
                      /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF.
                         But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF.  */
                      ASSERT (buf[0] == (btowc (c) == 0xDF00 + c ? btowc (c) : c));
                    ASSERT (mbsinit (&state));
                  }
              }
              break;

            default:
              /* Unknown selector.  */
              return 1;
            }
        }

      return 0;
    }

  /* No selector argument was given.  */
  return 1;
}