1 /* Test of conversion of string to wide string.
2 Copyright (C) 2008-2023 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 /* Written by Bruno Haible <bruno@clisp.org>, 2008. */
18
19 #include <config.h>
20
21 #include <wchar.h>
22
23 #include "signature.h"
/* Signature check for mbsrtowcs: the arguments list the function name, its
   return type (size_t) and its parameter types.
   NOTE(review): SIGNATURE_CHECK is provided by "signature.h", which is not
   visible here; presumably it verifies at compile time that <wchar.h>
   declares mbsrtowcs with exactly this prototype -- confirm against that
   header.  */
SIGNATURE_CHECK (mbsrtowcs, size_t, (wchar_t *, char const **, size_t,
                                     mbstate_t *));
26
27 #include <locale.h>
28 #include <stdio.h>
29 #include <string.h>
30
31 #include "macros.h"
32
/* Number of wide characters in the destination buffer handed to the
   per-encoding tests.  */
#define BUFSIZE 10

/* Convert the empty string with mbsrtowcs, with and without a destination
   and with a length limit of 0 and of 1.  Every call must return 0 and
   leave the conversion state initial; only the call that has both a
   destination and room for one wide character may store the terminating
   null wide character.  */
static void
test_nul_input (void)
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;
  const char *src;

  memset (&state, '\0', sizeof (mbstate_t));

  /* No destination, limit 0.  */
  src = "";
  ret = mbsrtowcs (NULL, &src, 0, &state);
  ASSERT (ret == 0);
  ASSERT (mbsinit (&state));

  /* No destination, limit 1.  */
  src = "";
  ret = mbsrtowcs (NULL, &src, 1, &state);
  ASSERT (ret == 0);
  ASSERT (mbsinit (&state));

  /* Destination given, but limit 0: the sentinel must survive.  */
  wc = (wchar_t) 0xBADFACE;
  src = "";
  ret = mbsrtowcs (&wc, &src, 0, &state);
  ASSERT (ret == 0);
  ASSERT (wc == (wchar_t) 0xBADFACE);
  ASSERT (mbsinit (&state));

  /* Destination given, limit 1: the terminating null is stored.  */
  wc = (wchar_t) 0xBADFACE;
  src = "";
  ret = mbsrtowcs (&wc, &src, 1, &state);
  ASSERT (ret == 0);
  ASSERT (wc == 0);
  ASSERT (mbsinit (&state));
}

/* Test for the C or POSIX locale (argv[1] starts with '1').
   BUF points to BUFSIZE wide characters that the caller has filled with
   the sentinel (wchar_t) 0xBADFACE.  If UNLIMITED is nonzero the whole
   buffer is offered to mbsrtowcs, otherwise only one wide character.  */
static void
test_c_locale (wchar_t *buf, int unlimited)
{
  const size_t limit = unlimited ? BUFSIZE : 1;
  mbstate_t state;
  mbstate_t temp_state;
  const char *src;
  size_t ret;

  /* A short ASCII string.  */
  {
    char input[] = "n/a";
    memset (&state, '\0', sizeof (mbstate_t));

    /* Counting only (no destination): the result is the full length,
       whatever the limit, and src is left alone.  */
    src = input;
    temp_state = state;
    ret = mbsrtowcs (NULL, &src, limit, &temp_state);
    ASSERT (ret == 3);
    ASSERT (src == input);
    ASSERT (mbsinit (&state));

    /* Real conversion into buf.  */
    src = input;
    ret = mbsrtowcs (buf, &src, limit, &state);
    ASSERT (ret == (unlimited ? 3 : 1));
    ASSERT (src == (unlimited ? NULL : input + 1));
    ASSERT (buf[0] == 'n');
    if (unlimited)
      {
        ASSERT (buf[1] == '/');
        ASSERT (buf[2] == 'a');
        ASSERT (buf[3] == 0);
        ASSERT (buf[4] == (wchar_t) 0xBADFACE);
      }
    else
      ASSERT (buf[1] == (wchar_t) 0xBADFACE);
    ASSERT (mbsinit (&state));
  }

  /* Every single-byte string.  */
  {
    int c;
    char input[2];

    memset (&state, '\0', sizeof (mbstate_t));
    /* We are testing all nonnull bytes.  */
    for (c = 1; c < 0x100; c++)
      {
        input[0] = c;
        input[1] = '\0';

        src = input;
        ret = mbsrtowcs (NULL, &src, limit, &state);
        ASSERT (ret == 1);
        ASSERT (src == input);
        ASSERT (mbsinit (&state));

        buf[0] = buf[1] = (wchar_t) 0xBADFACE;
        src = input;
        ret = mbsrtowcs (buf, &src, limit, &state);
        /* POSIX:2018 says: "In the POSIX locale an [EILSEQ] error
           cannot occur since all byte values are valid characters."  */
        ASSERT (ret == 1);
        ASSERT (src == (unlimited ? NULL : input + 1));
        if (c < 0x80)
          /* c is an ASCII character.  */
          ASSERT (buf[0] == c);
        else
          /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF.
             But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF.  */
          ASSERT (buf[0] == (btowc (c) == 0xDF00 + c ? btowc (c) : c));
        ASSERT (mbsinit (&state));
      }
  }
}

/* Test for a locale whose encoding is ISO-8859-1 or ISO-8859-15
   (argv[1] starts with '2').  The first two bytes of the input are
   consumed one at a time with mbrtowc; the rest is converted with
   mbsrtowcs.  BUF and UNLIMITED are as for test_c_locale.  */
static void
test_iso_8859 (wchar_t *buf, int unlimited)
{
  const size_t limit = unlimited ? BUFSIZE : 1;
  char input[] = "B\374\337er"; /* "Büßer" */
  mbstate_t state;
  mbstate_t temp_state;
  const char *src;
  wchar_t wc;
  size_t ret;

  memset (&state, '\0', sizeof (mbstate_t));

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input, 1, &state);
  ASSERT (ret == 1);
  ASSERT (wc == 'B');
  ASSERT (mbsinit (&state));
  /* Wipe the consumed byte; presumably so that a later call which wrongly
     looked back at it would produce a visibly different result.  */
  input[0] = '\0';

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 1, 1, &state);
  ASSERT (ret == 1);
  ASSERT (wctob (wc) == (unsigned char) '\374');
  ASSERT (mbsinit (&state));
  input[1] = '\0';

  /* Counting only: run on a copy of the state and check that the
     original state and src are unchanged.  */
  src = input + 2;
  temp_state = state;
  ret = mbsrtowcs (NULL, &src, limit, &temp_state);
  ASSERT (ret == 3);
  ASSERT (src == input + 2);
  ASSERT (mbsinit (&state));

  /* Real conversion of the remaining "\337er".  */
  src = input + 2;
  ret = mbsrtowcs (buf, &src, limit, &state);
  ASSERT (ret == (unlimited ? 3 : 1));
  ASSERT (src == (unlimited ? NULL : input + 3));
  ASSERT (wctob (buf[0]) == (unsigned char) '\337');
  if (unlimited)
    {
      ASSERT (buf[1] == 'e');
      ASSERT (buf[2] == 'r');
      ASSERT (buf[3] == 0);
      ASSERT (buf[4] == (wchar_t) 0xBADFACE);
    }
  else
    ASSERT (buf[1] == (wchar_t) 0xBADFACE);
  ASSERT (mbsinit (&state));
}

/* Test for a locale whose encoding is UTF-8 (argv[1] starts with '3').
   mbrtowc is fed 'B' and then only the first byte of the two-byte
   sequence for 'ü', so that mbsrtowcs starts from a non-initial state.
   BUF is as for test_c_locale; if UNLIMITED is zero only two wide
   characters are offered to mbsrtowcs.  */
static void
test_utf8 (wchar_t *buf, int unlimited)
{
  const size_t limit = unlimited ? BUFSIZE : 2;
  char input[] = "B\303\274\303\237er"; /* "Büßer" */
  mbstate_t state;
  mbstate_t temp_state;
  const char *src;
  wchar_t wc;
  size_t ret;

  memset (&state, '\0', sizeof (mbstate_t));

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input, 1, &state);
  ASSERT (ret == 1);
  ASSERT (wc == 'B');
  ASSERT (mbsinit (&state));
  input[0] = '\0';

  /* Only the lead byte: incomplete character, nothing is stored.  */
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 1, 1, &state);
  ASSERT (ret == (size_t)(-2));
  ASSERT (wc == (wchar_t) 0xBADFACE);
  ASSERT (!mbsinit (&state));
  input[1] = '\0';

  /* Counting only: run on a copy of the state; the original state must
     still be in the middle of a character afterwards.  */
  src = input + 2;
  temp_state = state;
  ret = mbsrtowcs (NULL, &src, limit, &temp_state);
  ASSERT (ret == 4);
  ASSERT (src == input + 2);
  ASSERT (!mbsinit (&state));

  /* Real conversion, continuing the pending character.  */
  src = input + 2;
  ret = mbsrtowcs (buf, &src, limit, &state);
  ASSERT (ret == (unlimited ? 4 : 2));
  ASSERT (src == (unlimited ? NULL : input + 5));
  ASSERT (wctob (buf[0]) == EOF);
  ASSERT (wctob (buf[1]) == EOF);
  if (unlimited)
    {
      ASSERT (buf[2] == 'e');
      ASSERT (buf[3] == 'r');
      ASSERT (buf[4] == 0);
      ASSERT (buf[5] == (wchar_t) 0xBADFACE);
    }
  else
    ASSERT (buf[2] == (wchar_t) 0xBADFACE);
  ASSERT (mbsinit (&state));
}

/* Test for a locale whose encoding is EUC-JP (argv[1] starts with '4').
   mbrtowc consumes '<', one complete two-byte character and the first
   byte of the next one; mbsrtowcs then continues from that non-initial
   state.  BUF is as for test_c_locale; if UNLIMITED is zero only two wide
   characters are offered to mbsrtowcs.  */
static void
test_euc_jp (wchar_t *buf, int unlimited)
{
  const size_t limit = unlimited ? BUFSIZE : 2;
  char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
  mbstate_t state;
  mbstate_t temp_state;
  const char *src;
  wchar_t wc;
  size_t ret;

  memset (&state, '\0', sizeof (mbstate_t));

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input, 1, &state);
  ASSERT (ret == 1);
  ASSERT (wc == '<');
  ASSERT (mbsinit (&state));
  input[0] = '\0';

  /* A complete two-byte character.  */
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 1, 2, &state);
  ASSERT (ret == 2);
  ASSERT (wctob (wc) == EOF);
  ASSERT (mbsinit (&state));
  input[1] = '\0';
  input[2] = '\0';

  /* Only the first byte of the next character: incomplete.  */
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 3, 1, &state);
  ASSERT (ret == (size_t)(-2));
  ASSERT (wc == (wchar_t) 0xBADFACE);
  ASSERT (!mbsinit (&state));
  input[3] = '\0';

  /* Counting only, on a copy of the state.  */
  src = input + 4;
  temp_state = state;
  ret = mbsrtowcs (NULL, &src, limit, &temp_state);
  ASSERT (ret == 3);
  ASSERT (src == input + 4);
  ASSERT (!mbsinit (&state));

  /* Real conversion, continuing the pending character.  */
  src = input + 4;
  ret = mbsrtowcs (buf, &src, limit, &state);
  ASSERT (ret == (unlimited ? 3 : 2));
  ASSERT (src == (unlimited ? NULL : input + 7));
  ASSERT (wctob (buf[0]) == EOF);
  ASSERT (wctob (buf[1]) == EOF);
  if (unlimited)
    {
      ASSERT (buf[2] == '>');
      ASSERT (buf[3] == 0);
      ASSERT (buf[4] == (wchar_t) 0xBADFACE);
    }
  else
    ASSERT (buf[2] == (wchar_t) 0xBADFACE);
  ASSERT (mbsinit (&state));
}

/* Test for a locale whose encoding is GB18030 (argv[1] starts with '5').
   mbrtowc consumes 'B' and the first byte of the two-byte sequence for
   'ü'; mbsrtowcs then continues from that non-initial state and also has
   to convert the four-byte sequence for 'ß'.  BUF is as for test_c_locale;
   if UNLIMITED is zero only two wide characters are offered to
   mbsrtowcs.  */
static void
test_gb18030 (wchar_t *buf, int unlimited)
{
  const size_t limit = unlimited ? BUFSIZE : 2;
  char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
  mbstate_t state;
  mbstate_t temp_state;
  const char *src;
  wchar_t wc;
  size_t ret;

  memset (&state, '\0', sizeof (mbstate_t));

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input, 1, &state);
  ASSERT (ret == 1);
  ASSERT (wc == 'B');
  ASSERT (mbsinit (&state));
  input[0] = '\0';

  /* Only the first byte of a two-byte character: incomplete.  */
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 1, 1, &state);
  ASSERT (ret == (size_t)(-2));
  ASSERT (wc == (wchar_t) 0xBADFACE);
  ASSERT (!mbsinit (&state));
  input[1] = '\0';

  /* Counting only, on a copy of the state.  */
  src = input + 2;
  temp_state = state;
  ret = mbsrtowcs (NULL, &src, limit, &temp_state);
  ASSERT (ret == 4);
  ASSERT (src == input + 2);
  ASSERT (!mbsinit (&state));

  /* Real conversion, continuing the pending character.  */
  src = input + 2;
  ret = mbsrtowcs (buf, &src, limit, &state);
  ASSERT (ret == (unlimited ? 4 : 2));
  ASSERT (src == (unlimited ? NULL : input + 7));
  ASSERT (wctob (buf[0]) == EOF);
  ASSERT (wctob (buf[1]) == EOF);
  if (unlimited)
    {
      ASSERT (buf[2] == 'e');
      ASSERT (buf[3] == 'r');
      ASSERT (buf[4] == 0);
      ASSERT (buf[5] == (wchar_t) 0xBADFACE);
    }
  else
    ASSERT (buf[2] == (wchar_t) 0xBADFACE);
  ASSERT (mbsinit (&state));
}

/* Entry point of the mbsrtowcs test.
   The locale is taken from the environment.  The first character of
   argv[1] selects the scenario: '1' C/POSIX locale, '2' ISO-8859-1 or
   ISO-8859-15, '3' UTF-8, '4' EUC-JP, '5' GB18030.  Each scenario is run
   twice, first with a small destination limit and then with the whole
   buffer.
   Returns 0 if the selected scenario ran to completion, and 1 if the
   locale could not be set, no argument was given, or the argument is not
   one of the above.  A failing ASSERT does whatever the ASSERT macro from
   "macros.h" does (not visible here).  */
int
main (int argc, char *argv[])
{
  int unlimited;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale (LC_ALL, "") == NULL)
    return 1;

  /* Test NUL byte input.  */
  test_nul_input ();

#ifdef __ANDROID__
  /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the
     "C" locale.  Furthermore, when you attempt to set the "C" or "POSIX"
     locale via setlocale(), what you get is a "C" locale with UTF-8 encoding,
     that is, effectively the "C.UTF-8" locale.  */
  if (argc > 1 && strcmp (argv[1], "1") == 0 && MB_CUR_MAX > 1)
    argv[1] = "3";
#endif

  if (argc <= 1)
    return 1;

  for (unlimited = 0; unlimited < 2; unlimited++)
    {
      wchar_t buf[BUFSIZE];
      size_t i;

      /* Pre-fill the destination with a sentinel so that the tests can
         tell which elements mbsrtowcs has written.  */
      for (i = 0; i < BUFSIZE; i++)
        buf[i] = (wchar_t) 0xBADFACE;

      switch (argv[1][0])
        {
        case '1':
          test_c_locale (buf, unlimited);
          break;

        case '2':
          test_iso_8859 (buf, unlimited);
          break;

        case '3':
          test_utf8 (buf, unlimited);
          break;

        case '4':
          test_euc_jp (buf, unlimited);
          break;

        case '5':
          test_gb18030 (buf, unlimited);
          break;

        default:
          return 1;
        }
    }

  return 0;
}