/* Test of determining the byte length of a multibyte character (mbrlen).
2 Copyright (C) 2008-2023 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 /* Written by Bruno Haible <bruno@clisp.org>, 2023. */
18
19 #include <config.h>
20
21 #include <wchar.h>
22
23 #include "signature.h"
24 SIGNATURE_CHECK (mbrlen, size_t, (char const *, size_t, mbstate_t *));
25
26 #include <locale.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "macros.h"
32
/* Exercise mbrlen().

   The first part runs unconditionally, in whatever locale the environment
   selects.  The second part is chosen by the first character of argv[1]:
     '1'  ISO-8859-1 or ISO-8859-15
     '2'  UTF-8
     '3'  EUC-JP
     '4'  GB18030
     '5'  C or POSIX locale
   Returns 0 when a locale-specific part ran to completion, and 1 when the
   locale could not be set, when no argument was given, or when the argument
   does not start with one of the characters above.  */
int
main (int argc, char *argv[])
{
  /* The byte values probed by the single-byte test below.  */
  static char const basic_chars[] =
    "\t\v\f"
    " !\"#%&'()*+,-./"
    "0123456789"
    ":;<=>?"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "[\\]^_"
    "abcdefghijklmnopqrstuvwxyz"
    "{|}~";
  mbstate_t mbs;
  size_t len;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale (LC_ALL, "") == NULL)
    return 1;

  /* Zero-length input: no character can be completed, and the state must
     stay initial.  */
  {
    memset (&mbs, 0, sizeof mbs);
    len = mbrlen ("x", 0, &mbs);
    ASSERT (len == (size_t) -2);
    ASSERT (mbsinit (&mbs));
  }

  /* A NUL byte is reported with a return value of 0.  */
  {
    memset (&mbs, 0, sizeof mbs);
    len = mbrlen ("", 1, &mbs);
    ASSERT (len == 0);
    ASSERT (mbsinit (&mbs));
  }

  /* Single-byte input: walk all byte values in ascending order and probe
     those listed in basic_chars.  */
  {
    int ch;
    char one[1];

    memset (&mbs, 0, sizeof mbs);
    for (ch = 1; ch < 0x100; ch++)
      {
        if (strchr (basic_chars, ch) == NULL)
          continue;

        /* ch is in the ISO C "basic character set", hence expected to be
           an ASCII character.  */
        ASSERT (ch < 0x80);
        one[0] = ch;

        len = mbrlen (one, 1, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
      }
  }

  /* Special calling convention: a NULL string pointer.  */
  {
    memset (&mbs, 0, sizeof mbs);
    len = mbrlen (NULL, 5, &mbs);
    ASSERT (len == 0);
    ASSERT (mbsinit (&mbs));
  }

#ifdef __ANDROID__
  /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the
     "C" locale.  Furthermore, when you attempt to set the "C" or "POSIX"
     locale via setlocale(), what you get is a "C" locale with UTF-8 encoding,
     that is, effectively the "C.UTF-8" locale.  Hence run the UTF-8 test
     in place of the C locale test when the locale is multibyte.  */
  if (argc > 1 && strcmp (argv[1], "5") == 0 && MB_CUR_MAX > 1)
    argv[1] = "2";
#endif

  /* Without an argument there is no locale-specific part to run.  */
  if (argc <= 1)
    return 1;

  /* In each multibyte case below, the bytes that have been consumed are
     overwritten with NUL before the next call (presumably so that an
     implementation which re-reads them would be caught).  */
  switch (argv[1][0])
    {
    case '1':
      /* Locale encoding is ISO-8859-1 or ISO-8859-15: every byte is a
         complete character.  */
      {
        char text[] = "B\374\337er"; /* "Büßer" */
        memset (&mbs, 0, sizeof mbs);

        /* 'B' */
        len = mbrlen (&text[0], 1, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
        text[0] = '\0';

        /* 'ü' */
        len = mbrlen (&text[1], 1, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
        text[1] = '\0';

        /* 'ß' */
        len = mbrlen (&text[2], 3, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
        text[2] = '\0';

        /* 'e' */
        len = mbrlen (&text[3], 2, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
        text[3] = '\0';

        /* 'r' */
        len = mbrlen (&text[4], 1, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
      }
      return 0;

    case '2':
      /* Locale encoding is UTF-8.  */
      {
        char text[] = "B\303\274\303\237er"; /* "Büßer" */
        memset (&mbs, 0, sizeof mbs);

        /* 'B' */
        len = mbrlen (&text[0], 1, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
        text[0] = '\0';

        /* Only the first of the two bytes of 'ü': incomplete, and the
           state must remember it.  */
        len = mbrlen (&text[1], 1, &mbs);
        ASSERT (len == (size_t) -2);
        ASSERT (!mbsinit (&mbs));
        text[1] = '\0';

        /* The second byte completes 'ü'; only one more byte is consumed.  */
        len = mbrlen (&text[2], 5, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
        text[2] = '\0';

        /* 'ß', both bytes in a single call.  */
        len = mbrlen (&text[3], 4, &mbs);
        ASSERT (len == 2);
        ASSERT (mbsinit (&mbs));
        text[3] = '\0';
        text[4] = '\0';

        /* 'e' */
        len = mbrlen (&text[5], 2, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
        text[5] = '\0';

        /* 'r' */
        len = mbrlen (&text[6], 1, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
      }
      return 0;

    case '3':
      /* Locale encoding is EUC-JP.  */
      {
        char text[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
        memset (&mbs, 0, sizeof mbs);

        /* '<' */
        len = mbrlen (&text[0], 1, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
        text[0] = '\0';

        /* First two-byte character, both bytes in a single call.  */
        len = mbrlen (&text[1], 2, &mbs);
        ASSERT (len == 2);
        ASSERT (mbsinit (&mbs));
        text[1] = '\0';
        text[2] = '\0';

        /* Only the first byte of the second character: incomplete.  */
        len = mbrlen (&text[3], 1, &mbs);
        ASSERT (len == (size_t) -2);
        ASSERT (!mbsinit (&mbs));
        text[3] = '\0';

        /* The remaining byte completes the second character.  */
        len = mbrlen (&text[4], 4, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
        text[4] = '\0';

        /* Third two-byte character, both bytes in a single call.  */
        len = mbrlen (&text[5], 3, &mbs);
        ASSERT (len == 2);
        ASSERT (mbsinit (&mbs));
        text[5] = '\0';
        text[6] = '\0';

        /* '>' */
        len = mbrlen (&text[7], 1, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
      }
      return 0;

    case '4':
      /* Locale encoding is GB18030.  */
      {
        char text[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
        memset (&mbs, 0, sizeof mbs);

        /* 'B' */
        len = mbrlen (&text[0], 1, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
        text[0] = '\0';

        /* Only the first of the two bytes of 'ü': incomplete.  */
        len = mbrlen (&text[1], 1, &mbs);
        ASSERT (len == (size_t) -2);
        ASSERT (!mbsinit (&mbs));
        text[1] = '\0';

        /* The second byte completes 'ü'.  */
        len = mbrlen (&text[2], 7, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
        text[2] = '\0';

        /* 'ß', all four bytes in a single call.  */
        len = mbrlen (&text[3], 6, &mbs);
        ASSERT (len == 4);
        ASSERT (mbsinit (&mbs));
        text[3] = '\0';
        text[4] = '\0';
        text[5] = '\0';
        text[6] = '\0';

        /* 'e' */
        len = mbrlen (&text[7], 2, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
        text[7] = '\0';

        /* 'r' */
        len = mbrlen (&text[8], 1, &mbs);
        ASSERT (len == 1);
        ASSERT (mbsinit (&mbs));
      }
      return 0;

    case '5':
      /* C or POSIX locale.  */
      {
        int ch;
        char one[1];

        memset (&mbs, 0, sizeof mbs);
        /* We are testing all nonnull bytes.  */
        for (ch = 1; ch < 0x100; ch++)
          {
            one[0] = ch;

            len = mbrlen (one, 1, &mbs);
            /* POSIX:2018 says: "In the POSIX locale an [EILSEQ] error
               cannot occur since all byte values are valid characters."  */
            ASSERT (len == 1);
            ASSERT (mbsinit (&mbs));
          }
      }
      return 0;

    default:
      /* Unrecognized test selector.  */
      break;
    }

  return 1;
}