1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
3 *
4 * SPDX-License-Identifier: LGPL-2.1-or-later
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 /*
21 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
22 * file for a list of people on the GLib Team. See the ChangeLog
23 * files for a list of changes. These files are distributed with
24 * GLib at ftp://ftp.gtk.org/pub/gtk/.
25 */
26
27 #undef G_DISABLE_ASSERT
28 #undef G_LOG_DOMAIN
29
30 #include <locale.h>
31 #include <string.h>
32
33 #include <glib.h>
34
35 /* Bug 311337 */
36 static void
37 test_iconv_state (void)
38 {
39 const gchar *in = "\xf4\xe5\xf8\xe5\xed";
40 const gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
41 gchar *out;
42 gsize bytes_read = 0;
43 gsize bytes_written = 0;
44 GError *error = NULL;
45
46 out = g_convert (in, -1, "UTF-8", "CP1255",
47 &bytes_read, &bytes_written, &error);
48
49 if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION)
50 return; /* silently skip if CP1255 is not supported, see bug 467707 */
51
52 g_assert_no_error (error);
53 g_assert_cmpint (bytes_read, ==, 5);
54 g_assert_cmpint (bytes_written, ==, 10);
55 g_assert_cmpstr (out, ==, expected);
56 g_free (out);
57 }
58
59 /* Some tests involving "vulgar fraction one half" (U+00BD). This is
60 * represented in UTF-8 as \xC2\xBD, in ISO-8859-1 as \xBD, and is not
61 * represented in ISO-8859-15. */
62 static void
63 test_one_half (void)
64 {
65 const gchar *in_utf8 = "\xc2\xbd";
66 gchar *out;
67 gsize bytes_read = 0;
68 gsize bytes_written = 0;
69 GError *error = NULL;
70
71 out = g_convert (in_utf8, -1,
72 "ISO-8859-1", "UTF-8",
73 &bytes_read, &bytes_written,
74 &error);
75
76 g_assert_no_error (error);
77 g_assert_cmpint (bytes_read, ==, 2);
78 g_assert_cmpint (bytes_written, ==, 1);
79 g_assert_cmpstr (out, ==, "\xbd");
80 g_free (out);
81
82 out = g_convert (in_utf8, -1,
83 "ISO-8859-15", "UTF-8",
84 &bytes_read, &bytes_written,
85 &error);
86
87 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
88 g_assert_cmpint (bytes_read, ==, 0);
89 g_assert_cmpint (bytes_written, ==, 0);
90 g_assert_cmpstr (out, ==, NULL);
91 g_clear_error (&error);
92 g_free (out);
93
94 out = g_convert_with_fallback (in_utf8, -1,
95 "ISO8859-15", "UTF-8",
96 "a",
97 &bytes_read, &bytes_written,
98 &error);
99
100 g_assert_no_error (error);
101 g_assert_cmpint (bytes_read, ==, 2);
102 g_assert_cmpint (bytes_written, ==, 1);
103 g_assert_cmpstr (out, ==, "a");
104 g_free (out);
105 }
106
107 static void
108 test_byte_order (void)
109 {
110 gchar in_be[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
111 gchar in_le[4] = { 0xff, 0xfe, 0x93, 0x03};
112 const gchar *expected = "\xce\x93";
113 gchar *out;
114 gsize bytes_read = 0;
115 gsize bytes_written = 0;
116 GError *error = NULL;
117
118 out = g_convert (in_be, sizeof (in_be),
119 "UTF-8", "UTF-16",
120 &bytes_read, &bytes_written,
121 &error);
122
123 g_assert_no_error (error);
124 g_assert_cmpint (bytes_read, ==, 4);
125 g_assert_cmpint (bytes_written, ==, 2);
126 g_assert_cmpstr (out, ==, expected);
127 g_free (out);
128
129 out = g_convert (in_le, sizeof (in_le),
130 "UTF-8", "UTF-16",
131 &bytes_read, &bytes_written,
132 &error);
133
134 g_assert_no_error (error);
135 g_assert_cmpint (bytes_read, ==, 4);
136 g_assert_cmpint (bytes_written, ==, 2);
137 g_assert_cmpstr (out, ==, expected);
138 g_free (out);
139 }
140
141 static void
142 check_utf8_to_ucs4 (const char *utf8,
143 gsize utf8_len,
144 const gunichar *ucs4,
145 glong ucs4_len,
146 glong error_pos)
147 {
148 gunichar *result, *result2, *result3;
149 glong items_read, items_read2;
150 glong items_written, items_written2;
151 GError *error, *error2, *error3;
152 gint i;
153
154 if (!error_pos)
155 {
156 /* check the fast conversion */
157 result = g_utf8_to_ucs4_fast (utf8, utf8_len, &items_written);
158
159 g_assert_cmpint (items_written, ==, ucs4_len);
160 g_assert (result);
161 for (i = 0; i <= items_written; i++)
162 g_assert (result[i] == ucs4[i]);
163
164 g_free (result);
165 }
166
167 error = NULL;
168 result = g_utf8_to_ucs4 (utf8, utf8_len, &items_read, &items_written, &error);
169
170 if (utf8_len == strlen (utf8))
171 {
172 /* check that len == -1 yields identical results */
173 error2 = NULL;
174 result2 = g_utf8_to_ucs4 (utf8, -1, &items_read2, &items_written2, &error2);
175 g_assert (error || items_read2 == items_read);
176 g_assert (error || items_written2 == items_written);
177 g_assert_cmpint (!!result, ==, !!result2);
178 g_assert_cmpint (!!error, ==, !!error2);
179 if (result)
180 for (i = 0; i <= items_written; i++)
181 g_assert (result[i] == result2[i]);
182
183 g_free (result2);
184 if (error2)
185 g_error_free (error2);
186 }
187
188 error3 = NULL;
189 result3 = g_utf8_to_ucs4 (utf8, utf8_len, NULL, NULL, &error3);
190
191 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
192 {
193 g_assert_no_error (error);
194 g_assert_cmpint (items_read, ==, error_pos);
195 g_assert_cmpint (items_written, ==, ucs4_len);
196 g_assert (result);
197 for (i = 0; i <= items_written; i++)
198 g_assert (result[i] == ucs4[i]);
199 g_error_free (error3);
200 }
201 else if (error_pos)
202 {
203 g_assert (error != NULL);
204 g_assert (result == NULL);
205 g_assert_cmpint (items_read, ==, error_pos);
206 g_error_free (error);
207
208 g_assert (error3 != NULL);
209 g_assert (result3 == NULL);
210 g_error_free (error3);
211 }
212 else
213 {
214 g_assert_no_error (error);
215 g_assert_cmpint (items_read, ==, utf8_len);
216 g_assert_cmpint (items_written, ==, ucs4_len);
217 g_assert (result);
218 for (i = 0; i <= items_written; i++)
219 g_assert (result[i] == ucs4[i]);
220
221 g_assert_no_error (error3);
222 g_assert (result3);
223 for (i = 0; i <= ucs4_len; i++)
224 g_assert (result3[i] == ucs4[i]);
225 }
226
227 g_free (result);
228 g_free (result3);
229 }
230
231 static void
232 check_ucs4_to_utf8 (const gunichar *ucs4,
233 glong ucs4_len,
234 const char *utf8,
235 glong utf8_len,
236 glong error_pos)
237 {
238 gchar *result, *result2, *result3;
239 glong items_read, items_read2;
240 glong items_written, items_written2;
241 GError *error, *error2, *error3;
242
243 error = NULL;
244 result = g_ucs4_to_utf8 (ucs4, ucs4_len, &items_read, &items_written, &error);
245
246 if (ucs4[ucs4_len] == 0)
247 {
248 /* check that len == -1 yields identical results */
249 error2 = NULL;
250 result2 = g_ucs4_to_utf8 (ucs4, -1, &items_read2, &items_written2, &error2);
251
252 g_assert (error || items_read2 == items_read);
253 g_assert (error || items_written2 == items_written);
254 g_assert_cmpint (!!result, ==, !!result2);
255 g_assert_cmpint (!!error, ==, !!error2);
256 if (result)
257 g_assert_cmpstr (result, ==, result2);
258
259 g_free (result2);
260 if (error2)
261 g_error_free (error2);
262 }
263
264 error3 = NULL;
265 result3 = g_ucs4_to_utf8 (ucs4, ucs4_len, NULL, NULL, &error3);
266
267 if (error_pos)
268 {
269 g_assert (error != NULL);
270 g_assert (result == NULL);
271 g_assert_cmpint (items_read, ==, error_pos);
272 g_error_free (error);
273
274 g_assert (error3 != NULL);
275 g_assert (result3 == NULL);
276 g_error_free (error3);
277 }
278 else
279 {
280 g_assert_no_error (error);
281 g_assert_cmpint (items_read, ==, ucs4_len);
282 g_assert_cmpint (items_written, ==, utf8_len);
283 g_assert (result);
284 g_assert_cmpstr (result, ==, utf8);
285
286 g_assert_no_error (error3);
287 g_assert (result3);
288 g_assert_cmpstr (result3, ==, utf8);
289 }
290
291 g_free (result);
292 g_free (result3);
293 }
294
295 static void
296 check_utf8_to_utf16 (const char *utf8,
297 gsize utf8_len,
298 const gunichar2 *utf16,
299 glong utf16_len,
300 glong error_pos)
301 {
302 gunichar2 *result, *result2, *result3;
303 glong items_read, items_read2;
304 glong items_written, items_written2;
305 GError *error, *error2, *error3;
306 gint i;
307
308 error = NULL;
309 result = g_utf8_to_utf16 (utf8, utf8_len, &items_read, &items_written, &error);
310
311 if (utf8_len == strlen (utf8))
312 {
313 /* check that len == -1 yields identical results */
314 error2 = NULL;
315 result2 = g_utf8_to_utf16 (utf8, -1, &items_read2, &items_written2, &error2);
316 g_assert (error || items_read2 == items_read);
317 g_assert (error || items_written2 == items_written);
318 g_assert_cmpint (!!result, ==, !!result2);
319 g_assert_cmpint (!!error, ==, !!error2);
320 if (result)
321 for (i = 0; i <= items_written; i++)
322 g_assert (result[i] == result2[i]);
323
324 g_free (result2);
325 if (error2)
326 g_error_free (error2);
327 }
328
329 error3 = NULL;
330 result3 = g_utf8_to_utf16 (utf8, utf8_len, NULL, NULL, &error3);
331
332 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
333 {
334 g_assert_no_error (error);
335 g_assert_cmpint (items_read, ==, error_pos);
336 g_assert_cmpint (items_written, ==, utf16_len);
337 g_assert (result);
338 for (i = 0; i <= items_written; i++)
339 g_assert (result[i] == utf16[i]);
340 g_error_free (error3);
341 }
342 else if (error_pos)
343 {
344 g_assert (error != NULL);
345 g_assert (result == NULL);
346 g_assert_cmpint (items_read, ==, error_pos);
347 g_error_free (error);
348
349 g_assert (error3 != NULL);
350 g_assert (result3 == NULL);
351 g_error_free (error3);
352 }
353 else
354 {
355 g_assert_no_error (error);
356 g_assert_cmpint (items_read, ==, utf8_len);
357 g_assert_cmpint (items_written, ==, utf16_len);
358 g_assert (result);
359 for (i = 0; i <= items_written; i++)
360 g_assert (result[i] == utf16[i]);
361
362 g_assert_no_error (error3);
363 g_assert (result3);
364 for (i = 0; i <= utf16_len; i++)
365 g_assert (result3[i] == utf16[i]);
366 }
367
368 g_free (result);
369 g_free (result3);
370 }
371
372 static void
373 check_utf16_to_utf8 (const gunichar2 *utf16,
374 glong utf16_len,
375 const char *utf8,
376 glong utf8_len,
377 glong error_pos)
378 {
379 gchar *result, *result2, *result3;
380 glong items_read, items_read2;
381 glong items_written, items_written2;
382 GError *error, *error2, *error3;
383
384 error = NULL;
385 result = g_utf16_to_utf8 (utf16, utf16_len, &items_read, &items_written, &error);
386 if (utf16[utf16_len] == 0)
387 {
388 /* check that len == -1 yields identical results */
389 error2 = NULL;
390 result2 = g_utf16_to_utf8 (utf16, -1, &items_read2, &items_written2, &error2);
391
392 g_assert (error || items_read2 == items_read);
393 g_assert (error || items_written2 == items_written);
394 g_assert_cmpint (!!result, ==, !!result2);
395 g_assert_cmpint (!!error, ==, !!error2);
396 if (result)
397 g_assert_cmpstr (result, ==, result2);
398
399 g_free (result2);
400 if (error2)
401 g_error_free (error2);
402 }
403
404 error3 = NULL;
405 result3 = g_utf16_to_utf8 (utf16, utf16_len, NULL, NULL, &error3);
406
407 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
408 {
409 g_assert_no_error (error);
410 g_assert_cmpint (items_read, ==, error_pos);
411 g_assert_cmpint (items_read + 1, ==, utf16_len);
412 g_assert_cmpint (items_written, ==, utf8_len);
413 g_assert (result);
414 g_assert_cmpstr (result, ==, utf8);
415 g_error_free (error3);
416 }
417 else if (error_pos)
418 {
419 g_assert (error != NULL);
420 g_assert (result == NULL);
421 g_assert_cmpint (items_read, ==, error_pos);
422 g_error_free (error);
423
424 g_assert (error3 != NULL);
425 g_assert (result3 == NULL);
426 g_error_free (error3);
427 }
428 else
429 {
430 g_assert_no_error (error);
431 g_assert_cmpint (items_read, ==, utf16_len);
432 g_assert_cmpint (items_written, ==, utf8_len);
433 g_assert (result);
434 g_assert_cmpstr (result, ==, utf8);
435
436 g_assert_no_error (error3);
437 g_assert (result3);
438 g_assert_cmpstr (result3, ==, utf8);
439 }
440
441 g_free (result);
442 g_free (result3);
443 }
444
445 static void
446 check_ucs4_to_utf16 (const gunichar *ucs4,
447 glong ucs4_len,
448 const gunichar2 *utf16,
449 glong utf16_len,
450 glong error_pos)
451 {
452 gunichar2 *result, *result2, *result3;
453 glong items_read, items_read2;
454 glong items_written, items_written2;
455 GError *error, *error2, *error3;
456 gint i;
457
458 error = NULL;
459 result = g_ucs4_to_utf16 (ucs4, ucs4_len, &items_read, &items_written, &error);
460
461 if (ucs4[ucs4_len] == 0)
462 {
463 /* check that len == -1 yields identical results */
464 error2 = NULL;
465 result2 = g_ucs4_to_utf16 (ucs4, -1, &items_read2, &items_written2, &error2);
466
467 g_assert (error || items_read2 == items_read);
468 g_assert (error || items_written2 == items_written);
469 g_assert_cmpint (!!result, ==, !!result2);
470 g_assert_cmpint (!!error, ==, !!error2);
471 if (result)
472 for (i = 0; i <= utf16_len; i++)
473 g_assert (result[i] == result2[i]);
474
475 g_free (result2);
476 if (error2)
477 g_error_free (error2);
478 }
479
480 error3 = NULL;
481 result3 = g_ucs4_to_utf16 (ucs4, -1, NULL, NULL, &error3);
482
483 if (error_pos)
484 {
485 g_assert (error != NULL);
486 g_assert (result == NULL);
487 g_assert_cmpint (items_read, ==, error_pos);
488 g_error_free (error);
489
490 g_assert (error3 != NULL);
491 g_assert (result3 == NULL);
492 g_error_free (error3);
493 }
494 else
495 {
496 g_assert_no_error (error);
497 g_assert_cmpint (items_read, ==, ucs4_len);
498 g_assert_cmpint (items_written, ==, utf16_len);
499 g_assert (result);
500 for (i = 0; i <= utf16_len; i++)
501 g_assert (result[i] == utf16[i]);
502
503 g_assert_no_error (error3);
504 g_assert (result3);
505 for (i = 0; i <= utf16_len; i++)
506 g_assert (result3[i] == utf16[i]);
507 }
508
509 g_free (result);
510 g_free (result3);
511 }
512
513 static void
514 check_utf16_to_ucs4 (const gunichar2 *utf16,
515 glong utf16_len,
516 const gunichar *ucs4,
517 glong ucs4_len,
518 glong error_pos)
519 {
520 gunichar *result, *result2, *result3;
521 glong items_read, items_read2;
522 glong items_written, items_written2;
523 GError *error, *error2, *error3;
524 gint i;
525
526 error = NULL;
527 result = g_utf16_to_ucs4 (utf16, utf16_len, &items_read, &items_written, &error);
528 if (utf16[utf16_len] == 0)
529 {
530 /* check that len == -1 yields identical results */
531 error2 = NULL;
532 result2 = g_utf16_to_ucs4 (utf16, -1, &items_read2, &items_written2, &error2);
533 g_assert (error || items_read2 == items_read);
534 g_assert (error || items_written2 == items_written);
535 g_assert_cmpint (!!result, ==, !!result2);
536 g_assert_cmpint (!!error, ==, !!error2);
537 if (result)
538 for (i = 0; i <= items_written; i++)
539 g_assert (result[i] == result2[i]);
540
541 g_free (result2);
542 if (error2)
543 g_error_free (error2);
544 }
545
546 error3 = NULL;
547 result3 = g_utf16_to_ucs4 (utf16, utf16_len, NULL, NULL, &error3);
548
549 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
550 {
551 g_assert_no_error (error);
552 g_assert_cmpint (items_read, ==, error_pos);
553 g_assert_cmpint (items_read + 1, ==, utf16_len);
554 g_assert_cmpint (items_written, ==, ucs4_len);
555 g_assert (result);
556 for (i = 0; i <= items_written; i++)
557 g_assert (result[i] == ucs4[i]);
558 g_error_free (error3);
559 }
560 else if (error_pos)
561 {
562 g_assert (error != NULL);
563 g_assert (result == NULL);
564 g_assert_cmpint (items_read, ==, error_pos);
565 g_error_free (error);
566
567 g_assert (error3 != NULL);
568 g_assert (result3 == NULL);
569 g_error_free (error3);
570 }
571 else
572 {
573 g_assert_no_error (error);
574 g_assert_cmpint (items_read, ==, utf16_len);
575 g_assert_cmpint (items_written, ==, ucs4_len);
576 g_assert (result);
577 for (i = 0; i <= ucs4_len; i++)
578 g_assert (result[i] == ucs4[i]);
579
580 g_assert_no_error (error3);
581 g_assert (result3);
582 for (i = 0; i <= ucs4_len; i++)
583 g_assert (result3[i] == ucs4[i]);
584 }
585
586 g_free (result);
587 g_free (result3);
588 }
589
590 static void
591 test_unicode_conversions (void)
592 {
593 const char *utf8;
594 gunichar ucs4[100];
595 gunichar2 utf16[100];
596
597 utf8 = "abc";
598 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
599 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
600
601 check_utf8_to_ucs4 (utf8, 3, ucs4, 3, 0);
602 check_ucs4_to_utf8 (ucs4, 3, utf8, 3, 0);
603 check_utf8_to_utf16 (utf8, 3, utf16, 3, 0);
604 check_utf16_to_utf8 (utf16, 3, utf8, 3, 0);
605 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
606 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
607
608 utf8 = "\316\261\316\262\316\263";
609 ucs4[0] = 0x03b1; ucs4[1] = 0x03b2; ucs4[2] = 0x03b3; ucs4[3] = 0;
610 utf16[0] = 0x03b1; utf16[1] = 0x03b2; utf16[2] = 0x03b3; utf16[3] = 0;
611
612 check_utf8_to_ucs4 (utf8, 6, ucs4, 3, 0);
613 check_ucs4_to_utf8 (ucs4, 3, utf8, 6, 0);
614 check_utf8_to_utf16 (utf8, 6, utf16, 3, 0);
615 check_utf16_to_utf8 (utf16, 3, utf8, 6, 0);
616 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
617 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
618
619 /* partial utf8 character */
620 utf8 = "abc\316";
621 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
622 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
623
624 check_utf8_to_ucs4 (utf8, 4, ucs4, 3, 3);
625 check_utf8_to_utf16 (utf8, 4, utf16, 3, 3);
626
627 /* invalid utf8 */
628 utf8 = "abc\316\316";
629 ucs4[0] = 0;
630 utf16[0] = 0;
631
632 check_utf8_to_ucs4 (utf8, 5, ucs4, 0, 3);
633 check_utf8_to_utf16 (utf8, 5, utf16, 0, 3);
634
635 /* partial utf16 character */
636 utf8 = "ab";
637 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0;
638 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xd801; utf16[3] = 0;
639
640 check_utf16_to_utf8 (utf16, 3, utf8, 2, 2);
641 check_utf16_to_ucs4 (utf16, 3, ucs4, 2, 2);
642
643 /* invalid utf16 */
644 utf8 = NULL;
645 ucs4[0] = 0;
646 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xdc01; utf16[3] = 0;
647
648 check_utf16_to_utf8 (utf16, 3, utf8, 0, 2);
649 check_utf16_to_ucs4 (utf16, 3, ucs4, 0, 2);
650
651 /* invalid ucs4 */
652 utf8 = NULL;
653 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x80000000; ucs4[3] = 0;
654 utf16[0] = 0;
655
656 check_ucs4_to_utf8 (ucs4, 3, utf8, 0, 2);
657 check_ucs4_to_utf16 (ucs4, 3, utf16, 0, 2);
658 }
659
660 static void
661 test_filename_utf8 (void)
662 {
663 const gchar *filename = "/my/path/to/foo";
664 gchar *utf8;
665 gchar *back;
666 GError *error;
667
668 error = NULL;
669 utf8 = g_filename_to_utf8 (filename, -1, NULL, NULL, &error);
670 g_assert_no_error (error);
671 back = g_filename_from_utf8 (utf8, -1, NULL, NULL, &error);
672 g_assert_no_error (error);
673 g_assert_cmpstr (back, ==, filename);
674
675 g_free (utf8);
676 g_free (back);
677 }
678
679 static void
680 test_filename_display (void)
681 {
682 const gchar *filename = "/my/path/to/foo";
683 char *display;
684
685 display = g_filename_display_basename (filename);
686 g_assert_cmpstr (display, ==, "foo");
687
688 g_free (display);
689 }
690
691 /* g_convert() should accept and produce text buffers with embedded
692 * nul bytes/characters.
693 */
694 static void
695 test_convert_embedded_nul (void)
696 {
697 gchar *res;
698 gsize bytes_read, bytes_written;
699 GError *error = NULL;
700
701 res = g_convert ("ab\0\xf6", 4, "UTF-8", "ISO-8859-1",
702 &bytes_read, &bytes_written, &error);
703 g_assert_no_error (error);
704 g_assert_cmpuint (bytes_read, ==, 4);
705 g_assert_cmpmem (res, bytes_written, "ab\0\xc3\xb6", 5);
706 g_free (res);
707 }
708
709 static void
710 test_locale_to_utf8_embedded_nul (void)
711 {
712 g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8",
713 0, G_TEST_SUBPROCESS_DEFAULT);
714 g_test_trap_assert_passed ();
715 g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv",
716 0, G_TEST_SUBPROCESS_DEFAULT);
717 g_test_trap_assert_passed ();
718 }
719
720 /* Test that embedded nul characters in UTF-8 input to g_locale_to_utf8()
721 * result in an error.
722 */
723 static void
724 test_locale_to_utf8_embedded_nul_utf8 (void)
725 {
726 gchar *res;
727 gsize bytes_read;
728 GError *error = NULL;
729
730 setlocale (LC_ALL, "");
731 g_setenv ("CHARSET", "UTF-8", TRUE);
732 g_assert_true (g_get_charset (NULL));
733
734 res = g_locale_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
735
736 g_assert_null (res);
737 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
738 g_assert_cmpuint (bytes_read, ==, 2);
739 g_error_free (error);
740 }
741
742 /* Test that embedded nul characters in output of g_locale_to_utf8(),
743 * when converted from non-UTF8 input, result in an error.
744 */
745 static void
746 test_locale_to_utf8_embedded_nul_iconv (void)
747 {
748 gchar *res;
749 GError *error = NULL;
750
751 setlocale (LC_ALL, "C");
752 g_setenv ("CHARSET", "US-ASCII", TRUE);
753 g_assert_false (g_get_charset (NULL));
754
755 res = g_locale_to_utf8 ("ab\0c", 4, NULL, NULL, &error);
756
757 g_assert_null (res);
758 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL);
759 g_error_free (error);
760 }
761
762 static void
763 test_locale_from_utf8_embedded_nul (void)
764 {
765 g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8",
766 0, G_TEST_SUBPROCESS_DEFAULT);
767 g_test_trap_assert_passed ();
768 g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv",
769 0, G_TEST_SUBPROCESS_DEFAULT);
770 g_test_trap_assert_passed ();
771 }
772
773 /* Test that embedded nul characters in input to g_locale_from_utf8(),
774 * when converting (copying) to UTF-8 output, result in an error.
775 */
776 static void
777 test_locale_from_utf8_embedded_nul_utf8 (void)
778 {
779 gchar *res;
780 gsize bytes_read;
781 GError *error = NULL;
782
783 setlocale (LC_ALL, "");
784 g_setenv ("CHARSET", "UTF-8", TRUE);
785 g_assert_true (g_get_charset (NULL));
786
787 res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
788
789 g_assert_null (res);
790 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
791 g_assert_cmpuint (bytes_read, ==, 2);
792 g_error_free (error);
793 }
794
795 /* Test that embedded nul characters in input to g_locale_from_utf8(),
796 * when converting to non-UTF-8 output, result in an error.
797 */
798 static void
799 test_locale_from_utf8_embedded_nul_iconv (void)
800 {
801 gchar *res;
802 gsize bytes_read;
803 GError *error = NULL;
804
805 setlocale (LC_ALL, "C");
806 g_setenv ("CHARSET", "US-ASCII", TRUE);
807 g_assert_false (g_get_charset (NULL));
808
809 res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
810
811 g_assert_null (res);
812 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
813 g_assert_cmpuint (bytes_read, ==, 2);
814 g_error_free (error);
815 }
816
817 static void
818 test_filename_to_utf8_embedded_nul (void)
819 {
820 g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8",
821 0, G_TEST_SUBPROCESS_DEFAULT);
822 g_test_trap_assert_passed ();
823 g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv",
824 0, G_TEST_SUBPROCESS_DEFAULT);
825 g_test_trap_assert_passed ();
826 }
827
828 /* Test that embedded nul characters in UTF-8 input to g_filename_to_utf8()
829 * result in an error.
830 */
831 static void
832 test_filename_to_utf8_embedded_nul_utf8 (void)
833 {
834 gchar *res;
835 gsize bytes_read;
836 GError *error = NULL;
837
838 #ifndef G_OS_WIN32
839 /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
840 g_setenv ("G_FILENAME_ENCODING", "UTF-8", TRUE);
841 g_assert_true (g_get_filename_charsets (NULL));
842 #endif
843
844 res = g_filename_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
845
846 g_assert_null (res);
847 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
848 g_assert_cmpuint (bytes_read, ==, 2);
849 g_error_free (error);
850 }
851
852 /* Test that embedded nul characters in non-UTF-8 input of g_filename_to_utf8()
853 * result in an error.
854 */
855 static void
856 test_filename_to_utf8_embedded_nul_iconv (void)
857 {
858 gchar *res;
859 gsize bytes_read;
860 GError *error = NULL;
861
862 #ifndef G_OS_WIN32
863 /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
864 g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
865 g_assert_false (g_get_filename_charsets (NULL));
866 #endif
867
868 res = g_filename_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
869
870 g_assert_null (res);
871 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
872 g_assert_cmpuint (bytes_read, ==, 2);
873 g_error_free (error);
874 }
875
876 static void
877 test_filename_from_utf8_embedded_nul (void)
878 {
879 g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8",
880 0, G_TEST_SUBPROCESS_DEFAULT);
881 g_test_trap_assert_passed ();
882 g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv",
883 0, G_TEST_SUBPROCESS_DEFAULT);
884 g_test_trap_assert_passed ();
885 }
886
887 /* Test that embedded nul characters in input to g_filename_from_utf8(),
888 * when converting (copying) to UTF-8 output, result in an error.
889 */
890 static void
891 test_filename_from_utf8_embedded_nul_utf8 (void)
892 {
893 gchar *res;
894 gsize bytes_read;
895 GError *error = NULL;
896
897 #ifndef G_OS_WIN32
898 /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
899 g_setenv ("G_FILENAME_ENCODING", "UTF-8", TRUE);
900 g_assert_true (g_get_filename_charsets (NULL));
901 #endif
902
903 res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
904
905 g_assert_null (res);
906 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
907 g_assert_cmpuint (bytes_read, ==, 2);
908 g_error_free (error);
909 }
910
911 /* Test that embedded nul characters in input to g_filename_from_utf8(),
912 * when converting to non-UTF-8 output, result in an error.
913 */
914 static void
915 test_filename_from_utf8_embedded_nul_iconv (void)
916 {
917 gchar *res;
918 gsize bytes_read;
919 GError *error = NULL;
920
921 #ifndef G_OS_WIN32
922 /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
923 g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
924 g_assert_false (g_get_filename_charsets (NULL));
925 #endif
926
927 res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
928
929 g_assert_null (res);
930 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
931 g_assert_cmpuint (bytes_read, ==, 2);
932 g_error_free (error);
933 }
934
935 static void
936 test_no_conv (void)
937 {
938 const gchar *in = "";
939 gchar *out G_GNUC_UNUSED;
940 gsize bytes_read = 0;
941 gsize bytes_written = 0;
942 GError *error = NULL;
943
944 out = g_convert (in, -1, "XXX", "UVZ",
945 &bytes_read, &bytes_written, &error);
946
947 /* error code is unreliable, since we mishandle errno there */
948 g_assert (error && error->domain == G_CONVERT_ERROR);
949 g_error_free (error);
950 }
951
952 static void
953 test_filename_from_uri_helper (const gchar *uri,
954 const gchar *expected_filename)
955 {
956 gchar *filename;
957 gchar *expected_platform_filename;
958 GError *error = NULL;
959
960 expected_platform_filename = g_strdup (expected_filename);
961 #ifdef G_OS_WIN32
962 for (gchar *p = expected_platform_filename; *p; p++)
963 {
964 if (*p == '/')
965 *p = '\\';
966 }
967 #endif
968
969 filename = g_filename_from_uri (uri, NULL, &error);
970 g_assert_no_error (error);
971 g_assert_cmpstr (filename, ==, expected_platform_filename);
972 g_free (filename);
973 g_free (expected_platform_filename);
974 }
975
976 static void
977 test_filename_from_uri_query_is_ignored (void)
978 {
979 test_filename_from_uri_helper ("file:///tmp/foo?bar", "/tmp/foo");
980 test_filename_from_uri_helper ("file:///tmp/foo?bar#baz", "/tmp/foo");
981 }
982
983 static void
984 test_filename_from_uri_fragment_is_ignored (void)
985 {
986 test_filename_from_uri_helper ("file:///tmp/foo#bar", "/tmp/foo");
987 /* this doesn't have a query, only a bizarre anchor */
988 test_filename_from_uri_helper ("file:///tmp/foo#bar?baz", "/tmp/foo");
989 }
990
991 int
992 main (int argc, char *argv[])
993 {
994 g_test_init (&argc, &argv, NULL);
995
996 g_test_add_func ("/conversion/no-conv", test_no_conv);
997 g_test_add_func ("/conversion/iconv-state", test_iconv_state);
998 g_test_add_func ("/conversion/illegal-sequence", test_one_half);
999 g_test_add_func ("/conversion/byte-order", test_byte_order);
1000 g_test_add_func ("/conversion/unicode", test_unicode_conversions);
1001 g_test_add_func ("/conversion/filename-utf8", test_filename_utf8);
1002 g_test_add_func ("/conversion/filename-display", test_filename_display);
1003 g_test_add_func ("/conversion/convert-embedded-nul", test_convert_embedded_nul);
1004 g_test_add_func ("/conversion/locale-to-utf8/embedded-nul", test_locale_to_utf8_embedded_nul);
1005 g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", test_locale_to_utf8_embedded_nul_utf8);
1006 g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", test_locale_to_utf8_embedded_nul_iconv);
1007 g_test_add_func ("/conversion/locale-from-utf8/embedded-nul", test_locale_from_utf8_embedded_nul);
1008 g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", test_locale_from_utf8_embedded_nul_utf8);
1009 g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", test_locale_from_utf8_embedded_nul_iconv);
1010 g_test_add_func ("/conversion/filename-to-utf8/embedded-nul", test_filename_to_utf8_embedded_nul);
1011 g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", test_filename_to_utf8_embedded_nul_utf8);
1012 g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", test_filename_to_utf8_embedded_nul_iconv);
1013 g_test_add_func ("/conversion/filename-from-utf8/embedded-nul", test_filename_from_utf8_embedded_nul);
1014 g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", test_filename_from_utf8_embedded_nul_utf8);
1015 g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", test_filename_from_utf8_embedded_nul_iconv);
1016 g_test_add_func ("/conversion/filename-from-uri/query-is-ignored", test_filename_from_uri_query_is_ignored);
1017 g_test_add_func ("/conversion/filename-from-uri/fragment-is-ignored", test_filename_from_uri_fragment_is_ignored);
1018
1019 return g_test_run ();
1020 }