1 /* Convert 32-bit wide character to multibyte character.
2 Copyright (C) 2020-2023 Free Software Foundation, Inc.
3
4 This file is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2.1 of the
7 License, or (at your option) any later version.
8
9 This file is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 /* Written by Bruno Haible <bruno@clisp.org>, 2020. */
18
19 #include <config.h>
20
21 /* Specification. */
22 #include <uchar.h>
23
24 #include <errno.h>
25 #include <wchar.h>
26
27 #include "attribute.h" /* FALLTHROUGH */
28 #include "localcharset.h"
29 #include "streq.h"
30
31 #if GL_CHAR32_T_IS_UNICODE
32 # include "lc-charset-unicode.h"
33 #endif
34
/* Convert the 32-bit wide character WC to a multibyte character.

   S   is the output buffer, or NULL.  When S is NULL, WC is ignored and the
       call is treated like a conversion of the NUL character (ISO C
       specifies this equivalence for c32rtomb).
   WC  is the character to convert.
   PS  is the conversion state; it is passed unchanged to the underlying
       c32rtomb or wcrtomb call.  The built-in UTF-8 encoder does not use it.

   Return the number of bytes of the multibyte representation.  If WC has no
   valid representation, set errno to EILSEQ and return (size_t)(-1).  */
size_t
c32rtomb (char *s, char32_t wc, mbstate_t *ps)
#undef c32rtomb
{
#if HAVE_WORKING_MBRTOC32

# if C32RTOMB_RETVAL_BUG
  if (s == NULL)
    /* We know the NUL wide character corresponds to the NUL character.  */
    return 1;
# endif

  /* The #undef before the function body drops any macro named c32rtomb,
     so this call is not subject to macro renaming.  */
  return c32rtomb (s, wc, ps);

#elif _GL_SMALL_WCHAR_T

  if (s == NULL)
    return wcrtomb (NULL, 0, ps);
  else
    {
      /* Special-case all encodings that may produce wide character values
         > WCHAR_MAX.  */
      const char *encoding = locale_charset ();
      if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
        {
          /* Special-case the UTF-8 encoding.  Assume that the wide-character
             encoding in a UTF-8 locale is UCS-2 or, equivalently, UTF-16.  */
          if (wc < 0x80)
            {
              /* Single byte: the code point is stored as is.  */
              s[0] = (unsigned char) wc;
              return 1;
            }
          else
            {
              int count;

              /* Determine the length of the UTF-8 sequence, rejecting
                 surrogates (0xD800..0xDFFF) and values >= 0x110000.  */
              if (wc < 0x800)
                count = 2;
              else if (wc < 0x10000)
                {
                  if (wc < 0xd800 || wc >= 0xe000)
                    count = 3;
                  else
                    {
                      errno = EILSEQ;
                      return (size_t)(-1);
                    }
                }
              else if (wc < 0x110000)
                count = 4;
              else
                {
                  errno = EILSEQ;
                  return (size_t)(-1);
                }

              /* Emit the trailing bytes from last to first.  Each step
                 stores the low 6 bits, shifts them out, and ORs in a marker
                 bit that, after the remaining shifts, ends up as part of the
                 lead byte's length prefix (0xC0, 0xE0 or 0xF0).  */
              switch (count) /* note: code falls through cases! */
                {
                case 4: s[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
                  FALLTHROUGH;
                case 3: s[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
                  FALLTHROUGH;
                case 2: s[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
                /*case 1:*/ s[0] = wc;
                }
              return count;
            }
        }
      else
        {
          /* Other encodings: only values that survive the round trip
             through wchar_t can be handed to wcrtomb.  */
          if ((wchar_t) wc == wc)
            return wcrtomb (s, (wchar_t) wc, ps);
          else
            {
              errno = EILSEQ;
              return (size_t)(-1);
            }
        }
    }

#else

  /* char32_t and wchar_t are equivalent.  */
  if (s == NULL)
    /* WC must be ignored when S is NULL.  Handle this before any conversion
       of WC, so that an unconvertible WC cannot make the call fail; this
       matches the handling in the _GL_SMALL_WCHAR_T case above.  */
    return wcrtomb (NULL, 0, ps);
# if GL_CHAR32_T_IS_UNICODE && GL_CHAR32_T_VS_WCHAR_T_NEEDS_CONVERSION
  if (wc != 0)
    {
      wc = unicode_to_locale_encoding (wc);
      /* A zero result for a non-zero input is treated as "not
         convertible".  */
      if (wc == 0)
        {
          errno = EILSEQ;
          return (size_t)(-1);
        }
    }
# endif
  return wcrtomb (s, (wchar_t) wc, ps);

#endif
}