1 /* Convert multibyte character to wide character.
2 Copyright (C) 1999-2002, 2005-2023 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2008.
4
5 This file is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation; either version 2.1 of the
8 License, or (at your option) any later version.
9
10 This file is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #include <config.h>
19
20 /* Specification. */
21 #include <wchar.h>
22
23 #if GNULIB_defined_mbstate_t
24 /* Implement mbrtowc() on top of mbtowc() for the non-UTF-8 locales
25 and directly for the UTF-8 locales. */
26
27 # include <errno.h>
28 # include <stdint.h>
29 # include <stdlib.h>
30
31 # if AVOID_ANY_THREADS
32
33 /* The option '--disable-threads' explicitly requests no locking. */
34
35 # elif defined _WIN32 && !defined __CYGWIN__
36
37 # define WIN32_LEAN_AND_MEAN /* avoid including junk */
38 # include <windows.h>
39
40 # elif HAVE_PTHREAD_API
41
42 # include <pthread.h>
43 # if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
44 # include <threads.h>
45 # pragma weak thrd_exit
46 # define c11_threads_in_use() (thrd_exit != NULL)
47 # else
48 # define c11_threads_in_use() 0
49 # endif
50
51 # elif HAVE_THREADS_H
52
53 # include <threads.h>
54
55 # endif
56
57 # include "attribute.h"
58 # include "lc-charset-dispatch.h"
59 # include "mbtowc-lock.h"
60
/* Compile-time check that mbstate_t is at least 4 bytes, the size of
   internal_state below.  */
static_assert (sizeof (mbstate_t) >= 4);
/* File-local 4-byte buffer.
   NOTE(review): presumably the conversion state that "mbrtowc-impl.h"
   falls back to when PS is NULL -- confirm against that header.  */
static char internal_state[4];

/* mbrtowc() as compiled when GNULIB_defined_mbstate_t is set.

   The function body is supplied entirely by "mbrtowc-impl.h", which is
   textually included below and therefore sees the parameters PWC, S, N
   and PS under exactly these names, as well as the FITS_IN_CHAR_TYPE
   macro defined just before the inclusion.  Per the comment at the top
   of this section, the implementation works on top of mbtowc() for the
   non-UTF-8 locales and directly for the UTF-8 locales.  */
size_t
mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
/* True when WC does not exceed WCHAR_MAX.
   NOTE(review): presumably consulted by the included implementation to
   decide whether a decoded value is representable -- confirm there.  */
# define FITS_IN_CHAR_TYPE(wc) ((wc) <= WCHAR_MAX)
# include "mbrtowc-impl.h"
}
70
71 #else
72 /* Override the system's mbrtowc() function. */
73
74 # if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
75 # include "hard-locale.h"
76 # include <locale.h>
77 # endif
78
79 # undef mbrtowc
80
/* Replacement for the system's mbrtowc().

   Calls the system function and layers workarounds around it; each
   workaround is compiled in only when the corresponding MBRTOWC_* macro
   is nonzero.

   PWC  where to store the converted wide character, or NULL to discard it.
   S    the multibyte input, or NULL.
   N    the number of bytes available at S.
   PS   the conversion state, or NULL.

   Returns whatever the system's mbrtowc() returns, except where one of
   the enabled workarounds below substitutes a different value.  */
size_t
rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
  wchar_t scratch;
  size_t result;

# if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG
  /* Rewrite a null S into the call (NULL, "", 1, PS).  */
  if (s == NULL)
    {
      n = 1;
      s = "";
      pwc = NULL;
    }
# endif

# if MBRTOWC_EMPTY_INPUT_BUG
  /* With no input bytes, report an incomplete character without
     consulting the system function.  */
  if (n == 0)
    return (size_t) -2;
# endif

  /* From here on PWC always designates writable storage.  */
  if (pwc == NULL)
    pwc = &scratch;

# if MBRTOWC_RETVAL_BUG
  {
    static mbstate_t internal_state;

    /* Substitute our own state object for the system function's hidden
       one: mbsinit() cannot inspect the hidden state, but it can
       inspect this variable.  */
    if (ps == NULL)
      ps = &internal_state;

    if (!mbsinit (ps))
      {
        /* A character is already partially consumed.  Feed the remaining
           input one byte at a time and count the bytes ourselves.  */
        size_t consumed;

        for (consumed = 0; consumed < n; )
          {
            result = mbrtowc (&scratch, s + consumed, 1, ps);
            if (result == (size_t) -1)
              return (size_t) -1;

            consumed++;
            if (result == (size_t) -2)
              continue;

            /* The multibyte character is now complete.  */
            *pwc = scratch;
            if (scratch == 0)
              return 0;
            return consumed;
          }

        /* Input exhausted while the character is still incomplete.  */
        return (size_t) -2;
      }
  }
# endif

# if MBRTOWC_STORES_INCOMPLETE_BUG
  /* Convert into the local first and copy out only on success, so that
     *PWC is left alone for the (size_t) -1 and (size_t) -2 results.
     PWC is known to be non-null at this point.  */
  result = mbrtowc (&scratch, s, n, ps);
  if (result < (size_t) -2)
    *pwc = scratch;
# else
  result = mbrtowc (pwc, s, n, ps);
# endif

# if MBRTOWC_NUL_RETVAL_BUG
  /* A successfully converted null wide character yields 0.  */
  if (result < (size_t) -2 && *pwc == 0)
    return 0;
# endif

# if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
  /* When the system function reported an error or an incomplete
     character on non-empty input and hard_locale (LC_CTYPE) is false,
     hand back the first byte, taken as an unsigned char, as a
     one-byte character.  */
  if (result >= (size_t) -2 && n != 0 && ! hard_locale (LC_CTYPE))
    {
      *pwc = (unsigned char) *s;
      return 1;
    }
# endif

  return result;
}
160
161 #endif