1 /* qmark.c -- quote 'dangerous' filenames
2 Derived from coreutils' ls.c.
3 Copyright (C) 1985-2022 Free Software Foundation, Inc.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>.
17 */
18 /* config.h must be included first. */
19 #include <config.h>
20
21 /* system headers. */
22 #include <ctype.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <wchar.h>
26
27 /* gnulib headers would go here if any needed to be included. */
28
29 /* find headers. */
30 #include "printquoted.h"
31
32
33
34 /*
35 This comment, IN_CTYPE_DOMAIN and ISPRINT were borrowed from
36 coreutils at Sun Jun 5 21:17:40 2005 UTC.
37
38 Jim Meyering writes:
39
40 "... Some ctype macros are valid only for character codes that
41 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
42 using /bin/cc or gcc but without giving an ansi option). So, all
43 ctype uses should be through macros like ISPRINT... If
44 STDC_HEADERS is defined, then autoconf has verified that the ctype
45 macros don't need to be guarded with references to isascii. ...
46 Defining isascii to 1 should let any compiler worth its salt
47 eliminate the && through constant folding."
48
49 Bruno Haible adds:
50
51 "... Furthermore, isupper(c) etc. have an undefined result if c is
52 outside the range -1 <= c <= 255. One is tempted to write isupper(c)
53 with c being of type `char', but this is wrong if c is an 8-bit
54 character >= 128 which gets sign-extended to a negative value.
55 The macro ISUPPER protects against this as well."
56
(Actually that role of ISUPPER is now taken by to_uchar).
58 */
59
/* IN_CTYPE_DOMAIN (c) guards calls to the <ctype.h> classifiers.  When
   STDC_HEADERS is set it is the constant 1, so the guard disappears;
   otherwise it defers to isascii (c).  */
#if STDC_HEADERS
# define IN_CTYPE_DOMAIN(c) 1
#else
# define IN_CTYPE_DOMAIN(c) isascii(c)
#endif

/* ISPRINT is defined in <sys/euc.h> on at least Solaris2.6 systems. */
#undef ISPRINT
/* Nonzero if C passes IN_CTYPE_DOMAIN and isprint accepts it.  The
   argument may be expanded more than once (once per test), so it must
   not have side effects.  */
#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
69
70
71
72
73
/* Return CH converted to unsigned char.
 *
 * Going through a function whose parameter is declared `char' is a bit
 * safer than writing a cast to unsigned char: the cast would accept
 * some mistyped arguments that this parameter does not.
 *
 * This code taken from coreutils' system.h header at
 * Sun Jun 5 21:05:21 2005 UTC.
 */
static inline unsigned char
to_uchar (char ch)
{
  unsigned char converted = ch;

  return converted;
}
85
86
/* Overwrite with '?' every byte of BUF[0..LEN-1] that ISPRINT rejects.
 * Bytes are examined one at a time after conversion with to_uchar.
 * BUF is modified in place and its length never changes, so LEN is
 * returned as given.
 */
static size_t
unibyte_qmark_chars (char *buf, size_t len)
{
  size_t i;

  for (i = 0; i < len; i++)
    {
      if (ISPRINT (to_uchar (buf[i])))
        continue;
      buf[i] = '?';
    }

  return len;
}
101
102
103
104
105
/* Return nonzero if C is one of the printable ASCII characters that
 * qmark_chars copies through directly, without calling mbrtowc.
 * Note that '$', '@' and '`' are not in this list; those bytes are
 * handled by the mbrtowc path in qmark_chars like any other byte.
 */
static int
qmark_is_plain_printable (char c)
{
  switch (c)
    {
    case ' ': case '!': case '"': case '#': case '%':
    case '&': case '\'': case '(': case ')': case '*':
    case '+': case ',': case '-': case '.': case '/':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case ':': case ';': case '<': case '=': case '>':
    case '?':
    case 'A': case 'B': case 'C': case 'D': case 'E':
    case 'F': case 'G': case 'H': case 'I': case 'J':
    case 'K': case 'L': case 'M': case 'N': case 'O':
    case 'P': case 'Q': case 'R': case 'S': case 'T':
    case 'U': case 'V': case 'W': case 'X': case 'Y':
    case 'Z':
    case '[': case '\\': case ']': case '^': case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e':
    case 'f': case 'g': case 'h': case 'i': case 'j':
    case 'k': case 'l': case 'm': case 'n': case 'o':
    case 'p': case 'q': case 'r': case 's': case 't':
    case 'u': case 'v': case 'w': case 'x': case 'y':
    case 'z': case '{': case '|': case '}': case '~':
      return 1;
    default:
      return 0;
    }
}

/* Scan the LEN bytes at BUF and replace anything dangerous-looking
 * with a question mark.  This code is taken from the ls.c file in
 * coreutils as at Sun Jun 5 20:51:54 2005 UTC.
 *
 * When MB_CUR_MAX is at most 1 the work is handed to
 * unibyte_qmark_chars.  Otherwise the buffer is decoded with mbrtowc:
 *   - an invalid sequence costs one input byte and yields one '?';
 *   - an incomplete character at the end of the buffer is replaced,
 *     as a whole, by one '?';
 *   - a character for which wcwidth is negative is replaced by one '?';
 *   - everything else is copied unchanged.
 *
 * The rewrite happens in place.  Because a multibyte character may be
 * replaced by a single byte, the contents may become shorter; the
 * resulting length is returned in either case.
 */
size_t
qmark_chars (char *buf, size_t len)
{
  char const *src;
  char const *end;
  char *dst;

  if (MB_CUR_MAX <= 1)
    return unibyte_qmark_chars (buf, len);

  src = buf;
  end = buf + len;
  dst = buf;		/* Never gets ahead of SRC.  */

  while (src < end)
    {
      mbstate_t state;

      if (qmark_is_plain_printable (*src))
        {
          *dst++ = *src++;
          continue;
        }

      /* Start decoding from the initial shift state and keep going
         until the decoder is back in the initial state.  */
      memset (&state, 0, sizeof state);
      for (;;)
        {
          wchar_t wc;
          size_t n = mbrtowc (&wc, src, end - src, &state);

          if (n == (size_t) -1)
            {
              /* Invalid sequence: drop a single input byte.  */
              src++;
              *dst++ = '?';
              break;
            }

          if (n == (size_t) -2)
            {
              /* Truncated character at the end: drop all of it.  */
              src = end;
              *dst++ = '?';
              break;
            }

          /* mbrtowc reports a null wide character as length 0, but
             it still occupies one input byte.  */
          if (n == 0)
            n = 1;

          if (wcwidth (wc) < 0)
            {
              /* Unprintable: the whole character becomes one '?'.  */
              src += n;
              *dst++ = '?';
            }
          else
            {
              /* Printable: keep all of its bytes.  */
              for (; n != 0; n--)
                *dst++ = *src++;
            }

          if (mbsinit (&state))
            break;
        }
    }

  /* The buffer may have shrunk.  */
  return dst - buf;
}