1 /* Copyright 2010-2023 Free Software Foundation, Inc.
2
3 This program is free software: you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation, either version 3 of the License, or
6 (at your option) any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program. If not, see <http://www.gnu.org/licenses/>. */
15
16 #ifdef HAVE_CONFIG_H
17 #include <config.h>
18 #endif
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <locale.h>
23 #ifndef _WIN32
24 #include <langinfo.h>
25 #else /* _WIN32 */
26 /* Workaround for problems caused in mingw.org's MinGW build by
27 Gnulib's wchar.h overriding the wint_t type definition, which
28 causes compilation errors when perl.h is included below, because
29 perl.h includes ctype.h. */
30 #include <ctype.h>
31 #endif
32 #include <wchar.h>
33 #include <wctype.h>
34
35 /* See "How do I use all this in extensions" in 'man perlguts'. */
36 #define PERL_NO_GET_CONTEXT
37
38 #include "EXTERN.h"
39 #include "perl.h"
40 #if defined _WIN32 && !defined __CYGWIN__
41 # undef free
42 #endif
43 #include "XSUB.h"
44
45 #include "ppport.h"
46
47 #include "miscxs.h"
48
49 const char *whitespace_chars = " \t\f\v\r\n";
50
51 char *
52 xs_process_text (char *text)
53 {
54 static char *new;
55 char *p, *q;
56
57 dTHX;
58
59 new = realloc (new, strlen (text) + 1);
60 strcpy (new, text);
61
62 p = q = new;
63 while (*p)
64 {
65 if (*p == '-' && p[1] == '-')
66 {
67 if (p[2] == '-')
68 {
69 *q = '-'; q[1] = '-';
70 p += 3; q += 2;
71 }
72 else
73 {
74 *q = '-';
75 p += 2; q += 1;
76 }
77 }
78 else if (*p == '\'' && p[1] == '\'')
79 {
80 *q = '"';
81 p += 2; q += 1;
82 }
83 else if (*p == '`')
84 {
85 if (p[1] == '`')
86 {
87 *q = '"';
88 p += 2; q += 1;
89 }
90 else
91 {
92 *q = '\'';
93 p += 1; q += 1;
94 }
95 }
96 else
97 {
98 *q++ = *p++;
99 }
100 }
101 *q = '\0';
102
103 return new;
104 }
105
106 char *
107 xs_unicode_text (char *text, int in_code)
108 {
109 char *p, *q;
110 static char *new;
111 int new_space, new_len;
112
113 dTHX; /* Perl boilerplate. */
114
115 if (in_code)
116 return text;
117
118 p = text;
119 new_space = strlen (text);
120 new = realloc (new, new_space + 1);
121 new_len = 0;
122 #define ADD3(s) \
123 if (new_len + 2 >= new_space - 1) \
124 { \
125 new_space += 2; \
126 new = realloc (new, new_space *= 2); \
127 } \
128 new[new_len++] = s[0]; \
129 new[new_len++] = s[1]; \
130 new[new_len++] = s[2];
131
132 #define ADD1(s) \
133 if (new_len >= new_space - 1) \
134 new = realloc (new, (new_space *= 2) + 1); \
135 new[new_len++] = s;
136
137 #define ADDN(s, n) \
138 if (new_len + n - 1 >= new_space - 1) \
139 { \
140 new_space += n; \
141 new = realloc (new, (new_space *= 2) + 1); \
142 } \
143 memcpy(new + new_len, s, n); \
144 new_len += n;
145
146 while (1)
147 {
148 q = p + strcspn (p, "-`'");
149 ADDN(p, q - p);
150 if (!*q)
151 break;
152 switch (*q)
153 {
154 case '-':
155 if (!memcmp (q, "---", 3))
156 {
157 p = q + 3;
158 /* Unicode em dash U+2014 (0xE2 0x80 0x94) */
159 ADD3("\xE2\x80\x94");
160 }
161 else if (!memcmp (q, "--", 2))
162 {
163 p = q + 2;
164 /* Unicode en dash U+2013 (0xE2 0x80 0x93) */
165 ADD3("\xE2\x80\x93");
166 }
167 else
168 {
169 p = q + 1;
170 ADD1(*q);
171 }
172 break;
173 case '`':
174 if (!memcmp (q, "``", 2))
175 {
176 p = q + 2;
177 /* U+201C E2 80 9C */
178 ADD3("\xE2\x80\x9C");
179 }
180 else
181 {
182 p = q + 1;
183 /* U+2018 E2 80 98 */
184 ADD3("\xE2\x80\x98");
185 }
186 break;
187 case '\'':
188 if (!memcmp (q, "''", 2))
189 {
190 p = q + 2;
191 /* U+201D E2 80 9D */
192 ADD3("\xE2\x80\x9D");
193 }
194 else
195 {
196 p = q + 1;
197 /* U+2019 E2 80 99 */
198 ADD3("\xE2\x80\x99");
199 }
200 break;
201 }
202 }
203
204 new[new_len] = '\0';
205 return new;
206 }
207
208 char *
209 xs_entity_text (char *text)
210 {
211 char *p, *q;
212 static char *new;
213 int new_space, new_len;
214
215 dTHX; /* Perl boilerplate. */
216
217 p = text;
218 new_space = strlen (text);
219 new = realloc (new, new_space + 1);
220 new_len = 0;
221
222 #define ADDN(s, n) \
223 if (new_len + n - 1 >= new_space - 1) \
224 { \
225 new_space += n; \
226 new = realloc (new, (new_space *= 2) + 1); \
227 } \
228 memcpy(new + new_len, s, n); \
229 new_len += n;
230
231 while (1)
232 {
233 q = p + strcspn (p, "-`'");
234 ADDN(p, q - p);
235 if (!*q)
236 break;
237 switch (*q)
238 {
239 case '-':
240 if (!memcmp (q, "---", 3))
241 {
242 p = q + 3;
243 ADDN("—", 7);
244 }
245 else if (!memcmp (q, "--", 2))
246 {
247 p = q + 2;
248 ADDN("–", 7);
249 }
250 else
251 {
252 p = q + 1;
253 ADD1(*q);
254 }
255 break;
256 case '`':
257 if (!memcmp (q, "``", 2))
258 {
259 p = q + 2;
260 ADDN("“", 7);
261 }
262 else
263 {
264 p = q + 1;
265 ADDN("‘", 7);
266 }
267 break;
268 case '\'':
269 if (!memcmp (q, "''", 2))
270 {
271 p = q + 2;
272 ADDN("”", 7);
273 }
274 else
275 {
276 p = q + 1;
277 ADDN("’", 7);
278 }
279 break;
280 }
281 }
282
283 new[new_len] = '\0';
284 return new;
285 }
286
287 void xs_parse_command_name (SV *text_in,
288 char **command,
289 int *is_single_letter)
290 {
291 char *text;
292
293 dTHX;
294
295 /* Make sure the input is in UTF8. */
296 if (!SvUTF8 (text_in))
297 sv_utf8_upgrade (text_in);
298 text = SvPV_nolen (text_in);
299
300 *command = 0;
301 *is_single_letter = 0;
302
303 if (isalnum(text[0]))
304 {
305 char *p, *q;
306 static char *s;
307
308 p = text;
309 q = text + 1;
310 while (isalnum (*q) || *q == '-' || *q == '_')
311 q++;
312
313 s = realloc (s, q - p + 1);
314 memcpy (s, p, q - p);
315 s[q - p] = '\0';
316 *command = s;
317 }
318 else if (text[0] && strchr ("([\"'~@&}{,.!?"
319 " \t\n"
320 "*-^`=:|/\\",
321 text[0]))
322 {
323 static char a[2];
324 *command = a;
325 a[0] = text[0];
326 a[1] = '\0';
327 *is_single_letter = 1;
328 }
329 return;
330 }
331
332 /* Return list ($at_command, $open_brace, ....) */
333 void xs_parse_texi_regex (SV *text_in,
334 char **arobase,
335 char **open_brace,
336 char **close_brace,
337 char **comma,
338 char **asterisk,
339 char **form_feed,
340 char **menu_only_separator,
341 char **new_text)
342 {
343 char *text;
344
345 dTHX;
346
347 /* Make sure the input is in UTF8. */
348 if (!SvUTF8 (text_in))
349 sv_utf8_upgrade (text_in);
350 text = SvPV_nolen (text_in);
351
352 *arobase = *open_brace = *close_brace = *comma = *asterisk
353 = *form_feed = *menu_only_separator = *new_text = 0;
354
355 if (*text == '@')
356 {
357 *arobase = "@";
358 }
359 else if (*text == '{')
360 {
361 *open_brace = "{";
362 }
363 else if (*text == '}')
364 {
365 *close_brace = "}";
366 }
367
368 else if (*text == ',')
369 {
370 *comma = ",";
371 }
372 else if (strchr (":\t.", *text))
373 {
374 static char a[2];
375 *menu_only_separator = a;
376 a[0] = *text;
377 a[1] = '\0';
378 }
379 else if (*text == '\f')
380 {
381 *form_feed = "\f";
382 }
383 else
384 {
385 char *p;
386
387 if (*text == '*')
388 *asterisk = "*";
389
390 p = text;
391 p += strcspn (p, "{}@,:\t.\n\f");
392 if (p > text)
393 {
394 static char *s;
395 s = realloc (s, p - text + 1);
396 memcpy (s, text, p - text);
397 s[p - text] = '\0';
398 *new_text = s;
399 }
400 }
401
402 return;
403 }
404
405 char *
406 xs_default_format_protect_text (char *text)
407 {
408 char *p, *q;
409 static char *new;
410 int new_space, new_len;
411
412 dTHX; /* Perl boilerplate. */
413
414 p = text;
415 new_space = strlen (text);
416 new = realloc (new, new_space + 1);
417 new_len = 0;
418
419 #define ADDN(s, n) \
420 if (new_len + n - 1 >= new_space - 1) \
421 { \
422 new_space += n; \
423 new = realloc (new, (new_space *= 2) + 1); \
424 } \
425 memcpy(new + new_len, s, n); \
426 new_len += n;
427
428 while (1)
429 {
430 q = p + strcspn (p, "<>&\"\f");
431 ADDN(p, q - p);
432 if (!*q)
433 break;
434 switch (*q)
435 {
436 case '<':
437 ADDN("<", 4);
438 break;
439 case '>':
440 ADDN(">", 4);
441 break;
442 case '&':
443 ADDN("&", 5);
444 break;
445 case '"':
446 ADDN(""", 6);
447 break;
448 case '\f':
449 ADDN("", 5);
450 break;
451 }
452 p = q + 1;
453 }
454 new[new_len] = '\0';
455 return new;
456 }