(root)/
texinfo-7.1/
tp/
Texinfo/
XS/
misc.c
       1  /* Copyright 2010-2023 Free Software Foundation, Inc.
       2  
       3     This program is free software: you can redistribute it and/or modify
       4     it under the terms of the GNU General Public License as published by
       5     the Free Software Foundation, either version 3 of the License, or
       6     (at your option) any later version.
       7  
       8     This program is distributed in the hope that it will be useful,
       9     but WITHOUT ANY WARRANTY; without even the implied warranty of
      10     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      11     GNU General Public License for more details.
      12  
      13     You should have received a copy of the GNU General Public License
      14     along with this program.  If not, see <http://www.gnu.org/licenses/>. */
      15  
      16  #ifdef HAVE_CONFIG_H
      17    #include <config.h>
      18  #endif
      19  #include <stdlib.h>
      20  #include <stdio.h>
      21  #include <string.h>
      22  #include <locale.h>
      23  #ifndef _WIN32
      24  #include <langinfo.h>
      25  #else  /* _WIN32 */
      26  /* Workaround for problems caused in mingw.org's MinGW build by
      27     Gnulib's wchar.h overriding the wint_t type definition, which
      28     causes compilation errors when perl.h is included below, because
      29     perl.h includes ctype.h.  */
      30  #include <ctype.h>
      31  #endif
      32  #include <wchar.h>
      33  #include <wctype.h>
      34  
      35  /* See "How do I use all this in extensions" in 'man perlguts'. */
      36  #define PERL_NO_GET_CONTEXT
      37  
      38  #include "EXTERN.h"
      39  #include "perl.h"
      40  #if defined _WIN32 && !defined __CYGWIN__
      41  # undef free
      42  #endif
      43  #include "XSUB.h"
      44  
      45  #include "ppport.h"
      46  
      47  #include "miscxs.h"
      48  
      49  const char *whitespace_chars = " \t\f\v\r\n";
      50  
      51  char *
      52  xs_process_text (char *text)
      53  {
      54    static char *new;
      55    char *p, *q;
      56  
      57    dTHX;
      58  
      59    new = realloc (new, strlen (text) + 1);
      60    strcpy (new, text);
      61  
      62    p = q = new;
      63    while (*p)
      64      {
      65        if (*p == '-' && p[1] == '-')
      66          {
      67            if (p[2] == '-')
      68              {
      69                *q = '-'; q[1] = '-';
      70                p += 3; q += 2;
      71              }
      72            else
      73              {
      74                *q = '-';
      75                p += 2; q += 1;
      76              }
      77          }
      78        else if (*p == '\'' && p[1] == '\'')
      79          {
      80            *q = '"';
      81            p += 2; q += 1;
      82          }
      83        else if (*p == '`')
      84          {
      85            if (p[1] == '`')
      86              {
      87                *q = '"';
      88                p += 2; q += 1;
      89              }
      90            else
      91              {
      92                *q = '\'';
      93                p += 1; q += 1;
      94              }
      95          }
      96        else
      97          {
      98            *q++ = *p++;
      99          }
     100      }
     101    *q = '\0';
     102  
     103    return new;
     104  }
     105  
     106  char *
     107  xs_unicode_text (char *text, int in_code)
     108  {
     109    char *p, *q;
     110    static char *new;
     111    int new_space, new_len;
     112  
     113    dTHX; /* Perl boilerplate. */
     114  
     115    if (in_code)
     116      return text;
     117  
     118    p = text;
     119    new_space = strlen (text);
     120    new = realloc (new, new_space + 1);
     121    new_len = 0;
     122  #define ADD3(s) \
     123    if (new_len + 2 >= new_space - 1)               \
     124      {                                             \
     125        new_space += 2;                             \
     126        new = realloc (new, new_space *= 2);        \
     127      }                                             \
     128    new[new_len++] = s[0];                          \
     129    new[new_len++] = s[1];                          \
     130    new[new_len++] = s[2];
     131  
     132  #define ADD1(s) \
     133    if (new_len >= new_space - 1)                   \
     134      new = realloc (new, (new_space *= 2) + 1);    \
     135    new[new_len++] = s;
     136  
     137  #define ADDN(s, n) \
     138    if (new_len + n - 1 >= new_space - 1)           \
     139      {                                             \
     140        new_space += n;                             \
     141        new = realloc (new, (new_space *= 2) + 1);  \
     142      }                                             \
     143    memcpy(new + new_len, s, n);                    \
     144    new_len += n;
     145  
     146    while (1)
     147      {
     148        q = p + strcspn (p, "-`'");
     149        ADDN(p, q - p);
     150        if (!*q)
     151          break;
     152        switch (*q)
     153          {
     154          case '-':
     155            if (!memcmp (q, "---", 3))
     156              {
     157                p = q + 3;
     158                /* Unicode em dash U+2014 (0xE2 0x80 0x94) */
     159                ADD3("\xE2\x80\x94");
     160              }
     161            else if (!memcmp (q, "--", 2))
     162              {
     163                p = q + 2;
     164                /* Unicode en dash U+2013 (0xE2 0x80 0x93) */
     165                ADD3("\xE2\x80\x93");
     166              }
     167            else
     168              {
     169                p = q + 1;
     170                ADD1(*q);
     171              }
     172            break;
     173          case '`':
     174            if (!memcmp (q, "``", 2))
     175              {
     176                p = q + 2;
     177                /* U+201C E2 80 9C */
     178                ADD3("\xE2\x80\x9C");
     179              }
     180            else
     181              {
     182                p = q + 1;
     183                /* U+2018 E2 80 98 */
     184                ADD3("\xE2\x80\x98");
     185              }
     186            break;
     187          case '\'':
     188            if (!memcmp (q, "''", 2))
     189              {
     190                p = q + 2;
     191                /* U+201D E2 80 9D */
     192                ADD3("\xE2\x80\x9D");
     193              }
     194            else
     195              {
     196                p = q + 1;
     197                /* U+2019 E2 80 99 */
     198                ADD3("\xE2\x80\x99");
     199              }
     200            break;
     201          }
     202      }
     203  
     204    new[new_len] = '\0';
     205    return new;
     206  }
     207  
     208  char *
     209  xs_entity_text (char *text)
     210  {
     211    char *p, *q;
     212    static char *new;
     213    int new_space, new_len;
     214  
     215    dTHX; /* Perl boilerplate. */
     216  
     217    p = text;
     218    new_space = strlen (text);
     219    new = realloc (new, new_space + 1);
     220    new_len = 0;
     221  
     222  #define ADDN(s, n) \
     223    if (new_len + n - 1 >= new_space - 1)           \
     224      {                                             \
     225        new_space += n;                             \
     226        new = realloc (new, (new_space *= 2) + 1);  \
     227      }                                             \
     228    memcpy(new + new_len, s, n);                    \
     229    new_len += n;
     230  
     231    while (1)
     232      {
     233        q = p + strcspn (p, "-`'");
     234        ADDN(p, q - p);
     235        if (!*q)
     236          break;
     237        switch (*q)
     238          {
     239          case '-':
     240            if (!memcmp (q, "---", 3))
     241              {
     242                p = q + 3;
     243                ADDN("&mdash;", 7);
     244              }
     245            else if (!memcmp (q, "--", 2))
     246              {
     247                p = q + 2;
     248                ADDN("&ndash;", 7);
     249              }
     250            else
     251              {
     252                p = q + 1;
     253                ADD1(*q);
     254              }
     255            break;
     256          case '`':
     257            if (!memcmp (q, "``", 2))
     258              {
     259                p = q + 2;
     260                ADDN("&ldquo;", 7);
     261              }
     262            else
     263              {
     264                p = q + 1;
     265                ADDN("&lsquo;", 7);
     266              }
     267            break;
     268          case '\'':
     269            if (!memcmp (q, "''", 2))
     270              {
     271                p = q + 2;
     272                ADDN("&rdquo;", 7);
     273              }
     274            else
     275              {
     276                p = q + 1;
     277                ADDN("&rsquo;", 7);
     278              }
     279            break;
     280          }
     281      }
     282  
     283    new[new_len] = '\0';
     284    return new;
     285  }
     286  
     287  void xs_parse_command_name (SV *text_in,
     288                              char **command,
     289                              int *is_single_letter)
     290  {
     291    char *text;
     292  
     293    dTHX;
     294  
     295    /* Make sure the input is in UTF8. */
     296    if (!SvUTF8 (text_in))
     297      sv_utf8_upgrade (text_in);
     298    text = SvPV_nolen (text_in);
     299  
     300    *command = 0;
     301    *is_single_letter = 0;
     302  
     303    if (isalnum(text[0]))
     304      {
     305        char *p, *q;
     306        static char *s;
     307  
     308        p = text;
     309        q = text + 1;
     310        while (isalnum (*q) || *q == '-' || *q == '_')
     311          q++;
     312  
     313        s = realloc (s, q - p + 1);
     314        memcpy (s, p, q - p);
     315        s[q - p] = '\0';
     316        *command = s;
     317      }
     318    else if (text[0] && strchr ("([\"'~@&}{,.!?"
     319                                " \t\n"
     320                                "*-^`=:|/\\",
     321                                text[0]))
     322      {
     323        static char a[2];
     324        *command = a;
     325        a[0] = text[0];
     326        a[1] = '\0';
     327        *is_single_letter = 1;
     328      }
     329    return;
     330  }
     331  
     332  /* Return list ($at_command, $open_brace, ....) */
     333  void xs_parse_texi_regex (SV *text_in,
     334                            char **arobase,
     335                            char **open_brace,
     336                            char **close_brace,
     337                            char **comma,
     338                            char **asterisk,
     339                            char **form_feed,
     340                            char **menu_only_separator,
     341                            char **new_text)
     342  {
     343    char *text;
     344  
     345    dTHX;
     346  
     347    /* Make sure the input is in UTF8. */
     348    if (!SvUTF8 (text_in))
     349      sv_utf8_upgrade (text_in);
     350    text = SvPV_nolen (text_in);
     351  
     352    *arobase = *open_brace = *close_brace = *comma = *asterisk
     353       = *form_feed = *menu_only_separator = *new_text = 0;
     354  
     355    if (*text == '@')
     356      {
     357        *arobase = "@";
     358      }
     359    else if (*text == '{')
     360      {
     361        *open_brace = "{";
     362      }
     363    else if (*text == '}')
     364      {
     365        *close_brace = "}";
     366      }
     367  
     368    else if (*text == ',')
     369      {
     370        *comma = ",";
     371      }
     372    else if (strchr (":\t.", *text))
     373      {
     374        static char a[2];
     375        *menu_only_separator = a;
     376        a[0] = *text;
     377        a[1] = '\0';
     378      }
     379    else if (*text == '\f')
     380      {
     381        *form_feed = "\f";
     382      }
     383    else
     384      {
     385        char *p;
     386  
     387        if (*text == '*')
     388          *asterisk = "*";
     389  
     390        p = text;
     391        p += strcspn (p, "{}@,:\t.\n\f");
     392        if (p > text)
     393          {
     394            static char *s;
     395            s = realloc (s, p - text + 1);
     396            memcpy (s, text, p - text);
     397            s[p - text] = '\0';
     398            *new_text = s;
     399          }
     400      }
     401  
     402    return;
     403  }
     404  
     405  char *
     406  xs_default_format_protect_text (char *text)
     407  {
     408    char *p, *q;
     409    static char *new;
     410    int new_space, new_len;
     411  
     412    dTHX; /* Perl boilerplate. */
     413  
     414    p = text;
     415    new_space = strlen (text);
     416    new = realloc (new, new_space + 1);
     417    new_len = 0;
     418  
     419  #define ADDN(s, n) \
     420    if (new_len + n - 1 >= new_space - 1)           \
     421      {                                             \
     422        new_space += n;                             \
     423        new = realloc (new, (new_space *= 2) + 1);  \
     424      }                                             \
     425    memcpy(new + new_len, s, n);                    \
     426    new_len += n;
     427  
     428    while (1)
     429      {
     430        q = p + strcspn (p, "<>&\"\f");
     431        ADDN(p, q - p);
     432        if (!*q)
     433          break;
     434        switch (*q)
     435          {
     436          case '<':
     437            ADDN("&lt;", 4);
     438            break;
     439          case '>':
     440            ADDN("&gt;", 4);
     441            break;
     442          case '&':
     443            ADDN("&amp;", 5);
     444            break;
     445          case '"':
     446            ADDN("&quot;", 6);
     447            break;
     448          case '\f':
     449            ADDN("&#12;", 5);
     450            break;
     451          }
     452        p = q + 1;
     453      }
     454    new[new_len] = '\0';
     455    return new;
     456  }