(root)/
gettext-0.22.4/
gettext-tools/
src/
format-kde-kuit.c
       1  /* KUIT (KDE User Interface Text) format strings.
       2     Copyright (C) 2015, 2018-2019 Free Software Foundation, Inc.
       3     Written by Daiki Ueno <ueno@gnu.org>, 2015.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #ifdef HAVE_CONFIG_H
      19  # include <config.h>
      20  #endif
      21  
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

#include "format.h"
#include "unistr.h"
#include "xalloc.h"
#include "xvasprintf.h"
#include "gettext.h"
      31  
/* Select the XML well-formedness checker used by format_parse: the
   bundled markup parser when building libgettextpo, libxml2 otherwise.  */
#if IN_LIBGETTEXTPO
/* Use included markup parser to avoid extra dependency from
   libgettextpo to libxml2.  */
/* NOTE(review): the guard below tests FORMAT_KDE_KUIT_FALLBACK_MARKUP,
   while the macro being defined (and tested further down in this file) is
   FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP -- confirm that the differing name
   is intended.  */
# ifndef FORMAT_KDE_KUIT_FALLBACK_MARKUP
#  define FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP 1
# endif
#else
#  define FORMAT_KDE_KUIT_USE_LIBXML2 1
#endif
      41  
      42  #if FORMAT_KDE_KUIT_USE_LIBXML2
      43  # include <libxml/parser.h>
      44  #elif FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP
      45  # include "markup.h"
      46  #endif
      47  
      48  
/* Shorthand for looking up the translation of the message STR.  */
#define _(str) gettext (str)
      50  
      51  #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
      52  
      53  
      54  /* KUIT (KDE User Interface Text) is an XML-like markup which augments
      55     translatable strings with semantic information:
      56     https://api.kde.org/frameworks/ki18n/html/prg_guide.html#kuit_markup
      57     KUIT can be seen as a fragment of a well-formed XML document,
      58     except that it allows '&' as a Qt accelerator marker and '%' as a
      59     format directive.  */
      60  
/* Descriptor of a parsed KUIT format string.  It only wraps the
   descriptor obtained from the plain KDE format string parser; the
   functions in this file forward to formatstring_kde through BASE.  */
struct spec
{
  /* A format string descriptor returned from formatstring_kde.parse.  */
  void *base;
};
      66  
/* Namespace URI bound to the "gt" prefix of the <gt:kuit> element that
   format_parse wraps around the format string before parsing it.  */
#define XML_NS "https://www.gnu.org/s/gettext/kde"
      68  
/* A contiguous range of Unicode code points.  The lookups in this file
   treat both bounds as inclusive (START <= uc && uc <= END).  */
struct char_range
{
  /* First code point of the range.  */
  ucs4_t start;
  /* Last code point of the range.  */
  ucs4_t end;
};
      74  
/* Character ranges for NameStartChar defined in:
   https://www.w3.org/TR/REC-xml/#NT-NameStartChar
   is_reference uses this table to validate the first character of an
   entity name and, together with name_chars2, the following ones.
   The entries are listed in ascending code point order.  */
static const struct char_range name_chars1[] =
  {
    { ':', ':' },
    { 'A', 'Z' },
    { '_', '_' },
    { 'a', 'z' },
    { 0xC0, 0xD6 },
    { 0xD8, 0xF6 },
    { 0xF8, 0x2FF },
    { 0x370, 0x37D },
    { 0x37F, 0x1FFF },
    { 0x200C, 0x200D },
    { 0x2070, 0x218F },
    { 0x2C00, 0x2FEF },
    { 0x3001, 0xD7FF },
    { 0xF900, 0xFDCF },
    { 0xFDF0, 0xFFFD },
    { 0x10000, 0xEFFFF }
  };
      96  
/* Character ranges for NameChar, excluding NameStartChar:
   https://www.w3.org/TR/REC-xml/#NT-NameChar
   These characters may appear in an XML name, but not as its first
   character; is_reference consults this table only for characters that
   are not found in name_chars1.  */
static const struct char_range name_chars2[] =
  {
    { '-', '-' },
    { '.', '.' },
    { '0', '9' },
    { 0xB7, 0xB7 },
    { 0x0300, 0x036F },
    { 0x203F, 0x2040 }
  };
     108  
     109  /* Return true if INPUT is an XML reference.  */
     110  static bool
     111  is_reference (const char *input)
     112  {
     113    const char *str = input;
     114    const char *str_limit = str + strlen (input);
     115    ucs4_t uc;
     116    int i;
     117  
     118    str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
     119    assert (uc == '&');
     120  
     121    str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
     122  
     123    /* CharRef */
     124    if (uc == '#')
     125      {
     126        str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
     127        if (uc == 'x')
     128          {
     129            while (str < str_limit)
     130              {
     131                str += u8_mbtouc (&uc, (const unsigned char *) str,
     132                                  str_limit - str);
     133                if (!(('0' <= uc && uc <= '9')
     134                      || ('A' <= uc && uc <= 'F')
     135                      || ('a' <= uc && uc <= 'f')))
     136                  break;
     137              }
     138            return uc == ';';
     139          }
     140        else if ('0' <= uc && uc <= '9')
     141          {
     142            while (str < str_limit)
     143              {
     144                str += u8_mbtouc (&uc, (const unsigned char *) str,
     145                                  str_limit - str);
     146                if (!('0' <= uc && uc <= '9'))
     147                  break;
     148              }
     149            return uc == ';';
     150          }
     151      }
     152    else
     153      {
     154        /* EntityRef */
     155        for (i = 0; i < SIZEOF (name_chars1); i++)
     156          if (name_chars1[i].start <= uc && uc <= name_chars1[i].end)
     157            break;
     158  
     159        if (i == SIZEOF (name_chars1))
     160          return false;
     161  
     162        while (str < str_limit)
     163          {
     164            str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
     165            for (i = 0; i < SIZEOF (name_chars1); i++)
     166              if (name_chars1[i].start <= uc && uc <= name_chars1[i].end)
     167                break;
     168            if (i == SIZEOF (name_chars1))
     169              {
     170                for (i = 0; i < SIZEOF (name_chars2); i++)
     171                  if (name_chars2[i].start <= uc && uc <= name_chars2[i].end)
     172                    break;
     173                if (i == SIZEOF (name_chars2))
     174                  return false;
     175              }
     176          }
     177        return uc == ';';
     178      }
     179  
     180    return false;
     181  }
     182  
     183  
     184  static void *
     185  format_parse (const char *format, bool translated, char *fdi,
     186                char **invalid_reason)
     187  {
     188    struct spec spec;
     189    struct spec *result;
     190    const char *str;
     191    const char *str_limit;
     192    size_t amp_count;
     193    char *buffer, *bp;
     194  
     195    spec.base = NULL;
     196  
     197    /* Preprocess the input, putting the content in a <gt:kuit> element.  */
     198    str = format;
     199    str_limit = str + strlen (format);
     200  
     201    for (amp_count = 0; str < str_limit; amp_count++)
     202      {
     203        const char *amp = strchrnul (str, '&');
     204        if (*amp != '&')
     205          break;
     206        str = amp + 1;
     207      }
     208  
     209    buffer = xmalloc (amp_count * 4
     210                      + strlen (format)
     211                      + strlen ("<gt:kuit xmlns:gt=\"" XML_NS "\"></gt:kuit>")
     212                      + 1);
     213    *buffer = '\0';
     214  
     215    bp = buffer;
     216    bp = stpcpy (bp, "<gt:kuit xmlns:gt=\"" XML_NS "\">");
     217    str = format;
     218    while (str < str_limit)
     219      {
     220        const char *amp = strchrnul (str, '&');
     221  
     222        bp = stpncpy (bp, str, amp - str);
     223        if (*amp != '&')
     224          break;
     225  
     226        bp = stpcpy (bp, is_reference (amp) ? "&" : "&amp;");
     227        str = amp + 1;
     228      }
     229    stpcpy (bp, "</gt:kuit>");
     230  
     231  #if FORMAT_KDE_KUIT_USE_LIBXML2
     232      {
     233        xmlDocPtr doc;
     234  
     235        doc = xmlReadMemory (buffer, strlen (buffer), "", NULL,
     236                             XML_PARSE_NONET
     237                             | XML_PARSE_NOWARNING
     238                             | XML_PARSE_NOERROR
     239                             | XML_PARSE_NOBLANKS);
     240        if (doc == NULL)
     241          {
     242            xmlError *err = xmlGetLastError ();
     243            *invalid_reason =
     244              xasprintf (_("error while parsing: %s"),
     245                         err->message);
     246            free (buffer);
     247            xmlFreeDoc (doc);
     248            return NULL;
     249          }
     250  
     251        free (buffer);
     252        xmlFreeDoc (doc);
     253      }
     254  #elif FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP
     255      {
     256        markup_parser_ty parser;
     257        markup_parse_context_ty *context;
     258  
     259        memset (&parser, 0, sizeof (markup_parser_ty));
     260        context = markup_parse_context_new (&parser, 0, NULL);
     261        if (!markup_parse_context_parse (context, buffer, strlen (buffer)))
     262          {
     263            *invalid_reason =
     264              xasprintf (_("error while parsing: %s"),
     265                         markup_parse_context_get_error (context));
     266            free (buffer);
     267            markup_parse_context_free (context);
     268            return NULL;
     269          }
     270  
     271        if (!markup_parse_context_end_parse (context))
     272          {
     273            *invalid_reason =
     274              xasprintf (_("error while parsing: %s"),
     275                         markup_parse_context_get_error (context));
     276            free (buffer);
     277            markup_parse_context_free (context);
     278            return NULL;
     279          }
     280  
     281        free (buffer);
     282        markup_parse_context_free (context);
     283      }
     284  #else
     285      /* No support for XML.  */
     286      free (buffer);
     287  #endif
     288  
     289    spec.base = formatstring_kde.parse (format, translated, fdi, invalid_reason);
     290    if (spec.base == NULL)
     291      return NULL;
     292  
     293    result = XMALLOC (struct spec);
     294    *result = spec;
     295    return result;
     296  }
     297  
     298  static void
     299  format_free (void *descr)
     300  {
     301    struct spec *spec = descr;
     302    formatstring_kde.free (spec->base);
     303    free (spec);
     304  }
     305  
     306  static int
     307  format_get_number_of_directives (void *descr)
     308  {
     309    struct spec *spec = descr;
     310    return formatstring_kde.get_number_of_directives (spec->base);
     311  }
     312  
     313  static bool
     314  format_check (void *msgid_descr, void *msgstr_descr, bool equality,
     315                formatstring_error_logger_t error_logger,
     316                const char *pretty_msgid, const char *pretty_msgstr)
     317  {
     318    struct spec *msgid_spec = msgid_descr;
     319    struct spec *msgstr_spec = msgstr_descr;
     320  
     321    return formatstring_kde.check (msgid_spec->base, msgstr_spec->base, equality,
     322                                   error_logger,
     323                                   pretty_msgid, pretty_msgstr);
     324  }
     325  
/* The parser table for KUIT format strings.  The initializer is
   positional, so the order of the entries must match the declaration of
   struct formatstring_parser.  */
struct formatstring_parser formatstring_kde_kuit =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  /* No function is provided for the fourth slot.
     NOTE(review): presumably the is_unlikely_intentional hook -- confirm
     against format.h.  */
  NULL,
  format_check
};
     334  
     335  
     336  #ifdef TEST
     337  
     338  /* Test program: Print the argument list specification returned by
     339     format_parse for strings read from standard input.  */
     340  
     341  #include <stdio.h>
     342  
/* Print a representation of the descriptor DESCR to standard output.
   A NULL descriptor is printed as "INVALID".  Otherwise the output is a
   parenthesized list in which every argument number that is skipped
   between 1 and the numbers present is printed as "_", with spaces as
   separators.
   NOTE(review): this function reads spec->numbered_arg_count and
   spec->numbered, but struct spec as defined in this file has only the
   member 'base'.  This looks like it was copied from another format
   module and probably does not compile with -DTEST -- confirm and adapt
   it to the wrapped descriptor.  */
static void
format_print (void *descr)
{
  struct spec *spec = (struct spec *) descr;
  unsigned int last;
  unsigned int i;

  if (spec == NULL)
    {
      printf ("INVALID");
      return;
    }

  printf ("(");
  /* LAST is the smallest argument number not yet accounted for.  */
  last = 1;
  for (i = 0; i < spec->numbered_arg_count; i++)
    {
      unsigned int number = spec->numbered[i].number;

      if (i > 0)
        printf (" ");
      /* The argument numbers must be strictly increasing.  */
      if (number < last)
        abort ();
      /* Emit a placeholder for every number that was skipped.  */
      for (; last < number; last++)
        printf ("_ ");
      last = number + 1;
    }
  printf (")");
}
     372  
     373  int
     374  main ()
     375  {
     376    for (;;)
     377      {
     378        char *line = NULL;
     379        size_t line_size = 0;
     380        int line_len;
     381        char *invalid_reason;
     382        void *descr;
     383  
     384        line_len = getline (&line, &line_size, stdin);
     385        if (line_len < 0)
     386          break;
     387        if (line_len > 0 && line[line_len - 1] == '\n')
     388          line[--line_len] = '\0';
     389  
     390        invalid_reason = NULL;
     391        descr = format_parse (line, false, NULL, &invalid_reason);
     392  
     393        format_print (descr);
     394        printf ("\n");
     395        if (descr == NULL)
     396          printf ("%s\n", invalid_reason);
     397  
     398        free (invalid_reason);
     399        free (line);
     400      }
     401  
     402    return 0;
     403  }
     404  
     405  /*
     406   * For Emacs M-x compile
     407   * Local Variables:
     408   * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-kde-kuit.c ../gnulib-lib/libgettextlib.la"
     409   * End:
     410   */
     411  
     412  #endif /* TEST */