(root)/
gettext-0.22.4/
gettext-tools/
libgettextpo/
markup.c
       1  /* markup.c -- simple XML-like parser
       2     Copyright (C) 2015, 2018, 2020 Free Software Foundation, Inc.
       3  
       4     This file is not part of the GNU gettext program, but is used with
       5     GNU gettext.
       6  
       7     This is a stripped down version of GLib's gmarkup.c.  The original
       8     copyright notice is as follows:
       9  */
      10  
      11  /* gmarkup.c - Simple XML-like parser
      12   *
      13   *  Copyright 2000, 2003 Red Hat, Inc.
      14   *  Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca>
      15   *
      16   * GLib is free software; you can redistribute it and/or modify it
      17   * under the terms of the GNU General Public License as
      18   * published by the Free Software Foundation; either version 3 of the
      19   * License, or (at your option) any later version.
      20   *
      21   * GLib is distributed in the hope that it will be useful,
      22   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      23   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      24   * General Public License for more details.
      25   *
      26   * You should have received a copy of the GNU General Public
      27   * License along with GLib; see the file COPYING.LIB.  If not,
      28   * see <https://www.gnu.org/licenses/>.
      29   */
      30  
      31  #include "config.h"
      32  
      33  #include <assert.h>
      34  #include <stdarg.h>
      35  #include <string.h>
      36  #include <stdio.h>
      37  #include <stdlib.h>
      38  #include <errno.h>
      39  
      40  /* Specification */
      41  #include "markup.h"
      42  
      43  #include "c-ctype.h"
      44  #include "gettext.h"
      45  #include "gl_linked_list.h"
      46  #include "gl_xlist.h"
      47  #include "unictype.h"
      48  #include "unistr.h"
      49  #include "xalloc.h"
      50  #include "xvasprintf.h"
      51  
/* Shorthand used throughout this file: passes message S to gettext().  */
#define _(s) gettext(s)
      53  
      54  /**
      55   * The "markup" parser is intended to parse a simple markup format
      56   * that's a subset of XML.  This is a small, efficient, easy-to-use
      57   * parser.  It should not be used if you expect to interoperate with
      58   * other applications generating full-scale XML.  However, it's very
      59   * useful for application data files, config files, etc. where you
      60   * know your application will be the only one writing the file.
      61   * Full-scale XML parsers should be able to parse the subset used by
      62   * markup, so you can easily migrate to full-scale XML at a later
      63   * time if the need arises.
      64   *
      65   * The parser is not guaranteed to signal an error on all invalid XML;
      66   * the parser may accept documents that an XML parser would not.
      67   * However, XML documents which are not well-formed (which is a weaker
      68   * condition than being valid.  See the XML specification
      69   * <https://www.w3.org/TR/REC-xml/> for definitions of these terms.)
      70   * are not considered valid GMarkup documents.
      71   *
      72   * Simplifications to XML include:
      73   *
      74   * - Only UTF-8 encoding is allowed
      75   *
      76   * - No user-defined entities
      77   *
      78   * - Processing instructions, comments and the doctype declaration
      79   *   are "passed through" but are not interpreted in any way
      80   *
      81   * - No DTD or validation
      82   *
      83   * The markup format does support:
      84   *
      85   * - Elements
      86   *
      87   * - Attributes
      88   *
      89   * - 5 standard entities: &amp; &lt; &gt; &quot; &apos;
      90   *
      91   * - Character references
      92   *
      93   * - Sections marked as CDATA
      94   */
      95  
/* States of the parser's state machine.  The current state is kept in
   the 'state' member of the parse context.  */
typedef enum
{
  STATE_START,                /* state of a freshly created context */
  STATE_AFTER_OPEN_ANGLE,
  STATE_AFTER_CLOSE_ANGLE,
  STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
  STATE_INSIDE_OPEN_TAG_NAME,
  STATE_INSIDE_ATTRIBUTE_NAME,
  STATE_AFTER_ATTRIBUTE_NAME,
  STATE_BETWEEN_ATTRIBUTES,
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
  STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
  STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
  STATE_INSIDE_TEXT,
  STATE_AFTER_CLOSE_TAG_SLASH,
  STATE_INSIDE_CLOSE_TAG_NAME,
  STATE_AFTER_CLOSE_TAG_NAME,
  STATE_INSIDE_PASSTHROUGH,
  STATE_ERROR                 /* set by emit_error */
} markup_parse_state_ty;
     116  
/* One entry of the subparser stack.  markup_parse_context_push saves the
   context's current subparser element, parser and user data here, and
   pop_subparser_stack copies them back into the context.  */
typedef struct
{
  const char *prev_element;             /* saved context->subparser_element */
  const markup_parser_ty *prev_parser;  /* saved context->parser */
  void *prev_user_data;                 /* saved context->user_data */
} markup_recursion_tracker_ty;
     123  
/* A growable character buffer, managed by the markup_string_* functions
   and string_blank.  */
typedef struct
{
  char *buffer;   /* storage, grown with xrealloc by markup_string_append */
  size_t bufmax;  /* number of bytes allocated for 'buffer' */
  size_t buflen;  /* number of bytes in use, not counting the NUL */
} markup_string_ty;
     130  
/* The complete state of one parser instance.  Created by
   markup_parse_context_new and destroyed by markup_parse_context_free.  */
struct _markup_parse_context_ty
{
  /* Callback table currently in effect; replaced while a subparser is
     pushed and restored when it is popped.  */
  const markup_parser_ty *parser;

  /* Flags given at creation time, e.g. MARKUP_IGNORE_QUALIFIED.  */
  markup_parse_flags_ty flags;

  /* Position bookkeeping maintained by advance_char.  Both start at 1;
     char_number is reset to 1 whenever a newline is reached.  */
  int line_number;
  int char_number;

  markup_parse_state_ty state;

  /* Opaque pointer handed to every callback.  */
  void *user_data;

  /* A piece of character data or an element that
   * hasn't "ended" yet so we haven't yet called
   * the callback for it.
   */
  markup_string_ty *partial_chunk;

  /* Names of the currently open elements; index 0 is the innermost.  */
  gl_list_t tag_stack;          /* <markup_string_ty> */

  /* Attributes collected for the element being opened: two parallel,
     NULL-terminated arrays.  cur_attr is the index of the most recently
     added attribute (-1 when there is none); alloc_attrs is the number of
     slots allocated in each array.  */
  char **attr_names;
  char **attr_values;
  int cur_attr;
  int alloc_attrs;

  /* NOTE(review): presumably the input chunk currently being parsed; these
     are only initialized in the portion of the file reviewed here.
     advance_char treats current_text_end as the end of the input.  */
  const char *current_text;
  ssize_t current_text_len;
  const char *current_text_end;

  /* used to save the start of the last interesting thingy */
  const char *start;

  /* Current read position; moved forward by advance_char.  */
  const char *iter;

  /* Private copy of the message passed to the latest emit_error call.  */
  char *error_text;

  unsigned int document_empty : 1;
  unsigned int parsing : 1;         /* must be clear when the context is freed */
  unsigned int awaiting_pop : 1;    /* set by pop_subparser_stack */
  int balance;

  /* subparser support */
  gl_list_t subparser_stack;    /* <markup_recursion_tracker_ty *> */
  /* Name buffer of the element that owns the current subparser; it is
     compared by pointer identity in possibly_finish_subparser.  */
  const char *subparser_element;
};
     177  
     178  static markup_string_ty *
     179  markup_string_new (void)
     180  {
     181    return XZALLOC (markup_string_ty);
     182  }
     183  
     184  static char *
     185  markup_string_free (markup_string_ty *string, bool free_segment)
     186  {
     187    if (free_segment)
     188      {
     189        free (string->buffer);
     190        free (string);
     191        return NULL;
     192      }
     193    else
     194      {
     195        char *result = string->buffer;
     196        free (string);
     197        return result;
     198      }
     199  }
     200  
     201  static void
     202  markup_string_free1 (markup_string_ty *string)
     203  {
     204    markup_string_free (string, true);
     205  }
     206  
     207  static void
     208  markup_string_truncate (markup_string_ty *string, size_t length)
     209  {
     210    assert (string && length < string->buflen - 1);
     211    string->buffer[length] = '\0';
     212    string->buflen = length;
     213  }
     214  
     215  static void
     216  markup_string_append (markup_string_ty *string, const char *to_append,
     217                        size_t length)
     218  {
     219    if (string->buflen + length + 1 > string->bufmax)
     220      {
     221        string->bufmax *= 2;
     222        if (string->buflen + length + 1 > string->bufmax)
     223          string->bufmax = string->buflen + length + 1;
     224        string->buffer = xrealloc (string->buffer, string->bufmax);
     225      }
     226    memcpy (string->buffer + string->buflen, to_append, length);
     227    string->buffer[length] = '\0';
     228    string->buflen = length;
     229  }
     230  
     231  static inline void
     232  string_blank (markup_string_ty *string)
     233  {
     234    if (string->bufmax > 0)
     235      {
     236        *string->buffer = '\0';
     237        string->buflen = 0;
     238      }
     239  }
     240  
     241  /* Creates a new parse context.  A parse context is used to parse
     242     marked-up documents.  You can feed any number of documents into a
     243     context, as long as no errors occur; once an error occurs, the
     244     parse context can't continue to parse text (you have to free it and
     245     create a new parse context).  */
     246  markup_parse_context_ty *
     247  markup_parse_context_new (const markup_parser_ty *parser,
     248                            markup_parse_flags_ty flags,
     249                            void *user_data)
     250  {
     251    markup_parse_context_ty *context;
     252  
     253    assert (parser != NULL);
     254  
     255    context = XMALLOC (markup_parse_context_ty);
     256  
     257    context->parser = parser;
     258    context->flags = flags;
     259    context->user_data = user_data;
     260  
     261    context->line_number = 1;
     262    context->char_number = 1;
     263  
     264    context->partial_chunk = NULL;
     265  
     266    context->state = STATE_START;
     267    context->tag_stack =
     268      gl_list_create_empty (GL_LINKED_LIST,
     269                            NULL, NULL,
     270                            (gl_listelement_dispose_fn) markup_string_free1,
     271                            true);
     272    context->attr_names = NULL;
     273    context->attr_values = NULL;
     274    context->cur_attr = -1;
     275    context->alloc_attrs = 0;
     276  
     277    context->current_text = NULL;
     278    context->current_text_len = -1;
     279    context->current_text_end = NULL;
     280  
     281    context->start = NULL;
     282    context->iter = NULL;
     283  
     284    context->error_text = NULL;
     285  
     286    context->document_empty = true;
     287    context->parsing = false;
     288  
     289    context->awaiting_pop = false;
     290    context->subparser_stack =
     291      gl_list_create_empty (GL_LINKED_LIST,
     292                            NULL, NULL,
     293                            (gl_listelement_dispose_fn) free,
     294                            true);
     295    context->subparser_element = NULL;
     296  
     297    context->balance = 0;
     298  
     299    return context;
     300  }
     301  
     302  static void clear_attributes (markup_parse_context_ty *context);
     303  
     304  /* Frees a parse context.  This function can't be called from inside
     305     one of the markup_parser_ty functions or while a subparser is
     306     pushed.  */
     307  void
     308  markup_parse_context_free (markup_parse_context_ty *context)
     309  {
     310    assert (context != NULL);
     311    assert (!context->parsing);
     312    assert (gl_list_size (context->subparser_stack) == 0);
     313    assert (!context->awaiting_pop);
     314  
     315    clear_attributes (context);
     316    free (context->attr_names);
     317    free (context->attr_values);
     318  
     319    gl_list_free (context->tag_stack);
     320    gl_list_free (context->subparser_stack);
     321  
     322    if (context->partial_chunk)
     323      markup_string_free (context->partial_chunk, true);
     324  
     325    free (context->error_text);
     326  
     327    free (context);
     328  }
     329  
     330  static void pop_subparser_stack (markup_parse_context_ty *context);
     331  
     332  static void
     333  emit_error (markup_parse_context_ty *context, const char *error_text)
     334  {
     335    context->state = STATE_ERROR;
     336  
     337    if (context->parser->error)
     338      (*context->parser->error) (context, error_text, context->user_data);
     339  
     340    /* report the error all the way up to free all the user-data */
     341    while (gl_list_size (context->subparser_stack) > 0)
     342      {
     343        pop_subparser_stack (context);
     344        context->awaiting_pop = false; /* already been freed */
     345  
     346        if (context->parser->error)
     347          (*context->parser->error) (context, error_text, context->user_data);
     348      }
     349  
     350    if (context->error_text)
     351      free (context->error_text);
     352    context->error_text = xstrdup (error_text);
     353  }
     354  
/* True if C is one of the characters '=', '/', '>' or ' '.  The name
   validators reject these characters and advance_to_name_end stops at
   them.  C is evaluated more than once, so it must not have side
   effects.  */
#define IS_COMMON_NAME_END_CHAR(c) \
  ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
     357  
     358  static bool
     359  slow_name_validate (markup_parse_context_ty *context, const char *name)
     360  {
     361    const char *p = name;
     362    ucs4_t uc;
     363  
     364    if (u8_check ((const uint8_t *) name, strlen (name)) != NULL)
     365      {
     366        emit_error (context, _("invalid UTF-8 sequence"));
     367        return false;
     368      }
     369  
     370    if (!(c_isalpha (*p)
     371          || (!IS_COMMON_NAME_END_CHAR (*p)
     372              && (*p == '_'
     373                  || *p == ':'
     374                  || (u8_mbtouc (&uc, (const uint8_t *) name, strlen (name)) > 0
     375                      && uc_is_alpha (uc))))))
     376      {
     377        char *error_text = xasprintf (_("'%s' is not a valid name: %c"),
     378                                      name, *p);
     379        emit_error (context, error_text);
     380        free (error_text);
     381        return false;
     382      }
     383  
     384    for (p = (const char *) u8_next (&uc, (const uint8_t *) name);
     385         p != NULL;
     386         p = (const char *) u8_next (&uc, (const uint8_t *) p))
     387      {
     388        /* is_name_char */
     389        if (!(c_isalnum (*p) ||
     390              (!IS_COMMON_NAME_END_CHAR (*p) &&
     391               (*p == '.' ||
     392                *p == '-' ||
     393                *p == '_' ||
     394                *p == ':' ||
     395                uc_is_alpha (uc)))))
     396          {
     397            char *error_text = xasprintf (_("'%s' is not a valid name: '%c'"),
     398                                          name, *p);
     399            emit_error (context, error_text);
     400            free (error_text);
     401            return false;
     402          }
     403      }
     404    return true;
     405  }
     406  
     407  /*
     408   * Use me for elements, attributes etc.
     409   */
     410  static bool
     411  name_validate (markup_parse_context_ty *context, const char *name)
     412  {
     413    char mask;
     414    const char *p;
     415  
     416    /* name start char */
     417    p = name;
     418    if (IS_COMMON_NAME_END_CHAR (*p)
     419        || !(c_isalpha (*p) || *p == '_' || *p == ':'))
     420      goto slow_validate;
     421  
     422    for (mask = *p++; *p != '\0'; p++)
     423      {
     424        mask |= *p;
     425  
     426        /* is_name_char */
     427        if (!(c_isalnum (*p)
     428              || (!IS_COMMON_NAME_END_CHAR (*p)
     429                  && (*p == '.' || *p == '-' || *p == '_' || *p == ':'))))
     430          goto slow_validate;
     431      }
     432  
     433    if (mask & 0x80) /* un-common / non-ascii */
     434      goto slow_validate;
     435  
     436    return true;
     437  
     438   slow_validate:
     439    return slow_name_validate (context, name);
     440  }
     441  
     442  static bool
     443  text_validate (markup_parse_context_ty *context,
     444                 const char *p,
     445                 int len)
     446  {
     447    if (u8_check ((const uint8_t *) p, len) != NULL)
     448      {
     449        emit_error (context, _("invalid UTF-8 sequence"));
     450        return false;
     451      }
     452    else
     453      return true;
     454  }
     455  
     456  /*
     457   * re-write the GString in-place, unescaping anything that escaped.
     458   * most XML does not contain entities, or escaping.
     459   */
     460  static bool
     461  unescape_string_inplace (markup_parse_context_ty *context,
     462                           markup_string_ty *string,
     463                           bool *is_ascii)
     464  {
     465    char mask, *to;
     466    const char *from;
     467    bool normalize_attribute;
     468  
     469    if (string->buflen == 0)
     470      return true;
     471  
     472    *is_ascii = false;
     473  
     474    /* are we unescaping an attribute or not ? */
     475    if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ
     476        || context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
     477      normalize_attribute = true;
     478    else
     479      normalize_attribute = false;
     480  
     481    /*
     482     * Meeks' theorem: unescaping can only shrink text.
     483     * for &lt; etc. this is obvious, for &#xffff; more
     484     * thought is required, but this is patently so.
     485     */
     486    mask = 0;
     487    for (from = to = string->buffer; *from != '\0'; from++, to++)
     488      {
     489        *to = *from;
     490  
     491        mask |= *to;
     492        if (normalize_attribute && (*to == '\t' || *to == '\n'))
     493          *to = ' ';
     494        if (*to == '\r')
     495          {
     496            *to = normalize_attribute ? ' ' : '\n';
     497            if (from[1] == '\n')
     498              from++;
     499          }
     500        if (*from == '&')
     501          {
     502            from++;
     503            if (*from == '#')
     504              {
     505                int base = 10;
     506                unsigned long l;
     507                char *end = NULL;
     508  
     509                from++;
     510  
     511                if (*from == 'x')
     512                  {
     513                    base = 16;
     514                    from++;
     515                  }
     516  
     517                errno = 0;
     518                l = strtoul (from, &end, base);
     519  
     520                if (end == from || errno != 0)
     521                  {
     522                    char *error_text =
     523                      xasprintf (_("invalid character reference: %s"),
     524                                 errno != 0
     525                                 ? strerror (errno)
     526                                 : _("not a valid number specification"));
     527                    emit_error (context, error_text);
     528                    free (error_text);
     529                    return false;
     530                  }
     531                else if (*end != ';')
     532                  {
     533                    char *error_text =
     534                      xasprintf (_("invalid character reference: %s"),
     535                                 _("no ending ';'"));
     536                    emit_error (context, error_text);
     537                    free (error_text);
     538                    return false;
     539                  }
     540                else
     541                  {
     542                    /* characters XML 1.1 permits */
     543                    if ((0 < l && l <= 0xD7FF) ||
     544                        (0xE000 <= l && l <= 0xFFFD) ||
     545                        (0x10000 <= l && l <= 0x10FFFF))
     546                      {
     547                        char buf[8];
     548                        int length;
     549                        length = u8_uctomb ((uint8_t *) buf, l, 8);
     550                        memcpy (to, buf, length);
     551                        to += length - 1;
     552                        from = end;
     553                        if (l >= 0x80) /* not ascii */
     554                          mask |= 0x80;
     555                      }
     556                    else
     557                      {
     558                        char *error_text =
     559                          xasprintf (_("invalid character reference: %s"),
     560                                     _("non-permitted character"));
     561                        emit_error (context, error_text);
     562                        free (error_text);
     563                        return false;
     564                      }
     565                  }
     566              }
     567  
     568            else if (strncmp (from, "lt;", 3) == 0)
     569              {
     570                *to = '<';
     571                from += 2;
     572              }
     573            else if (strncmp (from, "gt;", 3) == 0)
     574              {
     575                *to = '>';
     576                from += 2;
     577              }
     578            else if (strncmp (from, "amp;", 4) == 0)
     579              {
     580                *to = '&';
     581                from += 3;
     582              }
     583            else if (strncmp (from, "quot;", 5) == 0)
     584              {
     585                *to = '"';
     586                from += 4;
     587              }
     588            else if (strncmp (from, "apos;", 5) == 0)
     589              {
     590                *to = '\'';
     591                from += 4;
     592              }
     593            else
     594              {
     595                const char *reason;
     596                char *error_text;
     597  
     598                if (*from == ';')
     599                  reason = _("empty");
     600                else
     601                  {
     602                    const char *end = strchr (from, ';');
     603                    if (end)
     604                      reason = _("unknown");
     605                    else
     606                      reason = _("no ending ';'");
     607                  }
     608                error_text = xasprintf (_("invalid entity reference: %s"),
     609                                        reason);
     610                emit_error (context, error_text);
     611                free (error_text);
     612                return false;
     613              }
     614          }
     615      }
     616  
     617    assert (to - string->buffer <= string->buflen);
     618    if (to - string->buffer != string->buflen)
     619      markup_string_truncate (string, to - string->buffer);
     620  
     621    *is_ascii = !(mask & 0x80);
     622  
     623    return true;
     624  }
     625  
     626  static inline bool
     627  advance_char (markup_parse_context_ty *context)
     628  {
     629    context->iter++;
     630    context->char_number++;
     631  
     632    if (context->iter == context->current_text_end)
     633        return false;
     634  
     635    else if (*context->iter == '\n')
     636      {
     637        context->line_number++;
     638        context->char_number = 1;
     639      }
     640  
     641    return true;
     642  }
     643  
     644  static inline bool
     645  xml_isspace (char c)
     646  {
     647    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
     648  }
     649  
     650  static void
     651  skip_spaces (markup_parse_context_ty *context)
     652  {
     653    do
     654      {
     655        if (!xml_isspace (*context->iter))
     656          return;
     657      }
     658    while (advance_char (context));
     659  }
     660  
     661  static void
     662  advance_to_name_end (markup_parse_context_ty *context)
     663  {
     664    do
     665      {
     666        if (IS_COMMON_NAME_END_CHAR (*(context->iter)))
     667          return;
     668        if (xml_isspace (*(context->iter)))
     669          return;
     670      }
     671    while (advance_char (context));
     672  }
     673  
     674  static void
     675  add_to_partial (markup_parse_context_ty *context,
     676                  const char         *text_start,
     677                  const char         *text_end)
     678  {
     679    if (context->partial_chunk == NULL)
     680      { /* allocate a new chunk to parse into */
     681  
     682        context->partial_chunk = markup_string_new ();
     683      }
     684  
     685    if (text_start != text_end)
     686      markup_string_append (context->partial_chunk,
     687                            text_start, text_end - text_start);
     688  }
     689  
     690  static inline void
     691  truncate_partial (markup_parse_context_ty *context)
     692  {
     693    if (context->partial_chunk != NULL)
     694      string_blank (context->partial_chunk);
     695  }
     696  
     697  static inline const char*
     698  current_element (markup_parse_context_ty *context)
     699  {
     700    const markup_string_ty *string = gl_list_get_at (context->tag_stack, 0);
     701    return string->buffer;
     702  }
     703  
     704  static void
     705  pop_subparser_stack (markup_parse_context_ty *context)
     706  {
     707    markup_recursion_tracker_ty *tracker;
     708  
     709    assert (gl_list_size (context->subparser_stack) > 0);
     710  
     711    tracker = (markup_recursion_tracker_ty *) gl_list_get_at (context->subparser_stack, 0);
     712  
     713    context->awaiting_pop = true;
     714  
     715    context->user_data = tracker->prev_user_data;
     716    context->parser = tracker->prev_parser;
     717    context->subparser_element = tracker->prev_element;
     718    free (tracker);
     719  
     720    gl_list_remove_at (context->subparser_stack, 0);
     721  }
     722  
     723  static void
     724  push_partial_as_tag (markup_parse_context_ty *context)
     725  {
     726    gl_list_add_first (context->tag_stack, context->partial_chunk);
     727    context->partial_chunk = NULL;
     728  }
     729  
     730  static void
     731  pop_tag (markup_parse_context_ty *context)
     732  {
     733    gl_list_remove_at (context->tag_stack, 0);
     734  }
     735  
     736  static void
     737  possibly_finish_subparser (markup_parse_context_ty *context)
     738  {
     739    if (current_element (context) == context->subparser_element)
     740      pop_subparser_stack (context);
     741  }
     742  
/* Clears the awaiting_pop flag of CONTEXT.  Called by emit_end_element
   after the end_element callback has run.  */
static void
ensure_no_outstanding_subparser (markup_parse_context_ty *context)
{
  context->awaiting_pop = false;
}
     748  
     749  static void
     750  add_attribute (markup_parse_context_ty *context, markup_string_ty *string)
     751  {
     752    if (context->cur_attr + 2 >= context->alloc_attrs)
     753      {
     754        context->alloc_attrs += 5; /* silly magic number */
     755        context->attr_names = xrealloc (context->attr_names, sizeof (char *) * context->alloc_attrs);
     756        context->attr_values = xrealloc (context->attr_values, sizeof(char *) * context->alloc_attrs);
     757      }
     758    context->cur_attr++;
     759    context->attr_names[context->cur_attr] = xstrdup (string->buffer);
     760    context->attr_values[context->cur_attr] = NULL;
     761    context->attr_names[context->cur_attr+1] = NULL;
     762    context->attr_values[context->cur_attr+1] = NULL;
     763  }
     764  
     765  static void
     766  clear_attributes (markup_parse_context_ty *context)
     767  {
     768    /* Go ahead and free the attributes. */
     769    for (; context->cur_attr >= 0; context->cur_attr--)
     770      {
     771        int pos = context->cur_attr;
     772        free (context->attr_names[pos]);
     773        free (context->attr_values[pos]);
     774        context->attr_names[pos] = context->attr_values[pos] = NULL;
     775      }
     776    assert (context->cur_attr == -1);
     777    assert (context->attr_names == NULL ||
     778            context->attr_names[0] == NULL);
     779    assert (context->attr_values == NULL ||
     780            context->attr_values[0] == NULL);
     781  }
     782  
     783  static void
     784  markup_parse_context_push (markup_parse_context_ty *context,
     785                             const markup_parser_ty *parser,
     786                             void *user_data)
     787  {
     788    markup_recursion_tracker_ty *tracker;
     789  
     790    tracker = XMALLOC (markup_recursion_tracker_ty);
     791    tracker->prev_element = context->subparser_element;
     792    tracker->prev_parser = context->parser;
     793    tracker->prev_user_data = context->user_data;
     794  
     795    context->subparser_element = current_element (context);
     796    context->parser = parser;
     797    context->user_data = user_data;
     798  
     799    gl_list_add_first (context->subparser_stack, tracker);
     800  }
     801  
     802  static void
     803  markup_parse_context_pop (markup_parse_context_ty *context)
     804  {
     805    if (!context->awaiting_pop)
     806      possibly_finish_subparser (context);
     807  
     808    assert (context->awaiting_pop);
     809  
     810    context->awaiting_pop = false;
     811  }
     812  
     813  /* This has to be a separate function to ensure the alloca's
     814   * are unwound on exit - otherwise we grow & blow the stack
     815   * with large documents
     816   */
     817  static inline void
     818  emit_start_element (markup_parse_context_ty *context)
     819  {
     820    int i, j = 0;
     821    const char *start_name;
     822    const char **attr_names;
     823    const char **attr_values;
     824  
     825    /* In case we want to ignore qualified tags and we see that we have
     826     * one here, we push a subparser.  This will ignore all tags inside of
     827     * the qualified tag.
     828     *
     829     * We deal with the end of the subparser from emit_end_element.
     830     */
     831    if ((context->flags & MARKUP_IGNORE_QUALIFIED)
     832        && strchr (current_element (context), ':'))
     833      {
     834        static const markup_parser_ty ignore_parser;
     835        markup_parse_context_push (context, &ignore_parser, NULL);
     836        clear_attributes (context);
     837        return;
     838      }
     839  
     840    attr_names = XCALLOC (context->cur_attr + 2, const char *);
     841    attr_values = XCALLOC (context->cur_attr + 2, const char *);
     842    for (i = 0; i < context->cur_attr + 1; i++)
     843      {
     844        /* Possibly omit qualified attribute names from the list */
     845        if ((context->flags & MARKUP_IGNORE_QUALIFIED)
     846            && strchr (context->attr_names[i], ':'))
     847          continue;
     848  
     849        attr_names[j] = context->attr_names[i];
     850        attr_values[j] = context->attr_values[i];
     851        j++;
     852      }
     853    attr_names[j] = NULL;
     854    attr_values[j] = NULL;
     855  
     856    /* Call user callback for element start */
     857    start_name = current_element (context);
     858  
     859    if (context->parser->start_element && name_validate (context, start_name))
     860      (* context->parser->start_element) (context,
     861                                          start_name,
     862                                          (const char **)attr_names,
     863                                          (const char **)attr_values,
     864                                          context->user_data);
     865    free (attr_names);
     866    free (attr_values);
     867    clear_attributes (context);
     868  }
     869  
     870  static void
     871  emit_end_element (markup_parse_context_ty *context)
     872  {
     873    assert (gl_list_size (context->tag_stack) != 0);
     874  
     875    possibly_finish_subparser (context);
     876  
     877    /* We might have just returned from our ignore subparser */
     878    if ((context->flags & MARKUP_IGNORE_QUALIFIED)
     879        && strchr (current_element (context), ':'))
     880      {
     881        markup_parse_context_pop (context);
     882        pop_tag (context);
     883        return;
     884      }
     885  
     886    if (context->parser->end_element)
     887      (* context->parser->end_element) (context,
     888                                        current_element (context),
     889                                        context->user_data);
     890  
     891    ensure_no_outstanding_subparser (context);
     892  
     893    pop_tag (context);
     894  }
     895  
/* Feed some data to the parse context.  The data need not be valid
   UTF-8; an error will be signaled if it's invalid.  The data need
   not be an entire document; you can feed a document into the parser
   incrementally, via multiple calls to this function.  Typically, as
   you receive data from a network connection or file, you feed each
   received chunk of data into this function, aborting the process if
   an error occurs.  Once an error is reported, no further data may be
   fed to the parse context; all errors are fatal.

   CONTEXT is the parse context.  It must not be in the error state and
   must not already be inside a parse call; both are asserted.
   TEXT is the chunk to parse; it must not be NULL.
   TEXT_LEN is the length of TEXT in bytes.  If it is negative, TEXT is
   taken to be NUL-terminated and its length is computed with strlen.
   An empty chunk is accepted and returns true without touching the
   parser state.

   Returns true if the context is not in the error state after the
   chunk has been processed, false otherwise.  */
bool
markup_parse_context_parse (markup_parse_context_ty *context,
                            const char *text,
                            ssize_t text_len)
{
  assert (context != NULL);
  assert (text != NULL);
  assert (context->state != STATE_ERROR);
  assert (!context->parsing);

  /* A negative length requests "up to the terminating NUL".  */
  if (text_len < 0)
    text_len = strlen (text);

  if (text_len == 0)
    return true;

  /* Mark the context as busy; this function and
     markup_parse_context_end_parse assert that the flag is clear on
     entry.  */
  context->parsing = true;


  /* Set up the cursor (iter) and the start of the not yet saved span
     (start) for this chunk.  Data that belongs to a construct which
     continues in the next chunk is saved with add_to_partial.  */
  context->current_text = text;
  context->current_text_len = text_len;
  context->current_text_end = context->current_text + text_len;
  context->iter = context->current_text;
  context->start = context->iter;

  /* Run the state machine until the chunk is used up.  The error
     branches below do not advance the cursor; the loop then leaves
     through the STATE_ERROR case.
     NOTE(review): this relies on emit_error switching the context to
     STATE_ERROR, which is not visible in this function - confirm.  */
  while (context->iter != context->current_text_end)
    {
      switch (context->state)
        {
        case STATE_START:
          /* Possible next state: AFTER_OPEN_ANGLE */

          assert (gl_list_size (context->tag_stack) == 0);

          /* whitespace is ignored outside of any elements */
          skip_spaces (context);

          if (context->iter != context->current_text_end)
            {
              if (*context->iter == '<')
                {
                  /* Move after the open angle */
                  advance_char (context);

                  context->state = STATE_AFTER_OPEN_ANGLE;

                  /* this could start a passthrough */
                  context->start = context->iter;

                  /* document is now non-empty */
                  context->document_empty = false;
                }
              else
                {
                  emit_error (context,
                              _("document must begin with an element"));
                }
            }
          break;

        case STATE_AFTER_OPEN_ANGLE:
          /* Possible next states: INSIDE_OPEN_TAG_NAME,
           * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
           */
          if (*context->iter == '?' ||
              *context->iter == '!')
            {
              /* include < in the passthrough */
              const char *openangle = "<";
              add_to_partial (context, openangle, openangle + 1);
              context->start = context->iter;
              /* One '<' is open so far; the passthrough state uses this
               * counter to find the '>' that ends a <!DOCTYPE.
               */
              context->balance = 1;
              context->state = STATE_INSIDE_PASSTHROUGH;
            }
          else if (*context->iter == '/')
            {
              /* move after it */
              advance_char (context);

              context->state = STATE_AFTER_CLOSE_TAG_SLASH;
            }
          else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
            {
              context->state = STATE_INSIDE_OPEN_TAG_NAME;

              /* start of tag name */
              context->start = context->iter;
            }
          else
            {
              char *error_text = xasprintf (_("invalid character after '%s'"),
                                            "<");
              emit_error (context, error_text);
              free (error_text);
            }
          break;

          /* The AFTER_CLOSE_ANGLE state is actually sort of
           * broken, because it doesn't correspond to a range
           * of characters in the input stream as the others do,
           * and thus makes things harder to conceptualize
           */
        case STATE_AFTER_CLOSE_ANGLE:
          /* Possible next states: INSIDE_TEXT, STATE_START */
          /* No input is consumed here; the state only dispatches on
           * whether any element is still open.
           */
          if (gl_list_size (context->tag_stack) == 0)
            {
              context->start = NULL;
              context->state = STATE_START;
            }
          else
            {
              context->start = context->iter;
              context->state = STATE_INSIDE_TEXT;
            }
          break;

        case STATE_AFTER_ELISION_SLASH:
          /* Possible next state: AFTER_CLOSE_ANGLE */
          /* We are behind the '/' of an empty-element tag; the element
           * start was already emitted in BETWEEN_ATTRIBUTES, so only
           * the element end remains.
           */
          if (*context->iter == '>')
            {
              /* move after the close angle */
              advance_char (context);
              context->state = STATE_AFTER_CLOSE_ANGLE;
              emit_end_element (context);
            }
          else
            {
              char *error_text = xasprintf (_("missing '%c'"), '>');
              emit_error (context, error_text);
              free (error_text);
            }
          break;

        case STATE_INSIDE_OPEN_TAG_NAME:
          /* Possible next states: BETWEEN_ATTRIBUTES */

          /* if there's a partial chunk then it's the first part of the
           * tag name. If there's a context->start then it's the start
           * of the tag name in current_text, the partial chunk goes
           * before that start though.
           */
          advance_to_name_end (context);

          if (context->iter == context->current_text_end)
            {
              /* The name hasn't necessarily ended. Merge with
               * partial chunk, leave state unchanged.
               */
              add_to_partial (context, context->start, context->iter);
            }
          else
            {
              /* The name has ended. Combine it with the partial chunk
               * if any; push it on the stack; enter next state.
               */
              add_to_partial (context, context->start, context->iter);
              push_partial_as_tag (context);

              context->state = STATE_BETWEEN_ATTRIBUTES;
              context->start = NULL;
            }
          break;

        case STATE_INSIDE_ATTRIBUTE_NAME:
          /* Possible next states: AFTER_ATTRIBUTE_NAME */

          advance_to_name_end (context);
          add_to_partial (context, context->start, context->iter);

          /* The part of the name seen in this chunk is now in the
           * partial chunk.  If the chunk ended here, stay in this state
           * so that the rest of the name can be appended later;
           * otherwise the name is complete.
           */
          if (context->iter != context->current_text_end)
            context->state = STATE_AFTER_ATTRIBUTE_NAME;
          break;

        case STATE_AFTER_ATTRIBUTE_NAME:
          /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */

          skip_spaces (context);

          if (context->iter != context->current_text_end)
            {
              /* The complete attribute name is in the partial chunk.
               * Validate it, record it as a new attribute (its value
               * is filled in later), then expect the '='.
               */
              if (!name_validate (context, context->partial_chunk->buffer))
                break;

              add_attribute (context, context->partial_chunk);

              markup_string_free (context->partial_chunk, true);
              context->partial_chunk = NULL;
              context->start = NULL;

              if (*context->iter == '=')
                {
                  advance_char (context);
                  context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
                }
              else
                {
                  char *error_text = xasprintf (_("missing '%c'"), '=');
                  emit_error (context, error_text);
                  free (error_text);
                }
            }
          break;

        case STATE_BETWEEN_ATTRIBUTES:
          /* Possible next states: AFTER_CLOSE_ANGLE,
           * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
           */
          skip_spaces (context);

          if (context->iter != context->current_text_end)
            {
              if (*context->iter == '/')
                {
                  advance_char (context);
                  context->state = STATE_AFTER_ELISION_SLASH;
                }
              else if (*context->iter == '>')
                {
                  advance_char (context);
                  context->state = STATE_AFTER_CLOSE_ANGLE;
                }
              else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
                {
                  context->state = STATE_INSIDE_ATTRIBUTE_NAME;
                  /* start of attribute name */
                  context->start = context->iter;
                }
              else
                {
                  char *error_text = xasprintf (_("missing '%c' or '%c'"),
                                                '>', '/');
                  emit_error (context, error_text);
                  free (error_text);
                }

              /* If we're done with attributes, invoke
               * the start_element callback
               */
              if (context->state == STATE_AFTER_ELISION_SLASH ||
                  context->state == STATE_AFTER_CLOSE_ANGLE)
                emit_start_element (context);
            }
          break;

        case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
          /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */

          skip_spaces (context);

          if (context->iter != context->current_text_end)
            {
              /* The kind of opening quote selects the state, and with
               * it the delimiter that ends the value.
               */
              if (*context->iter == '"')
                {
                  advance_char (context);
                  context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
                  context->start = context->iter;
                }
              else if (*context->iter == '\'')
                {
                  advance_char (context);
                  context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
                  context->start = context->iter;
                }
              else
                {
                  char *error_text = xasprintf (_("missing '%c' or '%c'"),
                                                '\'', '"');
                  emit_error (context, error_text);
                  free (error_text);
                }
            }
          break;

        case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
        case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
          /* Possible next states: BETWEEN_ATTRIBUTES */
          {
            char delim;

            if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
              {
                delim = '\'';
              }
            else
              {
                delim = '"';
              }

            /* Scan forward to the closing quote or to the end of the
             * chunk, whichever comes first.
             */
            do
              {
                if (*context->iter == delim)
                  break;
              }
            while (advance_char (context));
          }
          if (context->iter == context->current_text_end)
            {
              /* The value hasn't necessarily ended. Merge with
               * partial chunk, leave state unchanged.
               */
              add_to_partial (context, context->start, context->iter);
            }
          else
            {
              bool is_ascii;
              /* The value has ended at the quote mark. Combine it
               * with the partial chunk if any; set it for the current
               * attribute.
               */
              add_to_partial (context, context->start, context->iter);

              assert (context->cur_attr >= 0);

              /* text_validate is only consulted when the unescaped
               * value is not reported as plain ASCII.
               */
              if (unescape_string_inplace (context, context->partial_chunk,
                                           &is_ascii)
                  && (is_ascii
                      || text_validate (context,
                                        context->partial_chunk->buffer,
                                        context->partial_chunk->buflen)))
                {
                  /* success, advance past quote and set state. */
                  context->attr_values[context->cur_attr] =
                    markup_string_free (context->partial_chunk, false);
                  context->partial_chunk = NULL;
                  advance_char (context);
                  context->state = STATE_BETWEEN_ATTRIBUTES;
                  context->start = NULL;
                }

              truncate_partial (context);
            }
          break;

        case STATE_INSIDE_TEXT:
          /* Possible next states: AFTER_OPEN_ANGLE */
          do
            {
              if (*context->iter == '<')
                break;
            }
          while (advance_char (context));

          /* The text hasn't necessarily ended. Merge with
           * partial chunk, leave state unchanged.
           */

          add_to_partial (context, context->start, context->iter);

          if (context->iter != context->current_text_end)
            {
              bool is_ascii;

              /* The text has ended at the open angle. Call the text
               * callback.
               */
              if (unescape_string_inplace (context, context->partial_chunk,
                                           &is_ascii)
                  && (is_ascii
                      || text_validate (context,
                                        context->partial_chunk->buffer,
                                        context->partial_chunk->buflen)))
                {
                  if (context->parser->text)
                    (*context->parser->text) (context,
                                              context->partial_chunk->buffer,
                                              context->partial_chunk->buflen,
                                              context->user_data);

                  /* advance past open angle and set state. */
                  advance_char (context);
                  context->state = STATE_AFTER_OPEN_ANGLE;
                  /* could begin a passthrough */
                  context->start = context->iter;
                }

              truncate_partial (context);
            }
          break;

        case STATE_AFTER_CLOSE_TAG_SLASH:
          /* Possible next state: INSIDE_CLOSE_TAG_NAME */
          if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
            {
              context->state = STATE_INSIDE_CLOSE_TAG_NAME;

              /* start of tag name */
              context->start = context->iter;
            }
          else
            {
              char *error_text = xasprintf (_("invalid character after '%s'"),
                                            "</");
              emit_error (context, error_text);
              free (error_text);
            }
          break;

        case STATE_INSIDE_CLOSE_TAG_NAME:
          /* Possible next state: AFTER_CLOSE_TAG_NAME */
          advance_to_name_end (context);
          add_to_partial (context, context->start, context->iter);

          /* If the chunk ended here the name may continue in the next
           * chunk, so the state is left unchanged in that case.
           */
          if (context->iter != context->current_text_end)
            context->state = STATE_AFTER_CLOSE_TAG_NAME;
          break;

        case STATE_AFTER_CLOSE_TAG_NAME:
          /* Possible next state: AFTER_CLOSE_ANGLE */

          skip_spaces (context);

          if (context->iter != context->current_text_end)
            {
              markup_string_ty *close_name;

              /* Detach the accumulated close-tag name from the context
               * while it is checked and the element end is emitted; it
               * is put back below so that truncate_partial can dispose
               * of it.
               */
              close_name = context->partial_chunk;
              context->partial_chunk = NULL;

              if (*context->iter != '>')
                {
                  char *error_text =
                    xasprintf (_("invalid character after '%s'"),
                               _("a close element name"));
                  emit_error (context, error_text);
                  free (error_text);
                }
              else if (gl_list_size (context->tag_stack) == 0)
                {
                  /* a close tag although no element is open */
                  emit_error (context, _("element is closed"));
                }
              else if (strcmp (close_name->buffer, current_element (context))
                       != 0)
                {
                  /* the close tag does not match the innermost open
                   * element
                   */
                  emit_error (context, _("element is closed"));
                }
              else
                {
                  advance_char (context);
                  context->state = STATE_AFTER_CLOSE_ANGLE;
                  context->start = NULL;

                  emit_end_element (context);
                }
              context->partial_chunk = close_name;
              truncate_partial (context);
            }
          break;

        case STATE_INSIDE_PASSTHROUGH:
          /* Possible next state: AFTER_CLOSE_ANGLE */
          /* Scan for the '>' that really ends the passthrough.  Each
           * '>' is a candidate: the text collected so far (without that
           * '>') is inspected to see whether the construct that was
           * opened is complete.
           */
          do
            {
              if (*context->iter == '<')
                context->balance++;
              if (*context->iter == '>')
                {
                  char *str;
                  size_t len;

                  context->balance--;
                  add_to_partial (context, context->start, context->iter);
                  context->start = context->iter;

                  str = context->partial_chunk->buffer;
                  len = context->partial_chunk->buflen;

                  /* processing instruction: <? ... ?> */
                  if (str[1] == '?' && str[len - 1] == '?')
                    break;
                  /* comment: <!-- ... --> */
                  if (strncmp (str, "<!--", 4) == 0 &&
                      strcmp (str + len - 2, "--") == 0)
                    break;
                  /* CDATA section: <![CDATA[ ... ]]> */
                  if (strncmp (str, "<![CDATA[", 9) == 0 &&
                      strcmp (str + len - 2, "]]") == 0)
                    break;
                  /* DOCTYPE: ends once every '<' has been matched by
                   * a '>'
                   */
                  if (strncmp (str, "<!DOCTYPE", 9) == 0 &&
                      context->balance == 0)
                    break;
                }
            }
          while (advance_char (context));

          if (context->iter == context->current_text_end)
            {
              /* The passthrough hasn't necessarily ended. Merge with
               * partial chunk, leave state unchanged.
               */
               add_to_partial (context, context->start, context->iter);
            }
          else
            {
              /* The passthrough has ended at the close angle. Combine
               * it with the partial chunk if any. Call the passthrough
               * callback. Note that the open/close angles are
               * included in the text of the passthrough.
               */
              advance_char (context); /* advance past close angle */
              add_to_partial (context, context->start, context->iter);

              if (context->flags & MARKUP_TREAT_CDATA_AS_TEXT &&
                  strncmp (context->partial_chunk->buffer, "<![CDATA[", 9) == 0)
                {
                  /* Hand only the CDATA payload to the text callback:
                   * "+ 9" skips the leading "<![CDATA[" and "- 12"
                   * leaves out those 9 bytes plus the trailing "]]>".
                   */
                  if (context->parser->text &&
                      text_validate (context,
                                     context->partial_chunk->buffer + 9,
                                     context->partial_chunk->buflen - 12))
                    (*context->parser->text) (context,
                                              context->partial_chunk->buffer + 9,
                                              context->partial_chunk->buflen - 12,
                                              context->user_data);
                }
              else if (context->parser->passthrough &&
                       text_validate (context,
                                      context->partial_chunk->buffer,
                                      context->partial_chunk->buflen))
                (*context->parser->passthrough) (context,
                                                 context->partial_chunk->buffer,
                                                 context->partial_chunk->buflen,
                                                 context->user_data);

              truncate_partial (context);

              context->state = STATE_AFTER_CLOSE_ANGLE;
              context->start = context->iter; /* could begin text */
            }
          break;

        case STATE_ERROR:
          /* An earlier iteration flagged an error; stop consuming
           * input.
           */
          goto finished;
          break;

        default:
          abort ();
          break;
        }
    }

 finished:
  context->parsing = false;

  return context->state != STATE_ERROR;
}
    1452  
    1453  /* Signals to the parse context that all data has been fed into the
    1454   * parse context with markup_parse_context_parse.
    1455   *
    1456   * This function reports an error if the document isn't complete,
    1457   * for example if elements are still open.  */
    1458  bool
    1459  markup_parse_context_end_parse (markup_parse_context_ty *context)
    1460  {
    1461    const char *location = NULL;
    1462  
    1463    assert (context != NULL);
    1464    assert (!context->parsing);
    1465    assert (context->state != STATE_ERROR);
    1466  
    1467    if (context->partial_chunk != NULL)
    1468      {
    1469        markup_string_free (context->partial_chunk, true);
    1470        context->partial_chunk = NULL;
    1471      }
    1472  
    1473    if (context->document_empty)
    1474      {
    1475        emit_error (context, _("empty document"));
    1476        return false;
    1477      }
    1478  
    1479    context->parsing = true;
    1480  
    1481    switch (context->state)
    1482      {
    1483      case STATE_START:
    1484        /* Nothing to do */
    1485        break;
    1486  
    1487      case STATE_AFTER_OPEN_ANGLE:
    1488        location = _("after '<'");
    1489        break;
    1490  
    1491      case STATE_AFTER_CLOSE_ANGLE:
    1492        if (gl_list_size (context->tag_stack) > 0)
    1493          {
    1494            /* Error message the same as for INSIDE_TEXT */
    1495            location = _("elements still open");
    1496          }
    1497        break;
    1498  
    1499      case STATE_AFTER_ELISION_SLASH:
    1500        location = _("missing '>'");
    1501        break;
    1502  
    1503      case STATE_INSIDE_OPEN_TAG_NAME:
    1504        location = _("inside an element name");
    1505        break;
    1506  
    1507      case STATE_INSIDE_ATTRIBUTE_NAME:
    1508      case STATE_AFTER_ATTRIBUTE_NAME:
    1509        location = _("inside an attribute name");
    1510        break;
    1511  
    1512      case STATE_BETWEEN_ATTRIBUTES:
    1513        location = _("inside an open tag");
    1514        break;
    1515  
    1516      case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
    1517        location = _("after '='");
    1518        break;
    1519  
    1520      case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
    1521      case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
    1522        location = _("inside an attribute value");
    1523        break;
    1524  
    1525      case STATE_INSIDE_TEXT:
    1526        assert (gl_list_size (context->tag_stack) > 0);
    1527        location = _("elements still open");
    1528        break;
    1529  
    1530      case STATE_AFTER_CLOSE_TAG_SLASH:
    1531      case STATE_INSIDE_CLOSE_TAG_NAME:
    1532      case STATE_AFTER_CLOSE_TAG_NAME:
    1533        location = _("inside the close tag");
    1534        break;
    1535  
    1536      case STATE_INSIDE_PASSTHROUGH:
    1537        location = _("inside a comment or processing instruction");
    1538        break;
    1539  
    1540      case STATE_ERROR:
    1541      default:
    1542        abort ();
    1543        break;
    1544      }
    1545  
    1546    if (location != NULL)
    1547      {
    1548        char *error_text = xasprintf (_("document ended unexpectedly: %s"),
    1549                                      location);
    1550        emit_error (context, error_text);
    1551        free (error_text);
    1552      }
    1553  
    1554    context->parsing = false;
    1555  
    1556    return context->state != STATE_ERROR;
    1557  }
    1558  
    1559  const char *
    1560  markup_parse_context_get_error (markup_parse_context_ty *context)
    1561  {
    1562    return context->error_text;
    1563  }