(root)/
expat-2.5.0/
lib/
xmltok_impl.c
       1  /* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)!
       2                              __  __            _
       3                           ___\ \/ /_ __   __ _| |_
       4                          / _ \\  /| '_ \ / _` | __|
       5                         |  __//  \| |_) | (_| | |_
       6                          \___/_/\_\ .__/ \__,_|\__|
       7                                   |_| XML parser
       8  
       9     Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
      10     Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
      11     Copyright (c) 2002      Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
      12     Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
      13     Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
      14     Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
      15     Copyright (c) 2018      Benjamin Peterson <benjamin@python.org>
      16     Copyright (c) 2018      Anton Maklakov <antmak.pub@gmail.com>
      17     Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
      18     Copyright (c) 2020      Boris Kolpackov <boris@codesynthesis.com>
      19     Copyright (c) 2022      Martin Ettl <ettl.martin78@googlemail.com>
      20     Licensed under the MIT license:
      21  
      22     Permission is  hereby granted,  free of charge,  to any  person obtaining
      23     a  copy  of  this  software   and  associated  documentation  files  (the
      24     "Software"),  to  deal in  the  Software  without restriction,  including
      25     without  limitation the  rights  to use,  copy,  modify, merge,  publish,
      26     distribute, sublicense, and/or sell copies of the Software, and to permit
      27     persons  to whom  the Software  is  furnished to  do so,  subject to  the
      28     following conditions:
      29  
      30     The above copyright  notice and this permission notice  shall be included
      31     in all copies or substantial portions of the Software.
      32  
      33     THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
      34     EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
      35     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
      36     NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
      37     DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
      38     OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
      39     USE OR OTHER DEALINGS IN THE SOFTWARE.
      40  */
      41  
      42  #ifdef XML_TOK_IMPL_C
      43  
      44  #  ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined
      45  #    define IS_INVALID_CHAR(enc, ptr, n) (0)
      46  #  endif
      47  
      48  #  define INVALID_LEAD_CASE(n, ptr, nextTokPtr)                                \
      49    case BT_LEAD##n:                                                             \
      50      if (end - ptr < n)                                                         \
      51        return XML_TOK_PARTIAL_CHAR;                                             \
      52      if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
      53        *(nextTokPtr) = (ptr);                                                   \
      54        return XML_TOK_INVALID;                                                  \
      55      }                                                                          \
      56      ptr += n;                                                                  \
      57      break;
      58  
      59  #  define INVALID_CASES(ptr, nextTokPtr)                                       \
      60      INVALID_LEAD_CASE(2, ptr, nextTokPtr)                                      \
      61      INVALID_LEAD_CASE(3, ptr, nextTokPtr)                                      \
      62      INVALID_LEAD_CASE(4, ptr, nextTokPtr)                                      \
      63    case BT_NONXML:                                                              \
      64    case BT_MALFORM:                                                             \
      65    case BT_TRAIL:                                                               \
      66      *(nextTokPtr) = (ptr);                                                     \
      67      return XML_TOK_INVALID;
      68  
      69  #  define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr)                        \
      70    case BT_LEAD##n:                                                             \
      71      if (end - ptr < n)                                                         \
      72        return XML_TOK_PARTIAL_CHAR;                                             \
      73      if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) {         \
      74        *nextTokPtr = ptr;                                                       \
      75        return XML_TOK_INVALID;                                                  \
      76      }                                                                          \
      77      ptr += n;                                                                  \
      78      break;
      79  
      80  #  define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)                          \
      81    case BT_NONASCII:                                                            \
      82      if (! IS_NAME_CHAR_MINBPC(enc, ptr)) {                                     \
      83        *nextTokPtr = ptr;                                                       \
      84        return XML_TOK_INVALID;                                                  \
      85      }                                                                          \
      86      /* fall through */                                                         \
      87    case BT_NMSTRT:                                                              \
      88    case BT_HEX:                                                                 \
      89    case BT_DIGIT:                                                               \
      90    case BT_NAME:                                                                \
      91    case BT_MINUS:                                                               \
      92      ptr += MINBPC(enc);                                                        \
      93      break;                                                                     \
      94      CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr)                              \
      95      CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr)                              \
      96      CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
      97  
      98  #  define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr)                      \
      99    case BT_LEAD##n:                                                             \
     100      if ((end) - (ptr) < (n))                                                   \
     101        return XML_TOK_PARTIAL_CHAR;                                             \
     102      if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) {       \
     103        *nextTokPtr = ptr;                                                       \
     104        return XML_TOK_INVALID;                                                  \
     105      }                                                                          \
     106      ptr += n;                                                                  \
     107      break;
     108  
     109  #  define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)                        \
     110    case BT_NONASCII:                                                            \
     111      if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {                                   \
     112        *nextTokPtr = ptr;                                                       \
     113        return XML_TOK_INVALID;                                                  \
     114      }                                                                          \
     115      /* fall through */                                                         \
     116    case BT_NMSTRT:                                                              \
     117    case BT_HEX:                                                                 \
     118      ptr += MINBPC(enc);                                                        \
     119      break;                                                                     \
     120      CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr)                            \
     121      CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr)                            \
     122      CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
     123  
     124  #  ifndef PREFIX
     125  #    define PREFIX(ident) ident
     126  #  endif
     127  
     128  #  define HAS_CHARS(enc, ptr, end, count)                                      \
     129      ((end) - (ptr) >= ((count)*MINBPC(enc)))
     130  
     131  #  define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1)
     132  
     133  #  define REQUIRE_CHARS(enc, ptr, end, count)                                  \
     134      {                                                                          \
     135        if (! HAS_CHARS(enc, ptr, end, count)) {                                 \
     136          return XML_TOK_PARTIAL;                                                \
     137        }                                                                        \
     138      }
     139  
     140  #  define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1)
     141  
     142  /* ptr points to character following "<!-" */
     143  
     144  static int PTRCALL
     145  PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
     146                      const char **nextTokPtr) {
     147    if (HAS_CHAR(enc, ptr, end)) {
     148      if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
     149        *nextTokPtr = ptr;
     150        return XML_TOK_INVALID;
     151      }
     152      ptr += MINBPC(enc);
     153      while (HAS_CHAR(enc, ptr, end)) {
     154        switch (BYTE_TYPE(enc, ptr)) {
     155          INVALID_CASES(ptr, nextTokPtr)
     156        case BT_MINUS:
     157          ptr += MINBPC(enc);
     158          REQUIRE_CHAR(enc, ptr, end);
     159          if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
     160            ptr += MINBPC(enc);
     161            REQUIRE_CHAR(enc, ptr, end);
     162            if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
     163              *nextTokPtr = ptr;
     164              return XML_TOK_INVALID;
     165            }
     166            *nextTokPtr = ptr + MINBPC(enc);
     167            return XML_TOK_COMMENT;
     168          }
     169          break;
     170        default:
     171          ptr += MINBPC(enc);
     172          break;
     173        }
     174      }
     175    }
     176    return XML_TOK_PARTIAL;
     177  }
     178  
     179  /* ptr points to character following "<!" */
     180  
     181  static int PTRCALL
     182  PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
     183                   const char **nextTokPtr) {
     184    REQUIRE_CHAR(enc, ptr, end);
     185    switch (BYTE_TYPE(enc, ptr)) {
     186    case BT_MINUS:
     187      return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
     188    case BT_LSQB:
     189      *nextTokPtr = ptr + MINBPC(enc);
     190      return XML_TOK_COND_SECT_OPEN;
     191    case BT_NMSTRT:
     192    case BT_HEX:
     193      ptr += MINBPC(enc);
     194      break;
     195    default:
     196      *nextTokPtr = ptr;
     197      return XML_TOK_INVALID;
     198    }
     199    while (HAS_CHAR(enc, ptr, end)) {
     200      switch (BYTE_TYPE(enc, ptr)) {
     201      case BT_PERCNT:
     202        REQUIRE_CHARS(enc, ptr, end, 2);
     203        /* don't allow <!ENTITY% foo "whatever"> */
     204        switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
     205        case BT_S:
     206        case BT_CR:
     207        case BT_LF:
     208        case BT_PERCNT:
     209          *nextTokPtr = ptr;
     210          return XML_TOK_INVALID;
     211        }
     212        /* fall through */
     213      case BT_S:
     214      case BT_CR:
     215      case BT_LF:
     216        *nextTokPtr = ptr;
     217        return XML_TOK_DECL_OPEN;
     218      case BT_NMSTRT:
     219      case BT_HEX:
     220        ptr += MINBPC(enc);
     221        break;
     222      default:
     223        *nextTokPtr = ptr;
     224        return XML_TOK_INVALID;
     225      }
     226    }
     227    return XML_TOK_PARTIAL;
     228  }
     229  
     230  static int PTRCALL
     231  PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
     232                        int *tokPtr) {
     233    int upper = 0;
     234    UNUSED_P(enc);
     235    *tokPtr = XML_TOK_PI;
     236    if (end - ptr != MINBPC(enc) * 3)
     237      return 1;
     238    switch (BYTE_TO_ASCII(enc, ptr)) {
     239    case ASCII_x:
     240      break;
     241    case ASCII_X:
     242      upper = 1;
     243      break;
     244    default:
     245      return 1;
     246    }
     247    ptr += MINBPC(enc);
     248    switch (BYTE_TO_ASCII(enc, ptr)) {
     249    case ASCII_m:
     250      break;
     251    case ASCII_M:
     252      upper = 1;
     253      break;
     254    default:
     255      return 1;
     256    }
     257    ptr += MINBPC(enc);
     258    switch (BYTE_TO_ASCII(enc, ptr)) {
     259    case ASCII_l:
     260      break;
     261    case ASCII_L:
     262      upper = 1;
     263      break;
     264    default:
     265      return 1;
     266    }
     267    if (upper)
     268      return 0;
     269    *tokPtr = XML_TOK_XML_DECL;
     270    return 1;
     271  }
     272  
     273  /* ptr points to character following "<?" */
     274  
     275  static int PTRCALL
     276  PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
     277                 const char **nextTokPtr) {
     278    int tok;
     279    const char *target = ptr;
     280    REQUIRE_CHAR(enc, ptr, end);
     281    switch (BYTE_TYPE(enc, ptr)) {
     282      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
     283    default:
     284      *nextTokPtr = ptr;
     285      return XML_TOK_INVALID;
     286    }
     287    while (HAS_CHAR(enc, ptr, end)) {
     288      switch (BYTE_TYPE(enc, ptr)) {
     289        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
     290      case BT_S:
     291      case BT_CR:
     292      case BT_LF:
     293        if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
     294          *nextTokPtr = ptr;
     295          return XML_TOK_INVALID;
     296        }
     297        ptr += MINBPC(enc);
     298        while (HAS_CHAR(enc, ptr, end)) {
     299          switch (BYTE_TYPE(enc, ptr)) {
     300            INVALID_CASES(ptr, nextTokPtr)
     301          case BT_QUEST:
     302            ptr += MINBPC(enc);
     303            REQUIRE_CHAR(enc, ptr, end);
     304            if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
     305              *nextTokPtr = ptr + MINBPC(enc);
     306              return tok;
     307            }
     308            break;
     309          default:
     310            ptr += MINBPC(enc);
     311            break;
     312          }
     313        }
     314        return XML_TOK_PARTIAL;
     315      case BT_QUEST:
     316        if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
     317          *nextTokPtr = ptr;
     318          return XML_TOK_INVALID;
     319        }
     320        ptr += MINBPC(enc);
     321        REQUIRE_CHAR(enc, ptr, end);
     322        if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
     323          *nextTokPtr = ptr + MINBPC(enc);
     324          return tok;
     325        }
     326        /* fall through */
     327      default:
     328        *nextTokPtr = ptr;
     329        return XML_TOK_INVALID;
     330      }
     331    }
     332    return XML_TOK_PARTIAL;
     333  }
     334  
     335  static int PTRCALL
     336  PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
     337                           const char **nextTokPtr) {
     338    static const char CDATA_LSQB[]
     339        = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB};
     340    int i;
     341    UNUSED_P(enc);
     342    /* CDATA[ */
     343    REQUIRE_CHARS(enc, ptr, end, 6);
     344    for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
     345      if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
     346        *nextTokPtr = ptr;
     347        return XML_TOK_INVALID;
     348      }
     349    }
     350    *nextTokPtr = ptr;
     351    return XML_TOK_CDATA_SECT_OPEN;
     352  }
     353  
     354  static int PTRCALL
     355  PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
     356                          const char **nextTokPtr) {
     357    if (ptr >= end)
     358      return XML_TOK_NONE;
     359    if (MINBPC(enc) > 1) {
     360      size_t n = end - ptr;
     361      if (n & (MINBPC(enc) - 1)) {
     362        n &= ~(MINBPC(enc) - 1);
     363        if (n == 0)
     364          return XML_TOK_PARTIAL;
     365        end = ptr + n;
     366      }
     367    }
     368    switch (BYTE_TYPE(enc, ptr)) {
     369    case BT_RSQB:
     370      ptr += MINBPC(enc);
     371      REQUIRE_CHAR(enc, ptr, end);
     372      if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
     373        break;
     374      ptr += MINBPC(enc);
     375      REQUIRE_CHAR(enc, ptr, end);
     376      if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
     377        ptr -= MINBPC(enc);
     378        break;
     379      }
     380      *nextTokPtr = ptr + MINBPC(enc);
     381      return XML_TOK_CDATA_SECT_CLOSE;
     382    case BT_CR:
     383      ptr += MINBPC(enc);
     384      REQUIRE_CHAR(enc, ptr, end);
     385      if (BYTE_TYPE(enc, ptr) == BT_LF)
     386        ptr += MINBPC(enc);
     387      *nextTokPtr = ptr;
     388      return XML_TOK_DATA_NEWLINE;
     389    case BT_LF:
     390      *nextTokPtr = ptr + MINBPC(enc);
     391      return XML_TOK_DATA_NEWLINE;
     392      INVALID_CASES(ptr, nextTokPtr)
     393    default:
     394      ptr += MINBPC(enc);
     395      break;
     396    }
     397    while (HAS_CHAR(enc, ptr, end)) {
     398      switch (BYTE_TYPE(enc, ptr)) {
     399  #  define LEAD_CASE(n)                                                         \
     400    case BT_LEAD##n:                                                             \
     401      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
     402        *nextTokPtr = ptr;                                                       \
     403        return XML_TOK_DATA_CHARS;                                               \
     404      }                                                                          \
     405      ptr += n;                                                                  \
     406      break;
     407        LEAD_CASE(2)
     408        LEAD_CASE(3)
     409        LEAD_CASE(4)
     410  #  undef LEAD_CASE
     411      case BT_NONXML:
     412      case BT_MALFORM:
     413      case BT_TRAIL:
     414      case BT_CR:
     415      case BT_LF:
     416      case BT_RSQB:
     417        *nextTokPtr = ptr;
     418        return XML_TOK_DATA_CHARS;
     419      default:
     420        ptr += MINBPC(enc);
     421        break;
     422      }
     423    }
     424    *nextTokPtr = ptr;
     425    return XML_TOK_DATA_CHARS;
     426  }
     427  
     428  /* ptr points to character following "</" */
     429  
     430  static int PTRCALL
     431  PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
     432                     const char **nextTokPtr) {
     433    REQUIRE_CHAR(enc, ptr, end);
     434    switch (BYTE_TYPE(enc, ptr)) {
     435      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
     436    default:
     437      *nextTokPtr = ptr;
     438      return XML_TOK_INVALID;
     439    }
     440    while (HAS_CHAR(enc, ptr, end)) {
     441      switch (BYTE_TYPE(enc, ptr)) {
     442        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
     443      case BT_S:
     444      case BT_CR:
     445      case BT_LF:
     446        for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
     447          switch (BYTE_TYPE(enc, ptr)) {
     448          case BT_S:
     449          case BT_CR:
     450          case BT_LF:
     451            break;
     452          case BT_GT:
     453            *nextTokPtr = ptr + MINBPC(enc);
     454            return XML_TOK_END_TAG;
     455          default:
     456            *nextTokPtr = ptr;
     457            return XML_TOK_INVALID;
     458          }
     459        }
     460        return XML_TOK_PARTIAL;
     461  #  ifdef XML_NS
     462      case BT_COLON:
     463        /* no need to check qname syntax here,
     464           since end-tag must match exactly */
     465        ptr += MINBPC(enc);
     466        break;
     467  #  endif
     468      case BT_GT:
     469        *nextTokPtr = ptr + MINBPC(enc);
     470        return XML_TOK_END_TAG;
     471      default:
     472        *nextTokPtr = ptr;
     473        return XML_TOK_INVALID;
     474      }
     475    }
     476    return XML_TOK_PARTIAL;
     477  }
     478  
     479  /* ptr points to character following "&#X" */
     480  
     481  static int PTRCALL
     482  PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
     483                         const char **nextTokPtr) {
     484    if (HAS_CHAR(enc, ptr, end)) {
     485      switch (BYTE_TYPE(enc, ptr)) {
     486      case BT_DIGIT:
     487      case BT_HEX:
     488        break;
     489      default:
     490        *nextTokPtr = ptr;
     491        return XML_TOK_INVALID;
     492      }
     493      for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
     494        switch (BYTE_TYPE(enc, ptr)) {
     495        case BT_DIGIT:
     496        case BT_HEX:
     497          break;
     498        case BT_SEMI:
     499          *nextTokPtr = ptr + MINBPC(enc);
     500          return XML_TOK_CHAR_REF;
     501        default:
     502          *nextTokPtr = ptr;
     503          return XML_TOK_INVALID;
     504        }
     505      }
     506    }
     507    return XML_TOK_PARTIAL;
     508  }
     509  
     510  /* ptr points to character following "&#" */
     511  
     512  static int PTRCALL
     513  PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
     514                      const char **nextTokPtr) {
     515    if (HAS_CHAR(enc, ptr, end)) {
     516      if (CHAR_MATCHES(enc, ptr, ASCII_x))
     517        return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
     518      switch (BYTE_TYPE(enc, ptr)) {
     519      case BT_DIGIT:
     520        break;
     521      default:
     522        *nextTokPtr = ptr;
     523        return XML_TOK_INVALID;
     524      }
     525      for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
     526        switch (BYTE_TYPE(enc, ptr)) {
     527        case BT_DIGIT:
     528          break;
     529        case BT_SEMI:
     530          *nextTokPtr = ptr + MINBPC(enc);
     531          return XML_TOK_CHAR_REF;
     532        default:
     533          *nextTokPtr = ptr;
     534          return XML_TOK_INVALID;
     535        }
     536      }
     537    }
     538    return XML_TOK_PARTIAL;
     539  }
     540  
     541  /* ptr points to character following "&" */
     542  
     543  static int PTRCALL
     544  PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
     545                  const char **nextTokPtr) {
     546    REQUIRE_CHAR(enc, ptr, end);
     547    switch (BYTE_TYPE(enc, ptr)) {
     548      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
     549    case BT_NUM:
     550      return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
     551    default:
     552      *nextTokPtr = ptr;
     553      return XML_TOK_INVALID;
     554    }
     555    while (HAS_CHAR(enc, ptr, end)) {
     556      switch (BYTE_TYPE(enc, ptr)) {
     557        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
     558      case BT_SEMI:
     559        *nextTokPtr = ptr + MINBPC(enc);
     560        return XML_TOK_ENTITY_REF;
     561      default:
     562        *nextTokPtr = ptr;
     563        return XML_TOK_INVALID;
     564      }
     565    }
     566    return XML_TOK_PARTIAL;
     567  }
     568  
     569  /* ptr points to character following first character of attribute name */
     570  
     571  static int PTRCALL
     572  PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
     573                   const char **nextTokPtr) {
     574  #  ifdef XML_NS
     575    int hadColon = 0;
     576  #  endif
     577    while (HAS_CHAR(enc, ptr, end)) {
     578      switch (BYTE_TYPE(enc, ptr)) {
     579        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
     580  #  ifdef XML_NS
     581      case BT_COLON:
     582        if (hadColon) {
     583          *nextTokPtr = ptr;
     584          return XML_TOK_INVALID;
     585        }
     586        hadColon = 1;
     587        ptr += MINBPC(enc);
     588        REQUIRE_CHAR(enc, ptr, end);
     589        switch (BYTE_TYPE(enc, ptr)) {
     590          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
     591        default:
     592          *nextTokPtr = ptr;
     593          return XML_TOK_INVALID;
     594        }
     595        break;
     596  #  endif
     597      case BT_S:
     598      case BT_CR:
     599      case BT_LF:
     600        for (;;) {
     601          int t;
     602  
     603          ptr += MINBPC(enc);
     604          REQUIRE_CHAR(enc, ptr, end);
     605          t = BYTE_TYPE(enc, ptr);
     606          if (t == BT_EQUALS)
     607            break;
     608          switch (t) {
     609          case BT_S:
     610          case BT_LF:
     611          case BT_CR:
     612            break;
     613          default:
     614            *nextTokPtr = ptr;
     615            return XML_TOK_INVALID;
     616          }
     617        }
     618        /* fall through */
     619      case BT_EQUALS: {
     620        int open;
     621  #  ifdef XML_NS
     622        hadColon = 0;
     623  #  endif
     624        for (;;) {
     625          ptr += MINBPC(enc);
     626          REQUIRE_CHAR(enc, ptr, end);
     627          open = BYTE_TYPE(enc, ptr);
     628          if (open == BT_QUOT || open == BT_APOS)
     629            break;
     630          switch (open) {
     631          case BT_S:
     632          case BT_LF:
     633          case BT_CR:
     634            break;
     635          default:
     636            *nextTokPtr = ptr;
     637            return XML_TOK_INVALID;
     638          }
     639        }
     640        ptr += MINBPC(enc);
     641        /* in attribute value */
     642        for (;;) {
     643          int t;
     644          REQUIRE_CHAR(enc, ptr, end);
     645          t = BYTE_TYPE(enc, ptr);
     646          if (t == open)
     647            break;
     648          switch (t) {
     649            INVALID_CASES(ptr, nextTokPtr)
     650          case BT_AMP: {
     651            int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
     652            if (tok <= 0) {
     653              if (tok == XML_TOK_INVALID)
     654                *nextTokPtr = ptr;
     655              return tok;
     656            }
     657            break;
     658          }
     659          case BT_LT:
     660            *nextTokPtr = ptr;
     661            return XML_TOK_INVALID;
     662          default:
     663            ptr += MINBPC(enc);
     664            break;
     665          }
     666        }
     667        ptr += MINBPC(enc);
     668        REQUIRE_CHAR(enc, ptr, end);
     669        switch (BYTE_TYPE(enc, ptr)) {
     670        case BT_S:
     671        case BT_CR:
     672        case BT_LF:
     673          break;
     674        case BT_SOL:
     675          goto sol;
     676        case BT_GT:
     677          goto gt;
     678        default:
     679          *nextTokPtr = ptr;
     680          return XML_TOK_INVALID;
     681        }
     682        /* ptr points to closing quote */
     683        for (;;) {
     684          ptr += MINBPC(enc);
     685          REQUIRE_CHAR(enc, ptr, end);
     686          switch (BYTE_TYPE(enc, ptr)) {
     687            CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
     688          case BT_S:
     689          case BT_CR:
     690          case BT_LF:
     691            continue;
     692          case BT_GT:
     693          gt:
     694            *nextTokPtr = ptr + MINBPC(enc);
     695            return XML_TOK_START_TAG_WITH_ATTS;
     696          case BT_SOL:
     697          sol:
     698            ptr += MINBPC(enc);
     699            REQUIRE_CHAR(enc, ptr, end);
     700            if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
     701              *nextTokPtr = ptr;
     702              return XML_TOK_INVALID;
     703            }
     704            *nextTokPtr = ptr + MINBPC(enc);
     705            return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
     706          default:
     707            *nextTokPtr = ptr;
     708            return XML_TOK_INVALID;
     709          }
     710          break;
     711        }
     712        break;
     713      }
     714      default:
     715        *nextTokPtr = ptr;
     716        return XML_TOK_INVALID;
     717      }
     718    }
     719    return XML_TOK_PARTIAL;
     720  }
     721  
     722  /* ptr points to character following "<" */
     723  
     724  static int PTRCALL
     725  PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
     726                 const char **nextTokPtr) {
     727  #  ifdef XML_NS
     728    int hadColon;
     729  #  endif
     730    REQUIRE_CHAR(enc, ptr, end);
     731    switch (BYTE_TYPE(enc, ptr)) {
     732      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
     733    case BT_EXCL:
     734      ptr += MINBPC(enc);
     735      REQUIRE_CHAR(enc, ptr, end);
     736      switch (BYTE_TYPE(enc, ptr)) {
     737      case BT_MINUS:
     738        return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
     739      case BT_LSQB:
     740        return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
     741      }
     742      *nextTokPtr = ptr;
     743      return XML_TOK_INVALID;
     744    case BT_QUEST:
     745      return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
     746    case BT_SOL:
     747      return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
     748    default:
     749      *nextTokPtr = ptr;
     750      return XML_TOK_INVALID;
     751    }
     752  #  ifdef XML_NS
     753    hadColon = 0;
     754  #  endif
     755    /* we have a start-tag */
     756    while (HAS_CHAR(enc, ptr, end)) {
     757      switch (BYTE_TYPE(enc, ptr)) {
     758        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
     759  #  ifdef XML_NS
     760      case BT_COLON:
     761        if (hadColon) {
     762          *nextTokPtr = ptr;
     763          return XML_TOK_INVALID;
     764        }
     765        hadColon = 1;
     766        ptr += MINBPC(enc);
     767        REQUIRE_CHAR(enc, ptr, end);
     768        switch (BYTE_TYPE(enc, ptr)) {
     769          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
     770        default:
     771          *nextTokPtr = ptr;
     772          return XML_TOK_INVALID;
     773        }
     774        break;
     775  #  endif
     776      case BT_S:
     777      case BT_CR:
     778      case BT_LF: {
     779        ptr += MINBPC(enc);
     780        while (HAS_CHAR(enc, ptr, end)) {
     781          switch (BYTE_TYPE(enc, ptr)) {
     782            CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
     783          case BT_GT:
     784            goto gt;
     785          case BT_SOL:
     786            goto sol;
     787          case BT_S:
     788          case BT_CR:
     789          case BT_LF:
     790            ptr += MINBPC(enc);
     791            continue;
     792          default:
     793            *nextTokPtr = ptr;
     794            return XML_TOK_INVALID;
     795          }
     796          return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
     797        }
     798        return XML_TOK_PARTIAL;
     799      }
     800      case BT_GT:
     801      gt:
     802        *nextTokPtr = ptr + MINBPC(enc);
     803        return XML_TOK_START_TAG_NO_ATTS;
     804      case BT_SOL:
     805      sol:
     806        ptr += MINBPC(enc);
     807        REQUIRE_CHAR(enc, ptr, end);
     808        if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
     809          *nextTokPtr = ptr;
     810          return XML_TOK_INVALID;
     811        }
     812        *nextTokPtr = ptr + MINBPC(enc);
     813        return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
     814      default:
     815        *nextTokPtr = ptr;
     816        return XML_TOK_INVALID;
     817      }
     818    }
     819    return XML_TOK_PARTIAL;
     820  }
     821  
     822  static int PTRCALL
     823  PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
     824                     const char **nextTokPtr) {
     825    if (ptr >= end)
     826      return XML_TOK_NONE;
     827    if (MINBPC(enc) > 1) {
     828      size_t n = end - ptr;
     829      if (n & (MINBPC(enc) - 1)) {
     830        n &= ~(MINBPC(enc) - 1);
     831        if (n == 0)
     832          return XML_TOK_PARTIAL;
     833        end = ptr + n;
     834      }
     835    }
     836    switch (BYTE_TYPE(enc, ptr)) {
     837    case BT_LT:
     838      return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
     839    case BT_AMP:
     840      return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
     841    case BT_CR:
     842      ptr += MINBPC(enc);
     843      if (! HAS_CHAR(enc, ptr, end))
     844        return XML_TOK_TRAILING_CR;
     845      if (BYTE_TYPE(enc, ptr) == BT_LF)
     846        ptr += MINBPC(enc);
     847      *nextTokPtr = ptr;
     848      return XML_TOK_DATA_NEWLINE;
     849    case BT_LF:
     850      *nextTokPtr = ptr + MINBPC(enc);
     851      return XML_TOK_DATA_NEWLINE;
     852    case BT_RSQB:
     853      ptr += MINBPC(enc);
     854      if (! HAS_CHAR(enc, ptr, end))
     855        return XML_TOK_TRAILING_RSQB;
     856      if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
     857        break;
     858      ptr += MINBPC(enc);
     859      if (! HAS_CHAR(enc, ptr, end))
     860        return XML_TOK_TRAILING_RSQB;
     861      if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
     862        ptr -= MINBPC(enc);
     863        break;
     864      }
     865      *nextTokPtr = ptr;
     866      return XML_TOK_INVALID;
     867      INVALID_CASES(ptr, nextTokPtr)
     868    default:
     869      ptr += MINBPC(enc);
     870      break;
     871    }
     872    while (HAS_CHAR(enc, ptr, end)) {
     873      switch (BYTE_TYPE(enc, ptr)) {
     874  #  define LEAD_CASE(n)                                                         \
     875    case BT_LEAD##n:                                                             \
     876      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
     877        *nextTokPtr = ptr;                                                       \
     878        return XML_TOK_DATA_CHARS;                                               \
     879      }                                                                          \
     880      ptr += n;                                                                  \
     881      break;
     882        LEAD_CASE(2)
     883        LEAD_CASE(3)
     884        LEAD_CASE(4)
     885  #  undef LEAD_CASE
     886      case BT_RSQB:
     887        if (HAS_CHARS(enc, ptr, end, 2)) {
     888          if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
     889            ptr += MINBPC(enc);
     890            break;
     891          }
     892          if (HAS_CHARS(enc, ptr, end, 3)) {
     893            if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) {
     894              ptr += MINBPC(enc);
     895              break;
     896            }
     897            *nextTokPtr = ptr + 2 * MINBPC(enc);
     898            return XML_TOK_INVALID;
     899          }
     900        }
     901        /* fall through */
     902      case BT_AMP:
     903      case BT_LT:
     904      case BT_NONXML:
     905      case BT_MALFORM:
     906      case BT_TRAIL:
     907      case BT_CR:
     908      case BT_LF:
     909        *nextTokPtr = ptr;
     910        return XML_TOK_DATA_CHARS;
     911      default:
     912        ptr += MINBPC(enc);
     913        break;
     914      }
     915    }
     916    *nextTokPtr = ptr;
     917    return XML_TOK_DATA_CHARS;
     918  }
     919  
     920  /* ptr points to character following "%" */
     921  
     922  static int PTRCALL
     923  PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
     924                      const char **nextTokPtr) {
     925    REQUIRE_CHAR(enc, ptr, end);
     926    switch (BYTE_TYPE(enc, ptr)) {
     927      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
     928    case BT_S:
     929    case BT_LF:
     930    case BT_CR:
     931    case BT_PERCNT:
     932      *nextTokPtr = ptr;
     933      return XML_TOK_PERCENT;
     934    default:
     935      *nextTokPtr = ptr;
     936      return XML_TOK_INVALID;
     937    }
     938    while (HAS_CHAR(enc, ptr, end)) {
     939      switch (BYTE_TYPE(enc, ptr)) {
     940        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
     941      case BT_SEMI:
     942        *nextTokPtr = ptr + MINBPC(enc);
     943        return XML_TOK_PARAM_ENTITY_REF;
     944      default:
     945        *nextTokPtr = ptr;
     946        return XML_TOK_INVALID;
     947      }
     948    }
     949    return XML_TOK_PARTIAL;
     950  }
     951  
     952  static int PTRCALL
     953  PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
     954                        const char **nextTokPtr) {
     955    REQUIRE_CHAR(enc, ptr, end);
     956    switch (BYTE_TYPE(enc, ptr)) {
     957      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
     958    default:
     959      *nextTokPtr = ptr;
     960      return XML_TOK_INVALID;
     961    }
     962    while (HAS_CHAR(enc, ptr, end)) {
     963      switch (BYTE_TYPE(enc, ptr)) {
     964        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
     965      case BT_CR:
     966      case BT_LF:
     967      case BT_S:
     968      case BT_RPAR:
     969      case BT_GT:
     970      case BT_PERCNT:
     971      case BT_VERBAR:
     972        *nextTokPtr = ptr;
     973        return XML_TOK_POUND_NAME;
     974      default:
     975        *nextTokPtr = ptr;
     976        return XML_TOK_INVALID;
     977      }
     978    }
     979    return -XML_TOK_POUND_NAME;
     980  }
     981  
     982  static int PTRCALL
     983  PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end,
     984                  const char **nextTokPtr) {
     985    while (HAS_CHAR(enc, ptr, end)) {
     986      int t = BYTE_TYPE(enc, ptr);
     987      switch (t) {
     988        INVALID_CASES(ptr, nextTokPtr)
     989      case BT_QUOT:
     990      case BT_APOS:
     991        ptr += MINBPC(enc);
     992        if (t != open)
     993          break;
     994        if (! HAS_CHAR(enc, ptr, end))
     995          return -XML_TOK_LITERAL;
     996        *nextTokPtr = ptr;
     997        switch (BYTE_TYPE(enc, ptr)) {
     998        case BT_S:
     999        case BT_CR:
    1000        case BT_LF:
    1001        case BT_GT:
    1002        case BT_PERCNT:
    1003        case BT_LSQB:
    1004          return XML_TOK_LITERAL;
    1005        default:
    1006          return XML_TOK_INVALID;
    1007        }
    1008      default:
    1009        ptr += MINBPC(enc);
    1010        break;
    1011      }
    1012    }
    1013    return XML_TOK_PARTIAL;
    1014  }
    1015  
    1016  static int PTRCALL
    1017  PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
    1018                    const char **nextTokPtr) {
    1019    int tok;
    1020    if (ptr >= end)
    1021      return XML_TOK_NONE;
    1022    if (MINBPC(enc) > 1) {
    1023      size_t n = end - ptr;
    1024      if (n & (MINBPC(enc) - 1)) {
    1025        n &= ~(MINBPC(enc) - 1);
    1026        if (n == 0)
    1027          return XML_TOK_PARTIAL;
    1028        end = ptr + n;
    1029      }
    1030    }
    1031    switch (BYTE_TYPE(enc, ptr)) {
    1032    case BT_QUOT:
    1033      return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
    1034    case BT_APOS:
    1035      return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
    1036    case BT_LT: {
    1037      ptr += MINBPC(enc);
    1038      REQUIRE_CHAR(enc, ptr, end);
    1039      switch (BYTE_TYPE(enc, ptr)) {
    1040      case BT_EXCL:
    1041        return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
    1042      case BT_QUEST:
    1043        return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
    1044      case BT_NMSTRT:
    1045      case BT_HEX:
    1046      case BT_NONASCII:
    1047      case BT_LEAD2:
    1048      case BT_LEAD3:
    1049      case BT_LEAD4:
    1050        *nextTokPtr = ptr - MINBPC(enc);
    1051        return XML_TOK_INSTANCE_START;
    1052      }
    1053      *nextTokPtr = ptr;
    1054      return XML_TOK_INVALID;
    1055    }
    1056    case BT_CR:
    1057      if (ptr + MINBPC(enc) == end) {
    1058        *nextTokPtr = end;
    1059        /* indicate that this might be part of a CR/LF pair */
    1060        return -XML_TOK_PROLOG_S;
    1061      }
    1062      /* fall through */
    1063    case BT_S:
    1064    case BT_LF:
    1065      for (;;) {
    1066        ptr += MINBPC(enc);
    1067        if (! HAS_CHAR(enc, ptr, end))
    1068          break;
    1069        switch (BYTE_TYPE(enc, ptr)) {
    1070        case BT_S:
    1071        case BT_LF:
    1072          break;
    1073        case BT_CR:
    1074          /* don't split CR/LF pair */
    1075          if (ptr + MINBPC(enc) != end)
    1076            break;
    1077          /* fall through */
    1078        default:
    1079          *nextTokPtr = ptr;
    1080          return XML_TOK_PROLOG_S;
    1081        }
    1082      }
    1083      *nextTokPtr = ptr;
    1084      return XML_TOK_PROLOG_S;
    1085    case BT_PERCNT:
    1086      return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
    1087    case BT_COMMA:
    1088      *nextTokPtr = ptr + MINBPC(enc);
    1089      return XML_TOK_COMMA;
    1090    case BT_LSQB:
    1091      *nextTokPtr = ptr + MINBPC(enc);
    1092      return XML_TOK_OPEN_BRACKET;
    1093    case BT_RSQB:
    1094      ptr += MINBPC(enc);
    1095      if (! HAS_CHAR(enc, ptr, end))
    1096        return -XML_TOK_CLOSE_BRACKET;
    1097      if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
    1098        REQUIRE_CHARS(enc, ptr, end, 2);
    1099        if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
    1100          *nextTokPtr = ptr + 2 * MINBPC(enc);
    1101          return XML_TOK_COND_SECT_CLOSE;
    1102        }
    1103      }
    1104      *nextTokPtr = ptr;
    1105      return XML_TOK_CLOSE_BRACKET;
    1106    case BT_LPAR:
    1107      *nextTokPtr = ptr + MINBPC(enc);
    1108      return XML_TOK_OPEN_PAREN;
    1109    case BT_RPAR:
    1110      ptr += MINBPC(enc);
    1111      if (! HAS_CHAR(enc, ptr, end))
    1112        return -XML_TOK_CLOSE_PAREN;
    1113      switch (BYTE_TYPE(enc, ptr)) {
    1114      case BT_AST:
    1115        *nextTokPtr = ptr + MINBPC(enc);
    1116        return XML_TOK_CLOSE_PAREN_ASTERISK;
    1117      case BT_QUEST:
    1118        *nextTokPtr = ptr + MINBPC(enc);
    1119        return XML_TOK_CLOSE_PAREN_QUESTION;
    1120      case BT_PLUS:
    1121        *nextTokPtr = ptr + MINBPC(enc);
    1122        return XML_TOK_CLOSE_PAREN_PLUS;
    1123      case BT_CR:
    1124      case BT_LF:
    1125      case BT_S:
    1126      case BT_GT:
    1127      case BT_COMMA:
    1128      case BT_VERBAR:
    1129      case BT_RPAR:
    1130        *nextTokPtr = ptr;
    1131        return XML_TOK_CLOSE_PAREN;
    1132      }
    1133      *nextTokPtr = ptr;
    1134      return XML_TOK_INVALID;
    1135    case BT_VERBAR:
    1136      *nextTokPtr = ptr + MINBPC(enc);
    1137      return XML_TOK_OR;
    1138    case BT_GT:
    1139      *nextTokPtr = ptr + MINBPC(enc);
    1140      return XML_TOK_DECL_CLOSE;
    1141    case BT_NUM:
    1142      return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
    1143  #  define LEAD_CASE(n)                                                         \
    1144    case BT_LEAD##n:                                                             \
    1145      if (end - ptr < n)                                                         \
    1146        return XML_TOK_PARTIAL_CHAR;                                             \
    1147      if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
    1148        *nextTokPtr = ptr;                                                       \
    1149        return XML_TOK_INVALID;                                                  \
    1150      }                                                                          \
    1151      if (IS_NMSTRT_CHAR(enc, ptr, n)) {                                         \
    1152        ptr += n;                                                                \
    1153        tok = XML_TOK_NAME;                                                      \
    1154        break;                                                                   \
    1155      }                                                                          \
    1156      if (IS_NAME_CHAR(enc, ptr, n)) {                                           \
    1157        ptr += n;                                                                \
    1158        tok = XML_TOK_NMTOKEN;                                                   \
    1159        break;                                                                   \
    1160      }                                                                          \
    1161      *nextTokPtr = ptr;                                                         \
    1162      return XML_TOK_INVALID;
    1163      LEAD_CASE(2)
    1164      LEAD_CASE(3)
    1165      LEAD_CASE(4)
    1166  #  undef LEAD_CASE
    1167    case BT_NMSTRT:
    1168    case BT_HEX:
    1169      tok = XML_TOK_NAME;
    1170      ptr += MINBPC(enc);
    1171      break;
    1172    case BT_DIGIT:
    1173    case BT_NAME:
    1174    case BT_MINUS:
    1175  #  ifdef XML_NS
    1176    case BT_COLON:
    1177  #  endif
    1178      tok = XML_TOK_NMTOKEN;
    1179      ptr += MINBPC(enc);
    1180      break;
    1181    case BT_NONASCII:
    1182      if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
    1183        ptr += MINBPC(enc);
    1184        tok = XML_TOK_NAME;
    1185        break;
    1186      }
    1187      if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
    1188        ptr += MINBPC(enc);
    1189        tok = XML_TOK_NMTOKEN;
    1190        break;
    1191      }
    1192      /* fall through */
    1193    default:
    1194      *nextTokPtr = ptr;
    1195      return XML_TOK_INVALID;
    1196    }
    1197    while (HAS_CHAR(enc, ptr, end)) {
    1198      switch (BYTE_TYPE(enc, ptr)) {
    1199        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
    1200      case BT_GT:
    1201      case BT_RPAR:
    1202      case BT_COMMA:
    1203      case BT_VERBAR:
    1204      case BT_LSQB:
    1205      case BT_PERCNT:
    1206      case BT_S:
    1207      case BT_CR:
    1208      case BT_LF:
    1209        *nextTokPtr = ptr;
    1210        return tok;
    1211  #  ifdef XML_NS
    1212      case BT_COLON:
    1213        ptr += MINBPC(enc);
    1214        switch (tok) {
    1215        case XML_TOK_NAME:
    1216          REQUIRE_CHAR(enc, ptr, end);
    1217          tok = XML_TOK_PREFIXED_NAME;
    1218          switch (BYTE_TYPE(enc, ptr)) {
    1219            CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
    1220          default:
    1221            tok = XML_TOK_NMTOKEN;
    1222            break;
    1223          }
    1224          break;
    1225        case XML_TOK_PREFIXED_NAME:
    1226          tok = XML_TOK_NMTOKEN;
    1227          break;
    1228        }
    1229        break;
    1230  #  endif
    1231      case BT_PLUS:
    1232        if (tok == XML_TOK_NMTOKEN) {
    1233          *nextTokPtr = ptr;
    1234          return XML_TOK_INVALID;
    1235        }
    1236        *nextTokPtr = ptr + MINBPC(enc);
    1237        return XML_TOK_NAME_PLUS;
    1238      case BT_AST:
    1239        if (tok == XML_TOK_NMTOKEN) {
    1240          *nextTokPtr = ptr;
    1241          return XML_TOK_INVALID;
    1242        }
    1243        *nextTokPtr = ptr + MINBPC(enc);
    1244        return XML_TOK_NAME_ASTERISK;
    1245      case BT_QUEST:
    1246        if (tok == XML_TOK_NMTOKEN) {
    1247          *nextTokPtr = ptr;
    1248          return XML_TOK_INVALID;
    1249        }
    1250        *nextTokPtr = ptr + MINBPC(enc);
    1251        return XML_TOK_NAME_QUESTION;
    1252      default:
    1253        *nextTokPtr = ptr;
    1254        return XML_TOK_INVALID;
    1255      }
    1256    }
    1257    return -tok;
    1258  }
    1259  
    1260  static int PTRCALL
    1261  PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
    1262                            const char **nextTokPtr) {
    1263    const char *start;
    1264    if (ptr >= end)
    1265      return XML_TOK_NONE;
    1266    else if (! HAS_CHAR(enc, ptr, end)) {
    1267      /* This line cannot be executed.  The incoming data has already
    1268       * been tokenized once, so incomplete characters like this have
    1269       * already been eliminated from the input.  Retaining the paranoia
    1270       * check is still valuable, however.
    1271       */
    1272      return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
    1273    }
    1274    start = ptr;
    1275    while (HAS_CHAR(enc, ptr, end)) {
    1276      switch (BYTE_TYPE(enc, ptr)) {
    1277  #  define LEAD_CASE(n)                                                         \
    1278    case BT_LEAD##n:                                                             \
    1279      ptr += n; /* NOTE: The encoding has already been validated. */             \
    1280      break;
    1281        LEAD_CASE(2)
    1282        LEAD_CASE(3)
    1283        LEAD_CASE(4)
    1284  #  undef LEAD_CASE
    1285      case BT_AMP:
    1286        if (ptr == start)
    1287          return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
    1288        *nextTokPtr = ptr;
    1289        return XML_TOK_DATA_CHARS;
    1290      case BT_LT:
    1291        /* this is for inside entity references */
    1292        *nextTokPtr = ptr;
    1293        return XML_TOK_INVALID;
    1294      case BT_LF:
    1295        if (ptr == start) {
    1296          *nextTokPtr = ptr + MINBPC(enc);
    1297          return XML_TOK_DATA_NEWLINE;
    1298        }
    1299        *nextTokPtr = ptr;
    1300        return XML_TOK_DATA_CHARS;
    1301      case BT_CR:
    1302        if (ptr == start) {
    1303          ptr += MINBPC(enc);
    1304          if (! HAS_CHAR(enc, ptr, end))
    1305            return XML_TOK_TRAILING_CR;
    1306          if (BYTE_TYPE(enc, ptr) == BT_LF)
    1307            ptr += MINBPC(enc);
    1308          *nextTokPtr = ptr;
    1309          return XML_TOK_DATA_NEWLINE;
    1310        }
    1311        *nextTokPtr = ptr;
    1312        return XML_TOK_DATA_CHARS;
    1313      case BT_S:
    1314        if (ptr == start) {
    1315          *nextTokPtr = ptr + MINBPC(enc);
    1316          return XML_TOK_ATTRIBUTE_VALUE_S;
    1317        }
    1318        *nextTokPtr = ptr;
    1319        return XML_TOK_DATA_CHARS;
    1320      default:
    1321        ptr += MINBPC(enc);
    1322        break;
    1323      }
    1324    }
    1325    *nextTokPtr = ptr;
    1326    return XML_TOK_DATA_CHARS;
    1327  }
    1328  
    1329  static int PTRCALL
    1330  PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
    1331                         const char **nextTokPtr) {
    1332    const char *start;
    1333    if (ptr >= end)
    1334      return XML_TOK_NONE;
    1335    else if (! HAS_CHAR(enc, ptr, end)) {
    1336      /* This line cannot be executed.  The incoming data has already
    1337       * been tokenized once, so incomplete characters like this have
    1338       * already been eliminated from the input.  Retaining the paranoia
    1339       * check is still valuable, however.
    1340       */
    1341      return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
    1342    }
    1343    start = ptr;
    1344    while (HAS_CHAR(enc, ptr, end)) {
    1345      switch (BYTE_TYPE(enc, ptr)) {
    1346  #  define LEAD_CASE(n)                                                         \
    1347    case BT_LEAD##n:                                                             \
    1348      ptr += n; /* NOTE: The encoding has already been validated. */             \
    1349      break;
    1350        LEAD_CASE(2)
    1351        LEAD_CASE(3)
    1352        LEAD_CASE(4)
    1353  #  undef LEAD_CASE
    1354      case BT_AMP:
    1355        if (ptr == start)
    1356          return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
    1357        *nextTokPtr = ptr;
    1358        return XML_TOK_DATA_CHARS;
    1359      case BT_PERCNT:
    1360        if (ptr == start) {
    1361          int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
    1362          return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
    1363        }
    1364        *nextTokPtr = ptr;
    1365        return XML_TOK_DATA_CHARS;
    1366      case BT_LF:
    1367        if (ptr == start) {
    1368          *nextTokPtr = ptr + MINBPC(enc);
    1369          return XML_TOK_DATA_NEWLINE;
    1370        }
    1371        *nextTokPtr = ptr;
    1372        return XML_TOK_DATA_CHARS;
    1373      case BT_CR:
    1374        if (ptr == start) {
    1375          ptr += MINBPC(enc);
    1376          if (! HAS_CHAR(enc, ptr, end))
    1377            return XML_TOK_TRAILING_CR;
    1378          if (BYTE_TYPE(enc, ptr) == BT_LF)
    1379            ptr += MINBPC(enc);
    1380          *nextTokPtr = ptr;
    1381          return XML_TOK_DATA_NEWLINE;
    1382        }
    1383        *nextTokPtr = ptr;
    1384        return XML_TOK_DATA_CHARS;
    1385      default:
    1386        ptr += MINBPC(enc);
    1387        break;
    1388      }
    1389    }
    1390    *nextTokPtr = ptr;
    1391    return XML_TOK_DATA_CHARS;
    1392  }
    1393  
    1394  #  ifdef XML_DTD
    1395  
    1396  static int PTRCALL
    1397  PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
    1398                           const char **nextTokPtr) {
    1399    int level = 0;
    1400    if (MINBPC(enc) > 1) {
    1401      size_t n = end - ptr;
    1402      if (n & (MINBPC(enc) - 1)) {
    1403        n &= ~(MINBPC(enc) - 1);
    1404        end = ptr + n;
    1405      }
    1406    }
    1407    while (HAS_CHAR(enc, ptr, end)) {
    1408      switch (BYTE_TYPE(enc, ptr)) {
    1409        INVALID_CASES(ptr, nextTokPtr)
    1410      case BT_LT:
    1411        ptr += MINBPC(enc);
    1412        REQUIRE_CHAR(enc, ptr, end);
    1413        if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
    1414          ptr += MINBPC(enc);
    1415          REQUIRE_CHAR(enc, ptr, end);
    1416          if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
    1417            ++level;
    1418            ptr += MINBPC(enc);
    1419          }
    1420        }
    1421        break;
    1422      case BT_RSQB:
    1423        ptr += MINBPC(enc);
    1424        REQUIRE_CHAR(enc, ptr, end);
    1425        if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
    1426          ptr += MINBPC(enc);
    1427          REQUIRE_CHAR(enc, ptr, end);
    1428          if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
    1429            ptr += MINBPC(enc);
    1430            if (level == 0) {
    1431              *nextTokPtr = ptr;
    1432              return XML_TOK_IGNORE_SECT;
    1433            }
    1434            --level;
    1435          }
    1436        }
    1437        break;
    1438      default:
    1439        ptr += MINBPC(enc);
    1440        break;
    1441      }
    1442    }
    1443    return XML_TOK_PARTIAL;
    1444  }
    1445  
    1446  #  endif /* XML_DTD */
    1447  
    1448  static int PTRCALL
    1449  PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
    1450                     const char **badPtr) {
    1451    ptr += MINBPC(enc);
    1452    end -= MINBPC(enc);
    1453    for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
    1454      switch (BYTE_TYPE(enc, ptr)) {
    1455      case BT_DIGIT:
    1456      case BT_HEX:
    1457      case BT_MINUS:
    1458      case BT_APOS:
    1459      case BT_LPAR:
    1460      case BT_RPAR:
    1461      case BT_PLUS:
    1462      case BT_COMMA:
    1463      case BT_SOL:
    1464      case BT_EQUALS:
    1465      case BT_QUEST:
    1466      case BT_CR:
    1467      case BT_LF:
    1468      case BT_SEMI:
    1469      case BT_EXCL:
    1470      case BT_AST:
    1471      case BT_PERCNT:
    1472      case BT_NUM:
    1473  #  ifdef XML_NS
    1474      case BT_COLON:
    1475  #  endif
    1476        break;
    1477      case BT_S:
    1478        if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
    1479          *badPtr = ptr;
    1480          return 0;
    1481        }
    1482        break;
    1483      case BT_NAME:
    1484      case BT_NMSTRT:
    1485        if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f))
    1486          break;
    1487        /* fall through */
    1488      default:
    1489        switch (BYTE_TO_ASCII(enc, ptr)) {
    1490        case 0x24: /* $ */
    1491        case 0x40: /* @ */
    1492          break;
    1493        default:
    1494          *badPtr = ptr;
    1495          return 0;
    1496        }
    1497        break;
    1498      }
    1499    }
    1500    return 1;
    1501  }
    1502  
    1503  /* This must only be called for a well-formed start-tag or empty
    1504     element tag.  Returns the number of attributes.  Pointers to the
    1505     first attsMax attributes are stored in atts.
    1506  */
    1507  
    1508  static int PTRCALL
    1509  PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
    1510                  ATTRIBUTE *atts) {
    1511    enum { other, inName, inValue } state = inName;
    1512    int nAtts = 0;
    1513    int open = 0; /* defined when state == inValue;
    1514                     initialization just to shut up compilers */
    1515  
    1516    for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
    1517      switch (BYTE_TYPE(enc, ptr)) {
    1518  #  define START_NAME                                                           \
    1519      if (state == other) {                                                      \
    1520        if (nAtts < attsMax) {                                                   \
    1521          atts[nAtts].name = ptr;                                                \
    1522          atts[nAtts].normalized = 1;                                            \
    1523        }                                                                        \
    1524        state = inName;                                                          \
    1525      }
    1526  #  define LEAD_CASE(n)                                                         \
    1527    case BT_LEAD##n: /* NOTE: The encoding has already been validated. */        \
    1528      START_NAME ptr += (n - MINBPC(enc));                                       \
    1529      break;
    1530        LEAD_CASE(2)
    1531        LEAD_CASE(3)
    1532        LEAD_CASE(4)
    1533  #  undef LEAD_CASE
    1534      case BT_NONASCII:
    1535      case BT_NMSTRT:
    1536      case BT_HEX:
    1537        START_NAME
    1538        break;
    1539  #  undef START_NAME
    1540      case BT_QUOT:
    1541        if (state != inValue) {
    1542          if (nAtts < attsMax)
    1543            atts[nAtts].valuePtr = ptr + MINBPC(enc);
    1544          state = inValue;
    1545          open = BT_QUOT;
    1546        } else if (open == BT_QUOT) {
    1547          state = other;
    1548          if (nAtts < attsMax)
    1549            atts[nAtts].valueEnd = ptr;
    1550          nAtts++;
    1551        }
    1552        break;
    1553      case BT_APOS:
    1554        if (state != inValue) {
    1555          if (nAtts < attsMax)
    1556            atts[nAtts].valuePtr = ptr + MINBPC(enc);
    1557          state = inValue;
    1558          open = BT_APOS;
    1559        } else if (open == BT_APOS) {
    1560          state = other;
    1561          if (nAtts < attsMax)
    1562            atts[nAtts].valueEnd = ptr;
    1563          nAtts++;
    1564        }
    1565        break;
    1566      case BT_AMP:
    1567        if (nAtts < attsMax)
    1568          atts[nAtts].normalized = 0;
    1569        break;
    1570      case BT_S:
    1571        if (state == inName)
    1572          state = other;
    1573        else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized
    1574                 && (ptr == atts[nAtts].valuePtr
    1575                     || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
    1576                     || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
    1577                     || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
    1578          atts[nAtts].normalized = 0;
    1579        break;
    1580      case BT_CR:
    1581      case BT_LF:
    1582        /* This case ensures that the first attribute name is counted
    1583           Apart from that we could just change state on the quote. */
    1584        if (state == inName)
    1585          state = other;
    1586        else if (state == inValue && nAtts < attsMax)
    1587          atts[nAtts].normalized = 0;
    1588        break;
    1589      case BT_GT:
    1590      case BT_SOL:
    1591        if (state != inValue)
    1592          return nAtts;
    1593        break;
    1594      default:
    1595        break;
    1596      }
    1597    }
    1598    /* not reached */
    1599  }
    1600  
    1601  static int PTRFASTCALL
    1602  PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) {
    1603    int result = 0;
    1604    /* skip &# */
    1605    UNUSED_P(enc);
    1606    ptr += 2 * MINBPC(enc);
    1607    if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
    1608      for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI);
    1609           ptr += MINBPC(enc)) {
    1610        int c = BYTE_TO_ASCII(enc, ptr);
    1611        switch (c) {
    1612        case ASCII_0:
    1613        case ASCII_1:
    1614        case ASCII_2:
    1615        case ASCII_3:
    1616        case ASCII_4:
    1617        case ASCII_5:
    1618        case ASCII_6:
    1619        case ASCII_7:
    1620        case ASCII_8:
    1621        case ASCII_9:
    1622          result <<= 4;
    1623          result |= (c - ASCII_0);
    1624          break;
    1625        case ASCII_A:
    1626        case ASCII_B:
    1627        case ASCII_C:
    1628        case ASCII_D:
    1629        case ASCII_E:
    1630        case ASCII_F:
    1631          result <<= 4;
    1632          result += 10 + (c - ASCII_A);
    1633          break;
    1634        case ASCII_a:
    1635        case ASCII_b:
    1636        case ASCII_c:
    1637        case ASCII_d:
    1638        case ASCII_e:
    1639        case ASCII_f:
    1640          result <<= 4;
    1641          result += 10 + (c - ASCII_a);
    1642          break;
    1643        }
    1644        if (result >= 0x110000)
    1645          return -1;
    1646      }
    1647    } else {
    1648      for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
    1649        int c = BYTE_TO_ASCII(enc, ptr);
    1650        result *= 10;
    1651        result += (c - ASCII_0);
    1652        if (result >= 0x110000)
    1653          return -1;
    1654      }
    1655    }
    1656    return checkCharRefNumber(result);
    1657  }
    1658  
    1659  static int PTRCALL
    1660  PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
    1661                               const char *end) {
    1662    UNUSED_P(enc);
    1663    switch ((end - ptr) / MINBPC(enc)) {
    1664    case 2:
    1665      if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
    1666        switch (BYTE_TO_ASCII(enc, ptr)) {
    1667        case ASCII_l:
    1668          return ASCII_LT;
    1669        case ASCII_g:
    1670          return ASCII_GT;
    1671        }
    1672      }
    1673      break;
    1674    case 3:
    1675      if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
    1676        ptr += MINBPC(enc);
    1677        if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
    1678          ptr += MINBPC(enc);
    1679          if (CHAR_MATCHES(enc, ptr, ASCII_p))
    1680            return ASCII_AMP;
    1681        }
    1682      }
    1683      break;
    1684    case 4:
    1685      switch (BYTE_TO_ASCII(enc, ptr)) {
    1686      case ASCII_q:
    1687        ptr += MINBPC(enc);
    1688        if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
    1689          ptr += MINBPC(enc);
    1690          if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
    1691            ptr += MINBPC(enc);
    1692            if (CHAR_MATCHES(enc, ptr, ASCII_t))
    1693              return ASCII_QUOT;
    1694          }
    1695        }
    1696        break;
    1697      case ASCII_a:
    1698        ptr += MINBPC(enc);
    1699        if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
    1700          ptr += MINBPC(enc);
    1701          if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
    1702            ptr += MINBPC(enc);
    1703            if (CHAR_MATCHES(enc, ptr, ASCII_s))
    1704              return ASCII_APOS;
    1705          }
    1706        }
    1707        break;
    1708      }
    1709    }
    1710    return 0;
    1711  }
    1712  
    1713  static int PTRCALL
    1714  PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
    1715                           const char *end1, const char *ptr2) {
    1716    UNUSED_P(enc);
    1717    for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
    1718      if (end1 - ptr1 < MINBPC(enc)) {
    1719        /* This line cannot be executed.  The incoming data has already
    1720         * been tokenized once, so incomplete characters like this have
    1721         * already been eliminated from the input.  Retaining the
    1722         * paranoia check is still valuable, however.
    1723         */
    1724        return 0; /* LCOV_EXCL_LINE */
    1725      }
    1726      if (! CHAR_MATCHES(enc, ptr1, *ptr2))
    1727        return 0;
    1728    }
    1729    return ptr1 == end1;
    1730  }
    1731  
    1732  static int PTRFASTCALL
    1733  PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
    1734    const char *start = ptr;
    1735    for (;;) {
    1736      switch (BYTE_TYPE(enc, ptr)) {
    1737  #  define LEAD_CASE(n)                                                         \
    1738    case BT_LEAD##n:                                                             \
    1739      ptr += n; /* NOTE: The encoding has already been validated. */             \
    1740      break;
    1741        LEAD_CASE(2)
    1742        LEAD_CASE(3)
    1743        LEAD_CASE(4)
    1744  #  undef LEAD_CASE
    1745      case BT_NONASCII:
    1746      case BT_NMSTRT:
    1747  #  ifdef XML_NS
    1748      case BT_COLON:
    1749  #  endif
    1750      case BT_HEX:
    1751      case BT_DIGIT:
    1752      case BT_NAME:
    1753      case BT_MINUS:
    1754        ptr += MINBPC(enc);
    1755        break;
    1756      default:
    1757        return (int)(ptr - start);
    1758      }
    1759    }
    1760  }
    1761  
    1762  static const char *PTRFASTCALL
    1763  PREFIX(skipS)(const ENCODING *enc, const char *ptr) {
    1764    for (;;) {
    1765      switch (BYTE_TYPE(enc, ptr)) {
    1766      case BT_LF:
    1767      case BT_CR:
    1768      case BT_S:
    1769        ptr += MINBPC(enc);
    1770        break;
    1771      default:
    1772        return ptr;
    1773      }
    1774    }
    1775  }
    1776  
    1777  static void PTRCALL
    1778  PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
    1779                         POSITION *pos) {
    1780    while (HAS_CHAR(enc, ptr, end)) {
    1781      switch (BYTE_TYPE(enc, ptr)) {
    1782  #  define LEAD_CASE(n)                                                         \
    1783    case BT_LEAD##n:                                                             \
    1784      ptr += n; /* NOTE: The encoding has already been validated. */             \
    1785      pos->columnNumber++;                                                       \
    1786      break;
    1787        LEAD_CASE(2)
    1788        LEAD_CASE(3)
    1789        LEAD_CASE(4)
    1790  #  undef LEAD_CASE
    1791      case BT_LF:
    1792        pos->columnNumber = 0;
    1793        pos->lineNumber++;
    1794        ptr += MINBPC(enc);
    1795        break;
    1796      case BT_CR:
    1797        pos->lineNumber++;
    1798        ptr += MINBPC(enc);
    1799        if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
    1800          ptr += MINBPC(enc);
    1801        pos->columnNumber = 0;
    1802        break;
    1803      default:
    1804        ptr += MINBPC(enc);
    1805        pos->columnNumber++;
    1806        break;
    1807      }
    1808    }
    1809  }
    1810  
    1811  #  undef DO_LEAD_CASE
    1812  #  undef MULTIBYTE_CASES
    1813  #  undef INVALID_CASES
    1814  #  undef CHECK_NAME_CASE
    1815  #  undef CHECK_NAME_CASES
    1816  #  undef CHECK_NMSTRT_CASE
    1817  #  undef CHECK_NMSTRT_CASES
    1818  
    1819  #endif /* XML_TOK_IMPL_C */