(root)/
libxml2-2.12.3/
parserInternals.c
       1  /*
       2   * parserInternals.c : Internal routines (and obsolete ones) needed for the
       3   *                     XML and HTML parsers.
       4   *
       5   * See Copyright for the status of this software.
       6   *
       7   * daniel@veillard.com
       8   */
       9  
      10  #define IN_LIBXML
      11  #include "libxml.h"
      12  
      13  #if defined(_WIN32)
      14  #define XML_DIR_SEP '\\'
      15  #else
      16  #define XML_DIR_SEP '/'
      17  #endif
      18  
      19  #include <string.h>
      20  #include <ctype.h>
      21  #include <stdlib.h>
      22  
      23  #include <libxml/xmlmemory.h>
      24  #include <libxml/tree.h>
      25  #include <libxml/parser.h>
      26  #include <libxml/parserInternals.h>
      27  #include <libxml/entities.h>
      28  #include <libxml/xmlerror.h>
      29  #include <libxml/encoding.h>
      30  #include <libxml/xmlIO.h>
      31  #include <libxml/uri.h>
      32  #include <libxml/dict.h>
      33  #include <libxml/xmlsave.h>
      34  #ifdef LIBXML_CATALOG_ENABLED
      35  #include <libxml/catalog.h>
      36  #endif
      37  #include <libxml/chvalid.h>
      38  
      39  #define CUR(ctxt) ctxt->input->cur
      40  #define END(ctxt) ctxt->input->end
      41  
      42  #include "private/buf.h"
      43  #include "private/enc.h"
      44  #include "private/error.h"
      45  #include "private/io.h"
      46  #include "private/parser.h"
      47  
      48  /*
      49   * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
      50   * factor of serialized output after entity expansion.
      51   */
      52  #define XML_MAX_AMPLIFICATION_DEFAULT 5
      53  
      54  /*
      55   * Various global defaults for parsing
      56   */
      57  
      58  /**
      59   * xmlCheckVersion:
      60   * @version: the include version number
      61   *
      62   * check the compiled lib version against the include one.
      63   * This can warn or immediately kill the application
      64   */
      65  void
      66  xmlCheckVersion(int version) {
      67      int myversion = LIBXML_VERSION;
      68  
      69      xmlInitParser();
      70  
      71      if ((myversion / 10000) != (version / 10000)) {
      72  	xmlGenericError(xmlGenericErrorContext,
      73  		"Fatal: program compiled against libxml %d using libxml %d\n",
      74  		(version / 10000), (myversion / 10000));
      75  	fprintf(stderr,
      76  		"Fatal: program compiled against libxml %d using libxml %d\n",
      77  		(version / 10000), (myversion / 10000));
      78      }
      79      if ((myversion / 100) < (version / 100)) {
      80  	xmlGenericError(xmlGenericErrorContext,
      81  		"Warning: program compiled against libxml %d using older %d\n",
      82  		(version / 100), (myversion / 100));
      83      }
      84  }
      85  
      86  
      87  /************************************************************************
      88   *									*
      89   *		Some factorized error routines				*
      90   *									*
      91   ************************************************************************/
      92  
      93  
      94  /**
      95   * xmlErrMemory:
      96   * @ctxt:  an XML parser context
      97   * @extra:  extra information
      98   *
      99   * Handle a redefinition of attribute error
     100   */
     101  void
     102  xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
     103  {
     104      if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
     105          (ctxt->instate == XML_PARSER_EOF))
     106  	return;
     107      if (ctxt != NULL) {
     108          ctxt->errNo = XML_ERR_NO_MEMORY;
     109          ctxt->instate = XML_PARSER_EOF;
     110          ctxt->disableSAX = 1;
     111      }
     112      if (extra)
     113          __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
     114                          XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
     115                          NULL, NULL, 0, 0,
     116                          "Memory allocation failed : %s\n", extra);
     117      else
     118          __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
     119                          XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
     120                          NULL, NULL, 0, 0, "Memory allocation failed\n");
     121  }
     122  
     123  /**
     124   * __xmlErrEncoding:
     125   * @ctxt:  an XML parser context
     126   * @xmlerr:  the error number
     127   * @msg:  the error message
     128   * @str1:  an string info
     129   * @str2:  an string info
     130   *
     131   * Handle an encoding error
     132   */
     133  void
     134  __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
     135                   const char *msg, const xmlChar * str1, const xmlChar * str2)
     136  {
     137      if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
     138          (ctxt->instate == XML_PARSER_EOF))
     139  	return;
     140      if (ctxt != NULL)
     141          ctxt->errNo = xmlerr;
     142      __xmlRaiseError(NULL, NULL, NULL,
     143                      ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
     144                      NULL, 0, (const char *) str1, (const char *) str2,
     145                      NULL, 0, 0, msg, str1, str2);
     146      if (ctxt != NULL) {
     147          ctxt->wellFormed = 0;
     148          if (ctxt->recovery == 0)
     149              ctxt->disableSAX = 1;
     150      }
     151  }
     152  
     153  /**
     154   * xmlErrInternal:
     155   * @ctxt:  an XML parser context
     156   * @msg:  the error message
     157   * @str:  error information
     158   *
     159   * Handle an internal error
     160   */
     161  static void LIBXML_ATTR_FORMAT(2,0)
     162  xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
     163  {
     164      if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
     165          (ctxt->instate == XML_PARSER_EOF))
     166  	return;
     167      if (ctxt != NULL)
     168          ctxt->errNo = XML_ERR_INTERNAL_ERROR;
     169      __xmlRaiseError(NULL, NULL, NULL,
     170                      ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
     171                      XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
     172                      0, 0, msg, str);
     173      if (ctxt != NULL) {
     174          ctxt->wellFormed = 0;
     175          if (ctxt->recovery == 0)
     176              ctxt->disableSAX = 1;
     177      }
     178  }
     179  
     180  /**
     181   * xmlFatalErr:
     182   * @ctxt:  an XML parser context
     183   * @error:  the error number
     184   * @info:  extra information string
     185   *
     186   * Handle a fatal parser error, i.e. violating Well-Formedness constraints
     187   */
     188  void
     189  xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
     190  {
     191      const char *errmsg;
     192  
     193      if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
     194          (ctxt->instate == XML_PARSER_EOF))
     195  	return;
     196      switch (error) {
     197          case XML_ERR_INVALID_HEX_CHARREF:
     198              errmsg = "CharRef: invalid hexadecimal value";
     199              break;
     200          case XML_ERR_INVALID_DEC_CHARREF:
     201              errmsg = "CharRef: invalid decimal value";
     202              break;
     203          case XML_ERR_INVALID_CHARREF:
     204              errmsg = "CharRef: invalid value";
     205              break;
     206          case XML_ERR_INTERNAL_ERROR:
     207              errmsg = "internal error";
     208              break;
     209          case XML_ERR_PEREF_AT_EOF:
     210              errmsg = "PEReference at end of document";
     211              break;
     212          case XML_ERR_PEREF_IN_PROLOG:
     213              errmsg = "PEReference in prolog";
     214              break;
     215          case XML_ERR_PEREF_IN_EPILOG:
     216              errmsg = "PEReference in epilog";
     217              break;
     218          case XML_ERR_PEREF_NO_NAME:
     219              errmsg = "PEReference: no name";
     220              break;
     221          case XML_ERR_PEREF_SEMICOL_MISSING:
     222              errmsg = "PEReference: expecting ';'";
     223              break;
     224          case XML_ERR_ENTITY_LOOP:
     225              errmsg = "Detected an entity reference loop";
     226              break;
     227          case XML_ERR_ENTITY_NOT_STARTED:
     228              errmsg = "EntityValue: \" or ' expected";
     229              break;
     230          case XML_ERR_ENTITY_PE_INTERNAL:
     231              errmsg = "PEReferences forbidden in internal subset";
     232              break;
     233          case XML_ERR_ENTITY_NOT_FINISHED:
     234              errmsg = "EntityValue: \" or ' expected";
     235              break;
     236          case XML_ERR_ATTRIBUTE_NOT_STARTED:
     237              errmsg = "AttValue: \" or ' expected";
     238              break;
     239          case XML_ERR_LT_IN_ATTRIBUTE:
     240              errmsg = "Unescaped '<' not allowed in attributes values";
     241              break;
     242          case XML_ERR_LITERAL_NOT_STARTED:
     243              errmsg = "SystemLiteral \" or ' expected";
     244              break;
     245          case XML_ERR_LITERAL_NOT_FINISHED:
     246              errmsg = "Unfinished System or Public ID \" or ' expected";
     247              break;
     248          case XML_ERR_MISPLACED_CDATA_END:
     249              errmsg = "Sequence ']]>' not allowed in content";
     250              break;
     251          case XML_ERR_URI_REQUIRED:
     252              errmsg = "SYSTEM or PUBLIC, the URI is missing";
     253              break;
     254          case XML_ERR_PUBID_REQUIRED:
     255              errmsg = "PUBLIC, the Public Identifier is missing";
     256              break;
     257          case XML_ERR_HYPHEN_IN_COMMENT:
     258              errmsg = "Comment must not contain '--' (double-hyphen)";
     259              break;
     260          case XML_ERR_PI_NOT_STARTED:
     261              errmsg = "xmlParsePI : no target name";
     262              break;
     263          case XML_ERR_RESERVED_XML_NAME:
     264              errmsg = "Invalid PI name";
     265              break;
     266          case XML_ERR_NOTATION_NOT_STARTED:
     267              errmsg = "NOTATION: Name expected here";
     268              break;
     269          case XML_ERR_NOTATION_NOT_FINISHED:
     270              errmsg = "'>' required to close NOTATION declaration";
     271              break;
     272          case XML_ERR_VALUE_REQUIRED:
     273              errmsg = "Entity value required";
     274              break;
     275          case XML_ERR_URI_FRAGMENT:
     276              errmsg = "Fragment not allowed";
     277              break;
     278          case XML_ERR_ATTLIST_NOT_STARTED:
     279              errmsg = "'(' required to start ATTLIST enumeration";
     280              break;
     281          case XML_ERR_NMTOKEN_REQUIRED:
     282              errmsg = "NmToken expected in ATTLIST enumeration";
     283              break;
     284          case XML_ERR_ATTLIST_NOT_FINISHED:
     285              errmsg = "')' required to finish ATTLIST enumeration";
     286              break;
     287          case XML_ERR_MIXED_NOT_STARTED:
     288              errmsg = "MixedContentDecl : '|' or ')*' expected";
     289              break;
     290          case XML_ERR_PCDATA_REQUIRED:
     291              errmsg = "MixedContentDecl : '#PCDATA' expected";
     292              break;
     293          case XML_ERR_ELEMCONTENT_NOT_STARTED:
     294              errmsg = "ContentDecl : Name or '(' expected";
     295              break;
     296          case XML_ERR_ELEMCONTENT_NOT_FINISHED:
     297              errmsg = "ContentDecl : ',' '|' or ')' expected";
     298              break;
     299          case XML_ERR_PEREF_IN_INT_SUBSET:
     300              errmsg =
     301                  "PEReference: forbidden within markup decl in internal subset";
     302              break;
     303          case XML_ERR_GT_REQUIRED:
     304              errmsg = "expected '>'";
     305              break;
     306          case XML_ERR_CONDSEC_INVALID:
     307              errmsg = "XML conditional section '[' expected";
     308              break;
     309          case XML_ERR_EXT_SUBSET_NOT_FINISHED:
     310              errmsg = "Content error in the external subset";
     311              break;
     312          case XML_ERR_CONDSEC_INVALID_KEYWORD:
     313              errmsg =
     314                  "conditional section INCLUDE or IGNORE keyword expected";
     315              break;
     316          case XML_ERR_CONDSEC_NOT_FINISHED:
     317              errmsg = "XML conditional section not closed";
     318              break;
     319          case XML_ERR_XMLDECL_NOT_STARTED:
     320              errmsg = "Text declaration '<?xml' required";
     321              break;
     322          case XML_ERR_XMLDECL_NOT_FINISHED:
     323              errmsg = "parsing XML declaration: '?>' expected";
     324              break;
     325          case XML_ERR_EXT_ENTITY_STANDALONE:
     326              errmsg = "external parsed entities cannot be standalone";
     327              break;
     328          case XML_ERR_ENTITYREF_SEMICOL_MISSING:
     329              errmsg = "EntityRef: expecting ';'";
     330              break;
     331          case XML_ERR_DOCTYPE_NOT_FINISHED:
     332              errmsg = "DOCTYPE improperly terminated";
     333              break;
     334          case XML_ERR_LTSLASH_REQUIRED:
     335              errmsg = "EndTag: '</' not found";
     336              break;
     337          case XML_ERR_EQUAL_REQUIRED:
     338              errmsg = "expected '='";
     339              break;
     340          case XML_ERR_STRING_NOT_CLOSED:
     341              errmsg = "String not closed expecting \" or '";
     342              break;
     343          case XML_ERR_STRING_NOT_STARTED:
     344              errmsg = "String not started expecting ' or \"";
     345              break;
     346          case XML_ERR_ENCODING_NAME:
     347              errmsg = "Invalid XML encoding name";
     348              break;
     349          case XML_ERR_STANDALONE_VALUE:
     350              errmsg = "standalone accepts only 'yes' or 'no'";
     351              break;
     352          case XML_ERR_DOCUMENT_EMPTY:
     353              errmsg = "Document is empty";
     354              break;
     355          case XML_ERR_DOCUMENT_END:
     356              errmsg = "Extra content at the end of the document";
     357              break;
     358          case XML_ERR_NOT_WELL_BALANCED:
     359              errmsg = "chunk is not well balanced";
     360              break;
     361          case XML_ERR_EXTRA_CONTENT:
     362              errmsg = "extra content at the end of well balanced chunk";
     363              break;
     364          case XML_ERR_VERSION_MISSING:
     365              errmsg = "Malformed declaration expecting version";
     366              break;
     367          case XML_ERR_NAME_TOO_LONG:
     368              errmsg = "Name too long";
     369              break;
     370          case XML_ERR_INVALID_ENCODING:
     371              errmsg = "Invalid bytes in character encoding";
     372              break;
     373          case XML_IO_UNKNOWN:
     374              errmsg = "I/O error";
     375              break;
     376  #if 0
     377          case:
     378              errmsg = "";
     379              break;
     380  #endif
     381          default:
     382              errmsg = "Unregistered error message";
     383      }
     384      if (ctxt != NULL)
     385  	ctxt->errNo = error;
     386      if (info == NULL) {
     387          __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
     388                          XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
     389                          errmsg);
     390      } else {
     391          __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
     392                          XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
     393                          errmsg, info);
     394      }
     395      if (ctxt != NULL) {
     396  	ctxt->wellFormed = 0;
     397  	if (ctxt->recovery == 0)
     398  	    ctxt->disableSAX = 1;
     399      }
     400  }
     401  
     402  /**
     403   * xmlErrEncodingInt:
     404   * @ctxt:  an XML parser context
     405   * @error:  the error number
     406   * @msg:  the error message
     407   * @val:  an integer value
     408   *
     409   * n encoding error
     410   */
     411  static void LIBXML_ATTR_FORMAT(3,0)
     412  xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
     413                    const char *msg, int val)
     414  {
     415      if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
     416          (ctxt->instate == XML_PARSER_EOF))
     417  	return;
     418      if (ctxt != NULL)
     419          ctxt->errNo = error;
     420      __xmlRaiseError(NULL, NULL, NULL,
     421                      ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
     422                      NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
     423      if (ctxt != NULL) {
     424          ctxt->wellFormed = 0;
     425          if (ctxt->recovery == 0)
     426              ctxt->disableSAX = 1;
     427      }
     428  }
     429  
     430  /**
     431   * xmlIsLetter:
     432   * @c:  an unicode character (int)
     433   *
     434   * Check whether the character is allowed by the production
     435   * [84] Letter ::= BaseChar | Ideographic
     436   *
     437   * Returns 0 if not, non-zero otherwise
     438   */
     439  int
     440  xmlIsLetter(int c) {
     441      return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
     442  }
     443  
     444  /************************************************************************
     445   *									*
     446   *		Input handling functions for progressive parsing	*
     447   *									*
     448   ************************************************************************/
     449  
     450  /* we need to keep enough input to show errors in context */
     451  #define LINE_LEN        80
     452  
     453  /**
     454   * xmlHaltParser:
     455   * @ctxt:  an XML parser context
     456   *
     457   * Blocks further parser processing don't override error
     458   * for internal use
     459   */
     460  void
     461  xmlHaltParser(xmlParserCtxtPtr ctxt) {
     462      if (ctxt == NULL)
     463          return;
     464      ctxt->instate = XML_PARSER_EOF;
     465      ctxt->disableSAX = 1;
     466      while (ctxt->inputNr > 1)
     467          xmlFreeInputStream(inputPop(ctxt));
     468      if (ctxt->input != NULL) {
     469          /*
     470  	 * in case there was a specific allocation deallocate before
     471  	 * overriding base
     472  	 */
     473          if (ctxt->input->free != NULL) {
     474  	    ctxt->input->free((xmlChar *) ctxt->input->base);
     475  	    ctxt->input->free = NULL;
     476  	}
     477          if (ctxt->input->buf != NULL) {
     478              xmlFreeParserInputBuffer(ctxt->input->buf);
     479              ctxt->input->buf = NULL;
     480          }
     481  	ctxt->input->cur = BAD_CAST"";
     482          ctxt->input->length = 0;
     483  	ctxt->input->base = ctxt->input->cur;
     484          ctxt->input->end = ctxt->input->cur;
     485      }
     486  }
     487  
     488  /**
     489   * xmlParserInputRead:
     490   * @in:  an XML parser input
     491   * @len:  an indicative size for the lookahead
     492   *
     493   * DEPRECATED: This function was internal and is deprecated.
     494   *
     495   * Returns -1 as this is an error to use it.
     496   */
     497  int
     498  xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
     499      return(-1);
     500  }
     501  
     502  /**
     503   * xmlParserGrow:
     504   * @ctxt:  an XML parser context
     505   *
     506   * Grow the input buffer.
     507   *
     508   * Returns the number of bytes read or -1 in case of error.
     509   */
     510  int
     511  xmlParserGrow(xmlParserCtxtPtr ctxt) {
     512      xmlParserInputPtr in = ctxt->input;
     513      xmlParserInputBufferPtr buf = in->buf;
     514      ptrdiff_t curEnd = in->end - in->cur;
     515      ptrdiff_t curBase = in->cur - in->base;
     516      int ret;
     517  
     518      if (buf == NULL)
     519          return(0);
     520      /* Don't grow push parser buffer. */
     521      if ((ctxt->progressive) && (ctxt->inputNr <= 1))
     522          return(0);
     523      if (buf->error != 0)
     524          return(-1);
     525  
     526      if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
     527           (curBase > XML_MAX_LOOKUP_LIMIT)) &&
     528          ((ctxt->options & XML_PARSE_HUGE) == 0)) {
     529          xmlErrMemory(ctxt, "Huge input lookup");
     530          xmlHaltParser(ctxt);
     531  	return(-1);
     532      }
     533  
     534      if (curEnd >= INPUT_CHUNK)
     535          return(0);
     536  
     537      ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
     538      xmlBufUpdateInput(buf->buffer, in, curBase);
     539  
     540      if (ret < 0) {
     541          xmlFatalErr(ctxt, buf->error, NULL);
     542          /* Buffer contents may be lost in case of memory errors. */
     543          if (buf->error == XML_ERR_NO_MEMORY)
     544              xmlHaltParser(ctxt);
     545      }
     546  
     547      return(ret);
     548  }
     549  
     550  /**
     551   * xmlParserInputGrow:
     552   * @in:  an XML parser input
     553   * @len:  an indicative size for the lookahead
     554   *
     555   * DEPRECATED: Don't use.
     556   *
     557   * This function increase the input for the parser. It tries to
     558   * preserve pointers to the input buffer, and keep already read data
     559   *
     560   * Returns the amount of char read, or -1 in case of error, 0 indicate the
     561   * end of this entity
     562   */
     563  int
     564  xmlParserInputGrow(xmlParserInputPtr in, int len) {
     565      int ret;
     566      size_t indx;
     567  
     568      if ((in == NULL) || (len < 0)) return(-1);
     569      if (in->buf == NULL) return(-1);
     570      if (in->base == NULL) return(-1);
     571      if (in->cur == NULL) return(-1);
     572      if (in->buf->buffer == NULL) return(-1);
     573  
     574      indx = in->cur - in->base;
     575      if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
     576          return(0);
     577      }
     578      ret = xmlParserInputBufferGrow(in->buf, len);
     579  
     580      in->base = xmlBufContent(in->buf->buffer);
     581      if (in->base == NULL) {
     582          in->base = BAD_CAST "";
     583          in->cur = in->base;
     584          in->end = in->base;
     585          return(-1);
     586      }
     587      in->cur = in->base + indx;
     588      in->end = xmlBufEnd(in->buf->buffer);
     589  
     590      return(ret);
     591  }
     592  
     593  /**
     594   * xmlParserShrink:
     595   * @ctxt:  an XML parser context
     596   *
     597   * Shrink the input buffer.
     598   */
     599  void
     600  xmlParserShrink(xmlParserCtxtPtr ctxt) {
     601      xmlParserInputPtr in = ctxt->input;
     602      xmlParserInputBufferPtr buf = in->buf;
     603      size_t used;
     604  
     605      if (buf == NULL)
     606          return;
     607  
     608      used = in->cur - in->base;
     609      /*
     610       * Do not shrink on large buffers whose only a tiny fraction
     611       * was consumed
     612       */
     613      if (used > INPUT_CHUNK) {
     614  	size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
     615  
     616  	if (res > 0) {
     617              used -= res;
     618              if ((res > ULONG_MAX) ||
     619                  (in->consumed > ULONG_MAX - (unsigned long)res))
     620                  in->consumed = ULONG_MAX;
     621              else
     622                  in->consumed += res;
     623  	}
     624      }
     625  
     626      xmlBufUpdateInput(buf->buffer, in, used);
     627  }
     628  
     629  /**
     630   * xmlParserInputShrink:
     631   * @in:  an XML parser input
     632   *
     633   * DEPRECATED: Don't use.
     634   *
     635   * This function removes used input for the parser.
     636   */
     637  void
     638  xmlParserInputShrink(xmlParserInputPtr in) {
     639      size_t used;
     640      size_t ret;
     641  
     642      if (in == NULL) return;
     643      if (in->buf == NULL) return;
     644      if (in->base == NULL) return;
     645      if (in->cur == NULL) return;
     646      if (in->buf->buffer == NULL) return;
     647  
     648      used = in->cur - in->base;
     649      /*
     650       * Do not shrink on large buffers whose only a tiny fraction
     651       * was consumed
     652       */
     653      if (used > INPUT_CHUNK) {
     654  	ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
     655  	if (ret > 0) {
     656              used -= ret;
     657              if ((ret > ULONG_MAX) ||
     658                  (in->consumed > ULONG_MAX - (unsigned long)ret))
     659                  in->consumed = ULONG_MAX;
     660              else
     661                  in->consumed += ret;
     662  	}
     663      }
     664  
     665      if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
     666          xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
     667      }
     668  
     669      in->base = xmlBufContent(in->buf->buffer);
     670      if (in->base == NULL) {
     671          /* TODO: raise error */
     672          in->base = BAD_CAST "";
     673          in->cur = in->base;
     674          in->end = in->base;
     675          return;
     676      }
     677      in->cur = in->base + used;
     678      in->end = xmlBufEnd(in->buf->buffer);
     679  }
     680  
     681  /************************************************************************
     682   *									*
     683   *		UTF8 character input and related functions		*
     684   *									*
     685   ************************************************************************/
     686  
     687  /**
     688   * xmlNextChar:
     689   * @ctxt:  the XML parser context
     690   *
     691   * DEPRECATED: Internal function, do not use.
     692   *
     693   * Skip to the next char input char.
     694   */
     695  
     696  void
     697  xmlNextChar(xmlParserCtxtPtr ctxt)
     698  {
     699      const unsigned char *cur;
     700      size_t avail;
     701      int c;
     702  
     703      if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
     704          (ctxt->input == NULL))
     705          return;
     706  
     707      avail = ctxt->input->end - ctxt->input->cur;
     708  
     709      if (avail < INPUT_CHUNK) {
     710          xmlParserGrow(ctxt);
     711          if ((ctxt->instate == XML_PARSER_EOF) ||
     712              (ctxt->input->cur >= ctxt->input->end))
     713              return;
     714          avail = ctxt->input->end - ctxt->input->cur;
     715      }
     716  
     717      cur = ctxt->input->cur;
     718      c = *cur;
     719  
     720      if (c < 0x80) {
     721          if (c == '\n') {
     722              ctxt->input->cur++;
     723              ctxt->input->line++;
     724              ctxt->input->col = 1;
     725          } else if (c == '\r') {
     726              /*
     727               *   2.11 End-of-Line Handling
     728               *   the literal two-character sequence "#xD#xA" or a standalone
     729               *   literal #xD, an XML processor must pass to the application
     730               *   the single character #xA.
     731               */
     732              ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
     733              ctxt->input->line++;
     734              ctxt->input->col = 1;
     735              return;
     736          } else {
     737              ctxt->input->cur++;
     738              ctxt->input->col++;
     739          }
     740      } else {
     741          ctxt->input->col++;
     742  
     743          if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
     744              goto encoding_error;
     745  
     746          if (c < 0xe0) {
     747              /* 2-byte code */
     748              if (c < 0xc2)
     749                  goto encoding_error;
     750              ctxt->input->cur += 2;
     751          } else {
     752              unsigned int val = (c << 8) | cur[1];
     753  
     754              if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
     755                  goto encoding_error;
     756  
     757              if (c < 0xf0) {
     758                  /* 3-byte code */
     759                  if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
     760                      goto encoding_error;
     761                  ctxt->input->cur += 3;
     762              } else {
     763                  if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
     764                      goto encoding_error;
     765  
     766                  /* 4-byte code */
     767                  if ((val < 0xf090) || (val >= 0xf490))
     768                      goto encoding_error;
     769                  ctxt->input->cur += 4;
     770              }
     771          }
     772      }
     773  
     774      return;
     775  
     776  encoding_error:
     777      /* Only report the first error */
     778      if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
     779          if ((ctxt == NULL) || (ctxt->input == NULL) ||
     780              (ctxt->input->end - ctxt->input->cur < 4)) {
     781              __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
     782                           "Input is not proper UTF-8, indicate encoding !\n",
     783                           NULL, NULL);
     784          } else {
     785              char buffer[150];
     786  
     787              snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
     788                              ctxt->input->cur[0], ctxt->input->cur[1],
     789                              ctxt->input->cur[2], ctxt->input->cur[3]);
     790              __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
     791                           "Input is not proper UTF-8, indicate encoding !\n%s",
     792                           BAD_CAST buffer, NULL);
     793          }
     794          ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
     795      }
     796      ctxt->input->cur++;
     797      return;
     798  }
     799  
     800  /**
     801   * xmlCurrentChar:
     802   * @ctxt:  the XML parser context
     803   * @len:  pointer to the length of the char read
     804   *
     805   * DEPRECATED: Internal function, do not use.
     806   *
     807   * The current char value, if using UTF-8 this may actually span multiple
     808   * bytes in the input buffer. Implement the end of line normalization:
     809   * 2.11 End-of-Line Handling
     810   * Wherever an external parsed entity or the literal entity value
     811   * of an internal parsed entity contains either the literal two-character
     812   * sequence "#xD#xA" or a standalone literal #xD, an XML processor
     813   * must pass to the application the single character #xA.
     814   * This behavior can conveniently be produced by normalizing all
     815   * line breaks to #xA on input, before parsing.)
     816   *
     817   * Returns the current char value and its length
     818   */
     819  
     820  int
     821  xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
     822      const unsigned char *cur;
     823      size_t avail;
     824      int c;
     825  
     826      if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
     827      if (ctxt->instate == XML_PARSER_EOF)
     828  	return(0);
     829  
     830      avail = ctxt->input->end - ctxt->input->cur;
     831  
     832      if (avail < INPUT_CHUNK) {
     833          xmlParserGrow(ctxt);
     834          if (ctxt->instate == XML_PARSER_EOF)
     835              return(0);
     836          avail = ctxt->input->end - ctxt->input->cur;
     837      }
     838  
     839      cur = ctxt->input->cur;
     840      c = *cur;
     841  
     842      if (c < 0x80) {
     843  	/* 1-byte code */
     844          if (c < 0x20) {
     845              /*
     846               *   2.11 End-of-Line Handling
     847               *   the literal two-character sequence "#xD#xA" or a standalone
     848               *   literal #xD, an XML processor must pass to the application
     849               *   the single character #xA.
     850               */
     851              if (c == '\r') {
     852                  /*
     853                   * TODO: This function shouldn't change the 'cur' pointer
     854                   * as side effect, but the NEXTL macro in parser.c relies
     855                   * on this behavior when incrementing line numbers.
     856                   */
     857                  if (cur[1] == '\n')
     858                      ctxt->input->cur++;
     859                  *len = 1;
     860                  c = '\n';
     861              } else if (c == 0) {
     862                  if (ctxt->input->cur >= ctxt->input->end) {
     863                      *len = 0;
     864                  } else {
     865                      *len = 1;
     866                      /*
     867                       * TODO: Null bytes should be handled by callers,
     868                       * but this can be tricky.
     869                       */
     870                      xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
     871                              "Char 0x0 out of allowed range\n", c);
     872                  }
     873              } else {
     874                  *len = 1;
     875              }
     876          } else {
     877              *len = 1;
     878          }
     879  
     880          return(c);
     881      } else {
     882          int val;
     883  
     884          if (avail < 2)
     885              goto incomplete_sequence;
     886          if ((cur[1] & 0xc0) != 0x80)
     887              goto encoding_error;
     888  
     889          if (c < 0xe0) {
     890              /* 2-byte code */
     891              if (c < 0xc2)
     892                  goto encoding_error;
     893              val = (c & 0x1f) << 6;
     894              val |= cur[1] & 0x3f;
     895              *len = 2;
     896          } else {
     897              if (avail < 3)
     898                  goto incomplete_sequence;
     899              if ((cur[2] & 0xc0) != 0x80)
     900                  goto encoding_error;
     901  
     902              if (c < 0xf0) {
     903                  /* 3-byte code */
     904                  val = (c & 0xf) << 12;
     905                  val |= (cur[1] & 0x3f) << 6;
     906                  val |= cur[2] & 0x3f;
     907                  if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
     908                      goto encoding_error;
     909                  *len = 3;
     910              } else {
     911                  if (avail < 4)
     912                      goto incomplete_sequence;
     913                  if ((cur[3] & 0xc0) != 0x80)
     914                      goto encoding_error;
     915  
     916                  /* 4-byte code */
     917                  val = (c & 0x0f) << 18;
     918                  val |= (cur[1] & 0x3f) << 12;
     919                  val |= (cur[2] & 0x3f) << 6;
     920                  val |= cur[3] & 0x3f;
     921                  if ((val < 0x10000) || (val >= 0x110000))
     922                      goto encoding_error;
     923                  *len = 4;
     924              }
     925          }
     926  
     927          return(val);
     928      }
     929  
     930  encoding_error:
     931      /* Only report the first error */
     932      if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
     933          if (ctxt->input->end - ctxt->input->cur < 4) {
     934              __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
     935                           "Input is not proper UTF-8, indicate encoding !\n",
     936                           NULL, NULL);
     937          } else {
     938              char buffer[150];
     939  
     940              snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
     941                              ctxt->input->cur[0], ctxt->input->cur[1],
     942                              ctxt->input->cur[2], ctxt->input->cur[3]);
     943              __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
     944                           "Input is not proper UTF-8, indicate encoding !\n%s",
     945                           BAD_CAST buffer, NULL);
     946          }
     947          ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
     948      }
     949      *len = 1;
     950      return(0xFFFD); /* U+FFFD Replacement Character */
     951  
     952  incomplete_sequence:
     953      /*
     954       * An encoding problem may arise from a truncated input buffer
     955       * splitting a character in the middle. In that case do not raise
     956       * an error but return 0. This should only happen when push parsing
     957       * char data.
     958       */
     959      *len = 0;
     960      return(0);
     961  }
     962  
     963  /**
     964   * xmlStringCurrentChar:
     965   * @ctxt:  the XML parser context
     966   * @cur:  pointer to the beginning of the char
     967   * @len:  pointer to the length of the char read
     968   *
     969   * DEPRECATED: Internal function, do not use.
     970   *
     971   * The current char value, if using UTF-8 this may actually span multiple
     972   * bytes in the input buffer.
     973   *
     974   * Returns the current char value and its length
     975   */
     976  
     977  int
     978  xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
     979                       const xmlChar *cur, int *len) {
     980      int c;
     981  
     982      if ((cur == NULL) || (len == NULL))
     983          return(0);
     984  
     985      /* cur is zero-terminated, so we can lie about its length. */
     986      *len = 4;
     987      c = xmlGetUTF8Char(cur, len);
     988  
     989      return((c < 0) ? 0 : c);
     990  }
     991  
     992  /**
     993   * xmlCopyCharMultiByte:
     994   * @out:  pointer to an array of xmlChar
     995   * @val:  the char value
     996   *
     997   * append the char value in the array
     998   *
     999   * Returns the number of xmlChar written
    1000   */
    1001  int
    1002  xmlCopyCharMultiByte(xmlChar *out, int val) {
    1003      if ((out == NULL) || (val < 0)) return(0);
    1004      /*
    1005       * We are supposed to handle UTF8, check it's valid
    1006       * From rfc2044: encoding of the Unicode values on UTF-8:
    1007       *
    1008       * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
    1009       * 0000 0000-0000 007F   0xxxxxxx
    1010       * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
    1011       * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
    1012       */
    1013      if  (val >= 0x80) {
    1014  	xmlChar *savedout = out;
    1015  	int bits;
    1016  	if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
    1017  	else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
    1018  	else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
    1019  	else {
    1020  	    xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
    1021  		    "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
    1022  			      val);
    1023  	    return(0);
    1024  	}
    1025  	for ( ; bits >= 0; bits-= 6)
    1026  	    *out++= ((val >> bits) & 0x3F) | 0x80 ;
    1027  	return (out - savedout);
    1028      }
    1029      *out = val;
    1030      return 1;
    1031  }
    1032  
    1033  /**
    1034   * xmlCopyChar:
    1035   * @len:  Ignored, compatibility
    1036   * @out:  pointer to an array of xmlChar
    1037   * @val:  the char value
    1038   *
    1039   * append the char value in the array
    1040   *
    1041   * Returns the number of xmlChar written
    1042   */
    1043  
    1044  int
    1045  xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
    1046      if ((out == NULL) || (val < 0)) return(0);
    1047      /* the len parameter is ignored */
    1048      if  (val >= 0x80) {
    1049  	return(xmlCopyCharMultiByte (out, val));
    1050      }
    1051      *out = val;
    1052      return 1;
    1053  }
    1054  
    1055  /************************************************************************
    1056   *									*
    1057   *		Commodity functions to switch encodings			*
    1058   *									*
    1059   ************************************************************************/
    1060  
    1061  static xmlCharEncodingHandlerPtr
    1062  xmlDetectEBCDIC(xmlParserInputPtr input) {
    1063      xmlChar out[200];
    1064      xmlCharEncodingHandlerPtr handler;
    1065      int inlen, outlen, res, i;
    1066  
    1067      /*
    1068       * To detect the EBCDIC code page, we convert the first 200 bytes
    1069       * to EBCDIC-US and try to find the encoding declaration.
    1070       */
    1071      handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC);
    1072      if (handler == NULL)
    1073          return(NULL);
    1074      outlen = sizeof(out) - 1;
    1075      inlen = input->end - input->cur;
    1076      res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
    1077      if (res < 0)
    1078          return(handler);
    1079      out[outlen] = 0;
    1080  
    1081      for (i = 0; i < outlen; i++) {
    1082          if (out[i] == '>')
    1083              break;
    1084          if ((out[i] == 'e') &&
    1085              (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
    1086              int start, cur, quote;
    1087  
    1088              i += 8;
    1089              while (IS_BLANK_CH(out[i]))
    1090                  i += 1;
    1091              if (out[i++] != '=')
    1092                  break;
    1093              while (IS_BLANK_CH(out[i]))
    1094                  i += 1;
    1095              quote = out[i++];
    1096              if ((quote != '\'') && (quote != '"'))
    1097                  break;
    1098              start = i;
    1099              cur = out[i];
    1100              while (((cur >= 'a') && (cur <= 'z')) ||
    1101                     ((cur >= 'A') && (cur <= 'Z')) ||
    1102                     ((cur >= '0') && (cur <= '9')) ||
    1103                     (cur == '.') || (cur == '_') ||
    1104                     (cur == '-'))
    1105                  cur = out[++i];
    1106              if (cur != quote)
    1107                  break;
    1108              out[i] = 0;
    1109              xmlCharEncCloseFunc(handler);
    1110              return(xmlFindCharEncodingHandler((char *) out + start));
    1111          }
    1112      }
    1113  
    1114      /*
    1115       * ICU handlers are stateful, so we have to recreate them.
    1116       */
    1117      xmlCharEncCloseFunc(handler);
    1118      return(xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC));
    1119  }
    1120  
    1121  /**
    1122   * xmlSwitchEncoding:
    1123   * @ctxt:  the parser context
    1124   * @enc:  the encoding value (number)
    1125   *
    1126   * Use encoding specified by enum to decode input data.
    1127   *
    1128   * This function can be used to enforce the encoding of chunks passed
    1129   * to xmlParseChunk.
    1130   *
    1131   * Returns 0 in case of success, -1 otherwise
    1132   */
    1133  int
    1134  xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
    1135  {
    1136      xmlCharEncodingHandlerPtr handler = NULL;
    1137      int check = 1;
    1138      int ret;
    1139  
    1140      if ((ctxt == NULL) || (ctxt->input == NULL))
    1141          return(-1);
    1142  
    1143      switch (enc) {
    1144  	case XML_CHAR_ENCODING_NONE:
    1145  	case XML_CHAR_ENCODING_UTF8:
    1146          case XML_CHAR_ENCODING_ASCII:
    1147              check = 0;
    1148              break;
    1149          case XML_CHAR_ENCODING_EBCDIC:
    1150              handler = xmlDetectEBCDIC(ctxt->input);
    1151              break;
    1152          default:
    1153              handler = xmlGetCharEncodingHandler(enc);
    1154              break;
    1155      }
    1156  
    1157      if ((check) && (handler == NULL)) {
    1158          const char *name = xmlGetCharEncodingName(enc);
    1159  
    1160          __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
    1161                  "encoding not supported: %s\n",
    1162                  BAD_CAST (name ? name : "<null>"), NULL);
    1163          /*
    1164           * TODO: We could recover from errors in external entities
    1165           * if we didn't stop the parser. But most callers of this
    1166           * function don't check the return value.
    1167           */
    1168          xmlStopParser(ctxt);
    1169          return(-1);
    1170      }
    1171  
    1172      ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
    1173  
    1174      if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
    1175          ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
    1176      }
    1177  
    1178      return(ret);
    1179  }
    1180  
    1181  /**
    1182   * xmlSwitchInputEncoding:
    1183   * @ctxt:  the parser context
    1184   * @input:  the input stream
    1185   * @handler:  the encoding handler
    1186   *
    1187   * DEPRECATED: Internal function, don't use.
    1188   *
    1189   * Use encoding handler to decode input data.
    1190   *
    1191   * Returns 0 in case of success, -1 otherwise
    1192   */
    1193  int
    1194  xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
    1195                         xmlCharEncodingHandlerPtr handler)
    1196  {
    1197      int nbchars;
    1198      xmlParserInputBufferPtr in;
    1199  
    1200      if ((input == NULL) || (input->buf == NULL)) {
    1201          xmlCharEncCloseFunc(handler);
    1202  	return (-1);
    1203      }
    1204      in = input->buf;
    1205  
    1206      input->flags |= XML_INPUT_HAS_ENCODING;
    1207  
    1208      /*
    1209       * UTF-8 requires no encoding handler.
    1210       */
    1211      if ((handler != NULL) &&
    1212          (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
    1213          xmlCharEncCloseFunc(handler);
    1214          handler = NULL;
    1215      }
    1216  
    1217      if (in->encoder == handler)
    1218          return (0);
    1219  
    1220      if (in->encoder != NULL) {
    1221          /*
    1222           * Switching encodings during parsing is a really bad idea,
    1223           * but Chromium can switch between ISO-8859-1 and UTF-16 before
    1224           * separate calls to xmlParseChunk.
    1225           *
    1226           * TODO: We should check whether the "raw" input buffer is empty and
    1227           * convert the old content using the old encoder.
    1228           */
    1229  
    1230          xmlCharEncCloseFunc(in->encoder);
    1231          in->encoder = handler;
    1232          return (0);
    1233      }
    1234  
    1235      in->encoder = handler;
    1236  
    1237      /*
    1238       * Is there already some content down the pipe to convert ?
    1239       */
    1240      if (xmlBufIsEmpty(in->buffer) == 0) {
    1241          size_t processed;
    1242  
    1243          /*
    1244           * Shrink the current input buffer.
    1245           * Move it as the raw buffer and create a new input buffer
    1246           */
    1247          processed = input->cur - input->base;
    1248          xmlBufShrink(in->buffer, processed);
    1249          input->consumed += processed;
    1250          in->raw = in->buffer;
    1251          in->buffer = xmlBufCreate();
    1252          in->rawconsumed = processed;
    1253  
    1254          nbchars = xmlCharEncInput(in);
    1255          xmlBufResetInput(in->buffer, input);
    1256          if (nbchars < 0) {
    1257              /* TODO: This could be an out of memory or an encoding error. */
    1258              xmlErrInternal(ctxt,
    1259                             "switching encoding: encoder error\n",
    1260                             NULL);
    1261              xmlHaltParser(ctxt);
    1262              return (-1);
    1263          }
    1264      }
    1265      return (0);
    1266  }
    1267  
    1268  /**
    1269   * xmlSwitchToEncoding:
    1270   * @ctxt:  the parser context
    1271   * @handler:  the encoding handler
    1272   *
    1273   * Use encoding handler to decode input data.
    1274   *
    1275   * This function can be used to enforce the encoding of chunks passed
    1276   * to xmlParseChunk.
    1277   *
    1278   * Returns 0 in case of success, -1 otherwise
    1279   */
    1280  int
    1281  xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
    1282  {
    1283      if (ctxt == NULL)
    1284          return(-1);
    1285      return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
    1286  }
    1287  
    1288  /**
    1289   * xmlDetectEncoding:
    1290   * @ctxt:  the parser context
    1291   *
    1292   * Handle optional BOM, detect and switch to encoding.
    1293   *
    1294   * Assumes that there are at least four bytes in the input buffer.
    1295   */
    1296  void
    1297  xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
    1298      const xmlChar *in;
    1299      xmlCharEncoding enc;
    1300      int bomSize;
    1301      int autoFlag = 0;
    1302  
    1303      if (xmlParserGrow(ctxt) < 0)
    1304          return;
    1305      in = ctxt->input->cur;
    1306      if (ctxt->input->end - in < 4)
    1307          return;
    1308  
    1309      if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
    1310          /*
    1311           * If the encoding was already set, only skip the BOM which was
    1312           * possibly decoded to UTF-8.
    1313           */
    1314          if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
    1315              ctxt->input->cur += 3;
    1316          }
    1317  
    1318          return;
    1319      }
    1320  
    1321      enc = XML_CHAR_ENCODING_NONE;
    1322      bomSize = 0;
    1323  
    1324      switch (in[0]) {
    1325          case 0x00:
    1326              if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
    1327                  enc = XML_CHAR_ENCODING_UCS4BE;
    1328                  autoFlag = XML_INPUT_AUTO_OTHER;
    1329              } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
    1330                  enc = XML_CHAR_ENCODING_UTF16BE;
    1331                  autoFlag = XML_INPUT_AUTO_UTF16BE;
    1332              }
    1333              break;
    1334  
    1335          case 0x3C:
    1336              if (in[1] == 0x00) {
    1337                  if ((in[2] == 0x00) && (in[3] == 0x00)) {
    1338                      enc = XML_CHAR_ENCODING_UCS4LE;
    1339                      autoFlag = XML_INPUT_AUTO_OTHER;
    1340                  } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
    1341                      enc = XML_CHAR_ENCODING_UTF16LE;
    1342                      autoFlag = XML_INPUT_AUTO_UTF16LE;
    1343                  }
    1344              }
    1345              break;
    1346  
    1347          case 0x4C:
    1348  	    if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
    1349  	        enc = XML_CHAR_ENCODING_EBCDIC;
    1350                  autoFlag = XML_INPUT_AUTO_OTHER;
    1351              }
    1352              break;
    1353  
    1354          case 0xEF:
    1355              if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
    1356                  enc = XML_CHAR_ENCODING_UTF8;
    1357                  autoFlag = XML_INPUT_AUTO_UTF8;
    1358                  bomSize = 3;
    1359              }
    1360              break;
    1361  
    1362          case 0xFE:
    1363              if (in[1] == 0xFF) {
    1364                  enc = XML_CHAR_ENCODING_UTF16BE;
    1365                  autoFlag = XML_INPUT_AUTO_UTF16BE;
    1366                  bomSize = 2;
    1367              }
    1368              break;
    1369  
    1370          case 0xFF:
    1371              if (in[1] == 0xFE) {
    1372                  enc = XML_CHAR_ENCODING_UTF16LE;
    1373                  autoFlag = XML_INPUT_AUTO_UTF16LE;
    1374                  bomSize = 2;
    1375              }
    1376              break;
    1377      }
    1378  
    1379      if (bomSize > 0) {
    1380          ctxt->input->cur += bomSize;
    1381      }
    1382  
    1383      if (enc != XML_CHAR_ENCODING_NONE) {
    1384          ctxt->input->flags |= autoFlag;
    1385          xmlSwitchEncoding(ctxt, enc);
    1386      }
    1387  }
    1388  
    1389  /**
    1390   * xmlSetDeclaredEncoding:
    1391   * @ctxt:  the parser context
    1392   * @encoding:  declared encoding
    1393   *
    1394   * Set the encoding from a declaration in the document.
    1395   *
    1396   * If no encoding was set yet, switch the encoding. Otherwise, only warn
    1397   * about encoding mismatches.
    1398   *
    1399   * Takes ownership of 'encoding'.
    1400   */
    1401  void
    1402  xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
    1403      if (ctxt->encoding != NULL)
    1404          xmlFree((xmlChar *) ctxt->encoding);
    1405      ctxt->encoding = encoding;
    1406  
    1407      if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
    1408          ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
    1409          xmlCharEncodingHandlerPtr handler;
    1410  
    1411          handler = xmlFindCharEncodingHandler((const char *) encoding);
    1412          if (handler == NULL) {
    1413              __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
    1414                               "Unsupported encoding: %s\n",
    1415                               encoding, NULL);
    1416              return;
    1417          }
    1418  
    1419          xmlSwitchToEncoding(ctxt, handler);
    1420          ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
    1421      } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
    1422          static const char *allowedUTF8[] = {
    1423              "UTF-8", "UTF8", NULL
    1424          };
    1425          static const char *allowedUTF16LE[] = {
    1426              "UTF-16", "UTF-16LE", "UTF16", NULL
    1427          };
    1428          static const char *allowedUTF16BE[] = {
    1429              "UTF-16", "UTF-16BE", "UTF16", NULL
    1430          };
    1431          const char **allowed = NULL;
    1432          const char *autoEnc = NULL;
    1433  
    1434          switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
    1435              case XML_INPUT_AUTO_UTF8:
    1436                  allowed = allowedUTF8;
    1437                  autoEnc = "UTF-8";
    1438                  break;
    1439              case XML_INPUT_AUTO_UTF16LE:
    1440                  allowed = allowedUTF16LE;
    1441                  autoEnc = "UTF-16LE";
    1442                  break;
    1443              case XML_INPUT_AUTO_UTF16BE:
    1444                  allowed = allowedUTF16BE;
    1445                  autoEnc = "UTF-16BE";
    1446                  break;
    1447          }
    1448  
    1449          if (allowed != NULL) {
    1450              const char **p;
    1451              int match = 0;
    1452  
    1453              for (p = allowed; *p != NULL; p++) {
    1454                  if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
    1455                      match = 1;
    1456                      break;
    1457                  }
    1458              }
    1459  
    1460              if (match == 0) {
    1461                  xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
    1462                                "Encoding '%s' doesn't match "
    1463                                "auto-detected '%s'\n",
    1464                                encoding, BAD_CAST autoEnc);
    1465              }
    1466          }
    1467      }
    1468  }
    1469  
    1470  /************************************************************************
    1471   *									*
    1472   *	Commodity functions to handle entities processing		*
    1473   *									*
    1474   ************************************************************************/
    1475  
    1476  /**
    1477   * xmlFreeInputStream:
    1478   * @input:  an xmlParserInputPtr
    1479   *
    1480   * Free up an input stream.
    1481   */
    1482  void
    1483  xmlFreeInputStream(xmlParserInputPtr input) {
    1484      if (input == NULL) return;
    1485  
    1486      if (input->filename != NULL) xmlFree((char *) input->filename);
    1487      if (input->directory != NULL) xmlFree((char *) input->directory);
    1488      if (input->version != NULL) xmlFree((char *) input->version);
    1489      if ((input->free != NULL) && (input->base != NULL))
    1490          input->free((xmlChar *) input->base);
    1491      if (input->buf != NULL)
    1492          xmlFreeParserInputBuffer(input->buf);
    1493      xmlFree(input);
    1494  }
    1495  
    1496  /**
    1497   * xmlNewInputStream:
    1498   * @ctxt:  an XML parser context
    1499   *
    1500   * Create a new input stream structure.
    1501   *
    1502   * Returns the new input stream or NULL
    1503   */
    1504  xmlParserInputPtr
    1505  xmlNewInputStream(xmlParserCtxtPtr ctxt) {
    1506      xmlParserInputPtr input;
    1507  
    1508      input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
    1509      if (input == NULL) {
    1510          xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
    1511  	return(NULL);
    1512      }
    1513      memset(input, 0, sizeof(xmlParserInput));
    1514      input->line = 1;
    1515      input->col = 1;
    1516  
    1517      /*
    1518       * If the context is NULL the id cannot be initialized, but that
    1519       * should not happen while parsing which is the situation where
    1520       * the id is actually needed.
    1521       */
    1522      if (ctxt != NULL) {
    1523          if (input->id >= INT_MAX) {
    1524              xmlErrMemory(ctxt, "Input ID overflow\n");
    1525              return(NULL);
    1526          }
    1527          input->id = ctxt->input_id++;
    1528      }
    1529  
    1530      return(input);
    1531  }
    1532  
    1533  /**
    1534   * xmlNewIOInputStream:
    1535   * @ctxt:  an XML parser context
    1536   * @input:  an I/O Input
    1537   * @enc:  the charset encoding if known
    1538   *
    1539   * Create a new input stream structure encapsulating the @input into
    1540   * a stream suitable for the parser.
    1541   *
    1542   * Returns the new input stream or NULL
    1543   */
    1544  xmlParserInputPtr
    1545  xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
    1546  	            xmlCharEncoding enc) {
    1547      xmlParserInputPtr inputStream;
    1548  
    1549      if (input == NULL) return(NULL);
    1550      if (xmlParserDebugEntities)
    1551  	xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
    1552      inputStream = xmlNewInputStream(ctxt);
    1553      if (inputStream == NULL) {
    1554  	return(NULL);
    1555      }
    1556      inputStream->filename = NULL;
    1557      inputStream->buf = input;
    1558      xmlBufResetInput(inputStream->buf->buffer, inputStream);
    1559  
    1560      if (enc != XML_CHAR_ENCODING_NONE) {
    1561          xmlSwitchEncoding(ctxt, enc);
    1562      }
    1563  
    1564      return(inputStream);
    1565  }
    1566  
    1567  /**
    1568   * xmlNewEntityInputStream:
    1569   * @ctxt:  an XML parser context
    1570   * @entity:  an Entity pointer
    1571   *
    1572   * DEPRECATED: Internal function, do not use.
    1573   *
    1574   * Create a new input stream based on an xmlEntityPtr
    1575   *
    1576   * Returns the new input stream or NULL
    1577   */
    1578  xmlParserInputPtr
    1579  xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
    1580      xmlParserInputPtr input;
    1581  
    1582      if (entity == NULL) {
    1583          xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
    1584  	               NULL);
    1585  	return(NULL);
    1586      }
    1587      if (xmlParserDebugEntities)
    1588  	xmlGenericError(xmlGenericErrorContext,
    1589  		"new input from entity: %s\n", entity->name);
    1590      if (entity->content == NULL) {
    1591  	switch (entity->etype) {
    1592              case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
    1593  	        xmlErrInternal(ctxt, "Cannot parse entity %s\n",
    1594  		               entity->name);
    1595                  break;
    1596              case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
    1597              case XML_EXTERNAL_PARAMETER_ENTITY:
    1598  		input = xmlLoadExternalEntity((char *) entity->URI,
    1599  		       (char *) entity->ExternalID, ctxt);
    1600                  if (input != NULL)
    1601                      input->entity = entity;
    1602                  return(input);
    1603              case XML_INTERNAL_GENERAL_ENTITY:
    1604  	        xmlErrInternal(ctxt,
    1605  		      "Internal entity %s without content !\n",
    1606  		               entity->name);
    1607                  break;
    1608              case XML_INTERNAL_PARAMETER_ENTITY:
    1609  	        xmlErrInternal(ctxt,
    1610  		      "Internal parameter entity %s without content !\n",
    1611  		               entity->name);
    1612                  break;
    1613              case XML_INTERNAL_PREDEFINED_ENTITY:
    1614  	        xmlErrInternal(ctxt,
    1615  		      "Predefined entity %s without content !\n",
    1616  		               entity->name);
    1617                  break;
    1618  	}
    1619  	return(NULL);
    1620      }
    1621      input = xmlNewInputStream(ctxt);
    1622      if (input == NULL) {
    1623  	return(NULL);
    1624      }
    1625      if (entity->URI != NULL)
    1626  	input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
    1627      input->base = entity->content;
    1628      if (entity->length == 0)
    1629          entity->length = xmlStrlen(entity->content);
    1630      input->cur = entity->content;
    1631      input->length = entity->length;
    1632      input->end = &entity->content[input->length];
    1633      input->entity = entity;
    1634      return(input);
    1635  }
    1636  
    1637  /**
    1638   * xmlNewStringInputStream:
    1639   * @ctxt:  an XML parser context
    1640   * @buffer:  an memory buffer
    1641   *
    1642   * Create a new input stream based on a memory buffer.
    1643   * Returns the new input stream
    1644   */
    1645  xmlParserInputPtr
    1646  xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
    1647      xmlParserInputPtr input;
    1648      xmlParserInputBufferPtr buf;
    1649  
    1650      if (buffer == NULL) {
    1651          xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
    1652  	               NULL);
    1653  	return(NULL);
    1654      }
    1655      if (xmlParserDebugEntities)
    1656  	xmlGenericError(xmlGenericErrorContext,
    1657  		"new fixed input: %.30s\n", buffer);
    1658      buf = xmlParserInputBufferCreateString(buffer);
    1659      if (buf == NULL) {
    1660  	xmlErrMemory(ctxt, NULL);
    1661          return(NULL);
    1662      }
    1663      input = xmlNewInputStream(ctxt);
    1664      if (input == NULL) {
    1665          xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
    1666  	xmlFreeParserInputBuffer(buf);
    1667  	return(NULL);
    1668      }
    1669      input->buf = buf;
    1670      xmlBufResetInput(input->buf->buffer, input);
    1671      return(input);
    1672  }
    1673  
    1674  /**
    1675   * xmlNewInputFromFile:
    1676   * @ctxt:  an XML parser context
    1677   * @filename:  the filename to use as entity
    1678   *
    1679   * Create a new input stream based on a file or an URL.
    1680   *
    1681   * Returns the new input stream or NULL in case of error
    1682   */
    1683  xmlParserInputPtr
    1684  xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
    1685      xmlParserInputBufferPtr buf;
    1686      xmlParserInputPtr inputStream;
    1687      char *directory = NULL;
    1688      xmlChar *URI = NULL;
    1689  
    1690      if (xmlParserDebugEntities)
    1691  	xmlGenericError(xmlGenericErrorContext,
    1692  		"new input from file: %s\n", filename);
    1693      if (ctxt == NULL) return(NULL);
    1694      buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
    1695      if (buf == NULL) {
    1696  	if (filename == NULL)
    1697  	    __xmlLoaderErr(ctxt,
    1698  	                   "failed to load external entity: NULL filename \n",
    1699  			   NULL);
    1700  	else
    1701  	    __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
    1702  			   (const char *) filename);
    1703  	return(NULL);
    1704      }
    1705  
    1706      inputStream = xmlNewInputStream(ctxt);
    1707      if (inputStream == NULL) {
    1708  	xmlFreeParserInputBuffer(buf);
    1709  	return(NULL);
    1710      }
    1711  
    1712      inputStream->buf = buf;
    1713      inputStream = xmlCheckHTTPInput(ctxt, inputStream);
    1714      if (inputStream == NULL)
    1715          return(NULL);
    1716  
    1717      if (inputStream->filename == NULL)
    1718  	URI = xmlStrdup((xmlChar *) filename);
    1719      else
    1720  	URI = xmlStrdup((xmlChar *) inputStream->filename);
    1721      directory = xmlParserGetDirectory((const char *) URI);
    1722      if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
    1723      inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
    1724      if (URI != NULL) xmlFree((char *) URI);
    1725      inputStream->directory = directory;
    1726  
    1727      xmlBufResetInput(inputStream->buf->buffer, inputStream);
    1728      if ((ctxt->directory == NULL) && (directory != NULL))
    1729          ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
    1730      return(inputStream);
    1731  }
    1732  
    1733  /************************************************************************
    1734   *									*
    1735   *		Commodity functions to handle parser contexts		*
    1736   *									*
    1737   ************************************************************************/
    1738  
    1739  /**
    1740   * xmlInitSAXParserCtxt:
    1741   * @ctxt:  XML parser context
    1742   * @sax:  SAX handlert
    1743   * @userData:  user data
    1744   *
    1745   * Initialize a SAX parser context
    1746   *
    1747   * Returns 0 in case of success and -1 in case of error
    1748   */
    1749  
    1750  static int
    1751  xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
    1752                       void *userData)
    1753  {
    1754      xmlParserInputPtr input;
    1755  
    1756      if(ctxt==NULL) {
    1757          xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
    1758          return(-1);
    1759      }
    1760  
    1761      xmlInitParser();
    1762  
    1763      if (ctxt->dict == NULL)
    1764  	ctxt->dict = xmlDictCreate();
    1765      if (ctxt->dict == NULL) {
    1766          xmlErrMemory(NULL, "cannot initialize parser context\n");
    1767  	return(-1);
    1768      }
    1769      xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
    1770  
    1771      if (ctxt->sax == NULL)
    1772  	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
    1773      if (ctxt->sax == NULL) {
    1774          xmlErrMemory(NULL, "cannot initialize parser context\n");
    1775  	return(-1);
    1776      }
    1777      if (sax == NULL) {
    1778  	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
    1779          xmlSAXVersion(ctxt->sax, 2);
    1780          ctxt->userData = ctxt;
    1781      } else {
    1782  	if (sax->initialized == XML_SAX2_MAGIC) {
    1783  	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
    1784          } else {
    1785  	    memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
    1786  	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
    1787          }
    1788          ctxt->userData = userData ? userData : ctxt;
    1789      }
    1790  
    1791      ctxt->maxatts = 0;
    1792      ctxt->atts = NULL;
    1793      /* Allocate the Input stack */
    1794      if (ctxt->inputTab == NULL) {
    1795  	ctxt->inputTab = (xmlParserInputPtr *)
    1796  		    xmlMalloc(5 * sizeof(xmlParserInputPtr));
    1797  	ctxt->inputMax = 5;
    1798      }
    1799      if (ctxt->inputTab == NULL) {
    1800          xmlErrMemory(NULL, "cannot initialize parser context\n");
    1801  	ctxt->inputNr = 0;
    1802  	ctxt->inputMax = 0;
    1803  	ctxt->input = NULL;
    1804  	return(-1);
    1805      }
    1806      while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
    1807          xmlFreeInputStream(input);
    1808      }
    1809      ctxt->inputNr = 0;
    1810      ctxt->input = NULL;
    1811  
    1812      ctxt->version = NULL;
    1813      ctxt->encoding = NULL;
    1814      ctxt->standalone = -1;
    1815      ctxt->hasExternalSubset = 0;
    1816      ctxt->hasPErefs = 0;
    1817      ctxt->html = 0;
    1818      ctxt->external = 0;
    1819      ctxt->instate = XML_PARSER_START;
    1820      ctxt->token = 0;
    1821      ctxt->directory = NULL;
    1822  
    1823      /* Allocate the Node stack */
    1824      if (ctxt->nodeTab == NULL) {
    1825  	ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
    1826  	ctxt->nodeMax = 10;
    1827      }
    1828      if (ctxt->nodeTab == NULL) {
    1829          xmlErrMemory(NULL, "cannot initialize parser context\n");
    1830  	ctxt->nodeNr = 0;
    1831  	ctxt->nodeMax = 0;
    1832  	ctxt->node = NULL;
    1833  	ctxt->inputNr = 0;
    1834  	ctxt->inputMax = 0;
    1835  	ctxt->input = NULL;
    1836  	return(-1);
    1837      }
    1838      ctxt->nodeNr = 0;
    1839      ctxt->node = NULL;
    1840  
    1841      /* Allocate the Name stack */
    1842      if (ctxt->nameTab == NULL) {
    1843  	ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
    1844  	ctxt->nameMax = 10;
    1845      }
    1846      if (ctxt->nameTab == NULL) {
    1847          xmlErrMemory(NULL, "cannot initialize parser context\n");
    1848  	ctxt->nodeNr = 0;
    1849  	ctxt->nodeMax = 0;
    1850  	ctxt->node = NULL;
    1851  	ctxt->inputNr = 0;
    1852  	ctxt->inputMax = 0;
    1853  	ctxt->input = NULL;
    1854  	ctxt->nameNr = 0;
    1855  	ctxt->nameMax = 0;
    1856  	ctxt->name = NULL;
    1857  	return(-1);
    1858      }
    1859      ctxt->nameNr = 0;
    1860      ctxt->name = NULL;
    1861  
    1862      /* Allocate the space stack */
    1863      if (ctxt->spaceTab == NULL) {
    1864  	ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
    1865  	ctxt->spaceMax = 10;
    1866      }
    1867      if (ctxt->spaceTab == NULL) {
    1868          xmlErrMemory(NULL, "cannot initialize parser context\n");
    1869  	ctxt->nodeNr = 0;
    1870  	ctxt->nodeMax = 0;
    1871  	ctxt->node = NULL;
    1872  	ctxt->inputNr = 0;
    1873  	ctxt->inputMax = 0;
    1874  	ctxt->input = NULL;
    1875  	ctxt->nameNr = 0;
    1876  	ctxt->nameMax = 0;
    1877  	ctxt->name = NULL;
    1878  	ctxt->spaceNr = 0;
    1879  	ctxt->spaceMax = 0;
    1880  	ctxt->space = NULL;
    1881  	return(-1);
    1882      }
    1883      ctxt->spaceNr = 1;
    1884      ctxt->spaceMax = 10;
    1885      ctxt->spaceTab[0] = -1;
    1886      ctxt->space = &ctxt->spaceTab[0];
    1887      ctxt->myDoc = NULL;
    1888      ctxt->wellFormed = 1;
    1889      ctxt->nsWellFormed = 1;
    1890      ctxt->valid = 1;
    1891      ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
    1892      if (ctxt->loadsubset) {
    1893          ctxt->options |= XML_PARSE_DTDLOAD;
    1894      }
    1895      ctxt->validate = xmlDoValidityCheckingDefaultValue;
    1896      ctxt->pedantic = xmlPedanticParserDefaultValue;
    1897      if (ctxt->pedantic) {
    1898          ctxt->options |= XML_PARSE_PEDANTIC;
    1899      }
    1900      ctxt->linenumbers = xmlLineNumbersDefaultValue;
    1901      ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
    1902      if (ctxt->keepBlanks == 0) {
    1903  	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
    1904  	ctxt->options |= XML_PARSE_NOBLANKS;
    1905      }
    1906  
    1907      ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
    1908      ctxt->vctxt.userData = ctxt;
    1909      ctxt->vctxt.error = xmlParserValidityError;
    1910      ctxt->vctxt.warning = xmlParserValidityWarning;
    1911      if (ctxt->validate) {
    1912  	if (xmlGetWarningsDefaultValue == 0)
    1913  	    ctxt->vctxt.warning = NULL;
    1914  	else
    1915  	    ctxt->vctxt.warning = xmlParserValidityWarning;
    1916  	ctxt->vctxt.nodeMax = 0;
    1917          ctxt->options |= XML_PARSE_DTDVALID;
    1918      }
    1919      ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
    1920      if (ctxt->replaceEntities) {
    1921          ctxt->options |= XML_PARSE_NOENT;
    1922      }
    1923      ctxt->record_info = 0;
    1924      ctxt->checkIndex = 0;
    1925      ctxt->inSubset = 0;
    1926      ctxt->errNo = XML_ERR_OK;
    1927      ctxt->depth = 0;
    1928      ctxt->catalogs = NULL;
    1929      ctxt->sizeentities = 0;
    1930      ctxt->sizeentcopy = 0;
    1931      ctxt->input_id = 1;
    1932      ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
    1933      xmlInitNodeInfoSeq(&ctxt->node_seq);
    1934  
    1935      if (ctxt->nsdb == NULL) {
    1936          ctxt->nsdb = xmlParserNsCreate();
    1937          if (ctxt->nsdb == NULL) {
    1938              xmlErrMemory(ctxt, NULL);
    1939              return(-1);
    1940          }
    1941      }
    1942  
    1943      return(0);
    1944  }
    1945  
    1946  /**
    1947   * xmlInitParserCtxt:
    1948   * @ctxt:  an XML parser context
    1949   *
    1950   * DEPRECATED: Internal function which will be made private in a future
    1951   * version.
    1952   *
    1953   * Initialize a parser context
    1954   *
    1955   * Returns 0 in case of success and -1 in case of error
    1956   */
    1957  
    1958  int
    1959  xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
    1960  {
    1961      return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
    1962  }
    1963  
    1964  /**
    1965   * xmlFreeParserCtxt:
    1966   * @ctxt:  an XML parser context
    1967   *
    1968   * Free all the memory used by a parser context. However the parsed
    1969   * document in ctxt->myDoc is not freed.
    1970   */
    1971  
    1972  void
    1973  xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
    1974  {
    1975      xmlParserInputPtr input;
    1976  
    1977      if (ctxt == NULL) return;
    1978  
    1979      while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
    1980          xmlFreeInputStream(input);
    1981      }
    1982      if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
    1983      if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
    1984      if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
    1985      if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
    1986      if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
    1987      if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
    1988      if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
    1989      if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
    1990      if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
    1991  #ifdef LIBXML_SAX1_ENABLED
    1992      if ((ctxt->sax != NULL) &&
    1993          (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
    1994  #else
    1995      if (ctxt->sax != NULL)
    1996  #endif /* LIBXML_SAX1_ENABLED */
    1997          xmlFree(ctxt->sax);
    1998      if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
    1999      if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
    2000      if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
    2001      if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
    2002      if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
    2003      if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
    2004      if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
    2005      if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
    2006      if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
    2007      if (ctxt->attsDefault != NULL)
    2008          xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
    2009      if (ctxt->attsSpecial != NULL)
    2010          xmlHashFree(ctxt->attsSpecial, NULL);
    2011      if (ctxt->freeElems != NULL) {
    2012          xmlNodePtr cur, next;
    2013  
    2014  	cur = ctxt->freeElems;
    2015  	while (cur != NULL) {
    2016  	    next = cur->next;
    2017  	    xmlFree(cur);
    2018  	    cur = next;
    2019  	}
    2020      }
    2021      if (ctxt->freeAttrs != NULL) {
    2022          xmlAttrPtr cur, next;
    2023  
    2024  	cur = ctxt->freeAttrs;
    2025  	while (cur != NULL) {
    2026  	    next = cur->next;
    2027  	    xmlFree(cur);
    2028  	    cur = next;
    2029  	}
    2030      }
    2031      /*
    2032       * cleanup the error strings
    2033       */
    2034      if (ctxt->lastError.message != NULL)
    2035          xmlFree(ctxt->lastError.message);
    2036      if (ctxt->lastError.file != NULL)
    2037          xmlFree(ctxt->lastError.file);
    2038      if (ctxt->lastError.str1 != NULL)
    2039          xmlFree(ctxt->lastError.str1);
    2040      if (ctxt->lastError.str2 != NULL)
    2041          xmlFree(ctxt->lastError.str2);
    2042      if (ctxt->lastError.str3 != NULL)
    2043          xmlFree(ctxt->lastError.str3);
    2044  
    2045  #ifdef LIBXML_CATALOG_ENABLED
    2046      if (ctxt->catalogs != NULL)
    2047  	xmlCatalogFreeLocal(ctxt->catalogs);
    2048  #endif
    2049      xmlFree(ctxt);
    2050  }
    2051  
    2052  /**
    2053   * xmlNewParserCtxt:
    2054   *
    2055   * Allocate and initialize a new parser context.
    2056   *
    2057   * Returns the xmlParserCtxtPtr or NULL
    2058   */
    2059  
    2060  xmlParserCtxtPtr
    2061  xmlNewParserCtxt(void)
    2062  {
    2063      return(xmlNewSAXParserCtxt(NULL, NULL));
    2064  }
    2065  
    2066  /**
    2067   * xmlNewSAXParserCtxt:
    2068   * @sax:  SAX handler
    2069   * @userData:  user data
    2070   *
    2071   * Allocate and initialize a new SAX parser context. If userData is NULL,
    2072   * the parser context will be passed as user data.
    2073   *
    2074   * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
    2075   */
    2076  
    2077  xmlParserCtxtPtr
    2078  xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
    2079  {
    2080      xmlParserCtxtPtr ctxt;
    2081  
    2082      ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
    2083      if (ctxt == NULL) {
    2084  	xmlErrMemory(NULL, "cannot allocate parser context\n");
    2085  	return(NULL);
    2086      }
    2087      memset(ctxt, 0, sizeof(xmlParserCtxt));
    2088      if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
    2089          xmlFreeParserCtxt(ctxt);
    2090  	return(NULL);
    2091      }
    2092      return(ctxt);
    2093  }
    2094  
    2095  /************************************************************************
    2096   *									*
    2097   *		Handling of node information				*
    2098   *									*
    2099   ************************************************************************/
    2100  
    2101  /**
    2102   * xmlClearParserCtxt:
    2103   * @ctxt:  an XML parser context
    2104   *
    2105   * Clear (release owned resources) and reinitialize a parser context
    2106   */
    2107  
    2108  void
    2109  xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
    2110  {
    2111    if (ctxt==NULL)
    2112      return;
    2113    xmlClearNodeInfoSeq(&ctxt->node_seq);
    2114    xmlCtxtReset(ctxt);
    2115  }
    2116  
    2117  
    2118  /**
    2119   * xmlParserFindNodeInfo:
    2120   * @ctx:  an XML parser context
    2121   * @node:  an XML node within the tree
    2122   *
    2123   * DEPRECATED: Don't use.
    2124   *
    2125   * Find the parser node info struct for a given node
    2126   *
    2127   * Returns an xmlParserNodeInfo block pointer or NULL
    2128   */
    2129  const xmlParserNodeInfo *
    2130  xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
    2131  {
    2132      unsigned long pos;
    2133  
    2134      if ((ctx == NULL) || (node == NULL))
    2135          return (NULL);
    2136      /* Find position where node should be at */
    2137      pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
    2138      if (pos < ctx->node_seq.length
    2139          && ctx->node_seq.buffer[pos].node == node)
    2140          return &ctx->node_seq.buffer[pos];
    2141      else
    2142          return NULL;
    2143  }
    2144  
    2145  
    2146  /**
    2147   * xmlInitNodeInfoSeq:
    2148   * @seq:  a node info sequence pointer
    2149   *
    2150   * DEPRECATED: Don't use.
    2151   *
    2152   * -- Initialize (set to initial state) node info sequence
    2153   */
    2154  void
    2155  xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
    2156  {
    2157      if (seq == NULL)
    2158          return;
    2159      seq->length = 0;
    2160      seq->maximum = 0;
    2161      seq->buffer = NULL;
    2162  }
    2163  
    2164  /**
    2165   * xmlClearNodeInfoSeq:
    2166   * @seq:  a node info sequence pointer
    2167   *
    2168   * DEPRECATED: Don't use.
    2169   *
    2170   * -- Clear (release memory and reinitialize) node
    2171   *   info sequence
    2172   */
    2173  void
    2174  xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
    2175  {
    2176      if (seq == NULL)
    2177          return;
    2178      if (seq->buffer != NULL)
    2179          xmlFree(seq->buffer);
    2180      xmlInitNodeInfoSeq(seq);
    2181  }
    2182  
    2183  /**
    2184   * xmlParserFindNodeInfoIndex:
    2185   * @seq:  a node info sequence pointer
    2186   * @node:  an XML node pointer
    2187   *
    2188   * DEPRECATED: Don't use.
    2189   *
    2190   * xmlParserFindNodeInfoIndex : Find the index that the info record for
    2191   *   the given node is or should be at in a sorted sequence
    2192   *
    2193   * Returns a long indicating the position of the record
    2194   */
    2195  unsigned long
    2196  xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
    2197                             const xmlNodePtr node)
    2198  {
    2199      unsigned long upper, lower, middle;
    2200      int found = 0;
    2201  
    2202      if ((seq == NULL) || (node == NULL))
    2203          return ((unsigned long) -1);
    2204  
    2205      /* Do a binary search for the key */
    2206      lower = 1;
    2207      upper = seq->length;
    2208      middle = 0;
    2209      while (lower <= upper && !found) {
    2210          middle = lower + (upper - lower) / 2;
    2211          if (node == seq->buffer[middle - 1].node)
    2212              found = 1;
    2213          else if (node < seq->buffer[middle - 1].node)
    2214              upper = middle - 1;
    2215          else
    2216              lower = middle + 1;
    2217      }
    2218  
    2219      /* Return position */
    2220      if (middle == 0 || seq->buffer[middle - 1].node < node)
    2221          return middle;
    2222      else
    2223          return middle - 1;
    2224  }
    2225  
    2226  
    2227  /**
    2228   * xmlParserAddNodeInfo:
    2229   * @ctxt:  an XML parser context
    2230   * @info:  a node info sequence pointer
    2231   *
    2232   * DEPRECATED: Don't use.
    2233   *
    2234   * Insert node info record into the sorted sequence
    2235   */
    2236  void
    2237  xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
    2238                       const xmlParserNodeInfoPtr info)
    2239  {
    2240      unsigned long pos;
    2241  
    2242      if ((ctxt == NULL) || (info == NULL)) return;
    2243  
    2244      /* Find pos and check to see if node is already in the sequence */
    2245      pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
    2246                                       info->node);
    2247  
    2248      if ((pos < ctxt->node_seq.length) &&
    2249          (ctxt->node_seq.buffer != NULL) &&
    2250          (ctxt->node_seq.buffer[pos].node == info->node)) {
    2251          ctxt->node_seq.buffer[pos] = *info;
    2252      }
    2253  
    2254      /* Otherwise, we need to add new node to buffer */
    2255      else {
    2256          if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
    2257  	    (ctxt->node_seq.buffer == NULL)) {
    2258              xmlParserNodeInfo *tmp_buffer;
    2259              unsigned int byte_size;
    2260  
    2261              if (ctxt->node_seq.maximum == 0)
    2262                  ctxt->node_seq.maximum = 2;
    2263              byte_size = (sizeof(*ctxt->node_seq.buffer) *
    2264  			(2 * ctxt->node_seq.maximum));
    2265  
    2266              if (ctxt->node_seq.buffer == NULL)
    2267                  tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
    2268              else
    2269                  tmp_buffer =
    2270                      (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
    2271                                                       byte_size);
    2272  
    2273              if (tmp_buffer == NULL) {
    2274  		xmlErrMemory(ctxt, "failed to allocate buffer\n");
    2275                  return;
    2276              }
    2277              ctxt->node_seq.buffer = tmp_buffer;
    2278              ctxt->node_seq.maximum *= 2;
    2279          }
    2280  
    2281          /* If position is not at end, move elements out of the way */
    2282          if (pos != ctxt->node_seq.length) {
    2283              unsigned long i;
    2284  
    2285              for (i = ctxt->node_seq.length; i > pos; i--)
    2286                  ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
    2287          }
    2288  
    2289          /* Copy element and increase length */
    2290          ctxt->node_seq.buffer[pos] = *info;
    2291          ctxt->node_seq.length++;
    2292      }
    2293  }
    2294  
    2295  /************************************************************************
    2296   *									*
    2297   *		Defaults settings					*
    2298   *									*
    2299   ************************************************************************/
    2300  /**
    2301   * xmlPedanticParserDefault:
    2302   * @val:  int 0 or 1
    2303   *
    2304   * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
    2305   *
    2306   * Set and return the previous value for enabling pedantic warnings.
    2307   *
    2308   * Returns the last value for 0 for no substitution, 1 for substitution.
    2309   */
    2310  
    2311  int
    2312  xmlPedanticParserDefault(int val) {
    2313      int old = xmlPedanticParserDefaultValue;
    2314  
    2315      xmlPedanticParserDefaultValue = val;
    2316      return(old);
    2317  }
    2318  
    2319  /**
    2320   * xmlLineNumbersDefault:
    2321   * @val:  int 0 or 1
    2322   *
    2323   * DEPRECATED: The modern options API always enables line numbers.
    2324   *
    2325   * Set and return the previous value for enabling line numbers in elements
    2326   * contents. This may break on old application and is turned off by default.
    2327   *
    2328   * Returns the last value for 0 for no substitution, 1 for substitution.
    2329   */
    2330  
    2331  int
    2332  xmlLineNumbersDefault(int val) {
    2333      int old = xmlLineNumbersDefaultValue;
    2334  
    2335      xmlLineNumbersDefaultValue = val;
    2336      return(old);
    2337  }
    2338  
    2339  /**
    2340   * xmlSubstituteEntitiesDefault:
    2341   * @val:  int 0 or 1
    2342   *
    2343   * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
    2344   *
    2345   * Set and return the previous value for default entity support.
    2346   * Initially the parser always keep entity references instead of substituting
    2347   * entity values in the output. This function has to be used to change the
    2348   * default parser behavior
    2349   * SAX::substituteEntities() has to be used for changing that on a file by
    2350   * file basis.
    2351   *
    2352   * Returns the last value for 0 for no substitution, 1 for substitution.
    2353   */
    2354  
    2355  int
    2356  xmlSubstituteEntitiesDefault(int val) {
    2357      int old = xmlSubstituteEntitiesDefaultValue;
    2358  
    2359      xmlSubstituteEntitiesDefaultValue = val;
    2360      return(old);
    2361  }
    2362  
    2363  /**
    2364   * xmlKeepBlanksDefault:
    2365   * @val:  int 0 or 1
    2366   *
    2367   * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
    2368   *
    2369   * Set and return the previous value for default blanks text nodes support.
    2370   * The 1.x version of the parser used an heuristic to try to detect
    2371   * ignorable white spaces. As a result the SAX callback was generating
    2372   * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
    2373   * using the DOM output text nodes containing those blanks were not generated.
    2374   * The 2.x and later version will switch to the XML standard way and
    2375   * ignorableWhitespace() are only generated when running the parser in
    2376   * validating mode and when the current element doesn't allow CDATA or
    2377   * mixed content.
    2378   * This function is provided as a way to force the standard behavior
    2379   * on 1.X libs and to switch back to the old mode for compatibility when
    2380   * running 1.X client code on 2.X . Upgrade of 1.X code should be done
    2381   * by using xmlIsBlankNode() commodity function to detect the "empty"
    2382   * nodes generated.
    2383   * This value also affect autogeneration of indentation when saving code
    2384   * if blanks sections are kept, indentation is not generated.
    2385   *
    2386   * Returns the last value for 0 for no substitution, 1 for substitution.
    2387   */
    2388  
    2389  int
    2390  xmlKeepBlanksDefault(int val) {
    2391      int old = xmlKeepBlanksDefaultValue;
    2392  
    2393      xmlKeepBlanksDefaultValue = val;
    2394  #ifdef LIBXML_OUTPUT_ENABLED
    2395      if (!val)
    2396          xmlIndentTreeOutput = 1;
    2397  #endif
    2398      return(old);
    2399  }
    2400