(root)/
Python-3.11.7/
Lib/
xml/
sax/
xmlreader.py
       1  """An XML Reader is the SAX 2 name for an XML parser. XML Parsers
       2  should be based on this code. """
       3  
       4  from . import handler
       5  
       6  from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException
       7  
       8  
       9  # ===== XMLREADER =====
      10  
      11  class ESC[4;38;5;81mXMLReader:
      12      """Interface for reading an XML document using callbacks.
      13  
      14      XMLReader is the interface that an XML parser's SAX2 driver must
      15      implement. This interface allows an application to set and query
      16      features and properties in the parser, to register event handlers
      17      for document processing, and to initiate a document parse.
      18  
      19      All SAX interfaces are assumed to be synchronous: the parse
      20      methods must not return until parsing is complete, and readers
      21      must wait for an event-handler callback to return before reporting
      22      the next event."""
      23  
      24      def __init__(self):
      25          self._cont_handler = handler.ContentHandler()
      26          self._dtd_handler = handler.DTDHandler()
      27          self._ent_handler = handler.EntityResolver()
      28          self._err_handler = handler.ErrorHandler()
      29  
      30      def parse(self, source):
      31          "Parse an XML document from a system identifier or an InputSource."
      32          raise NotImplementedError("This method must be implemented!")
      33  
      34      def getContentHandler(self):
      35          "Returns the current ContentHandler."
      36          return self._cont_handler
      37  
      38      def setContentHandler(self, handler):
      39          "Registers a new object to receive document content events."
      40          self._cont_handler = handler
      41  
      42      def getDTDHandler(self):
      43          "Returns the current DTD handler."
      44          return self._dtd_handler
      45  
      46      def setDTDHandler(self, handler):
      47          "Register an object to receive basic DTD-related events."
      48          self._dtd_handler = handler
      49  
      50      def getEntityResolver(self):
      51          "Returns the current EntityResolver."
      52          return self._ent_handler
      53  
      54      def setEntityResolver(self, resolver):
      55          "Register an object to resolve external entities."
      56          self._ent_handler = resolver
      57  
      58      def getErrorHandler(self):
      59          "Returns the current ErrorHandler."
      60          return self._err_handler
      61  
      62      def setErrorHandler(self, handler):
      63          "Register an object to receive error-message events."
      64          self._err_handler = handler
      65  
      66      def setLocale(self, locale):
      67          """Allow an application to set the locale for errors and warnings.
      68  
      69          SAX parsers are not required to provide localization for errors
      70          and warnings; if they cannot support the requested locale,
      71          however, they must raise a SAX exception. Applications may
      72          request a locale change in the middle of a parse."""
      73          raise SAXNotSupportedException("Locale support not implemented")
      74  
      75      def getFeature(self, name):
      76          "Looks up and returns the state of a SAX2 feature."
      77          raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
      78  
      79      def setFeature(self, name, state):
      80          "Sets the state of a SAX2 feature."
      81          raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
      82  
      83      def getProperty(self, name):
      84          "Looks up and returns the value of a SAX2 property."
      85          raise SAXNotRecognizedException("Property '%s' not recognized" % name)
      86  
      87      def setProperty(self, name, value):
      88          "Sets the value of a SAX2 property."
      89          raise SAXNotRecognizedException("Property '%s' not recognized" % name)
      90  
      91  class ESC[4;38;5;81mIncrementalParser(ESC[4;38;5;149mXMLReader):
      92      """This interface adds three extra methods to the XMLReader
      93      interface that allow XML parsers to support incremental
      94      parsing. Support for this interface is optional, since not all
      95      underlying XML parsers support this functionality.
      96  
      97      When the parser is instantiated it is ready to begin accepting
      98      data from the feed method immediately. After parsing has been
      99      finished with a call to close the reset method must be called to
     100      make the parser ready to accept new data, either from feed or
     101      using the parse method.
     102  
     103      Note that these methods must _not_ be called during parsing, that
     104      is, after parse has been called and before it returns.
     105  
     106      By default, the class also implements the parse method of the XMLReader
     107      interface using the feed, close and reset methods of the
     108      IncrementalParser interface as a convenience to SAX 2.0 driver
     109      writers."""
     110  
     111      def __init__(self, bufsize=2**16):
     112          self._bufsize = bufsize
     113          XMLReader.__init__(self)
     114  
     115      def parse(self, source):
     116          from . import saxutils
     117          source = saxutils.prepare_input_source(source)
     118  
     119          self.prepareParser(source)
     120          file = source.getCharacterStream()
     121          if file is None:
     122              file = source.getByteStream()
     123          buffer = file.read(self._bufsize)
     124          while buffer:
     125              self.feed(buffer)
     126              buffer = file.read(self._bufsize)
     127          self.close()
     128  
     129      def feed(self, data):
     130          """This method gives the raw XML data in the data parameter to
     131          the parser and makes it parse the data, emitting the
     132          corresponding events. It is allowed for XML constructs to be
     133          split across several calls to feed.
     134  
     135          feed may raise SAXException."""
     136          raise NotImplementedError("This method must be implemented!")
     137  
     138      def prepareParser(self, source):
     139          """This method is called by the parse implementation to allow
     140          the SAX 2.0 driver to prepare itself for parsing."""
     141          raise NotImplementedError("prepareParser must be overridden!")
     142  
     143      def close(self):
     144          """This method is called when the entire XML document has been
     145          passed to the parser through the feed method, to notify the
     146          parser that there are no more data. This allows the parser to
     147          do the final checks on the document and empty the internal
     148          data buffer.
     149  
     150          The parser will not be ready to parse another document until
     151          the reset method has been called.
     152  
     153          close may raise SAXException."""
     154          raise NotImplementedError("This method must be implemented!")
     155  
     156      def reset(self):
     157          """This method is called after close has been called to reset
     158          the parser so that it is ready to parse new documents. The
     159          results of calling parse or feed after close without calling
     160          reset are undefined."""
     161          raise NotImplementedError("This method must be implemented!")
     162  
     163  # ===== LOCATOR =====
     164  
     165  class ESC[4;38;5;81mLocator:
     166      """Interface for associating a SAX event with a document
     167      location. A locator object will return valid results only during
     168      calls to DocumentHandler methods; at any other time, the
     169      results are unpredictable."""
     170  
     171      def getColumnNumber(self):
     172          "Return the column number where the current event ends."
     173          return -1
     174  
     175      def getLineNumber(self):
     176          "Return the line number where the current event ends."
     177          return -1
     178  
     179      def getPublicId(self):
     180          "Return the public identifier for the current event."
     181          return None
     182  
     183      def getSystemId(self):
     184          "Return the system identifier for the current event."
     185          return None
     186  
     187  # ===== INPUTSOURCE =====
     188  
     189  class ESC[4;38;5;81mInputSource:
     190      """Encapsulation of the information needed by the XMLReader to
     191      read entities.
     192  
     193      This class may include information about the public identifier,
     194      system identifier, byte stream (possibly with character encoding
     195      information) and/or the character stream of an entity.
     196  
     197      Applications will create objects of this class for use in the
     198      XMLReader.parse method and for returning from
     199      EntityResolver.resolveEntity.
     200  
     201      An InputSource belongs to the application, the XMLReader is not
     202      allowed to modify InputSource objects passed to it from the
     203      application, although it may make copies and modify those."""
     204  
     205      def __init__(self, system_id = None):
     206          self.__system_id = system_id
     207          self.__public_id = None
     208          self.__encoding  = None
     209          self.__bytefile  = None
     210          self.__charfile  = None
     211  
     212      def setPublicId(self, public_id):
     213          "Sets the public identifier of this InputSource."
     214          self.__public_id = public_id
     215  
     216      def getPublicId(self):
     217          "Returns the public identifier of this InputSource."
     218          return self.__public_id
     219  
     220      def setSystemId(self, system_id):
     221          "Sets the system identifier of this InputSource."
     222          self.__system_id = system_id
     223  
     224      def getSystemId(self):
     225          "Returns the system identifier of this InputSource."
     226          return self.__system_id
     227  
     228      def setEncoding(self, encoding):
     229          """Sets the character encoding of this InputSource.
     230  
     231          The encoding must be a string acceptable for an XML encoding
     232          declaration (see section 4.3.3 of the XML recommendation).
     233  
     234          The encoding attribute of the InputSource is ignored if the
     235          InputSource also contains a character stream."""
     236          self.__encoding = encoding
     237  
     238      def getEncoding(self):
     239          "Get the character encoding of this InputSource."
     240          return self.__encoding
     241  
     242      def setByteStream(self, bytefile):
     243          """Set the byte stream (a Python file-like object which does
     244          not perform byte-to-character conversion) for this input
     245          source.
     246  
     247          The SAX parser will ignore this if there is also a character
     248          stream specified, but it will use a byte stream in preference
     249          to opening a URI connection itself.
     250  
     251          If the application knows the character encoding of the byte
     252          stream, it should set it with the setEncoding method."""
     253          self.__bytefile = bytefile
     254  
     255      def getByteStream(self):
     256          """Get the byte stream for this input source.
     257  
     258          The getEncoding method will return the character encoding for
     259          this byte stream, or None if unknown."""
     260          return self.__bytefile
     261  
     262      def setCharacterStream(self, charfile):
     263          """Set the character stream for this input source. (The stream
     264          must be a Python 2.0 Unicode-wrapped file-like that performs
     265          conversion to Unicode strings.)
     266  
     267          If there is a character stream specified, the SAX parser will
     268          ignore any byte stream and will not attempt to open a URI
     269          connection to the system identifier."""
     270          self.__charfile = charfile
     271  
     272      def getCharacterStream(self):
     273          "Get the character stream for this input source."
     274          return self.__charfile
     275  
     276  # ===== ATTRIBUTESIMPL =====
     277  
     278  class ESC[4;38;5;81mAttributesImpl:
     279  
     280      def __init__(self, attrs):
     281          """Non-NS-aware implementation.
     282  
     283          attrs should be of the form {name : value}."""
     284          self._attrs = attrs
     285  
     286      def getLength(self):
     287          return len(self._attrs)
     288  
     289      def getType(self, name):
     290          return "CDATA"
     291  
     292      def getValue(self, name):
     293          return self._attrs[name]
     294  
     295      def getValueByQName(self, name):
     296          return self._attrs[name]
     297  
     298      def getNameByQName(self, name):
     299          if name not in self._attrs:
     300              raise KeyError(name)
     301          return name
     302  
     303      def getQNameByName(self, name):
     304          if name not in self._attrs:
     305              raise KeyError(name)
     306          return name
     307  
     308      def getNames(self):
     309          return list(self._attrs.keys())
     310  
     311      def getQNames(self):
     312          return list(self._attrs.keys())
     313  
     314      def __len__(self):
     315          return len(self._attrs)
     316  
     317      def __getitem__(self, name):
     318          return self._attrs[name]
     319  
     320      def keys(self):
     321          return list(self._attrs.keys())
     322  
     323      def __contains__(self, name):
     324          return name in self._attrs
     325  
     326      def get(self, name, alternative=None):
     327          return self._attrs.get(name, alternative)
     328  
     329      def copy(self):
     330          return self.__class__(self._attrs)
     331  
     332      def items(self):
     333          return list(self._attrs.items())
     334  
     335      def values(self):
     336          return list(self._attrs.values())
     337  
     338  # ===== ATTRIBUTESNSIMPL =====
     339  
     340  class ESC[4;38;5;81mAttributesNSImpl(ESC[4;38;5;149mAttributesImpl):
     341  
     342      def __init__(self, attrs, qnames):
     343          """NS-aware implementation.
     344  
     345          attrs should be of the form {(ns_uri, lname): value, ...}.
     346          qnames of the form {(ns_uri, lname): qname, ...}."""
     347          self._attrs = attrs
     348          self._qnames = qnames
     349  
     350      def getValueByQName(self, name):
     351          for (nsname, qname) in self._qnames.items():
     352              if qname == name:
     353                  return self._attrs[nsname]
     354  
     355          raise KeyError(name)
     356  
     357      def getNameByQName(self, name):
     358          for (nsname, qname) in self._qnames.items():
     359              if qname == name:
     360                  return nsname
     361  
     362          raise KeyError(name)
     363  
     364      def getQNameByName(self, name):
     365          return self._qnames[name]
     366  
     367      def getQNames(self):
     368          return list(self._qnames.values())
     369  
     370      def copy(self):
     371          return self.__class__(self._attrs, self._qnames)
     372  
     373  
     374  def _test():
     375      XMLReader()
     376      IncrementalParser()
     377      Locator()
     378  
     379  if __name__ == "__main__":
     380      _test()