(root)/
Python-3.12.0/
Lib/
xml/
sax/
xmlreader.py
       1  """An XML Reader is the SAX 2 name for an XML parser. XML Parsers
       2  should be based on this code. """
       3  
       4  from . import handler
       5  
       6  from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException
       7  
       8  
       9  # ===== XMLREADER =====
      10  
      11  class ESC[4;38;5;81mXMLReader:
      12      """Interface for reading an XML document using callbacks.
      13  
      14      XMLReader is the interface that an XML parser's SAX2 driver must
      15      implement. This interface allows an application to set and query
      16      features and properties in the parser, to register event handlers
      17      for document processing, and to initiate a document parse.
      18  
      19      All SAX interfaces are assumed to be synchronous: the parse
      20      methods must not return until parsing is complete, and readers
      21      must wait for an event-handler callback to return before reporting
      22      the next event."""
      23  
      24      def __init__(self):
      25          self._cont_handler = handler.ContentHandler()
      26          self._dtd_handler = handler.DTDHandler()
      27          self._ent_handler = handler.EntityResolver()
      28          self._err_handler = handler.ErrorHandler()
      29  
      30      def parse(self, source):
      31          "Parse an XML document from a system identifier or an InputSource."
      32          raise NotImplementedError("This method must be implemented!")
      33  
      34      def getContentHandler(self):
      35          "Returns the current ContentHandler."
      36          return self._cont_handler
      37  
      38      def setContentHandler(self, handler):
      39          "Registers a new object to receive document content events."
      40          self._cont_handler = handler
      41  
      42      def getDTDHandler(self):
      43          "Returns the current DTD handler."
      44          return self._dtd_handler
      45  
      46      def setDTDHandler(self, handler):
      47          "Register an object to receive basic DTD-related events."
      48          self._dtd_handler = handler
      49  
      50      def getEntityResolver(self):
      51          "Returns the current EntityResolver."
      52          return self._ent_handler
      53  
      54      def setEntityResolver(self, resolver):
      55          "Register an object to resolve external entities."
      56          self._ent_handler = resolver
      57  
      58      def getErrorHandler(self):
      59          "Returns the current ErrorHandler."
      60          return self._err_handler
      61  
      62      def setErrorHandler(self, handler):
      63          "Register an object to receive error-message events."
      64          self._err_handler = handler
      65  
      66      def setLocale(self, locale):
      67          """Allow an application to set the locale for errors and warnings.
      68  
      69          SAX parsers are not required to provide localization for errors
      70          and warnings; if they cannot support the requested locale,
      71          however, they must raise a SAX exception. Applications may
      72          request a locale change in the middle of a parse."""
      73          raise SAXNotSupportedException("Locale support not implemented")
      74  
      75      def getFeature(self, name):
      76          "Looks up and returns the state of a SAX2 feature."
      77          raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
      78  
      79      def setFeature(self, name, state):
      80          "Sets the state of a SAX2 feature."
      81          raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
      82  
      83      def getProperty(self, name):
      84          "Looks up and returns the value of a SAX2 property."
      85          raise SAXNotRecognizedException("Property '%s' not recognized" % name)
      86  
      87      def setProperty(self, name, value):
      88          "Sets the value of a SAX2 property."
      89          raise SAXNotRecognizedException("Property '%s' not recognized" % name)
      90  
      91  class ESC[4;38;5;81mIncrementalParser(ESC[4;38;5;149mXMLReader):
      92      """This interface adds three extra methods to the XMLReader
      93      interface that allow XML parsers to support incremental
      94      parsing. Support for this interface is optional, since not all
      95      underlying XML parsers support this functionality.
      96  
      97      When the parser is instantiated it is ready to begin accepting
      98      data from the feed method immediately. After parsing has been
      99      finished with a call to close the reset method must be called to
     100      make the parser ready to accept new data, either from feed or
     101      using the parse method.
     102  
     103      Note that these methods must _not_ be called during parsing, that
     104      is, after parse has been called and before it returns.
     105  
     106      By default, the class also implements the parse method of the XMLReader
     107      interface using the feed, close and reset methods of the
     108      IncrementalParser interface as a convenience to SAX 2.0 driver
     109      writers."""
     110  
     111      def __init__(self, bufsize=2**16):
     112          self._bufsize = bufsize
     113          XMLReader.__init__(self)
     114  
     115      def parse(self, source):
     116          from . import saxutils
     117          source = saxutils.prepare_input_source(source)
     118  
     119          self.prepareParser(source)
     120          file = source.getCharacterStream()
     121          if file is None:
     122              file = source.getByteStream()
     123          while buffer := file.read(self._bufsize):
     124              self.feed(buffer)
     125          self.close()
     126  
     127      def feed(self, data):
     128          """This method gives the raw XML data in the data parameter to
     129          the parser and makes it parse the data, emitting the
     130          corresponding events. It is allowed for XML constructs to be
     131          split across several calls to feed.
     132  
     133          feed may raise SAXException."""
     134          raise NotImplementedError("This method must be implemented!")
     135  
     136      def prepareParser(self, source):
     137          """This method is called by the parse implementation to allow
     138          the SAX 2.0 driver to prepare itself for parsing."""
     139          raise NotImplementedError("prepareParser must be overridden!")
     140  
     141      def close(self):
     142          """This method is called when the entire XML document has been
     143          passed to the parser through the feed method, to notify the
     144          parser that there are no more data. This allows the parser to
     145          do the final checks on the document and empty the internal
     146          data buffer.
     147  
     148          The parser will not be ready to parse another document until
     149          the reset method has been called.
     150  
     151          close may raise SAXException."""
     152          raise NotImplementedError("This method must be implemented!")
     153  
     154      def reset(self):
     155          """This method is called after close has been called to reset
     156          the parser so that it is ready to parse new documents. The
     157          results of calling parse or feed after close without calling
     158          reset are undefined."""
     159          raise NotImplementedError("This method must be implemented!")
     160  
     161  # ===== LOCATOR =====
     162  
     163  class ESC[4;38;5;81mLocator:
     164      """Interface for associating a SAX event with a document
     165      location. A locator object will return valid results only during
     166      calls to DocumentHandler methods; at any other time, the
     167      results are unpredictable."""
     168  
     169      def getColumnNumber(self):
     170          "Return the column number where the current event ends."
     171          return -1
     172  
     173      def getLineNumber(self):
     174          "Return the line number where the current event ends."
     175          return -1
     176  
     177      def getPublicId(self):
     178          "Return the public identifier for the current event."
     179          return None
     180  
     181      def getSystemId(self):
     182          "Return the system identifier for the current event."
     183          return None
     184  
     185  # ===== INPUTSOURCE =====
     186  
     187  class ESC[4;38;5;81mInputSource:
     188      """Encapsulation of the information needed by the XMLReader to
     189      read entities.
     190  
     191      This class may include information about the public identifier,
     192      system identifier, byte stream (possibly with character encoding
     193      information) and/or the character stream of an entity.
     194  
     195      Applications will create objects of this class for use in the
     196      XMLReader.parse method and for returning from
     197      EntityResolver.resolveEntity.
     198  
     199      An InputSource belongs to the application, the XMLReader is not
     200      allowed to modify InputSource objects passed to it from the
     201      application, although it may make copies and modify those."""
     202  
     203      def __init__(self, system_id = None):
     204          self.__system_id = system_id
     205          self.__public_id = None
     206          self.__encoding  = None
     207          self.__bytefile  = None
     208          self.__charfile  = None
     209  
     210      def setPublicId(self, public_id):
     211          "Sets the public identifier of this InputSource."
     212          self.__public_id = public_id
     213  
     214      def getPublicId(self):
     215          "Returns the public identifier of this InputSource."
     216          return self.__public_id
     217  
     218      def setSystemId(self, system_id):
     219          "Sets the system identifier of this InputSource."
     220          self.__system_id = system_id
     221  
     222      def getSystemId(self):
     223          "Returns the system identifier of this InputSource."
     224          return self.__system_id
     225  
     226      def setEncoding(self, encoding):
     227          """Sets the character encoding of this InputSource.
     228  
     229          The encoding must be a string acceptable for an XML encoding
     230          declaration (see section 4.3.3 of the XML recommendation).
     231  
     232          The encoding attribute of the InputSource is ignored if the
     233          InputSource also contains a character stream."""
     234          self.__encoding = encoding
     235  
     236      def getEncoding(self):
     237          "Get the character encoding of this InputSource."
     238          return self.__encoding
     239  
     240      def setByteStream(self, bytefile):
     241          """Set the byte stream (a Python file-like object which does
     242          not perform byte-to-character conversion) for this input
     243          source.
     244  
     245          The SAX parser will ignore this if there is also a character
     246          stream specified, but it will use a byte stream in preference
     247          to opening a URI connection itself.
     248  
     249          If the application knows the character encoding of the byte
     250          stream, it should set it with the setEncoding method."""
     251          self.__bytefile = bytefile
     252  
     253      def getByteStream(self):
     254          """Get the byte stream for this input source.
     255  
     256          The getEncoding method will return the character encoding for
     257          this byte stream, or None if unknown."""
     258          return self.__bytefile
     259  
     260      def setCharacterStream(self, charfile):
     261          """Set the character stream for this input source. (The stream
     262          must be a Python 2.0 Unicode-wrapped file-like that performs
     263          conversion to Unicode strings.)
     264  
     265          If there is a character stream specified, the SAX parser will
     266          ignore any byte stream and will not attempt to open a URI
     267          connection to the system identifier."""
     268          self.__charfile = charfile
     269  
     270      def getCharacterStream(self):
     271          "Get the character stream for this input source."
     272          return self.__charfile
     273  
     274  # ===== ATTRIBUTESIMPL =====
     275  
     276  class ESC[4;38;5;81mAttributesImpl:
     277  
     278      def __init__(self, attrs):
     279          """Non-NS-aware implementation.
     280  
     281          attrs should be of the form {name : value}."""
     282          self._attrs = attrs
     283  
     284      def getLength(self):
     285          return len(self._attrs)
     286  
     287      def getType(self, name):
     288          return "CDATA"
     289  
     290      def getValue(self, name):
     291          return self._attrs[name]
     292  
     293      def getValueByQName(self, name):
     294          return self._attrs[name]
     295  
     296      def getNameByQName(self, name):
     297          if name not in self._attrs:
     298              raise KeyError(name)
     299          return name
     300  
     301      def getQNameByName(self, name):
     302          if name not in self._attrs:
     303              raise KeyError(name)
     304          return name
     305  
     306      def getNames(self):
     307          return list(self._attrs.keys())
     308  
     309      def getQNames(self):
     310          return list(self._attrs.keys())
     311  
     312      def __len__(self):
     313          return len(self._attrs)
     314  
     315      def __getitem__(self, name):
     316          return self._attrs[name]
     317  
     318      def keys(self):
     319          return list(self._attrs.keys())
     320  
     321      def __contains__(self, name):
     322          return name in self._attrs
     323  
     324      def get(self, name, alternative=None):
     325          return self._attrs.get(name, alternative)
     326  
     327      def copy(self):
     328          return self.__class__(self._attrs)
     329  
     330      def items(self):
     331          return list(self._attrs.items())
     332  
     333      def values(self):
     334          return list(self._attrs.values())
     335  
     336  # ===== ATTRIBUTESNSIMPL =====
     337  
     338  class ESC[4;38;5;81mAttributesNSImpl(ESC[4;38;5;149mAttributesImpl):
     339  
     340      def __init__(self, attrs, qnames):
     341          """NS-aware implementation.
     342  
     343          attrs should be of the form {(ns_uri, lname): value, ...}.
     344          qnames of the form {(ns_uri, lname): qname, ...}."""
     345          self._attrs = attrs
     346          self._qnames = qnames
     347  
     348      def getValueByQName(self, name):
     349          for (nsname, qname) in self._qnames.items():
     350              if qname == name:
     351                  return self._attrs[nsname]
     352  
     353          raise KeyError(name)
     354  
     355      def getNameByQName(self, name):
     356          for (nsname, qname) in self._qnames.items():
     357              if qname == name:
     358                  return nsname
     359  
     360          raise KeyError(name)
     361  
     362      def getQNameByName(self, name):
     363          return self._qnames[name]
     364  
     365      def getQNames(self):
     366          return list(self._qnames.values())
     367  
     368      def copy(self):
     369          return self.__class__(self._attrs, self._qnames)
     370  
     371  
     372  def _test():
     373      XMLReader()
     374      IncrementalParser()
     375      Locator()
     376  
     377  if __name__ == "__main__":
     378      _test()