(root)/
Python-3.12.0/
Lib/
xml/
dom/
xmlbuilder.py
       1  """Implementation of the DOM Level 3 'LS-Load' feature."""
       2  
       3  import copy
       4  import xml.dom
       5  
       6  from xml.dom.NodeFilter import NodeFilter
       7  
       8  
# Public names exported by a star-import of this module; the other classes
# defined below remain importable by explicit name.
__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
      10  
      11  
      12  class ESC[4;38;5;81mOptions:
      13      """Features object that has variables set for each DOMBuilder feature.
      14  
      15      The DOMBuilder class uses an instance of this class to pass settings to
      16      the ExpatBuilder class.
      17      """
      18  
      19      # Note that the DOMBuilder class in LoadSave constrains which of these
      20      # values can be set using the DOM Level 3 LoadSave feature.
      21  
      22      namespaces = 1
      23      namespace_declarations = True
      24      validation = False
      25      external_parameter_entities = True
      26      external_general_entities = True
      27      external_dtd_subset = True
      28      validate_if_schema = False
      29      validate = False
      30      datatype_normalization = False
      31      create_entity_ref_nodes = True
      32      entities = True
      33      whitespace_in_element_content = True
      34      cdata_sections = True
      35      comments = True
      36      charset_overrides_xml_encoding = True
      37      infoset = False
      38      supported_mediatypes_only = False
      39  
      40      errorHandler = None
      41      filter = None
      42  
      43  
      44  class ESC[4;38;5;81mDOMBuilder:
      45      entityResolver = None
      46      errorHandler = None
      47      filter = None
      48  
      49      ACTION_REPLACE = 1
      50      ACTION_APPEND_AS_CHILDREN = 2
      51      ACTION_INSERT_AFTER = 3
      52      ACTION_INSERT_BEFORE = 4
      53  
      54      _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
      55                        ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)
      56  
      57      def __init__(self):
      58          self._options = Options()
      59  
      60      def _get_entityResolver(self):
      61          return self.entityResolver
      62      def _set_entityResolver(self, entityResolver):
      63          self.entityResolver = entityResolver
      64  
      65      def _get_errorHandler(self):
      66          return self.errorHandler
      67      def _set_errorHandler(self, errorHandler):
      68          self.errorHandler = errorHandler
      69  
      70      def _get_filter(self):
      71          return self.filter
      72      def _set_filter(self, filter):
      73          self.filter = filter
      74  
      75      def setFeature(self, name, state):
      76          if self.supportsFeature(name):
      77              state = state and 1 or 0
      78              try:
      79                  settings = self._settings[(_name_xform(name), state)]
      80              except KeyError:
      81                  raise xml.dom.NotSupportedErr(
      82                      "unsupported feature: %r" % (name,)) from None
      83              else:
      84                  for name, value in settings:
      85                      setattr(self._options, name, value)
      86          else:
      87              raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
      88  
      89      def supportsFeature(self, name):
      90          return hasattr(self._options, _name_xform(name))
      91  
      92      def canSetFeature(self, name, state):
      93          key = (_name_xform(name), state and 1 or 0)
      94          return key in self._settings
      95  
      96      # This dictionary maps from (feature,value) to a list of
      97      # (option,value) pairs that should be set on the Options object.
      98      # If a (feature,value) setting is not in this dictionary, it is
      99      # not supported by the DOMBuilder.
     100      #
     101      _settings = {
     102          ("namespace_declarations", 0): [
     103              ("namespace_declarations", 0)],
     104          ("namespace_declarations", 1): [
     105              ("namespace_declarations", 1)],
     106          ("validation", 0): [
     107              ("validation", 0)],
     108          ("external_general_entities", 0): [
     109              ("external_general_entities", 0)],
     110          ("external_general_entities", 1): [
     111              ("external_general_entities", 1)],
     112          ("external_parameter_entities", 0): [
     113              ("external_parameter_entities", 0)],
     114          ("external_parameter_entities", 1): [
     115              ("external_parameter_entities", 1)],
     116          ("validate_if_schema", 0): [
     117              ("validate_if_schema", 0)],
     118          ("create_entity_ref_nodes", 0): [
     119              ("create_entity_ref_nodes", 0)],
     120          ("create_entity_ref_nodes", 1): [
     121              ("create_entity_ref_nodes", 1)],
     122          ("entities", 0): [
     123              ("create_entity_ref_nodes", 0),
     124              ("entities", 0)],
     125          ("entities", 1): [
     126              ("entities", 1)],
     127          ("whitespace_in_element_content", 0): [
     128              ("whitespace_in_element_content", 0)],
     129          ("whitespace_in_element_content", 1): [
     130              ("whitespace_in_element_content", 1)],
     131          ("cdata_sections", 0): [
     132              ("cdata_sections", 0)],
     133          ("cdata_sections", 1): [
     134              ("cdata_sections", 1)],
     135          ("comments", 0): [
     136              ("comments", 0)],
     137          ("comments", 1): [
     138              ("comments", 1)],
     139          ("charset_overrides_xml_encoding", 0): [
     140              ("charset_overrides_xml_encoding", 0)],
     141          ("charset_overrides_xml_encoding", 1): [
     142              ("charset_overrides_xml_encoding", 1)],
     143          ("infoset", 0): [],
     144          ("infoset", 1): [
     145              ("namespace_declarations", 0),
     146              ("validate_if_schema", 0),
     147              ("create_entity_ref_nodes", 0),
     148              ("entities", 0),
     149              ("cdata_sections", 0),
     150              ("datatype_normalization", 1),
     151              ("whitespace_in_element_content", 1),
     152              ("comments", 1),
     153              ("charset_overrides_xml_encoding", 1)],
     154          ("supported_mediatypes_only", 0): [
     155              ("supported_mediatypes_only", 0)],
     156          ("namespaces", 0): [
     157              ("namespaces", 0)],
     158          ("namespaces", 1): [
     159              ("namespaces", 1)],
     160      }
     161  
     162      def getFeature(self, name):
     163          xname = _name_xform(name)
     164          try:
     165              return getattr(self._options, xname)
     166          except AttributeError:
     167              if name == "infoset":
     168                  options = self._options
     169                  return (options.datatype_normalization
     170                          and options.whitespace_in_element_content
     171                          and options.comments
     172                          and options.charset_overrides_xml_encoding
     173                          and not (options.namespace_declarations
     174                                   or options.validate_if_schema
     175                                   or options.create_entity_ref_nodes
     176                                   or options.entities
     177                                   or options.cdata_sections))
     178              raise xml.dom.NotFoundErr("feature %s not known" % repr(name))
     179  
     180      def parseURI(self, uri):
     181          if self.entityResolver:
     182              input = self.entityResolver.resolveEntity(None, uri)
     183          else:
     184              input = DOMEntityResolver().resolveEntity(None, uri)
     185          return self.parse(input)
     186  
     187      def parse(self, input):
     188          options = copy.copy(self._options)
     189          options.filter = self.filter
     190          options.errorHandler = self.errorHandler
     191          fp = input.byteStream
     192          if fp is None and options.systemId:
     193              import urllib.request
     194              fp = urllib.request.urlopen(input.systemId)
     195          return self._parse_bytestream(fp, options)
     196  
     197      def parseWithContext(self, input, cnode, action):
     198          if action not in self._legal_actions:
     199              raise ValueError("not a legal action")
     200          raise NotImplementedError("Haven't written this yet...")
     201  
     202      def _parse_bytestream(self, stream, options):
     203          import xml.dom.expatbuilder
     204          builder = xml.dom.expatbuilder.makeBuilder(options)
     205          return builder.parseFile(stream)
     206  
     207  
     208  def _name_xform(name):
     209      return name.lower().replace('-', '_')
     210  
     211  
     212  class ESC[4;38;5;81mDOMEntityResolver(ESC[4;38;5;149mobject):
     213      __slots__ = '_opener',
     214  
     215      def resolveEntity(self, publicId, systemId):
     216          assert systemId is not None
     217          source = DOMInputSource()
     218          source.publicId = publicId
     219          source.systemId = systemId
     220          source.byteStream = self._get_opener().open(systemId)
     221  
     222          # determine the encoding if the transport provided it
     223          source.encoding = self._guess_media_encoding(source)
     224  
     225          # determine the base URI is we can
     226          import posixpath, urllib.parse
     227          parts = urllib.parse.urlparse(systemId)
     228          scheme, netloc, path, params, query, fragment = parts
     229          # XXX should we check the scheme here as well?
     230          if path and not path.endswith("/"):
     231              path = posixpath.dirname(path) + "/"
     232              parts = scheme, netloc, path, params, query, fragment
     233              source.baseURI = urllib.parse.urlunparse(parts)
     234  
     235          return source
     236  
     237      def _get_opener(self):
     238          try:
     239              return self._opener
     240          except AttributeError:
     241              self._opener = self._create_opener()
     242              return self._opener
     243  
     244      def _create_opener(self):
     245          import urllib.request
     246          return urllib.request.build_opener()
     247  
     248      def _guess_media_encoding(self, source):
     249          info = source.byteStream.info()
     250          if "Content-Type" in info:
     251              for param in info.getplist():
     252                  if param.startswith("charset="):
     253                      return param.split("=", 1)[1].lower()
     254  
     255  
     256  class ESC[4;38;5;81mDOMInputSource(ESC[4;38;5;149mobject):
     257      __slots__ = ('byteStream', 'characterStream', 'stringData',
     258                   'encoding', 'publicId', 'systemId', 'baseURI')
     259  
     260      def __init__(self):
     261          self.byteStream = None
     262          self.characterStream = None
     263          self.stringData = None
     264          self.encoding = None
     265          self.publicId = None
     266          self.systemId = None
     267          self.baseURI = None
     268  
     269      def _get_byteStream(self):
     270          return self.byteStream
     271      def _set_byteStream(self, byteStream):
     272          self.byteStream = byteStream
     273  
     274      def _get_characterStream(self):
     275          return self.characterStream
     276      def _set_characterStream(self, characterStream):
     277          self.characterStream = characterStream
     278  
     279      def _get_stringData(self):
     280          return self.stringData
     281      def _set_stringData(self, data):
     282          self.stringData = data
     283  
     284      def _get_encoding(self):
     285          return self.encoding
     286      def _set_encoding(self, encoding):
     287          self.encoding = encoding
     288  
     289      def _get_publicId(self):
     290          return self.publicId
     291      def _set_publicId(self, publicId):
     292          self.publicId = publicId
     293  
     294      def _get_systemId(self):
     295          return self.systemId
     296      def _set_systemId(self, systemId):
     297          self.systemId = systemId
     298  
     299      def _get_baseURI(self):
     300          return self.baseURI
     301      def _set_baseURI(self, uri):
     302          self.baseURI = uri
     303  
     304  
     305  class ESC[4;38;5;81mDOMBuilderFilter:
     306      """Element filter which can be used to tailor construction of
     307      a DOM instance.
     308      """
     309  
     310      # There's really no need for this class; concrete implementations
     311      # should just implement the endElement() and startElement()
     312      # methods as appropriate.  Using this makes it easy to only
     313      # implement one of them.
     314  
     315      FILTER_ACCEPT = 1
     316      FILTER_REJECT = 2
     317      FILTER_SKIP = 3
     318      FILTER_INTERRUPT = 4
     319  
     320      whatToShow = NodeFilter.SHOW_ALL
     321  
     322      def _get_whatToShow(self):
     323          return self.whatToShow
     324  
     325      def acceptNode(self, element):
     326          return self.FILTER_ACCEPT
     327  
     328      def startContainer(self, element):
     329          return self.FILTER_ACCEPT
     330  
# NodeFilter was imported only to initialise DOMBuilderFilter.whatToShow;
# remove it again so it is not left behind in this module's namespace.
del NodeFilter
     332  
     333  
     334  class ESC[4;38;5;81mDocumentLS:
     335      """Mixin to create documents that conform to the load/save spec."""
     336  
     337      async_ = False
     338  
     339      def _get_async(self):
     340          return False
     341  
     342      def _set_async(self, flag):
     343          if flag:
     344              raise xml.dom.NotSupportedErr(
     345                  "asynchronous document loading is not supported")
     346  
     347      def abort(self):
     348          # What does it mean to "clear" a document?  Does the
     349          # documentElement disappear?
     350          raise NotImplementedError(
     351              "haven't figured out what this means yet")
     352  
     353      def load(self, uri):
     354          raise NotImplementedError("haven't written this yet")
     355  
     356      def loadXML(self, source):
     357          raise NotImplementedError("haven't written this yet")
     358  
     359      def saveXML(self, snode):
     360          if snode is None:
     361              snode = self
     362          elif snode.ownerDocument is not self:
     363              raise xml.dom.WrongDocumentErr()
     364          return snode.toxml()
     365  
     366  
     367  class ESC[4;38;5;81mDOMImplementationLS:
     368      MODE_SYNCHRONOUS = 1
     369      MODE_ASYNCHRONOUS = 2
     370  
     371      def createDOMBuilder(self, mode, schemaType):
     372          if schemaType is not None:
     373              raise xml.dom.NotSupportedErr(
     374                  "schemaType not yet supported")
     375          if mode == self.MODE_SYNCHRONOUS:
     376              return DOMBuilder()
     377          if mode == self.MODE_ASYNCHRONOUS:
     378              raise xml.dom.NotSupportedErr(
     379                  "asynchronous builders are not supported")
     380          raise ValueError("unknown value for mode")
     381  
     382      def createDOMWriter(self):
     383          raise NotImplementedError(
     384              "the writer interface hasn't been written yet!")
     385  
     386      def createDOMInputSource(self):
     387          return DOMInputSource()