python (3.12.0)
1 """Simple implementation of the Level 1 DOM.
2
3 Namespaces and other minor Level 2 features are also supported.
4
5 parse("foo.xml")
6
7 parseString("<foo><bar/></foo>")
8
9 Todo:
10 =====
11 * convenience methods for getting elements and text.
12 * more testing
13 * bring some of the writer and linearizer code into conformance with this
14 interface
15 * SAX 2 namespaces
16 """
17
18 import io
19 import xml.dom
20
21 from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
22 from xml.dom.minicompat import *
23 from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
24
25 # This is used by the ID-cache invalidation checks; the list isn't
26 # actually complete, since the nodes being checked will never be the
27 # DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is
28 # the node being added or removed, not the node being modified.)
29 #
30 _nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
31 xml.dom.Node.ENTITY_REFERENCE_NODE)
32
33
class Node(xml.dom.Node):
    """Base class shared by the node types defined in this module.

    It maintains the parent/sibling links of the child list, provides the
    serialisation entry points (``toxml``/``toprettyxml``) and the DOM
    Level 3 user-data helpers.  The methods here rely on subclasses to
    provide ``nodeType``, ``childNodes``, ``_child_node_types`` and
    ``writexml``.
    """

    namespaceURI = None  # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX  # non-null only for NS elements and attributes

    def __bool__(self):
        # Nodes are always truthy, whatever their content.
        return True

    def toxml(self, encoding=None, standalone=None):
        """Serialise the node without any added indentation or newlines."""
        return self.toprettyxml("", "", encoding, standalone)

    def toprettyxml(self, indent="\t", newl="\n", encoding=None,
                    standalone=None):
        """Serialise the node, indenting nested nodes with *indent*.

        Returns a ``str`` when *encoding* is None, otherwise ``bytes`` in
        the requested encoding (unencodable characters become character
        references).
        """
        as_bytes = encoding is not None
        if as_bytes:
            writer = io.TextIOWrapper(io.BytesIO(),
                                      encoding=encoding,
                                      errors="xmlcharrefreplace",
                                      newline='\n')
        else:
            writer = io.StringIO()
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding, standalone)
        else:
            self.writexml(writer, "", indent, newl)
        if as_bytes:
            return writer.detach().getvalue()
        return writer.getvalue()

    def hasChildNodes(self):
        """Return True if this node has at least one child."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        return self.childNodes[0] if self.childNodes else None

    def _get_lastChild(self):
        return self.childNodes[-1] if self.childNodes else None

    def insertBefore(self, newChild, refChild):
        """Insert *newChild* before *refChild* (append when it is None).

        A document fragment is expanded into its children.  Raises
        HierarchyRequestErr for a disallowed child type and NotFoundErr
        when *refChild* is not a child of this node.  Returns *newChild*.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for fragment_child in tuple(newChild.childNodes):
                self.insertBefore(fragment_child, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
            return newChild
        try:
            position = self.childNodes.index(refChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        if newChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        self.childNodes.insert(position, newChild)
        newChild.nextSibling = refChild
        refChild.previousSibling = newChild
        if position:
            before = self.childNodes[position - 1]
            before.nextSibling = newChild
            newChild.previousSibling = before
        else:
            newChild.previousSibling = None
        newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append *node* as the last child and return it.

        A document fragment is expanded into its children.  Raises
        HierarchyRequestErr for a disallowed child type.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for fragment_child in tuple(node.childNodes):
                self.appendChild(fragment_child)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        if node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Put *newChild* in the place of *oldChild*.

        Returns *oldChild*, except that nothing is returned when both
        arguments are the same node, and the result of ``insertBefore`` is
        returned when *newChild* is a document fragment.  Raises
        HierarchyRequestErr or NotFoundErr like ``insertBefore``.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            successor = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, successor)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        try:
            position = self.childNodes.index(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        self.childNodes[position] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
                or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        # Hand the sibling links of the old child over to the new one.
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach *oldChild* from this node and return it.

        Raises NotFoundErr when *oldChild* is not a child of this node.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        following = oldChild.nextSibling
        preceding = oldChild.previousSibling
        if following is not None:
            following.previousSibling = preceding
        if preceding is not None:
            preceding.nextSibling = following
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Drop empty text children and merge adjacent ones, recursively."""
        kept = []
        for child in self.childNodes:
            if child.nodeType != Node.TEXT_NODE:
                kept.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
            elif not child.data:
                # empty text node; discard
                if kept:
                    kept[-1].nextSibling = child.nextSibling
                if child.nextSibling:
                    child.nextSibling.previousSibling = child.previousSibling
                child.unlink()
            elif kept and kept[-1].nodeType == child.nodeType:
                # collapse text node into the text node kept just before it
                merged = kept[-1]
                merged.data = merged.data + child.data
                merged.nextSibling = child.nextSibling
                if child.nextSibling:
                    child.nextSibling.previousSibling = merged
                child.unlink()
            else:
                kept.append(child)
        self.childNodes[:] = kept

    def cloneNode(self, deep):
        """Return a copy of this node (including descendants if *deep*)."""
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owner document's implementation about *feature*."""
        return self.ownerDocument.implementation.hasFeature(feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True if *other* is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return this node if *feature* is supported, otherwise None."""
        return self if self.isSupported(feature, None) else None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the user data stored under *key*, or None."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store *data* (with its *handler*) under *key*.

        Passing None as *data* removes an existing entry; the handler is
        ignored in that case.  Returns the previously stored data or None.
        """
        try:
            store = self._user_data
        except AttributeError:
            store = self._user_data = {}
        previous = store[key][0] if key in store else None
        if data is None:
            if previous is not None:
                del store[key]
        else:
            store[key] = (data, handler)
        return previous

    def _call_user_data_handler(self, operation, src, dst):
        """Invoke every registered user-data handler for *operation*."""
        if not hasattr(self, "_user_data"):
            return
        # Iterate over a snapshot of the entries.
        for key, (data, handler) in list(self._user_data.items()):
            if handler is not None:
                handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Recursively drop the references held by this node's subtree."""
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

    # A Node is its own context manager, to ensure that an unlink() call occurs.
    # This is similar to how a file object works.
    def __enter__(self):
        return self

    def __exit__(self, et, ev, tb):
        self.unlink()


defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild", doc="Last child node, or None.")
defproperty(Node, "localName", doc="Namespace-local name of this node.")
283
284
285 def _append_child(self, node):
286 # fast path with less checks; usable by DOM builders if careful
287 childNodes = self.childNodes
288 if childNodes:
289 last = childNodes[-1]
290 node.previousSibling = last
291 last.nextSibling = node
292 childNodes.append(node)
293 node.parentNode = self
294
295 def _in_document(node):
296 # return True iff node is part of a document tree
297 while node is not None:
298 if node.nodeType == Node.DOCUMENT_NODE:
299 return True
300 node = node.parentNode
301 return False
302
303 def _write_data(writer, data):
304 "Writes datachars to writer."
305 if data:
306 data = data.replace("&", "&").replace("<", "<"). \
307 replace("\"", """).replace(">", ">")
308 writer.write(data)
309
310 def _get_elements_by_tagName_helper(parent, name, rc):
311 for node in parent.childNodes:
312 if node.nodeType == Node.ELEMENT_NODE and \
313 (name == "*" or node.tagName == name):
314 rc.append(node)
315 _get_elements_by_tagName_helper(node, name, rc)
316 return rc
317
318 def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
319 for node in parent.childNodes:
320 if node.nodeType == Node.ELEMENT_NODE:
321 if ((localName == "*" or node.localName == localName) and
322 (nsURI == "*" or node.namespaceURI == nsURI)):
323 rc.append(node)
324 _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc)
325 return rc
326
class DocumentFragment(Node):
    """Container node that groups children outside of any parent."""

    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = "#document-fragment"
    nodeValue = None
    attributes = None
    parentNode = None

    # Node types that may be appended to a fragment.
    _child_node_types = (
        Node.ELEMENT_NODE,
        Node.TEXT_NODE,
        Node.CDATA_SECTION_NODE,
        Node.ENTITY_REFERENCE_NODE,
        Node.PROCESSING_INSTRUCTION_NODE,
        Node.COMMENT_NODE,
        Node.NOTATION_NODE,
    )

    def __init__(self):
        # Every fragment starts out with its own, empty child list.
        self.childNodes = NodeList()
343
344
class Attr(Node):
    """An attribute node.

    The value is kept both in ``_value`` and in the ``data`` of the single
    Text child created by the constructor.  ``nodeName``/``name`` and
    ``nodeValue``/``value`` are aliases of each other.
    """

    __slots__ = ('_name', '_value', 'namespaceURI',
                 '_prefix', 'childNodes', '_localName', 'ownerDocument',
                 'ownerElement')
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        self.ownerElement = None
        # The 'ownerDocument' slot hides the None default inherited from
        # Node, so it needs an explicit value; otherwise reading it on a
        # fresh attribute (e.g. through isId) raises AttributeError.
        self.ownerDocument = None
        self._name = qName
        self.namespaceURI = namespaceURI
        self._prefix = prefix
        if localName is not None:
            self._localName = localName
        self.childNodes = NodeList()

        # Add the single child node that represents the value of the attr
        self.childNodes.append(Text())

        # nodeValue and value are set elsewhere

    def _get_localName(self):
        try:
            return self._localName
        except AttributeError:
            # No explicit local name: use what follows the prefix, if any.
            return self.nodeName.split(":", 1)[-1]

    def _get_specified(self):
        return self.specified

    def _get_name(self):
        return self._name

    def _set_name(self, value):
        self._name = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeName = name = property(_get_name, _set_name)

    def _get_value(self):
        return self._value

    def _set_value(self, value):
        self._value = value
        # Keep the Text child in step with the attribute value.
        self.childNodes[0].data = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeValue = value = property(_get_value, _set_value)

    def _get_prefix(self):
        return self._prefix

    def _set_prefix(self, prefix):
        """Change the prefix and rebuild the qualified name from it.

        Raises NamespaceErr when the 'xmlns' prefix is combined with a
        namespace URI other than the XMLNS namespace.
        """
        nsuri = self.namespaceURI
        if prefix == "xmlns":
            if nsuri and nsuri != XMLNS_NAMESPACE:
                raise xml.dom.NamespaceErr(
                    "illegal use of 'xmlns' prefix for the wrong namespace")
        self._prefix = prefix
        if prefix is None:
            newName = self.localName
        else:
            newName = "%s:%s" % (prefix, self.localName)
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        self.name = newName

    prefix = property(_get_prefix, _set_prefix)

    def unlink(self):
        """Detach the attribute from its owner element and drop its children.

        Calling it again on an already detached attribute only clears the
        (already empty) child list.
        """
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        elem = self.ownerElement
        if elem is not None:
            del elem._attrs[self.nodeName]
            del elem._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                elem._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
            # The element no longer holds this attribute; forget the owner
            # so the node is not reported as still in use and a repeated
            # unlink() does not fail on the missing dictionary entries.
            self.ownerElement = None
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        if self._is_id:
            return True
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return False

        info = doc._get_elem_info(elem)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        else:
            return info.isId(self.nodeName)

    def _get_schemaType(self):
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return _no_type

        info = doc._get_elem_info(elem)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        else:
            return info.getAttributeType(self.nodeName)


defproperty(Attr, "isId", doc="True if this attribute is an ID.")
defproperty(Attr, "localName", doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
471
472
class NamedNodeMap(object):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        # attrs is keyed by qualified name, attrsNS by
        # (namespaceURI, localName); both hold the same Attr objects.
        self._attrs = attrs
        self._attrsNS = attrsNS
        self._ownerElement = ownerElement

    def _get_length(self):
        return len(self._attrs)

    def item(self, index):
        """Return the attribute at position *index*, or None if out of range."""
        names = list(self._attrs.keys())
        try:
            return self[names[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (qualified name, value) pairs."""
        return [(node.nodeName, node.value) for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def __contains__(self, key):
        # Strings are qualified names; anything else is looked up as a
        # (namespaceURI, localName) key.
        table = self._attrs if isinstance(key, str) else self._attrsNS
        return key in table

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value=None):
        return self._attrs.get(name, value)

    __len__ = _get_length

    def _cmp(self, other):
        # Maps sharing the same attribute dictionary compare equal; any
        # other pair is ordered by object identity.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        mine, theirs = id(self), id(other)
        return (mine > theirs) - (mine < theirs)

    def __eq__(self, other):
        return self._cmp(other) == 0

    def __ge__(self, other):
        return self._cmp(other) >= 0

    def __gt__(self, other):
        return self._cmp(other) > 0

    def __le__(self, other):
        return self._cmp(other) <= 0

    def __lt__(self, other):
        return self._cmp(other) < 0

    def __getitem__(self, attname_or_tuple):
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Set attribute *attname* from a string value or an Attr node.

        Raises TypeError when *value* is neither a str nor an Attr.
        """
        if not isinstance(value, str):
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            self.setNamedItem(value)
            return
        try:
            node = self._attrs[attname]
        except KeyError:
            # No such attribute yet: create it and register it first.
            node = Attr(attname)
            node.ownerDocument = self._ownerElement.ownerDocument
            self.setNamedItem(node)
        node.value = value

    def getNamedItem(self, name):
        """Return the attribute named *name*, or None."""
        try:
            return self._attrs[name]
        except KeyError:
            return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the attribute with the given namespace and local name, or None."""
        try:
            return self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            return None

    def removeNamedItem(self, name):
        """Remove and return the attribute named *name*.

        Raises NotFoundErr when there is no such attribute.
        """
        found = self.getNamedItem(name)
        if found is None:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self._ownerElement)
        del self._attrs[found.nodeName]
        del self._attrsNS[(found.namespaceURI, found.localName)]
        if hasattr(found, 'ownerElement'):
            found.ownerElement = None
        return found

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the attribute with the given namespace and local name.

        Raises NotFoundErr when there is no such attribute.
        """
        found = self.getNamedItemNS(namespaceURI, localName)
        if found is None:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self._ownerElement)
        del self._attrsNS[(found.namespaceURI, found.localName)]
        del self._attrs[found.nodeName]
        if hasattr(found, 'ownerElement'):
            found.ownerElement = None
        return found

    def setNamedItem(self, node):
        """Store the Attr *node*; return the attribute it replaced, if any.

        Raises HierarchyRequestErr when *node* is not an Attr.
        """
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        replaced = self._attrs.get(node.name)
        if replaced:
            replaced.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return replaced

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        node = self[attname_or_tuple]
        _clear_id_cache(node.ownerElement)
        # Attr.unlink() removes the node from the owner's dictionaries.
        node.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state


defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")

AttributeList = NamedNodeMap
640
641
class TypeInfo(object):
    """Pairs a type name with the namespace it comes from."""

    __slots__ = 'namespace', 'name'

    def __init__(self, namespace, name):
        self.namespace = namespace
        self.name = name

    def __repr__(self):
        cls_name = self.__class__.__name__
        if not self.namespace:
            return "<%s %r>" % (cls_name, self.name)
        return "<%s %r (from %r)>" % (cls_name, self.name, self.namespace)

    def _get_name(self):
        return self.name

    def _get_namespace(self):
        return self.namespace


# Shared instance with neither a namespace nor a name.
_no_type = TypeInfo(None, None)
663
class Element(Node):
    """An element node with lazily created attribute dictionaries."""

    __slots__ = ('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix',
                 'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS',
                 'nextSibling', 'previousSibling')
    nodeType = Node.ELEMENT_NODE
    nodeValue = None
    schemaType = _no_type

    _magic_id_nodes = 0

    _child_node_types = (Node.ELEMENT_NODE,
                         Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE,
                         Node.TEXT_NODE,
                         Node.CDATA_SECTION_NODE,
                         Node.ENTITY_REFERENCE_NODE)

    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
                 localName=None):
        # The 'ownerDocument' slot hides the None default inherited from
        # Node, so it needs an explicit value; without it setAttribute()
        # on a directly constructed element raises AttributeError.
        self.ownerDocument = None
        self.parentNode = None
        self.tagName = self.nodeName = tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        # Honour an explicit local name (as Attr does); when it is not
        # given, _get_localName() derives it from tagName.
        if localName is not None:
            self._localName = localName
        self.childNodes = NodeList()
        self.nextSibling = self.previousSibling = None

        # Attribute dictionaries are lazily created
        # attributes are double-indexed:
        #    tagName -> Attribute
        #    URI,localName -> Attribute
        # in the future: consider lazy generation
        # of attribute objects this is too tricky
        # for now because of headaches with
        # namespaces.
        self._attrs = None
        self._attrsNS = None

    def _ensure_attributes(self):
        # Create both attribute dictionaries on first use.
        if self._attrs is None:
            self._attrs = {}
            self._attrsNS = {}

    def _get_localName(self):
        try:
            return self._localName
        except AttributeError:
            # No explicit local name: use what follows the prefix, if any.
            return self.tagName.split(":", 1)[-1]

    def _get_tagName(self):
        return self.tagName

    def unlink(self):
        """Unlink the attributes, then the element and its subtree."""
        if self._attrs is not None:
            # Attr.unlink() mutates self._attrs, so iterate over a copy.
            for attr in list(self._attrs.values()):
                attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Returns the value of the specified attribute.

        Returns the value of the element's attribute named attname as
        a string. An empty string is returned if the element does not
        have such an attribute. Note that an empty string may also be
        returned as an explicitly given attribute value, use the
        hasAttribute method to distinguish these two cases.
        """
        if self._attrs is None:
            return ""
        try:
            return self._attrs[attname].value
        except KeyError:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        """Namespace-aware getAttribute(); returns "" when not present."""
        if self._attrsNS is None:
            return ""
        try:
            return self._attrsNS[(namespaceURI, localName)].value
        except KeyError:
            return ""

    def setAttribute(self, attname, value):
        """Set attribute *attname* to *value*, creating it if necessary."""
        attr = self.getAttributeNode(attname)
        if attr is None:
            attr = Attr(attname)
            attr.value = value  # also sets nodeValue
            attr.ownerDocument = self.ownerDocument
            self.setAttributeNode(attr)
        elif value != attr.value:
            attr.value = value
            if attr.isId:
                _clear_id_cache(self)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Namespace-aware setAttribute().

        An existing attribute is looked up by namespace and local name;
        its value, prefix and qualified name are updated as needed.
        """
        prefix, localname = _nssplit(qualifiedName)
        attr = self.getAttributeNodeNS(namespaceURI, localname)
        if attr is None:
            attr = Attr(qualifiedName, namespaceURI, localname, prefix)
            attr.value = value
            attr.ownerDocument = self.ownerDocument
            self.setAttributeNode(attr)
        else:
            if value != attr.value:
                attr.value = value
                if attr.isId:
                    _clear_id_cache(self)
            if attr.prefix != prefix:
                attr.prefix = prefix
                attr.nodeName = qualifiedName

    def getAttributeNode(self, attrname):
        """Return the Attr node named *attrname*, or None."""
        if self._attrs is None:
            return None
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the Attr node with the given namespace and local name, or None."""
        if self._attrsNS is None:
            return None
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Attach the Attr node *attr* to this element.

        Existing attributes with the same qualified name or the same
        (namespaceURI, localName) are removed first.  Returns a replaced
        attribute other than *attr* itself, if any.  Raises
        InuseAttributeErr when *attr* belongs to another element.
        """
        if attr.ownerElement not in (None, self):
            raise xml.dom.InuseAttributeErr("attribute node already owned")
        self._ensure_attributes()
        old1 = self._attrs.get(attr.name, None)
        if old1 is not None:
            self.removeAttributeNode(old1)
        old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
        if old2 is not None and old2 is not old1:
            self.removeAttributeNode(old2)
        _set_attribute_node(self, attr)

        if old1 is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old1
        if old2 is not attr:
            return old2

    setAttributeNodeNS = setAttributeNode

    def removeAttribute(self, name):
        """Remove the attribute named *name*; NotFoundErr if absent."""
        if self._attrsNS is None:
            raise xml.dom.NotFoundErr()
        try:
            attr = self._attrs[name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Namespace-aware removeAttribute(); NotFoundErr if absent."""
        if self._attrsNS is None:
            raise xml.dom.NotFoundErr()
        try:
            attr = self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Detach the Attr *node* from this element and return it.

        Raises NotFoundErr when *node* is None or no attribute with its
        name is present.
        """
        if node is None:
            raise xml.dom.NotFoundErr()
        try:
            self._attrs[node.name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self)
        node.unlink()
        # Restore this since the node is still useful and otherwise
        # unlinked
        node.ownerDocument = self.ownerDocument
        return node

    removeAttributeNodeNS = removeAttributeNode

    def hasAttribute(self, name):
        """Checks whether the element has an attribute with the specified name.

        Returns True if the element has an attribute with the specified name.
        Otherwise, returns False.
        """
        if self._attrs is None:
            return False
        return name in self._attrs

    def hasAttributeNS(self, namespaceURI, localName):
        """Namespace-aware hasAttribute()."""
        if self._attrsNS is None:
            return False
        return (namespaceURI, localName) in self._attrsNS

    def getElementsByTagName(self, name):
        """Returns all descendant elements with the given tag name.

        Returns the list of all descendant elements (not direct children
        only) with the specified tag name.
        """
        return _get_elements_by_tagName_helper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Namespace-aware getElementsByTagName()."""
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, NodeList())

    def __repr__(self):
        return "<DOM Element: %s at %#x>" % (self.tagName, id(self))

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write an XML element to a file-like object

        Write the element to the writer object that must provide
        a write method (e.g. a file or StringIO object).
        """
        # indent = current indentation
        # addindent = indentation to add to higher levels
        # newl = newline string
        writer.write(indent + "<" + self.tagName)

        attrs = self._get_attributes()

        for a_name in attrs.keys():
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            if (len(self.childNodes) == 1 and
                    self.childNodes[0].nodeType in (
                        Node.TEXT_NODE, Node.CDATA_SECTION_NODE)):
                # A lone text or CDATA child is written inline, without
                # the indentation and newline.
                self.childNodes[0].writexml(writer, '', '', '')
            else:
                writer.write(newl)
                for node in self.childNodes:
                    node.writexml(writer, indent + addindent, addindent, newl)
                writer.write(indent)
            writer.write("</%s>%s" % (self.tagName, newl))
        else:
            writer.write("/>%s" % (newl))

    def _get_attributes(self):
        self._ensure_attributes()
        return NamedNodeMap(self._attrs, self._attrsNS, self)

    def hasAttributes(self):
        """Return True if the element has at least one attribute."""
        if self._attrs:
            return True
        else:
            return False

    # DOM Level 3 attributes, based on the 22 Oct 2002 draft

    def setIdAttribute(self, name):
        idAttr = self.getAttributeNode(name)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNS(self, namespaceURI, localName):
        idAttr = self.getAttributeNodeNS(namespaceURI, localName)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNode(self, idAttr):
        """Flag *idAttr* as an ID attribute of this element.

        Raises NotFoundErr when *idAttr* is None or not owned by this
        element, and NoModificationAllowedErr when the element has a
        containing entity reference.
        """
        if idAttr is None or not self.isSameNode(idAttr.ownerElement):
            raise xml.dom.NotFoundErr()
        if _get_containing_entref(self) is not None:
            raise xml.dom.NoModificationAllowedErr()
        if not idAttr._is_id:
            idAttr._is_id = True
            self._magic_id_nodes += 1
            self.ownerDocument._magic_id_count += 1
            _clear_id_cache(self)


defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
938
939
940 def _set_attribute_node(element, attr):
941 _clear_id_cache(element)
942 element._ensure_attributes()
943 element._attrs[attr.name] = attr
944 element._attrsNS[(attr.namespaceURI, attr.localName)] = attr
945
946 # This creates a circular reference, but Element.unlink()
947 # breaks the cycle since the references to the attribute
948 # dictionaries are tossed.
949 attr.ownerElement = element
950
class Childless:
    """Mixin that makes childless-ness easy to implement and avoids
    the complexity of the Node methods that deal with children.
    """
    __slots__ = ()

    attributes = None
    childNodes = EmptyNodeList()
    firstChild = None
    lastChild = None

    def _get_firstChild(self):
        return None

    def _get_lastChild(self):
        return None

    def hasChildNodes(self):
        return False

    def normalize(self):
        # For childless nodes, normalize() has nothing to do.
        pass

    # Every attempt to change the (non-existent) child list is rejected.

    def appendChild(self, node):
        message = self.nodeName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)

    def replaceChild(self, newChild, oldChild):
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)
990
991
class ProcessingInstruction(Childless, Node):
    """A processing instruction: a target name plus its data string."""

    nodeType = Node.PROCESSING_INSTRUCTION_NODE
    __slots__ = ('target', 'data')

    def __init__(self, target, data):
        self.target = target
        self.data = data

    # nodeValue is an alias for data
    def _get_nodeValue(self):
        return self.data

    def _set_nodeValue(self, value):
        self.data = value

    nodeValue = property(_get_nodeValue, _set_nodeValue)

    # nodeName is an alias for target
    def _get_nodeName(self):
        return self.target

    def _set_nodeName(self, value):
        self.target = value

    nodeName = property(_get_nodeName, _set_nodeName)

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the instruction as ``<?target data?>`` to *writer*."""
        pieces = (indent, self.target, self.data, newl)
        writer.write("%s<?%s %s?>%s" % pieces)
1016
1017
class CharacterData(Childless, Node):
    """Base class for nodes that consist of a single string of data.

    The string is exposed as ``data`` and ``nodeValue`` (aliases of each
    other) and edited through the DOM methods below, which raise
    IndexSizeErr for offsets or counts they do not accept.
    """

    __slots__ = ('_data', 'ownerDocument', 'parentNode', 'previousSibling',
                 'nextSibling')

    def __init__(self):
        self.ownerDocument = self.parentNode = None
        self.previousSibling = self.nextSibling = None
        self._data = ''
        Node.__init__(self)

    def _get_length(self):
        return len(self.data)
    __len__ = _get_length

    def _get_data(self):
        return self._data

    def _set_data(self, data):
        self._data = data

    data = nodeValue = property(_get_data, _set_data)

    def __repr__(self):
        data = self.data
        # Only the first ten characters are shown; longer data is marked
        # with an ellipsis.
        if len(data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return '<DOM %s node "%r%s">' % (
            self.__class__.__name__, data[0:10], dotdotdot)

    def substringData(self, offset, count):
        """Return up to *count* characters of the data starting at *offset*."""
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset >= len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append the string *arg* to the data."""
        self.data = self.data + arg

    def insertData(self, offset, arg):
        """Insert the string *arg* into the data at *offset*."""
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset >= len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])

    def deleteData(self, offset, count):
        """Delete *count* characters of the data starting at *offset*."""
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset >= len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]

    def replaceData(self, offset, count, arg):
        """Replace *count* characters starting at *offset* with *arg*.

        A *count* of zero inserts *arg* at *offset* without removing
        anything.
        """
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset >= len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        # Skip the rebuild only when nothing is removed and nothing is
        # added; testing count alone would silently drop arg.
        if count or arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset+count:])


defproperty(CharacterData, "length", doc="Length of the string data.")
1090
1091
class Text(CharacterData):
    """Character data that appears as ordinary text content."""
    __slots__ = ()

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def splitText(self, offset):
        """Cut this node at *offset* and return the node holding the tail.

        The new node is created from this node's own class, takes the data
        from *offset* onward and the same owner document.  If this node is
        found among its parent's children, the new node is inserted right
        after it.  Raises xml.dom.IndexSizeErr for an offset outside
        ``0..len(data)``.
        """
        if not 0 <= offset <= len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        tail = self.__class__()
        tail.data = self.data[offset:]
        tail.ownerDocument = self.ownerDocument
        following = self.nextSibling
        parent = self.parentNode
        if parent and self in parent.childNodes:
            if following is not None:
                parent.insertBefore(tail, following)
            else:
                parent.appendChild(tail)
        self.data = self.data[:offset]
        return tail

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Hand indent + data + newl to ``_write_data`` for output."""
        _write_data(writer, f"{indent}{self.data}{newl}")

    # DOM Level 3 (WD 9 April 2002)

    def _get_wholeText(self):
        """Join the data of this node and its adjacent text/CDATA siblings."""
        textual = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)

        # Walk left; pieces are collected nearest-first, so reverse them.
        leading = []
        sibling = self.previousSibling
        while sibling is not None and sibling.nodeType in textual:
            leading.append(sibling.data)
            sibling = sibling.previousSibling
        leading.reverse()

        trailing = []
        sibling = self.nextSibling
        while sibling is not None and sibling.nodeType in textual:
            trailing.append(sibling.data)
            sibling = sibling.nextSibling

        return ''.join(leading + [self.data] + trailing)

    def replaceWholeText(self, content):
        """Replace this node and its adjacent text/CDATA siblings by *content*.

        The adjacent siblings are removed from the parent.  With non-empty
        *content* this node keeps its place, receives the new data and is
        returned; otherwise this node is removed as well and None is
        returned.
        """
        # XXX This needs to be seriously changed if minidom ever
        # supports EntityReference nodes.
        textual = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        parent = self.parentNode

        sibling = self.previousSibling
        while sibling is not None and sibling.nodeType in textual:
            further = sibling.previousSibling
            parent.removeChild(sibling)
            sibling = further

        # Fetch the right-hand neighbour before this node may be removed,
        # because removal resets this node's sibling links.
        sibling = self.nextSibling
        if not content:
            parent.removeChild(self)
        while sibling is not None and sibling.nodeType in textual:
            further = sibling.nextSibling
            parent.removeChild(sibling)
            sibling = further

        if not content:
            return None
        self.data = content
        return self

    def _get_isWhitespaceInElementContent(self):
        """Tell whether this is whitespace-only text in element content.

        False unless the data is all whitespace, an enclosing element
        exists, and the owner document has element info for it; in that
        case the info object's ``isElementContent()`` decides.
        """
        if self.data.strip():
            return False
        container = _get_containing_element(self)
        if container is None:
            return False
        info = self.ownerDocument._get_elem_info(container)
        return False if info is None else info.isElementContent()

defproperty(Text, "isWhitespaceInElementContent",
            doc="True iff this text node contains only whitespace"
                " and is in element content.")
defproperty(Text, "wholeText",
            doc="The text of all logically-adjacent text nodes.")
1182
1183
def _get_containing_element(node):
    """Return the closest ancestor of *node* that is an element, or None."""
    ancestor = node.parentNode
    while ancestor is not None and ancestor.nodeType != Node.ELEMENT_NODE:
        ancestor = ancestor.parentNode
    return ancestor
1191
def _get_containing_entref(node):
    """Return the closest entity-reference ancestor of *node*, or None."""
    ancestor = node.parentNode
    while (ancestor is not None
           and ancestor.nodeType != Node.ENTITY_REFERENCE_NODE):
        ancestor = ancestor.parentNode
    return ancestor
1199
1200
class Comment(CharacterData):
    """An XML comment; its text is the inherited ``data`` property."""
    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"

    def __init__(self, data):
        """Create a comment node whose text is *data*."""
        CharacterData.__init__(self)
        self._data = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write ``<!--data-->`` framed by *indent* and *newl*.

        Raises ValueError when the text contains ``--``.  *addindent* is
        not used.
        """
        text = self.data
        if "--" in text:
            raise ValueError("'--' is not allowed in a comment node")
        writer.write(f"{indent}<!--{text}-->{newl}")
1213
1214
class CDATASection(Text):
    """Text that is serialised inside a ``<![CDATA[...]]>`` section."""
    __slots__ = ()

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the data wrapped in CDATA markers.

        The three layout arguments are accepted but not used.  Raises
        ValueError when the data contains the terminator ``]]>``.
        """
        if "]]>" in self.data:
            raise ValueError("']]>' not allowed in a CDATA section")
        writer.write(f"<![CDATA[{self.data}]]>")
1225
1226
class ReadOnlySequentialNamedNodeMap(object):
    """Read-only NamedNodeMap backed by a plain sequence of nodes.

    Lookups scan the sequence linearly; every mutating method raises
    xml.dom.NoModificationAllowedErr.
    """
    __slots__ = ('_seq',)

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals *name*, else None."""
        return next((node for node in self._seq if node.nodeName == name),
                    None)

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching namespace and local name, else None."""
        for node in self._seq:
            if (node.namespaceURI, node.localName) == (namespaceURI, localName):
                return node
        return None

    def __getitem__(self, name_or_tuple):
        """Look up by name, or by ``(namespaceURI, localName)`` tuple.

        Raises KeyError when nothing matches.
        """
        if isinstance(name_or_tuple, tuple):
            found = self.getNamedItemNS(*name_or_tuple)
        else:
            found = self.getNamedItem(name_or_tuple)
        if found is not None:
            return found
        raise KeyError(name_or_tuple)

    def item(self, index):
        """Return the node at *index*; None if negative or out of range."""
        if index < 0:
            return None
        try:
            node = self._seq[index]
        except IndexError:
            return None
        return node

    def removeNamedItem(self, name):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItemNS(self, namespaceURI, localName):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItem(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItemNS(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    # Pickle support: the state is a one-element list holding the sequence.
    def __getstate__(self):
        return [self._seq]

    def __setstate__(self, state):
        (self._seq,) = state[:1]

defproperty(ReadOnlySequentialNamedNodeMap, "length",
            doc="Number of entries in the NamedNodeMap.")
1291
1292
class Identified:
    """Mix-in that stores a public identifier and a system identifier."""

    __slots__ = ('publicId', 'systemId')

    def _identified_mixin_init(self, publicId, systemId):
        """Record both identifiers on the instance."""
        self.publicId, self.systemId = publicId, systemId

    def _get_publicId(self):
        """Return the stored public identifier."""
        return self.publicId

    def _get_systemId(self):
        """Return the stored system identifier."""
        return self.systemId
1307
class DocumentType(Identified, Childless, Node):
    """A document type declaration (``<!DOCTYPE ...>``).

    ``entities`` and ``notations`` are read-only named node maps.
    ``publicId``, ``systemId`` and ``internalSubset`` default to None at
    class level and are set per instance by whoever builds the node.
    """
    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        """Create a doctype named after the local part of *qualifiedName*.

        A false *qualifiedName* leaves ``name`` (and so ``nodeName``) None.
        """
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            _, localname = _nssplit(qualifiedName)
            self.name = localname
        self.nodeName = self.name

    def _get_internalSubset(self):
        return self.internalSubset

    def cloneNode(self, deep):
        """Return a copy of this doctype, or None if a document owns it.

        The copy carries the name and the public/system identifiers, the
        same values that importing a doctype through ``_clone_node``
        transfers.  With *deep* true the notations and entities are copied
        too.  User data handlers are notified with NODE_CLONED for every
        copied node.
        """
        if self.ownerDocument is not None:
            return None
        # it's ok
        clone = DocumentType(None)
        clone.name = self.name
        clone.nodeName = self.name
        # Without these the clone would serialise as a different declaration.
        clone.publicId = self.publicId
        clone.systemId = self.systemId
        operation = xml.dom.UserDataHandler.NODE_CLONED
        if deep:
            clone.entities._seq = []
            clone.notations._seq = []
            for n in self.notations._seq:
                notation = Notation(n.nodeName, n.publicId, n.systemId)
                clone.notations._seq.append(notation)
                n._call_user_data_handler(operation, n, notation)
            for e in self.entities._seq:
                entity = Entity(e.nodeName, e.publicId, e.systemId,
                                e.notationName)
                entity.actualEncoding = e.actualEncoding
                entity.encoding = e.encoding
                entity.version = e.version
                clone.entities._seq.append(entity)
                e._call_user_data_handler(operation, e, entity)
        self._call_user_data_handler(operation, self, clone)
        return clone

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the declaration to *writer*.

        A PUBLIC clause is written when ``publicId`` is truthy, otherwise a
        SYSTEM clause when ``systemId`` is truthy; the internal subset
        follows in brackets when it is not None.  *newl* separates the
        clauses and ends the declaration; *indent* and *addindent* are not
        used.
        """
        writer.write("<!DOCTYPE ")
        writer.write(self.name)
        if self.publicId:
            writer.write("%s PUBLIC '%s'%s '%s'"
                         % (newl, self.publicId, newl, self.systemId))
        elif self.systemId:
            writer.write("%s SYSTEM '%s'" % (newl, self.systemId))
        if self.internalSubset is not None:
            writer.write(" [")
            writer.write(self.internalSubset)
            writer.write("]")
        writer.write(">"+newl)
1367
class Entity(Identified, Node):
    """An entity declared in a document type.

    The node owns a ``childNodes`` list, but every method that would
    change its children raises xml.dom.HierarchyRequestErr.
    """
    attributes = None
    nodeType = Node.ENTITY_NODE
    nodeValue = None

    actualEncoding = None
    encoding = None
    version = None

    def __init__(self, name, publicId, systemId, notation):
        """Create the entity *name* with its identifiers and notation name."""
        self._identified_mixin_init(publicId, systemId)
        self.childNodes = NodeList()
        self.notationName = notation
        self.nodeName = name

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_encoding(self):
        return self.encoding

    def _get_version(self):
        return self.version

    # The child list of an entity is fixed: all tree-editing calls fail.

    def appendChild(self, newChild):
        raise xml.dom.HierarchyRequestErr(
            "cannot append children to an entity node")

    def insertBefore(self, newChild, refChild):
        raise xml.dom.HierarchyRequestErr(
            "cannot insert children below an entity node")

    def removeChild(self, oldChild):
        raise xml.dom.HierarchyRequestErr(
            "cannot remove children from an entity node")

    def replaceChild(self, newChild, oldChild):
        raise xml.dom.HierarchyRequestErr(
            "cannot replace children of an entity node")
1407
class Notation(Identified, Childless, Node):
    """A notation declared in a document type."""
    nodeType = Node.NOTATION_NODE
    nodeValue = None

    def __init__(self, name, publicId, systemId):
        """Create the notation *name* with its public and system identifiers."""
        self._identified_mixin_init(publicId, systemId)
        self.nodeName = name
1415
1416
class DOMImplementation(DOMImplementationLS):
    """Factory for documents and document types, plus feature queries."""
    _features = [("core", "1.0"),
                 ("core", "2.0"),
                 ("core", None),
                 ("xml", "1.0"),
                 ("xml", "2.0"),
                 ("xml", None),
                 ("ls-load", "3.0"),
                 ("ls-load", None),
                 ]

    def hasFeature(self, feature, version):
        """Tell whether (*feature*, *version*) is listed in ``_features``.

        The feature name is compared in lower case and an empty version
        string is treated like None.
        """
        wanted = None if version == "" else version
        return (feature.lower(), wanted) in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a document, with a root element unless all arguments are None.

        Raises xml.dom.WrongDocumentErr for a doctype that already has a
        parent, xml.dom.InvalidCharacterErr when a root element is needed
        but no name was given, and xml.dom.NamespaceErr for an ``xml``
        prefix bound to the wrong namespace or a prefix without namespace.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = self._create_document()

        wants_root = (namespaceURI is not None
                      or qualifiedName is not None
                      or doctype is not None)

        if wants_root:
            if not qualifiedName:
                # The spec is unclear what to raise here; SyntaxErr
                # would be the other obvious candidate. Since Xerces raises
                # InvalidCharacterErr, and since SyntaxErr is not listed
                # for createDocument, that seems to be the better choice.
                # XXX: need to check for illegal characters here and in
                # createElement.

                # DOM Level III clears this up when talking about the return
                # value of this function. If namespaceURI, qName and DocType
                # are Null the document is returned without a document
                # element. Otherwise if doctype or namespaceURI are not None
                # then we go back to the above problem.
                raise xml.dom.InvalidCharacterErr("Element with no name")

            prefix, _ = _nssplit(qualifiedName)
            if (prefix == "xml"
                    and namespaceURI != "http://www.w3.org/XML/1998/namespace"):
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            root = doc.createElementNS(namespaceURI, qualifiedName)
            # The doctype, when present, goes in front of the root element.
            if doctype:
                doc.appendChild(doctype)
            doc.appendChild(root)

        if doctype:
            doctype.parentNode = doctype.ownerDocument = doc

        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a new DocumentType carrying the given identifiers."""
        new_type = DocumentType(qualifiedName)
        new_type.publicId = publicId
        new_type.systemId = systemId
        return new_type

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return this object if *feature* is supported in any version, else None."""
        return self if self.hasFeature(feature, None) else None

    # internal
    def _create_document(self):
        return Document()
1495
class ElementInfo(object):
    """Content-model information for one element type.

    Every query in this default implementation gives a fixed, neutral
    answer.  It is not expected to be used in practice; DOM builders
    should supply implementations that use the information they have.

    """

    __slots__ = ('tagName',)

    def __init__(self, name):
        self.tagName = name

    def getAttributeType(self, aname):
        """Return the module's shared ``_no_type`` object for any attribute."""
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Namespace-aware variant; also returns ``_no_type``."""
        return _no_type

    def isElementContent(self):
        """Always False in this default implementation."""
        return False

    def isEmpty(self):
        """Returns true iff this element is declared to have an EMPTY
        content model."""
        return False

    def isId(self, aname):
        """Returns true iff the named attribute is a DTD-style ID."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Returns true iff the identified attribute is a DTD-style ID."""
        return False

    # Pickle support: the tag name is the entire state.
    def __getstate__(self):
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state
1537
def _clear_id_cache(node):
    """Reset the ID lookup state of the document that *node* belongs to.

    A document node is reset directly; any other node resets its owner
    document, but only if ``_in_document`` reports it as part of a
    document tree.  Otherwise nothing happens.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        document = node
    elif _in_document(node):
        document = node.ownerDocument
    else:
        return
    document._id_cache.clear()
    document._id_search_stack = None
1545
class Document(Node, DocumentLS):
    """The root node of a DOM tree and the factory for the nodes in it.

    Besides its children the document keeps:

    * ``doctype`` -- the document type node, or None;
    * ``_elem_info`` -- element information keyed by tag name or by
      ``(namespaceURI, localName)``;
    * ``_id_cache`` / ``_id_search_stack`` -- state of the incremental
      search performed by getElementById().
    """
    __slots__ = ('_elem_info', 'doctype',
                 '_id_search_stack', 'childNodes', '_id_cache')
    _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)

    implementation = DOMImplementation()
    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    parentNode = None
    previousSibling = nextSibling = None


    # Document attributes from Level 3 (WD 9 April 2002)

    actualEncoding = None
    encoding = None
    standalone = None
    version = None
    strictErrorChecking = False
    errorHandler = None
    documentURI = None

    # getElementById() only searches when this counter or _elem_info is
    # truthy.  NOTE(review): presumably raised wherever an attribute is
    # flagged as an ID; that code is not part of this class.
    _magic_id_count = 0

    def __init__(self):
        self.doctype = None
        self.childNodes = NodeList()
        # mapping of (namespaceURI, localName) -> ElementInfo
        #        and tagName -> ElementInfo
        self._elem_info = {}
        self._id_cache = {}
        self._id_search_stack = None

    def _get_elem_info(self, element):
        """Return the stored element info for *element*, or None."""
        if element.namespaceURI:
            key = element.namespaceURI, element.localName
        else:
            key = element.tagName
        return self._elem_info.get(key)

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_doctype(self):
        return self.doctype

    def _get_documentURI(self):
        return self.documentURI

    def _get_encoding(self):
        return self.encoding

    def _get_errorHandler(self):
        return self.errorHandler

    def _get_standalone(self):
        return self.standalone

    def _get_strictErrorChecking(self):
        return self.strictErrorChecking

    def _get_version(self):
        return self.version

    def appendChild(self, node):
        """Append *node* as the last child and return it.

        Raises xml.dom.HierarchyRequestErr if the node type may not be a
        child of a document, or if it would be a second document element.
        """
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        if node.parentNode is not None:
            # This needs to be done before the next test since this
            # may *be* the document element, in which case it should
            # end up re-ordered to the end.
            node.parentNode.removeChild(node)

        if node.nodeType == Node.ELEMENT_NODE \
           and self._get_documentElement():
            raise xml.dom.HierarchyRequestErr(
                "two document elements disallowed")
        return Node.appendChild(self, node)

    def removeChild(self, oldChild):
        """Detach *oldChild* from the document and return it.

        The remaining neighbours are linked to each other, and the ID
        lookup state is reset when the removed node is of a kind that can
        have children.  Raises xml.dom.NotFoundErr if *oldChild* is not a
        child of this document.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        # Close the gap so that the neighbours no longer point at the
        # removed node.
        following = oldChild.nextSibling
        preceding = oldChild.previousSibling
        if following is not None:
            following.previousSibling = preceding
        if preceding is not None:
            preceding.nextSibling = following
        oldChild.nextSibling = oldChild.previousSibling = None
        oldChild.parentNode = None
        # Cached IDs may refer to the subtree that just left the document.
        # documentElement itself is computed from childNodes, so it needs
        # no update here.
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        return oldChild

    def _get_documentElement(self):
        """Return the first child that is an element, or None."""
        for node in self.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                return node

    def unlink(self):
        """Unlink the doctype (if any) and then the rest of the tree."""
        if self.doctype is not None:
            self.doctype.unlink()
            self.doctype = None
        Node.unlink(self)

    def cloneNode(self, deep):
        """Return a deep copy of the document; None when *deep* is false."""
        if not deep:
            return None
        clone = self.implementation.createDocument(None, None, None)
        clone.encoding = self.encoding
        clone.standalone = self.standalone
        clone.version = self.version
        for n in self.childNodes:
            childclone = _clone_node(n, deep, clone)
            assert childclone.ownerDocument.isSameNode(clone)
            clone.childNodes.append(childclone)
            # NOTE(review): DOCUMENT_NODE is not in _child_node_types, so
            # this first branch looks unreachable -- confirm the intent.
            if childclone.nodeType == Node.DOCUMENT_NODE:
                assert clone.documentElement is None
            elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
                assert clone.doctype is None
                clone.doctype = childclone
            childclone.parentNode = clone
        self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
                                     self, clone)
        return clone

    # Factory methods: each returns a new, unattached node owned by this
    # document.

    def createDocumentFragment(self):
        d = DocumentFragment()
        d.ownerDocument = self
        return d

    def createElement(self, tagName):
        e = Element(tagName)
        e.ownerDocument = self
        return e

    def createTextNode(self, data):
        """Return a new Text node; raises TypeError unless *data* is a str."""
        if not isinstance(data, str):
            raise TypeError("node contents must be a string")
        t = Text()
        t.data = data
        t.ownerDocument = self
        return t

    def createCDATASection(self, data):
        """Return a new CDATASection; raises TypeError unless *data* is a str."""
        if not isinstance(data, str):
            raise TypeError("node contents must be a string")
        c = CDATASection()
        c.data = data
        c.ownerDocument = self
        return c

    def createComment(self, data):
        c = Comment(data)
        c.ownerDocument = self
        return c

    def createProcessingInstruction(self, target, data):
        p = ProcessingInstruction(target, data)
        p.ownerDocument = self
        return p

    def createAttribute(self, qName):
        a = Attr(qName)
        a.ownerDocument = self
        a.value = ""
        return a

    def createElementNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        e = Element(qualifiedName, namespaceURI, prefix)
        e.ownerDocument = self
        return e

    def createAttributeNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        a = Attr(qualifiedName, namespaceURI, localName, prefix)
        a.ownerDocument = self
        a.value = ""
        return a

    # A couple of implementation-specific helpers to create node types
    # not supported by the W3C DOM specs:

    def _create_entity(self, name, publicId, systemId, notationName):
        e = Entity(name, publicId, systemId, notationName)
        e.ownerDocument = self
        return e

    def _create_notation(self, name, publicId, systemId):
        n = Notation(name, publicId, systemId)
        n.ownerDocument = self
        return n

    def getElementById(self, id):
        """Return the element that has an ID attribute equal to *id*, or None.

        Results are cached.  The tree is searched incrementally: the stack
        of elements still to visit survives between calls, so a later call
        continues where the previous one stopped.
        """
        if id in self._id_cache:
            return self._id_cache[id]
        if not (self._elem_info or self._magic_id_count):
            return None

        stack = self._id_search_stack
        if stack is None:
            # we never searched before, or the cache has been cleared
            root = self.documentElement
            if root is None:
                # Nothing to search; leave the stack unset so that a
                # search starts once a document element exists.
                return None
            stack = [root]
            self._id_search_stack = stack
        elif not stack:
            # Previous search was completed and cache is still valid;
            # no matching node.
            return None

        result = None
        while stack:
            node = stack.pop()
            # add child elements to stack for continued searching
            stack.extend([child for child in node.childNodes
                          if child.nodeType in _nodeTypes_with_children])
            # check this node
            info = self._get_elem_info(node)
            if info:
                # We have to process all ID attributes before
                # returning in order to get all the attributes set to
                # be IDs using Element.setIdAttribute*().
                for attr in node.attributes.values():
                    if attr.namespaceURI:
                        if info.isIdNS(attr.namespaceURI, attr.localName):
                            self._id_cache[attr.value] = node
                            if attr.value == id:
                                result = node
                            elif not node._magic_id_nodes:
                                break
                    elif info.isId(attr.name):
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif not node._magic_id_nodes:
                            break
                    elif attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif node._magic_id_nodes == 1:
                            break
            elif node._magic_id_nodes:
                for attr in node.attributes.values():
                    if attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
            if result is not None:
                break
        return result

    def getElementsByTagName(self, name):
        return _get_elements_by_tagName_helper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, NodeList())

    def isSupported(self, feature, version):
        return self.implementation.hasFeature(feature, version)

    def importNode(self, node, deep):
        """Return a copy of *node* owned by this document.

        Raises xml.dom.NotSupportedErr for document and document type
        nodes.
        """
        if node.nodeType == Node.DOCUMENT_NODE:
            raise xml.dom.NotSupportedErr("cannot import document nodes")
        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
            raise xml.dom.NotSupportedErr("cannot import document type nodes")
        return _clone_node(node, deep, self)

    def writexml(self, writer, indent="", addindent="", newl="", encoding=None,
                 standalone=None):
        """Write the XML declaration followed by every child node.

        *encoding* and *standalone* only add the matching pseudo-attributes
        to the declaration; the layout arguments are passed on to the
        children.
        """
        declarations = []

        if encoding:
            declarations.append(f'encoding="{encoding}"')
        if standalone is not None:
            declarations.append(f'standalone="{"yes" if standalone else "no"}"')

        writer.write(f'<?xml version="1.0" {" ".join(declarations)}?>{newl}')

        for node in self.childNodes:
            node.writexml(writer, indent, addindent, newl)

    # DOM Level 3 (WD 9 April 2002)

    def renameNode(self, n, namespaceURI, name):
        """Rename the element or attribute *n* in place and return it.

        Raises xml.dom.WrongDocumentErr if *n* belongs to another document,
        xml.dom.NotSupportedErr for other node types, and
        xml.dom.NamespaceErr for an illegal use of ``xmlns``.
        """
        if n.ownerDocument is not self:
            raise xml.dom.WrongDocumentErr(
                "cannot rename nodes from other documents;\n"
                "expected %s,\nfound %s" % (self, n.ownerDocument))
        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
            raise xml.dom.NotSupportedErr(
                "renameNode() only applies to element and attribute nodes")
        if namespaceURI != EMPTY_NAMESPACE:
            if ':' in name:
                prefix, localName = name.split(':', 1)
                if ( prefix == "xmlns"
                     and namespaceURI != xml.dom.XMLNS_NAMESPACE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of 'xmlns' prefix")
            else:
                if ( name == "xmlns"
                     and namespaceURI != xml.dom.XMLNS_NAMESPACE
                     and n.nodeType == Node.ATTRIBUTE_NODE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of the 'xmlns' attribute")
                prefix = None
                localName = name
        else:
            prefix = None
            localName = None
        if n.nodeType == Node.ATTRIBUTE_NODE:
            # The attribute is taken off its element while it is renamed
            # and put back afterwards, keeping its ID flag.
            element = n.ownerElement
            if element is not None:
                is_id = n._is_id
                element.removeAttributeNode(n)
        else:
            element = None
        n.prefix = prefix
        n._localName = localName
        n.namespaceURI = namespaceURI
        n.nodeName = name
        if n.nodeType == Node.ELEMENT_NODE:
            n.tagName = name
        else:
            # attribute node
            n.name = name
            if element is not None:
                element.setAttributeNode(n)
                if is_id:
                    element.setIdAttributeNode(n)
        # It's not clear from a semantic perspective whether we should
        # call the user data handlers for the NODE_RENAMED event since
        # we're re-using the existing node.  The draft spec has been
        # interpreted as meaning "no, don't call the handler unless a
        # new node is created."
        return n

defproperty(Document, "documentElement",
            doc="Top-level element of this document.")
1887
1888
1889 def _clone_node(node, deep, newOwnerDocument):
1890 """
1891 Clone a node and give it the new owner document.
1892 Called by Node.cloneNode and Document.importNode
1893 """
1894 if node.ownerDocument.isSameNode(newOwnerDocument):
1895 operation = xml.dom.UserDataHandler.NODE_CLONED
1896 else:
1897 operation = xml.dom.UserDataHandler.NODE_IMPORTED
1898 if node.nodeType == Node.ELEMENT_NODE:
1899 clone = newOwnerDocument.createElementNS(node.namespaceURI,
1900 node.nodeName)
1901 for attr in node.attributes.values():
1902 clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
1903 a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
1904 a.specified = attr.specified
1905
1906 if deep:
1907 for child in node.childNodes:
1908 c = _clone_node(child, deep, newOwnerDocument)
1909 clone.appendChild(c)
1910
1911 elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
1912 clone = newOwnerDocument.createDocumentFragment()
1913 if deep:
1914 for child in node.childNodes:
1915 c = _clone_node(child, deep, newOwnerDocument)
1916 clone.appendChild(c)
1917
1918 elif node.nodeType == Node.TEXT_NODE:
1919 clone = newOwnerDocument.createTextNode(node.data)
1920 elif node.nodeType == Node.CDATA_SECTION_NODE:
1921 clone = newOwnerDocument.createCDATASection(node.data)
1922 elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
1923 clone = newOwnerDocument.createProcessingInstruction(node.target,
1924 node.data)
1925 elif node.nodeType == Node.COMMENT_NODE:
1926 clone = newOwnerDocument.createComment(node.data)
1927 elif node.nodeType == Node.ATTRIBUTE_NODE:
1928 clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
1929 node.nodeName)
1930 clone.specified = True
1931 clone.value = node.value
1932 elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1933 assert node.ownerDocument is not newOwnerDocument
1934 operation = xml.dom.UserDataHandler.NODE_IMPORTED
1935 clone = newOwnerDocument.implementation.createDocumentType(
1936 node.name, node.publicId, node.systemId)
1937 clone.ownerDocument = newOwnerDocument
1938 if deep:
1939 clone.entities._seq = []
1940 clone.notations._seq = []
1941 for n in node.notations._seq:
1942 notation = Notation(n.nodeName, n.publicId, n.systemId)
1943 notation.ownerDocument = newOwnerDocument
1944 clone.notations._seq.append(notation)
1945 if hasattr(n, '_call_user_data_handler'):
1946 n._call_user_data_handler(operation, n, notation)
1947 for e in node.entities._seq:
1948 entity = Entity(e.nodeName, e.publicId, e.systemId,
1949 e.notationName)
1950 entity.actualEncoding = e.actualEncoding
1951 entity.encoding = e.encoding
1952 entity.version = e.version
1953 entity.ownerDocument = newOwnerDocument
1954 clone.entities._seq.append(entity)
1955 if hasattr(e, '_call_user_data_handler'):
1956 e._call_user_data_handler(operation, e, entity)
1957 else:
1958 # Note the cloning of Document and DocumentType nodes is
1959 # implementation specific. minidom handles those cases
1960 # directly in the cloneNode() methods.
1961 raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))
1962
1963 # Check for _call_user_data_handler() since this could conceivably
1964 # used with other DOM implementations (one of the FourThought
1965 # DOMs, perhaps?).
1966 if hasattr(node, '_call_user_data_handler'):
1967 node._call_user_data_handler(operation, node, clone)
1968 return clone
1969
1970
def _nssplit(qualifiedName):
    """Split a qualified name at its first colon into prefix and local name.

    Returns the two-item list ``[prefix, localName]`` when a colon is
    present, and the tuple ``(None, qualifiedName)`` when it is not.
    """
    prefix, colon, local = qualifiedName.partition(':')
    if colon:
        return [prefix, local]
    return (None, prefix)
1977
1978
def _do_pulldom_parse(func, args, kwargs):
    """Run a pulldom parse and return the fully expanded first node.

    *func* is called with *args* and *kwargs* to obtain the event stream.
    The node of its first event is expanded, the stream is cleared, and
    the node is returned.
    """
    stream = func(*args, **kwargs)
    _, root = stream.getEvent()
    stream.expandNode(root)
    stream.clear()
    return root
1985
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object."""
    # A custom parser or a buffer size selects the pulldom route;
    # otherwise expatbuilder parses the input.
    if parser is not None or bufsize:
        from xml.dom import pulldom
        options = {'parser': parser, 'bufsize': bufsize}
        return _do_pulldom_parse(pulldom.parse, (file,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parse(file)
1995
def parseString(string, parser=None):
    """Parse a file into a DOM from a string."""
    # Only an explicitly supplied parser selects the pulldom route.
    if parser is not None:
        from xml.dom import pulldom
        return _do_pulldom_parse(pulldom.parseString, (string,),
                                 {'parser': parser})
    from xml.dom import expatbuilder
    return expatbuilder.parseString(string)
2005
def getDOMImplementation(features=None):
    """Return the DOM implementation, or None if a feature is unsupported.

    *features* may be a feature string, which is parsed with
    ``domreg._parse_feature_string``, or an iterable of
    ``(feature, version)`` pairs.  A false value asks for no features.
    """
    implementation = Document.implementation
    if not features:
        return implementation
    if isinstance(features, str):
        features = domreg._parse_feature_string(features)
    for feature, version in features:
        # Document.implementation is looked up again on purpose, as the
        # original did for every pair.
        if not Document.implementation.hasFeature(feature, version):
            return None
    return Document.implementation