1 #
2 # ElementTree
3 # $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $
4 #
5 # limited xinclude support for element trees
6 #
7 # history:
8 # 2003-08-15 fl created
9 # 2003-11-14 fl fixed default loader
10 #
11 # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
12 #
13 # fredrik@pythonware.com
14 # http://www.pythonware.com
15 #
16 # --------------------------------------------------------------------
17 # The ElementTree toolkit is
18 #
19 # Copyright (c) 1999-2008 by Fredrik Lundh
20 #
21 # By obtaining, using, and/or copying this software and/or its
22 # associated documentation, you agree that you have read, understood,
23 # and will comply with the following terms and conditions:
24 #
25 # Permission to use, copy, modify, and distribute this software and
26 # its associated documentation for any purpose and without fee is
27 # hereby granted, provided that the above copyright notice appears in
28 # all copies, and that both that copyright notice and this permission
29 # notice appear in supporting documentation, and that the name of
30 # Secret Labs AB or the author not be used in advertising or publicity
31 # pertaining to distribution of the software without specific, written
32 # prior permission.
33 #
34 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
35 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
36 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
37 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
38 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
39 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
40 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
41 # OF THIS SOFTWARE.
42 # --------------------------------------------------------------------
43
44 # Licensed to PSF under a Contributor Agreement.
45 # See https://www.python.org/psf/license for licensing details.
46
47 ##
48 # Limited XInclude support for the ElementTree package.
49 ##
50
51 import copy
52 from . import ElementTree
53 from urllib.parse import urljoin
54
55 XINCLUDE = "{http://www.w3.org/2001/XInclude}"
56
57 XINCLUDE_INCLUDE = XINCLUDE + "include"
58 XINCLUDE_FALLBACK = XINCLUDE + "fallback"
59
60 # For security reasons, the inclusion depth is limited to this read-only value by default.
61 DEFAULT_MAX_INCLUSION_DEPTH = 6
62
63
64 ##
65 # Fatal include error.
66
class FatalIncludeError(SyntaxError):
    """Signal that an XInclude directive could not be processed.

    Derives from SyntaxError, so handlers written for SyntaxError also
    catch this exception.
    """
69
70
class LimitedRecursiveIncludeError(FatalIncludeError):
    """Signal that the maximum XInclude nesting depth was reached.

    Specialises FatalIncludeError, so callers can handle the depth limit
    separately or together with every other fatal include failure.
    """
73
74
75 ##
76 # Default loader. This loader reads an included resource from disk.
77 #
78 # @param href Resource reference.
79 # @param parse Parse mode. Either "xml" or "text".
80 # @param encoding Optional text encoding (UTF-8 by default for "text").
# @return The expanded resource.  If the parse mode is "xml", this
#    is the root Element of the parsed document.  If the parse mode
#    is "text", this is a Unicode string.  If the loader fails, it
#    can return None or raise an OSError exception.
85 # @throws OSError If the loader fails to load the resource.
86
def default_loader(href, parse, encoding=None):
    """Load the resource named by *href* from the local file system.

    href     -- path handed to open().
    parse    -- "xml" to parse the file as XML; any other value reads
                the file as text.
    encoding -- text encoding for the non-XML case; a missing or empty
                value falls back to UTF-8.

    Returns the root element of the parsed document in "xml" mode,
    otherwise the complete file contents as a string.  Errors raised by
    open() (OSError) or by the XML parser propagate to the caller.
    """
    if parse != "xml":
        # Every mode other than "xml" is treated as plain text.
        with open(href, 'r', encoding=encoding or 'UTF-8') as stream:
            return stream.read()

    # Binary mode: the XML parser works out the document encoding itself.
    with open(href, 'rb') as stream:
        return ElementTree.parse(stream).getroot()
97
98 ##
99 # Expand XInclude directives.
100 #
101 # @param elem Root element.
102 # @param loader Optional resource loader. If omitted, it defaults
103 # to {@link default_loader}. If given, it should be a callable
104 # that implements the same interface as <b>default_loader</b>.
105 # @param base_url The base URL of the original file, to resolve
106 # relative include file references.
# @param max_depth The maximum number of recursive inclusions.
#    Limited to reduce the risk of malicious content explosion.
#    Pass None to disable the limitation.  Negative values are
#    rejected with a ValueError.
110 # @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
111 # @throws FatalIncludeError If the function fails to include a given
112 # resource, or if the tree contains malformed XInclude elements.
# @throws OSError If the function fails to load a given resource.
# @throws ValueError If {@link max_depth} is negative.
# @returns None.  The tree is expanded in place.
115
def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand the XInclude directives below *elem* in place.

    elem      -- an element, or any object with a getroot() method (its
                 root element is used instead).
    loader    -- callable with the same interface as default_loader;
                 default_loader is used when omitted.
    base_url  -- base URL used to resolve relative href values.
    max_depth -- maximum nesting of XML inclusions; None disables the
                 limit.

    Raises ValueError when max_depth is negative.  Errors raised while
    expanding the tree (FatalIncludeError and whatever the loader
    raises) propagate unchanged.  Nothing is returned.
    """
    if max_depth is not None and max_depth < 0:
        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)

    # The recursive worker refuses an inclusion only when its remaining
    # depth is exactly 0; counting down from -1 never gets there, which
    # is how "no limit" is encoded.
    remaining_depth = -1 if max_depth is None else max_depth

    root = elem.getroot() if hasattr(elem, 'getroot') else elem
    resolve = default_loader if loader is None else loader

    # A fresh set tracks the chain of hrefs currently being expanded.
    _include(root, resolve, base_url, remaining_depth, set())
129
130
131 def _include(elem, loader, base_url, max_depth, _parent_hrefs):
132 # look for xinclude elements
133 i = 0
134 while i < len(elem):
135 e = elem[i]
136 if e.tag == XINCLUDE_INCLUDE:
137 # process xinclude directive
138 href = e.get("href")
139 if base_url:
140 href = urljoin(base_url, href)
141 parse = e.get("parse", "xml")
142 if parse == "xml":
143 if href in _parent_hrefs:
144 raise FatalIncludeError("recursive include of %s" % href)
145 if max_depth == 0:
146 raise LimitedRecursiveIncludeError(
147 "maximum xinclude depth reached when including file %s" % href)
148 _parent_hrefs.add(href)
149 node = loader(href, parse)
150 if node is None:
151 raise FatalIncludeError(
152 "cannot load %r as %r" % (href, parse)
153 )
154 node = copy.copy(node) # FIXME: this makes little sense with recursive includes
155 _include(node, loader, href, max_depth - 1, _parent_hrefs)
156 _parent_hrefs.remove(href)
157 if e.tail:
158 node.tail = (node.tail or "") + e.tail
159 elem[i] = node
160 elif parse == "text":
161 text = loader(href, parse, e.get("encoding"))
162 if text is None:
163 raise FatalIncludeError(
164 "cannot load %r as %r" % (href, parse)
165 )
166 if e.tail:
167 text += e.tail
168 if i:
169 node = elem[i-1]
170 node.tail = (node.tail or "") + text
171 else:
172 elem.text = (elem.text or "") + text
173 del elem[i]
174 continue
175 else:
176 raise FatalIncludeError(
177 "unknown parse type in xi:include tag (%r)" % parse
178 )
179 elif e.tag == XINCLUDE_FALLBACK:
180 raise FatalIncludeError(
181 "xi:fallback tag must be child of xi:include (%r)" % e.tag
182 )
183 else:
184 _include(e, loader, base_url, max_depth, _parent_hrefs)
185 i += 1