1 """
2 Escape the `body` part of .chm source file to 7-bit ASCII, to fix visual
3 effect on some MBCS Windows systems.
4
5 https://bugs.python.org/issue32174
6 """
7
8 import pathlib
9 import re
10 from html.entities import codepoint2name
11
12 from sphinx.util.logging import getLogger
13
# Escape every character whose code point is above 0x7F.
15 def _process(string):
16 def escape(matchobj):
17 codepoint = ord(matchobj.group(0))
18
19 name = codepoint2name.get(codepoint)
20 if name is None:
21 return '&#%d;' % codepoint
22 else:
23 return '&%s;' % name
24
25 return re.sub(r'[^\x00-\x7F]', escape, string)
26
def escape_for_chm(app, pagename, templatename, context, doctree):
    """Escape the rendered page body to 7-bit ASCII for .chm output.

    Does nothing unless ``app.builder.name`` is ``'htmlhelp'``.  Otherwise,
    if *context* holds a non-``None`` ``'body'`` entry, that entry is
    replaced in place by its escaped form.  *pagename*, *templatename* and
    *doctree* are accepted but not used.
    """
    targets_chm = getattr(app.builder, 'name', '') == 'htmlhelp'

    # Only look at the context when building .chm output.
    rendered = context.get('body') if targets_chm else None
    if rendered is not None:
        context['body'] = _process(rendered)
36
def fixup_keywords(app, exception):
    """Rewrite apostrophe escapes in the generated ``.hhk`` keywords file.

    Runs only for the ``htmlhelp`` builder and only when *exception* is
    false.  Every hexadecimal character reference for the apostrophe
    (``#x27``) in ``<outdir>/<htmlhelp_basename>.hhk`` is replaced by the
    equivalent decimal reference (``#39``).  The file is read and written as
    raw bytes, so its encoding is left untouched.

    :param app: object exposing ``builder.name``, ``builder.outdir`` and
        ``builder.config.htmlhelp_basename``.
    :param exception: a true value makes the function return without
        touching any file.
    """
    # only works for .chm output
    if getattr(app.builder, 'name', '') != 'htmlhelp' or exception:
        return

    getLogger(__name__).info('fixing HTML escapes in keywords file...')
    outdir = pathlib.Path(app.builder.outdir)
    hhk_path = outdir / (app.builder.config.htmlhelp_basename + '.hhk')

    # '\x26' is '&'.  The two references are spelled with an escaped
    # ampersand on purpose: written literally, both collapse to a bare
    # apostrophe as soon as any tool HTML-decodes this source file, which
    # turns the replacement into a syntax error.
    # NOTE(review): presumably the keywords index is not displayed correctly
    # with the hexadecimal form -- confirm against the original bug report.
    hex_apostrophe = b'\x26#x27;'
    dec_apostrophe = b'\x26#39;'

    index = hhk_path.read_bytes()
    hhk_path.write_bytes(index.replace(hex_apostrophe, dec_apostrophe))
49
def setup(app):
    """Register this extension's event handlers on *app*.

    Returns the extension metadata dictionary (version and
    ``parallel_read_safe`` flag).
    """
    handlers = (
        # `html-page-context` is emitted when the HTML builder has created
        # a context dictionary to render a template with.
        ('html-page-context', escape_for_chm),
        # `build-finished` is emitted when all the files have been output.
        ('build-finished', fixup_keywords),
    )
    for event_name, handler in handlers:
        app.connect(event_name, handler)

    return dict(version='1.0', parallel_read_safe=True)