1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3
4 # Check for stylistic and formal issues in .rst and .py
5 # files included in the documentation.
6 #
7 # 01/2009, Georg Brandl
8
9 # TODO: - wrong versions in versionadded/changed
10 # - wrong markup after versionchanged directive
11
12 import os
13 import re
14 import sys
15 import getopt
16 from string import ascii_letters
17 from os.path import join, splitext, abspath, exists
18 from collections import defaultdict
19
# Names of the directives the checks know about.  Every entry is spliced
# verbatim into a regular-expression alternation, so an entry may itself be a
# pattern fragment (see "deprecated(?!-removed)").
directives = [
    # standard docutils ones
    "admonition", "attention", "caution", "class",
    "compound", "container", "contents", "csv-table",
    "danger", "date", "default-role", "epigraph",
    "error", "figure", "footer", "header",
    "highlights", "hint", "image", "important",
    "include", "line-block", "list-table", "meta",
    "note", "parsed-literal", "pull-quote", "raw",
    "replace", "restructuredtext-test-directive", "role", "rubric",
    "sectnum", "sidebar", "table", "target-notes",
    "tip", "title", "topic", "unicode",
    "warning",
] + [
    # Sphinx and Python docs custom ones
    "acks", "attribute", "autoattribute", "autoclass",
    "autodata", "autoexception", "autofunction", "automethod",
    "automodule", "availability", "centered", "cfunction",
    "class", "classmethod", "cmacro", "cmdoption",
    "cmember", "code-block", "confval", "cssclass",
    "ctype", "currentmodule", "cvar", "data",
    "decorator", "decoratormethod", "deprecated-removed", "deprecated(?!-removed)",
    "describe", "directive", "doctest", "envvar",
    "event", "exception", "function", "glossary",
    "highlight", "highlightlang", "impl-detail", "index",
    "literalinclude", "method", "miscnews", "module",
    "moduleauthor", "opcode", "pdbcommand", "productionlist",
    "program", "role", "sectionauthor", "seealso",
    "sourcecode", "staticmethod", "tabularcolumns", "testcode",
    "testoutput", "testsetup", "toctree", "todo",
    "todolist", "versionadded", "versionchanged",
]
44
# Role markers the checks know about, colons included.  Each entry is a
# regular-expression fragment; the look-behind on some of the short names
# keeps them from matching inside their domain-qualified spelling.
roles = [
    r"(?<!py):class:", r"(?<!:c|py):func:", r"(?<!py):meth:",
    r"(?<!:py):mod:", r":exc:", r":issue:", r":attr:",
    r":c:func:", r":ref:", r":const:", r":term:",
    r"(?<!:c|py):data:", r":keyword:", r":file:", r":pep:",
    r":c:type:", r":c:member:", r":option:", r":rfc:",
    r":envvar:", r":c:data:", r":source:", r":mailheader:",
    r":program:", r":c:macro:", r":dfn:", r":kbd:",
    r":command:", r":mimetype:", r":opcode:", r":manpage:",
    r":py:data:", r":RFC:", r":pdbcmd:", r":abbr:",
    r":samp:", r":token:", r":PEP:", r":sup:",
    r":py:class:", r":menuselection:", r":doc:", r":sub:",
    r":py:meth:", r":newsgroup:", r":code:", r":py:func:",
    r":makevar:", r":guilabel:", r":title-reference:", r":py:mod:",
    r":download:", r":2to3fixer:",
]
100
# Collapse each name list into a single capturing alternation, "(a|b|c)",
# ready to be interpolated into the patterns below.
all_directives = "(%s)" % "|".join(directives)
all_roles = "(%s)" % "|".join(roles)

# Find a comment that looks like a directive, such as:
#     .. versionchanged 3.6
# or
#     .. versionchanged: 3.6
# where the author meant:
#     .. versionchanged:: 3.6
seems_directive_re = re.compile(
    r"(?<!\.)\.\. %s([^a-z:]|:(?!:))" % all_directives
)

# Find a directive introduced by three dots instead of two, such as:
#     ... versionchanged:: 3.6
# instead of:
#     .. versionchanged:: 3.6
three_dot_directive_re = re.compile(
    r"\.\.\. %s::" % all_directives
)

# Find a role followed by double backticks instead of single ones, such as:
#     :const:``None``
# instead of:
#     :const:`None`
double_backtick_role = re.compile(
    r"(?<!``)%s``" % all_roles
)


# Find a role followed by no backtick at all, such as:
#     :const:None
# instead of:
#     :const:`None`
role_with_no_backticks = re.compile(
    r"%s[^` ]" % all_roles
)

# Find a role glued to the preceding word, such as:
#     the:c:func:`PyThreadState_LeaveTracing` function.
# instead of:
#     the :c:func:`PyThreadState_LeaveTracing` function.
role_glued_with_word = re.compile(
    r"[a-zA-Z]%s" % all_roles
)

# A single-backtick span that starts and ends with a word character and is
# delimited by a space or a line boundary on both sides.
default_role_re = re.compile(r"(^| )`\w([^`]*?\w)?`($| )")

# Text that suggests reST source survived into the output: a lowercase letter
# followed by "::" and whitespace, any backtick, or ".." followed by optional
# whitespace, a word and a colon.
leaked_markup_re = re.compile(r"[a-z]::\s|`|\.\.\s*\w+:")
139
140
# Registry: file suffix (e.g. '.rst') -> list of checker functions registered
# for that suffix, in registration order.
checkers = {}

# Attributes stamped on every checker function, with the value used when the
# decorator call does not supply one.
checker_props = {'severity': 1, 'falsepositives': False}


def checker(*suffixes, **kwds):
    """Return a decorator that registers a function as a checker.

    The decorated function is appended to the ``checkers`` list of every
    suffix in *suffixes* and receives one attribute per key of
    ``checker_props``, taken from *kwds* when present and from the default
    otherwise.  Keywords that are not in ``checker_props`` are ignored.  The
    function itself is returned unchanged.
    """
    def register(func):
        for name, default in checker_props.items():
            setattr(func, name, kwds.get(name, default))
        for suffix in suffixes:
            if suffix not in checkers:
                checkers[suffix] = []
            checkers[suffix].append(func)
        return func
    return register
155
156
@checker('.py', severity=4)
def check_syntax(fn, lines):
    """Check Python examples for valid syntax.

    *lines* are joined and compiled in 'exec' mode with *fn* as the file
    name.  Yields ``(lineno, message)`` tuples: one with line 0 when the
    code contains a carriage return (not reported when ``os.name`` is
    'nt'), and one when the code cannot be compiled.  The line number is
    always an integer; 0 is used when the error carries none.
    """
    code = ''.join(lines)
    if '\r' in code:
        if os.name != 'nt':
            yield 0, '\\r in code file'
        # Compile without the carriage returns so they are not reported a
        # second time as a syntax problem.
        code = code.replace('\r', '')
    try:
        compile(code, fn, 'exec')
    except SyntaxError as err:
        # err.lineno may be None, and the report loop in main() formats the
        # line number with %d, so fall back to 0.
        yield err.lineno or 0, 'not compilable: %s' % err
    except ValueError as err:
        # compile() raises ValueError instead of SyntaxError when the source
        # contains null bytes.
        yield 0, 'not compilable: %s' % err
170
@checker('.rst', severity=2)
def check_suspicious_constructs(fn, lines):
    """Check for suspicious reST constructs.

    Yields ``(lineno, message)`` tuples with 1-based line numbers.  Every
    line is tested against each of the markup patterns; the default-role
    test is skipped from a ``.. productionlist::`` line up to the next blank
    line.
    """
    # (pattern, message) pairs, tried in this order on every line.
    line_checks = (
        (seems_directive_re,
         "comment seems to be intended as a directive"),
        (three_dot_directive_re,
         "directive should start with two dots, not three."),
        (double_backtick_role,
         "role use a single backtick, double backtick found."),
        (role_with_no_backticks,
         "role use a single backtick, no backtick found."),
        (role_glued_with_word,
         "missing space before role"),
    )
    in_production_list = False
    for lno, line in enumerate(lines, start=1):
        for pattern, message in line_checks:
            if pattern.search(line):
                yield lno, message

        if ".. productionlist::" in line:
            in_production_list = True
        elif in_production_list:
            # A blank line ends the production list.
            if not line.strip():
                in_production_list = False
        elif default_role_re.search(line):
            yield lno, "default role used"
192
193
@checker('.py', '.rst')
def check_whitespace(fn, lines):
    """Check for carriage returns, tabs and trailing whitespace.

    Yields ``(lineno, message)`` tuples with 1-based line numbers; a single
    line can produce several of them.
    """
    for lno, line in enumerate(lines, start=1):
        if '\r' in line:
            yield lno, '\\r in line'
        if '\t' in line:
            yield lno, 'OMG TABS!!!1'
        # Drop the line terminator only when there is one.  Slicing off the
        # last character unconditionally would eat real content on a final
        # line that lacks a newline and misjudge its trailing whitespace.
        content = line[:-1] if line.endswith('\n') else line
        if content.rstrip(' \t') != content:
            yield lno, 'trailing whitespace'
204
205
@checker('.rst', severity=0)
def check_line_length(fn, lines):
    """Check for line length; this checker is not run by default.

    Yields ``(lineno, "line too long")`` with 1-based line numbers for every
    line longer than 81 characters (line terminator included) that is not a
    table row, does not contain a link and is not a function signature
    directive.
    """
    signature_prefixes = ('.. function', '.. method', '.. cfunction')
    for lno, line in enumerate(lines, start=1):
        if len(line) <= 81:
            continue
        stripped = line.lstrip()
        # don't complain about tables, links and function signatures
        # (startswith() is used rather than indexing so that an over-long
        # line made only of whitespace does not raise IndexError)
        if stripped.startswith(('+', '|')):
            continue
        if 'http://' in line or 'https://' in line:
            continue
        if stripped.startswith(signature_prefixes):
            continue
        yield lno, "line too long"
218
219
@checker('.html', severity=2, falsepositives=True)
def check_leaked_markup(fn, lines):
    """Check HTML files for leaked reST markup.

    This only works if the HTML files have been built.  Yields
    ``(lineno, message)`` tuples with 1-based line numbers; the message
    embeds the repr of the offending line.
    """
    for lno, line in enumerate(lines, start=1):
        if leaked_markup_re.search(line) is None:
            continue
        yield lno, 'possibly leaked markup: %r' % line
228
229
def hide_literal_blocks(lines):
    """Blank out literal blocks in the given lines.

    A line ending in ``::`` opens a block and is passed through as is.  The
    blank lines and space-indented lines that follow are replaced by empty
    lines, so line numbers stay meaningful, until the first line that is
    neither.
    """
    hiding = False
    for line in lines:
        if line.endswith("::\n"):
            hiding = True
            yield line
            continue
        if hiding:
            if line == "\n" or line.startswith(" "):
                yield "\n"
                continue
            # First unindented, non-blank line: the block is over.
            hiding = False
        yield line
246
247
def type_of_explicit_markup(line):
    """Classify a line that starts an explicit markup block.

    Returns 'directive', 'footnote', 'citation', 'target' or
    'substitution_definition' for the first pattern that matches at the
    start of *line*, and 'comment' when none does.
    """
    # Order matters: the footnote pattern is a special case of the citation
    # pattern, so it has to be tried first.
    candidates = (
        ('directive', fr'\.\. {all_directives}::'),
        ('footnote', r'\.\. \[[0-9]+\] '),
        ('citation', r'\.\. \[[^\]]+\] '),
        ('target', r'\.\. _.*[^_]: '),
        ('substitution_definition', r'\.\. \|[^\|]*\| '),
    )
    for kind, pattern in candidates:
        if re.match(pattern, line):
            return kind
    return 'comment'
260
261
def hide_comments(lines):
    """Blank out comments in the given lines.

    Two forms are handled: the blank or space-indented lines following a
    bare ``..`` line, and single lines starting with ``.. `` that
    type_of_explicit_markup() classifies as a comment.  Hidden lines are
    replaced by empty lines, so line numbers stay meaningful.
    """
    blank = "\n"
    inside_comment = False
    for line in lines:
        if line == "..\n":
            inside_comment = True
        elif inside_comment:
            # The comment body lasts while lines are blank or indented.
            inside_comment = line == blank or line.startswith(" ")
            if inside_comment:
                line = blank
        is_one_line_comment = (
            line.startswith(".. ")
            and type_of_explicit_markup(line) == 'comment'
        )
        yield blank if is_one_line_comment else line
280
281
282
@checker(".rst", severity=2)
def check_missing_surrogate_space_on_plural(fn, lines):
    r"""Check for missing 'backslash-space' between a code sample and a letter.

    Good: ``Point``\ s
    Bad: ``Point``s

    Literal blocks and comments are blanked out before the scan.  Yields
    ``(lineno, message)`` tuples with 1-based line numbers.
    """
    # Each line is split on the double-backtick delimiter; crossing a
    # delimiter toggles in_code_sample.  The flag is kept across lines, so a
    # code sample may span several of them.
    in_code_sample = False
    check_next_one = False
    visible_lines = hide_comments(hide_literal_blocks(lines))
    for lno, line in enumerate(visible_lines, start=1):
        tokens = line.split("``")
        last_token_no = len(tokens) - 1
        for token_no, token in enumerate(tokens):
            if check_next_one:
                # The text right after a closing delimiter is empty when
                # another delimiter follows immediately or when the line
                # ends there without a newline; there is then no character
                # to look at (and indexing it would raise IndexError).
                if token and token[0] in ascii_letters:
                    yield lno, f"Missing backslash-space between code sample and {token!r}."
                check_next_one = False
            if token_no == last_token_no:
                # No delimiter follows the last token.
                continue
            if in_code_sample:
                # The delimiter after this token closes a code sample.
                check_next_one = True
            in_code_sample = not in_code_sample
304
def main(argv):
    """Run the registered checkers over a directory tree and print a report.

    *argv* is the full argument vector with the program name first (it is
    interpolated into the usage text, which also describes the options).

    Every file below the given path (default '.') whose extension has
    registered checkers is read as UTF-8 and passed to each checker whose
    severity is at least the requested one; checkers flagged as yielding
    false positives only run with -f.  A file that cannot be read or
    decoded is reported and counted as a severity 4 problem.

    Returns 2 for a usage error or a nonexistent path, 1 when at least one
    problem was counted and 0 otherwise.
    """
    usage = '''\
Usage: %s [-v] [-f] [-s sev] [-i path]* [path]

Options: -v verbose (print all checked file names)
-f enable checkers that yield many false positives
-s sev only show problems with severity >= sev
-i path ignore subdir or file path
''' % argv[0]
    try:
        gopts, args = getopt.getopt(argv[1:], 'vfs:i:')
    except getopt.GetoptError:
        print(usage)
        return 2

    verbose = False
    severity = 1
    ignore = set()
    falsepos = False
    for opt, val in gopts:
        if opt == '-v':
            verbose = True
        elif opt == '-f':
            falsepos = True
        elif opt == '-s':
            try:
                severity = int(val)
            except ValueError:
                # A non-numeric severity is a usage error, not a crash.
                print(usage)
                return 2
        elif opt == '-i':
            ignore.add(abspath(val))

    if len(args) > 1:
        print(usage)
        return 2
    path = args[0] if args else '.'

    if not exists(path):
        print('Error: path %s does not exist' % path)
        return 2

    # Number of reported problems, keyed by severity.
    count = defaultdict(int)

    print("""⚠ rstlint.py is no longer maintained here and will be removed
⚠ in a future release.
⚠ Please use https://pypi.org/p/sphinx-lint instead.
""")

    for root, dirs, files in os.walk(path):
        # ignore subdirs in ignore list; emptying dirs in place stops
        # os.walk from descending any further
        if abspath(root) in ignore:
            del dirs[:]
            continue

        for name in files:
            fn = join(root, name)
            if fn[:2] == './':
                fn = fn[2:]

            # ignore files in ignore list
            if abspath(fn) in ignore:
                continue

            ext = splitext(fn)[1]
            checkerlist = checkers.get(ext, None)
            if not checkerlist:
                continue

            if verbose:
                print('Checking %s...' % fn)

            try:
                with open(fn, 'r', encoding='utf-8') as f:
                    lines = list(f)
            except (OSError, UnicodeDecodeError) as err:
                # UnicodeDecodeError is not an OSError; without it a single
                # file that is not valid UTF-8 would abort the whole run.
                print('%s: cannot open: %s' % (fn, err))
                count[4] += 1
                continue

            for check in checkerlist:
                if check.falsepositives and not falsepos:
                    continue
                csev = check.severity
                if csev >= severity:
                    for lno, msg in check(fn, lines):
                        print('[%d] %s:%d: %s' % (csev, fn, lno, msg))
                        count[csev] += 1
    if verbose:
        print()
    if not count:
        if severity > 1:
            print('No problems with severity >= %d found.' % severity)
        else:
            print('No problems found.')
    else:
        for sev in sorted(count):
            number = count[sev]
            print('%d problem%s with severity %d found.' %
                  (number, 's' if number > 1 else '', sev))
    return int(bool(count))
405
406
# Script entry point: the return value of main() becomes the exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)