1 import re
2 import sys
3 from collections.abc import Callable
4 from typing import NoReturn
5
6
# One open preprocessor conditional: a (directive, condition) pair such as
# ('if', 'defined(FOO)') or ('elif', '(BAR > 1)').
TokenAndCondition = tuple[str, str]
# The currently open conditionals; new entries are appended at the end, so
# the outermost conditional comes first.
TokenStack = list[TokenAndCondition]
9
def negate(condition: str) -> str:
    """
    Return the CPP conditional that is the logical inverse of `condition`.

    A condition that already begins with '!' has that '!' removed;
    any other condition gets a '!' put in front of it.
    """
    already_negated = condition.startswith('!')
    return condition[1:] if already_negated else "!" + condition
17
18 class ESC[4;38;5;81mMonitor:
19 """
20 A simple C preprocessor that scans C source and computes, line by line,
21 what the current C preprocessor #if state is.
22
23 Doesn't handle everything--for example, if you have /* inside a C string,
24 without a matching */ (also inside a C string), or with a */ inside a C
25 string but on another line and with preprocessor macros in between...
26 the parser will get lost.
27
28 Anyway this implementation seems to work well enough for the CPython sources.
29 """
30
31 is_a_simple_defined: Callable[[str], re.Match[str] | None]
32 is_a_simple_defined = re.compile(r'^defined\s*\(\s*[A-Za-z0-9_]+\s*\)$').match
33
34 def __init__(self, filename: str | None = None, *, verbose: bool = False) -> None:
35 self.stack: TokenStack = []
36 self.in_comment = False
37 self.continuation: str | None = None
38 self.line_number = 0
39 self.filename = filename
40 self.verbose = verbose
41
42 def __repr__(self) -> str:
43 return ''.join((
44 '<Monitor ',
45 str(id(self)),
46 " line=", str(self.line_number),
47 " condition=", repr(self.condition()),
48 ">"))
49
50 def status(self) -> str:
51 return str(self.line_number).rjust(4) + ": " + self.condition()
52
53 def condition(self) -> str:
54 """
55 Returns the current preprocessor state, as a single #if condition.
56 """
57 return " && ".join(condition for token, condition in self.stack)
58
59 def fail(self, *a: object) -> NoReturn:
60 if self.filename:
61 filename = " " + self.filename
62 else:
63 filename = ''
64 print("Error at" + filename, "line", self.line_number, ":")
65 print(" ", ' '.join(str(x) for x in a))
66 sys.exit(-1)
67
68 def close(self) -> None:
69 if self.stack:
70 self.fail("Ended file while still in a preprocessor conditional block!")
71
72 def write(self, s: str) -> None:
73 for line in s.split("\n"):
74 self.writeline(line)
75
76 def writeline(self, line: str) -> None:
77 self.line_number += 1
78 line = line.strip()
79
80 def pop_stack() -> TokenAndCondition:
81 if not self.stack:
82 self.fail("#" + token + " without matching #if / #ifdef / #ifndef!")
83 return self.stack.pop()
84
85 if self.continuation:
86 line = self.continuation + line
87 self.continuation = None
88
89 if not line:
90 return
91
92 if line.endswith('\\'):
93 self.continuation = line[:-1].rstrip() + " "
94 return
95
96 # we have to ignore preprocessor commands inside comments
97 #
98 # we also have to handle this:
99 # /* start
100 # ...
101 # */ /* <-- tricky!
102 # ...
103 # */
104 # and this:
105 # /* start
106 # ...
107 # */ /* also tricky! */
108 if self.in_comment:
109 if '*/' in line:
110 # snip out the comment and continue
111 #
112 # GCC allows
113 # /* comment
114 # */ #include <stdio.h>
115 # maybe other compilers too?
116 _, _, line = line.partition('*/')
117 self.in_comment = False
118
119 while True:
120 if '/*' in line:
121 if self.in_comment:
122 self.fail("Nested block comment!")
123
124 before, _, remainder = line.partition('/*')
125 comment, comment_ends, after = remainder.partition('*/')
126 if comment_ends:
127 # snip out the comment
128 line = before.rstrip() + ' ' + after.lstrip()
129 continue
130 # comment continues to eol
131 self.in_comment = True
132 line = before.rstrip()
133 break
134
135 # we actually have some // comments
136 # (but block comments take precedence)
137 before, line_comment, comment = line.partition('//')
138 if line_comment:
139 line = before.rstrip()
140
141 if not line.startswith('#'):
142 return
143
144 line = line[1:].lstrip()
145 assert line
146
147 fields = line.split()
148 token = fields[0].lower()
149 condition = ' '.join(fields[1:]).strip()
150
151 if token in {'if', 'ifdef', 'ifndef', 'elif'}:
152 if not condition:
153 self.fail("Invalid format for #" + token + " line: no argument!")
154 if token in {'if', 'elif'}:
155 if not self.is_a_simple_defined(condition):
156 condition = "(" + condition + ")"
157 if token == 'elif':
158 previous_token, previous_condition = pop_stack()
159 self.stack.append((previous_token, negate(previous_condition)))
160 else:
161 fields = condition.split()
162 if len(fields) != 1:
163 self.fail("Invalid format for #" + token + " line: should be exactly one argument!")
164 symbol = fields[0]
165 condition = 'defined(' + symbol + ')'
166 if token == 'ifndef':
167 condition = '!' + condition
168 token = 'if'
169
170 self.stack.append((token, condition))
171
172 elif token == 'else':
173 previous_token, previous_condition = pop_stack()
174 self.stack.append((previous_token, negate(previous_condition)))
175
176 elif token == 'endif':
177 while pop_stack()[0] != 'if':
178 pass
179
180 else:
181 return
182
183 if self.verbose:
184 print(self.status())
185
if __name__ == '__main__':
    # Debugging aid: for every file named on the command line, print the
    # file name and then the preprocessor state after each conditional
    # directive (the monitor runs in verbose mode).
    for path in sys.argv[1:]:
        with open(path) as source_file:
            monitor = Monitor(path, verbose=True)
            print()
            print(path)
            # write() splits on '\n' and feeds each piece to writeline()
            monitor.write(source_file.read())