1 """Tokenization help for Python programs.
2
3 tokenize(readline) is a generator that breaks a stream of bytes into
4 Python tokens. It decodes the bytes according to PEP-0263 for
5 determining source file encoding.
6
7 It accepts a readline-like method which is called repeatedly to get the
8 next line of input (or b"" for EOF). It generates 5-tuples with these
9 members:
10
11 the token type (see token.py)
12 the token (a string)
13 the starting (row, column) indices of the token (a 2-tuple of ints)
14 the ending (row, column) indices of the token (a 2-tuple of ints)
15 the original line (string)
16
17 It is designed to match the working of the Python tokenizer exactly, except
18 that it produces COMMENT tokens for comments and gives type OP for all
19 operators. Additionally, all token lists start with an ENCODING token
20 which tells you which encoding was used to decode the bytes stream.
21 """
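# A minimal usage sketch (illustrative only; the in-memory io.BytesIO
# source below is an assumption for the example, not part of this module):
#
#     from io import BytesIO
#     from tokenize import tokenize
#
#     source = b"x = 1 + 2\n"
#     for tok in tokenize(BytesIO(source).readline):
#         print(tok.type, tok.string, tok.start, tok.end)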
22
23 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
24 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
25 'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
26 'Michael Foord')
27 from builtins import open as _builtin_open
28 from codecs import lookup, BOM_UTF8
29 import collections
30 import functools
31 from io import TextIOWrapper
32 import itertools as _itertools
33 import re
34 import sys
35 from token import *
36 from token import EXACT_TOKEN_TYPES
37
38 cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
39 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
40
41 import token
42 __all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding",
43 "untokenize", "TokenInfo"]
44 del token
45
class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
47 def __repr__(self):
48 annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
49 return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
50 self._replace(type=annotated_type))
51
52 @property
53 def exact_type(self):
54 if self.type == OP and self.string in EXACT_TOKEN_TYPES:
55 return EXACT_TOKEN_TYPES[self.string]
56 else:
57 return self.type
58
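# Illustrative example (values assumed): for an operator token such as '+',
# .type is the generic OP code, while .exact_type resolves to the specific
# PLUS code via EXACT_TOKEN_TYPES.
#
#     tok = TokenInfo(OP, '+', (1, 6), (1, 7), 'x = 1 + 2\n')
#     tok.type == OP          # True
#     tok.exact_type == PLUS  # True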
59 def group(*choices): return '(' + '|'.join(choices) + ')'
60 def any(*choices): return group(*choices) + '*'
61 def maybe(*choices): return group(*choices) + '?'
62
63 # Note: we use unicode matching for names ("\w") but ascii matching for
64 # number literals.
65 Whitespace = r'[ \f\t]*'
66 Comment = r'#[^\r\n]*'
67 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
68 Name = r'\w+'
69
70 Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
71 Binnumber = r'0[bB](?:_?[01])+'
72 Octnumber = r'0[oO](?:_?[0-7])+'
73 Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
74 Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
75 Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
76 Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
77 r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
78 Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
79 Floatnumber = group(Pointfloat, Expfloat)
80 Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
81 Number = group(Imagnumber, Floatnumber, Intnumber)
82
83 # Return the empty string, plus all of the valid string prefixes.
84 def _all_string_prefixes():
    # The valid string prefixes.  Only the lower case versions are
    # listed here, and only one ordering of each compound prefix
    # ('fr' but not 'rf'); the case and ordering permutations are
    # generated below.
88 _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
89 # if we add binary f-strings, add: ['fb', 'fbr']
90 result = {''}
91 for prefix in _valid_string_prefixes:
92 for t in _itertools.permutations(prefix):
93 # create a list with upper and lower versions of each
94 # character
95 for u in _itertools.product(*[(c, c.upper()) for c in t]):
96 result.add(''.join(u))
97 return result
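# Illustrative contents (assumed, not exhaustive): the single prefix 'b'
# contributes 'b' and 'B'; the compound prefix 'fr' contributes every
# case/order combination; the empty string is always present, which is
# what makes the prefix optional in StringPrefix below.
#
#     assert {'fr', 'fR', 'Fr', 'FR',
#             'rf', 'rF', 'Rf', 'RF'} <= _all_string_prefixes()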
98
99 @functools.lru_cache
100 def _compile(expr):
101 return re.compile(expr, re.UNICODE)
102
103 # Note that since _all_string_prefixes includes the empty string,
104 # StringPrefix can be the empty string (making it optional).
105 StringPrefix = group(*_all_string_prefixes())
106
107 # Tail end of ' string.
108 Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
109 # Tail end of " string.
110 Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
111 # Tail end of ''' string.
112 Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
113 # Tail end of """ string.
114 Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
115 Triple = group(StringPrefix + "'''", StringPrefix + '"""')
116 # Single-line ' or " string.
117 String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
118 StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
119
120 # Sorting in reverse order puts the long operators before their prefixes.
121 # Otherwise if = came before ==, == would get recognized as two instances
122 # of =.
123 Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True)))
124 Funny = group(r'\r?\n', Special)
125
126 PlainToken = group(Number, Funny, String, Name)
127 Token = Ignore + PlainToken
128
129 # First (or only) line of ' or " string.
130 ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
131 group("'", r'\\\r?\n'),
132 StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
133 group('"', r'\\\r?\n'))
134 PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
135 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
136
137 # For a given string prefix plus quotes, endpats maps it to a regex
138 # to match the remainder of that string. _prefix can be empty, for
139 # a normal single or triple quoted string (with no prefix).
140 endpats = {}
141 for _prefix in _all_string_prefixes():
142 endpats[_prefix + "'"] = Single
143 endpats[_prefix + '"'] = Double
144 endpats[_prefix + "'''"] = Single3
145 endpats[_prefix + '"""'] = Double3
146 del _prefix
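# Illustrative entries (assumed): endpats["'"] == Single, endpats['"""'] ==
# Double3, and a prefixed key such as endpats["rb'"] also maps to Single,
# since the prefix only affects the opening of a string, not its tail.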
147
148 # A set of all of the single and triple quoted string prefixes,
149 # including the opening quotes.
150 single_quoted = set()
151 triple_quoted = set()
152 for t in _all_string_prefixes():
153 for u in (t + '"', t + "'"):
154 single_quoted.add(u)
155 for u in (t + '"""', t + "'''"):
156 triple_quoted.add(u)
157 del t, u
158
159 tabsize = 8
160
class TokenError(Exception): pass
162
class StopTokenizing(Exception): pass
164
165
class Untokenizer:
167
168 def __init__(self):
169 self.tokens = []
170 self.prev_row = 1
171 self.prev_col = 0
172 self.encoding = None
173
174 def add_whitespace(self, start):
175 row, col = start
176 if row < self.prev_row or row == self.prev_row and col < self.prev_col:
177 raise ValueError("start ({},{}) precedes previous end ({},{})"
178 .format(row, col, self.prev_row, self.prev_col))
179 row_offset = row - self.prev_row
180 if row_offset:
181 self.tokens.append("\\\n" * row_offset)
182 self.prev_col = 0
183 col_offset = col - self.prev_col
184 if col_offset:
185 self.tokens.append(" " * col_offset)
186
187 def untokenize(self, iterable):
188 it = iter(iterable)
189 indents = []
190 startline = False
191 for t in it:
192 if len(t) == 2:
193 self.compat(t, it)
194 break
195 tok_type, token, start, end, line = t
196 if tok_type == ENCODING:
197 self.encoding = token
198 continue
199 if tok_type == ENDMARKER:
200 break
201 if tok_type == INDENT:
202 indents.append(token)
203 continue
204 elif tok_type == DEDENT:
205 indents.pop()
206 self.prev_row, self.prev_col = end
207 continue
208 elif tok_type in (NEWLINE, NL):
209 startline = True
210 elif startline and indents:
211 indent = indents[-1]
212 if start[1] >= len(indent):
213 self.tokens.append(indent)
214 self.prev_col = len(indent)
215 startline = False
216 self.add_whitespace(start)
217 self.tokens.append(token)
218 self.prev_row, self.prev_col = end
219 if tok_type in (NEWLINE, NL):
220 self.prev_row += 1
221 self.prev_col = 0
222 return "".join(self.tokens)
223
224 def compat(self, token, iterable):
225 indents = []
226 toks_append = self.tokens.append
227 startline = token[0] in (NEWLINE, NL)
228 prevstring = False
229
230 for tok in _itertools.chain([token], iterable):
231 toknum, tokval = tok[:2]
232 if toknum == ENCODING:
233 self.encoding = tokval
234 continue
235
236 if toknum in (NAME, NUMBER):
237 tokval += ' '
238
239 # Insert a space between two consecutive strings
240 if toknum == STRING:
241 if prevstring:
242 tokval = ' ' + tokval
243 prevstring = True
244 else:
245 prevstring = False
246
247 if toknum == INDENT:
248 indents.append(tokval)
249 continue
250 elif toknum == DEDENT:
251 indents.pop()
252 continue
253 elif toknum in (NEWLINE, NL):
254 startline = True
255 elif startline and indents:
256 toks_append(indents[-1])
257 startline = False
258 toks_append(tokval)
259
260
261 def untokenize(iterable):
262 """Transform tokens back into Python source code.
263 It returns a bytes object, encoded using the ENCODING
264 token, which is the first token sequence output by tokenize.
265
266 Each element returned by the iterable must be a token sequence
    with at least two elements, a token number and token value.  If
    only two-element tuples are passed, the output cannot reproduce the
    original spacing exactly, but it still tokenizes back to the same
    token stream (see the limited-input invariant below).
269
270 Round-trip invariant for full input:
271 Untokenized source will match input source exactly
272
273 Round-trip invariant for limited input:
274 # Output bytes will tokenize back to the input
275 t1 = [tok[:2] for tok in tokenize(f.readline)]
276 newcode = untokenize(t1)
277 readline = BytesIO(newcode).readline
278 t2 = [tok[:2] for tok in tokenize(readline)]
279 assert t1 == t2
280 """
281 ut = Untokenizer()
282 out = ut.untokenize(iterable)
283 if ut.encoding is not None:
284 out = out.encode(ut.encoding)
285 return out
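# Usage sketch (illustrative; assumes an in-memory source):
#
#     from io import BytesIO
#     toks = list(tokenize(BytesIO(b"x=1\n").readline))
#     untokenize(toks)                   # full 5-tuples: exact round-trip
#     untokenize(t[:2] for t in toks)    # 2-tuples only: approximate spacing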
286
287
288 def _get_normal_name(orig_enc):
289 """Imitates get_normal_name in tokenizer.c."""
290 # Only care about the first 12 characters.
291 enc = orig_enc[:12].lower().replace("_", "-")
292 if enc == "utf-8" or enc.startswith("utf-8-"):
293 return "utf-8"
294 if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
295 enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
296 return "iso-8859-1"
297 return orig_enc
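# Illustrative mappings (assumed): 'UTF-8' -> 'utf-8', 'latin_1' ->
# 'iso-8859-1', 'iso-8859-1-x' -> 'iso-8859-1'; spellings that are not
# special-cased (e.g. 'ascii') are returned unchanged.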
298
299 def detect_encoding(readline):
300 """
301 The detect_encoding() function is used to detect the encoding that should
302 be used to decode a Python source file. It requires one argument, readline,
303 in the same way as the tokenize() generator.
304
305 It will call readline a maximum of twice, and return the encoding used
306 (as a string) and a list of any lines (left as bytes) it has read in.
307
    It detects the encoding from the presence of a UTF-8 BOM or an
    encoding cookie as specified in PEP-0263.  If both a BOM and a
    cookie are present, but disagree, a SyntaxError will be raised.  If
    the encoding cookie is an invalid charset, a SyntaxError will also
    be raised.  Note that if a UTF-8 BOM is found, 'utf-8-sig' is
    returned.
313
314 If no encoding is specified, then the default of 'utf-8' will be returned.
315 """
316 try:
317 filename = readline.__self__.name
318 except AttributeError:
319 filename = None
320 bom_found = False
321 encoding = None
322 default = 'utf-8'
323 def read_or_stop():
324 try:
325 return readline()
326 except StopIteration:
327 return b''
328
329 def find_cookie(line):
330 try:
331 # Decode as UTF-8. Either the line is an encoding declaration,
332 # in which case it should be pure ASCII, or it must be UTF-8
333 # per default encoding.
334 line_string = line.decode('utf-8')
335 except UnicodeDecodeError:
336 msg = "invalid or missing encoding declaration"
337 if filename is not None:
338 msg = '{} for {!r}'.format(msg, filename)
339 raise SyntaxError(msg)
340
341 match = cookie_re.match(line_string)
342 if not match:
343 return None
344 encoding = _get_normal_name(match.group(1))
345 try:
346 codec = lookup(encoding)
347 except LookupError:
348 # This behaviour mimics the Python interpreter
349 if filename is None:
350 msg = "unknown encoding: " + encoding
351 else:
352 msg = "unknown encoding for {!r}: {}".format(filename,
353 encoding)
354 raise SyntaxError(msg)
355
356 if bom_found:
357 if encoding != 'utf-8':
358 # This behaviour mimics the Python interpreter
359 if filename is None:
360 msg = 'encoding problem: utf-8'
361 else:
362 msg = 'encoding problem for {!r}: utf-8'.format(filename)
363 raise SyntaxError(msg)
364 encoding += '-sig'
365 return encoding
366
367 first = read_or_stop()
368 if first.startswith(BOM_UTF8):
369 bom_found = True
370 first = first[3:]
371 default = 'utf-8-sig'
372 if not first:
373 return default, []
374
375 encoding = find_cookie(first)
376 if encoding:
377 return encoding, [first]
378 if not blank_re.match(first):
379 return default, [first]
380
381 second = read_or_stop()
382 if not second:
383 return default, [first]
384
385 encoding = find_cookie(second)
386 if encoding:
387 return encoding, [first, second]
388
389 return default, [first, second]
390
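# detect_encoding() usage sketch (illustrative; 'example.py' is a
# hypothetical path):
#
#     with _builtin_open('example.py', 'rb') as f:
#         enc, consumed_lines = detect_encoding(f.readline)
#     # enc is e.g. 'utf-8' or 'utf-8-sig'; consumed_lines holds the raw
#     # byte lines (at most two) read while detecting.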
391
392 def open(filename):
393 """Open a file in read only mode using the encoding detected by
394 detect_encoding().
395 """
396 buffer = _builtin_open(filename, 'rb')
397 try:
398 encoding, lines = detect_encoding(buffer.readline)
399 buffer.seek(0)
400 text = TextIOWrapper(buffer, encoding, line_buffering=True)
401 text.mode = 'r'
402 return text
403 except:
404 buffer.close()
405 raise
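# Usage sketch (illustrative; 'example.py' is a hypothetical path).  Note
# that this is tokenize.open, not the builtin open:
#
#     with open('example.py') as f:
#         text = f.read()    # decoded using the detected source encoding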
406
407
408 def tokenize(readline):
409 """
410 The tokenize() generator requires one argument, readline, which
411 must be a callable object which provides the same interface as the
412 readline() method of built-in file objects. Each call to the function
    should return one line of input as bytes.  Alternatively, readline
    can be any callable that signals the end of input by raising
    StopIteration instead of returning b"":
        readline = open(myfile, 'rb').__next__  # Example of alternate readline
416
417 The generator produces 5-tuples with these members: the token type; the
418 token string; a 2-tuple (srow, scol) of ints specifying the row and
419 column where the token begins in the source; a 2-tuple (erow, ecol) of
420 ints specifying the row and column where the token ends in the source;
421 and the line on which the token was found. The line passed is the
422 physical line.
423
424 The first token sequence will always be an ENCODING token
425 which tells you which encoding was used to decode the bytes stream.
426 """
427 encoding, consumed = detect_encoding(readline)
428 empty = _itertools.repeat(b"")
429 rl_gen = _itertools.chain(consumed, iter(readline, b""), empty)
430 return _tokenize(rl_gen.__next__, encoding)
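# Usage sketch (illustrative; tokenizes an in-memory buffer):
#
#     from io import BytesIO
#     code = b"def f():\n    return 42\n"
#     for tok in tokenize(BytesIO(code).readline):
#         print(tok)
#     # The first token yielded is always the ENCODING token
#     # (string 'utf-8' here).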
431
432
433 def _tokenize(readline, encoding):
434 lnum = parenlev = continued = 0
435 numchars = '0123456789'
436 contstr, needcont = '', 0
437 contline = None
438 indents = [0]
439
440 if encoding is not None:
441 if encoding == "utf-8-sig":
442 # BOM will already have been stripped.
443 encoding = "utf-8"
444 yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
445 last_line = b''
446 line = b''
447 while True: # loop over lines in stream
448 try:
449 # We capture the value of the line variable here because
450 # readline uses the empty string '' to signal end of input,
451 # hence `line` itself will always be overwritten at the end
452 # of this loop.
453 last_line = line
454 line = readline()
455 except StopIteration:
456 line = b''
457
458 if encoding is not None:
459 line = line.decode(encoding)
460 lnum += 1
461 pos, max = 0, len(line)
462
463 if contstr: # continued string
464 if not line:
465 raise TokenError("EOF in multi-line string", strstart)
466 endmatch = endprog.match(line)
467 if endmatch:
468 pos = end = endmatch.end(0)
469 yield TokenInfo(STRING, contstr + line[:end],
470 strstart, (lnum, end), contline + line)
471 contstr, needcont = '', 0
472 contline = None
473 elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
474 yield TokenInfo(ERRORTOKEN, contstr + line,
475 strstart, (lnum, len(line)), contline)
476 contstr = ''
477 contline = None
478 continue
479 else:
480 contstr = contstr + line
481 contline = contline + line
482 continue
483
484 elif parenlev == 0 and not continued: # new statement
485 if not line: break
486 column = 0
487 while pos < max: # measure leading whitespace
488 if line[pos] == ' ':
489 column += 1
490 elif line[pos] == '\t':
491 column = (column//tabsize + 1)*tabsize
492 elif line[pos] == '\f':
493 column = 0
494 else:
495 break
496 pos += 1
497 if pos == max:
498 break
499
500 if line[pos] in '#\r\n': # skip comments or blank lines
501 if line[pos] == '#':
502 comment_token = line[pos:].rstrip('\r\n')
503 yield TokenInfo(COMMENT, comment_token,
504 (lnum, pos), (lnum, pos + len(comment_token)), line)
505 pos += len(comment_token)
506
507 yield TokenInfo(NL, line[pos:],
508 (lnum, pos), (lnum, len(line)), line)
509 continue
510
511 if column > indents[-1]: # count indents or dedents
512 indents.append(column)
513 yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
514 while column < indents[-1]:
515 if column not in indents:
516 raise IndentationError(
517 "unindent does not match any outer indentation level",
518 ("<tokenize>", lnum, pos, line))
519 indents = indents[:-1]
520
521 yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
522
523 else: # continued statement
524 if not line:
525 raise TokenError("EOF in multi-line statement", (lnum, 0))
526 continued = 0
527
528 while pos < max:
529 pseudomatch = _compile(PseudoToken).match(line, pos)
530 if pseudomatch: # scan for tokens
531 start, end = pseudomatch.span(1)
532 spos, epos, pos = (lnum, start), (lnum, end), end
533 if start == end:
534 continue
535 token, initial = line[start:end], line[start]
536
537 if (initial in numchars or # ordinary number
538 (initial == '.' and token != '.' and token != '...')):
539 yield TokenInfo(NUMBER, token, spos, epos, line)
540 elif initial in '\r\n':
541 if parenlev > 0:
542 yield TokenInfo(NL, token, spos, epos, line)
543 else:
544 yield TokenInfo(NEWLINE, token, spos, epos, line)
545
546 elif initial == '#':
547 assert not token.endswith("\n")
548 yield TokenInfo(COMMENT, token, spos, epos, line)
549
550 elif token in triple_quoted:
551 endprog = _compile(endpats[token])
552 endmatch = endprog.match(line, pos)
553 if endmatch: # all on one line
554 pos = endmatch.end(0)
555 token = line[start:pos]
556 yield TokenInfo(STRING, token, spos, (lnum, pos), line)
557 else:
558 strstart = (lnum, start) # multiple lines
559 contstr = line[start:]
560 contline = line
561 break
562
563 # Check up to the first 3 chars of the token to see if
564 # they're in the single_quoted set. If so, they start
565 # a string.
566 # We're using the first 3, because we're looking for
567 # "rb'" (for example) at the start of the token. If
568 # we switch to longer prefixes, this needs to be
569 # adjusted.
570 # Note that initial == token[:1].
571 # Also note that single quote checking must come after
572 # triple quote checking (above).
573 elif (initial in single_quoted or
574 token[:2] in single_quoted or
575 token[:3] in single_quoted):
576 if token[-1] == '\n': # continued string
577 strstart = (lnum, start)
578 # Again, using the first 3 chars of the
579 # token. This is looking for the matching end
580 # regex for the correct type of quote
581 # character. So it's really looking for
582 # endpats["'"] or endpats['"'], by trying to
583 # skip string prefix characters, if any.
584 endprog = _compile(endpats.get(initial) or
585 endpats.get(token[1]) or
586 endpats.get(token[2]))
587 contstr, needcont = line[start:], 1
588 contline = line
589 break
590 else: # ordinary string
591 yield TokenInfo(STRING, token, spos, epos, line)
592
593 elif initial.isidentifier(): # ordinary name
594 yield TokenInfo(NAME, token, spos, epos, line)
595 elif initial == '\\': # continued stmt
596 continued = 1
597 else:
598 if initial in '([{':
599 parenlev += 1
600 elif initial in ')]}':
601 parenlev -= 1
602 yield TokenInfo(OP, token, spos, epos, line)
603 else:
604 yield TokenInfo(ERRORTOKEN, line[pos],
605 (lnum, pos), (lnum, pos+1), line)
606 pos += 1
607
608 # Add an implicit NEWLINE if the input doesn't end in one
609 if last_line and last_line[-1] not in '\r\n' and not last_line.strip().startswith("#"):
610 yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
611 for indent in indents[1:]: # pop remaining indent levels
612 yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
613 yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
614
615
616 def generate_tokens(readline):
617 """Tokenize a source reading Python code as unicode strings.
618
619 This has the same API as tokenize(), except that it expects the *readline*
620 callable to return str objects instead of bytes.
621 """
622 return _tokenize(readline, None)
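# Usage sketch (illustrative): generate_tokens() consumes str lines, so an
# io.StringIO readline is sufficient, and no ENCODING token is emitted.
#
#     from io import StringIO
#     for tok in generate_tokens(StringIO("x = 1\n").readline):
#         print(tok)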
623
624 def main():
625 import argparse
626
627 # Helper error handling routines
628 def perror(message):
629 sys.stderr.write(message)
630 sys.stderr.write('\n')
631
632 def error(message, filename=None, location=None):
633 if location:
634 args = (filename,) + location + (message,)
635 perror("%s:%d:%d: error: %s" % args)
636 elif filename:
637 perror("%s: error: %s" % (filename, message))
638 else:
639 perror("error: %s" % message)
640 sys.exit(1)
641
642 # Parse the arguments and options
643 parser = argparse.ArgumentParser(prog='python -m tokenize')
644 parser.add_argument(dest='filename', nargs='?',
645 metavar='filename.py',
646 help='the file to tokenize; defaults to stdin')
647 parser.add_argument('-e', '--exact', dest='exact', action='store_true',
648 help='display token names using the exact type')
649 args = parser.parse_args()
650
651 try:
652 # Tokenize the input
653 if args.filename:
654 filename = args.filename
655 with _builtin_open(filename, 'rb') as f:
656 tokens = list(tokenize(f.readline))
657 else:
658 filename = "<stdin>"
659 tokens = _tokenize(sys.stdin.readline, None)
660
661 # Output the tokenization
662 for token in tokens:
663 token_type = token.type
664 if args.exact:
665 token_type = token.exact_type
666 token_range = "%d,%d-%d,%d:" % (token.start + token.end)
667 print("%-20s%-15s%-15r" %
668 (token_range, tok_name[token_type], token.string))
669 except IndentationError as err:
670 line, column = err.args[1][1:3]
671 error(err.args[0], filename, (line, column))
672 except TokenError as err:
673 line, column = err.args[1]
674 error(err.args[0], filename, (line, column))
675 except SyntaxError as err:
676 error(err, filename)
677 except OSError as err:
678 error(err)
679 except KeyboardInterrupt:
680 print("interrupted\n")
681 except Exception as err:
682 perror("unexpected error: %s" % err)
683 raise
684
685 def _generate_tokens_from_c_tokenizer(source):
    """Tokenize a source reading Python code as unicode strings using the
    internal C tokenizer.
    """
687 import _tokenize as c_tokenizer
688 for info in c_tokenizer.TokenizerIter(source):
689 tok, type, lineno, end_lineno, col_off, end_col_off, line = info
690 yield TokenInfo(type, tok, (lineno, col_off), (end_lineno, end_col_off), line)
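# Usage sketch (illustrative; relies on the internal _tokenize extension
# module being available in this interpreter build):
#
#     for tok in _generate_tokens_from_c_tokenizer("x = 1\n"):
#         print(tok)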
691
692
693 if __name__ == "__main__":
694 main()