Python-3.11.7/Lib/test/test_tokenize.py
       1  from test import support
       2  from test.support import os_helper
       3  from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
       4                       STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
       5                       open as tokenize_open, Untokenizer, generate_tokens,
       6                       NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT)
       7  from io import BytesIO, StringIO
       8  import unittest
       9  from textwrap import dedent
      10  from unittest import TestCase, mock
      11  from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
      12                                 INVALID_UNDERSCORE_LITERALS)
      13  from test.support import os_helper
      14  from test.support.script_helper import run_test_script, make_script
      15  import os
      16  import token
      17  
      18  # Converts a source string into a list of textual representations
      19  # of its tokens, such as:
      20  # `    NAME       'if'          (1, 0) (1, 2)`
      21  # to make writing tests easier.
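          #
          # A minimal usage sketch (illustrative only, not part of the test suite
          # below):
          #
          #     src = "1 + 1"
          #     stringify_tokens_from_source(
          #         generate_tokens(StringIO(src).readline), src)
          #     # -> ["    NUMBER     '1'           (1, 0) (1, 1)",
          #     #     "    OP         '+'           (1, 2) (1, 3)",
          #     #     "    NUMBER     '1'           (1, 4) (1, 5)"]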
      22  def stringify_tokens_from_source(token_generator, source_string):
      23      result = []
      24      num_lines = len(source_string.splitlines())
      25      missing_trailing_nl = source_string[-1] not in '\r\n'
      26  
      27      for type, token, start, end, line in token_generator:
      28          if type == ENDMARKER:
      29              break
      30          # Ignore the NEWLINE on the last line if the input lacks a trailing newline
      31          if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
      32              continue
      33          type = tok_name[type]
      34          result.append(f"    {type:10} {token!r:13} {start} {end}")
      35  
      36      return result
      37  
      38  class TokenizeTest(TestCase):
      39      # Tests for the tokenize module.
      40  
      41      # The tests can be really simple. Given a small fragment of source
      42      # code, print out a table with tokens. The ENDMARKER, ENCODING and
      43      # final NEWLINE are omitted for brevity.
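              # Each expected row reads: token type, repr of the token string,
              # then the start and end positions as (row, col) pairs; rows are
              # 1-based and columns are 0-based.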
      44  
      45      def check_tokenize(self, s, expected):
      46          # Format the tokens in s as a table.
      47          # The ENDMARKER and final NEWLINE are omitted.
      48          f = BytesIO(s.encode('utf-8'))
      49          result = stringify_tokens_from_source(tokenize(f.readline), s)
      50          self.assertEqual(result,
      51                           ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
      52                           expected.rstrip().splitlines())
      53  
      54      def test_implicit_newline(self):
      55          # Make sure that the tokenizer puts in an implicit NEWLINE
      56          # when the input lacks a trailing newline.
      57          f = BytesIO("x".encode('utf-8'))
      58          tokens = list(tokenize(f.readline))
      59          self.assertEqual(tokens[-2].type, NEWLINE)
      60          self.assertEqual(tokens[-1].type, ENDMARKER)
      61  
      62      def test_basic(self):
      63          self.check_tokenize("1 + 1", """\
      64      NUMBER     '1'           (1, 0) (1, 1)
      65      OP         '+'           (1, 2) (1, 3)
      66      NUMBER     '1'           (1, 4) (1, 5)
      67      """)
      68          self.check_tokenize("if False:\n"
      69                              "    # NL\n"
      70                              "    \n"
      71                              "    True = False # NEWLINE\n", """\
      72      NAME       'if'          (1, 0) (1, 2)
      73      NAME       'False'       (1, 3) (1, 8)
      74      OP         ':'           (1, 8) (1, 9)
      75      NEWLINE    '\\n'          (1, 9) (1, 10)
      76      COMMENT    '# NL'        (2, 4) (2, 8)
      77      NL         '\\n'          (2, 8) (2, 9)
      78      NL         '\\n'          (3, 4) (3, 5)
      79      INDENT     '    '        (4, 0) (4, 4)
      80      NAME       'True'        (4, 4) (4, 8)
      81      OP         '='           (4, 9) (4, 10)
      82      NAME       'False'       (4, 11) (4, 16)
      83      COMMENT    '# NEWLINE'   (4, 17) (4, 26)
      84      NEWLINE    '\\n'          (4, 26) (4, 27)
      85      DEDENT     ''            (5, 0) (5, 0)
      86      """)
      87          indent_error_file = b"""\
      88  def k(x):
      89      x += 2
      90    x += 5
      91  """
      92          readline = BytesIO(indent_error_file).readline
      93          with self.assertRaisesRegex(IndentationError,
      94                                      "unindent does not match any "
      95                                      "outer indentation level"):
      96              for tok in tokenize(readline):
      97                  pass
      98  
      99      def test_int(self):
     100          # Ordinary integers and binary operators
     101          self.check_tokenize("0xff <= 255", """\
     102      NUMBER     '0xff'        (1, 0) (1, 4)
     103      OP         '<='          (1, 5) (1, 7)
     104      NUMBER     '255'         (1, 8) (1, 11)
     105      """)
     106          self.check_tokenize("0b10 <= 255", """\
     107      NUMBER     '0b10'        (1, 0) (1, 4)
     108      OP         '<='          (1, 5) (1, 7)
     109      NUMBER     '255'         (1, 8) (1, 11)
     110      """)
     111          self.check_tokenize("0o123 <= 0O123", """\
     112      NUMBER     '0o123'       (1, 0) (1, 5)
     113      OP         '<='          (1, 6) (1, 8)
     114      NUMBER     '0O123'       (1, 9) (1, 14)
     115      """)
     116          self.check_tokenize("1234567 > ~0x15", """\
     117      NUMBER     '1234567'     (1, 0) (1, 7)
     118      OP         '>'           (1, 8) (1, 9)
     119      OP         '~'           (1, 10) (1, 11)
     120      NUMBER     '0x15'        (1, 11) (1, 15)
     121      """)
     122          self.check_tokenize("2134568 != 1231515", """\
     123      NUMBER     '2134568'     (1, 0) (1, 7)
     124      OP         '!='          (1, 8) (1, 10)
     125      NUMBER     '1231515'     (1, 11) (1, 18)
     126      """)
     127          self.check_tokenize("(-124561-1) & 200000000", """\
     128      OP         '('           (1, 0) (1, 1)
     129      OP         '-'           (1, 1) (1, 2)
     130      NUMBER     '124561'      (1, 2) (1, 8)
     131      OP         '-'           (1, 8) (1, 9)
     132      NUMBER     '1'           (1, 9) (1, 10)
     133      OP         ')'           (1, 10) (1, 11)
     134      OP         '&'           (1, 12) (1, 13)
     135      NUMBER     '200000000'   (1, 14) (1, 23)
     136      """)
     137          self.check_tokenize("0xdeadbeef != -1", """\
     138      NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
     139      OP         '!='          (1, 11) (1, 13)
     140      OP         '-'           (1, 14) (1, 15)
     141      NUMBER     '1'           (1, 15) (1, 16)
     142      """)
     143          self.check_tokenize("0xdeadc0de & 12345", """\
     144      NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
     145      OP         '&'           (1, 11) (1, 12)
     146      NUMBER     '12345'       (1, 13) (1, 18)
     147      """)
     148          self.check_tokenize("0xFF & 0x15 | 1234", """\
     149      NUMBER     '0xFF'        (1, 0) (1, 4)
     150      OP         '&'           (1, 5) (1, 6)
     151      NUMBER     '0x15'        (1, 7) (1, 11)
     152      OP         '|'           (1, 12) (1, 13)
     153      NUMBER     '1234'        (1, 14) (1, 18)
     154      """)
     155  
     156      def test_long(self):
     157          # Long integers
     158          self.check_tokenize("x = 0", """\
     159      NAME       'x'           (1, 0) (1, 1)
     160      OP         '='           (1, 2) (1, 3)
     161      NUMBER     '0'           (1, 4) (1, 5)
     162      """)
     163          self.check_tokenize("x = 0xfffffffffff", """\
     164      NAME       'x'           (1, 0) (1, 1)
     165      OP         '='           (1, 2) (1, 3)
     166      NUMBER     '0xfffffffffff' (1, 4) (1, 17)
     167      """)
     168          self.check_tokenize("x = 123141242151251616110", """\
     169      NAME       'x'           (1, 0) (1, 1)
     170      OP         '='           (1, 2) (1, 3)
     171      NUMBER     '123141242151251616110' (1, 4) (1, 25)
     172      """)
     173          self.check_tokenize("x = -15921590215012591", """\
     174      NAME       'x'           (1, 0) (1, 1)
     175      OP         '='           (1, 2) (1, 3)
     176      OP         '-'           (1, 4) (1, 5)
     177      NUMBER     '15921590215012591' (1, 5) (1, 22)
     178      """)
     179  
     180      def test_float(self):
     181          # Floating point numbers
     182          self.check_tokenize("x = 3.14159", """\
     183      NAME       'x'           (1, 0) (1, 1)
     184      OP         '='           (1, 2) (1, 3)
     185      NUMBER     '3.14159'     (1, 4) (1, 11)
     186      """)
     187          self.check_tokenize("x = 314159.", """\
     188      NAME       'x'           (1, 0) (1, 1)
     189      OP         '='           (1, 2) (1, 3)
     190      NUMBER     '314159.'     (1, 4) (1, 11)
     191      """)
     192          self.check_tokenize("x = .314159", """\
     193      NAME       'x'           (1, 0) (1, 1)
     194      OP         '='           (1, 2) (1, 3)
     195      NUMBER     '.314159'     (1, 4) (1, 11)
     196      """)
     197          self.check_tokenize("x = 3e14159", """\
     198      NAME       'x'           (1, 0) (1, 1)
     199      OP         '='           (1, 2) (1, 3)
     200      NUMBER     '3e14159'     (1, 4) (1, 11)
     201      """)
     202          self.check_tokenize("x = 3E123", """\
     203      NAME       'x'           (1, 0) (1, 1)
     204      OP         '='           (1, 2) (1, 3)
     205      NUMBER     '3E123'       (1, 4) (1, 9)
     206      """)
     207          self.check_tokenize("x+y = 3e-1230", """\
     208      NAME       'x'           (1, 0) (1, 1)
     209      OP         '+'           (1, 1) (1, 2)
     210      NAME       'y'           (1, 2) (1, 3)
     211      OP         '='           (1, 4) (1, 5)
     212      NUMBER     '3e-1230'     (1, 6) (1, 13)
     213      """)
     214          self.check_tokenize("x = 3.14e159", """\
     215      NAME       'x'           (1, 0) (1, 1)
     216      OP         '='           (1, 2) (1, 3)
     217      NUMBER     '3.14e159'    (1, 4) (1, 12)
     218      """)
     219  
     220      def test_underscore_literals(self):
     221          def number_token(s):
     222              f = BytesIO(s.encode('utf-8'))
     223              for toktype, token, start, end, line in tokenize(f.readline):
     224                  if toktype == NUMBER:
     225                      return token
     226              return 'invalid token'
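                  # For an invalid literal the tokenizer typically ends the
                  # NUMBER token early (or emits none at all), so the helper
                  # never returns the full literal string.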
     227          for lit in VALID_UNDERSCORE_LITERALS:
     228              if '(' in lit:
     229                  # this won't work with compound complex inputs
     230                  continue
     231              self.assertEqual(number_token(lit), lit)
     232          for lit in INVALID_UNDERSCORE_LITERALS:
     233              self.assertNotEqual(number_token(lit), lit)
     234  
     235      def test_string(self):
     236          # String literals
     237          self.check_tokenize("x = ''; y = \"\"", """\
     238      NAME       'x'           (1, 0) (1, 1)
     239      OP         '='           (1, 2) (1, 3)
     240      STRING     "''"          (1, 4) (1, 6)
     241      OP         ';'           (1, 6) (1, 7)
     242      NAME       'y'           (1, 8) (1, 9)
     243      OP         '='           (1, 10) (1, 11)
     244      STRING     '""'          (1, 12) (1, 14)
     245      """)
     246          self.check_tokenize("x = '\"'; y = \"'\"", """\
     247      NAME       'x'           (1, 0) (1, 1)
     248      OP         '='           (1, 2) (1, 3)
     249      STRING     '\\'"\\''       (1, 4) (1, 7)
     250      OP         ';'           (1, 7) (1, 8)
     251      NAME       'y'           (1, 9) (1, 10)
     252      OP         '='           (1, 11) (1, 12)
     253      STRING     '"\\'"'        (1, 13) (1, 16)
     254      """)
     255          self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
     256      NAME       'x'           (1, 0) (1, 1)
     257      OP         '='           (1, 2) (1, 3)
     258      STRING     '"doesn\\'t "' (1, 4) (1, 14)
     259      NAME       'shrink'      (1, 14) (1, 20)
     260      STRING     '", does it"' (1, 20) (1, 31)
     261      """)
     262          self.check_tokenize("x = 'abc' + 'ABC'", """\
     263      NAME       'x'           (1, 0) (1, 1)
     264      OP         '='           (1, 2) (1, 3)
     265      STRING     "'abc'"       (1, 4) (1, 9)
     266      OP         '+'           (1, 10) (1, 11)
     267      STRING     "'ABC'"       (1, 12) (1, 17)
     268      """)
     269          self.check_tokenize('y = "ABC" + "ABC"', """\
     270      NAME       'y'           (1, 0) (1, 1)
     271      OP         '='           (1, 2) (1, 3)
     272      STRING     '"ABC"'       (1, 4) (1, 9)
     273      OP         '+'           (1, 10) (1, 11)
     274      STRING     '"ABC"'       (1, 12) (1, 17)
     275      """)
     276          self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
     277      NAME       'x'           (1, 0) (1, 1)
     278      OP         '='           (1, 2) (1, 3)
     279      STRING     "r'abc'"      (1, 4) (1, 10)
     280      OP         '+'           (1, 11) (1, 12)
     281      STRING     "r'ABC'"      (1, 13) (1, 19)
     282      OP         '+'           (1, 20) (1, 21)
     283      STRING     "R'ABC'"      (1, 22) (1, 28)
     284      OP         '+'           (1, 29) (1, 30)
     285      STRING     "R'ABC'"      (1, 31) (1, 37)
     286      """)
     287          self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
     288      NAME       'y'           (1, 0) (1, 1)
     289      OP         '='           (1, 2) (1, 3)
     290      STRING     'r"abc"'      (1, 4) (1, 10)
     291      OP         '+'           (1, 11) (1, 12)
     292      STRING     'r"ABC"'      (1, 13) (1, 19)
     293      OP         '+'           (1, 20) (1, 21)
     294      STRING     'R"ABC"'      (1, 22) (1, 28)
     295      OP         '+'           (1, 29) (1, 30)
     296      STRING     'R"ABC"'      (1, 31) (1, 37)
     297      """)
     298  
     299          self.check_tokenize("u'abc' + U'abc'", """\
     300      STRING     "u'abc'"      (1, 0) (1, 6)
     301      OP         '+'           (1, 7) (1, 8)
     302      STRING     "U'abc'"      (1, 9) (1, 15)
     303      """)
     304          self.check_tokenize('u"abc" + U"abc"', """\
     305      STRING     'u"abc"'      (1, 0) (1, 6)
     306      OP         '+'           (1, 7) (1, 8)
     307      STRING     'U"abc"'      (1, 9) (1, 15)
     308      """)
     309  
     310          self.check_tokenize("b'abc' + B'abc'", """\
     311      STRING     "b'abc'"      (1, 0) (1, 6)
     312      OP         '+'           (1, 7) (1, 8)
     313      STRING     "B'abc'"      (1, 9) (1, 15)
     314      """)
     315          self.check_tokenize('b"abc" + B"abc"', """\
     316      STRING     'b"abc"'      (1, 0) (1, 6)
     317      OP         '+'           (1, 7) (1, 8)
     318      STRING     'B"abc"'      (1, 9) (1, 15)
     319      """)
     320          self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
     321      STRING     "br'abc'"     (1, 0) (1, 7)
     322      OP         '+'           (1, 8) (1, 9)
     323      STRING     "bR'abc'"     (1, 10) (1, 17)
     324      OP         '+'           (1, 18) (1, 19)
     325      STRING     "Br'abc'"     (1, 20) (1, 27)
     326      OP         '+'           (1, 28) (1, 29)
     327      STRING     "BR'abc'"     (1, 30) (1, 37)
     328      """)
     329          self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
     330      STRING     'br"abc"'     (1, 0) (1, 7)
     331      OP         '+'           (1, 8) (1, 9)
     332      STRING     'bR"abc"'     (1, 10) (1, 17)
     333      OP         '+'           (1, 18) (1, 19)
     334      STRING     'Br"abc"'     (1, 20) (1, 27)
     335      OP         '+'           (1, 28) (1, 29)
     336      STRING     'BR"abc"'     (1, 30) (1, 37)
     337      """)
     338          self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
     339      STRING     "rb'abc'"     (1, 0) (1, 7)
     340      OP         '+'           (1, 8) (1, 9)
     341      STRING     "rB'abc'"     (1, 10) (1, 17)
     342      OP         '+'           (1, 18) (1, 19)
     343      STRING     "Rb'abc'"     (1, 20) (1, 27)
     344      OP         '+'           (1, 28) (1, 29)
     345      STRING     "RB'abc'"     (1, 30) (1, 37)
     346      """)
     347          self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
     348      STRING     'rb"abc"'     (1, 0) (1, 7)
     349      OP         '+'           (1, 8) (1, 9)
     350      STRING     'rB"abc"'     (1, 10) (1, 17)
     351      OP         '+'           (1, 18) (1, 19)
     352      STRING     'Rb"abc"'     (1, 20) (1, 27)
     353      OP         '+'           (1, 28) (1, 29)
     354      STRING     'RB"abc"'     (1, 30) (1, 37)
     355      """)
     356          # Check 0, 1, and 2 character string prefixes.
     357          self.check_tokenize(r'"a\
     358  de\
     359  fg"', """\
     360      STRING     '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
     361      """)
     362          self.check_tokenize(r'u"a\
     363  de"', """\
     364      STRING     'u"a\\\\\\nde"\'  (1, 0) (2, 3)
     365      """)
     366          self.check_tokenize(r'rb"a\
     367  d"', """\
     368      STRING     'rb"a\\\\\\nd"\'  (1, 0) (2, 2)
     369      """)
     370          self.check_tokenize(r'"""a\
     371  b"""', """\
     372      STRING     '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
     373      """)
     374          self.check_tokenize(r'u"""a\
     375  b"""', """\
     376      STRING     'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
     377      """)
     378          self.check_tokenize(r'rb"""a\
     379  b\
     380  c"""', """\
     381      STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
     382      """)
     383          self.check_tokenize('f"abc"', """\
     384      STRING     'f"abc"'      (1, 0) (1, 6)
     385      """)
     386          self.check_tokenize('fR"a{b}c"', """\
     387      STRING     'fR"a{b}c"'   (1, 0) (1, 9)
     388      """)
     389          self.check_tokenize('f"""abc"""', """\
     390      STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
     391      """)
     392          self.check_tokenize(r'f"abc\
     393  def"', """\
     394      STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
     395      """)
     396          self.check_tokenize(r'Rf"abc\
     397  def"', """\
     398      STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
     399      """)
     400  
     401      def test_function(self):
     402          self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
     403      NAME       'def'         (1, 0) (1, 3)
     404      NAME       'd22'         (1, 4) (1, 7)
     405      OP         '('           (1, 7) (1, 8)
     406      NAME       'a'           (1, 8) (1, 9)
     407      OP         ','           (1, 9) (1, 10)
     408      NAME       'b'           (1, 11) (1, 12)
     409      OP         ','           (1, 12) (1, 13)
     410      NAME       'c'           (1, 14) (1, 15)
     411      OP         '='           (1, 15) (1, 16)
     412      NUMBER     '2'           (1, 16) (1, 17)
     413      OP         ','           (1, 17) (1, 18)
     414      NAME       'd'           (1, 19) (1, 20)
     415      OP         '='           (1, 20) (1, 21)
     416      NUMBER     '2'           (1, 21) (1, 22)
     417      OP         ','           (1, 22) (1, 23)
     418      OP         '*'           (1, 24) (1, 25)
     419      NAME       'k'           (1, 25) (1, 26)
     420      OP         ')'           (1, 26) (1, 27)
     421      OP         ':'           (1, 27) (1, 28)
     422      NAME       'pass'        (1, 29) (1, 33)
     423      """)
     424          self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
     425      NAME       'def'         (1, 0) (1, 3)
     426      NAME       'd01v_'       (1, 4) (1, 9)
     427      OP         '('           (1, 9) (1, 10)
     428      NAME       'a'           (1, 10) (1, 11)
     429      OP         '='           (1, 11) (1, 12)
     430      NUMBER     '1'           (1, 12) (1, 13)
     431      OP         ','           (1, 13) (1, 14)
     432      OP         '*'           (1, 15) (1, 16)
     433      NAME       'k'           (1, 16) (1, 17)
     434      OP         ','           (1, 17) (1, 18)
     435      OP         '**'          (1, 19) (1, 21)
     436      NAME       'w'           (1, 21) (1, 22)
     437      OP         ')'           (1, 22) (1, 23)
     438      OP         ':'           (1, 23) (1, 24)
     439      NAME       'pass'        (1, 25) (1, 29)
     440      """)
     441          self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\
     442      NAME       'def'         (1, 0) (1, 3)
     443      NAME       'd23'         (1, 4) (1, 7)
     444      OP         '('           (1, 7) (1, 8)
     445      NAME       'a'           (1, 8) (1, 9)
     446      OP         ':'           (1, 9) (1, 10)
     447      NAME       'str'         (1, 11) (1, 14)
     448      OP         ','           (1, 14) (1, 15)
     449      NAME       'b'           (1, 16) (1, 17)
     450      OP         ':'           (1, 17) (1, 18)
     451      NAME       'int'         (1, 19) (1, 22)
     452      OP         '='           (1, 22) (1, 23)
     453      NUMBER     '3'           (1, 23) (1, 24)
     454      OP         ')'           (1, 24) (1, 25)
     455      OP         '->'          (1, 26) (1, 28)
     456      NAME       'int'         (1, 29) (1, 32)
     457      OP         ':'           (1, 32) (1, 33)
     458      NAME       'pass'        (1, 34) (1, 38)
     459      """)
     460  
     461      def test_comparison(self):
     462          # Comparison
     463          self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
     464                              "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
     465      NAME       'if'          (1, 0) (1, 2)
     466      NUMBER     '1'           (1, 3) (1, 4)
     467      OP         '<'           (1, 5) (1, 6)
     468      NUMBER     '1'           (1, 7) (1, 8)
     469      OP         '>'           (1, 9) (1, 10)
     470      NUMBER     '1'           (1, 11) (1, 12)
     471      OP         '=='          (1, 13) (1, 15)
     472      NUMBER     '1'           (1, 16) (1, 17)
     473      OP         '>='          (1, 18) (1, 20)
     474      NUMBER     '5'           (1, 21) (1, 22)
     475      OP         '<='          (1, 23) (1, 25)
     476      NUMBER     '0x15'        (1, 26) (1, 30)
     477      OP         '<='          (1, 31) (1, 33)
     478      NUMBER     '0x12'        (1, 34) (1, 38)
     479      OP         '!='          (1, 39) (1, 41)
     480      NUMBER     '1'           (1, 42) (1, 43)
     481      NAME       'and'         (1, 44) (1, 47)
     482      NUMBER     '5'           (1, 48) (1, 49)
     483      NAME       'in'          (1, 50) (1, 52)
     484      NUMBER     '1'           (1, 53) (1, 54)
     485      NAME       'not'         (1, 55) (1, 58)
     486      NAME       'in'          (1, 59) (1, 61)
     487      NUMBER     '1'           (1, 62) (1, 63)
     488      NAME       'is'          (1, 64) (1, 66)
     489      NUMBER     '1'           (1, 67) (1, 68)
     490      NAME       'or'          (1, 69) (1, 71)
     491      NUMBER     '5'           (1, 72) (1, 73)
     492      NAME       'is'          (1, 74) (1, 76)
     493      NAME       'not'         (1, 77) (1, 80)
     494      NUMBER     '1'           (1, 81) (1, 82)
     495      OP         ':'           (1, 82) (1, 83)
     496      NAME       'pass'        (1, 84) (1, 88)
     497      """)
     498  
     499      def test_shift(self):
     500          # Shift
     501          self.check_tokenize("x = 1 << 1 >> 5", """\
     502      NAME       'x'           (1, 0) (1, 1)
     503      OP         '='           (1, 2) (1, 3)
     504      NUMBER     '1'           (1, 4) (1, 5)
     505      OP         '<<'          (1, 6) (1, 8)
     506      NUMBER     '1'           (1, 9) (1, 10)
     507      OP         '>>'          (1, 11) (1, 13)
     508      NUMBER     '5'           (1, 14) (1, 15)
     509      """)
     510  
     511      def test_additive(self):
     512          # Additive
     513          self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
     514      NAME       'x'           (1, 0) (1, 1)
     515      OP         '='           (1, 2) (1, 3)
     516      NUMBER     '1'           (1, 4) (1, 5)
     517      OP         '-'           (1, 6) (1, 7)
     518      NAME       'y'           (1, 8) (1, 9)
     519      OP         '+'           (1, 10) (1, 11)
     520      NUMBER     '15'          (1, 12) (1, 14)
     521      OP         '-'           (1, 15) (1, 16)
     522      NUMBER     '1'           (1, 17) (1, 18)
     523      OP         '+'           (1, 19) (1, 20)
     524      NUMBER     '0x124'       (1, 21) (1, 26)
     525      OP         '+'           (1, 27) (1, 28)
     526      NAME       'z'           (1, 29) (1, 30)
     527      OP         '+'           (1, 31) (1, 32)
     528      NAME       'a'           (1, 33) (1, 34)
     529      OP         '['           (1, 34) (1, 35)
     530      NUMBER     '5'           (1, 35) (1, 36)
     531      OP         ']'           (1, 36) (1, 37)
     532      """)
     533  
     534      def test_multiplicative(self):
     535          # Multiplicative
     536          self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
     537      NAME       'x'           (1, 0) (1, 1)
     538      OP         '='           (1, 2) (1, 3)
     539      NUMBER     '1'           (1, 4) (1, 5)
     540      OP         '//'          (1, 5) (1, 7)
     541      NUMBER     '1'           (1, 7) (1, 8)
     542      OP         '*'           (1, 8) (1, 9)
     543      NUMBER     '1'           (1, 9) (1, 10)
     544      OP         '/'           (1, 10) (1, 11)
     545      NUMBER     '5'           (1, 11) (1, 12)
     546      OP         '*'           (1, 12) (1, 13)
     547      NUMBER     '12'          (1, 13) (1, 15)
     548      OP         '%'           (1, 15) (1, 16)
     549      NUMBER     '0x12'        (1, 16) (1, 20)
     550      OP         '@'           (1, 20) (1, 21)
     551      NUMBER     '42'          (1, 21) (1, 23)
     552      """)
     553  
     554      def test_unary(self):
     555          # Unary
     556          self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
     557      OP         '~'           (1, 0) (1, 1)
     558      NUMBER     '1'           (1, 1) (1, 2)
     559      OP         '^'           (1, 3) (1, 4)
     560      NUMBER     '1'           (1, 5) (1, 6)
     561      OP         '&'           (1, 7) (1, 8)
     562      NUMBER     '1'           (1, 9) (1, 10)
     563      OP         '|'           (1, 11) (1, 12)
     564      NUMBER     '1'           (1, 12) (1, 13)
     565      OP         '^'           (1, 14) (1, 15)
     566      OP         '-'           (1, 16) (1, 17)
     567      NUMBER     '1'           (1, 17) (1, 18)
     568      """)
     569          self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
     570      OP         '-'           (1, 0) (1, 1)
     571      NUMBER     '1'           (1, 1) (1, 2)
     572      OP         '*'           (1, 2) (1, 3)
     573      NUMBER     '1'           (1, 3) (1, 4)
     574      OP         '/'           (1, 4) (1, 5)
     575      NUMBER     '1'           (1, 5) (1, 6)
     576      OP         '+'           (1, 6) (1, 7)
     577      NUMBER     '1'           (1, 7) (1, 8)
     578      OP         '*'           (1, 8) (1, 9)
     579      NUMBER     '1'           (1, 9) (1, 10)
     580      OP         '//'          (1, 10) (1, 12)
     581      NUMBER     '1'           (1, 12) (1, 13)
     582      OP         '-'           (1, 14) (1, 15)
     583      OP         '-'           (1, 16) (1, 17)
     584      OP         '-'           (1, 17) (1, 18)
     585      OP         '-'           (1, 18) (1, 19)
     586      NUMBER     '1'           (1, 19) (1, 20)
     587      OP         '**'          (1, 20) (1, 22)
     588      NUMBER     '1'           (1, 22) (1, 23)
     589      """)
     590  
     591      def test_selector(self):
     592          # Selector
     593          self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
     594      NAME       'import'      (1, 0) (1, 6)
     595      NAME       'sys'         (1, 7) (1, 10)
     596      OP         ','           (1, 10) (1, 11)
     597      NAME       'time'        (1, 12) (1, 16)
     598      NEWLINE    '\\n'          (1, 16) (1, 17)
     599      NAME       'x'           (2, 0) (2, 1)
     600      OP         '='           (2, 2) (2, 3)
     601      NAME       'sys'         (2, 4) (2, 7)
     602      OP         '.'           (2, 7) (2, 8)
     603      NAME       'modules'     (2, 8) (2, 15)
     604      OP         '['           (2, 15) (2, 16)
     605      STRING     "'time'"      (2, 16) (2, 22)
     606      OP         ']'           (2, 22) (2, 23)
     607      OP         '.'           (2, 23) (2, 24)
     608      NAME       'time'        (2, 24) (2, 28)
     609      OP         '('           (2, 28) (2, 29)
     610      OP         ')'           (2, 29) (2, 30)
     611      """)
     612  
     613      def test_method(self):
     614          # Methods
     615          self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
     616      OP         '@'           (1, 0) (1, 1)
     617      NAME       'staticmethod' (1, 1) (1, 13)
     618      NEWLINE    '\\n'          (1, 13) (1, 14)
     619      NAME       'def'         (2, 0) (2, 3)
     620      NAME       'foo'         (2, 4) (2, 7)
     621      OP         '('           (2, 7) (2, 8)
     622      NAME       'x'           (2, 8) (2, 9)
     623      OP         ','           (2, 9) (2, 10)
     624      NAME       'y'           (2, 10) (2, 11)
     625      OP         ')'           (2, 11) (2, 12)
     626      OP         ':'           (2, 12) (2, 13)
     627      NAME       'pass'        (2, 14) (2, 18)
     628      """)
     629  
     630      def test_tabs(self):
     631          # Evil tabs
     632          self.check_tokenize("def f():\n"
     633                              "\tif x\n"
     634                              "        \tpass", """\
     635      NAME       'def'         (1, 0) (1, 3)
     636      NAME       'f'           (1, 4) (1, 5)
     637      OP         '('           (1, 5) (1, 6)
     638      OP         ')'           (1, 6) (1, 7)
     639      OP         ':'           (1, 7) (1, 8)
     640      NEWLINE    '\\n'          (1, 8) (1, 9)
     641      INDENT     '\\t'          (2, 0) (2, 1)
     642      NAME       'if'          (2, 1) (2, 3)
     643      NAME       'x'           (2, 4) (2, 5)
     644      NEWLINE    '\\n'          (2, 5) (2, 6)
     645      INDENT     '        \\t'  (3, 0) (3, 9)
     646      NAME       'pass'        (3, 9) (3, 13)
     647      DEDENT     ''            (4, 0) (4, 0)
     648      DEDENT     ''            (4, 0) (4, 0)
     649      """)
     650  
     651      def test_non_ascii_identifiers(self):
     652          # Non-ascii identifiers
     653          self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
     654      NAME       'Örter'       (1, 0) (1, 5)
     655      OP         '='           (1, 6) (1, 7)
     656      STRING     "'places'"    (1, 8) (1, 16)
     657      NEWLINE    '\\n'          (1, 16) (1, 17)
     658      NAME       'grün'        (2, 0) (2, 4)
     659      OP         '='           (2, 5) (2, 6)
     660      STRING     "'green'"     (2, 7) (2, 14)
     661      """)
     662  
     663      def test_unicode(self):
     664          # Legacy unicode literals:
     665          self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
     666      NAME       'Örter'       (1, 0) (1, 5)
     667      OP         '='           (1, 6) (1, 7)
     668      STRING     "u'places'"   (1, 8) (1, 17)
     669      NEWLINE    '\\n'          (1, 17) (1, 18)
     670      NAME       'grün'        (2, 0) (2, 4)
     671      OP         '='           (2, 5) (2, 6)
     672      STRING     "U'green'"    (2, 7) (2, 15)
     673      """)
     674  
     675      def test_async(self):
     676          # Async/await extension:
     677          self.check_tokenize("async = 1", """\
     678      NAME       'async'       (1, 0) (1, 5)
     679      OP         '='           (1, 6) (1, 7)
     680      NUMBER     '1'           (1, 8) (1, 9)
     681      """)
     682  
     683          self.check_tokenize("a = (async = 1)", """\
     684      NAME       'a'           (1, 0) (1, 1)
     685      OP         '='           (1, 2) (1, 3)
     686      OP         '('           (1, 4) (1, 5)
     687      NAME       'async'       (1, 5) (1, 10)
     688      OP         '='           (1, 11) (1, 12)
     689      NUMBER     '1'           (1, 13) (1, 14)
     690      OP         ')'           (1, 14) (1, 15)
     691      """)
     692  
     693          self.check_tokenize("async()", """\
     694      NAME       'async'       (1, 0) (1, 5)
     695      OP         '('           (1, 5) (1, 6)
     696      OP         ')'           (1, 6) (1, 7)
     697      """)
     698  
     699          self.check_tokenize("class async(Bar):pass", """\
     700      NAME       'class'       (1, 0) (1, 5)
     701      NAME       'async'       (1, 6) (1, 11)
     702      OP         '('           (1, 11) (1, 12)
     703      NAME       'Bar'         (1, 12) (1, 15)
     704      OP         ')'           (1, 15) (1, 16)
     705      OP         ':'           (1, 16) (1, 17)
     706      NAME       'pass'        (1, 17) (1, 21)
     707      """)
     708  
     709          self.check_tokenize("class async:pass", """\
     710      NAME       'class'       (1, 0) (1, 5)
     711      NAME       'async'       (1, 6) (1, 11)
     712      OP         ':'           (1, 11) (1, 12)
     713      NAME       'pass'        (1, 12) (1, 16)
     714      """)
     715  
     716          self.check_tokenize("await = 1", """\
     717      NAME       'await'       (1, 0) (1, 5)
     718      OP         '='           (1, 6) (1, 7)
     719      NUMBER     '1'           (1, 8) (1, 9)
     720      """)
     721  
     722          self.check_tokenize("foo.async", """\
     723      NAME       'foo'         (1, 0) (1, 3)
     724      OP         '.'           (1, 3) (1, 4)
     725      NAME       'async'       (1, 4) (1, 9)
     726      """)
     727  
     728          self.check_tokenize("async for a in b: pass", """\
     729      NAME       'async'       (1, 0) (1, 5)
     730      NAME       'for'         (1, 6) (1, 9)
     731      NAME       'a'           (1, 10) (1, 11)
     732      NAME       'in'          (1, 12) (1, 14)
     733      NAME       'b'           (1, 15) (1, 16)
     734      OP         ':'           (1, 16) (1, 17)
     735      NAME       'pass'        (1, 18) (1, 22)
     736      """)
     737  
     738          self.check_tokenize("async with a as b: pass", """\
     739      NAME       'async'       (1, 0) (1, 5)
     740      NAME       'with'        (1, 6) (1, 10)
     741      NAME       'a'           (1, 11) (1, 12)
     742      NAME       'as'          (1, 13) (1, 15)
     743      NAME       'b'           (1, 16) (1, 17)
     744      OP         ':'           (1, 17) (1, 18)
     745      NAME       'pass'        (1, 19) (1, 23)
     746      """)
     747  
     748          self.check_tokenize("async.foo", """\
     749      NAME       'async'       (1, 0) (1, 5)
     750      OP         '.'           (1, 5) (1, 6)
     751      NAME       'foo'         (1, 6) (1, 9)
     752      """)
     753  
     754          self.check_tokenize("async", """\
     755      NAME       'async'       (1, 0) (1, 5)
     756      """)
     757  
     758          self.check_tokenize("async\n#comment\nawait", """\
     759      NAME       'async'       (1, 0) (1, 5)
     760      NEWLINE    '\\n'          (1, 5) (1, 6)
     761      COMMENT    '#comment'    (2, 0) (2, 8)
     762      NL         '\\n'          (2, 8) (2, 9)
     763      NAME       'await'       (3, 0) (3, 5)
     764      """)
     765  
     766          self.check_tokenize("async\n...\nawait", """\
     767      NAME       'async'       (1, 0) (1, 5)
     768      NEWLINE    '\\n'          (1, 5) (1, 6)
     769      OP         '...'         (2, 0) (2, 3)
     770      NEWLINE    '\\n'          (2, 3) (2, 4)
     771      NAME       'await'       (3, 0) (3, 5)
     772      """)
     773  
     774          self.check_tokenize("async\nawait", """\
     775      NAME       'async'       (1, 0) (1, 5)
     776      NEWLINE    '\\n'          (1, 5) (1, 6)
     777      NAME       'await'       (2, 0) (2, 5)
     778      """)
     779  
     780          self.check_tokenize("foo.async + 1", """\
     781      NAME       'foo'         (1, 0) (1, 3)
     782      OP         '.'           (1, 3) (1, 4)
     783      NAME       'async'       (1, 4) (1, 9)
     784      OP         '+'           (1, 10) (1, 11)
     785      NUMBER     '1'           (1, 12) (1, 13)
     786      """)
     787  
     788          self.check_tokenize("async def foo(): pass", """\
     789      NAME       'async'       (1, 0) (1, 5)
     790      NAME       'def'         (1, 6) (1, 9)
     791      NAME       'foo'         (1, 10) (1, 13)
     792      OP         '('           (1, 13) (1, 14)
     793      OP         ')'           (1, 14) (1, 15)
     794      OP         ':'           (1, 15) (1, 16)
     795      NAME       'pass'        (1, 17) (1, 21)
     796      """)
     797  
     798          self.check_tokenize('''\
     799  async def foo():
     800    def foo(await):
     801      await = 1
     802    if 1:
     803      await
     804  async += 1
     805  ''', """\
     806      NAME       'async'       (1, 0) (1, 5)
     807      NAME       'def'         (1, 6) (1, 9)
     808      NAME       'foo'         (1, 10) (1, 13)
     809      OP         '('           (1, 13) (1, 14)
     810      OP         ')'           (1, 14) (1, 15)
     811      OP         ':'           (1, 15) (1, 16)
     812      NEWLINE    '\\n'          (1, 16) (1, 17)
     813      INDENT     '  '          (2, 0) (2, 2)
     814      NAME       'def'         (2, 2) (2, 5)
     815      NAME       'foo'         (2, 6) (2, 9)
     816      OP         '('           (2, 9) (2, 10)
     817      NAME       'await'       (2, 10) (2, 15)
     818      OP         ')'           (2, 15) (2, 16)
     819      OP         ':'           (2, 16) (2, 17)
     820      NEWLINE    '\\n'          (2, 17) (2, 18)
     821      INDENT     '    '        (3, 0) (3, 4)
     822      NAME       'await'       (3, 4) (3, 9)
     823      OP         '='           (3, 10) (3, 11)
     824      NUMBER     '1'           (3, 12) (3, 13)
     825      NEWLINE    '\\n'          (3, 13) (3, 14)
     826      DEDENT     ''            (4, 2) (4, 2)
     827      NAME       'if'          (4, 2) (4, 4)
     828      NUMBER     '1'           (4, 5) (4, 6)
     829      OP         ':'           (4, 6) (4, 7)
     830      NEWLINE    '\\n'          (4, 7) (4, 8)
     831      INDENT     '    '        (5, 0) (5, 4)
     832      NAME       'await'       (5, 4) (5, 9)
     833      NEWLINE    '\\n'          (5, 9) (5, 10)
     834      DEDENT     ''            (6, 0) (6, 0)
     835      DEDENT     ''            (6, 0) (6, 0)
     836      NAME       'async'       (6, 0) (6, 5)
     837      OP         '+='          (6, 6) (6, 8)
     838      NUMBER     '1'           (6, 9) (6, 10)
     839      NEWLINE    '\\n'          (6, 10) (6, 11)
     840      """)
     841  
     842          self.check_tokenize('''\
     843  async def foo():
     844    async for i in 1: pass''', """\
     845      NAME       'async'       (1, 0) (1, 5)
     846      NAME       'def'         (1, 6) (1, 9)
     847      NAME       'foo'         (1, 10) (1, 13)
     848      OP         '('           (1, 13) (1, 14)
     849      OP         ')'           (1, 14) (1, 15)
     850      OP         ':'           (1, 15) (1, 16)
     851      NEWLINE    '\\n'          (1, 16) (1, 17)
     852      INDENT     '  '          (2, 0) (2, 2)
     853      NAME       'async'       (2, 2) (2, 7)
     854      NAME       'for'         (2, 8) (2, 11)
     855      NAME       'i'           (2, 12) (2, 13)
     856      NAME       'in'          (2, 14) (2, 16)
     857      NUMBER     '1'           (2, 17) (2, 18)
     858      OP         ':'           (2, 18) (2, 19)
     859      NAME       'pass'        (2, 20) (2, 24)
     860      DEDENT     ''            (3, 0) (3, 0)
     861      """)
     862  
     863          self.check_tokenize('''async def foo(async): await''', """\
     864      NAME       'async'       (1, 0) (1, 5)
     865      NAME       'def'         (1, 6) (1, 9)
     866      NAME       'foo'         (1, 10) (1, 13)
     867      OP         '('           (1, 13) (1, 14)
     868      NAME       'async'       (1, 14) (1, 19)
     869      OP         ')'           (1, 19) (1, 20)
     870      OP         ':'           (1, 20) (1, 21)
     871      NAME       'await'       (1, 22) (1, 27)
     872      """)
     873  
     874          self.check_tokenize('''\
     875  def f():
     876  
     877    def baz(): pass
     878    async def bar(): pass
     879  
     880    await = 2''', """\
     881      NAME       'def'         (1, 0) (1, 3)
     882      NAME       'f'           (1, 4) (1, 5)
     883      OP         '('           (1, 5) (1, 6)
     884      OP         ')'           (1, 6) (1, 7)
     885      OP         ':'           (1, 7) (1, 8)
     886      NEWLINE    '\\n'          (1, 8) (1, 9)
     887      NL         '\\n'          (2, 0) (2, 1)
     888      INDENT     '  '          (3, 0) (3, 2)
     889      NAME       'def'         (3, 2) (3, 5)
     890      NAME       'baz'         (3, 6) (3, 9)
     891      OP         '('           (3, 9) (3, 10)
     892      OP         ')'           (3, 10) (3, 11)
     893      OP         ':'           (3, 11) (3, 12)
     894      NAME       'pass'        (3, 13) (3, 17)
     895      NEWLINE    '\\n'          (3, 17) (3, 18)
     896      NAME       'async'       (4, 2) (4, 7)
     897      NAME       'def'         (4, 8) (4, 11)
     898      NAME       'bar'         (4, 12) (4, 15)
     899      OP         '('           (4, 15) (4, 16)
     900      OP         ')'           (4, 16) (4, 17)
     901      OP         ':'           (4, 17) (4, 18)
     902      NAME       'pass'        (4, 19) (4, 23)
     903      NEWLINE    '\\n'          (4, 23) (4, 24)
     904      NL         '\\n'          (5, 0) (5, 1)
     905      NAME       'await'       (6, 2) (6, 7)
     906      OP         '='           (6, 8) (6, 9)
     907      NUMBER     '2'           (6, 10) (6, 11)
     908      DEDENT     ''            (7, 0) (7, 0)
     909      """)
     910  
     911          self.check_tokenize('''\
     912  async def f():
     913  
     914    def baz(): pass
     915    async def bar(): pass
     916  
     917    await = 2''', """\
     918      NAME       'async'       (1, 0) (1, 5)
     919      NAME       'def'         (1, 6) (1, 9)
     920      NAME       'f'           (1, 10) (1, 11)
     921      OP         '('           (1, 11) (1, 12)
     922      OP         ')'           (1, 12) (1, 13)
     923      OP         ':'           (1, 13) (1, 14)
     924      NEWLINE    '\\n'          (1, 14) (1, 15)
     925      NL         '\\n'          (2, 0) (2, 1)
     926      INDENT     '  '          (3, 0) (3, 2)
     927      NAME       'def'         (3, 2) (3, 5)
     928      NAME       'baz'         (3, 6) (3, 9)
     929      OP         '('           (3, 9) (3, 10)
     930      OP         ')'           (3, 10) (3, 11)
     931      OP         ':'           (3, 11) (3, 12)
     932      NAME       'pass'        (3, 13) (3, 17)
     933      NEWLINE    '\\n'          (3, 17) (3, 18)
     934      NAME       'async'       (4, 2) (4, 7)
     935      NAME       'def'         (4, 8) (4, 11)
     936      NAME       'bar'         (4, 12) (4, 15)
     937      OP         '('           (4, 15) (4, 16)
     938      OP         ')'           (4, 16) (4, 17)
     939      OP         ':'           (4, 17) (4, 18)
     940      NAME       'pass'        (4, 19) (4, 23)
     941      NEWLINE    '\\n'          (4, 23) (4, 24)
     942      NL         '\\n'          (5, 0) (5, 1)
     943      NAME       'await'       (6, 2) (6, 7)
     944      OP         '='           (6, 8) (6, 9)
     945      NUMBER     '2'           (6, 10) (6, 11)
     946      DEDENT     ''            (7, 0) (7, 0)
     947      """)
     948  
     949  class GenerateTokensTest(TokenizeTest):
     950      def check_tokenize(self, s, expected):
     951          # Format the tokens in s as a table.
     952          # The ENDMARKER and final NEWLINE are omitted.
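                  # Unlike tokenize(), generate_tokens() reads str lines and
                  # emits no ENCODING token, so no ENCODING row is prepended
                  # to the expected table here.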
     953          f = StringIO(s)
     954          result = stringify_tokens_from_source(generate_tokens(f.readline), s)
     955          self.assertEqual(result, expected.rstrip().splitlines())
     956  
     957  
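          # decistmt() below mirrors the example from the tokenize docs: it rewrites
          # float literals as Decimal(...) constructor calls.  Because only
          # (type, string) 2-tuples are fed back in, untokenize() runs in
          # compatibility mode, which preserves the token sequence but not exact
          # spacing -- hence results like "Decimal ('21.3e-5')" in
          # TestMisc.test_decistmt below.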
     958  def decistmt(s):
     959      result = []
     960      g = tokenize(BytesIO(s.encode('utf-8')).readline)   # tokenize the string
     961      for toknum, tokval, _, _, _  in g:
     962          if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
     963              result.extend([
     964                  (NAME, 'Decimal'),
     965                  (OP, '('),
     966                  (STRING, repr(tokval)),
     967                  (OP, ')')
     968              ])
     969          else:
     970              result.append((toknum, tokval))
     971      return untokenize(result).decode('utf-8')
     972  
     973  class TestMisc(TestCase):
     974  
     975      def test_decistmt(self):
     976          # Substitute Decimals for floats in a string of statements.
     977          # This is an example from the docs.
     978  
     979          from decimal import Decimal
     980          s = '+21.3e-5*-.1234/81.7'
     981          self.assertEqual(decistmt(s),
     982                           "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")
     983  
     984          # The format of the exponent is inherited from the platform C library.
     985          # Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
     986          # we're only showing 11 digits, and the 12th isn't close to 5, the
     987          # rest of the output should be platform-independent.
     988          self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')
     989  
     990          # Output from calculations with Decimal should be identical across all
     991          # platforms.
     992          self.assertEqual(eval(decistmt(s)),
     993                           Decimal('-3.217160342717258261933904529E-7'))
     994  
     995  
     996  class TestTokenizerAdheresToPep0263(TestCase):
     997      """
     998      Test that the tokenizer adheres to the coding behaviour stipulated in PEP 0263.
     999      """
    1000  
    1001      def _testFile(self, filename):
    1002          path = os.path.join(os.path.dirname(__file__), 'tokenizedata', filename)
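                  # check_roundtrip() is defined on TestRoundtrip later in this
                  # file; it tokenizes the data and verifies that untokenize()
                  # reproduces it.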
    1003          TestRoundtrip.check_roundtrip(self, open(path, 'rb'))
    1004  
    1005      def test_utf8_coding_cookie_and_no_utf8_bom(self):
    1006          f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
    1007          self._testFile(f)
    1008  
    1009      def test_latin1_coding_cookie_and_utf8_bom(self):
    1010          """
    1011          As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
    1012          allowed encoding for the comment is 'utf-8'.  The text file used in
    1013          this test starts with a BOM signature, but specifies latin1 as the
    1014          coding, so verify that a SyntaxError is raised, which matches the
    1015          behaviour of the interpreter when it encounters a similar condition.
    1016          """
    1017          f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
    1018          self.assertRaises(SyntaxError, self._testFile, f)
    1019  
    1020      def test_no_coding_cookie_and_utf8_bom(self):
    1021          f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
    1022          self._testFile(f)
    1023  
    1024      def test_utf8_coding_cookie_and_utf8_bom(self):
    1025          f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
    1026          self._testFile(f)
    1027  
    1028      def test_bad_coding_cookie(self):
    1029          self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
    1030          self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')
    1031  
    1032  
    1033  class Test_Tokenize(TestCase):
    1034  
    1035      def test__tokenize_decodes_with_specified_encoding(self):
    1036          literal = '"ЉЊЈЁЂ"'
    1037          line = literal.encode('utf-8')
    1038          first = False
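                  # One-shot readline: hand back the encoded line once, then b''
                  # to signal end of input.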
    1039          def readline():
    1040              nonlocal first
    1041              if not first:
    1042                  first = True
    1043                  return line
    1044              else:
    1045                  return b''
    1046  
    1047          # skip the initial encoding token and the end tokens
    1048          tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2]
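                  # In the 5-tuples below, token type 3 is token.STRING.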
    1049          expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
    1050          self.assertEqual(tokens, expected_tokens,
    1051                           "bytes not decoded with encoding")
    1052  
    1053      def test__tokenize_does_not_decode_with_encoding_none(self):
    1054          literal = '"ЉЊЈЁЂ"'
    1055          first = False
    1056          def readline():
    1057              nonlocal first
    1058              if not first:
    1059                  first = True
    1060                  return literal
    1061              else:
    1062                  return b''
    1063  
    1064          # skip the end tokens
    1065          tokens = list(_tokenize(readline, encoding=None))[:-2]
    1066          expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
    1067          self.assertEqual(tokens, expected_tokens,
    1068                           "string not tokenized when encoding is None")
    1069  
    1070  
    1071  class TestDetectEncoding(TestCase):
    1072  
    1073      def get_readline(self, lines):
    1074          index = 0
    1075          def readline():
    1076              nonlocal index
    1077              if index == len(lines):
    1078                  raise StopIteration
    1079              line = lines[index]
    1080              index += 1
    1081              return line
    1082          return readline
    1083  
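              # detect_encoding() calls readline at most twice; the tests below
              # check both the detected encoding and the (still undecoded) lines
              # it consumed.  A leading UTF-8 BOM is stripped from the first
              # returned line.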
    1084      def test_no_bom_no_encoding_cookie(self):
    1085          lines = (
    1086              b'# something\n',
    1087              b'print(something)\n',
    1088              b'do_something(else)\n'
    1089          )
    1090          encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    1091          self.assertEqual(encoding, 'utf-8')
    1092          self.assertEqual(consumed_lines, list(lines[:2]))
    1093  
    1094      def test_bom_no_cookie(self):
    1095          lines = (
    1096              b'\xef\xbb\xbf# something\n',
    1097              b'print(something)\n',
    1098              b'do_something(else)\n'
    1099          )
    1100          encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    1101          self.assertEqual(encoding, 'utf-8-sig')
    1102          self.assertEqual(consumed_lines,
    1103                           [b'# something\n', b'print(something)\n'])
    1104  
    1105      def test_cookie_first_line_no_bom(self):
    1106          lines = (
    1107              b'# -*- coding: latin-1 -*-\n',
    1108              b'print(something)\n',
    1109              b'do_something(else)\n'
    1110          )
    1111          encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    1112          self.assertEqual(encoding, 'iso-8859-1')
    1113          self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])
    1114  
    1115      def test_matched_bom_and_cookie_first_line(self):
    1116          lines = (
    1117              b'\xef\xbb\xbf# coding=utf-8\n',
    1118              b'print(something)\n',
    1119              b'do_something(else)\n'
    1120          )
    1121          encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    1122          self.assertEqual(encoding, 'utf-8-sig')
    1123          self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])
    1124  
    1125      def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
    1126          lines = (
    1127              b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
    1128              b'print(something)\n',
    1129              b'do_something(else)\n'
    1130          )
    1131          readline = self.get_readline(lines)
    1132          self.assertRaises(SyntaxError, detect_encoding, readline)
    1133  
    1134      def test_cookie_second_line_no_bom(self):
    1135          lines = (
    1136              b'#! something\n',
    1137              b'# vim: set fileencoding=ascii :\n',
    1138              b'print(something)\n',
    1139              b'do_something(else)\n'
    1140          )
    1141          encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    1142          self.assertEqual(encoding, 'ascii')
    1143          expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
    1144          self.assertEqual(consumed_lines, expected)
    1145  
    1146      def test_matched_bom_and_cookie_second_line(self):
    1147          lines = (
    1148              b'\xef\xbb\xbf#! something\n',
    1149              b'# coding=utf-8\n',
    1150              b'print(something)\n',
    1151              b'do_something(else)\n'
    1152          )
    1153          encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    1154          self.assertEqual(encoding, 'utf-8-sig')
    1155          self.assertEqual(consumed_lines,
    1156                           [b'#! something\n', b'# coding=utf-8\n'])
    1157  
    1158      def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
    1159          lines = (
    1160              b'\xef\xbb\xbf#! something\n',
    1161              b'# vim: set fileencoding=ascii :\n',
    1162              b'print(something)\n',
    1163              b'do_something(else)\n'
    1164          )
    1165          readline = self.get_readline(lines)
    1166          self.assertRaises(SyntaxError, detect_encoding, readline)
    1167  
    1168      def test_cookie_second_line_noncommented_first_line(self):
    1169          lines = (
    1170              b"print('\xc2\xa3')\n",
    1171              b'# vim: set fileencoding=iso8859-15 :\n',
    1172              b"print('\xe2\x82\xac')\n"
    1173          )
    1174          encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    1175          self.assertEqual(encoding, 'utf-8')
    1176          expected = [b"print('\xc2\xa3')\n"]
    1177          self.assertEqual(consumed_lines, expected)
    1178  
    1179      def test_cookie_second_line_commented_first_line(self):
    1180          lines = (
    1181              b"#print('\xc2\xa3')\n",
    1182              b'# vim: set fileencoding=iso8859-15 :\n',
    1183              b"print('\xe2\x82\xac')\n"
    1184          )
    1185          encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    1186          self.assertEqual(encoding, 'iso8859-15')
    1187          expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
    1188          self.assertEqual(consumed_lines, expected)
    1189  
    1190      def test_cookie_second_line_empty_first_line(self):
    1191          lines = (
    1192              b'\n',
    1193              b'# vim: set fileencoding=iso8859-15 :\n',
    1194              b"print('\xe2\x82\xac')\n"
    1195          )
    1196          encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    1197          self.assertEqual(encoding, 'iso8859-15')
    1198          expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
    1199          self.assertEqual(consumed_lines, expected)
    1200  
    1201      def test_latin1_normalization(self):
    1202          # See get_normal_name() in tokenizer.c.
    1203          encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
    1204                       "iso-8859-1-unix", "iso-latin-1-mac")
    1205          for encoding in encodings:
    1206              for rep in ("-", "_"):
    1207                  enc = encoding.replace("-", rep)
    1208                  lines = (b"#!/usr/bin/python\n",
    1209                           b"# coding: " + enc.encode("ascii") + b"\n",
    1210                           b"print(things)\n",
    1211                           b"do_something += 4\n")
    1212                  rl = self.get_readline(lines)
    1213                  found, consumed_lines = detect_encoding(rl)
    1214                  self.assertEqual(found, "iso-8859-1")
    1215  
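              # A rough sketch of the normalization checked above: any spelling
              # that get_normal_name() maps to latin-1 comes back as 'iso-8859-1'
              # (and, likewise, utf-8 variants come back as plain 'utf-8'):
              #
              #     >>> from io import BytesIO
              #     >>> from tokenize import detect_encoding
              #     >>> detect_encoding(BytesIO(b'# coding: latin_1\nx = 1\n').readline)[0]
              #     'iso-8859-1'
              #     >>> detect_encoding(BytesIO(b'# coding: utf_8_unix\nx = 1\n').readline)[0]
              #     'utf-8'
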
    1216      def test_syntaxerror_latin1(self):
    1217          # Issue 14629: need to raise SyntaxError if the first
    1218          # line(s) have non-UTF-8 characters
    1219          lines = (
    1220              b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
    1221              )
    1222          readline = self.get_readline(lines)
    1223          self.assertRaises(SyntaxError, detect_encoding, readline)
    1224  
    1225  
    1226      def test_utf8_normalization(self):
    1227          # See get_normal_name() in tokenizer.c.
    1228          encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
    1229          for encoding in encodings:
    1230              for rep in ("-", "_"):
    1231                  enc = encoding.replace("-", rep)
    1232                  lines = (b"#!/usr/bin/python\n",
    1233                           b"# coding: " + enc.encode("ascii") + b"\n",
    1234                           b"1 + 3\n")
    1235                  rl = self.get_readline(lines)
    1236                  found, consumed_lines = detect_encoding(rl)
    1237                  self.assertEqual(found, "utf-8")
    1238  
    1239      def test_short_files(self):
    1240          readline = self.get_readline((b'print(something)\n',))
    1241          encoding, consumed_lines = detect_encoding(readline)
    1242          self.assertEqual(encoding, 'utf-8')
    1243          self.assertEqual(consumed_lines, [b'print(something)\n'])
    1244  
    1245          encoding, consumed_lines = detect_encoding(self.get_readline(()))
    1246          self.assertEqual(encoding, 'utf-8')
    1247          self.assertEqual(consumed_lines, [])
    1248  
    1249          readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
    1250          encoding, consumed_lines = detect_encoding(readline)
    1251          self.assertEqual(encoding, 'utf-8-sig')
    1252          self.assertEqual(consumed_lines, [b'print(something)\n'])
    1253  
    1254          readline = self.get_readline((b'\xef\xbb\xbf',))
    1255          encoding, consumed_lines = detect_encoding(readline)
    1256          self.assertEqual(encoding, 'utf-8-sig')
    1257          self.assertEqual(consumed_lines, [])
    1258  
    1259          readline = self.get_readline((b'# coding: bad\n',))
    1260          self.assertRaises(SyntaxError, detect_encoding, readline)
    1261  
    1262      def test_false_encoding(self):
    1263          # Issue 18873: "Encoding" detected in non-comment lines
    1264          readline = self.get_readline((b'print("#coding=fake")',))
    1265          encoding, consumed_lines = detect_encoding(readline)
    1266          self.assertEqual(encoding, 'utf-8')
    1267          self.assertEqual(consumed_lines, [b'print("#coding=fake")'])
    1268  
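              # A related sketch: only the first two lines are ever inspected for
              # a cookie, so a cookie on line three is ignored and the default
              # 'utf-8' wins (the source below is illustrative, not a fixture):
              #
              #     >>> from io import BytesIO
              #     >>> from tokenize import detect_encoding
              #     >>> src = b'#!/usr/bin/env python\n# a comment\n# coding: latin-1\nx = 1\n'
              #     >>> detect_encoding(BytesIO(src).readline)[0]
              #     'utf-8'
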
    1269      def test_open(self):
    1270          filename = os_helper.TESTFN + '.py'
    1271          self.addCleanup(os_helper.unlink, filename)
    1272  
    1273          # test coding cookie
    1274          for encoding in ('iso-8859-15', 'utf-8'):
    1275              with open(filename, 'w', encoding=encoding) as fp:
    1276                  print("# coding: %s" % encoding, file=fp)
    1277                  print("print('euro:\u20ac')", file=fp)
    1278              with tokenize_open(filename) as fp:
    1279                  self.assertEqual(fp.encoding, encoding)
    1280                  self.assertEqual(fp.mode, 'r')
    1281  
    1282          # test BOM (no coding cookie)
    1283          with open(filename, 'w', encoding='utf-8-sig') as fp:
    1284              print("print('euro:\u20ac')", file=fp)
    1285          with tokenize_open(filename) as fp:
    1286              self.assertEqual(fp.encoding, 'utf-8-sig')
    1287              self.assertEqual(fp.mode, 'r')
    1288  
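              # A small usage sketch for tokenize.open() (imported above as
              # tokenize_open): it detects the encoding and reopens the file as a
              # text stream.  'demo.py' is a hypothetical path, not a fixture:
              #
              #     >>> with open('demo.py', 'w', encoding='iso-8859-15') as fp:
              #     ...     _ = fp.write("# coding: iso-8859-15\nprint('euro')\n")
              #     >>> with tokenize_open('demo.py') as fp:
              #     ...     fp.encoding
              #     'iso-8859-15'
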
    1289      def test_filename_in_exception(self):
    1290          # When possible, include the file name in the exception.
    1291          path = 'some_file_path'
    1292          lines = (
    1293              b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
    1294              )
    1295          class Bunk:
    1296              def __init__(self, lines, path):
    1297                  self.name = path
    1298                  self._lines = lines
    1299                  self._index = 0
    1300  
    1301              def readline(self):
    1302                  if self._index == len(self._lines):
    1303                      raise StopIteration
    1304                  line = self._lines[self._index]
    1305                  self._index += 1
    1306                  return line
    1307  
    1308          with self.assertRaises(SyntaxError):
    1309              ins = Bunk(lines, path)
    1310              # Make sure lacking a name isn't an issue.
    1311              del ins.name
    1312              detect_encoding(ins.readline)
    1313          with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
    1314              ins = Bunk(lines, path)
    1315              detect_encoding(ins.readline)
    1316  
    1317      def test_open_error(self):
    1318          # Issue #23840: open() must close the binary file on error
    1319          m = BytesIO(b'#coding:xxx')
    1320          with mock.patch('tokenize._builtin_open', return_value=m):
    1321              self.assertRaises(SyntaxError, tokenize_open, 'foobar')
    1322          self.assertTrue(m.closed)
    1323  
    1324  
    1325  class TestTokenize(TestCase):
    1326  
    1327      def test_tokenize(self):
    1328          import tokenize as tokenize_module
    1329          encoding = object()
    1330          encoding_used = None
    1331          def mock_detect_encoding(readline):
    1332              return encoding, [b'first', b'second']
    1333  
    1334          def mock__tokenize(readline, encoding):
    1335              nonlocal encoding_used
    1336              encoding_used = encoding
    1337              out = []
    1338              while True:
    1339                  next_line = readline()
    1340                  if next_line:
    1341                      out.append(next_line)
    1342                      continue
    1343                  return out
    1344  
    1345          counter = 0
    1346          def mock_readline():
    1347              nonlocal counter
    1348              counter += 1
    1349              if counter == 5:
    1350                  return b''
    1351              return str(counter).encode()
    1352  
    1353          orig_detect_encoding = tokenize_module.detect_encoding
    1354          orig__tokenize = tokenize_module._tokenize
    1355          tokenize_module.detect_encoding = mock_detect_encoding
    1356          tokenize_module._tokenize = mock__tokenize
    1357          try:
    1358              results = tokenize(mock_readline)
    1359              self.assertEqual(list(results),
    1360                               [b'first', b'second', b'1', b'2', b'3', b'4'])
    1361          finally:
    1362              tokenize_module.detect_encoding = orig_detect_encoding
    1363              tokenize_module._tokenize = orig__tokenize
    1364  
    1365          self.assertEqual(encoding_used, encoding)
    1366  
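              # What the mocks above rely on, roughly: tokenize() first calls
              # detect_encoding(readline), then replays the consumed lines ahead of
              # whatever readline still yields before handing the stream to
              # _tokenize().  A conceptual sketch (not the actual implementation):
              #
              #     import itertools
              #
              #     def sketch_tokenize(readline):
              #         encoding, consumed = detect_encoding(readline)
              #         chained = itertools.chain(consumed, iter(readline, b""))
              #         return _tokenize(chained.__next__, encoding)
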
    1367      def test_oneline_defs(self):
    1368          buf = []
    1369          for i in range(500):
    1370              buf.append('def i{i}(): return {i}'.format(i=i))
    1371          buf.append('OK')
    1372          buf = '\n'.join(buf)
    1373  
    1374          # Test that 500 consecutive one-line defs are OK
    1375          toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
    1376          self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER
    1377                                                  # [-2] is always NEWLINE
    1378  
    1379      def assertExactTypeEqual(self, opstr, *optypes):
    1380          tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
    1381          num_optypes = len(optypes)
    1382          self.assertEqual(len(tokens), 3 + num_optypes)
    1383          self.assertEqual(tok_name[tokens[0].exact_type],
    1384                           tok_name[ENCODING])
    1385          for i in range(num_optypes):
    1386              self.assertEqual(tok_name[tokens[i + 1].exact_type],
    1387                               tok_name[optypes[i]])
    1388          self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
    1389                           tok_name[token.NEWLINE])
    1390          self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],
    1391                           tok_name[token.ENDMARKER])
    1392  
    1393      def test_exact_type(self):
    1394          self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
    1395          self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
    1396          self.assertExactTypeEqual(':', token.COLON)
    1397          self.assertExactTypeEqual(',', token.COMMA)
    1398          self.assertExactTypeEqual(';', token.SEMI)
    1399          self.assertExactTypeEqual('+', token.PLUS)
    1400          self.assertExactTypeEqual('-', token.MINUS)
    1401          self.assertExactTypeEqual('*', token.STAR)
    1402          self.assertExactTypeEqual('/', token.SLASH)
    1403          self.assertExactTypeEqual('|', token.VBAR)
    1404          self.assertExactTypeEqual('&', token.AMPER)
    1405          self.assertExactTypeEqual('<', token.LESS)
    1406          self.assertExactTypeEqual('>', token.GREATER)
    1407          self.assertExactTypeEqual('=', token.EQUAL)
    1408          self.assertExactTypeEqual('.', token.DOT)
    1409          self.assertExactTypeEqual('%', token.PERCENT)
    1410          self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
    1411          self.assertExactTypeEqual('==', token.EQEQUAL)
    1412          self.assertExactTypeEqual('!=', token.NOTEQUAL)
    1413          self.assertExactTypeEqual('<=', token.LESSEQUAL)
    1414          self.assertExactTypeEqual('>=', token.GREATEREQUAL)
    1415          self.assertExactTypeEqual('~', token.TILDE)
    1416          self.assertExactTypeEqual('^', token.CIRCUMFLEX)
    1417          self.assertExactTypeEqual('<<', token.LEFTSHIFT)
    1418          self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
    1419          self.assertExactTypeEqual('**', token.DOUBLESTAR)
    1420          self.assertExactTypeEqual('+=', token.PLUSEQUAL)
    1421          self.assertExactTypeEqual('-=', token.MINEQUAL)
    1422          self.assertExactTypeEqual('*=', token.STAREQUAL)
    1423          self.assertExactTypeEqual('/=', token.SLASHEQUAL)
    1424          self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
    1425          self.assertExactTypeEqual('&=', token.AMPEREQUAL)
    1426          self.assertExactTypeEqual('|=', token.VBAREQUAL)
    1427          self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
    1429          self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
    1430          self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
    1431          self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
    1432          self.assertExactTypeEqual('//', token.DOUBLESLASH)
    1433          self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
    1434          self.assertExactTypeEqual(':=', token.COLONEQUAL)
    1435          self.assertExactTypeEqual('...', token.ELLIPSIS)
    1436          self.assertExactTypeEqual('->', token.RARROW)
    1437          self.assertExactTypeEqual('@', token.AT)
    1438          self.assertExactTypeEqual('@=', token.ATEQUAL)
    1439  
    1440          self.assertExactTypeEqual('a**2+b**2==c**2',
    1441                                    NAME, token.DOUBLESTAR, NUMBER,
    1442                                    token.PLUS,
    1443                                    NAME, token.DOUBLESTAR, NUMBER,
    1444                                    token.EQEQUAL,
    1445                                    NAME, token.DOUBLESTAR, NUMBER)
    1446          self.assertExactTypeEqual('{1, 2, 3}',
    1447                                    token.LBRACE,
    1448                                    token.NUMBER, token.COMMA,
    1449                                    token.NUMBER, token.COMMA,
    1450                                    token.NUMBER,
    1451                                    token.RBRACE)
    1452          self.assertExactTypeEqual('^(x & 0x1)',
    1453                                    token.CIRCUMFLEX,
    1454                                    token.LPAR,
    1455                                    token.NAME, token.AMPER, token.NUMBER,
    1456                                    token.RPAR)
    1457  
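              # A quick sketch of what exact_type adds on top of type: operator
              # tokens all share the generic type OP, while exact_type tells them
              # apart (every name used here is imported at the top of this module):
              #
              #     >>> toks = list(tokenize(BytesIO(b'1 + 2\n').readline))
              #     >>> [(tok_name[t.type], tok_name[t.exact_type])
              #     ...  for t in toks if t.type == OP]
              #     [('OP', 'PLUS')]
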
    1458      def test_pathological_trailing_whitespace(self):
    1459          # See http://bugs.python.org/issue16152
    1460          self.assertExactTypeEqual('@          ', token.AT)
    1461  
    1462      def test_comment_at_the_end_of_the_source_without_newline(self):
    1463          # See http://bugs.python.org/issue44667
    1464          source = 'b = 1\n\n#test'
    1465          expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT]
    1466  
    1467          tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
    1468          self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING])
    1469          for i in range(6):
    1470              self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]])
    1471          self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER])
    1472  
    1473  class UntokenizeTest(TestCase):
    1474  
    1475      def test_bad_input_order(self):
    1476          # raise if the start row precedes the previously written row
    1477          u = Untokenizer()
    1478          u.prev_row = 2
    1479          u.prev_col = 2
    1480          with self.assertRaises(ValueError) as cm:
    1481              u.add_whitespace((1,3))
    1482          self.assertEqual(cm.exception.args[0],
    1483                  'start (1,3) precedes previous end (2,2)')
    1484          # raise if the start column precedes the previous end column on the same row
    1485          self.assertRaises(ValueError, u.add_whitespace, (2,1))
    1486  
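              # The invariant checked above, as a tiny interactive sketch: the
              # requested start position may never precede what was already written:
              #
              #     >>> u = Untokenizer()
              #     >>> u.prev_row, u.prev_col = 2, 2
              #     >>> u.add_whitespace((1, 3))
              #     Traceback (most recent call last):
              #       ...
              #     ValueError: start (1,3) precedes previous end (2,2)
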
    1487      def test_backslash_continuation(self):
    1488          # <whitespace>\<newline> leaves no token, so add_whitespace() has to reconstruct it
    1489          u = Untokenizer()
    1490          u.prev_row = 1
    1491          u.prev_col = 1
    1492          u.tokens = []
    1493          u.add_whitespace((2, 0))
    1494          self.assertEqual(u.tokens, ['\\\n'])
    1495          u.prev_row = 2
    1496          u.add_whitespace((4, 4))
    1497          self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
    1498          TestRoundtrip.check_roundtrip(self, 'a\n  b\n    c\n  \\\n  c\n')
    1499  
    1500      def test_iter_compat(self):
    1501          u = Untokenizer()
    1502          token = (NAME, 'Hello')
    1503          tokens = [(ENCODING, 'utf-8'), token]
    1504          u.compat(token, iter([]))
    1505          self.assertEqual(u.tokens, ["Hello "])
    1506          u = Untokenizer()
    1507          self.assertEqual(u.untokenize(iter([token])), 'Hello ')
    1508          u = Untokenizer()
    1509          self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
    1510          self.assertEqual(u.encoding, 'utf-8')
    1511          self.assertEqual(untokenize(iter(tokens)), b'Hello ')
    1512  
    1513  
    1514  class TestRoundtrip(TestCase):
    1515  
    1516      def check_roundtrip(self, f):
    1517          """
    1518          Test roundtrip for `untokenize`. `f` is an open file or a string.
    1519          The source code in f is tokenized to both 5- and 2-tuples.
    1520          Both sequences are converted back to source code via
    1521          tokenize.untokenize(), and each result is tokenized again to 2-tuples.
    1522          The test fails if the three 2-tuple tokenizations do not match.
    1523  
    1524          When the remaining untokenize bugs are fixed, untokenizing the
    1525          5-tuples should also stop emitting a backslash continuation after
    1526          trailing spaces; a dedicated test should then cover that.
    1527          """
    1528          # Get source code and original tokenizations
    1529          if isinstance(f, str):
    1530              code = f.encode('utf-8')
    1531          else:
    1532              code = f.read()
    1533              f.close()
    1534          readline = iter(code.splitlines(keepends=True)).__next__
    1535          tokens5 = list(tokenize(readline))
    1536          tokens2 = [tok[:2] for tok in tokens5]
    1537          # Reproduce tokens2 from pairs
    1538          bytes_from2 = untokenize(tokens2)
    1539          readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
    1540          tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
    1541          self.assertEqual(tokens2_from2, tokens2)
    1542          # Reproduce tokens2 from 5-tuples
    1543          bytes_from5 = untokenize(tokens5)
    1544          readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
    1545          tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
    1546          self.assertEqual(tokens2_from5, tokens2)
    1547  
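              # A compact sketch of the property checked above: whitespace may be
              # normalized along the way, but tokenizing the regenerated source
              # agrees with the original on every (type, string) pair:
              #
              #     >>> src = b"if x == 1 :\n    print(x)\n"
              #     >>> toks = [t[:2] for t in tokenize(iter(src.splitlines(True)).__next__)]
              #     >>> again = untokenize(toks)
              #     >>> [t[:2] for t in tokenize(iter(again.splitlines(True)).__next__)] == toks
              #     True
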
    1548      def test_roundtrip(self):
    1549          # There are some standard formatting practices that are easy to get right.
    1550  
    1551          self.check_roundtrip("if x == 1:\n"
    1552                               "    print(x)\n")
    1553          self.check_roundtrip("# This is a comment\n"
    1554                               "# This also\n")
    1555  
    1556          # Some people use different formatting conventions, which makes
    1557          # untokenize a little trickier. Note that this test involves trailing
    1558          # whitespace after the colon: the first input line below intentionally
    1559          # ends with a blank before the newline.
    1560  
    1561          self.check_roundtrip("if x == 1 : \n"
    1562                               "  print(x)\n")
    1563          fn = support.findfile("tokenize_tests.txt", subdir="tokenizedata")
    1564          with open(fn, 'rb') as f:
    1565              self.check_roundtrip(f)
    1566          self.check_roundtrip("if x == 1:\n"
    1567                               "    # A comment by itself.\n"
    1568                               "    print(x) # Comment here, too.\n"
    1569                               "    # Another comment.\n"
    1570                               "after_if = True\n")
    1571          self.check_roundtrip("if (x # The comments need to go in the right place\n"
    1572                               "    == 1):\n"
    1573                               "    print('x==1')\n")
    1574          self.check_roundtrip("class Test: # A comment here\n"
    1575                               "  # A comment with weird indent\n"
    1576                               "  after_com = 5\n"
    1577                               "  def x(m): return m*5 # a one liner\n"
    1578                               "  def y(m): # A whitespace after the colon\n"
    1579                               "     return y*4 # 3-space indent\n")
    1580  
    1581          # Some error-handling code
    1582          self.check_roundtrip("try: import somemodule\n"
    1583                               "except ImportError: # comment\n"
    1584                               "    print('Can not import' # comment2\n)"
    1585                               "else:   print('Loaded')\n")
    1586  
    1587      def test_continuation(self):
    1588          # Balancing continuation
    1589          self.check_roundtrip("a = (3,4, \n"
    1590                               "5,6)\n"
    1591                               "y = [3, 4,\n"
    1592                               "5]\n"
    1593                               "z = {'a': 5,\n"
    1594                               "'b':15, 'c':True}\n"
    1595                               "x = len(y) + 5 - a[\n"
    1596                               "3] - a[2]\n"
    1597                               "+ len(z) - z[\n"
    1598                               "'b']\n")
    1599  
    1600      def test_backslash_continuation(self):
    1601          # Backslash means line continuation, except for comments
    1602          self.check_roundtrip("x=1+\\\n"
    1603                               "1\n"
    1604                               "# This is a comment\\\n"
    1605                               "# This also\n")
    1606          self.check_roundtrip("# Comment \\\n"
    1607                               "x = 0")
    1608  
    1609      def test_string_concatenation(self):
    1610          # Two string literals on the same line
    1611          self.check_roundtrip("'' ''")
    1612  
    1613      def test_random_files(self):
    1614          # Test roundtrip on random python modules.
    1615          # pass the '-ucpu' option to process the full directory.
    1616  
    1617          import glob, random
    1618          tempdir = os.path.dirname(__file__) or os.curdir
    1619          testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))
    1620  
    1621          # Tokenize is broken on test_unicode_identifiers.py because regular
    1622          # expressions are broken on the obscure unicode identifiers in it. *sigh*
    1623          # With roundtrip extended to test the 5-tuple mode of untokenize,
    1624          # six more testfiles fail.  Remove them also until the failure is diagnosed.
    1625  
    1626          testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
    1627          for f in ('buffer', 'builtin', 'fileio', 'os', 'platform', 'sys'):
    1628              testfiles.remove(os.path.join(tempdir, "test_%s.py" % f))
    1629  
    1630          if not support.is_resource_enabled("cpu"):
    1631              testfiles = random.sample(testfiles, 10)
    1632  
    1633          for testfile in testfiles:
    1634              if support.verbose >= 2:
    1635                  print('tokenize', testfile)
    1636              with open(testfile, 'rb') as f:
    1637                  with self.subTest(file=testfile):
    1638                      self.check_roundtrip(f)
    1639  
    1640  
    1641      def roundtrip(self, code):
    1642          if isinstance(code, str):
    1643              code = code.encode('utf-8')
    1644          return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
    1645  
    1646      def test_indentation_semantics_retained(self):
    1647          """
    1648          Ensure that although whitespace might be mutated in a roundtrip,
    1649          the semantic meaning of the indentation remains consistent.
    1650          """
    1651          code = "if False:\n\tx=3\n\tx=3\n"
    1652          codelines = self.roundtrip(code).split('\n')
    1653          self.assertEqual(codelines[1], codelines[2])
    1654          self.check_roundtrip(code)
    1655  
    1656  
    1657  class CTokenizeTest(TestCase):
    1658      def check_tokenize(self, s, expected):
    1659          # Format the tokens in s in a table format.
    1660          # The ENDMARKER and final NEWLINE are omitted.
    1661          with self.subTest(source=s):
    1662              result = stringify_tokens_from_source(
    1663                  _generate_tokens_from_c_tokenizer(s), s
    1664              )
    1665              self.assertEqual(result, expected.rstrip().splitlines())
    1666  
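              # A minimal sketch of the private helper driving these tests: unlike
              # tokenize(), _generate_tokens_from_c_tokenizer() takes the source as
              # a str (no readline, no ENCODING token) and reports exact operator
              # types directly:
              #
              #     >>> [tok_name[t.type]
              #     ...  for t in _generate_tokens_from_c_tokenizer("1 <= 2")]
              #     ['NUMBER', 'LESSEQUAL', 'NUMBER', 'NEWLINE', 'ENDMARKER']
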
    1667      def test_int(self):
    1668  
    1669          self.check_tokenize('0xff <= 255', """\
    1670      NUMBER     '0xff'        (1, 0) (1, 4)
    1671      LESSEQUAL  '<='          (1, 5) (1, 7)
    1672      NUMBER     '255'         (1, 8) (1, 11)
    1673      """)
    1674  
    1675          self.check_tokenize('0b10 <= 255', """\
    1676      NUMBER     '0b10'        (1, 0) (1, 4)
    1677      LESSEQUAL  '<='          (1, 5) (1, 7)
    1678      NUMBER     '255'         (1, 8) (1, 11)
    1679      """)
    1680  
    1681          self.check_tokenize('0o123 <= 0O123', """\
    1682      NUMBER     '0o123'       (1, 0) (1, 5)
    1683      LESSEQUAL  '<='          (1, 6) (1, 8)
    1684      NUMBER     '0O123'       (1, 9) (1, 14)
    1685      """)
    1686  
    1687          self.check_tokenize('1234567 > ~0x15', """\
    1688      NUMBER     '1234567'     (1, 0) (1, 7)
    1689      GREATER    '>'           (1, 8) (1, 9)
    1690      TILDE      '~'           (1, 10) (1, 11)
    1691      NUMBER     '0x15'        (1, 11) (1, 15)
    1692      """)
    1693  
    1694          self.check_tokenize('2134568 != 1231515', """\
    1695      NUMBER     '2134568'     (1, 0) (1, 7)
    1696      NOTEQUAL   '!='          (1, 8) (1, 10)
    1697      NUMBER     '1231515'     (1, 11) (1, 18)
    1698      """)
    1699  
    1700          self.check_tokenize('(-124561-1) & 200000000', """\
    1701      LPAR       '('           (1, 0) (1, 1)
    1702      MINUS      '-'           (1, 1) (1, 2)
    1703      NUMBER     '124561'      (1, 2) (1, 8)
    1704      MINUS      '-'           (1, 8) (1, 9)
    1705      NUMBER     '1'           (1, 9) (1, 10)
    1706      RPAR       ')'           (1, 10) (1, 11)
    1707      AMPER      '&'           (1, 12) (1, 13)
    1708      NUMBER     '200000000'   (1, 14) (1, 23)
    1709      """)
    1710  
    1711          self.check_tokenize('0xdeadbeef != -1', """\
    1712      NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    1713      NOTEQUAL   '!='          (1, 11) (1, 13)
    1714      MINUS      '-'           (1, 14) (1, 15)
    1715      NUMBER     '1'           (1, 15) (1, 16)
    1716      """)
    1717  
    1718          self.check_tokenize('0xdeadc0de & 12345', """\
    1719      NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    1720      AMPER      '&'           (1, 11) (1, 12)
    1721      NUMBER     '12345'       (1, 13) (1, 18)
    1722      """)
    1723  
    1724          self.check_tokenize('0xFF & 0x15 | 1234', """\
    1725      NUMBER     '0xFF'        (1, 0) (1, 4)
    1726      AMPER      '&'           (1, 5) (1, 6)
    1727      NUMBER     '0x15'        (1, 7) (1, 11)
    1728      VBAR       '|'           (1, 12) (1, 13)
    1729      NUMBER     '1234'        (1, 14) (1, 18)
    1730      """)
    1731  
    1732      def test_float(self):
    1733  
    1734          self.check_tokenize('x = 3.14159', """\
    1735      NAME       'x'           (1, 0) (1, 1)
    1736      EQUAL      '='           (1, 2) (1, 3)
    1737      NUMBER     '3.14159'     (1, 4) (1, 11)
    1738      """)
    1739  
    1740          self.check_tokenize('x = 314159.', """\
    1741      NAME       'x'           (1, 0) (1, 1)
    1742      EQUAL      '='           (1, 2) (1, 3)
    1743      NUMBER     '314159.'     (1, 4) (1, 11)
    1744      """)
    1745  
    1746          self.check_tokenize('x = .314159', """\
    1747      NAME       'x'           (1, 0) (1, 1)
    1748      EQUAL      '='           (1, 2) (1, 3)
    1749      NUMBER     '.314159'     (1, 4) (1, 11)
    1750      """)
    1751  
    1752          self.check_tokenize('x = 3e14159', """\
    1753      NAME       'x'           (1, 0) (1, 1)
    1754      EQUAL      '='           (1, 2) (1, 3)
    1755      NUMBER     '3e14159'     (1, 4) (1, 11)
    1756      """)
    1757  
    1758          self.check_tokenize('x = 3E123', """\
    1759      NAME       'x'           (1, 0) (1, 1)
    1760      EQUAL      '='           (1, 2) (1, 3)
    1761      NUMBER     '3E123'       (1, 4) (1, 9)
    1762      """)
    1763  
    1764          self.check_tokenize('x+y = 3e-1230', """\
    1765      NAME       'x'           (1, 0) (1, 1)
    1766      PLUS       '+'           (1, 1) (1, 2)
    1767      NAME       'y'           (1, 2) (1, 3)
    1768      EQUAL      '='           (1, 4) (1, 5)
    1769      NUMBER     '3e-1230'     (1, 6) (1, 13)
    1770      """)
    1771  
    1772          self.check_tokenize('x = 3.14e159', """\
    1773      NAME       'x'           (1, 0) (1, 1)
    1774      EQUAL      '='           (1, 2) (1, 3)
    1775      NUMBER     '3.14e159'    (1, 4) (1, 12)
    1776      """)
    1777  
    1778      def test_string(self):
    1779  
    1780          self.check_tokenize('x = \'\'; y = ""', """\
    1781      NAME       'x'           (1, 0) (1, 1)
    1782      EQUAL      '='           (1, 2) (1, 3)
    1783      STRING     "''"          (1, 4) (1, 6)
    1784      SEMI       ';'           (1, 6) (1, 7)
    1785      NAME       'y'           (1, 8) (1, 9)
    1786      EQUAL      '='           (1, 10) (1, 11)
    1787      STRING     '""'          (1, 12) (1, 14)
    1788      """)
    1789  
    1790          self.check_tokenize('x = \'"\'; y = "\'"', """\
    1791      NAME       'x'           (1, 0) (1, 1)
    1792      EQUAL      '='           (1, 2) (1, 3)
    1793      STRING     '\\'"\\''       (1, 4) (1, 7)
    1794      SEMI       ';'           (1, 7) (1, 8)
    1795      NAME       'y'           (1, 9) (1, 10)
    1796      EQUAL      '='           (1, 11) (1, 12)
    1797      STRING     '"\\'"'        (1, 13) (1, 16)
    1798      """)
    1799  
    1800          self.check_tokenize('x = "doesn\'t "shrink", does it"', """\
    1801      NAME       'x'           (1, 0) (1, 1)
    1802      EQUAL      '='           (1, 2) (1, 3)
    1803      STRING     '"doesn\\'t "' (1, 4) (1, 14)
    1804      NAME       'shrink'      (1, 14) (1, 20)
    1805      STRING     '", does it"' (1, 20) (1, 31)
    1806      """)
    1807  
    1808          self.check_tokenize("x = 'abc' + 'ABC'", """\
    1809      NAME       'x'           (1, 0) (1, 1)
    1810      EQUAL      '='           (1, 2) (1, 3)
    1811      STRING     "'abc'"       (1, 4) (1, 9)
    1812      PLUS       '+'           (1, 10) (1, 11)
    1813      STRING     "'ABC'"       (1, 12) (1, 17)
    1814      """)
    1815  
    1816          self.check_tokenize('y = "ABC" + "ABC"', """\
    1817      NAME       'y'           (1, 0) (1, 1)
    1818      EQUAL      '='           (1, 2) (1, 3)
    1819      STRING     '"ABC"'       (1, 4) (1, 9)
    1820      PLUS       '+'           (1, 10) (1, 11)
    1821      STRING     '"ABC"'       (1, 12) (1, 17)
    1822      """)
    1823  
    1824          self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
    1825      NAME       'x'           (1, 0) (1, 1)
    1826      EQUAL      '='           (1, 2) (1, 3)
    1827      STRING     "r'abc'"      (1, 4) (1, 10)
    1828      PLUS       '+'           (1, 11) (1, 12)
    1829      STRING     "r'ABC'"      (1, 13) (1, 19)
    1830      PLUS       '+'           (1, 20) (1, 21)
    1831      STRING     "R'ABC'"      (1, 22) (1, 28)
    1832      PLUS       '+'           (1, 29) (1, 30)
    1833      STRING     "R'ABC'"      (1, 31) (1, 37)
    1834      """)
    1835  
    1836          self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
    1837      NAME       'y'           (1, 0) (1, 1)
    1838      EQUAL      '='           (1, 2) (1, 3)
    1839      STRING     'r"abc"'      (1, 4) (1, 10)
    1840      PLUS       '+'           (1, 11) (1, 12)
    1841      STRING     'r"ABC"'      (1, 13) (1, 19)
    1842      PLUS       '+'           (1, 20) (1, 21)
    1843      STRING     'R"ABC"'      (1, 22) (1, 28)
    1844      PLUS       '+'           (1, 29) (1, 30)
    1845      STRING     'R"ABC"'      (1, 31) (1, 37)
    1846      """)
    1847  
    1848          self.check_tokenize("u'abc' + U'abc'", """\
    1849      STRING     "u'abc'"      (1, 0) (1, 6)
    1850      PLUS       '+'           (1, 7) (1, 8)
    1851      STRING     "U'abc'"      (1, 9) (1, 15)
    1852      """)
    1853  
    1854          self.check_tokenize('u"abc" + U"abc"', """\
    1855      STRING     'u"abc"'      (1, 0) (1, 6)
    1856      PLUS       '+'           (1, 7) (1, 8)
    1857      STRING     'U"abc"'      (1, 9) (1, 15)
    1858      """)
    1859  
    1860          self.check_tokenize("b'abc' + B'abc'", """\
    1861      STRING     "b'abc'"      (1, 0) (1, 6)
    1862      PLUS       '+'           (1, 7) (1, 8)
    1863      STRING     "B'abc'"      (1, 9) (1, 15)
    1864      """)
    1865  
    1866          self.check_tokenize('b"abc" + B"abc"', """\
    1867      STRING     'b"abc"'      (1, 0) (1, 6)
    1868      PLUS       '+'           (1, 7) (1, 8)
    1869      STRING     'B"abc"'      (1, 9) (1, 15)
    1870      """)
    1871  
    1872          self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    1873      STRING     "br'abc'"     (1, 0) (1, 7)
    1874      PLUS       '+'           (1, 8) (1, 9)
    1875      STRING     "bR'abc'"     (1, 10) (1, 17)
    1876      PLUS       '+'           (1, 18) (1, 19)
    1877      STRING     "Br'abc'"     (1, 20) (1, 27)
    1878      PLUS       '+'           (1, 28) (1, 29)
    1879      STRING     "BR'abc'"     (1, 30) (1, 37)
    1880      """)
    1881  
    1882          self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    1883      STRING     'br"abc"'     (1, 0) (1, 7)
    1884      PLUS       '+'           (1, 8) (1, 9)
    1885      STRING     'bR"abc"'     (1, 10) (1, 17)
    1886      PLUS       '+'           (1, 18) (1, 19)
    1887      STRING     'Br"abc"'     (1, 20) (1, 27)
    1888      PLUS       '+'           (1, 28) (1, 29)
    1889      STRING     'BR"abc"'     (1, 30) (1, 37)
    1890      """)
    1891  
    1892          self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
    1893      STRING     "rb'abc'"     (1, 0) (1, 7)
    1894      PLUS       '+'           (1, 8) (1, 9)
    1895      STRING     "rB'abc'"     (1, 10) (1, 17)
    1896      PLUS       '+'           (1, 18) (1, 19)
    1897      STRING     "Rb'abc'"     (1, 20) (1, 27)
    1898      PLUS       '+'           (1, 28) (1, 29)
    1899      STRING     "RB'abc'"     (1, 30) (1, 37)
    1900      """)
    1901  
    1902          self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
    1903      STRING     'rb"abc"'     (1, 0) (1, 7)
    1904      PLUS       '+'           (1, 8) (1, 9)
    1905      STRING     'rB"abc"'     (1, 10) (1, 17)
    1906      PLUS       '+'           (1, 18) (1, 19)
    1907      STRING     'Rb"abc"'     (1, 20) (1, 27)
    1908      PLUS       '+'           (1, 28) (1, 29)
    1909      STRING     'RB"abc"'     (1, 30) (1, 37)
    1910      """)
    1911  
    1912          self.check_tokenize('"a\\\nde\\\nfg"', """\
    1913      STRING     '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
    1914      """)
    1915  
    1916          self.check_tokenize('u"a\\\nde"', """\
    1917      STRING     'u"a\\\\\\nde"\'  (1, 0) (2, 3)
    1918      """)
    1919  
    1920          self.check_tokenize('rb"a\\\nd"', """\
    1921      STRING     'rb"a\\\\\\nd"\'  (1, 0) (2, 2)
    1922      """)
    1923  
    1924          self.check_tokenize(r'"""a\
    1925  b"""', """\
    1926      STRING     '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    1927      """)
    1928          self.check_tokenize(r'u"""a\
    1929  b"""', """\
    1930      STRING     'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    1931      """)
    1932          self.check_tokenize(r'rb"""a\
    1933  b\
    1934  c"""', """\
    1935      STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
    1936      """)
    1937  
    1938          self.check_tokenize('f"abc"', """\
    1939      STRING     'f"abc"'      (1, 0) (1, 6)
    1940      """)
    1941  
    1942          self.check_tokenize('fR"a{b}c"', """\
    1943      STRING     'fR"a{b}c"'   (1, 0) (1, 9)
    1944      """)
    1945  
    1946          self.check_tokenize('f"""abc"""', """\
    1947      STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
    1948      """)
    1949  
    1950          self.check_tokenize(r'f"abc\
    1951  def"', """\
    1952      STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
    1953      """)
    1954  
    1955          self.check_tokenize(r'Rf"abc\
    1956  def"', """\
    1957      STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
    1958      """)
    1959  
    1960      def test_function(self):
    1961  
    1962          self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\
    1963      NAME       'def'         (1, 0) (1, 3)
    1964      NAME       'd22'         (1, 4) (1, 7)
    1965      LPAR       '('           (1, 7) (1, 8)
    1966      NAME       'a'           (1, 8) (1, 9)
    1967      COMMA      ','           (1, 9) (1, 10)
    1968      NAME       'b'           (1, 11) (1, 12)
    1969      COMMA      ','           (1, 12) (1, 13)
    1970      NAME       'c'           (1, 14) (1, 15)
    1971      EQUAL      '='           (1, 15) (1, 16)
    1972      NUMBER     '2'           (1, 16) (1, 17)
    1973      COMMA      ','           (1, 17) (1, 18)
    1974      NAME       'd'           (1, 19) (1, 20)
    1975      EQUAL      '='           (1, 20) (1, 21)
    1976      NUMBER     '2'           (1, 21) (1, 22)
    1977      COMMA      ','           (1, 22) (1, 23)
    1978      STAR       '*'           (1, 24) (1, 25)
    1979      NAME       'k'           (1, 25) (1, 26)
    1980      RPAR       ')'           (1, 26) (1, 27)
    1981      COLON      ':'           (1, 27) (1, 28)
    1982      NAME       'pass'        (1, 29) (1, 33)
    1983      """)
    1984  
    1985          self.check_tokenize('def d01v_(a=1, *k, **w): pass', """\
    1986      NAME       'def'         (1, 0) (1, 3)
    1987      NAME       'd01v_'       (1, 4) (1, 9)
    1988      LPAR       '('           (1, 9) (1, 10)
    1989      NAME       'a'           (1, 10) (1, 11)
    1990      EQUAL      '='           (1, 11) (1, 12)
    1991      NUMBER     '1'           (1, 12) (1, 13)
    1992      COMMA      ','           (1, 13) (1, 14)
    1993      STAR       '*'           (1, 15) (1, 16)
    1994      NAME       'k'           (1, 16) (1, 17)
    1995      COMMA      ','           (1, 17) (1, 18)
    1996      DOUBLESTAR '**'          (1, 19) (1, 21)
    1997      NAME       'w'           (1, 21) (1, 22)
    1998      RPAR       ')'           (1, 22) (1, 23)
    1999      COLON      ':'           (1, 23) (1, 24)
    2000      NAME       'pass'        (1, 25) (1, 29)
    2001      """)
    2002  
    2003          self.check_tokenize('def d23(a: str, b: int=3) -> int: pass', """\
    2004      NAME       'def'         (1, 0) (1, 3)
    2005      NAME       'd23'         (1, 4) (1, 7)
    2006      LPAR       '('           (1, 7) (1, 8)
    2007      NAME       'a'           (1, 8) (1, 9)
    2008      COLON      ':'           (1, 9) (1, 10)
    2009      NAME       'str'         (1, 11) (1, 14)
    2010      COMMA      ','           (1, 14) (1, 15)
    2011      NAME       'b'           (1, 16) (1, 17)
    2012      COLON      ':'           (1, 17) (1, 18)
    2013      NAME       'int'         (1, 19) (1, 22)
    2014      EQUAL      '='           (1, 22) (1, 23)
    2015      NUMBER     '3'           (1, 23) (1, 24)
    2016      RPAR       ')'           (1, 24) (1, 25)
    2017      RARROW     '->'          (1, 26) (1, 28)
    2018      NAME       'int'         (1, 29) (1, 32)
    2019      COLON      ':'           (1, 32) (1, 33)
    2020      NAME       'pass'        (1, 34) (1, 38)
    2021      """)
    2022  
    2023      def test_comparison(self):
    2024  
    2025          self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
    2026                              "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    2027      NAME       'if'          (1, 0) (1, 2)
    2028      NUMBER     '1'           (1, 3) (1, 4)
    2029      LESS       '<'           (1, 5) (1, 6)
    2030      NUMBER     '1'           (1, 7) (1, 8)
    2031      GREATER    '>'           (1, 9) (1, 10)
    2032      NUMBER     '1'           (1, 11) (1, 12)
    2033      EQEQUAL    '=='          (1, 13) (1, 15)
    2034      NUMBER     '1'           (1, 16) (1, 17)
    2035      GREATEREQUAL '>='          (1, 18) (1, 20)
    2036      NUMBER     '5'           (1, 21) (1, 22)
    2037      LESSEQUAL  '<='          (1, 23) (1, 25)
    2038      NUMBER     '0x15'        (1, 26) (1, 30)
    2039      LESSEQUAL  '<='          (1, 31) (1, 33)
    2040      NUMBER     '0x12'        (1, 34) (1, 38)
    2041      NOTEQUAL   '!='          (1, 39) (1, 41)
    2042      NUMBER     '1'           (1, 42) (1, 43)
    2043      NAME       'and'         (1, 44) (1, 47)
    2044      NUMBER     '5'           (1, 48) (1, 49)
    2045      NAME       'in'          (1, 50) (1, 52)
    2046      NUMBER     '1'           (1, 53) (1, 54)
    2047      NAME       'not'         (1, 55) (1, 58)
    2048      NAME       'in'          (1, 59) (1, 61)
    2049      NUMBER     '1'           (1, 62) (1, 63)
    2050      NAME       'is'          (1, 64) (1, 66)
    2051      NUMBER     '1'           (1, 67) (1, 68)
    2052      NAME       'or'          (1, 69) (1, 71)
    2053      NUMBER     '5'           (1, 72) (1, 73)
    2054      NAME       'is'          (1, 74) (1, 76)
    2055      NAME       'not'         (1, 77) (1, 80)
    2056      NUMBER     '1'           (1, 81) (1, 82)
    2057      COLON      ':'           (1, 82) (1, 83)
    2058      NAME       'pass'        (1, 84) (1, 88)
    2059      """)
    2060  
    2061      def test_additive(self):
    2062  
    2063          self.check_tokenize('x = 1 - y + 15 - 1 + 0x124 + z + a[5]', """\
    2064      NAME       'x'           (1, 0) (1, 1)
    2065      EQUAL      '='           (1, 2) (1, 3)
    2066      NUMBER     '1'           (1, 4) (1, 5)
    2067      MINUS      '-'           (1, 6) (1, 7)
    2068      NAME       'y'           (1, 8) (1, 9)
    2069      PLUS       '+'           (1, 10) (1, 11)
    2070      NUMBER     '15'          (1, 12) (1, 14)
    2071      MINUS      '-'           (1, 15) (1, 16)
    2072      NUMBER     '1'           (1, 17) (1, 18)
    2073      PLUS       '+'           (1, 19) (1, 20)
    2074      NUMBER     '0x124'       (1, 21) (1, 26)
    2075      PLUS       '+'           (1, 27) (1, 28)
    2076      NAME       'z'           (1, 29) (1, 30)
    2077      PLUS       '+'           (1, 31) (1, 32)
    2078      NAME       'a'           (1, 33) (1, 34)
    2079      LSQB       '['           (1, 34) (1, 35)
    2080      NUMBER     '5'           (1, 35) (1, 36)
    2081      RSQB       ']'           (1, 36) (1, 37)
    2082      """)
    2083  
    2084      def test_multiplicative(self):
    2085  
    2086          self.check_tokenize('x = 1//1*1/5*12%0x12@42', """\
    2087      NAME       'x'           (1, 0) (1, 1)
    2088      EQUAL      '='           (1, 2) (1, 3)
    2089      NUMBER     '1'           (1, 4) (1, 5)
    2090      DOUBLESLASH '//'          (1, 5) (1, 7)
    2091      NUMBER     '1'           (1, 7) (1, 8)
    2092      STAR       '*'           (1, 8) (1, 9)
    2093      NUMBER     '1'           (1, 9) (1, 10)
    2094      SLASH      '/'           (1, 10) (1, 11)
    2095      NUMBER     '5'           (1, 11) (1, 12)
    2096      STAR       '*'           (1, 12) (1, 13)
    2097      NUMBER     '12'          (1, 13) (1, 15)
    2098      PERCENT    '%'           (1, 15) (1, 16)
    2099      NUMBER     '0x12'        (1, 16) (1, 20)
    2100      AT         '@'           (1, 20) (1, 21)
    2101      NUMBER     '42'          (1, 21) (1, 23)
    2102      """)
    2103  
    2104      def test_unary(self):
    2105  
    2106          self.check_tokenize('~1 ^ 1 & 1 |1 ^ -1', """\
    2107      TILDE      '~'           (1, 0) (1, 1)
    2108      NUMBER     '1'           (1, 1) (1, 2)
    2109      CIRCUMFLEX '^'           (1, 3) (1, 4)
    2110      NUMBER     '1'           (1, 5) (1, 6)
    2111      AMPER      '&'           (1, 7) (1, 8)
    2112      NUMBER     '1'           (1, 9) (1, 10)
    2113      VBAR       '|'           (1, 11) (1, 12)
    2114      NUMBER     '1'           (1, 12) (1, 13)
    2115      CIRCUMFLEX '^'           (1, 14) (1, 15)
    2116      MINUS      '-'           (1, 16) (1, 17)
    2117      NUMBER     '1'           (1, 17) (1, 18)
    2118      """)
    2119  
    2120          self.check_tokenize('-1*1/1+1*1//1 - ---1**1', """\
    2121      MINUS      '-'           (1, 0) (1, 1)
    2122      NUMBER     '1'           (1, 1) (1, 2)
    2123      STAR       '*'           (1, 2) (1, 3)
    2124      NUMBER     '1'           (1, 3) (1, 4)
    2125      SLASH      '/'           (1, 4) (1, 5)
    2126      NUMBER     '1'           (1, 5) (1, 6)
    2127      PLUS       '+'           (1, 6) (1, 7)
    2128      NUMBER     '1'           (1, 7) (1, 8)
    2129      STAR       '*'           (1, 8) (1, 9)
    2130      NUMBER     '1'           (1, 9) (1, 10)
    2131      DOUBLESLASH '//'          (1, 10) (1, 12)
    2132      NUMBER     '1'           (1, 12) (1, 13)
    2133      MINUS      '-'           (1, 14) (1, 15)
    2134      MINUS      '-'           (1, 16) (1, 17)
    2135      MINUS      '-'           (1, 17) (1, 18)
    2136      MINUS      '-'           (1, 18) (1, 19)
    2137      NUMBER     '1'           (1, 19) (1, 20)
    2138      DOUBLESTAR '**'          (1, 20) (1, 22)
    2139      NUMBER     '1'           (1, 22) (1, 23)
    2140      """)
    2141  
    2142      def test_selector(self):
    2143  
    2144          self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
    2145      NAME       'import'      (1, 0) (1, 6)
    2146      NAME       'sys'         (1, 7) (1, 10)
    2147      COMMA      ','           (1, 10) (1, 11)
    2148      NAME       'time'        (1, 12) (1, 16)
    2149      NEWLINE    ''            (1, 16) (1, 16)
    2150      NAME       'x'           (2, 0) (2, 1)
    2151      EQUAL      '='           (2, 2) (2, 3)
    2152      NAME       'sys'         (2, 4) (2, 7)
    2153      DOT        '.'           (2, 7) (2, 8)
    2154      NAME       'modules'     (2, 8) (2, 15)
    2155      LSQB       '['           (2, 15) (2, 16)
    2156      STRING     "'time'"      (2, 16) (2, 22)
    2157      RSQB       ']'           (2, 22) (2, 23)
    2158      DOT        '.'           (2, 23) (2, 24)
    2159      NAME       'time'        (2, 24) (2, 28)
    2160      LPAR       '('           (2, 28) (2, 29)
    2161      RPAR       ')'           (2, 29) (2, 30)
    2162      """)
    2163  
    2164      def test_method(self):
    2165  
    2166          self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\
    2167      AT         '@'           (1, 0) (1, 1)
    2168      NAME       'staticmethod' (1, 1) (1, 13)
    2169      NEWLINE    ''            (1, 13) (1, 13)
    2170      NAME       'def'         (2, 0) (2, 3)
    2171      NAME       'foo'         (2, 4) (2, 7)
    2172      LPAR       '('           (2, 7) (2, 8)
    2173      NAME       'x'           (2, 8) (2, 9)
    2174      COMMA      ','           (2, 9) (2, 10)
    2175      NAME       'y'           (2, 10) (2, 11)
    2176      RPAR       ')'           (2, 11) (2, 12)
    2177      COLON      ':'           (2, 12) (2, 13)
    2178      NAME       'pass'        (2, 14) (2, 18)
    2179      """)
    2180  
    2181      def test_tabs(self):
    2182  
    2183          self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\
    2184      AT         '@'           (1, 0) (1, 1)
    2185      NAME       'staticmethod' (1, 1) (1, 13)
    2186      NEWLINE    ''            (1, 13) (1, 13)
    2187      NAME       'def'         (2, 0) (2, 3)
    2188      NAME       'foo'         (2, 4) (2, 7)
    2189      LPAR       '('           (2, 7) (2, 8)
    2190      NAME       'x'           (2, 8) (2, 9)
    2191      COMMA      ','           (2, 9) (2, 10)
    2192      NAME       'y'           (2, 10) (2, 11)
    2193      RPAR       ')'           (2, 11) (2, 12)
    2194      COLON      ':'           (2, 12) (2, 13)
    2195      NAME       'pass'        (2, 14) (2, 18)
    2196      """)
    2197  
    2198      def test_async(self):
    2199  
    2200          self.check_tokenize('async = 1', """\
    2201      ASYNC      'async'       (1, 0) (1, 5)
    2202      EQUAL      '='           (1, 6) (1, 7)
    2203      NUMBER     '1'           (1, 8) (1, 9)
    2204      """)
    2205  
    2206          self.check_tokenize('a = (async = 1)', """\
    2207      NAME       'a'           (1, 0) (1, 1)
    2208      EQUAL      '='           (1, 2) (1, 3)
    2209      LPAR       '('           (1, 4) (1, 5)
    2210      ASYNC      'async'       (1, 5) (1, 10)
    2211      EQUAL      '='           (1, 11) (1, 12)
    2212      NUMBER     '1'           (1, 13) (1, 14)
    2213      RPAR       ')'           (1, 14) (1, 15)
    2214      """)
    2215  
    2216          self.check_tokenize('async()', """\
    2217      ASYNC      'async'       (1, 0) (1, 5)
    2218      LPAR       '('           (1, 5) (1, 6)
    2219      RPAR       ')'           (1, 6) (1, 7)
    2220      """)
    2221  
    2222          self.check_tokenize('class async(Bar):pass', """\
    2223      NAME       'class'       (1, 0) (1, 5)
    2224      ASYNC      'async'       (1, 6) (1, 11)
    2225      LPAR       '('           (1, 11) (1, 12)
    2226      NAME       'Bar'         (1, 12) (1, 15)
    2227      RPAR       ')'           (1, 15) (1, 16)
    2228      COLON      ':'           (1, 16) (1, 17)
    2229      NAME       'pass'        (1, 17) (1, 21)
    2230      """)
    2231  
    2232          self.check_tokenize('class async:pass', """\
    2233      NAME       'class'       (1, 0) (1, 5)
    2234      ASYNC      'async'       (1, 6) (1, 11)
    2235      COLON      ':'           (1, 11) (1, 12)
    2236      NAME       'pass'        (1, 12) (1, 16)
    2237      """)
    2238  
    2239          self.check_tokenize('await = 1', """\
    2240      AWAIT      'await'       (1, 0) (1, 5)
    2241      EQUAL      '='           (1, 6) (1, 7)
    2242      NUMBER     '1'           (1, 8) (1, 9)
    2243      """)
    2244  
    2245          self.check_tokenize('foo.async', """\
    2246      NAME       'foo'         (1, 0) (1, 3)
    2247      DOT        '.'           (1, 3) (1, 4)
    2248      ASYNC      'async'       (1, 4) (1, 9)
    2249      """)
    2250  
    2251          self.check_tokenize('async for a in b: pass', """\
    2252      ASYNC      'async'       (1, 0) (1, 5)
    2253      NAME       'for'         (1, 6) (1, 9)
    2254      NAME       'a'           (1, 10) (1, 11)
    2255      NAME       'in'          (1, 12) (1, 14)
    2256      NAME       'b'           (1, 15) (1, 16)
    2257      COLON      ':'           (1, 16) (1, 17)
    2258      NAME       'pass'        (1, 18) (1, 22)
    2259      """)
    2260  
    2261          self.check_tokenize('async with a as b: pass', """\
    2262      ASYNC      'async'       (1, 0) (1, 5)
    2263      NAME       'with'        (1, 6) (1, 10)
    2264      NAME       'a'           (1, 11) (1, 12)
    2265      NAME       'as'          (1, 13) (1, 15)
    2266      NAME       'b'           (1, 16) (1, 17)
    2267      COLON      ':'           (1, 17) (1, 18)
    2268      NAME       'pass'        (1, 19) (1, 23)
    2269      """)
    2270  
    2271          self.check_tokenize('async.foo', """\
    2272      ASYNC      'async'       (1, 0) (1, 5)
    2273      DOT        '.'           (1, 5) (1, 6)
    2274      NAME       'foo'         (1, 6) (1, 9)
    2275      """)
    2276  
    2277          self.check_tokenize('async', """\
    2278      ASYNC      'async'       (1, 0) (1, 5)
    2279      """)
    2280  
    2281          self.check_tokenize('async\n#comment\nawait', """\
    2282      ASYNC      'async'       (1, 0) (1, 5)
    2283      NEWLINE    ''            (1, 5) (1, 5)
    2284      AWAIT      'await'       (3, 0) (3, 5)
    2285      """)
    2286  
    2287          self.check_tokenize('async\n...\nawait', """\
    2288      ASYNC      'async'       (1, 0) (1, 5)
    2289      NEWLINE    ''            (1, 5) (1, 5)
    2290      ELLIPSIS   '...'         (2, 0) (2, 3)
    2291      NEWLINE    ''            (2, 3) (2, 3)
    2292      AWAIT      'await'       (3, 0) (3, 5)
    2293      """)
    2294  
    2295          self.check_tokenize('async\nawait', """\
    2296      ASYNC      'async'       (1, 0) (1, 5)
    2297      NEWLINE    ''            (1, 5) (1, 5)
    2298      AWAIT      'await'       (2, 0) (2, 5)
    2299      """)
    2300  
    2301          self.check_tokenize('foo.async + 1', """\
    2302      NAME       'foo'         (1, 0) (1, 3)
    2303      DOT        '.'           (1, 3) (1, 4)
    2304      ASYNC      'async'       (1, 4) (1, 9)
    2305      PLUS       '+'           (1, 10) (1, 11)
    2306      NUMBER     '1'           (1, 12) (1, 13)
    2307      """)
    2308  
    2309          self.check_tokenize('async def foo(): pass', """\
    2310      ASYNC      'async'       (1, 0) (1, 5)
    2311      NAME       'def'         (1, 6) (1, 9)
    2312      NAME       'foo'         (1, 10) (1, 13)
    2313      LPAR       '('           (1, 13) (1, 14)
    2314      RPAR       ')'           (1, 14) (1, 15)
    2315      COLON      ':'           (1, 15) (1, 16)
    2316      NAME       'pass'        (1, 17) (1, 21)
    2317      """)
    2318  
    2319          self.check_tokenize('''\
    2320  async def foo():
    2321    def foo(await):
    2322      await = 1
    2323    if 1:
    2324      await
    2325  async += 1
    2326  ''', """\
    2327      ASYNC      'async'       (1, 0) (1, 5)
    2328      NAME       'def'         (1, 6) (1, 9)
    2329      NAME       'foo'         (1, 10) (1, 13)
    2330      LPAR       '('           (1, 13) (1, 14)
    2331      RPAR       ')'           (1, 14) (1, 15)
    2332      COLON      ':'           (1, 15) (1, 16)
    2333      NEWLINE    ''            (1, 16) (1, 16)
    2334      INDENT     ''            (2, -1) (2, -1)
    2335      NAME       'def'         (2, 2) (2, 5)
    2336      NAME       'foo'         (2, 6) (2, 9)
    2337      LPAR       '('           (2, 9) (2, 10)
    2338      AWAIT      'await'       (2, 10) (2, 15)
    2339      RPAR       ')'           (2, 15) (2, 16)
    2340      COLON      ':'           (2, 16) (2, 17)
    2341      NEWLINE    ''            (2, 17) (2, 17)
    2342      INDENT     ''            (3, -1) (3, -1)
    2343      AWAIT      'await'       (3, 4) (3, 9)
    2344      EQUAL      '='           (3, 10) (3, 11)
    2345      NUMBER     '1'           (3, 12) (3, 13)
    2346      NEWLINE    ''            (3, 13) (3, 13)
    2347      DEDENT     ''            (4, -1) (4, -1)
    2348      NAME       'if'          (4, 2) (4, 4)
    2349      NUMBER     '1'           (4, 5) (4, 6)
    2350      COLON      ':'           (4, 6) (4, 7)
    2351      NEWLINE    ''            (4, 7) (4, 7)
    2352      INDENT     ''            (5, -1) (5, -1)
    2353      AWAIT      'await'       (5, 4) (5, 9)
    2354      NEWLINE    ''            (5, 9) (5, 9)
    2355      DEDENT     ''            (6, -1) (6, -1)
    2356      DEDENT     ''            (6, -1) (6, -1)
    2357      ASYNC      'async'       (6, 0) (6, 5)
    2358      PLUSEQUAL  '+='          (6, 6) (6, 8)
    2359      NUMBER     '1'           (6, 9) (6, 10)
    2360      NEWLINE    ''            (6, 10) (6, 10)
    2361      """)
    2362  
    2363          self.check_tokenize('async def foo():\n  async for i in 1: pass', """\
    2364      ASYNC      'async'       (1, 0) (1, 5)
    2365      NAME       'def'         (1, 6) (1, 9)
    2366      NAME       'foo'         (1, 10) (1, 13)
    2367      LPAR       '('           (1, 13) (1, 14)
    2368      RPAR       ')'           (1, 14) (1, 15)
    2369      COLON      ':'           (1, 15) (1, 16)
    2370      NEWLINE    ''            (1, 16) (1, 16)
    2371      INDENT     ''            (2, -1) (2, -1)
    2372      ASYNC      'async'       (2, 2) (2, 7)
    2373      NAME       'for'         (2, 8) (2, 11)
    2374      NAME       'i'           (2, 12) (2, 13)
    2375      NAME       'in'          (2, 14) (2, 16)
    2376      NUMBER     '1'           (2, 17) (2, 18)
    2377      COLON      ':'           (2, 18) (2, 19)
    2378      NAME       'pass'        (2, 20) (2, 24)
    2379      DEDENT     ''            (2, -1) (2, -1)
    2380      """)
    2381  
    2382          self.check_tokenize('async def foo(async): await', """\
    2383      ASYNC      'async'       (1, 0) (1, 5)
    2384      NAME       'def'         (1, 6) (1, 9)
    2385      NAME       'foo'         (1, 10) (1, 13)
    2386      LPAR       '('           (1, 13) (1, 14)
    2387      ASYNC      'async'       (1, 14) (1, 19)
    2388      RPAR       ')'           (1, 19) (1, 20)
    2389      COLON      ':'           (1, 20) (1, 21)
    2390      AWAIT      'await'       (1, 22) (1, 27)
    2391      """)
    2392  
    2393          self.check_tokenize('''\
    2394  def f():
    2395  
    2396    def baz(): pass
    2397    async def bar(): pass
    2398  
    2399    await = 2''', """\
    2400      NAME       'def'         (1, 0) (1, 3)
    2401      NAME       'f'           (1, 4) (1, 5)
    2402      LPAR       '('           (1, 5) (1, 6)
    2403      RPAR       ')'           (1, 6) (1, 7)
    2404      COLON      ':'           (1, 7) (1, 8)
    2405      NEWLINE    ''            (1, 8) (1, 8)
    2406      INDENT     ''            (3, -1) (3, -1)
    2407      NAME       'def'         (3, 2) (3, 5)
    2408      NAME       'baz'         (3, 6) (3, 9)
    2409      LPAR       '('           (3, 9) (3, 10)
    2410      RPAR       ')'           (3, 10) (3, 11)
    2411      COLON      ':'           (3, 11) (3, 12)
    2412      NAME       'pass'        (3, 13) (3, 17)
    2413      NEWLINE    ''            (3, 17) (3, 17)
    2414      ASYNC      'async'       (4, 2) (4, 7)
    2415      NAME       'def'         (4, 8) (4, 11)
    2416      NAME       'bar'         (4, 12) (4, 15)
    2417      LPAR       '('           (4, 15) (4, 16)
    2418      RPAR       ')'           (4, 16) (4, 17)
    2419      COLON      ':'           (4, 17) (4, 18)
    2420      NAME       'pass'        (4, 19) (4, 23)
    2421      NEWLINE    ''            (4, 23) (4, 23)
    2422      AWAIT      'await'       (6, 2) (6, 7)
    2423      EQUAL      '='           (6, 8) (6, 9)
    2424      NUMBER     '2'           (6, 10) (6, 11)
    2425      DEDENT     ''            (6, -1) (6, -1)
    2426      """)
    2427  
    2428          self.check_tokenize('''\
    2429  async def f():
    2430  
    2431    def baz(): pass
    2432    async def bar(): pass
    2433  
    2434    await = 2''', """\
    2435      ASYNC      'async'       (1, 0) (1, 5)
    2436      NAME       'def'         (1, 6) (1, 9)
    2437      NAME       'f'           (1, 10) (1, 11)
    2438      LPAR       '('           (1, 11) (1, 12)
    2439      RPAR       ')'           (1, 12) (1, 13)
    2440      COLON      ':'           (1, 13) (1, 14)
    2441      NEWLINE    ''            (1, 14) (1, 14)
    2442      INDENT     ''            (3, -1) (3, -1)
    2443      NAME       'def'         (3, 2) (3, 5)
    2444      NAME       'baz'         (3, 6) (3, 9)
    2445      LPAR       '('           (3, 9) (3, 10)
    2446      RPAR       ')'           (3, 10) (3, 11)
    2447      COLON      ':'           (3, 11) (3, 12)
    2448      NAME       'pass'        (3, 13) (3, 17)
    2449      NEWLINE    ''            (3, 17) (3, 17)
    2450      ASYNC      'async'       (4, 2) (4, 7)
    2451      NAME       'def'         (4, 8) (4, 11)
    2452      NAME       'bar'         (4, 12) (4, 15)
    2453      LPAR       '('           (4, 15) (4, 16)
    2454      RPAR       ')'           (4, 16) (4, 17)
    2455      COLON      ':'           (4, 17) (4, 18)
    2456      NAME       'pass'        (4, 19) (4, 23)
    2457      NEWLINE    ''            (4, 23) (4, 23)
    2458      AWAIT      'await'       (6, 2) (6, 7)
    2459      EQUAL      '='           (6, 8) (6, 9)
    2460      NUMBER     '2'           (6, 10) (6, 11)
    2461      DEDENT     ''            (6, -1) (6, -1)
    2462      """)
    2463  
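              # Note (added for illustration): 'async' and 'await' are proper keywords
              # for the C tokenizer, so they are reported as ASYNC/AWAIT tokens even in
              # positions the parser would later reject (e.g. 'foo.async' or an 'async'
              # parameter name above); tokenization succeeds and validity is left to
              # the parser.
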
    2464      def test_unicode(self):
    2465  
    2466          self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
    2467      NAME       'Örter'       (1, 0) (1, 6)
    2468      EQUAL      '='           (1, 7) (1, 8)
    2469      STRING     "u'places'"   (1, 9) (1, 18)
    2470      NEWLINE    ''            (1, 18) (1, 18)
    2471      NAME       'grün'        (2, 0) (2, 5)
    2472      EQUAL      '='           (2, 6) (2, 7)
    2473      STRING     "U'green'"    (2, 8) (2, 16)
    2474      """)
    2475  
    2476      def test_invalid_syntax(self):
    2477          def get_tokens(string):
    2478              return list(_generate_tokens_from_c_tokenizer(string))
    2479  
    2480          self.assertRaises(SyntaxError, get_tokens, "(1+2]")
    2481          self.assertRaises(SyntaxError, get_tokens, "(1+2}")
    2482          self.assertRaises(SyntaxError, get_tokens, "{1+2]")
    2483  
    2484          self.assertRaises(SyntaxError, get_tokens, "1_")
    2485          self.assertRaises(SyntaxError, get_tokens, "1.2_")
    2486          self.assertRaises(SyntaxError, get_tokens, "1e2_")
    2487          self.assertRaises(SyntaxError, get_tokens, "1e+")
    2488  
    2489          self.assertRaises(SyntaxError, get_tokens, "\xa0")
    2490          self.assertRaises(SyntaxError, get_tokens, "")
    2491  
    2492          self.assertRaises(SyntaxError, get_tokens, "0b12")
    2493          self.assertRaises(SyntaxError, get_tokens, "0b1_2")
    2494          self.assertRaises(SyntaxError, get_tokens, "0b2")
    2495          self.assertRaises(SyntaxError, get_tokens, "0b1_")
    2496          self.assertRaises(SyntaxError, get_tokens, "0b")
    2497          self.assertRaises(SyntaxError, get_tokens, "0o18")
    2498          self.assertRaises(SyntaxError, get_tokens, "0o1_8")
    2499          self.assertRaises(SyntaxError, get_tokens, "0o8")
    2500          self.assertRaises(SyntaxError, get_tokens, "0o1_")
    2501          self.assertRaises(SyntaxError, get_tokens, "0o")
    2502          self.assertRaises(SyntaxError, get_tokens, "0x1_")
    2503          self.assertRaises(SyntaxError, get_tokens, "0x")
    2504          self.assertRaises(SyntaxError, get_tokens, "1_")
    2505          self.assertRaises(SyntaxError, get_tokens, "012")
    2506          self.assertRaises(SyntaxError, get_tokens, "1.2_")
    2507          self.assertRaises(SyntaxError, get_tokens, "1e2_")
    2508          self.assertRaises(SyntaxError, get_tokens, "1e+")
    2509  
    2510          self.assertRaises(SyntaxError, get_tokens, "'sdfsdf")
    2511          self.assertRaises(SyntaxError, get_tokens, "'''sdfsdf''")
    2512  
    2513          self.assertRaises(SyntaxError, get_tokens, "("*1000+"a"+")"*1000)
    2514          self.assertRaises(SyntaxError, get_tokens, "]")
    2515  
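              # Note (added for illustration, not part of the upstream test): every
              # input above is rejected by the C tokenizer itself, before any parsing
              # takes place; e.g. list(_generate_tokens_from_c_tokenizer("(1+2]"))
              # raises SyntaxError for the mismatched bracket, and the deeply nested
              # "(((...a...)))" input trips an internal limit on parenthesis nesting.
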
    2516      def test_max_indent(self):
    2517          MAXINDENT = 100
    2518  
    2519          def generate_source(indents):
    2520              source = ''.join(('  ' * x) + 'if True:\n' for x in range(indents))
    2521              source += '  ' * indents + 'pass\n'
    2522              return source
    2523  
    2524          valid = generate_source(MAXINDENT - 1)
    2525          tokens = list(_generate_tokens_from_c_tokenizer(valid))
    2526          self.assertEqual(tokens[-1].type, DEDENT)
    2527          compile(valid, "<string>", "exec")
    2528  
    2529          invalid = generate_source(MAXINDENT)
    2530          tokens = list(_generate_tokens_from_c_tokenizer(invalid))
    2531          self.assertEqual(tokens[-1].type, NEWLINE)
    2532          self.assertRaises(
    2533              IndentationError, compile, invalid, "<string>", "exec"
    2534          )
    2535  
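              # For reference (added note): the generate_source() helper above, called
              # as generate_source(2), produces
              #     if True:
              #       if True:
              #         pass
              # so generate_source(MAXINDENT - 1) nests 99 "if True:" blocks, the
              # deepest nesting that still compiles (generate_source(MAXINDENT)
              # raises IndentationError).
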
    2536      def test_continuation_lines_indentation(self):
    2537          def get_tokens(string):
    2538              return [(kind, string) for (kind, string, *_) in _generate_tokens_from_c_tokenizer(string)]
    2539  
    2540          code = dedent("""
    2541              def fib(n):
    2542                  \\
    2543              '''Print a Fibonacci series up to n.'''
    2544                  \\
    2545              a, b = 0, 1
    2546          """)
    2547  
    2548          self.check_tokenize(code, """\
    2549      NAME       'def'         (2, 0) (2, 3)
    2550      NAME       'fib'         (2, 4) (2, 7)
    2551      LPAR       '('           (2, 7) (2, 8)
    2552      NAME       'n'           (2, 8) (2, 9)
    2553      RPAR       ')'           (2, 9) (2, 10)
    2554      COLON      ':'           (2, 10) (2, 11)
    2555      NEWLINE    ''            (2, 11) (2, 11)
    2556      INDENT     ''            (4, -1) (4, -1)
    2557      STRING     "'''Print a Fibonacci series up to n.'''" (4, 0) (4, 39)
    2558      NEWLINE    ''            (4, 39) (4, 39)
    2559      NAME       'a'           (6, 0) (6, 1)
    2560      COMMA      ','           (6, 1) (6, 2)
    2561      NAME       'b'           (6, 3) (6, 4)
    2562      EQUAL      '='           (6, 5) (6, 6)
    2563      NUMBER     '0'           (6, 7) (6, 8)
    2564      COMMA      ','           (6, 8) (6, 9)
    2565      NUMBER     '1'           (6, 10) (6, 11)
    2566      NEWLINE    ''            (6, 11) (6, 11)
    2567      DEDENT     ''            (6, -1) (6, -1)
    2568          """)
    2569  
    2570          code_no_cont = dedent("""
    2571              def fib(n):
    2572                  '''Print a Fibonacci series up to n.'''
    2573                  a, b = 0, 1
    2574          """)
    2575  
    2576          self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
    2577  
    2578          code = dedent("""
    2579              pass
    2580                  \\
    2581  
    2582              pass
    2583          """)
    2584  
    2585          self.check_tokenize(code, """\
    2586      NAME       'pass'        (2, 0) (2, 4)
    2587      NEWLINE    ''            (2, 4) (2, 4)
    2588      NAME       'pass'        (5, 0) (5, 4)
    2589      NEWLINE    ''            (5, 4) (5, 4)
    2590          """)
    2591  
    2592          code_no_cont = dedent("""
    2593              pass
    2594              pass
    2595          """)
    2596  
    2597          self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
    2598  
    2599          code = dedent("""
    2600              if x:
    2601                  y = 1
    2602                  \\
    2603                          \\
    2604                      \\
    2605                  \\
    2606                  foo = 1
    2607          """)
    2608  
    2609          self.check_tokenize(code, """\
    2610      NAME       'if'          (2, 0) (2, 2)
    2611      NAME       'x'           (2, 3) (2, 4)
    2612      COLON      ':'           (2, 4) (2, 5)
    2613      NEWLINE    ''            (2, 5) (2, 5)
    2614      INDENT     ''            (3, -1) (3, -1)
    2615      NAME       'y'           (3, 4) (3, 5)
    2616      EQUAL      '='           (3, 6) (3, 7)
    2617      NUMBER     '1'           (3, 8) (3, 9)
    2618      NEWLINE    ''            (3, 9) (3, 9)
    2619      NAME       'foo'         (8, 4) (8, 7)
    2620      EQUAL      '='           (8, 8) (8, 9)
    2621      NUMBER     '1'           (8, 10) (8, 11)
    2622      NEWLINE    ''            (8, 11) (8, 11)
    2623      DEDENT     ''            (8, -1) (8, -1)
    2624          """)
    2625  
    2626          code_no_cont = dedent("""
    2627              if x:
    2628                  y = 1
    2629                  foo = 1
    2630          """)
    2631  
    2632          self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
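
                  # Summary note (added): in each pair above, the lines that end in a
                  # backslash fold into the surrounding logical lines, so the token
                  # stream, including its INDENT/DEDENT structure, matches the version
                  # written without any continuation lines.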
    2633  
    2634  
    2635  class CTokenizerBufferTests(unittest.TestCase):
    2636      def test_newline_at_the_end_of_buffer(self):
    2637          # See issue 99581: Make sure that if we need to add a new line at the
    2638          # end of the buffer, we have enough space in the buffer, especially when
    2639          # the current line is as long as the buffer space available.
    2640          test_script = f"""\
    2641          #coding: latin-1
    2642          #{"a"*10000}
    2643          #{"a"*10002}"""
    2644          with os_helper.temp_dir() as temp_dir:
    2645              file_name = make_script(temp_dir, 'foo', test_script)
    2646              run_test_script(file_name)
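
                  # Interpretive note (assumption, not from the upstream comment): the
                  # latin-1 coding cookie routes the source through the tokenizer's
                  # re-decoding buffer, and the second comment line is slightly longer
                  # than the first so that the implicit trailing newline has to be
                  # appended just as a line fills that buffer.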
    2647  
    2648  
    2649  if __name__ == "__main__":
    2650      unittest.main()