1 from test import support
2 from test.support import os_helper
3 from tokenize import (tokenize, untokenize, NUMBER, NAME, OP,
4 STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
5 open as tokenize_open, Untokenizer, generate_tokens,
6 NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo,
7 TokenError)
8 from io import BytesIO, StringIO
9 import unittest
10 from textwrap import dedent
11 from unittest import TestCase, mock
12 from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
13 INVALID_UNDERSCORE_LITERALS)
15 from test.support.script_helper import run_test_script, make_script, run_python_until_end
16 import os
17 import token
18
19 # Converts a source string into a list of textual representations
20 # of the tokens, such as:
21 # ` NAME 'if' (1, 0) (1, 2)`
22 # to make writing tests easier.
23 def stringify_tokens_from_source(token_generator, source_string):
24 result = []
25 num_lines = len(source_string.splitlines())
26 missing_trailing_nl = source_string[-1] not in '\r\n'
27
28 for type, token, start, end, line in token_generator:
29 if type == ENDMARKER:
30 break
31 # Ignore the NEWLINE token on the last line if the input lacks a trailing newline
32 if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
33 continue
34 type = tok_name[type]
35 result.append(f" {type:10} {token!r:13} {start} {end}")
36
37 return result
38
39 class TokenizeTest(TestCase):
40 # Tests for the tokenize module.
41
42 # The tests can be really simple. Given a small fragment of source
43 # code, print out a table with tokens. The ENDMARKER, ENCODING and
44 # final NEWLINE are omitted for brevity.
45
46 def check_tokenize(self, s, expected):
47 # Format the tokens in s as a table.
48 # The ENDMARKER and final NEWLINE are omitted.
49 f = BytesIO(s.encode('utf-8'))
50 result = stringify_tokens_from_source(tokenize(f.readline), s)
51 self.assertEqual(result,
52 [" ENCODING 'utf-8' (0, 0) (0, 0)"] +
53 expected.rstrip().splitlines())
54
55 def test_invalid_readline(self):
56 def gen():
57 yield "sdfosdg"
58 yield "sdfosdg"
59 with self.assertRaises(TypeError):
60 list(tokenize(gen().__next__))
61
62 def gen():
63 yield b"sdfosdg"
64 yield b"sdfosdg"
65 with self.assertRaises(TypeError):
66 list(generate_tokens(gen().__next__))
67
68 def gen():
69 yield "sdfosdg"
70 1/0
71 with self.assertRaises(ZeroDivisionError):
72 list(generate_tokens(gen().__next__))
73
74 def test_implicit_newline(self):
75 # Make sure that the tokenizer puts in an implicit NEWLINE
76 # when the input lacks a trailing newline.
77 f = BytesIO("x".encode('utf-8'))
78 tokens = list(tokenize(f.readline))
79 self.assertEqual(tokens[-2].type, NEWLINE)
80 self.assertEqual(tokens[-1].type, ENDMARKER)
81
82 def test_basic(self):
83 self.check_tokenize("1 + 1", """\
84 NUMBER '1' (1, 0) (1, 1)
85 OP '+' (1, 2) (1, 3)
86 NUMBER '1' (1, 4) (1, 5)
87 """)
88 self.check_tokenize("if False:\n"
89 " # NL\n"
90 " \n"
91 " True = False # NEWLINE\n", """\
92 NAME 'if' (1, 0) (1, 2)
93 NAME 'False' (1, 3) (1, 8)
94 OP ':' (1, 8) (1, 9)
95 NEWLINE '\\n' (1, 9) (1, 10)
96 COMMENT '# NL' (2, 4) (2, 8)
97 NL '\\n' (2, 8) (2, 9)
98 NL '\\n' (3, 4) (3, 5)
99 INDENT ' ' (4, 0) (4, 4)
100 NAME 'True' (4, 4) (4, 8)
101 OP '=' (4, 9) (4, 10)
102 NAME 'False' (4, 11) (4, 16)
103 COMMENT '# NEWLINE' (4, 17) (4, 26)
104 NEWLINE '\\n' (4, 26) (4, 27)
105 DEDENT '' (5, 0) (5, 0)
106 """)
107
108 self.check_tokenize("if True:\r\n # NL\r\n foo='bar'\r\n\r\n", """\
109 NAME 'if' (1, 0) (1, 2)
110 NAME 'True' (1, 3) (1, 7)
111 OP ':' (1, 7) (1, 8)
112 NEWLINE '\\r\\n' (1, 8) (1, 10)
113 COMMENT '# NL' (2, 4) (2, 8)
114 NL '\\r\\n' (2, 8) (2, 10)
115 INDENT ' ' (3, 0) (3, 4)
116 NAME 'foo' (3, 4) (3, 7)
117 OP '=' (3, 7) (3, 8)
118 STRING "\'bar\'" (3, 8) (3, 13)
119 NEWLINE '\\r\\n' (3, 13) (3, 15)
120 NL '\\r\\n' (4, 0) (4, 2)
121 DEDENT '' (5, 0) (5, 0)
122 """)
123
124 self.check_tokenize("x = 1 + \\\r\n1\r\n", """\
125 NAME 'x' (1, 0) (1, 1)
126 OP '=' (1, 2) (1, 3)
127 NUMBER '1' (1, 4) (1, 5)
128 OP '+' (1, 6) (1, 7)
129 NUMBER '1' (2, 0) (2, 1)
130 NEWLINE '\\r\\n' (2, 1) (2, 3)
131 """)
132
133 indent_error_file = b"""\
134 def k(x):
135 x += 2
136 x += 5
137 """
138 readline = BytesIO(indent_error_file).readline
139 with self.assertRaisesRegex(IndentationError,
140 "unindent does not match any "
141 "outer indentation level") as e:
142 for tok in tokenize(readline):
143 pass
144 self.assertEqual(e.exception.lineno, 3)
145 self.assertEqual(e.exception.filename, '<string>')
146 self.assertEqual(e.exception.end_lineno, None)
147 self.assertEqual(e.exception.end_offset, None)
148 self.assertEqual(
149 e.exception.msg,
150 'unindent does not match any outer indentation level')
151 self.assertEqual(e.exception.offset, 9)
152 self.assertEqual(e.exception.text, ' x += 5')
153
154 def test_int(self):
155 # Ordinary integers and binary operators
156 self.check_tokenize("0xff <= 255", """\
157 NUMBER '0xff' (1, 0) (1, 4)
158 OP '<=' (1, 5) (1, 7)
159 NUMBER '255' (1, 8) (1, 11)
160 """)
161 self.check_tokenize("0b10 <= 255", """\
162 NUMBER '0b10' (1, 0) (1, 4)
163 OP '<=' (1, 5) (1, 7)
164 NUMBER '255' (1, 8) (1, 11)
165 """)
166 self.check_tokenize("0o123 <= 0O123", """\
167 NUMBER '0o123' (1, 0) (1, 5)
168 OP '<=' (1, 6) (1, 8)
169 NUMBER '0O123' (1, 9) (1, 14)
170 """)
171 self.check_tokenize("1234567 > ~0x15", """\
172 NUMBER '1234567' (1, 0) (1, 7)
173 OP '>' (1, 8) (1, 9)
174 OP '~' (1, 10) (1, 11)
175 NUMBER '0x15' (1, 11) (1, 15)
176 """)
177 self.check_tokenize("2134568 != 1231515", """\
178 NUMBER '2134568' (1, 0) (1, 7)
179 OP '!=' (1, 8) (1, 10)
180 NUMBER '1231515' (1, 11) (1, 18)
181 """)
182 self.check_tokenize("(-124561-1) & 200000000", """\
183 OP '(' (1, 0) (1, 1)
184 OP '-' (1, 1) (1, 2)
185 NUMBER '124561' (1, 2) (1, 8)
186 OP '-' (1, 8) (1, 9)
187 NUMBER '1' (1, 9) (1, 10)
188 OP ')' (1, 10) (1, 11)
189 OP '&' (1, 12) (1, 13)
190 NUMBER '200000000' (1, 14) (1, 23)
191 """)
192 self.check_tokenize("0xdeadbeef != -1", """\
193 NUMBER '0xdeadbeef' (1, 0) (1, 10)
194 OP '!=' (1, 11) (1, 13)
195 OP '-' (1, 14) (1, 15)
196 NUMBER '1' (1, 15) (1, 16)
197 """)
198 self.check_tokenize("0xdeadc0de & 12345", """\
199 NUMBER '0xdeadc0de' (1, 0) (1, 10)
200 OP '&' (1, 11) (1, 12)
201 NUMBER '12345' (1, 13) (1, 18)
202 """)
203 self.check_tokenize("0xFF & 0x15 | 1234", """\
204 NUMBER '0xFF' (1, 0) (1, 4)
205 OP '&' (1, 5) (1, 6)
206 NUMBER '0x15' (1, 7) (1, 11)
207 OP '|' (1, 12) (1, 13)
208 NUMBER '1234' (1, 14) (1, 18)
209 """)
210
211 def test_long(self):
212 # Long integers
213 self.check_tokenize("x = 0", """\
214 NAME 'x' (1, 0) (1, 1)
215 OP '=' (1, 2) (1, 3)
216 NUMBER '0' (1, 4) (1, 5)
217 """)
218 self.check_tokenize("x = 0xfffffffffff", """\
219 NAME 'x' (1, 0) (1, 1)
220 OP '=' (1, 2) (1, 3)
221 NUMBER '0xfffffffffff' (1, 4) (1, 17)
222 """)
223 self.check_tokenize("x = 123141242151251616110", """\
224 NAME 'x' (1, 0) (1, 1)
225 OP '=' (1, 2) (1, 3)
226 NUMBER '123141242151251616110' (1, 4) (1, 25)
227 """)
228 self.check_tokenize("x = -15921590215012591", """\
229 NAME 'x' (1, 0) (1, 1)
230 OP '=' (1, 2) (1, 3)
231 OP '-' (1, 4) (1, 5)
232 NUMBER '15921590215012591' (1, 5) (1, 22)
233 """)
234
235 def test_float(self):
236 # Floating point numbers
237 self.check_tokenize("x = 3.14159", """\
238 NAME 'x' (1, 0) (1, 1)
239 OP '=' (1, 2) (1, 3)
240 NUMBER '3.14159' (1, 4) (1, 11)
241 """)
242 self.check_tokenize("x = 314159.", """\
243 NAME 'x' (1, 0) (1, 1)
244 OP '=' (1, 2) (1, 3)
245 NUMBER '314159.' (1, 4) (1, 11)
246 """)
247 self.check_tokenize("x = .314159", """\
248 NAME 'x' (1, 0) (1, 1)
249 OP '=' (1, 2) (1, 3)
250 NUMBER '.314159' (1, 4) (1, 11)
251 """)
252 self.check_tokenize("x = 3e14159", """\
253 NAME 'x' (1, 0) (1, 1)
254 OP '=' (1, 2) (1, 3)
255 NUMBER '3e14159' (1, 4) (1, 11)
256 """)
257 self.check_tokenize("x = 3E123", """\
258 NAME 'x' (1, 0) (1, 1)
259 OP '=' (1, 2) (1, 3)
260 NUMBER '3E123' (1, 4) (1, 9)
261 """)
262 self.check_tokenize("x+y = 3e-1230", """\
263 NAME 'x' (1, 0) (1, 1)
264 OP '+' (1, 1) (1, 2)
265 NAME 'y' (1, 2) (1, 3)
266 OP '=' (1, 4) (1, 5)
267 NUMBER '3e-1230' (1, 6) (1, 13)
268 """)
269 self.check_tokenize("x = 3.14e159", """\
270 NAME 'x' (1, 0) (1, 1)
271 OP '=' (1, 2) (1, 3)
272 NUMBER '3.14e159' (1, 4) (1, 12)
273 """)
274
275 def test_underscore_literals(self):
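# Helper: return the string of the first NUMBER token that tokenize()
# produces for s, or 'invalid token' if no NUMBER token is emitted.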
276 def number_token(s):
277 f = BytesIO(s.encode('utf-8'))
278 for toktype, token, start, end, line in tokenize(f.readline):
279 if toktype == NUMBER:
280 return token
281 return 'invalid token'
282 for lit in VALID_UNDERSCORE_LITERALS:
283 if '(' in lit:
284 # this won't work with compound complex inputs
285 continue
286 self.assertEqual(number_token(lit), lit)
287 # Valid cases with extra underscores in the tokenize module
288 # See gh-105549 for context
289 extra_valid_cases = {"0_7", "09_99"}
290 for lit in INVALID_UNDERSCORE_LITERALS:
291 if lit in extra_valid_cases:
292 continue
293 try:
294 number_token(lit)
295 except TokenError:
296 continue
297 self.assertNotEqual(number_token(lit), lit)
298
299 def test_string(self):
300 # String literals
301 self.check_tokenize("x = ''; y = \"\"", """\
302 NAME 'x' (1, 0) (1, 1)
303 OP '=' (1, 2) (1, 3)
304 STRING "''" (1, 4) (1, 6)
305 OP ';' (1, 6) (1, 7)
306 NAME 'y' (1, 8) (1, 9)
307 OP '=' (1, 10) (1, 11)
308 STRING '""' (1, 12) (1, 14)
309 """)
310 self.check_tokenize("x = '\"'; y = \"'\"", """\
311 NAME 'x' (1, 0) (1, 1)
312 OP '=' (1, 2) (1, 3)
313 STRING '\\'"\\'' (1, 4) (1, 7)
314 OP ';' (1, 7) (1, 8)
315 NAME 'y' (1, 9) (1, 10)
316 OP '=' (1, 11) (1, 12)
317 STRING '"\\'"' (1, 13) (1, 16)
318 """)
319 self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
320 NAME 'x' (1, 0) (1, 1)
321 OP '=' (1, 2) (1, 3)
322 STRING '"doesn\\'t "' (1, 4) (1, 14)
323 NAME 'shrink' (1, 14) (1, 20)
324 STRING '", does it"' (1, 20) (1, 31)
325 """)
326 self.check_tokenize("x = 'abc' + 'ABC'", """\
327 NAME 'x' (1, 0) (1, 1)
328 OP '=' (1, 2) (1, 3)
329 STRING "'abc'" (1, 4) (1, 9)
330 OP '+' (1, 10) (1, 11)
331 STRING "'ABC'" (1, 12) (1, 17)
332 """)
333 self.check_tokenize('y = "ABC" + "ABC"', """\
334 NAME 'y' (1, 0) (1, 1)
335 OP '=' (1, 2) (1, 3)
336 STRING '"ABC"' (1, 4) (1, 9)
337 OP '+' (1, 10) (1, 11)
338 STRING '"ABC"' (1, 12) (1, 17)
339 """)
340 self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
341 NAME 'x' (1, 0) (1, 1)
342 OP '=' (1, 2) (1, 3)
343 STRING "r'abc'" (1, 4) (1, 10)
344 OP '+' (1, 11) (1, 12)
345 STRING "r'ABC'" (1, 13) (1, 19)
346 OP '+' (1, 20) (1, 21)
347 STRING "R'ABC'" (1, 22) (1, 28)
348 OP '+' (1, 29) (1, 30)
349 STRING "R'ABC'" (1, 31) (1, 37)
350 """)
351 self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
352 NAME 'y' (1, 0) (1, 1)
353 OP '=' (1, 2) (1, 3)
354 STRING 'r"abc"' (1, 4) (1, 10)
355 OP '+' (1, 11) (1, 12)
356 STRING 'r"ABC"' (1, 13) (1, 19)
357 OP '+' (1, 20) (1, 21)
358 STRING 'R"ABC"' (1, 22) (1, 28)
359 OP '+' (1, 29) (1, 30)
360 STRING 'R"ABC"' (1, 31) (1, 37)
361 """)
362
363 self.check_tokenize("u'abc' + U'abc'", """\
364 STRING "u'abc'" (1, 0) (1, 6)
365 OP '+' (1, 7) (1, 8)
366 STRING "U'abc'" (1, 9) (1, 15)
367 """)
368 self.check_tokenize('u"abc" + U"abc"', """\
369 STRING 'u"abc"' (1, 0) (1, 6)
370 OP '+' (1, 7) (1, 8)
371 STRING 'U"abc"' (1, 9) (1, 15)
372 """)
373
374 self.check_tokenize("b'abc' + B'abc'", """\
375 STRING "b'abc'" (1, 0) (1, 6)
376 OP '+' (1, 7) (1, 8)
377 STRING "B'abc'" (1, 9) (1, 15)
378 """)
379 self.check_tokenize('b"abc" + B"abc"', """\
380 STRING 'b"abc"' (1, 0) (1, 6)
381 OP '+' (1, 7) (1, 8)
382 STRING 'B"abc"' (1, 9) (1, 15)
383 """)
384 self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
385 STRING "br'abc'" (1, 0) (1, 7)
386 OP '+' (1, 8) (1, 9)
387 STRING "bR'abc'" (1, 10) (1, 17)
388 OP '+' (1, 18) (1, 19)
389 STRING "Br'abc'" (1, 20) (1, 27)
390 OP '+' (1, 28) (1, 29)
391 STRING "BR'abc'" (1, 30) (1, 37)
392 """)
393 self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
394 STRING 'br"abc"' (1, 0) (1, 7)
395 OP '+' (1, 8) (1, 9)
396 STRING 'bR"abc"' (1, 10) (1, 17)
397 OP '+' (1, 18) (1, 19)
398 STRING 'Br"abc"' (1, 20) (1, 27)
399 OP '+' (1, 28) (1, 29)
400 STRING 'BR"abc"' (1, 30) (1, 37)
401 """)
402 self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
403 STRING "rb'abc'" (1, 0) (1, 7)
404 OP '+' (1, 8) (1, 9)
405 STRING "rB'abc'" (1, 10) (1, 17)
406 OP '+' (1, 18) (1, 19)
407 STRING "Rb'abc'" (1, 20) (1, 27)
408 OP '+' (1, 28) (1, 29)
409 STRING "RB'abc'" (1, 30) (1, 37)
410 """)
411 self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
412 STRING 'rb"abc"' (1, 0) (1, 7)
413 OP '+' (1, 8) (1, 9)
414 STRING 'rB"abc"' (1, 10) (1, 17)
415 OP '+' (1, 18) (1, 19)
416 STRING 'Rb"abc"' (1, 20) (1, 27)
417 OP '+' (1, 28) (1, 29)
418 STRING 'RB"abc"' (1, 30) (1, 37)
419 """)
420 # Check 0, 1, and 2 character string prefixes.
421 self.check_tokenize(r'"a\
422 de\
423 fg"', """\
424 STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
425 """)
426 self.check_tokenize(r'u"a\
427 de"', """\
428 STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3)
429 """)
430 self.check_tokenize(r'rb"a\
431 d"', """\
432 STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2)
433 """)
434 self.check_tokenize(r'"""a\
435 b"""', """\
436 STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
437 """)
438 self.check_tokenize(r'u"""a\
439 b"""', """\
440 STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
441 """)
442 self.check_tokenize(r'rb"""a\
443 b\
444 c"""', """\
445 STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
446 """)
447 self.check_tokenize('f"abc"', """\
448 FSTRING_START 'f"' (1, 0) (1, 2)
449 FSTRING_MIDDLE 'abc' (1, 2) (1, 5)
450 FSTRING_END '"' (1, 5) (1, 6)
451 """)
452 self.check_tokenize('fR"a{b}c"', """\
453 FSTRING_START 'fR"' (1, 0) (1, 3)
454 FSTRING_MIDDLE 'a' (1, 3) (1, 4)
455 OP '{' (1, 4) (1, 5)
456 NAME 'b' (1, 5) (1, 6)
457 OP '}' (1, 6) (1, 7)
458 FSTRING_MIDDLE 'c' (1, 7) (1, 8)
459 FSTRING_END '"' (1, 8) (1, 9)
460 """)
461 self.check_tokenize('fR"a{{{b!r}}}c"', """\
462 FSTRING_START 'fR"' (1, 0) (1, 3)
463 FSTRING_MIDDLE 'a{' (1, 3) (1, 5)
464 OP '{' (1, 6) (1, 7)
465 NAME 'b' (1, 7) (1, 8)
466 OP '!' (1, 8) (1, 9)
467 NAME 'r' (1, 9) (1, 10)
468 OP '}' (1, 10) (1, 11)
469 FSTRING_MIDDLE '}' (1, 11) (1, 12)
470 FSTRING_MIDDLE 'c' (1, 13) (1, 14)
471 FSTRING_END '"' (1, 14) (1, 15)
472 """)
473 self.check_tokenize('f"{{{1+1}}}"', """\
474 FSTRING_START 'f"' (1, 0) (1, 2)
475 FSTRING_MIDDLE '{' (1, 2) (1, 3)
476 OP '{' (1, 4) (1, 5)
477 NUMBER '1' (1, 5) (1, 6)
478 OP '+' (1, 6) (1, 7)
479 NUMBER '1' (1, 7) (1, 8)
480 OP '}' (1, 8) (1, 9)
481 FSTRING_MIDDLE '}' (1, 9) (1, 10)
482 FSTRING_END '"' (1, 11) (1, 12)
483 """)
484 self.check_tokenize('f"""{f\'\'\'{f\'{f"{1+1}"}\'}\'\'\'}"""', """\
485 FSTRING_START 'f\"""' (1, 0) (1, 4)
486 OP '{' (1, 4) (1, 5)
487 FSTRING_START "f'''" (1, 5) (1, 9)
488 OP '{' (1, 9) (1, 10)
489 FSTRING_START "f'" (1, 10) (1, 12)
490 OP '{' (1, 12) (1, 13)
491 FSTRING_START 'f"' (1, 13) (1, 15)
492 OP '{' (1, 15) (1, 16)
493 NUMBER '1' (1, 16) (1, 17)
494 OP '+' (1, 17) (1, 18)
495 NUMBER '1' (1, 18) (1, 19)
496 OP '}' (1, 19) (1, 20)
497 FSTRING_END '"' (1, 20) (1, 21)
498 OP '}' (1, 21) (1, 22)
499 FSTRING_END "'" (1, 22) (1, 23)
500 OP '}' (1, 23) (1, 24)
501 FSTRING_END "'''" (1, 24) (1, 27)
502 OP '}' (1, 27) (1, 28)
503 FSTRING_END '\"""' (1, 28) (1, 31)
504 """)
505 self.check_tokenize('f""" x\nstr(data, encoding={invalid!r})\n"""', """\
506 FSTRING_START 'f\"""' (1, 0) (1, 4)
507 FSTRING_MIDDLE ' x\\nstr(data, encoding=' (1, 4) (2, 19)
508 OP '{' (2, 19) (2, 20)
509 NAME 'invalid' (2, 20) (2, 27)
510 OP '!' (2, 27) (2, 28)
511 NAME 'r' (2, 28) (2, 29)
512 OP '}' (2, 29) (2, 30)
513 FSTRING_MIDDLE ')\\n' (2, 30) (3, 0)
514 FSTRING_END '\"""' (3, 0) (3, 3)
515 """)
516 self.check_tokenize('f"""123456789\nsomething{None}bad"""', """\
517 FSTRING_START 'f\"""' (1, 0) (1, 4)
518 FSTRING_MIDDLE '123456789\\nsomething' (1, 4) (2, 9)
519 OP '{' (2, 9) (2, 10)
520 NAME 'None' (2, 10) (2, 14)
521 OP '}' (2, 14) (2, 15)
522 FSTRING_MIDDLE 'bad' (2, 15) (2, 18)
523 FSTRING_END '\"""' (2, 18) (2, 21)
524 """)
525 self.check_tokenize('f"""abc"""', """\
526 FSTRING_START 'f\"""' (1, 0) (1, 4)
527 FSTRING_MIDDLE 'abc' (1, 4) (1, 7)
528 FSTRING_END '\"""' (1, 7) (1, 10)
529 """)
530 self.check_tokenize(r'f"abc\
531 def"', """\
532 FSTRING_START 'f"' (1, 0) (1, 2)
533 FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 2) (2, 3)
534 FSTRING_END '"' (2, 3) (2, 4)
535 """)
536 self.check_tokenize(r'Rf"abc\
537 def"', """\
538 FSTRING_START 'Rf"' (1, 0) (1, 3)
539 FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 3) (2, 3)
540 FSTRING_END '"' (2, 3) (2, 4)
541 """)
542 self.check_tokenize("f'some words {a+b:.3f} more words {c+d=} final words'", """\
543 FSTRING_START "f'" (1, 0) (1, 2)
544 FSTRING_MIDDLE 'some words ' (1, 2) (1, 13)
545 OP '{' (1, 13) (1, 14)
546 NAME 'a' (1, 14) (1, 15)
547 OP '+' (1, 15) (1, 16)
548 NAME 'b' (1, 16) (1, 17)
549 OP ':' (1, 17) (1, 18)
550 FSTRING_MIDDLE '.3f' (1, 18) (1, 21)
551 OP '}' (1, 21) (1, 22)
552 FSTRING_MIDDLE ' more words ' (1, 22) (1, 34)
553 OP '{' (1, 34) (1, 35)
554 NAME 'c' (1, 35) (1, 36)
555 OP '+' (1, 36) (1, 37)
556 NAME 'd' (1, 37) (1, 38)
557 OP '=' (1, 38) (1, 39)
558 OP '}' (1, 39) (1, 40)
559 FSTRING_MIDDLE ' final words' (1, 40) (1, 52)
560 FSTRING_END "'" (1, 52) (1, 53)
561 """)
562 self.check_tokenize("""\
563 f'''{
564 3
565 =}'''""", """\
566 FSTRING_START "f'''" (1, 0) (1, 4)
567 OP '{' (1, 4) (1, 5)
568 NL '\\n' (1, 5) (1, 6)
569 NUMBER '3' (2, 0) (2, 1)
570 NL '\\n' (2, 1) (2, 2)
571 OP '=' (3, 0) (3, 1)
572 OP '}' (3, 1) (3, 2)
573 FSTRING_END "'''" (3, 2) (3, 5)
574 """)
575
576 def test_function(self):
577 self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
578 NAME 'def' (1, 0) (1, 3)
579 NAME 'd22' (1, 4) (1, 7)
580 OP '(' (1, 7) (1, 8)
581 NAME 'a' (1, 8) (1, 9)
582 OP ',' (1, 9) (1, 10)
583 NAME 'b' (1, 11) (1, 12)
584 OP ',' (1, 12) (1, 13)
585 NAME 'c' (1, 14) (1, 15)
586 OP '=' (1, 15) (1, 16)
587 NUMBER '2' (1, 16) (1, 17)
588 OP ',' (1, 17) (1, 18)
589 NAME 'd' (1, 19) (1, 20)
590 OP '=' (1, 20) (1, 21)
591 NUMBER '2' (1, 21) (1, 22)
592 OP ',' (1, 22) (1, 23)
593 OP '*' (1, 24) (1, 25)
594 NAME 'k' (1, 25) (1, 26)
595 OP ')' (1, 26) (1, 27)
596 OP ':' (1, 27) (1, 28)
597 NAME 'pass' (1, 29) (1, 33)
598 """)
599 self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
600 NAME 'def' (1, 0) (1, 3)
601 NAME 'd01v_' (1, 4) (1, 9)
602 OP '(' (1, 9) (1, 10)
603 NAME 'a' (1, 10) (1, 11)
604 OP '=' (1, 11) (1, 12)
605 NUMBER '1' (1, 12) (1, 13)
606 OP ',' (1, 13) (1, 14)
607 OP '*' (1, 15) (1, 16)
608 NAME 'k' (1, 16) (1, 17)
609 OP ',' (1, 17) (1, 18)
610 OP '**' (1, 19) (1, 21)
611 NAME 'w' (1, 21) (1, 22)
612 OP ')' (1, 22) (1, 23)
613 OP ':' (1, 23) (1, 24)
614 NAME 'pass' (1, 25) (1, 29)
615 """)
616 self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\
617 NAME 'def' (1, 0) (1, 3)
618 NAME 'd23' (1, 4) (1, 7)
619 OP '(' (1, 7) (1, 8)
620 NAME 'a' (1, 8) (1, 9)
621 OP ':' (1, 9) (1, 10)
622 NAME 'str' (1, 11) (1, 14)
623 OP ',' (1, 14) (1, 15)
624 NAME 'b' (1, 16) (1, 17)
625 OP ':' (1, 17) (1, 18)
626 NAME 'int' (1, 19) (1, 22)
627 OP '=' (1, 22) (1, 23)
628 NUMBER '3' (1, 23) (1, 24)
629 OP ')' (1, 24) (1, 25)
630 OP '->' (1, 26) (1, 28)
631 NAME 'int' (1, 29) (1, 32)
632 OP ':' (1, 32) (1, 33)
633 NAME 'pass' (1, 34) (1, 38)
634 """)
635
636 def test_comparison(self):
637 # Comparison
638 self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
639 "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
640 NAME 'if' (1, 0) (1, 2)
641 NUMBER '1' (1, 3) (1, 4)
642 OP '<' (1, 5) (1, 6)
643 NUMBER '1' (1, 7) (1, 8)
644 OP '>' (1, 9) (1, 10)
645 NUMBER '1' (1, 11) (1, 12)
646 OP '==' (1, 13) (1, 15)
647 NUMBER '1' (1, 16) (1, 17)
648 OP '>=' (1, 18) (1, 20)
649 NUMBER '5' (1, 21) (1, 22)
650 OP '<=' (1, 23) (1, 25)
651 NUMBER '0x15' (1, 26) (1, 30)
652 OP '<=' (1, 31) (1, 33)
653 NUMBER '0x12' (1, 34) (1, 38)
654 OP '!=' (1, 39) (1, 41)
655 NUMBER '1' (1, 42) (1, 43)
656 NAME 'and' (1, 44) (1, 47)
657 NUMBER '5' (1, 48) (1, 49)
658 NAME 'in' (1, 50) (1, 52)
659 NUMBER '1' (1, 53) (1, 54)
660 NAME 'not' (1, 55) (1, 58)
661 NAME 'in' (1, 59) (1, 61)
662 NUMBER '1' (1, 62) (1, 63)
663 NAME 'is' (1, 64) (1, 66)
664 NUMBER '1' (1, 67) (1, 68)
665 NAME 'or' (1, 69) (1, 71)
666 NUMBER '5' (1, 72) (1, 73)
667 NAME 'is' (1, 74) (1, 76)
668 NAME 'not' (1, 77) (1, 80)
669 NUMBER '1' (1, 81) (1, 82)
670 OP ':' (1, 82) (1, 83)
671 NAME 'pass' (1, 84) (1, 88)
672 """)
673
674 def test_shift(self):
675 # Shift
676 self.check_tokenize("x = 1 << 1 >> 5", """\
677 NAME 'x' (1, 0) (1, 1)
678 OP '=' (1, 2) (1, 3)
679 NUMBER '1' (1, 4) (1, 5)
680 OP '<<' (1, 6) (1, 8)
681 NUMBER '1' (1, 9) (1, 10)
682 OP '>>' (1, 11) (1, 13)
683 NUMBER '5' (1, 14) (1, 15)
684 """)
685
686 def test_additive(self):
687 # Additive
688 self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
689 NAME 'x' (1, 0) (1, 1)
690 OP '=' (1, 2) (1, 3)
691 NUMBER '1' (1, 4) (1, 5)
692 OP '-' (1, 6) (1, 7)
693 NAME 'y' (1, 8) (1, 9)
694 OP '+' (1, 10) (1, 11)
695 NUMBER '15' (1, 12) (1, 14)
696 OP '-' (1, 15) (1, 16)
697 NUMBER '1' (1, 17) (1, 18)
698 OP '+' (1, 19) (1, 20)
699 NUMBER '0x124' (1, 21) (1, 26)
700 OP '+' (1, 27) (1, 28)
701 NAME 'z' (1, 29) (1, 30)
702 OP '+' (1, 31) (1, 32)
703 NAME 'a' (1, 33) (1, 34)
704 OP '[' (1, 34) (1, 35)
705 NUMBER '5' (1, 35) (1, 36)
706 OP ']' (1, 36) (1, 37)
707 """)
708
709 def test_multiplicative(self):
710 # Multiplicative
711 self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
712 NAME 'x' (1, 0) (1, 1)
713 OP '=' (1, 2) (1, 3)
714 NUMBER '1' (1, 4) (1, 5)
715 OP '//' (1, 5) (1, 7)
716 NUMBER '1' (1, 7) (1, 8)
717 OP '*' (1, 8) (1, 9)
718 NUMBER '1' (1, 9) (1, 10)
719 OP '/' (1, 10) (1, 11)
720 NUMBER '5' (1, 11) (1, 12)
721 OP '*' (1, 12) (1, 13)
722 NUMBER '12' (1, 13) (1, 15)
723 OP '%' (1, 15) (1, 16)
724 NUMBER '0x12' (1, 16) (1, 20)
725 OP '@' (1, 20) (1, 21)
726 NUMBER '42' (1, 21) (1, 23)
727 """)
728
729 def test_unary(self):
730 # Unary
731 self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
732 OP '~' (1, 0) (1, 1)
733 NUMBER '1' (1, 1) (1, 2)
734 OP '^' (1, 3) (1, 4)
735 NUMBER '1' (1, 5) (1, 6)
736 OP '&' (1, 7) (1, 8)
737 NUMBER '1' (1, 9) (1, 10)
738 OP '|' (1, 11) (1, 12)
739 NUMBER '1' (1, 12) (1, 13)
740 OP '^' (1, 14) (1, 15)
741 OP '-' (1, 16) (1, 17)
742 NUMBER '1' (1, 17) (1, 18)
743 """)
744 self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
745 OP '-' (1, 0) (1, 1)
746 NUMBER '1' (1, 1) (1, 2)
747 OP '*' (1, 2) (1, 3)
748 NUMBER '1' (1, 3) (1, 4)
749 OP '/' (1, 4) (1, 5)
750 NUMBER '1' (1, 5) (1, 6)
751 OP '+' (1, 6) (1, 7)
752 NUMBER '1' (1, 7) (1, 8)
753 OP '*' (1, 8) (1, 9)
754 NUMBER '1' (1, 9) (1, 10)
755 OP '//' (1, 10) (1, 12)
756 NUMBER '1' (1, 12) (1, 13)
757 OP '-' (1, 14) (1, 15)
758 OP '-' (1, 16) (1, 17)
759 OP '-' (1, 17) (1, 18)
760 OP '-' (1, 18) (1, 19)
761 NUMBER '1' (1, 19) (1, 20)
762 OP '**' (1, 20) (1, 22)
763 NUMBER '1' (1, 22) (1, 23)
764 """)
765
766 def test_selector(self):
767 # Selector
768 self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
769 NAME 'import' (1, 0) (1, 6)
770 NAME 'sys' (1, 7) (1, 10)
771 OP ',' (1, 10) (1, 11)
772 NAME 'time' (1, 12) (1, 16)
773 NEWLINE '\\n' (1, 16) (1, 17)
774 NAME 'x' (2, 0) (2, 1)
775 OP '=' (2, 2) (2, 3)
776 NAME 'sys' (2, 4) (2, 7)
777 OP '.' (2, 7) (2, 8)
778 NAME 'modules' (2, 8) (2, 15)
779 OP '[' (2, 15) (2, 16)
780 STRING "'time'" (2, 16) (2, 22)
781 OP ']' (2, 22) (2, 23)
782 OP '.' (2, 23) (2, 24)
783 NAME 'time' (2, 24) (2, 28)
784 OP '(' (2, 28) (2, 29)
785 OP ')' (2, 29) (2, 30)
786 """)
787
788 def test_method(self):
789 # Methods
790 self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
791 OP '@' (1, 0) (1, 1)
792 NAME 'staticmethod' (1, 1) (1, 13)
793 NEWLINE '\\n' (1, 13) (1, 14)
794 NAME 'def' (2, 0) (2, 3)
795 NAME 'foo' (2, 4) (2, 7)
796 OP '(' (2, 7) (2, 8)
797 NAME 'x' (2, 8) (2, 9)
798 OP ',' (2, 9) (2, 10)
799 NAME 'y' (2, 10) (2, 11)
800 OP ')' (2, 11) (2, 12)
801 OP ':' (2, 12) (2, 13)
802 NAME 'pass' (2, 14) (2, 18)
803 """)
804
805 def test_tabs(self):
806 # Evil tabs
807 self.check_tokenize("def f():\n"
808 "\tif x\n"
809 " \tpass", """\
810 NAME 'def' (1, 0) (1, 3)
811 NAME 'f' (1, 4) (1, 5)
812 OP '(' (1, 5) (1, 6)
813 OP ')' (1, 6) (1, 7)
814 OP ':' (1, 7) (1, 8)
815 NEWLINE '\\n' (1, 8) (1, 9)
816 INDENT '\\t' (2, 0) (2, 1)
817 NAME 'if' (2, 1) (2, 3)
818 NAME 'x' (2, 4) (2, 5)
819 NEWLINE '\\n' (2, 5) (2, 6)
820 INDENT ' \\t' (3, 0) (3, 9)
821 NAME 'pass' (3, 9) (3, 13)
822 DEDENT '' (4, 0) (4, 0)
823 DEDENT '' (4, 0) (4, 0)
824 """)
825
826 def test_non_ascii_identifiers(self):
827 # Non-ascii identifiers
828 self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
829 NAME 'Örter' (1, 0) (1, 5)
830 OP '=' (1, 6) (1, 7)
831 STRING "'places'" (1, 8) (1, 16)
832 NEWLINE '\\n' (1, 16) (1, 17)
833 NAME 'grün' (2, 0) (2, 4)
834 OP '=' (2, 5) (2, 6)
835 STRING "'green'" (2, 7) (2, 14)
836 """)
837
838 def test_unicode(self):
839 # Legacy unicode literals:
840 self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
841 NAME 'Örter' (1, 0) (1, 5)
842 OP '=' (1, 6) (1, 7)
843 STRING "u'places'" (1, 8) (1, 17)
844 NEWLINE '\\n' (1, 17) (1, 18)
845 NAME 'grün' (2, 0) (2, 4)
846 OP '=' (2, 5) (2, 6)
847 STRING "U'green'" (2, 7) (2, 15)
848 """)
849
850 def test_async(self):
851 # Async/await extension:
852 self.check_tokenize("async = 1", """\
853 NAME 'async' (1, 0) (1, 5)
854 OP '=' (1, 6) (1, 7)
855 NUMBER '1' (1, 8) (1, 9)
856 """)
857
858 self.check_tokenize("a = (async = 1)", """\
859 NAME 'a' (1, 0) (1, 1)
860 OP '=' (1, 2) (1, 3)
861 OP '(' (1, 4) (1, 5)
862 NAME 'async' (1, 5) (1, 10)
863 OP '=' (1, 11) (1, 12)
864 NUMBER '1' (1, 13) (1, 14)
865 OP ')' (1, 14) (1, 15)
866 """)
867
868 self.check_tokenize("async()", """\
869 NAME 'async' (1, 0) (1, 5)
870 OP '(' (1, 5) (1, 6)
871 OP ')' (1, 6) (1, 7)
872 """)
873
874 self.check_tokenize("class async(Bar):pass", """\
875 NAME 'class' (1, 0) (1, 5)
876 NAME 'async' (1, 6) (1, 11)
877 OP '(' (1, 11) (1, 12)
878 NAME 'Bar' (1, 12) (1, 15)
879 OP ')' (1, 15) (1, 16)
880 OP ':' (1, 16) (1, 17)
881 NAME 'pass' (1, 17) (1, 21)
882 """)
883
884 self.check_tokenize("class async:pass", """\
885 NAME 'class' (1, 0) (1, 5)
886 NAME 'async' (1, 6) (1, 11)
887 OP ':' (1, 11) (1, 12)
888 NAME 'pass' (1, 12) (1, 16)
889 """)
890
891 self.check_tokenize("await = 1", """\
892 NAME 'await' (1, 0) (1, 5)
893 OP '=' (1, 6) (1, 7)
894 NUMBER '1' (1, 8) (1, 9)
895 """)
896
897 self.check_tokenize("foo.async", """\
898 NAME 'foo' (1, 0) (1, 3)
899 OP '.' (1, 3) (1, 4)
900 NAME 'async' (1, 4) (1, 9)
901 """)
902
903 self.check_tokenize("async for a in b: pass", """\
904 NAME 'async' (1, 0) (1, 5)
905 NAME 'for' (1, 6) (1, 9)
906 NAME 'a' (1, 10) (1, 11)
907 NAME 'in' (1, 12) (1, 14)
908 NAME 'b' (1, 15) (1, 16)
909 OP ':' (1, 16) (1, 17)
910 NAME 'pass' (1, 18) (1, 22)
911 """)
912
913 self.check_tokenize("async with a as b: pass", """\
914 NAME 'async' (1, 0) (1, 5)
915 NAME 'with' (1, 6) (1, 10)
916 NAME 'a' (1, 11) (1, 12)
917 NAME 'as' (1, 13) (1, 15)
918 NAME 'b' (1, 16) (1, 17)
919 OP ':' (1, 17) (1, 18)
920 NAME 'pass' (1, 19) (1, 23)
921 """)
922
923 self.check_tokenize("async.foo", """\
924 NAME 'async' (1, 0) (1, 5)
925 OP '.' (1, 5) (1, 6)
926 NAME 'foo' (1, 6) (1, 9)
927 """)
928
929 self.check_tokenize("async", """\
930 NAME 'async' (1, 0) (1, 5)
931 """)
932
933 self.check_tokenize("async\n#comment\nawait", """\
934 NAME 'async' (1, 0) (1, 5)
935 NEWLINE '\\n' (1, 5) (1, 6)
936 COMMENT '#comment' (2, 0) (2, 8)
937 NL '\\n' (2, 8) (2, 9)
938 NAME 'await' (3, 0) (3, 5)
939 """)
940
941 self.check_tokenize("async\n...\nawait", """\
942 NAME 'async' (1, 0) (1, 5)
943 NEWLINE '\\n' (1, 5) (1, 6)
944 OP '...' (2, 0) (2, 3)
945 NEWLINE '\\n' (2, 3) (2, 4)
946 NAME 'await' (3, 0) (3, 5)
947 """)
948
949 self.check_tokenize("async\nawait", """\
950 NAME 'async' (1, 0) (1, 5)
951 NEWLINE '\\n' (1, 5) (1, 6)
952 NAME 'await' (2, 0) (2, 5)
953 """)
954
955 self.check_tokenize("foo.async + 1", """\
956 NAME 'foo' (1, 0) (1, 3)
957 OP '.' (1, 3) (1, 4)
958 NAME 'async' (1, 4) (1, 9)
959 OP '+' (1, 10) (1, 11)
960 NUMBER '1' (1, 12) (1, 13)
961 """)
962
963 self.check_tokenize("async def foo(): pass", """\
964 NAME 'async' (1, 0) (1, 5)
965 NAME 'def' (1, 6) (1, 9)
966 NAME 'foo' (1, 10) (1, 13)
967 OP '(' (1, 13) (1, 14)
968 OP ')' (1, 14) (1, 15)
969 OP ':' (1, 15) (1, 16)
970 NAME 'pass' (1, 17) (1, 21)
971 """)
972
973 self.check_tokenize('''\
974 async def foo():
975 def foo(await):
976 await = 1
977 if 1:
978 await
979 async += 1
980 ''', """\
981 NAME 'async' (1, 0) (1, 5)
982 NAME 'def' (1, 6) (1, 9)
983 NAME 'foo' (1, 10) (1, 13)
984 OP '(' (1, 13) (1, 14)
985 OP ')' (1, 14) (1, 15)
986 OP ':' (1, 15) (1, 16)
987 NEWLINE '\\n' (1, 16) (1, 17)
988 INDENT ' ' (2, 0) (2, 2)
989 NAME 'def' (2, 2) (2, 5)
990 NAME 'foo' (2, 6) (2, 9)
991 OP '(' (2, 9) (2, 10)
992 NAME 'await' (2, 10) (2, 15)
993 OP ')' (2, 15) (2, 16)
994 OP ':' (2, 16) (2, 17)
995 NEWLINE '\\n' (2, 17) (2, 18)
996 INDENT ' ' (3, 0) (3, 4)
997 NAME 'await' (3, 4) (3, 9)
998 OP '=' (3, 10) (3, 11)
999 NUMBER '1' (3, 12) (3, 13)
1000 NEWLINE '\\n' (3, 13) (3, 14)
1001 DEDENT '' (4, 2) (4, 2)
1002 NAME 'if' (4, 2) (4, 4)
1003 NUMBER '1' (4, 5) (4, 6)
1004 OP ':' (4, 6) (4, 7)
1005 NEWLINE '\\n' (4, 7) (4, 8)
1006 INDENT ' ' (5, 0) (5, 4)
1007 NAME 'await' (5, 4) (5, 9)
1008 NEWLINE '\\n' (5, 9) (5, 10)
1009 DEDENT '' (6, 0) (6, 0)
1010 DEDENT '' (6, 0) (6, 0)
1011 NAME 'async' (6, 0) (6, 5)
1012 OP '+=' (6, 6) (6, 8)
1013 NUMBER '1' (6, 9) (6, 10)
1014 NEWLINE '\\n' (6, 10) (6, 11)
1015 """)
1016
1017 self.check_tokenize('''\
1018 async def foo():
1019 async for i in 1: pass''', """\
1020 NAME 'async' (1, 0) (1, 5)
1021 NAME 'def' (1, 6) (1, 9)
1022 NAME 'foo' (1, 10) (1, 13)
1023 OP '(' (1, 13) (1, 14)
1024 OP ')' (1, 14) (1, 15)
1025 OP ':' (1, 15) (1, 16)
1026 NEWLINE '\\n' (1, 16) (1, 17)
1027 INDENT ' ' (2, 0) (2, 2)
1028 NAME 'async' (2, 2) (2, 7)
1029 NAME 'for' (2, 8) (2, 11)
1030 NAME 'i' (2, 12) (2, 13)
1031 NAME 'in' (2, 14) (2, 16)
1032 NUMBER '1' (2, 17) (2, 18)
1033 OP ':' (2, 18) (2, 19)
1034 NAME 'pass' (2, 20) (2, 24)
1035 DEDENT '' (3, 0) (3, 0)
1036 """)
1037
1038 self.check_tokenize('''async def foo(async): await''', """\
1039 NAME 'async' (1, 0) (1, 5)
1040 NAME 'def' (1, 6) (1, 9)
1041 NAME 'foo' (1, 10) (1, 13)
1042 OP '(' (1, 13) (1, 14)
1043 NAME 'async' (1, 14) (1, 19)
1044 OP ')' (1, 19) (1, 20)
1045 OP ':' (1, 20) (1, 21)
1046 NAME 'await' (1, 22) (1, 27)
1047 """)
1048
1049 self.check_tokenize('''\
1050 def f():
1051
1052 def baz(): pass
1053 async def bar(): pass
1054
1055 await = 2''', """\
1056 NAME 'def' (1, 0) (1, 3)
1057 NAME 'f' (1, 4) (1, 5)
1058 OP '(' (1, 5) (1, 6)
1059 OP ')' (1, 6) (1, 7)
1060 OP ':' (1, 7) (1, 8)
1061 NEWLINE '\\n' (1, 8) (1, 9)
1062 NL '\\n' (2, 0) (2, 1)
1063 INDENT ' ' (3, 0) (3, 2)
1064 NAME 'def' (3, 2) (3, 5)
1065 NAME 'baz' (3, 6) (3, 9)
1066 OP '(' (3, 9) (3, 10)
1067 OP ')' (3, 10) (3, 11)
1068 OP ':' (3, 11) (3, 12)
1069 NAME 'pass' (3, 13) (3, 17)
1070 NEWLINE '\\n' (3, 17) (3, 18)
1071 NAME 'async' (4, 2) (4, 7)
1072 NAME 'def' (4, 8) (4, 11)
1073 NAME 'bar' (4, 12) (4, 15)
1074 OP '(' (4, 15) (4, 16)
1075 OP ')' (4, 16) (4, 17)
1076 OP ':' (4, 17) (4, 18)
1077 NAME 'pass' (4, 19) (4, 23)
1078 NEWLINE '\\n' (4, 23) (4, 24)
1079 NL '\\n' (5, 0) (5, 1)
1080 NAME 'await' (6, 2) (6, 7)
1081 OP '=' (6, 8) (6, 9)
1082 NUMBER '2' (6, 10) (6, 11)
1083 DEDENT '' (7, 0) (7, 0)
1084 """)
1085
1086 self.check_tokenize('''\
1087 async def f():
1088
1089 def baz(): pass
1090 async def bar(): pass
1091
1092 await = 2''', """\
1093 NAME 'async' (1, 0) (1, 5)
1094 NAME 'def' (1, 6) (1, 9)
1095 NAME 'f' (1, 10) (1, 11)
1096 OP '(' (1, 11) (1, 12)
1097 OP ')' (1, 12) (1, 13)
1098 OP ':' (1, 13) (1, 14)
1099 NEWLINE '\\n' (1, 14) (1, 15)
1100 NL '\\n' (2, 0) (2, 1)
1101 INDENT ' ' (3, 0) (3, 2)
1102 NAME 'def' (3, 2) (3, 5)
1103 NAME 'baz' (3, 6) (3, 9)
1104 OP '(' (3, 9) (3, 10)
1105 OP ')' (3, 10) (3, 11)
1106 OP ':' (3, 11) (3, 12)
1107 NAME 'pass' (3, 13) (3, 17)
1108 NEWLINE '\\n' (3, 17) (3, 18)
1109 NAME 'async' (4, 2) (4, 7)
1110 NAME 'def' (4, 8) (4, 11)
1111 NAME 'bar' (4, 12) (4, 15)
1112 OP '(' (4, 15) (4, 16)
1113 OP ')' (4, 16) (4, 17)
1114 OP ':' (4, 17) (4, 18)
1115 NAME 'pass' (4, 19) (4, 23)
1116 NEWLINE '\\n' (4, 23) (4, 24)
1117 NL '\\n' (5, 0) (5, 1)
1118 NAME 'await' (6, 2) (6, 7)
1119 OP '=' (6, 8) (6, 9)
1120 NUMBER '2' (6, 10) (6, 11)
1121 DEDENT '' (7, 0) (7, 0)
1122 """)
1123
1124 def test_newline_after_parenthesized_block_with_comment(self):
1125 self.check_tokenize('''\
1126 [
1127 # A comment here
1128 1
1129 ]
1130 ''', """\
1131 OP '[' (1, 0) (1, 1)
1132 NL '\\n' (1, 1) (1, 2)
1133 COMMENT '# A comment here' (2, 4) (2, 20)
1134 NL '\\n' (2, 20) (2, 21)
1135 NUMBER '1' (3, 4) (3, 5)
1136 NL '\\n' (3, 5) (3, 6)
1137 OP ']' (4, 0) (4, 1)
1138 NEWLINE '\\n' (4, 1) (4, 2)
1139 """)
1140
1141 def test_closing_parenthesis_from_different_line(self):
1142 self.check_tokenize("); x", """\
1143 OP ')' (1, 0) (1, 1)
1144 OP ';' (1, 1) (1, 2)
1145 NAME 'x' (1, 3) (1, 4)
1146 """)
1147
1148 class GenerateTokensTest(TokenizeTest):
1149 def check_tokenize(self, s, expected):
1150 # Format the tokens in s as a table.
1151 # The ENDMARKER and final NEWLINE are omitted.
1152 f = StringIO(s)
1153 result = stringify_tokens_from_source(generate_tokens(f.readline), s)
1154 self.assertEqual(result, expected.rstrip().splitlines())
1155
1156
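# Example from the tokenize docs: decistmt() rewrites every float literal in
# a statement as a Decimal('...') constructor call by walking the token
# stream and feeding the edited tokens back through untokenize().
# TestMisc.test_decistmt below exercises it on '+21.3e-5*-.1234/81.7'.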
1157 def decistmt(s):
1158 result = []
1159 g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
1160 for toknum, tokval, _, _, _ in g:
1161 if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens
1162 result.extend([
1163 (NAME, 'Decimal'),
1164 (OP, '('),
1165 (STRING, repr(tokval)),
1166 (OP, ')')
1167 ])
1168 else:
1169 result.append((toknum, tokval))
1170 return untokenize(result).decode('utf-8').strip()
1171
1172 class TestMisc(TestCase):
1173
1174 def test_decistmt(self):
1175 # Substitute Decimals for floats in a string of statements.
1176 # This is an example from the docs.
1177
1178 from decimal import Decimal
1179 s = '+21.3e-5*-.1234/81.7'
1180 self.assertEqual(decistmt(s),
1181 "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")
1182
1183 # The format of the exponent is inherited from the platform C library.
1184 # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
1185 # we're only showing 11 digits, and the 12th isn't close to 5, the
1186 # rest of the output should be platform-independent.
1187 self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')
1188
1189 # Output from calculations with Decimal should be identical across all
1190 # platforms.
1191 self.assertEqual(eval(decistmt(s)),
1192 Decimal('-3.217160342717258261933904529E-7'))
1193
1194
1195 class TestTokenizerAdheresToPep0263(TestCase):
1196 """
1197 Test that the tokenizer adheres to the coding behaviour stipulated in PEP 0263.
1198 """
1199
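# Helper: open the named data file from the test directory in binary mode and
# round-trip it through TestRoundtrip.check_roundtrip(), so a bad or
# mismatched coding cookie surfaces as a SyntaxError from tokenize().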
1200 def _testFile(self, filename):
1201 path = os.path.join(os.path.dirname(__file__), filename)
1202 with open(path, 'rb') as f:
1203 TestRoundtrip.check_roundtrip(self, f)
1204
1205 def test_utf8_coding_cookie_and_no_utf8_bom(self):
1206 f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
1207 self._testFile(f)
1208
1209 def test_latin1_coding_cookie_and_utf8_bom(self):
1210 """
1211 As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
1212 allowed encoding for the comment is 'utf-8'. The text file used in
1213 this test starts with a BOM signature, but specifies latin1 as the
1214 coding, so verify that a SyntaxError is raised, which matches the
1215 behaviour of the interpreter when it encounters a similar condition.
1216 """
1217 f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
1218 self.assertRaises(SyntaxError, self._testFile, f)
1219
1220 def test_no_coding_cookie_and_utf8_bom(self):
1221 f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
1222 self._testFile(f)
1223
1224 def test_utf8_coding_cookie_and_utf8_bom(self):
1225 f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
1226 self._testFile(f)
1227
1228 def test_bad_coding_cookie(self):
1229 self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
1230 self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')
1231
1232
1233 class Test_Tokenize(TestCase):
1234
1235 def test__tokenize_decodes_with_specified_encoding(self):
1236 literal = '"ЉЊЈЁЂ"'
1237 line = literal.encode('utf-8')
1238 first = False
1239 def readline():
1240 nonlocal first
1241 if not first:
1242 first = True
1243 yield line
1244 else:
1245 yield b''
1246
1247 # skip the initial encoding token and the end tokens
1248 tokens = list(_generate_tokens_from_c_tokenizer(readline().__next__, encoding='utf-8',
1249 extra_tokens=True))[:-2]
1250 expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
1251 self.assertEqual(tokens, expected_tokens,
1252 "bytes not decoded with encoding")
1253
1254
1255 class TestDetectEncoding(TestCase):
1256
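# Helper: build a readline() callable over a fixed tuple of byte lines; each
# call returns the next line, and StopIteration is raised once the lines are
# exhausted, mimicking a very short file.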
1257 def get_readline(self, lines):
1258 index = 0
1259 def readline():
1260 nonlocal index
1261 if index == len(lines):
1262 raise StopIteration
1263 line = lines[index]
1264 index += 1
1265 return line
1266 return readline
1267
1268 def test_no_bom_no_encoding_cookie(self):
1269 lines = (
1270 b'# something\n',
1271 b'print(something)\n',
1272 b'do_something(else)\n'
1273 )
1274 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1275 self.assertEqual(encoding, 'utf-8')
1276 self.assertEqual(consumed_lines, list(lines[:2]))
1277
1278 def test_bom_no_cookie(self):
1279 lines = (
1280 b'\xef\xbb\xbf# something\n',
1281 b'print(something)\n',
1282 b'do_something(else)\n'
1283 )
1284 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1285 self.assertEqual(encoding, 'utf-8-sig')
1286 self.assertEqual(consumed_lines,
1287 [b'# something\n', b'print(something)\n'])
1288
1289 def test_cookie_first_line_no_bom(self):
1290 lines = (
1291 b'# -*- coding: latin-1 -*-\n',
1292 b'print(something)\n',
1293 b'do_something(else)\n'
1294 )
1295 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1296 self.assertEqual(encoding, 'iso-8859-1')
1297 self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])
1298
1299 def test_matched_bom_and_cookie_first_line(self):
1300 lines = (
1301 b'\xef\xbb\xbf# coding=utf-8\n',
1302 b'print(something)\n',
1303 b'do_something(else)\n'
1304 )
1305 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1306 self.assertEqual(encoding, 'utf-8-sig')
1307 self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])
1308
1309 def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
1310 lines = (
1311 b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
1312 b'print(something)\n',
1313 b'do_something(else)\n'
1314 )
1315 readline = self.get_readline(lines)
1316 self.assertRaises(SyntaxError, detect_encoding, readline)
1317
1318 def test_cookie_second_line_no_bom(self):
1319 lines = (
1320 b'#! something\n',
1321 b'# vim: set fileencoding=ascii :\n',
1322 b'print(something)\n',
1323 b'do_something(else)\n'
1324 )
1325 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1326 self.assertEqual(encoding, 'ascii')
1327 expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
1328 self.assertEqual(consumed_lines, expected)
1329
1330 def test_matched_bom_and_cookie_second_line(self):
1331 lines = (
1332 b'\xef\xbb\xbf#! something\n',
1333 b'f# coding=utf-8\n',
1334 b'print(something)\n',
1335 b'do_something(else)\n'
1336 )
1337 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1338 self.assertEqual(encoding, 'utf-8-sig')
1339 self.assertEqual(consumed_lines,
1340 [b'#! something\n', b'f# coding=utf-8\n'])
1341
1342 def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
1343 lines = (
1344 b'\xef\xbb\xbf#! something\n',
1345 b'# vim: set fileencoding=ascii :\n',
1346 b'print(something)\n',
1347 b'do_something(else)\n'
1348 )
1349 readline = self.get_readline(lines)
1350 self.assertRaises(SyntaxError, detect_encoding, readline)
1351
1352 def test_cookie_second_line_noncommented_first_line(self):
1353 lines = (
1354 b"print('\xc2\xa3')\n",
1355 b'# vim: set fileencoding=iso8859-15 :\n',
1356 b"print('\xe2\x82\xac')\n"
1357 )
1358 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1359 self.assertEqual(encoding, 'utf-8')
1360 expected = [b"print('\xc2\xa3')\n"]
1361 self.assertEqual(consumed_lines, expected)
1362
1363 def test_cookie_second_line_commented_first_line(self):
1364 lines = (
1365 b"#print('\xc2\xa3')\n",
1366 b'# vim: set fileencoding=iso8859-15 :\n',
1367 b"print('\xe2\x82\xac')\n"
1368 )
1369 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1370 self.assertEqual(encoding, 'iso8859-15')
1371 expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
1372 self.assertEqual(consumed_lines, expected)
1373
1374 def test_cookie_second_line_empty_first_line(self):
1375 lines = (
1376 b'\n',
1377 b'# vim: set fileencoding=iso8859-15 :\n',
1378 b"print('\xe2\x82\xac')\n"
1379 )
1380 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1381 self.assertEqual(encoding, 'iso8859-15')
1382 expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
1383 self.assertEqual(consumed_lines, expected)
1384
1385 def test_latin1_normalization(self):
1386 # See get_normal_name() in tokenizer.c.
1387 encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
1388 "iso-8859-1-unix", "iso-latin-1-mac")
1389 for encoding in encodings:
1390 for rep in ("-", "_"):
1391 enc = encoding.replace("-", rep)
1392 lines = (b"#!/usr/bin/python\n",
1393 b"# coding: " + enc.encode("ascii") + b"\n",
1394 b"print(things)\n",
1395 b"do_something += 4\n")
1396 rl = self.get_readline(lines)
1397 found, consumed_lines = detect_encoding(rl)
1398 self.assertEqual(found, "iso-8859-1")
1399
1400 def test_syntaxerror_latin1(self):
1401 # Issue 14629: need to raise SyntaxError if the first
1402 # line(s) have non-UTF-8 characters
1403 lines = (
1404 b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
1405 )
1406 readline = self.get_readline(lines)
1407 self.assertRaises(SyntaxError, detect_encoding, readline)
1408
1409
1410 def test_utf8_normalization(self):
1411 # See get_normal_name() in tokenizer.c.
1412 encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
1413 for encoding in encodings:
1414 for rep in ("-", "_"):
1415 enc = encoding.replace("-", rep)
1416 lines = (b"#!/usr/bin/python\n",
1417 b"# coding: " + enc.encode("ascii") + b"\n",
1418 b"1 + 3\n")
1419 rl = self.get_readline(lines)
1420 found, consumed_lines = detect_encoding(rl)
1421 self.assertEqual(found, "utf-8")
1422
1423 def test_short_files(self):
1424 readline = self.get_readline((b'print(something)\n',))
1425 encoding, consumed_lines = detect_encoding(readline)
1426 self.assertEqual(encoding, 'utf-8')
1427 self.assertEqual(consumed_lines, [b'print(something)\n'])
1428
1429 encoding, consumed_lines = detect_encoding(self.get_readline(()))
1430 self.assertEqual(encoding, 'utf-8')
1431 self.assertEqual(consumed_lines, [])
1432
1433 readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
1434 encoding, consumed_lines = detect_encoding(readline)
1435 self.assertEqual(encoding, 'utf-8-sig')
1436 self.assertEqual(consumed_lines, [b'print(something)\n'])
1437
1438 readline = self.get_readline((b'\xef\xbb\xbf',))
1439 encoding, consumed_lines = detect_encoding(readline)
1440 self.assertEqual(encoding, 'utf-8-sig')
1441 self.assertEqual(consumed_lines, [])
1442
1443 readline = self.get_readline((b'# coding: bad\n',))
1444 self.assertRaises(SyntaxError, detect_encoding, readline)
1445
1446 def test_false_encoding(self):
1447 # Issue 18873: "Encoding" detected in non-comment lines
1448 readline = self.get_readline((b'print("#coding=fake")',))
1449 encoding, consumed_lines = detect_encoding(readline)
1450 self.assertEqual(encoding, 'utf-8')
1451 self.assertEqual(consumed_lines, [b'print("#coding=fake")'])
1452
1453 def test_open(self):
1454 filename = os_helper.TESTFN + '.py'
1455 self.addCleanup(os_helper.unlink, filename)
1456
1457 # test coding cookie
1458 for encoding in ('iso-8859-15', 'utf-8'):
1459 with open(filename, 'w', encoding=encoding) as fp:
1460 print("# coding: %s" % encoding, file=fp)
1461 print("print('euro:\u20ac')", file=fp)
1462 with tokenize_open(filename) as fp:
1463 self.assertEqual(fp.encoding, encoding)
1464 self.assertEqual(fp.mode, 'r')
1465
1466 # test BOM (no coding cookie)
1467 with open(filename, 'w', encoding='utf-8-sig') as fp:
1468 print("print('euro:\u20ac')", file=fp)
1469 with tokenize_open(filename) as fp:
1470 self.assertEqual(fp.encoding, 'utf-8-sig')
1471 self.assertEqual(fp.mode, 'r')
1472
1473 def test_filename_in_exception(self):
1474 # When possible, include the file name in the exception.
1475 path = 'some_file_path'
1476 lines = (
1477 b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
1478 )
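# Minimal file-like stand-in: it exposes only a name attribute and a
# readline() method, letting the test control whether a file name is
# available when detect_encoding() raises.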
1479 class Bunk:
1480 def __init__(self, lines, path):
1481 self.name = path
1482 self._lines = lines
1483 self._index = 0
1484
1485 def readline(self):
1486 if self._index == len(lines):
1487 raise StopIteration
1488 line = lines[self._index]
1489 self._index += 1
1490 return line
1491
1492 with self.assertRaises(SyntaxError):
1493 ins = Bunk(lines, path)
1494 # Make sure lacking a name isn't an issue.
1495 del ins.name
1496 detect_encoding(ins.readline)
1497 with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
1498 ins = Bunk(lines, path)
1499 detect_encoding(ins.readline)
1500
1501 def test_open_error(self):
1502 # Issue #23840: open() must close the binary file on error
1503 m = BytesIO(b'#coding:xxx')
1504 with mock.patch('tokenize._builtin_open', return_value=m):
1505 self.assertRaises(SyntaxError, tokenize_open, 'foobar')
1506 self.assertTrue(m.closed)
1507
1508
1509 class TestTokenize(TestCase):
1510
1511 def test_tokenize(self):
1512 import tokenize as tokenize_module
1513 encoding = "utf-8"
1514 encoding_used = None
1515 def mock_detect_encoding(readline):
1516 return encoding, [b'first', b'second']
1517
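# Stand-in for the C tokenizer hook: it records the encoding it was handed
# and echoes back every non-empty line it reads, so the test can verify both
# the encoding plumbing and the readline wiring.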
1518 def mock__tokenize(readline, encoding, **kwargs):
1519 nonlocal encoding_used
1520 encoding_used = encoding
1521 out = []
1522 while True:
1523 try:
1524 next_line = readline()
1525 except StopIteration:
1526 return out
1527 if next_line:
1528 out.append(next_line)
1529 continue
1530 return out
1531
1532 counter = 0
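# Fake readline: returns b'1' through b'4' on successive calls, then b'' to
# signal end of input.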
1533 def mock_readline():
1534 nonlocal counter
1535 counter += 1
1536 if counter == 5:
1537 return b''
1538 return str(counter).encode()
1539
1540 orig_detect_encoding = tokenize_module.detect_encoding
1541 orig_c_token = tokenize_module._generate_tokens_from_c_tokenizer
1542 tokenize_module.detect_encoding = mock_detect_encoding
1543 tokenize_module._generate_tokens_from_c_tokenizer = mock__tokenize
1544 try:
1545 results = tokenize(mock_readline)
1546 self.assertEqual(list(results)[1:],
1547 [b'first', b'second', b'1', b'2', b'3', b'4'])
1548 finally:
1549 tokenize_module.detect_encoding = orig_detect_encoding
1550 tokenize_module._generate_tokens_from_c_tokenizer = orig_c_token
1551
1552 self.assertEqual(encoding_used, encoding)
1553
1554 def test_oneline_defs(self):
1555 buf = []
1556 for i in range(500):
1557 buf.append('def i{i}(): return {i}'.format(i=i))
1558 buf.append('OK')
1559 buf = '\n'.join(buf)
1560
1561 # Test that 500 consecutive one-line defs are OK
1562 toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
1563 self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER
1564 # [-2] is always NEWLINE
1565
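# Helper: tokenize opstr and assert that the exact_type of each resulting
# operator token matches optypes, allowing for the surrounding ENCODING,
# NEWLINE and ENDMARKER tokens.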
1566 def assertExactTypeEqual(self, opstr, *optypes):
1567 tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
1568 num_optypes = len(optypes)
1569 self.assertEqual(len(tokens), 3 + num_optypes)
1570 self.assertEqual(tok_name[tokens[0].exact_type],
1571 tok_name[ENCODING])
1572 for i in range(num_optypes):
1573 self.assertEqual(tok_name[tokens[i + 1].exact_type],
1574 tok_name[optypes[i]])
1575 self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
1576 tok_name[token.NEWLINE])
1577 self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],
1578 tok_name[token.ENDMARKER])
1579
1580 def test_exact_type(self):
1581 self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
1582 self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
1583 self.assertExactTypeEqual(':', token.COLON)
1584 self.assertExactTypeEqual(',', token.COMMA)
1585 self.assertExactTypeEqual(';', token.SEMI)
1586 self.assertExactTypeEqual('+', token.PLUS)
1587 self.assertExactTypeEqual('-', token.MINUS)
1588 self.assertExactTypeEqual('*', token.STAR)
1589 self.assertExactTypeEqual('/', token.SLASH)
1590 self.assertExactTypeEqual('|', token.VBAR)
1591 self.assertExactTypeEqual('&', token.AMPER)
1592 self.assertExactTypeEqual('<', token.LESS)
1593 self.assertExactTypeEqual('>', token.GREATER)
1594 self.assertExactTypeEqual('=', token.EQUAL)
1595 self.assertExactTypeEqual('.', token.DOT)
1596 self.assertExactTypeEqual('%', token.PERCENT)
1597 self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
1598 self.assertExactTypeEqual('==', token.EQEQUAL)
1599 self.assertExactTypeEqual('!=', token.NOTEQUAL)
1600 self.assertExactTypeEqual('<=', token.LESSEQUAL)
1601 self.assertExactTypeEqual('>=', token.GREATEREQUAL)
1602 self.assertExactTypeEqual('~', token.TILDE)
1603 self.assertExactTypeEqual('^', token.CIRCUMFLEX)
1604 self.assertExactTypeEqual('<<', token.LEFTSHIFT)
1605 self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
1606 self.assertExactTypeEqual('**', token.DOUBLESTAR)
1607 self.assertExactTypeEqual('+=', token.PLUSEQUAL)
1608 self.assertExactTypeEqual('-=', token.MINEQUAL)
1609 self.assertExactTypeEqual('*=', token.STAREQUAL)
1610 self.assertExactTypeEqual('/=', token.SLASHEQUAL)
1611 self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
1612 self.assertExactTypeEqual('&=', token.AMPEREQUAL)
1613 self.assertExactTypeEqual('|=', token.VBAREQUAL)
1614 self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1615 self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1616 self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
1617 self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
1618 self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
1619 self.assertExactTypeEqual('//', token.DOUBLESLASH)
1620 self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
1621 self.assertExactTypeEqual(':=', token.COLONEQUAL)
1622 self.assertExactTypeEqual('...', token.ELLIPSIS)
1623 self.assertExactTypeEqual('->', token.RARROW)
1624 self.assertExactTypeEqual('@', token.AT)
1625 self.assertExactTypeEqual('@=', token.ATEQUAL)
1626
1627 self.assertExactTypeEqual('a**2+b**2==c**2',
1628 NAME, token.DOUBLESTAR, NUMBER,
1629 token.PLUS,
1630 NAME, token.DOUBLESTAR, NUMBER,
1631 token.EQEQUAL,
1632 NAME, token.DOUBLESTAR, NUMBER)
1633 self.assertExactTypeEqual('{1, 2, 3}',
1634 token.LBRACE,
1635 token.NUMBER, token.COMMA,
1636 token.NUMBER, token.COMMA,
1637 token.NUMBER,
1638 token.RBRACE)
1639 self.assertExactTypeEqual('^(x & 0x1)',
1640 token.CIRCUMFLEX,
1641 token.LPAR,
1642 token.NAME, token.AMPER, token.NUMBER,
1643 token.RPAR)
1644
1645 def test_pathological_trailing_whitespace(self):
1646 # See http://bugs.python.org/issue16152
1647 self.assertExactTypeEqual('@ ', token.AT)
1648
1649 def test_comment_at_the_end_of_the_source_without_newline(self):
1650 # See http://bugs.python.org/issue44667
1651 source = 'b = 1\n\n#test'
1652 expected_tokens = [
1653 TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
1654 TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'),
1655 TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'),
1656 TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'),
1657 TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'),
1658 TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'),
1659 TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'),
1660 TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'),
1661 TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='')
1662 ]
1663
1664 tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
1665 self.assertEqual(tokens, expected_tokens)
1666
1667 def test_newline_and_space_at_the_end_of_the_source_without_newline(self):
1668 # See https://github.com/python/cpython/issues/105435
1669 source = 'a\n '
1670 expected_tokens = [
1671 TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
1672 TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'),
1673 TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'),
1674 TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '),
1675 TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='')
1676 ]
1677
1678 tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
1679 self.assertEqual(tokens, expected_tokens)
1680
1681 def test_invalid_character_in_fstring_middle(self):
1682 # See gh-103824
1683 script = b'''F"""
1684 \xe5"""'''
1685
1686 with os_helper.temp_dir() as temp_dir:
1687 filename = os.path.join(temp_dir, "script.py")
1688 with open(filename, 'wb') as file:
1689 file.write(script)
1690 rs, _ = run_python_until_end(filename)
1691 self.assertIn(b"SyntaxError", rs.err)
1692
1693
1694 class UntokenizeTest(TestCase):
1695
1696 def test_bad_input_order(self):
1697 # raise if the start row precedes the previous end row
1698 u = Untokenizer()
1699 u.prev_row = 2
1700 u.prev_col = 2
1701 with self.assertRaises(ValueError) as cm:
1702 u.add_whitespace((1,3))
1703 self.assertEqual(cm.exception.args[0],
1704 'start (1,3) precedes previous end (2,2)')
1705 # raise if the start column precedes the previous end column in the same row
1706 self.assertRaises(ValueError, u.add_whitespace, (2,1))
1707
1708 def test_backslash_continuation(self):
1709 # The problem is that <whitespace>\<newline> leaves no token
1710 u = Untokenizer()
1711 u.prev_row = 1
1712 u.prev_col = 1
1713 u.tokens = []
1714 u.add_whitespace((2, 0))
1715 self.assertEqual(u.tokens, ['\\\n'])
1716 u.prev_row = 2
1717 u.add_whitespace((4, 4))
1718 self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])
1719 TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')
1720
1721 def test_iter_compat(self):
1722 u = Untokenizer()
1723 token = (NAME, 'Hello')
1724 tokens = [(ENCODING, 'utf-8'), token]
1725 u.compat(token, iter([]))
1726 self.assertEqual(u.tokens, ["Hello "])
1727 u = Untokenizer()
1728 self.assertEqual(u.untokenize(iter([token])), 'Hello ')
1729 u = Untokenizer()
1730 self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
1731 self.assertEqual(u.encoding, 'utf-8')
1732 self.assertEqual(untokenize(iter(tokens)), b'Hello ')
1733
1734
1735 class TestRoundtrip(TestCase):
1736
1737 def check_roundtrip(self, f):
1738 """
1739 Test roundtrip for `untokenize`. `f` is an open file or a string.
1740 The source code in f is tokenized to both 5- and 2-tuples.
1741 Both sequences are converted back to source code via
1742 tokenize.untokenize(), and the latter tokenized again to 2-tuples.
1743 The test fails if the three 2-tuple tokenizations do not match.
1744
1745 When untokenize bugs are fixed, untokenize with 5-tuples should
1746 reproduce code that does not contain a backslash continuation
1747 following spaces. A proper test should test this.
1748 """
1749 # Get source code and original tokenizations
1750 if isinstance(f, str):
1751 code = f.encode('utf-8')
1752 else:
1753 code = f.read()
1754 readline = iter(code.splitlines(keepends=True)).__next__
1755 tokens5 = list(tokenize(readline))
1756 tokens2 = [tok[:2] for tok in tokens5]
1757 # Reproduce tokens2 from pairs
1758 bytes_from2 = untokenize(tokens2)
1759 readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
1760 tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
1761 self.assertEqual(tokens2_from2, tokens2)
1762 # Reproduce tokens2 from 5-tuples
1763 bytes_from5 = untokenize(tokens5)
1764 readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
1765 tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
1766 self.assertEqual(tokens2_from5, tokens2)
1767
1768 def check_line_extraction(self, f):
1769 if isinstance(f, str):
1770 code = f.encode('utf-8')
1771 else:
1772 code = f.read()
1773 readline = iter(code.splitlines(keepends=True)).__next__
1774 for tok in tokenize(readline):
1775 if tok.type in {ENCODING, ENDMARKER}:
1776 continue
1777 self.assertEqual(tok.string, tok.line[tok.start[1]: tok.end[1]])
1778
1779 def test_roundtrip(self):
1780 # There are some standard formatting practices that are easy to get right.
1781
1782 self.check_roundtrip("if x == 1:\n"
1783 " print(x)\n")
1784 self.check_roundtrip("# This is a comment\n"
1785 "# This also\n")
1786
1787 # Some people use different formatting conventions, which makes
1788 # untokenize a little trickier. Note that this test involves trailing
1789 # whitespace after the colon, which is easy to overlook in the source
1790 # strings below.
1791
1792 self.check_roundtrip("if x == 1 : \n"
1793 " print(x)\n")
1794 fn = support.findfile("tokenize_tests.txt")
1795 with open(fn, 'rb') as f:
1796 self.check_roundtrip(f)
1797 self.check_roundtrip("if x == 1:\n"
1798 " # A comment by itself.\n"
1799 " print(x) # Comment here, too.\n"
1800 " # Another comment.\n"
1801 "after_if = True\n")
1802 self.check_roundtrip("if (x # The comments need to go in the right place\n"
1803 " == 1):\n"
1804 " print('x==1')\n")
1805 self.check_roundtrip("class Test: # A comment here\n"
1806 " # A comment with weird indent\n"
1807 " after_com = 5\n"
1808 " def x(m): return m*5 # a one liner\n"
1809 " def y(m): # A whitespace after the colon\n"
1810 " return y*4 # 3-space indent\n")
1811
1812 # Some error-handling code
1813 self.check_roundtrip("try: import somemodule\n"
1814 "except ImportError: # comment\n"
1815 " print('Can not import' # comment2\n)"
1816 "else: print('Loaded')\n")
1817
1818 def test_continuation(self):
1819 # Balancing continuation
1820 self.check_roundtrip("a = (3,4, \n"
1821 "5,6)\n"
1822 "y = [3, 4,\n"
1823 "5]\n"
1824 "z = {'a': 5,\n"
1825 "'b':15, 'c':True}\n"
1826 "x = len(y) + 5 - a[\n"
1827 "3] - a[2]\n"
1828 "+ len(z) - z[\n"
1829 "'b']\n")
1830
1831 def test_backslash_continuation(self):
1832 # Backslash means line continuation, except for comments
1833 self.check_roundtrip("x=1+\\\n"
1834 "1\n"
1835 "# This is a comment\\\n"
1836 "# This also\n")
1837 self.check_roundtrip("# Comment \\\n"
1838 "x = 0")
1839
1840 def test_string_concatenation(self):
1841 # Two string literals on the same line
1842 self.check_roundtrip("'' ''")
1843
1844 def test_random_files(self):
1845 # Test roundtrip on random python modules.
1846 # Pass the '-ucpu' option to process the full directory.
1847
1848 import glob, random
1849 fn = support.findfile("tokenize_tests.txt")
1850 tempdir = os.path.dirname(fn) or os.curdir
1851 testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))
1852
1853 # Tokenize is broken on test_unicode_identifiers.py because regular
1854 # expressions are broken on the obscure unicode identifiers in it. *sigh*
1855 # With roundtrip extended to test the 5-tuple mode of untokenize,
1856 # 7 more testfiles fail. Remove them also until the failure is diagnosed.
1857
1858 testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
1859
1860 # TODO: Remove this once we can untokenize PEP 701 syntax
1861 testfiles.remove(os.path.join(tempdir, "test_fstring.py"))
1862
1863 for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
1864 testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)
1865
1866 if not support.is_resource_enabled("cpu"):
1867 testfiles = random.sample(testfiles, 10)
1868
1869 for testfile in testfiles:
1870 if support.verbose >= 2:
1871 print('tokenize', testfile)
1872 with open(testfile, 'rb') as f:
1873 with self.subTest(file=testfile):
1874 self.check_roundtrip(f)
1875 self.check_line_extraction(f)
1876
1877
1878 def roundtrip(self, code):
1879 if isinstance(code, str):
1880 code = code.encode('utf-8')
1881 return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
1882
1883 def test_indentation_semantics_retained(self):
1884 """
1885 Ensure that although whitespace might be mutated in a roundtrip,
1886 the semantic meaning of the indentation remains consistent.
1887 """
1888 code = "if False:\n\tx=3\n\tx=3\n"
1889 codelines = self.roundtrip(code).split('\n')
1890 self.assertEqual(codelines[1], codelines[2])
1891 self.check_roundtrip(code)
1892
1893
1894 class InvalidPythonTests(TestCase):
1895 def test_number_followed_by_name(self):
1896 # See gh-105549
1897 source = "2sin(x)"
1898 expected_tokens = [
1899 TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'),
1900 TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'),
1901 TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'),
1902 TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'),
1903 TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'),
1904 TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'),
1905 TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
1906 ]
1907
1908 tokens = list(generate_tokens(StringIO(source).readline))
1909 self.assertEqual(tokens, expected_tokens)
1910
1911 def test_number_starting_with_zero(self):
1912 source = "01234"
1913 expected_tokens = [
1914 TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'),
1915 TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'),
1916 TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
1917 ]
1918
1919 tokens = list(generate_tokens(StringIO(source).readline))
1920 self.assertEqual(tokens, expected_tokens)
1921
1922 class CTokenizeTest(TestCase):
1923 def check_tokenize(self, s, expected):
1924 # Format the tokens in s in a table format.
1925 # The ENDMARKER and final NEWLINE are omitted.
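# Note: in the expected tables below, NEWLINE tokens carry an empty string
# and INDENT/DEDENT tokens are reported with a column offset of -1; that is
# how the C tokenizer exposes them here.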
1926 f = StringIO(s)
1927 with self.subTest(source=s):
1928 result = stringify_tokens_from_source(
1929 _generate_tokens_from_c_tokenizer(f.readline), s
1930 )
1931 self.assertEqual(result, expected.rstrip().splitlines())
1932
1933 def test_encoding(self):
1934 def readline(encoding):
1935 yield "1+1".encode(encoding)
1936
1937 expected = [
1938 TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'),
1939 TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1'),
1940 TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'),
1941 TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'),
1942 TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
1943 ]
1944 for encoding in ["utf-8", "latin-1", "utf-16"]:
1945 with self.subTest(encoding=encoding):
1946 tokens = list(_generate_tokens_from_c_tokenizer(
1947 readline(encoding).__next__,
1948 extra_tokens=True,
1949 encoding=encoding,
1950 ))
1951 self.assertEqual(tokens, expected)
1952
1953 def test_int(self):
1954
1955 self.check_tokenize('0xff <= 255', """\
1956 NUMBER '0xff' (1, 0) (1, 4)
1957 LESSEQUAL '<=' (1, 5) (1, 7)
1958 NUMBER '255' (1, 8) (1, 11)
1959 """)
1960
1961 self.check_tokenize('0b10 <= 255', """\
1962 NUMBER '0b10' (1, 0) (1, 4)
1963 LESSEQUAL '<=' (1, 5) (1, 7)
1964 NUMBER '255' (1, 8) (1, 11)
1965 """)
1966
1967 self.check_tokenize('0o123 <= 0O123', """\
1968 NUMBER '0o123' (1, 0) (1, 5)
1969 LESSEQUAL '<=' (1, 6) (1, 8)
1970 NUMBER '0O123' (1, 9) (1, 14)
1971 """)
1972
1973 self.check_tokenize('1234567 > ~0x15', """\
1974 NUMBER '1234567' (1, 0) (1, 7)
1975 GREATER '>' (1, 8) (1, 9)
1976 TILDE '~' (1, 10) (1, 11)
1977 NUMBER '0x15' (1, 11) (1, 15)
1978 """)
1979
1980 self.check_tokenize('2134568 != 1231515', """\
1981 NUMBER '2134568' (1, 0) (1, 7)
1982 NOTEQUAL '!=' (1, 8) (1, 10)
1983 NUMBER '1231515' (1, 11) (1, 18)
1984 """)
1985
1986 self.check_tokenize('(-124561-1) & 200000000', """\
1987 LPAR '(' (1, 0) (1, 1)
1988 MINUS '-' (1, 1) (1, 2)
1989 NUMBER '124561' (1, 2) (1, 8)
1990 MINUS '-' (1, 8) (1, 9)
1991 NUMBER '1' (1, 9) (1, 10)
1992 RPAR ')' (1, 10) (1, 11)
1993 AMPER '&' (1, 12) (1, 13)
1994 NUMBER '200000000' (1, 14) (1, 23)
1995 """)
1996
1997 self.check_tokenize('0xdeadbeef != -1', """\
1998 NUMBER '0xdeadbeef' (1, 0) (1, 10)
1999 NOTEQUAL '!=' (1, 11) (1, 13)
2000 MINUS '-' (1, 14) (1, 15)
2001 NUMBER '1' (1, 15) (1, 16)
2002 """)
2003
2004 self.check_tokenize('0xdeadc0de & 12345', """\
2005 NUMBER '0xdeadc0de' (1, 0) (1, 10)
2006 AMPER '&' (1, 11) (1, 12)
2007 NUMBER '12345' (1, 13) (1, 18)
2008 """)
2009
2010 self.check_tokenize('0xFF & 0x15 | 1234', """\
2011 NUMBER '0xFF' (1, 0) (1, 4)
2012 AMPER '&' (1, 5) (1, 6)
2013 NUMBER '0x15' (1, 7) (1, 11)
2014 VBAR '|' (1, 12) (1, 13)
2015 NUMBER '1234' (1, 14) (1, 18)
2016 """)
2017
2018 def test_float(self):
2019
2020 self.check_tokenize('x = 3.14159', """\
2021 NAME 'x' (1, 0) (1, 1)
2022 EQUAL '=' (1, 2) (1, 3)
2023 NUMBER '3.14159' (1, 4) (1, 11)
2024 """)
2025
2026 self.check_tokenize('x = 314159.', """\
2027 NAME 'x' (1, 0) (1, 1)
2028 EQUAL '=' (1, 2) (1, 3)
2029 NUMBER '314159.' (1, 4) (1, 11)
2030 """)
2031
2032 self.check_tokenize('x = .314159', """\
2033 NAME 'x' (1, 0) (1, 1)
2034 EQUAL '=' (1, 2) (1, 3)
2035 NUMBER '.314159' (1, 4) (1, 11)
2036 """)
2037
2038 self.check_tokenize('x = 3e14159', """\
2039 NAME 'x' (1, 0) (1, 1)
2040 EQUAL '=' (1, 2) (1, 3)
2041 NUMBER '3e14159' (1, 4) (1, 11)
2042 """)
2043
2044 self.check_tokenize('x = 3E123', """\
2045 NAME 'x' (1, 0) (1, 1)
2046 EQUAL '=' (1, 2) (1, 3)
2047 NUMBER '3E123' (1, 4) (1, 9)
2048 """)
2049
2050 self.check_tokenize('x+y = 3e-1230', """\
2051 NAME 'x' (1, 0) (1, 1)
2052 PLUS '+' (1, 1) (1, 2)
2053 NAME 'y' (1, 2) (1, 3)
2054 EQUAL '=' (1, 4) (1, 5)
2055 NUMBER '3e-1230' (1, 6) (1, 13)
2056 """)
2057
2058 self.check_tokenize('x = 3.14e159', """\
2059 NAME 'x' (1, 0) (1, 1)
2060 EQUAL '=' (1, 2) (1, 3)
2061 NUMBER '3.14e159' (1, 4) (1, 12)
2062 """)
2063
2064 def test_string(self):
2065
2066 self.check_tokenize('x = \'\'; y = ""', """\
2067 NAME 'x' (1, 0) (1, 1)
2068 EQUAL '=' (1, 2) (1, 3)
2069 STRING "''" (1, 4) (1, 6)
2070 SEMI ';' (1, 6) (1, 7)
2071 NAME 'y' (1, 8) (1, 9)
2072 EQUAL '=' (1, 10) (1, 11)
2073 STRING '""' (1, 12) (1, 14)
2074 """)
2075
2076 self.check_tokenize('x = \'"\'; y = "\'"', """\
2077 NAME 'x' (1, 0) (1, 1)
2078 EQUAL '=' (1, 2) (1, 3)
2079 STRING '\\'"\\'' (1, 4) (1, 7)
2080 SEMI ';' (1, 7) (1, 8)
2081 NAME 'y' (1, 9) (1, 10)
2082 EQUAL '=' (1, 11) (1, 12)
2083 STRING '"\\'"' (1, 13) (1, 16)
2084 """)
2085
2086 self.check_tokenize('x = "doesn\'t "shrink", does it"', """\
2087 NAME 'x' (1, 0) (1, 1)
2088 EQUAL '=' (1, 2) (1, 3)
2089 STRING '"doesn\\'t "' (1, 4) (1, 14)
2090 NAME 'shrink' (1, 14) (1, 20)
2091 STRING '", does it"' (1, 20) (1, 31)
2092 """)
2093
2094 self.check_tokenize("x = 'abc' + 'ABC'", """\
2095 NAME 'x' (1, 0) (1, 1)
2096 EQUAL '=' (1, 2) (1, 3)
2097 STRING "'abc'" (1, 4) (1, 9)
2098 PLUS '+' (1, 10) (1, 11)
2099 STRING "'ABC'" (1, 12) (1, 17)
2100 """)
2101
2102 self.check_tokenize('y = "ABC" + "ABC"', """\
2103 NAME 'y' (1, 0) (1, 1)
2104 EQUAL '=' (1, 2) (1, 3)
2105 STRING '"ABC"' (1, 4) (1, 9)
2106 PLUS '+' (1, 10) (1, 11)
2107 STRING '"ABC"' (1, 12) (1, 17)
2108 """)
2109
2110 self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
2111 NAME 'x' (1, 0) (1, 1)
2112 EQUAL '=' (1, 2) (1, 3)
2113 STRING "r'abc'" (1, 4) (1, 10)
2114 PLUS '+' (1, 11) (1, 12)
2115 STRING "r'ABC'" (1, 13) (1, 19)
2116 PLUS '+' (1, 20) (1, 21)
2117 STRING "R'ABC'" (1, 22) (1, 28)
2118 PLUS '+' (1, 29) (1, 30)
2119 STRING "R'ABC'" (1, 31) (1, 37)
2120 """)
2121
2122 self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
2123 NAME 'y' (1, 0) (1, 1)
2124 EQUAL '=' (1, 2) (1, 3)
2125 STRING 'r"abc"' (1, 4) (1, 10)
2126 PLUS '+' (1, 11) (1, 12)
2127 STRING 'r"ABC"' (1, 13) (1, 19)
2128 PLUS '+' (1, 20) (1, 21)
2129 STRING 'R"ABC"' (1, 22) (1, 28)
2130 PLUS '+' (1, 29) (1, 30)
2131 STRING 'R"ABC"' (1, 31) (1, 37)
2132 """)
2133
2134 self.check_tokenize("u'abc' + U'abc'", """\
2135 STRING "u'abc'" (1, 0) (1, 6)
2136 PLUS '+' (1, 7) (1, 8)
2137 STRING "U'abc'" (1, 9) (1, 15)
2138 """)
2139
2140 self.check_tokenize('u"abc" + U"abc"', """\
2141 STRING 'u"abc"' (1, 0) (1, 6)
2142 PLUS '+' (1, 7) (1, 8)
2143 STRING 'U"abc"' (1, 9) (1, 15)
2144 """)
2145
2146 self.check_tokenize("b'abc' + B'abc'", """\
2147 STRING "b'abc'" (1, 0) (1, 6)
2148 PLUS '+' (1, 7) (1, 8)
2149 STRING "B'abc'" (1, 9) (1, 15)
2150 """)
2151
2152 self.check_tokenize('b"abc" + B"abc"', """\
2153 STRING 'b"abc"' (1, 0) (1, 6)
2154 PLUS '+' (1, 7) (1, 8)
2155 STRING 'B"abc"' (1, 9) (1, 15)
2156 """)
2157
2158 self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
2159 STRING "br'abc'" (1, 0) (1, 7)
2160 PLUS '+' (1, 8) (1, 9)
2161 STRING "bR'abc'" (1, 10) (1, 17)
2162 PLUS '+' (1, 18) (1, 19)
2163 STRING "Br'abc'" (1, 20) (1, 27)
2164 PLUS '+' (1, 28) (1, 29)
2165 STRING "BR'abc'" (1, 30) (1, 37)
2166 """)
2167
2168 self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
2169 STRING 'br"abc"' (1, 0) (1, 7)
2170 PLUS '+' (1, 8) (1, 9)
2171 STRING 'bR"abc"' (1, 10) (1, 17)
2172 PLUS '+' (1, 18) (1, 19)
2173 STRING 'Br"abc"' (1, 20) (1, 27)
2174 PLUS '+' (1, 28) (1, 29)
2175 STRING 'BR"abc"' (1, 30) (1, 37)
2176 """)
2177
2178 self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
2179 STRING "rb'abc'" (1, 0) (1, 7)
2180 PLUS '+' (1, 8) (1, 9)
2181 STRING "rB'abc'" (1, 10) (1, 17)
2182 PLUS '+' (1, 18) (1, 19)
2183 STRING "Rb'abc'" (1, 20) (1, 27)
2184 PLUS '+' (1, 28) (1, 29)
2185 STRING "RB'abc'" (1, 30) (1, 37)
2186 """)
2187
2188 self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
2189 STRING 'rb"abc"' (1, 0) (1, 7)
2190 PLUS '+' (1, 8) (1, 9)
2191 STRING 'rB"abc"' (1, 10) (1, 17)
2192 PLUS '+' (1, 18) (1, 19)
2193 STRING 'Rb"abc"' (1, 20) (1, 27)
2194 PLUS '+' (1, 28) (1, 29)
2195 STRING 'RB"abc"' (1, 30) (1, 37)
2196 """)
2197
2198 self.check_tokenize('"a\\\nde\\\nfg"', """\
2199 STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
2200 """)
2201
2202 self.check_tokenize('u"a\\\nde"', """\
2203 STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3)
2204 """)
2205
2206 self.check_tokenize('rb"a\\\nd"', """\
2207 STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2)
2208 """)
2209
2210 self.check_tokenize(r'"""a\
2211 b"""', """\
2212 STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
2213 """)
2214 self.check_tokenize(r'u"""a\
2215 b"""', """\
2216 STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
2217 """)
2218 self.check_tokenize(r'rb"""a\
2219 b\
2220 c"""', """\
2221 STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
2222 """)
2223
2224 self.check_tokenize(r'"hola\\\r\ndfgf"', """\
2225 STRING \'"hola\\\\\\\\\\\\r\\\\ndfgf"\' (1, 0) (1, 16)
2226 """)
2227
2228 self.check_tokenize('f"abc"', """\
2229 FSTRING_START 'f"' (1, 0) (1, 2)
2230 FSTRING_MIDDLE 'abc' (1, 2) (1, 5)
2231 FSTRING_END '"' (1, 5) (1, 6)
2232 """)
2233
2234 self.check_tokenize('fR"a{b}c"', """\
2235 FSTRING_START 'fR"' (1, 0) (1, 3)
2236 FSTRING_MIDDLE 'a' (1, 3) (1, 4)
2237 LBRACE '{' (1, 4) (1, 5)
2238 NAME 'b' (1, 5) (1, 6)
2239 RBRACE '}' (1, 6) (1, 7)
2240 FSTRING_MIDDLE 'c' (1, 7) (1, 8)
2241 FSTRING_END '"' (1, 8) (1, 9)
2242 """)
2243
2244 self.check_tokenize('f"""abc"""', """\
2245 FSTRING_START 'f\"""' (1, 0) (1, 4)
2246 FSTRING_MIDDLE 'abc' (1, 4) (1, 7)
2247 FSTRING_END '\"""' (1, 7) (1, 10)
2248 """)
2249
2250 self.check_tokenize(r'f"abc\
2251 def"', """\
2252 FSTRING_START \'f"\' (1, 0) (1, 2)
2253 FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 2) (2, 3)
2254 FSTRING_END '"' (2, 3) (2, 4)
2255 """)
2256
2257 self.check_tokenize('''\
2258 f"{
2259 a}"''', """\
2260 FSTRING_START 'f"' (1, 0) (1, 2)
2261 LBRACE '{' (1, 2) (1, 3)
2262 NAME 'a' (2, 0) (2, 1)
2263 RBRACE '}' (2, 1) (2, 2)
2264 FSTRING_END '"' (2, 2) (2, 3)
2265 """)
2266
2267 self.check_tokenize(r'Rf"abc\
2268 def"', """\
2269 FSTRING_START 'Rf"' (1, 0) (1, 3)
2270 FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 3) (2, 3)
2271 FSTRING_END '"' (2, 3) (2, 4)
2272 """)
2273
2274 self.check_tokenize(r'f"hola\\\r\ndfgf"', """\
2275 FSTRING_START \'f"\' (1, 0) (1, 2)
2276 FSTRING_MIDDLE 'hola\\\\\\\\\\\\r\\\\ndfgf' (1, 2) (1, 16)
2277 FSTRING_END \'"\' (1, 16) (1, 17)
2278 """)
2279
2280 def test_function(self):
2281
2282 self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\
2283 NAME 'def' (1, 0) (1, 3)
2284 NAME 'd22' (1, 4) (1, 7)
2285 LPAR '(' (1, 7) (1, 8)
2286 NAME 'a' (1, 8) (1, 9)
2287 COMMA ',' (1, 9) (1, 10)
2288 NAME 'b' (1, 11) (1, 12)
2289 COMMA ',' (1, 12) (1, 13)
2290 NAME 'c' (1, 14) (1, 15)
2291 EQUAL '=' (1, 15) (1, 16)
2292 NUMBER '2' (1, 16) (1, 17)
2293 COMMA ',' (1, 17) (1, 18)
2294 NAME 'd' (1, 19) (1, 20)
2295 EQUAL '=' (1, 20) (1, 21)
2296 NUMBER '2' (1, 21) (1, 22)
2297 COMMA ',' (1, 22) (1, 23)
2298 STAR '*' (1, 24) (1, 25)
2299 NAME 'k' (1, 25) (1, 26)
2300 RPAR ')' (1, 26) (1, 27)
2301 COLON ':' (1, 27) (1, 28)
2302 NAME 'pass' (1, 29) (1, 33)
2303 """)
2304
2305 self.check_tokenize('def d01v_(a=1, *k, **w): pass', """\
2306 NAME 'def' (1, 0) (1, 3)
2307 NAME 'd01v_' (1, 4) (1, 9)
2308 LPAR '(' (1, 9) (1, 10)
2309 NAME 'a' (1, 10) (1, 11)
2310 EQUAL '=' (1, 11) (1, 12)
2311 NUMBER '1' (1, 12) (1, 13)
2312 COMMA ',' (1, 13) (1, 14)
2313 STAR '*' (1, 15) (1, 16)
2314 NAME 'k' (1, 16) (1, 17)
2315 COMMA ',' (1, 17) (1, 18)
2316 DOUBLESTAR '**' (1, 19) (1, 21)
2317 NAME 'w' (1, 21) (1, 22)
2318 RPAR ')' (1, 22) (1, 23)
2319 COLON ':' (1, 23) (1, 24)
2320 NAME 'pass' (1, 25) (1, 29)
2321 """)
2322
2323 self.check_tokenize('def d23(a: str, b: int=3) -> int: pass', """\
2324 NAME 'def' (1, 0) (1, 3)
2325 NAME 'd23' (1, 4) (1, 7)
2326 LPAR '(' (1, 7) (1, 8)
2327 NAME 'a' (1, 8) (1, 9)
2328 COLON ':' (1, 9) (1, 10)
2329 NAME 'str' (1, 11) (1, 14)
2330 COMMA ',' (1, 14) (1, 15)
2331 NAME 'b' (1, 16) (1, 17)
2332 COLON ':' (1, 17) (1, 18)
2333 NAME 'int' (1, 19) (1, 22)
2334 EQUAL '=' (1, 22) (1, 23)
2335 NUMBER '3' (1, 23) (1, 24)
2336 RPAR ')' (1, 24) (1, 25)
2337 RARROW '->' (1, 26) (1, 28)
2338 NAME 'int' (1, 29) (1, 32)
2339 COLON ':' (1, 32) (1, 33)
2340 NAME 'pass' (1, 34) (1, 38)
2341 """)
2342
2343 def test_comparison(self):
2344
2345 self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
2346 "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
2347 NAME 'if' (1, 0) (1, 2)
2348 NUMBER '1' (1, 3) (1, 4)
2349 LESS '<' (1, 5) (1, 6)
2350 NUMBER '1' (1, 7) (1, 8)
2351 GREATER '>' (1, 9) (1, 10)
2352 NUMBER '1' (1, 11) (1, 12)
2353 EQEQUAL '==' (1, 13) (1, 15)
2354 NUMBER '1' (1, 16) (1, 17)
2355 GREATEREQUAL '>=' (1, 18) (1, 20)
2356 NUMBER '5' (1, 21) (1, 22)
2357 LESSEQUAL '<=' (1, 23) (1, 25)
2358 NUMBER '0x15' (1, 26) (1, 30)
2359 LESSEQUAL '<=' (1, 31) (1, 33)
2360 NUMBER '0x12' (1, 34) (1, 38)
2361 NOTEQUAL '!=' (1, 39) (1, 41)
2362 NUMBER '1' (1, 42) (1, 43)
2363 NAME 'and' (1, 44) (1, 47)
2364 NUMBER '5' (1, 48) (1, 49)
2365 NAME 'in' (1, 50) (1, 52)
2366 NUMBER '1' (1, 53) (1, 54)
2367 NAME 'not' (1, 55) (1, 58)
2368 NAME 'in' (1, 59) (1, 61)
2369 NUMBER '1' (1, 62) (1, 63)
2370 NAME 'is' (1, 64) (1, 66)
2371 NUMBER '1' (1, 67) (1, 68)
2372 NAME 'or' (1, 69) (1, 71)
2373 NUMBER '5' (1, 72) (1, 73)
2374 NAME 'is' (1, 74) (1, 76)
2375 NAME 'not' (1, 77) (1, 80)
2376 NUMBER '1' (1, 81) (1, 82)
2377 COLON ':' (1, 82) (1, 83)
2378 NAME 'pass' (1, 84) (1, 88)
2379 """)
2380
2381 def test_additive(self):
2382
2383 self.check_tokenize('x = 1 - y + 15 - 1 + 0x124 + z + a[5]', """\
2384 NAME 'x' (1, 0) (1, 1)
2385 EQUAL '=' (1, 2) (1, 3)
2386 NUMBER '1' (1, 4) (1, 5)
2387 MINUS '-' (1, 6) (1, 7)
2388 NAME 'y' (1, 8) (1, 9)
2389 PLUS '+' (1, 10) (1, 11)
2390 NUMBER '15' (1, 12) (1, 14)
2391 MINUS '-' (1, 15) (1, 16)
2392 NUMBER '1' (1, 17) (1, 18)
2393 PLUS '+' (1, 19) (1, 20)
2394 NUMBER '0x124' (1, 21) (1, 26)
2395 PLUS '+' (1, 27) (1, 28)
2396 NAME 'z' (1, 29) (1, 30)
2397 PLUS '+' (1, 31) (1, 32)
2398 NAME 'a' (1, 33) (1, 34)
2399 LSQB '[' (1, 34) (1, 35)
2400 NUMBER '5' (1, 35) (1, 36)
2401 RSQB ']' (1, 36) (1, 37)
2402 """)
2403
2404 def test_multiplicative(self):
2405
2406 self.check_tokenize('x = 1//1*1/5*12%0x12@42', """\
2407 NAME 'x' (1, 0) (1, 1)
2408 EQUAL '=' (1, 2) (1, 3)
2409 NUMBER '1' (1, 4) (1, 5)
2410 DOUBLESLASH '//' (1, 5) (1, 7)
2411 NUMBER '1' (1, 7) (1, 8)
2412 STAR '*' (1, 8) (1, 9)
2413 NUMBER '1' (1, 9) (1, 10)
2414 SLASH '/' (1, 10) (1, 11)
2415 NUMBER '5' (1, 11) (1, 12)
2416 STAR '*' (1, 12) (1, 13)
2417 NUMBER '12' (1, 13) (1, 15)
2418 PERCENT '%' (1, 15) (1, 16)
2419 NUMBER '0x12' (1, 16) (1, 20)
2420 AT '@' (1, 20) (1, 21)
2421 NUMBER '42' (1, 21) (1, 23)
2422 """)
2423
2424 def test_unary(self):
2425
2426 self.check_tokenize('~1 ^ 1 & 1 |1 ^ -1', """\
2427 TILDE '~' (1, 0) (1, 1)
2428 NUMBER '1' (1, 1) (1, 2)
2429 CIRCUMFLEX '^' (1, 3) (1, 4)
2430 NUMBER '1' (1, 5) (1, 6)
2431 AMPER '&' (1, 7) (1, 8)
2432 NUMBER '1' (1, 9) (1, 10)
2433 VBAR '|' (1, 11) (1, 12)
2434 NUMBER '1' (1, 12) (1, 13)
2435 CIRCUMFLEX '^' (1, 14) (1, 15)
2436 MINUS '-' (1, 16) (1, 17)
2437 NUMBER '1' (1, 17) (1, 18)
2438 """)
2439
2440 self.check_tokenize('-1*1/1+1*1//1 - ---1**1', """\
2441 MINUS '-' (1, 0) (1, 1)
2442 NUMBER '1' (1, 1) (1, 2)
2443 STAR '*' (1, 2) (1, 3)
2444 NUMBER '1' (1, 3) (1, 4)
2445 SLASH '/' (1, 4) (1, 5)
2446 NUMBER '1' (1, 5) (1, 6)
2447 PLUS '+' (1, 6) (1, 7)
2448 NUMBER '1' (1, 7) (1, 8)
2449 STAR '*' (1, 8) (1, 9)
2450 NUMBER '1' (1, 9) (1, 10)
2451 DOUBLESLASH '//' (1, 10) (1, 12)
2452 NUMBER '1' (1, 12) (1, 13)
2453 MINUS '-' (1, 14) (1, 15)
2454 MINUS '-' (1, 16) (1, 17)
2455 MINUS '-' (1, 17) (1, 18)
2456 MINUS '-' (1, 18) (1, 19)
2457 NUMBER '1' (1, 19) (1, 20)
2458 DOUBLESTAR '**' (1, 20) (1, 22)
2459 NUMBER '1' (1, 22) (1, 23)
2460 """)
2461
2462 def test_selector(self):
2463
2464 self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
2465 NAME 'import' (1, 0) (1, 6)
2466 NAME 'sys' (1, 7) (1, 10)
2467 COMMA ',' (1, 10) (1, 11)
2468 NAME 'time' (1, 12) (1, 16)
2469 NEWLINE '' (1, 16) (1, 16)
2470 NAME 'x' (2, 0) (2, 1)
2471 EQUAL '=' (2, 2) (2, 3)
2472 NAME 'sys' (2, 4) (2, 7)
2473 DOT '.' (2, 7) (2, 8)
2474 NAME 'modules' (2, 8) (2, 15)
2475 LSQB '[' (2, 15) (2, 16)
2476 STRING "'time'" (2, 16) (2, 22)
2477 RSQB ']' (2, 22) (2, 23)
2478 DOT '.' (2, 23) (2, 24)
2479 NAME 'time' (2, 24) (2, 28)
2480 LPAR '(' (2, 28) (2, 29)
2481 RPAR ')' (2, 29) (2, 30)
2482 """)
2483
2484 def test_method(self):
2485
2486 self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\
2487 AT '@' (1, 0) (1, 1)
2488 NAME 'staticmethod' (1, 1) (1, 13)
2489 NEWLINE '' (1, 13) (1, 13)
2490 NAME 'def' (2, 0) (2, 3)
2491 NAME 'foo' (2, 4) (2, 7)
2492 LPAR '(' (2, 7) (2, 8)
2493 NAME 'x' (2, 8) (2, 9)
2494 COMMA ',' (2, 9) (2, 10)
2495 NAME 'y' (2, 10) (2, 11)
2496 RPAR ')' (2, 11) (2, 12)
2497 COLON ':' (2, 12) (2, 13)
2498 NAME 'pass' (2, 14) (2, 18)
2499 """)
2500
2501 def test_tabs(self):
2502
2503 self.check_tokenize('@staticmethod\ndef foo(x,\ty): pass', """\
2504 AT '@' (1, 0) (1, 1)
2505 NAME 'staticmethod' (1, 1) (1, 13)
2506 NEWLINE '' (1, 13) (1, 13)
2507 NAME 'def' (2, 0) (2, 3)
2508 NAME 'foo' (2, 4) (2, 7)
2509 LPAR '(' (2, 7) (2, 8)
2510 NAME 'x' (2, 8) (2, 9)
2511 COMMA ',' (2, 9) (2, 10)
2512 NAME 'y' (2, 11) (2, 12)
2513 RPAR ')' (2, 12) (2, 13)
2514 COLON ':' (2, 13) (2, 14)
2515 NAME 'pass' (2, 15) (2, 19)
2516 """)
2517
2518 def test_async(self):
2519
2520 self.check_tokenize('async = 1', """\
2521 ASYNC 'async' (1, 0) (1, 5)
2522 EQUAL '=' (1, 6) (1, 7)
2523 NUMBER '1' (1, 8) (1, 9)
2524 """)
2525
2526 self.check_tokenize('a = (async = 1)', """\
2527 NAME 'a' (1, 0) (1, 1)
2528 EQUAL '=' (1, 2) (1, 3)
2529 LPAR '(' (1, 4) (1, 5)
2530 ASYNC 'async' (1, 5) (1, 10)
2531 EQUAL '=' (1, 11) (1, 12)
2532 NUMBER '1' (1, 13) (1, 14)
2533 RPAR ')' (1, 14) (1, 15)
2534 """)
2535
2536 self.check_tokenize('async()', """\
2537 ASYNC 'async' (1, 0) (1, 5)
2538 LPAR '(' (1, 5) (1, 6)
2539 RPAR ')' (1, 6) (1, 7)
2540 """)
2541
2542 self.check_tokenize('class async(Bar):pass', """\
2543 NAME 'class' (1, 0) (1, 5)
2544 ASYNC 'async' (1, 6) (1, 11)
2545 LPAR '(' (1, 11) (1, 12)
2546 NAME 'Bar' (1, 12) (1, 15)
2547 RPAR ')' (1, 15) (1, 16)
2548 COLON ':' (1, 16) (1, 17)
2549 NAME 'pass' (1, 17) (1, 21)
2550 """)
2551
2552 self.check_tokenize('class async:pass', """\
2553 NAME 'class' (1, 0) (1, 5)
2554 ASYNC 'async' (1, 6) (1, 11)
2555 COLON ':' (1, 11) (1, 12)
2556 NAME 'pass' (1, 12) (1, 16)
2557 """)
2558
2559 self.check_tokenize('await = 1', """\
2560 AWAIT 'await' (1, 0) (1, 5)
2561 EQUAL '=' (1, 6) (1, 7)
2562 NUMBER '1' (1, 8) (1, 9)
2563 """)
2564
2565 self.check_tokenize('foo.async', """\
2566 NAME 'foo' (1, 0) (1, 3)
2567 DOT '.' (1, 3) (1, 4)
2568 ASYNC 'async' (1, 4) (1, 9)
2569 """)
2570
2571 self.check_tokenize('async for a in b: pass', """\
2572 ASYNC 'async' (1, 0) (1, 5)
2573 NAME 'for' (1, 6) (1, 9)
2574 NAME 'a' (1, 10) (1, 11)
2575 NAME 'in' (1, 12) (1, 14)
2576 NAME 'b' (1, 15) (1, 16)
2577 COLON ':' (1, 16) (1, 17)
2578 NAME 'pass' (1, 18) (1, 22)
2579 """)
2580
2581 self.check_tokenize('async with a as b: pass', """\
2582 ASYNC 'async' (1, 0) (1, 5)
2583 NAME 'with' (1, 6) (1, 10)
2584 NAME 'a' (1, 11) (1, 12)
2585 NAME 'as' (1, 13) (1, 15)
2586 NAME 'b' (1, 16) (1, 17)
2587 COLON ':' (1, 17) (1, 18)
2588 NAME 'pass' (1, 19) (1, 23)
2589 """)
2590
2591 self.check_tokenize('async.foo', """\
2592 ASYNC 'async' (1, 0) (1, 5)
2593 DOT '.' (1, 5) (1, 6)
2594 NAME 'foo' (1, 6) (1, 9)
2595 """)
2596
2597 self.check_tokenize('async', """\
2598 ASYNC 'async' (1, 0) (1, 5)
2599 """)
2600
2601 self.check_tokenize('async\n#comment\nawait', """\
2602 ASYNC 'async' (1, 0) (1, 5)
2603 NEWLINE '' (1, 5) (1, 5)
2604 AWAIT 'await' (3, 0) (3, 5)
2605 """)
2606
2607 self.check_tokenize('async\n...\nawait', """\
2608 ASYNC 'async' (1, 0) (1, 5)
2609 NEWLINE '' (1, 5) (1, 5)
2610 ELLIPSIS '...' (2, 0) (2, 3)
2611 NEWLINE '' (2, 3) (2, 3)
2612 AWAIT 'await' (3, 0) (3, 5)
2613 """)
2614
2615 self.check_tokenize('async\nawait', """\
2616 ASYNC 'async' (1, 0) (1, 5)
2617 NEWLINE '' (1, 5) (1, 5)
2618 AWAIT 'await' (2, 0) (2, 5)
2619 """)
2620
2621 self.check_tokenize('foo.async + 1', """\
2622 NAME 'foo' (1, 0) (1, 3)
2623 DOT '.' (1, 3) (1, 4)
2624 ASYNC 'async' (1, 4) (1, 9)
2625 PLUS '+' (1, 10) (1, 11)
2626 NUMBER '1' (1, 12) (1, 13)
2627 """)
2628
2629 self.check_tokenize('async def foo(): pass', """\
2630 ASYNC 'async' (1, 0) (1, 5)
2631 NAME 'def' (1, 6) (1, 9)
2632 NAME 'foo' (1, 10) (1, 13)
2633 LPAR '(' (1, 13) (1, 14)
2634 RPAR ')' (1, 14) (1, 15)
2635 COLON ':' (1, 15) (1, 16)
2636 NAME 'pass' (1, 17) (1, 21)
2637 """)
2638
2639 self.check_tokenize('''\
2640 async def foo():
2641 def foo(await):
2642 await = 1
2643 if 1:
2644 await
2645 async += 1
2646 ''', """\
2647 ASYNC 'async' (1, 0) (1, 5)
2648 NAME 'def' (1, 6) (1, 9)
2649 NAME 'foo' (1, 10) (1, 13)
2650 LPAR '(' (1, 13) (1, 14)
2651 RPAR ')' (1, 14) (1, 15)
2652 COLON ':' (1, 15) (1, 16)
2653 NEWLINE '' (1, 16) (1, 16)
2654 INDENT '' (2, -1) (2, -1)
2655 NAME 'def' (2, 2) (2, 5)
2656 NAME 'foo' (2, 6) (2, 9)
2657 LPAR '(' (2, 9) (2, 10)
2658 AWAIT 'await' (2, 10) (2, 15)
2659 RPAR ')' (2, 15) (2, 16)
2660 COLON ':' (2, 16) (2, 17)
2661 NEWLINE '' (2, 17) (2, 17)
2662 INDENT '' (3, -1) (3, -1)
2663 AWAIT 'await' (3, 4) (3, 9)
2664 EQUAL '=' (3, 10) (3, 11)
2665 NUMBER '1' (3, 12) (3, 13)
2666 NEWLINE '' (3, 13) (3, 13)
2667 DEDENT '' (4, -1) (4, -1)
2668 NAME 'if' (4, 2) (4, 4)
2669 NUMBER '1' (4, 5) (4, 6)
2670 COLON ':' (4, 6) (4, 7)
2671 NEWLINE '' (4, 7) (4, 7)
2672 INDENT '' (5, -1) (5, -1)
2673 AWAIT 'await' (5, 4) (5, 9)
2674 NEWLINE '' (5, 9) (5, 9)
2675 DEDENT '' (6, -1) (6, -1)
2676 DEDENT '' (6, -1) (6, -1)
2677 ASYNC 'async' (6, 0) (6, 5)
2678 PLUSEQUAL '+=' (6, 6) (6, 8)
2679 NUMBER '1' (6, 9) (6, 10)
2680 NEWLINE '' (6, 10) (6, 10)
2681 """)
2682
2683 self.check_tokenize('async def foo():\n async for i in 1: pass', """\
2684 ASYNC 'async' (1, 0) (1, 5)
2685 NAME 'def' (1, 6) (1, 9)
2686 NAME 'foo' (1, 10) (1, 13)
2687 LPAR '(' (1, 13) (1, 14)
2688 RPAR ')' (1, 14) (1, 15)
2689 COLON ':' (1, 15) (1, 16)
2690 NEWLINE '' (1, 16) (1, 16)
2691 INDENT '' (2, -1) (2, -1)
2692 ASYNC 'async' (2, 2) (2, 7)
2693 NAME 'for' (2, 8) (2, 11)
2694 NAME 'i' (2, 12) (2, 13)
2695 NAME 'in' (2, 14) (2, 16)
2696 NUMBER '1' (2, 17) (2, 18)
2697 COLON ':' (2, 18) (2, 19)
2698 NAME 'pass' (2, 20) (2, 24)
2699 DEDENT '' (2, -1) (2, -1)
2700 """)
2701
2702 self.check_tokenize('async def foo(async): await', """\
2703 ASYNC 'async' (1, 0) (1, 5)
2704 NAME 'def' (1, 6) (1, 9)
2705 NAME 'foo' (1, 10) (1, 13)
2706 LPAR '(' (1, 13) (1, 14)
2707 ASYNC 'async' (1, 14) (1, 19)
2708 RPAR ')' (1, 19) (1, 20)
2709 COLON ':' (1, 20) (1, 21)
2710 AWAIT 'await' (1, 22) (1, 27)
2711 """)
2712
2713 self.check_tokenize('''\
2714 def f():
2715
2716 def baz(): pass
2717 async def bar(): pass
2718
2719 await = 2''', """\
2720 NAME 'def' (1, 0) (1, 3)
2721 NAME 'f' (1, 4) (1, 5)
2722 LPAR '(' (1, 5) (1, 6)
2723 RPAR ')' (1, 6) (1, 7)
2724 COLON ':' (1, 7) (1, 8)
2725 NEWLINE '' (1, 8) (1, 8)
2726 INDENT '' (3, -1) (3, -1)
2727 NAME 'def' (3, 2) (3, 5)
2728 NAME 'baz' (3, 6) (3, 9)
2729 LPAR '(' (3, 9) (3, 10)
2730 RPAR ')' (3, 10) (3, 11)
2731 COLON ':' (3, 11) (3, 12)
2732 NAME 'pass' (3, 13) (3, 17)
2733 NEWLINE '' (3, 17) (3, 17)
2734 ASYNC 'async' (4, 2) (4, 7)
2735 NAME 'def' (4, 8) (4, 11)
2736 NAME 'bar' (4, 12) (4, 15)
2737 LPAR '(' (4, 15) (4, 16)
2738 RPAR ')' (4, 16) (4, 17)
2739 COLON ':' (4, 17) (4, 18)
2740 NAME 'pass' (4, 19) (4, 23)
2741 NEWLINE '' (4, 23) (4, 23)
2742 AWAIT 'await' (6, 2) (6, 7)
2743 EQUAL '=' (6, 8) (6, 9)
2744 NUMBER '2' (6, 10) (6, 11)
2745 DEDENT '' (6, -1) (6, -1)
2746 """)
2747
2748 self.check_tokenize('''\
2749 async def f():
2750
2751 def baz(): pass
2752 async def bar(): pass
2753
2754 await = 2''', """\
2755 ASYNC 'async' (1, 0) (1, 5)
2756 NAME 'def' (1, 6) (1, 9)
2757 NAME 'f' (1, 10) (1, 11)
2758 LPAR '(' (1, 11) (1, 12)
2759 RPAR ')' (1, 12) (1, 13)
2760 COLON ':' (1, 13) (1, 14)
2761 NEWLINE '' (1, 14) (1, 14)
2762 INDENT '' (3, -1) (3, -1)
2763 NAME 'def' (3, 2) (3, 5)
2764 NAME 'baz' (3, 6) (3, 9)
2765 LPAR '(' (3, 9) (3, 10)
2766 RPAR ')' (3, 10) (3, 11)
2767 COLON ':' (3, 11) (3, 12)
2768 NAME 'pass' (3, 13) (3, 17)
2769 NEWLINE '' (3, 17) (3, 17)
2770 ASYNC 'async' (4, 2) (4, 7)
2771 NAME 'def' (4, 8) (4, 11)
2772 NAME 'bar' (4, 12) (4, 15)
2773 LPAR '(' (4, 15) (4, 16)
2774 RPAR ')' (4, 16) (4, 17)
2775 COLON ':' (4, 17) (4, 18)
2776 NAME 'pass' (4, 19) (4, 23)
2777 NEWLINE '' (4, 23) (4, 23)
2778 AWAIT 'await' (6, 2) (6, 7)
2779 EQUAL '=' (6, 8) (6, 9)
2780 NUMBER '2' (6, 10) (6, 11)
2781 DEDENT '' (6, -1) (6, -1)
2782 """)
2783
2784 def test_unicode(self):
2785
2786 self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
2787 NAME 'Örter' (1, 0) (1, 5)
2788 EQUAL '=' (1, 6) (1, 7)
2789 STRING "u'places'" (1, 8) (1, 17)
2790 NEWLINE '' (1, 17) (1, 17)
2791 NAME 'grün' (2, 0) (2, 4)
2792 EQUAL '=' (2, 5) (2, 6)
2793 STRING "U'green'" (2, 7) (2, 15)
2794 """)
2795
2796 def test_invalid_syntax(self):
2797 def get_tokens(string):
2798 the_string = StringIO(string)
2799 return list(_generate_tokens_from_c_tokenizer(the_string.readline))
2800
2801 for case in [
2802 "(1+2]",
2803 "(1+2}",
2804 "{1+2]",
2805 "1_",
2806 "1.2_",
2807 "1e2_",
2808 "1e+",
2809
2810 "\xa0",
2811 "€",
2812 "0b12",
2813 "0b1_2",
2814 "0b2",
2815 "0b1_",
2816 "0b",
2817 "0o18",
2818 "0o1_8",
2819 "0o8",
2820 "0o1_",
2821 "0o",
2822 "0x1_",
2823 "0x",
2824 "1_",
2825 "012",
2826 "1.2_",
2827 "1e2_",
2828 "1e+",
2829 "'sdfsdf",
2830 "'''sdfsdf''",
2831 "("*1000+"a"+")"*1000,
2832 "]",
2833 ]:
2834 with self.subTest(case=case):
2835 self.assertRaises(TokenError, get_tokens, case)
2836
2837 def test_max_indent(self):
2838 MAXINDENT = 100
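# MAXINDENT mirrors the C tokenizer's limit of 100 indentation levels:
# staying one level under it must tokenize and compile, while reaching it
# must raise IndentationError.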
2839
2840 def generate_source(indents):
2841 source = ''.join((' ' * x) + 'if True:\n' for x in range(indents))
2842 source += ' ' * indents + 'pass\n'
2843 return source
2844
2845 valid = generate_source(MAXINDENT - 1)
2846 the_input = StringIO(valid)
2847 tokens = list(_generate_tokens_from_c_tokenizer(the_input.readline))
2848 self.assertEqual(tokens[-2].type, DEDENT)
2849 self.assertEqual(tokens[-1].type, ENDMARKER)
2850 compile(valid, "<string>", "exec")
2851
2852 invalid = generate_source(MAXINDENT)
2853 the_input = StringIO(invalid)
2854 self.assertRaises(IndentationError, lambda: list(_generate_tokens_from_c_tokenizer(the_input.readline)))
2855 self.assertRaises(
2856 IndentationError, compile, invalid, "<string>", "exec"
2857 )
2858
2859 def test_continuation_lines_indentation(self):
2860 def get_tokens(string):
2861 the_string = StringIO(string)
2862 return [(kind, string) for (kind, string, *_)
2863 in _generate_tokens_from_c_tokenizer(the_string.readline)]
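# Positions are dropped so that sources differing only in backslash
# continuation lines can be compared token-for-token.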
2864
2865 code = dedent("""
2866 def fib(n):
2867 \\
2868 '''Print a Fibonacci series up to n.'''
2869 \\
2870 a, b = 0, 1
2871 """)
2872
2873 self.check_tokenize(code, """\
2874 NAME 'def' (2, 0) (2, 3)
2875 NAME 'fib' (2, 4) (2, 7)
2876 LPAR '(' (2, 7) (2, 8)
2877 NAME 'n' (2, 8) (2, 9)
2878 RPAR ')' (2, 9) (2, 10)
2879 COLON ':' (2, 10) (2, 11)
2880 NEWLINE '' (2, 11) (2, 11)
2881 INDENT '' (4, -1) (4, -1)
2882 STRING "'''Print a Fibonacci series up to n.'''" (4, 0) (4, 39)
2883 NEWLINE '' (4, 39) (4, 39)
2884 NAME 'a' (6, 0) (6, 1)
2885 COMMA ',' (6, 1) (6, 2)
2886 NAME 'b' (6, 3) (6, 4)
2887 EQUAL '=' (6, 5) (6, 6)
2888 NUMBER '0' (6, 7) (6, 8)
2889 COMMA ',' (6, 8) (6, 9)
2890 NUMBER '1' (6, 10) (6, 11)
2891 NEWLINE '' (6, 11) (6, 11)
2892 DEDENT '' (6, -1) (6, -1)
2893 """)
2894
2895 code_no_cont = dedent("""
2896 def fib(n):
2897 '''Print a Fibonacci series up to n.'''
2898 a, b = 0, 1
2899 """)
2900
2901 self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
2902
2903 code = dedent("""
2904 pass
2905 \\
2906
2907 pass
2908 """)
2909
2910 self.check_tokenize(code, """\
2911 NAME 'pass' (2, 0) (2, 4)
2912 NEWLINE '' (2, 4) (2, 4)
2913 NAME 'pass' (5, 0) (5, 4)
2914 NEWLINE '' (5, 4) (5, 4)
2915 """)
2916
2917 code_no_cont = dedent("""
2918 pass
2919 pass
2920 """)
2921
2922 self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
2923
2924 code = dedent("""
2925 if x:
2926 y = 1
2927 \\
2928 \\
2929 \\
2930 \\
2931 foo = 1
2932 """)
2933
2934 self.check_tokenize(code, """\
2935 NAME 'if' (2, 0) (2, 2)
2936 NAME 'x' (2, 3) (2, 4)
2937 COLON ':' (2, 4) (2, 5)
2938 NEWLINE '' (2, 5) (2, 5)
2939 INDENT '' (3, -1) (3, -1)
2940 NAME 'y' (3, 4) (3, 5)
2941 EQUAL '=' (3, 6) (3, 7)
2942 NUMBER '1' (3, 8) (3, 9)
2943 NEWLINE '' (3, 9) (3, 9)
2944 NAME 'foo' (8, 4) (8, 7)
2945 EQUAL '=' (8, 8) (8, 9)
2946 NUMBER '1' (8, 10) (8, 11)
2947 NEWLINE '' (8, 11) (8, 11)
2948 DEDENT '' (8, -1) (8, -1)
2949 """)
2950
2951 code_no_cont = dedent("""
2952 if x:
2953 y = 1
2954 foo = 1
2955 """)
2956
2957 self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
2958
2959
2960 class CTokenizerBufferTests(unittest.TestCase):
2961 def test_newline_at_the_end_of_buffer(self):
2962 # See issue 99581: Make sure that if we need to add a new line at the
2963 # end of the buffer, we have enough space in the buffer, especially when
2964 # the current line is as long as the available buffer space.
2965 test_script = f"""\
2966 #coding: latin-1
2967 #{"a"*10000}
2968 #{"a"*10002}"""
2969 with os_helper.temp_dir() as temp_dir:
2970 file_name = make_script(temp_dir, 'foo', test_script)
2971 run_test_script(file_name)
2972
2973
2974 if __name__ == "__main__":
2975 unittest.main()