(root)/
Python-3.11.7/
Lib/
test/
re_tests.py
       1  #!/usr/bin/env python3
       2  # -*- mode: python -*-
       3  
       4  # Re test suite and benchmark suite v1.5
       5  
       6  # The 3 possible outcomes for each pattern
       7  [SUCCEED, FAIL, SYNTAX_ERROR] = range(3)
       8  
       9  # Benchmark suite (needs expansion)
      10  #
      11  # The benchmark suite does not test correctness, just speed.  The
      12  # first element of each tuple is the regex pattern; the second is a
      13  # string to match it against.  The benchmarking code will embed the
      14  # second string inside several sizes of padding, to test how regex
      15  # matching performs on large strings.
      16  
      17  benchmarks = [
      18  
      19      # test common prefix
      20      ('Python|Perl', 'Perl'),    # Alternation
      21      ('(Python|Perl)', 'Perl'),  # Grouped alternation
      22  
      23      ('Python|Perl|Tcl', 'Perl'),        # Alternation
      24      ('(Python|Perl|Tcl)', 'Perl'),      # Grouped alternation
      25  
      26      ('(Python)\\1', 'PythonPython'),    # Backreference
      27      ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # Disable the fastmap optimization
      28      ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # A few sets
      29  
      30      ('Python', 'Python'),               # Simple text literal
      31      ('.*Python', 'Python'),             # Bad text literal
      32      ('.*Python.*', 'Python'),           # Worse text literal
      33      ('.*(Python)', 'Python'),           # Bad text literal with grouping
      34  
      35  ]
      36  
      37  # Test suite (for verifying correctness)
      38  #
      39  # The test suite is a list of 5- or 3-tuples.  The 5 parts of a
      40  # complete tuple are:
      41  # element 0: a string containing the pattern
      42  #         1: the string to match against the pattern
      43  #         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
#         3: a string that will be eval()'ed to produce a test string.
#            This is an arbitrary Python expression; the available
#            variables are "found" (the whole match) and "g1", "g2", ...
#            up to "g99", which contain the contents of each group, or
#            the string 'None' if the group wasn't given a value, or the
#            string 'Error' if the group index was out of range;
#            named groups can also be referenced by their own names
#            (e.g. "id" for a group written as (?P<id>...));
#            also "groups", the return value of m.group() (a tuple).
      51  #         4: The expected result of evaluating the expression.
      52  #            If the two don't match, an error is reported.
      53  #
      54  # If the regex isn't expected to work, the latter two elements can be omitted.
      55  
# Table of regex test cases.  Every entry follows the tuple layout described
# in the comment block above: (pattern, subject, expected outcome[, expression
# to evaluate, expected value of that expression]).
tests = [
    # Test ?P< and ?P= extensions
    ('(?P<foo_123', '', SYNTAX_ERROR),      # Unterminated group identifier
    ('(?P<1>a)', '', SYNTAX_ERROR),         # Begins with a digit
    ('(?P<!>a)', '', SYNTAX_ERROR),         # Begins with an illegal char
    ('(?P<foo!>a)', '', SYNTAX_ERROR),      # Contains an illegal char

    # Same tests, for the ?P= form
    ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=foo_124', 'aa', SYNTAX_ERROR),  # Backref to undefined group (foo_124); also unterminated

    ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
    ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),

    # Test octal escapes
    ('\\1', 'a', SYNTAX_ERROR),    # Backreference (the pattern defines no group)
    ('[\\1]', '\1', SUCCEED, 'found', '\1'),  # Character
    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
    ('\\141', 'a', SUCCEED, 'found', 'a'),
    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),

    # Test \0 is handled everywhere
    (r'\0', '\0', SUCCEED, 'found', '\0'),
    (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
    (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
    (r'[^a\0]', '\0', FAIL),

    # Test various letter escapes
    (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
    (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
    # NOTE: not an error under PCRE/PRE:
    (r'\u', '', SYNTAX_ERROR),    # A Perl escape
    # (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
    # new \x semantics: the live entries expect FAIL; the commented-out
    # entries below are the same cases with a SUCCEED expectation.
    (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
    (r'\x00f', '\017', FAIL, 'found', chr(15)),
    (r'\x00fe', '\376', FAIL, 'found', chr(254)),
    # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
    # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
    # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),

    (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
     SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),

    # Test that . only matches \n in DOTALL mode
    ('a.b', 'acb', SUCCEED, 'found', 'acb'),
    ('a.b', 'a\nb', FAIL),
    ('a.*b', 'acc\nccb', FAIL),
    ('a.{4,5}b', 'acc\nccb', FAIL),
    ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
    ('(?s)a.*b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
    ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
    ('(?s)a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),

    (')', '', SYNTAX_ERROR),           # Unmatched right parenthesis
    ('', '', SUCCEED, 'found', ''),    # Empty pattern
    ('abc', 'abc', SUCCEED, 'found', 'abc'),
    ('abc', 'xbc', FAIL),
    ('abc', 'axc', FAIL),
    ('abc', 'abx', FAIL),
    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab+bc', 'abc', FAIL),
    ('ab+bc', 'abq', FAIL),
    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab?bc', 'abbbbc', FAIL),
    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abcc', FAIL),
    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'aabc', FAIL),
    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
    ('^', 'abc', SUCCEED, 'found+"-"', '-'),
    ('$', 'abc', SUCCEED, 'found+"-"', '-'),
    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
    ('a.*c', 'axyzd', FAIL),
    ('a[bc]d', 'abc', FAIL),
    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
    ('a[b-d]e', 'abd', FAIL),
    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
    # NOTE: not an error under PCRE/PRE:
    # ('a[b-]', 'a-', SYNTAX_ERROR),
    ('a[]b', '-', SYNTAX_ERROR),
    ('a[', '-', SYNTAX_ERROR),
    ('a\\', '-', SYNTAX_ERROR),
    ('abc)', '-', SYNTAX_ERROR),
    ('(abc', '-', SYNTAX_ERROR),
    ('a]', 'a]', SUCCEED, 'found', 'a]'),
    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    ('a[\\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
    ('a[^bc]d', 'abd', FAIL),
    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('a[^-b]c', 'a-c', FAIL),
    ('a[^]b]c', 'a]c', FAIL),
    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
    ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
    ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
    ('\\by\\b', 'xy', FAIL),
    ('\\by\\b', 'yz', FAIL),
    ('\\by\\b', 'xyz', FAIL),
    ('x\\b', 'xyz', FAIL),
    ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
    ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
    ('z\\B', 'xyz', FAIL),
    ('\\Bx', 'xyz', FAIL),
    ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
    ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
    ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
    ('\\By\\B', 'xy', FAIL),
    ('\\By\\B', 'yz', FAIL),
    ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
    ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
    ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('$b', 'b', FAIL),
    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    (')(', '-', SYNTAX_ERROR),
    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
    ('abc', '', FAIL),
    ('a*', '', SUCCEED, 'found', ''),
    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
    ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
    ('multiple words of text', 'uh-uh', FAIL),
    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
    ('[k]', 'ab', FAIL),
    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
    ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
    ('^(a+).\\1$', 'aaaa', FAIL),
    ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
    ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
    ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
    ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
    ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
    ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
    ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
    ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
    ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
    ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
    ('([abc]*)x', 'abc', FAIL),
    ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
    ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),

    # Test symbolic groups
    # (the expressions below reference the named group through the
    # variable "id")

    ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
    ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
    ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
    ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),

    # Test octal escapes/memory references
    # (same entry as the first one under "Test octal escapes" above)

    ('\\1', 'a', SYNTAX_ERROR),

    # All tests from Perl

    ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{1,}bc', 'abq', FAIL),
    ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{4,5}bc', 'abbbbc', FAIL),
    ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
    ('^', 'abc', SUCCEED, 'found', ''),
    ('$', 'abc', SUCCEED, 'found', ''),
    ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[b-a]', '-', SYNTAX_ERROR),
    ('*a', '-', SYNTAX_ERROR),
    ('(*)b', '-', SYNTAX_ERROR),
    ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('a**', '-', SYNTAX_ERROR),
    ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
    ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
    ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
    ('^(ab|cd)e', 'abcde', FAIL),
    ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
    ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
# Python does not have the same rules for \\41 so this is a syntax error
#    ('((((((((((a))))))))))\\41', 'aa', FAIL),
#    ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
    ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
    ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
    ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)abc', 'XBC', FAIL),
    ('(?i)abc', 'AXC', FAIL),
    ('(?i)abc', 'ABX', FAIL),
    ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
    ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab+bc', 'ABC', FAIL),
    ('(?i)ab+bc', 'ABQ', FAIL),
    ('(?i)ab{1,}bc', 'ABQ', FAIL),
    ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
    ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab??bc', 'ABBBBC', FAIL),
    ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'ABCC', FAIL),
    ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'AABC', FAIL),
    ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^', 'ABC', SUCCEED, 'found', ''),
    ('(?i)$', 'ABC', SUCCEED, 'found', ''),
    ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
    ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
    ('(?i)a.*c', 'AXYZD', FAIL),
    ('(?i)a[bc]d', 'ABC', FAIL),
    ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
    ('(?i)a[b-d]e', 'ABD', FAIL),
    ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
    ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
    ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
    ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
    ('(?i)a[b-a]', '-', SYNTAX_ERROR),
    ('(?i)a[]b', '-', SYNTAX_ERROR),
    ('(?i)a[', '-', SYNTAX_ERROR),
    ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
    ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
    ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
    ('(?i)a[^bc]d', 'ABD', FAIL),
    ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
    ('(?i)a[^-b]c', 'A-C', FAIL),
    ('(?i)a[^]b]c', 'A]C', FAIL),
    ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
    ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
    ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
    ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
    ('(?i)*a', '-', SYNTAX_ERROR),
    ('(?i)(*)b', '-', SYNTAX_ERROR),
    ('(?i)$b', 'B', FAIL),
    ('(?i)a\\', '-', SYNTAX_ERROR),
    ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
    ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
    ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
    ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
    ('(?i)abc)', '-', SYNTAX_ERROR),
    ('(?i)(abc', '-', SYNTAX_ERROR),
    ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
    ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
    ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a**', '-', SYNTAX_ERROR),
    ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
    ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
    ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
    ('(?i))(', '-', SYNTAX_ERROR),
    ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
    ('(?i)abc', '', FAIL),
    ('(?i)a*', '', SUCCEED, 'found', ''),
    ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
    ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
    ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
    ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
    ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
    ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
    ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
    ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
    ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
    ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
    ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
    ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
    ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
    ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
    ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
    ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
    ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
    ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
    ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
    ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
    ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
    ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
    ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
    ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
    ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
    #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
    #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
    ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
    ('(?i)multiple words of text', 'UH-UH', FAIL),
    ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
    ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
    ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
    ('(?i)[k]', 'AB', FAIL),
#    ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
#    ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
    ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
    ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
    ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
    ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),

    # lookbehind: split by : but not if it is escaped by -.
    ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de' ),
    # escaping with \ as we know it
    ('(?<!\\\\):(.*?)(?<!\\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ),
    # terminating with ' and escaping with ? as in edifact
    ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de" ),

    # Comments using the (?#...) syntax

    ('w(?# comment', 'w', SYNTAX_ERROR),
    ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),

    # Check odd placement of embedded pattern modifiers

    # not an error under PCRE/PRE:
    ('(?i)w', 'W', SUCCEED, 'found', 'W'),
    # ('w(?i)', 'W', SYNTAX_ERROR),

    # Comments using the x embedded pattern modifier

    ("""(?x)w# comment 1
        x y
        # comment 2
        z""", 'wxyz', SUCCEED, 'found', 'wxyz'),

    # using the m embedded pattern modifier

    ('^abc', """jkl
abc
xyz""", FAIL),
    ('(?m)^abc', """jkl
abc
xyz""", SUCCEED, 'found', 'abc'),

    ('(?m)abc$', """jkl
xyzabc
123""", SUCCEED, 'found', 'abc'),



    # test \w, etc. both inside and outside character classes

    ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
    ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
    ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
    ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
    ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
    # not an error under PCRE/PRE:
    # ('[\\d-x]', '-', SYNTAX_ERROR),
    (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
    (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),

    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
    # new \x semantics
    (r'\x00ff', '\377', FAIL),
    # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', '\t\n\v\r\f\a'),
    ('\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', '\t\n\v\r\f\a'),
    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
    (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),

    #
    # post-1.5.2 additions

    # xmllib problem
    (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
    # bug 110866: reference to undefined group
    (r'((.)\1+)', '', SYNTAX_ERROR),
    # bug 111869: search (PRE/PCRE fails on this one, SRE doesn't)
    (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
    # bug 112468: various expected syntax errors
    (r'(', '', SYNTAX_ERROR),
    (r'[\41]', '!', SUCCEED, 'found', '!'),
    # bug 114033: nothing to repeat
    (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
    # bug 115040: rescan if flags are modified inside pattern
    (r'(?x) foo ', 'foo', SUCCEED, 'found', 'foo'),
    # bug 115618: negative lookahead [sic: the pattern below uses a
    # negative lookbehind, (?<!...)]
    (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
    # bug 116251: character class bug
    (r'[\w-]+', 'laser_beam', SUCCEED, 'found', 'laser_beam'),
    # bug 123769+127259: non-greedy backtracking bug
    (r'.*?\S *:', 'xx:', SUCCEED, 'found', 'xx:'),
    (r'a[ ]*?\ (\d+).*', 'a   10', SUCCEED, 'found', 'a   10'),
    (r'a[ ]*?\ (\d+).*', 'a    10', SUCCEED, 'found', 'a    10'),
    # bug 127259: \Z shouldn't depend on multiline mode
    (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
    # bug 128899: uppercase literals under the ignorecase flag
    (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
    (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),
    (r'(?i)[M]+', 'MMM', SUCCEED, 'found', 'MMM'),
    (r'(?i)[m]+', 'MMM', SUCCEED, 'found', 'MMM'),
    # bug 130748: ^* should be an error (nothing to repeat)
    (r'^*', '', SYNTAX_ERROR),
    # bug 133283: minimizing repeat problem
    (r'"(?:\\"|[^"])*?"', r'"\""', SUCCEED, 'found', r'"\""'),
    # bug 477728: minimizing repeat problem
    (r'^.*?$', 'one\ntwo\nthree\n', FAIL),
    # bug 483789: minimizing repeat problem
    (r'a[^>]*?b', 'a>b', FAIL),
    # bug 490573: minimizing repeat problem
    (r'^a*?$', 'foo', FAIL),
    # bug 470582: nested groups problem
    (r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
    # another minimizing repeat problem (capturing groups in assertions)
    ('^([ab]*?)(?=(b)?)c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
    ('^([ab]*?)(?!(b))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
    ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
]
     557  
     558  u = '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'
     559  tests.extend([
     560      # bug 410271: \b broken under locales
     561      (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
     562      (r'(?u)\b.\b', u, SUCCEED, 'found', u),
     563      (r'(?u)\w', u, SUCCEED, 'found', u),
     564  ])