1  import difflib
       2  from test.support import findfile
       3  import unittest
       4  import doctest
       5  import sys
       6  
       7  
       8  class ESC[4;38;5;81mTestWithAscii(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
       9      def test_one_insert(self):
      10          sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100)
      11          self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
      12          self.assertEqual(list(sm.get_opcodes()),
      13              [   ('insert', 0, 0, 0, 1),
      14                  ('equal', 0, 100, 1, 101)])
      15          self.assertEqual(sm.bpopular, set())
      16          sm = difflib.SequenceMatcher(None, 'b' * 100, 'b' * 50 + 'a' + 'b' * 50)
      17          self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
      18          self.assertEqual(list(sm.get_opcodes()),
      19              [   ('equal', 0, 50, 0, 50),
      20                  ('insert', 50, 50, 50, 51),
      21                  ('equal', 50, 100, 51, 101)])
      22          self.assertEqual(sm.bpopular, set())
      23  
      24      def test_one_delete(self):
      25          sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40)
      26          self.assertAlmostEqual(sm.ratio(), 0.994, places=3)
      27          self.assertEqual(list(sm.get_opcodes()),
      28              [   ('equal', 0, 40, 0, 40),
      29                  ('delete', 40, 41, 40, 40),
      30                  ('equal', 41, 81, 40, 80)])
      31  
      32      def test_bjunk(self):
      33          sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
      34                  a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40)
      35          self.assertEqual(sm.bjunk, set())
      36  
      37          sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
      38                  a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
      39          self.assertEqual(sm.bjunk, {' '})
      40  
      41          sm = difflib.SequenceMatcher(isjunk=lambda x: x in [' ', 'b'],
      42                  a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
      43          self.assertEqual(sm.bjunk, {' ', 'b'})
      44  
      45  
      46  class ESC[4;38;5;81mTestAutojunk(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      47      """Tests for the autojunk parameter added in 2.7"""
      48      def test_one_insert_homogenous_sequence(self):
      49          # By default autojunk=True and the heuristic kicks in for a sequence
      50          # of length 200+
      51          seq1 = 'b' * 200
      52          seq2 = 'a' + 'b' * 200
      53  
      54          sm = difflib.SequenceMatcher(None, seq1, seq2)
      55          self.assertAlmostEqual(sm.ratio(), 0, places=3)
      56          self.assertEqual(sm.bpopular, {'b'})
      57  
      58          # Now turn the heuristic off
      59          sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False)
      60          self.assertAlmostEqual(sm.ratio(), 0.9975, places=3)
      61          self.assertEqual(sm.bpopular, set())
      62  
      63  
      64  class ESC[4;38;5;81mTestSFbugs(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      65      def test_ratio_for_null_seqn(self):
      66          # Check clearing of SF bug 763023
      67          s = difflib.SequenceMatcher(None, [], [])
      68          self.assertEqual(s.ratio(), 1)
      69          self.assertEqual(s.quick_ratio(), 1)
      70          self.assertEqual(s.real_quick_ratio(), 1)
      71  
      72      def test_comparing_empty_lists(self):
      73          # Check fix for bug #979794
      74          group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
      75          self.assertRaises(StopIteration, next, group_gen)
      76          diff_gen = difflib.unified_diff([], [])
      77          self.assertRaises(StopIteration, next, diff_gen)
      78  
      79      def test_matching_blocks_cache(self):
      80          # Issue #21635
      81          s = difflib.SequenceMatcher(None, "abxcd", "abcd")
      82          first = s.get_matching_blocks()
      83          second = s.get_matching_blocks()
      84          self.assertEqual(second[0].size, 2)
      85          self.assertEqual(second[1].size, 2)
      86          self.assertEqual(second[2].size, 0)
      87  
      88      def test_added_tab_hint(self):
      89          # Check fix for bug #1488943
      90          diff = list(difflib.Differ().compare(["\tI am a buggy"],["\t\tI am a bug"]))
      91          self.assertEqual("- \tI am a buggy", diff[0])
      92          self.assertEqual("? \t          --\n", diff[1])
      93          self.assertEqual("+ \t\tI am a bug", diff[2])
      94          self.assertEqual("? +\n", diff[3])
      95  
      96      def test_hint_indented_properly_with_tabs(self):
      97          diff = list(difflib.Differ().compare(["\t \t \t^"], ["\t \t \t^\n"]))
      98          self.assertEqual("- \t \t \t^", diff[0])
      99          self.assertEqual("+ \t \t \t^\n", diff[1])
     100          self.assertEqual("? \t \t \t +\n", diff[2])
     101  
     102      def test_mdiff_catch_stop_iteration(self):
     103          # Issue #33224
     104          self.assertEqual(
     105              list(difflib._mdiff(["2"], ["3"], 1)),
     106              [((1, '\x00-2\x01'), (1, '\x00+3\x01'), True)],
     107          )
     108  
     109  
# Fixture texts for the HtmlDiff tests in TestSFpatches (SF patch 914575).
# The exact characters matter: test_html_diff compares the generated HTML
# against a stored expected file.

# "from"/"to" pair used for the context/full tables and the charset tests.
patch914575_from1 = """
   1. Beautiful is beTTer than ugly.
   2. Explicit is better than implicit.
   3. Simple is better than complex.
   4. Complex is better than complicated.
"""

patch914575_to1 = """
   1. Beautiful is better than ugly.
   3.   Simple is better than complex.
   4. Complicated is better than complex.
   5. Flat is better than nested.
"""

# Same shape as the pair above but with non-ASCII letters; used by
# test_make_file_usascii_charset_with_nonascii_input.
patch914575_nonascii_from1 = """
   1. Beautiful is beTTer than ugly.
   2. Explicit is better than ımplıcıt.
   3. Simple is better than complex.
   4. Complex is better than complicated.
"""

patch914575_nonascii_to1 = """
   1. Beautiful is better than ügly.
   3.   Sımple is better than complex.
   4. Complicated is better than cömplex.
   5. Flat is better than nested.
"""

# Pair with tabs on the "from" side and spaces on the "to" side; used for
# the tabsize tables.  The bracketed tags presumably spell out the
# whitespace on each side (t = tab, s = space).
patch914575_from2 = """
\t\tLine 1: preceded by from:[tt] to:[ssss]
  \t\tLine 2: preceded by from:[sstt] to:[sssst]
  \t \tLine 3: preceded by from:[sstst] to:[ssssss]
Line 4:  \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

patch914575_to2 = """
    Line 1: preceded by from:[tt] to:[ssss]
    \tLine 2: preceded by from:[sstt] to:[sssst]
      Line 3: preceded by from:[sstst] to:[ssssss]
Line 4:   has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end
"""

# Pair with lines of varying length; used for the wrapcolumn=14 tables.
patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4   changed
line 5   changed
line 6   changed
line 7
line 8  subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2    added
line 3
line 4   chanGEd
line 5a  chanGed
line 6a  changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""
     187  
     188  class ESC[4;38;5;81mTestSFpatches(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     189  
     190      def test_html_diff(self):
     191          # Check SF patch 914575 for generating HTML differences
     192          f1a = ((patch914575_from1 + '123\n'*10)*3)
     193          t1a = (patch914575_to1 + '123\n'*10)*3
     194          f1b = '456\n'*10 + f1a
     195          t1b = '456\n'*10 + t1a
     196          f1a = f1a.splitlines()
     197          t1a = t1a.splitlines()
     198          f1b = f1b.splitlines()
     199          t1b = t1b.splitlines()
     200          f2 = patch914575_from2.splitlines()
     201          t2 = patch914575_to2.splitlines()
     202          f3 = patch914575_from3
     203          t3 = patch914575_to3
     204          i = difflib.HtmlDiff()
     205          j = difflib.HtmlDiff(tabsize=2)
     206          k = difflib.HtmlDiff(wrapcolumn=14)
     207  
     208          full = i.make_file(f1a,t1a,'from','to',context=False,numlines=5)
     209          tables = '\n'.join(
     210              [
     211               '<h2>Context (first diff within numlines=5(default))</h2>',
     212               i.make_table(f1a,t1a,'from','to',context=True),
     213               '<h2>Context (first diff after numlines=5(default))</h2>',
     214               i.make_table(f1b,t1b,'from','to',context=True),
     215               '<h2>Context (numlines=6)</h2>',
     216               i.make_table(f1a,t1a,'from','to',context=True,numlines=6),
     217               '<h2>Context (numlines=0)</h2>',
     218               i.make_table(f1a,t1a,'from','to',context=True,numlines=0),
     219               '<h2>Same Context</h2>',
     220               i.make_table(f1a,f1a,'from','to',context=True),
     221               '<h2>Same Full</h2>',
     222               i.make_table(f1a,f1a,'from','to',context=False),
     223               '<h2>Empty Context</h2>',
     224               i.make_table([],[],'from','to',context=True),
     225               '<h2>Empty Full</h2>',
     226               i.make_table([],[],'from','to',context=False),
     227               '<h2>tabsize=2</h2>',
     228               j.make_table(f2,t2),
     229               '<h2>tabsize=default</h2>',
     230               i.make_table(f2,t2),
     231               '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
     232               k.make_table(f3.splitlines(),t3.splitlines(),context=True,numlines=0),
     233               '<h2>wrapcolumn=14,splitlines()</h2>',
     234               k.make_table(f3.splitlines(),t3.splitlines()),
     235               '<h2>wrapcolumn=14,splitlines(True)</h2>',
     236               k.make_table(f3.splitlines(True),t3.splitlines(True)),
     237               ])
     238          actual = full.replace('</body>','\n%s\n</body>' % tables)
     239  
     240          # temporarily uncomment next two lines to baseline this test
     241          #with open('test_difflib_expect.html','w') as fp:
     242          #    fp.write(actual)
     243  
     244          with open(findfile('test_difflib_expect.html'), encoding="utf-8") as fp:
     245              self.assertEqual(actual, fp.read())
     246  
     247      def test_recursion_limit(self):
     248          # Check if the problem described in patch #1413711 exists.
     249          limit = sys.getrecursionlimit()
     250          old = [(i%2 and "K:%d" or "V:A:%d") % i for i in range(limit*2)]
     251          new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)]
     252          difflib.SequenceMatcher(None, old, new).get_opcodes()
     253  
     254      def test_make_file_default_charset(self):
     255          html_diff = difflib.HtmlDiff()
     256          output = html_diff.make_file(patch914575_from1.splitlines(),
     257                                       patch914575_to1.splitlines())
     258          self.assertIn('content="text/html; charset=utf-8"', output)
     259  
     260      def test_make_file_iso88591_charset(self):
     261          html_diff = difflib.HtmlDiff()
     262          output = html_diff.make_file(patch914575_from1.splitlines(),
     263                                       patch914575_to1.splitlines(),
     264                                       charset='iso-8859-1')
     265          self.assertIn('content="text/html; charset=iso-8859-1"', output)
     266  
     267      def test_make_file_usascii_charset_with_nonascii_input(self):
     268          html_diff = difflib.HtmlDiff()
     269          output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
     270                                       patch914575_nonascii_to1.splitlines(),
     271                                       charset='us-ascii')
     272          self.assertIn('content="text/html; charset=us-ascii"', output)
     273          self.assertIn('&#305;mpl&#305;c&#305;t', output)
     274  
     275  
     276  class ESC[4;38;5;81mTestOutputFormat(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     277      def test_tab_delimiter(self):
     278          args = ['one', 'two', 'Original', 'Current',
     279              '2005-01-26 23:30:50', '2010-04-02 10:20:52']
     280          ud = difflib.unified_diff(*args, lineterm='')
     281          self.assertEqual(list(ud)[0:2], [
     282                             "--- Original\t2005-01-26 23:30:50",
     283                             "+++ Current\t2010-04-02 10:20:52"])
     284          cd = difflib.context_diff(*args, lineterm='')
     285          self.assertEqual(list(cd)[0:2], [
     286                             "*** Original\t2005-01-26 23:30:50",
     287                             "--- Current\t2010-04-02 10:20:52"])
     288  
     289      def test_no_trailing_tab_on_empty_filedate(self):
     290          args = ['one', 'two', 'Original', 'Current']
     291          ud = difflib.unified_diff(*args, lineterm='')
     292          self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])
     293  
     294          cd = difflib.context_diff(*args, lineterm='')
     295          self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])
     296  
     297      def test_range_format_unified(self):
     298          # Per the diff spec at http://www.unix.org/single_unix_specification/
     299          spec = '''\
     300             Each <range> field shall be of the form:
     301               %1d", <beginning line number>  if the range contains exactly one line,
     302             and:
     303              "%1d,%1d", <beginning line number>, <number of lines> otherwise.
     304             If a range is empty, its beginning line number shall be the number of
     305             the line just before the range, or 0 if the empty range starts the file.
     306          '''
     307          fmt = difflib._format_range_unified
     308          self.assertEqual(fmt(3,3), '3,0')
     309          self.assertEqual(fmt(3,4), '4')
     310          self.assertEqual(fmt(3,5), '4,2')
     311          self.assertEqual(fmt(3,6), '4,3')
     312          self.assertEqual(fmt(0,0), '0,0')
     313  
     314      def test_range_format_context(self):
     315          # Per the diff spec at http://www.unix.org/single_unix_specification/
     316          spec = '''\
     317             The range of lines in file1 shall be written in the following format
     318             if the range contains two or more lines:
     319                 "*** %d,%d ****\n", <beginning line number>, <ending line number>
     320             and the following format otherwise:
     321                 "*** %d ****\n", <ending line number>
     322             The ending line number of an empty range shall be the number of the preceding line,
     323             or 0 if the range is at the start of the file.
     324  
     325             Next, the range of lines in file2 shall be written in the following format
     326             if the range contains two or more lines:
     327                 "--- %d,%d ----\n", <beginning line number>, <ending line number>
     328             and the following format otherwise:
     329                 "--- %d ----\n", <ending line number>
     330          '''
     331          fmt = difflib._format_range_context
     332          self.assertEqual(fmt(3,3), '3')
     333          self.assertEqual(fmt(3,4), '4')
     334          self.assertEqual(fmt(3,5), '4,5')
     335          self.assertEqual(fmt(3,6), '4,6')
     336          self.assertEqual(fmt(0,0), '0')
     337  
     338  
     339  class ESC[4;38;5;81mTestBytes(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     340      # don't really care about the content of the output, just the fact
     341      # that it's bytes and we don't crash
     342      def check(self, diff):
     343          diff = list(diff)   # trigger exceptions first
     344          for line in diff:
     345              self.assertIsInstance(
     346                  line, bytes,
     347                  "all lines of diff should be bytes, but got: %r" % line)
     348  
     349      def test_byte_content(self):
     350          # if we receive byte strings, we return byte strings
     351          a = [b'hello', b'andr\xe9']     # iso-8859-1 bytes
     352          b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes
     353  
     354          unified = difflib.unified_diff
     355          context = difflib.context_diff
     356  
     357          check = self.check
     358          check(difflib.diff_bytes(unified, a, a))
     359          check(difflib.diff_bytes(unified, a, b))
     360  
     361          # now with filenames (content and filenames are all bytes!)
     362          check(difflib.diff_bytes(unified, a, a, b'a', b'a'))
     363          check(difflib.diff_bytes(unified, a, b, b'a', b'b'))
     364  
     365          # and with filenames and dates
     366          check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013'))
     367          check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013'))
     368  
     369          # same all over again, with context diff
     370          check(difflib.diff_bytes(context, a, a))
     371          check(difflib.diff_bytes(context, a, b))
     372          check(difflib.diff_bytes(context, a, a, b'a', b'a'))
     373          check(difflib.diff_bytes(context, a, b, b'a', b'b'))
     374          check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013'))
     375          check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013'))
     376  
     377      def test_byte_filenames(self):
     378          # somebody renamed a file from ISO-8859-2 to UTF-8
     379          fna = b'\xb3odz.txt'    # "łodz.txt"
     380          fnb = b'\xc5\x82odz.txt'
     381  
     382          # they transcoded the content at the same time
     383          a = [b'\xa3odz is a city in Poland.']
     384          b = [b'\xc5\x81odz is a city in Poland.']
     385  
     386          check = self.check
     387          unified = difflib.unified_diff
     388          context = difflib.context_diff
     389          check(difflib.diff_bytes(unified, a, b, fna, fnb))
     390          check(difflib.diff_bytes(context, a, b, fna, fnb))
     391  
     392          def assertDiff(expect, actual):
     393              # do not compare expect and equal as lists, because unittest
     394              # uses difflib to report difference between lists
     395              actual = list(actual)
     396              self.assertEqual(len(expect), len(actual))
     397              for e, a in zip(expect, actual):
     398                  self.assertEqual(e, a)
     399  
     400          expect = [
     401              b'--- \xb3odz.txt',
     402              b'+++ \xc5\x82odz.txt',
     403              b'@@ -1 +1 @@',
     404              b'-\xa3odz is a city in Poland.',
     405              b'+\xc5\x81odz is a city in Poland.',
     406          ]
     407          actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'')
     408          assertDiff(expect, actual)
     409  
     410          # with dates (plain ASCII)
     411          datea = b'2005-03-18'
     412          dateb = b'2005-03-19'
     413          check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb))
     414          check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb))
     415  
     416          expect = [
     417              # note the mixed encodings here: this is deeply wrong by every
     418              # tenet of Unicode, but it doesn't crash, it's parseable by
     419              # patch, and it's how UNIX(tm) diff behaves
     420              b'--- \xb3odz.txt\t2005-03-18',
     421              b'+++ \xc5\x82odz.txt\t2005-03-19',
     422              b'@@ -1 +1 @@',
     423              b'-\xa3odz is a city in Poland.',
     424              b'+\xc5\x81odz is a city in Poland.',
     425          ]
     426          actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb,
     427                                      lineterm=b'')
     428          assertDiff(expect, actual)
     429  
     430      def test_mixed_types_content(self):
     431          # type of input content must be consistent: all str or all bytes
     432          a = [b'hello']
     433          b = ['hello']
     434  
     435          unified = difflib.unified_diff
     436          context = difflib.context_diff
     437  
     438          expect = "lines to compare must be str, not bytes (b'hello')"
     439          self._assert_type_error(expect, unified, a, b)
     440          self._assert_type_error(expect, unified, b, a)
     441          self._assert_type_error(expect, context, a, b)
     442          self._assert_type_error(expect, context, b, a)
     443  
     444          expect = "all arguments must be bytes, not str ('hello')"
     445          self._assert_type_error(expect, difflib.diff_bytes, unified, a, b)
     446          self._assert_type_error(expect, difflib.diff_bytes, unified, b, a)
     447          self._assert_type_error(expect, difflib.diff_bytes, context, a, b)
     448          self._assert_type_error(expect, difflib.diff_bytes, context, b, a)
     449  
     450      def test_mixed_types_filenames(self):
     451          # cannot pass filenames as bytes if content is str (this may not be
     452          # the right behaviour, but at least the test demonstrates how
     453          # things work)
     454          a = ['hello\n']
     455          b = ['ohell\n']
     456          fna = b'ol\xe9.txt'     # filename transcoded from ISO-8859-1
     457          fnb = b'ol\xc3a9.txt'   # to UTF-8
     458          self._assert_type_error(
     459              "all arguments must be str, not: b'ol\\xe9.txt'",
     460              difflib.unified_diff, a, b, fna, fnb)
     461  
     462      def test_mixed_types_dates(self):
     463          # type of dates must be consistent with type of contents
     464          a = [b'foo\n']
     465          b = [b'bar\n']
     466          datea = '1 fév'
     467          dateb = '3 fév'
     468          self._assert_type_error(
     469              "all arguments must be bytes, not str ('1 fév')",
     470              difflib.diff_bytes, difflib.unified_diff,
     471              a, b, b'a', b'b', datea, dateb)
     472  
     473          # if input is str, non-ASCII dates are fine
     474          a = ['foo\n']
     475          b = ['bar\n']
     476          list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb))
     477  
     478      def _assert_type_error(self, msg, generator, *args):
     479          with self.assertRaises(TypeError) as ctx:
     480              list(generator(*args))
     481          self.assertEqual(msg, str(ctx.exception))
     482  
     483  class ESC[4;38;5;81mTestJunkAPIs(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     484      def test_is_line_junk_true(self):
     485          for line in ['#', '  ', ' #', '# ', ' # ', '']:
     486              self.assertTrue(difflib.IS_LINE_JUNK(line), repr(line))
     487  
     488      def test_is_line_junk_false(self):
     489          for line in ['##', ' ##', '## ', 'abc ', 'abc #', 'Mr. Moose is up!']:
     490              self.assertFalse(difflib.IS_LINE_JUNK(line), repr(line))
     491  
     492      def test_is_line_junk_REDOS(self):
     493          evil_input = ('\t' * 1000000) + '##'
     494          self.assertFalse(difflib.IS_LINE_JUNK(evil_input))
     495  
     496      def test_is_character_junk_true(self):
     497          for char in [' ', '\t']:
     498              self.assertTrue(difflib.IS_CHARACTER_JUNK(char), repr(char))
     499  
     500      def test_is_character_junk_false(self):
     501          for char in ['a', '#', '\n', '\f', '\r', '\v']:
     502              self.assertFalse(difflib.IS_CHARACTER_JUNK(char), repr(char))
     503  
     504  class ESC[4;38;5;81mTestFindLongest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     505      def longer_match_exists(self, a, b, n):
     506          return any(b_part in a for b_part in
     507                     [b[i:i + n + 1] for i in range(0, len(b) - n - 1)])
     508  
     509      def test_default_args(self):
     510          a = 'foo bar'
     511          b = 'foo baz bar'
     512          sm = difflib.SequenceMatcher(a=a, b=b)
     513          match = sm.find_longest_match()
     514          self.assertEqual(match.a, 0)
     515          self.assertEqual(match.b, 0)
     516          self.assertEqual(match.size, 6)
     517          self.assertEqual(a[match.a: match.a + match.size],
     518                           b[match.b: match.b + match.size])
     519          self.assertFalse(self.longer_match_exists(a, b, match.size))
     520  
     521          match = sm.find_longest_match(alo=2, blo=4)
     522          self.assertEqual(match.a, 3)
     523          self.assertEqual(match.b, 7)
     524          self.assertEqual(match.size, 4)
     525          self.assertEqual(a[match.a: match.a + match.size],
     526                           b[match.b: match.b + match.size])
     527          self.assertFalse(self.longer_match_exists(a[2:], b[4:], match.size))
     528  
     529          match = sm.find_longest_match(bhi=5, blo=1)
     530          self.assertEqual(match.a, 1)
     531          self.assertEqual(match.b, 1)
     532          self.assertEqual(match.size, 4)
     533          self.assertEqual(a[match.a: match.a + match.size],
     534                           b[match.b: match.b + match.size])
     535          self.assertFalse(self.longer_match_exists(a, b[1:5], match.size))
     536  
     537      def test_longest_match_with_popular_chars(self):
     538          a = 'dabcd'
     539          b = 'd'*100 + 'abc' + 'd'*100  # length over 200 so popular used
     540          sm = difflib.SequenceMatcher(a=a, b=b)
     541          match = sm.find_longest_match(0, len(a), 0, len(b))
     542          self.assertEqual(match.a, 0)
     543          self.assertEqual(match.b, 99)
     544          self.assertEqual(match.size, 5)
     545          self.assertEqual(a[match.a: match.a + match.size],
     546                           b[match.b: match.b + match.size])
     547          self.assertFalse(self.longer_match_exists(a, b, match.size))
     548  
     549  
     550  def setUpModule():
     551      difflib.HtmlDiff._default_prefix = 0
     552  
     553  
     554  def load_tests(loader, tests, pattern):
     555      tests.addTest(doctest.DocTestSuite(difflib))
     556      return tests
     557  
     558  
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()