1  
       2  # Various microbenchmarks comparing unicode and byte string performance
       3  # Please keep this file both 2.x and 3.x compatible!
       4  
       5  import timeit
       6  import itertools
       7  import operator
       8  import re
       9  import sys
      10  import datetime
      11  import optparse
      12  
      13  VERSION = '2.0'
      14  
      15  def p(*args):
      16      sys.stdout.write(' '.join(str(s) for s in args) + '\n')
      17  
# String-kind selectors.  BYTES(x) turns the native str literal x into a byte
# string and UNICODE(x) turns it into a unicode string; bytes_from_str and
# unicode_from_str are aliases bound to the very same callables.  Code further
# down compares its STR argument against these objects by identity
# (`STR is BYTES` / `STR is UNICODE`).
if sys.version_info >= (3,):
    # 3.x: str is already unicode, so only BYTES converts (ASCII-encode).
    BYTES = bytes_from_str = lambda x: x.encode('ascii')
    UNICODE = unicode_from_str = lambda x: x
else:
    # 2.x: str is already a byte string, so only UNICODE converts (ASCII-decode).
    BYTES = bytes_from_str = lambda x: x
    UNICODE = unicode_from_str = lambda x: x.decode('ascii')
      24  
      25  class ESC[4;38;5;81mUnsupportedType(ESC[4;38;5;149mTypeError):
      26      pass
      27  
      28  
# Report header: suite version, interpreter version string, current local time.
p('stringbench v%s' % VERSION)
p(sys.version)
p(datetime.datetime.now())
      32  
      33  REPEAT = 1
      34  REPEAT = 3
      35  #REPEAT = 7
      36  
# Refuse to continue when this file is imported instead of executed directly.
if __name__ != "__main__":
    raise SystemExit("Must run as main program")
      39  
      40  parser = optparse.OptionParser()
      41  parser.add_option("-R", "--skip-re", dest="skip_re",
      42                    action="store_true",
      43                    help="skip regular expression tests")
      44  parser.add_option("-8", "--8-bit", dest="bytes_only",
      45                    action="store_true",
      46                    help="only do 8-bit string benchmarks")
      47  parser.add_option("-u", "--unicode", dest="unicode_only",
      48                    action="store_true",
      49                    help="only do Unicode string benchmarks")
      50  
      51  
      52  _RANGE_1000 = list(range(1000))
      53  _RANGE_100 = list(range(100))
      54  _RANGE_10 = list(range(10))
      55  
      56  dups = {}
      57  def bench(s, group, repeat_count):
      58      def blah(f):
      59          if f.__name__ in dups:
      60              raise AssertionError("Multiple functions with same name: %r" %
      61                                   (f.__name__,))
      62          dups[f.__name__] = 1
      63          f.comment = s
      64          f.is_bench = True
      65          f.group = group
      66          f.repeat_count = repeat_count
      67          return f
      68      return blah
      69  
      70  def uses_re(f):
      71      f.uses_re = True
      72  
      73  ####### 'in' comparisons
      74  
      75  @bench('"A" in "A"*1000', "early match, single character", 1000)
      76  def in_test_quick_match_single_character(STR):
      77      s1 = STR("A" * 1000)
      78      s2 = STR("A")
      79      for x in _RANGE_1000:
      80          s2 in s1
      81  
      82  @bench('"B" in "A"*1000', "no match, single character", 1000)
      83  def in_test_no_match_single_character(STR):
      84      s1 = STR("A" * 1000)
      85      s2 = STR("B")
      86      for x in _RANGE_1000:
      87          s2 in s1
      88  
      89  
      90  @bench('"AB" in "AB"*1000', "early match, two characters", 1000)
      91  def in_test_quick_match_two_characters(STR):
      92      s1 = STR("AB" * 1000)
      93      s2 = STR("AB")
      94      for x in _RANGE_1000:
      95          s2 in s1
      96  
      97  @bench('"BC" in "AB"*1000', "no match, two characters", 1000)
      98  def in_test_no_match_two_character(STR):
      99      s1 = STR("AB" * 1000)
     100      s2 = STR("BC")
     101      for x in _RANGE_1000:
     102          s2 in s1
     103  
     104  @bench('"BC" in ("AB"*300+"C")', "late match, two characters", 1000)
     105  def in_test_slow_match_two_characters(STR):
     106      s1 = STR("AB" * 300+"C")
     107      s2 = STR("BC")
     108      for x in _RANGE_1000:
     109          s2 in s1
     110  
     111  @bench('s="ABC"*33; (s+"E") in ((s+"D")*300+s+"E")',
     112         "late match, 100 characters", 100)
     113  def in_test_slow_match_100_characters(STR):
     114      m = STR("ABC"*33)
     115      d = STR("D")
     116      e = STR("E")
     117      s1 = (m+d)*300 + m+e
     118      s2 = m+e
     119      for x in _RANGE_100:
     120          s2 in s1
     121  
     122  # Try with regex
     123  @uses_re
     124  @bench('s="ABC"*33; re.compile(s+"D").search((s+"D")*300+s+"E")',
     125         "late match, 100 characters", 100)
     126  def re_test_slow_match_100_characters(STR):
     127      m = STR("ABC"*33)
     128      d = STR("D")
     129      e = STR("E")
     130      s1 = (m+d)*300 + m+e
     131      s2 = m+e
     132      pat = re.compile(s2)
     133      search = pat.search
     134      for x in _RANGE_100:
     135          search(s1)
     136  
     137  
     138  #### same tests as 'in' but use 'find'
     139  
     140  @bench('("A"*1000).find("A")', "early match, single character", 1000)
     141  def find_test_quick_match_single_character(STR):
     142      s1 = STR("A" * 1000)
     143      s2 = STR("A")
     144      s1_find = s1.find
     145      for x in _RANGE_1000:
     146          s1_find(s2)
     147  
     148  @bench('("A"*1000).find("B")', "no match, single character", 1000)
     149  def find_test_no_match_single_character(STR):
     150      s1 = STR("A" * 1000)
     151      s2 = STR("B")
     152      s1_find = s1.find
     153      for x in _RANGE_1000:
     154          s1_find(s2)
     155  
     156  
     157  @bench('("AB"*1000).find("AB")', "early match, two characters", 1000)
     158  def find_test_quick_match_two_characters(STR):
     159      s1 = STR("AB" * 1000)
     160      s2 = STR("AB")
     161      s1_find = s1.find
     162      for x in _RANGE_1000:
     163          s1_find(s2)
     164  
     165  @bench('("AB"*1000).find("BC")', "no match, two characters", 1000)
     166  def find_test_no_match_two_character(STR):
     167      s1 = STR("AB" * 1000)
     168      s2 = STR("BC")
     169      s1_find = s1.find
     170      for x in _RANGE_1000:
     171          s1_find(s2)
     172  
     173  @bench('("AB"*1000).find("CA")', "no match, two characters", 1000)
     174  def find_test_no_match_two_character_bis(STR):
     175      s1 = STR("AB" * 1000)
     176      s2 = STR("CA")
     177      s1_find = s1.find
     178      for x in _RANGE_1000:
     179          s1_find(s2)
     180  
     181  @bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000)
     182  def find_test_slow_match_two_characters(STR):
     183      s1 = STR("AB" * 300+"C")
     184      s2 = STR("BC")
     185      s1_find = s1.find
     186      for x in _RANGE_1000:
     187          s1_find(s2)
     188  
     189  @bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000)
     190  def find_test_slow_match_two_characters_bis(STR):
     191      s1 = STR("AB" * 300+"CA")
     192      s2 = STR("CA")
     193      s1_find = s1.find
     194      for x in _RANGE_1000:
     195          s1_find(s2)
     196  
     197  @bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")',
     198         "late match, 100 characters", 100)
     199  def find_test_slow_match_100_characters(STR):
     200      m = STR("ABC"*33)
     201      d = STR("D")
     202      e = STR("E")
     203      s1 = (m+d)*500 + m+e
     204      s2 = m+e
     205      s1_find = s1.find
     206      for x in _RANGE_100:
     207          s1_find(s2)
     208  
     209  @bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)',
     210         "late match, 100 characters", 100)
     211  def find_test_slow_match_100_characters_bis(STR):
     212      m = STR("ABC"*33)
     213      d = STR("D")
     214      e = STR("E")
     215      s1 = (m+d)*500 + e+m
     216      s2 = e+m
     217      s1_find = s1.find
     218      for x in _RANGE_100:
     219          s1_find(s2)
     220  
     221  
     222  #### Same tests for 'rfind'
     223  
     224  @bench('("A"*1000).rfind("A")', "early match, single character", 1000)
     225  def rfind_test_quick_match_single_character(STR):
     226      s1 = STR("A" * 1000)
     227      s2 = STR("A")
     228      s1_rfind = s1.rfind
     229      for x in _RANGE_1000:
     230          s1_rfind(s2)
     231  
     232  @bench('("A"*1000).rfind("B")', "no match, single character", 1000)
     233  def rfind_test_no_match_single_character(STR):
     234      s1 = STR("A" * 1000)
     235      s2 = STR("B")
     236      s1_rfind = s1.rfind
     237      for x in _RANGE_1000:
     238          s1_rfind(s2)
     239  
     240  
     241  @bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000)
     242  def rfind_test_quick_match_two_characters(STR):
     243      s1 = STR("AB" * 1000)
     244      s2 = STR("AB")
     245      s1_rfind = s1.rfind
     246      for x in _RANGE_1000:
     247          s1_rfind(s2)
     248  
     249  @bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000)
     250  def rfind_test_no_match_two_character(STR):
     251      s1 = STR("AB" * 1000)
     252      s2 = STR("BC")
     253      s1_rfind = s1.rfind
     254      for x in _RANGE_1000:
     255          s1_rfind(s2)
     256  
     257  @bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000)
     258  def rfind_test_no_match_two_character_bis(STR):
     259      s1 = STR("AB" * 1000)
     260      s2 = STR("CA")
     261      s1_rfind = s1.rfind
     262      for x in _RANGE_1000:
     263          s1_rfind(s2)
     264  
     265  @bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000)
     266  def rfind_test_slow_match_two_characters(STR):
     267      s1 = STR("C" + "AB" * 300)
     268      s2 = STR("CA")
     269      s1_rfind = s1.rfind
     270      for x in _RANGE_1000:
     271          s1_rfind(s2)
     272  
     273  @bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000)
     274  def rfind_test_slow_match_two_characters_bis(STR):
     275      s1 = STR("BC" + "AB" * 300)
     276      s2 = STR("BC")
     277      s1_rfind = s1.rfind
     278      for x in _RANGE_1000:
     279          s1_rfind(s2)
     280  
     281  @bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)',
     282         "late match, 100 characters", 100)
     283  def rfind_test_slow_match_100_characters(STR):
     284      m = STR("ABC"*33)
     285      d = STR("D")
     286      e = STR("E")
     287      s1 = e+m + (d+m)*500
     288      s2 = e+m
     289      s1_rfind = s1.rfind
     290      for x in _RANGE_100:
     291          s1_rfind(s2)
     292  
     293  @bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")',
     294         "late match, 100 characters", 100)
     295  def rfind_test_slow_match_100_characters_bis(STR):
     296      m = STR("ABC"*33)
     297      d = STR("D")
     298      e = STR("E")
     299      s1 = m+e + (d+m)*500
     300      s2 = m+e
     301      s1_rfind = s1.rfind
     302      for x in _RANGE_100:
     303          s1_rfind(s2)
     304  
     305  
     306  #### Now with index.
     307  # Skip the ones which fail because that would include exception overhead.
     308  
     309  @bench('("A"*1000).index("A")', "early match, single character", 1000)
     310  def index_test_quick_match_single_character(STR):
     311      s1 = STR("A" * 1000)
     312      s2 = STR("A")
     313      s1_index = s1.index
     314      for x in _RANGE_1000:
     315          s1_index(s2)
     316  
     317  @bench('("AB"*1000).index("AB")', "early match, two characters", 1000)
     318  def index_test_quick_match_two_characters(STR):
     319      s1 = STR("AB" * 1000)
     320      s2 = STR("AB")
     321      s1_index = s1.index
     322      for x in _RANGE_1000:
     323          s1_index(s2)
     324  
     325  @bench('("AB"*300+"C").index("BC")', "late match, two characters", 1000)
     326  def index_test_slow_match_two_characters(STR):
     327      s1 = STR("AB" * 300+"C")
     328      s2 = STR("BC")
     329      s1_index = s1.index
     330      for x in _RANGE_1000:
     331          s1_index(s2)
     332  
     333  @bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")',
     334         "late match, 100 characters", 100)
     335  def index_test_slow_match_100_characters(STR):
     336      m = STR("ABC"*33)
     337      d = STR("D")
     338      e = STR("E")
     339      s1 = (m+d)*500 + m+e
     340      s2 = m+e
     341      s1_index = s1.index
     342      for x in _RANGE_100:
     343          s1_index(s2)
     344  
     345  
     346  #### Same for rindex
     347  
     348  @bench('("A"*1000).rindex("A")', "early match, single character", 1000)
     349  def rindex_test_quick_match_single_character(STR):
     350      s1 = STR("A" * 1000)
     351      s2 = STR("A")
     352      s1_rindex = s1.rindex
     353      for x in _RANGE_1000:
     354          s1_rindex(s2)
     355  
     356  @bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000)
     357  def rindex_test_quick_match_two_characters(STR):
     358      s1 = STR("AB" * 1000)
     359      s2 = STR("AB")
     360      s1_rindex = s1.rindex
     361      for x in _RANGE_1000:
     362          s1_rindex(s2)
     363  
     364  @bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000)
     365  def rindex_test_slow_match_two_characters(STR):
     366      s1 = STR("C" + "AB" * 300)
     367      s2 = STR("CA")
     368      s1_rindex = s1.rindex
     369      for x in _RANGE_1000:
     370          s1_rindex(s2)
     371  
     372  @bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)',
     373         "late match, 100 characters", 100)
     374  def rindex_test_slow_match_100_characters(STR):
     375      m = STR("ABC"*33)
     376      d = STR("D")
     377      e = STR("E")
     378      s1 = e + m + (d+m)*500
     379      s2 = e + m
     380      s1_rindex = s1.rindex
     381      for x in _RANGE_100:
     382          s1_rindex(s2)
     383  
     384  
     385  #### Same for partition
     386  
     387  @bench('("A"*1000).partition("A")', "early match, single character", 1000)
     388  def partition_test_quick_match_single_character(STR):
     389      s1 = STR("A" * 1000)
     390      s2 = STR("A")
     391      s1_partition = s1.partition
     392      for x in _RANGE_1000:
     393          s1_partition(s2)
     394  
     395  @bench('("A"*1000).partition("B")', "no match, single character", 1000)
     396  def partition_test_no_match_single_character(STR):
     397      s1 = STR("A" * 1000)
     398      s2 = STR("B")
     399      s1_partition = s1.partition
     400      for x in _RANGE_1000:
     401          s1_partition(s2)
     402  
     403  
     404  @bench('("AB"*1000).partition("AB")', "early match, two characters", 1000)
     405  def partition_test_quick_match_two_characters(STR):
     406      s1 = STR("AB" * 1000)
     407      s2 = STR("AB")
     408      s1_partition = s1.partition
     409      for x in _RANGE_1000:
     410          s1_partition(s2)
     411  
     412  @bench('("AB"*1000).partition("BC")', "no match, two characters", 1000)
     413  def partition_test_no_match_two_character(STR):
     414      s1 = STR("AB" * 1000)
     415      s2 = STR("BC")
     416      s1_partition = s1.partition
     417      for x in _RANGE_1000:
     418          s1_partition(s2)
     419  
     420  @bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000)
     421  def partition_test_slow_match_two_characters(STR):
     422      s1 = STR("AB" * 300+"C")
     423      s2 = STR("BC")
     424      s1_partition = s1.partition
     425      for x in _RANGE_1000:
     426          s1_partition(s2)
     427  
     428  @bench('s="ABC"*33; ((s+"D")*500+s+"E").partition(s+"E")',
     429         "late match, 100 characters", 100)
     430  def partition_test_slow_match_100_characters(STR):
     431      m = STR("ABC"*33)
     432      d = STR("D")
     433      e = STR("E")
     434      s1 = (m+d)*500 + m+e
     435      s2 = m+e
     436      s1_partition = s1.partition
     437      for x in _RANGE_100:
     438          s1_partition(s2)
     439  
     440  
     441  #### Same for rpartition
     442  
     443  @bench('("A"*1000).rpartition("A")', "early match, single character", 1000)
     444  def rpartition_test_quick_match_single_character(STR):
     445      s1 = STR("A" * 1000)
     446      s2 = STR("A")
     447      s1_rpartition = s1.rpartition
     448      for x in _RANGE_1000:
     449          s1_rpartition(s2)
     450  
     451  @bench('("A"*1000).rpartition("B")', "no match, single character", 1000)
     452  def rpartition_test_no_match_single_character(STR):
     453      s1 = STR("A" * 1000)
     454      s2 = STR("B")
     455      s1_rpartition = s1.rpartition
     456      for x in _RANGE_1000:
     457          s1_rpartition(s2)
     458  
     459  
     460  @bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000)
     461  def rpartition_test_quick_match_two_characters(STR):
     462      s1 = STR("AB" * 1000)
     463      s2 = STR("AB")
     464      s1_rpartition = s1.rpartition
     465      for x in _RANGE_1000:
     466          s1_rpartition(s2)
     467  
     468  @bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000)
     469  def rpartition_test_no_match_two_character(STR):
     470      s1 = STR("AB" * 1000)
     471      s2 = STR("BC")
     472      s1_rpartition = s1.rpartition
     473      for x in _RANGE_1000:
     474          s1_rpartition(s2)
     475  
     476  @bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000)
     477  def rpartition_test_slow_match_two_characters(STR):
     478      s1 = STR("C" + "AB" * 300)
     479      s2 = STR("CA")
     480      s1_rpartition = s1.rpartition
     481      for x in _RANGE_1000:
     482          s1_rpartition(s2)
     483  
     484  @bench('s="ABC"*33; ("E"+s+("D"+s)*500).rpartition("E"+s)',
     485         "late match, 100 characters", 100)
     486  def rpartition_test_slow_match_100_characters(STR):
     487      m = STR("ABC"*33)
     488      d = STR("D")
     489      e = STR("E")
     490      s1 = e + m + (d+m)*500
     491      s2 = e + m
     492      s1_rpartition = s1.rpartition
     493      for x in _RANGE_100:
     494          s1_rpartition(s2)
     495  
     496  
     497  #### Same for split(s, 1)
     498  
     499  @bench('("A"*1000).split("A", 1)', "early match, single character", 1000)
     500  def split_test_quick_match_single_character(STR):
     501      s1 = STR("A" * 1000)
     502      s2 = STR("A")
     503      s1_split = s1.split
     504      for x in _RANGE_1000:
     505          s1_split(s2, 1)
     506  
     507  @bench('("A"*1000).split("B", 1)', "no match, single character", 1000)
     508  def split_test_no_match_single_character(STR):
     509      s1 = STR("A" * 1000)
     510      s2 = STR("B")
     511      s1_split = s1.split
     512      for x in _RANGE_1000:
     513          s1_split(s2, 1)
     514  
     515  
     516  @bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000)
     517  def split_test_quick_match_two_characters(STR):
     518      s1 = STR("AB" * 1000)
     519      s2 = STR("AB")
     520      s1_split = s1.split
     521      for x in _RANGE_1000:
     522          s1_split(s2, 1)
     523  
     524  @bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000)
     525  def split_test_no_match_two_character(STR):
     526      s1 = STR("AB" * 1000)
     527      s2 = STR("BC")
     528      s1_split = s1.split
     529      for x in _RANGE_1000:
     530          s1_split(s2, 1)
     531  
     532  @bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000)
     533  def split_test_slow_match_two_characters(STR):
     534      s1 = STR("AB" * 300+"C")
     535      s2 = STR("BC")
     536      s1_split = s1.split
     537      for x in _RANGE_1000:
     538          s1_split(s2, 1)
     539  
     540  @bench('s="ABC"*33; ((s+"D")*500+s+"E").split(s+"E", 1)',
     541         "late match, 100 characters", 100)
     542  def split_test_slow_match_100_characters(STR):
     543      m = STR("ABC"*33)
     544      d = STR("D")
     545      e = STR("E")
     546      s1 = (m+d)*500 + m+e
     547      s2 = m+e
     548      s1_split = s1.split
     549      for x in _RANGE_100:
     550          s1_split(s2, 1)
     551  
     552  
     553  #### Same for rsplit(s, 1)
     554  
     555  @bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000)
     556  def rsplit_test_quick_match_single_character(STR):
     557      s1 = STR("A" * 1000)
     558      s2 = STR("A")
     559      s1_rsplit = s1.rsplit
     560      for x in _RANGE_1000:
     561          s1_rsplit(s2, 1)
     562  
     563  @bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000)
     564  def rsplit_test_no_match_single_character(STR):
     565      s1 = STR("A" * 1000)
     566      s2 = STR("B")
     567      s1_rsplit = s1.rsplit
     568      for x in _RANGE_1000:
     569          s1_rsplit(s2, 1)
     570  
     571  
     572  @bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000)
     573  def rsplit_test_quick_match_two_characters(STR):
     574      s1 = STR("AB" * 1000)
     575      s2 = STR("AB")
     576      s1_rsplit = s1.rsplit
     577      for x in _RANGE_1000:
     578          s1_rsplit(s2, 1)
     579  
     580  @bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000)
     581  def rsplit_test_no_match_two_character(STR):
     582      s1 = STR("AB" * 1000)
     583      s2 = STR("BC")
     584      s1_rsplit = s1.rsplit
     585      for x in _RANGE_1000:
     586          s1_rsplit(s2, 1)
     587  
     588  @bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000)
     589  def rsplit_test_slow_match_two_characters(STR):
     590      s1 = STR("C" + "AB" * 300)
     591      s2 = STR("CA")
     592      s1_rsplit = s1.rsplit
     593      for x in _RANGE_1000:
     594          s1_rsplit(s2, 1)
     595  
     596  @bench('s="ABC"*33; ("E"+s+("D"+s)*500).rsplit("E"+s, 1)',
     597         "late match, 100 characters", 100)
     598  def rsplit_test_slow_match_100_characters(STR):
     599      m = STR("ABC"*33)
     600      d = STR("D")
     601      e = STR("E")
     602      s1 = e + m + (d+m)*500
     603      s2 = e + m
     604      s1_rsplit = s1.rsplit
     605      for x in _RANGE_100:
     606          s1_rsplit(s2, 1)
     607  
     608  
     609  #### Benchmark the operator-based methods
     610  
     611  @bench('"A"*10', "repeat 1 character 10 times", 1000)
     612  def repeat_single_10_times(STR):
     613      s = STR("A")
     614      for x in _RANGE_1000:
     615          s * 10
     616  
     617  @bench('"A"*1000', "repeat 1 character 1000 times", 1000)
     618  def repeat_single_1000_times(STR):
     619      s = STR("A")
     620      for x in _RANGE_1000:
     621          s * 1000
     622  
     623  @bench('"ABCDE"*10', "repeat 5 characters 10 times", 1000)
     624  def repeat_5_10_times(STR):
     625      s = STR("ABCDE")
     626      for x in _RANGE_1000:
     627          s * 10
     628  
     629  @bench('"ABCDE"*1000', "repeat 5 characters 1000 times", 1000)
     630  def repeat_5_1000_times(STR):
     631      s = STR("ABCDE")
     632      for x in _RANGE_1000:
     633          s * 1000
     634  
     635  # + for concat
     636  
     637  @bench('"Andrew"+"Dalke"', "concat two strings", 1000)
     638  def concat_two_strings(STR):
     639      s1 = STR("Andrew")
     640      s2 = STR("Dalke")
     641      for x in _RANGE_1000:
     642          s1+s2
     643  
     644  @bench('s1+s2+s3+s4+...+s20', "concat 20 strings of words length 4 to 15",
     645         1000)
     646  def concat_many_strings(STR):
     647      s1=STR('TIXSGYNREDCVBHJ')
     648      s2=STR('PUMTLXBZVDO')
     649      s3=STR('FVZNJ')
     650      s4=STR('OGDXUW')
     651      s5=STR('WEIMRNCOYVGHKB')
     652      s6=STR('FCQTNMXPUZH')
     653      s7=STR('TICZJYRLBNVUEAK')
     654      s8=STR('REYB')
     655      s9=STR('PWUOQ')
     656      s10=STR('EQHCMKBS')
     657      s11=STR('AEVDFOH')
     658      s12=STR('IFHVD')
     659      s13=STR('JGTCNLXWOHQ')
     660      s14=STR('ITSKEPYLROZAWXF')
     661      s15=STR('THEK')
     662      s16=STR('GHPZFBUYCKMNJIT')
     663      s17=STR('JMUZ')
     664      s18=STR('WLZQMTB')
     665      s19=STR('KPADCBW')
     666      s20=STR('TNJHZQAGBU')
     667      for x in _RANGE_1000:
     668          (s1 + s2+ s3+ s4+ s5+ s6+ s7+ s8+ s9+s10+
     669           s11+s12+s13+s14+s15+s16+s17+s18+s19+s20)
     670  
     671  
     672  #### Benchmark join
     673  
     674  def get_bytes_yielding_seq(STR, arg):
     675      if STR is BYTES and sys.version_info >= (3,):
     676          raise UnsupportedType
     677      return STR(arg)
     678  
     679  @bench('"A".join("")',
     680         "join empty string, with 1 character sep", 100)
     681  def join_empty_single(STR):
     682      sep = STR("A")
     683      s2 = get_bytes_yielding_seq(STR, "")
     684      sep_join = sep.join
     685      for x in _RANGE_100:
     686          sep_join(s2)
     687  
     688  @bench('"ABCDE".join("")',
     689         "join empty string, with 5 character sep", 100)
     690  def join_empty_5(STR):
     691      sep = STR("ABCDE")
     692      s2 = get_bytes_yielding_seq(STR, "")
     693      sep_join = sep.join
     694      for x in _RANGE_100:
     695          sep_join(s2)
     696  
     697  @bench('"A".join("ABC..Z")',
     698         "join string with 26 characters, with 1 character sep", 1000)
     699  def join_alphabet_single(STR):
     700      sep = STR("A")
     701      s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
     702      sep_join = sep.join
     703      for x in _RANGE_1000:
     704          sep_join(s2)
     705  
     706  @bench('"ABCDE".join("ABC..Z")',
     707         "join string with 26 characters, with 5 character sep", 1000)
     708  def join_alphabet_5(STR):
     709      sep = STR("ABCDE")
     710      s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
     711      sep_join = sep.join
     712      for x in _RANGE_1000:
     713          sep_join(s2)
     714  
     715  @bench('"A".join(list("ABC..Z"))',
     716         "join list of 26 characters, with 1 character sep", 1000)
     717  def join_alphabet_list_single(STR):
     718      sep = STR("A")
     719      s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
     720      sep_join = sep.join
     721      for x in _RANGE_1000:
     722          sep_join(s2)
     723  
     724  @bench('"ABCDE".join(list("ABC..Z"))',
     725         "join list of 26 characters, with 5 character sep", 1000)
     726  def join_alphabet_list_five(STR):
     727      sep = STR("ABCDE")
     728      s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
     729      sep_join = sep.join
     730      for x in _RANGE_1000:
     731          sep_join(s2)
     732  
     733  @bench('"A".join(["Bob"]*100)',
     734         "join list of 100 words, with 1 character sep", 1000)
     735  def join_100_words_single(STR):
     736      sep = STR("A")
     737      s2 = [STR("Bob")]*100
     738      sep_join = sep.join
     739      for x in _RANGE_1000:
     740          sep_join(s2)
     741  
     742  @bench('"ABCDE".join(["Bob"]*100))',
     743         "join list of 100 words, with 5 character sep", 1000)
     744  def join_100_words_5(STR):
     745      sep = STR("ABCDE")
     746      s2 = [STR("Bob")]*100
     747      sep_join = sep.join
     748      for x in _RANGE_1000:
     749          sep_join(s2)
     750  
     751  #### split tests
     752  
     753  @bench('("Here are some words. "*2).split()', "split whitespace (small)", 1000)
     754  def whitespace_split(STR):
     755      s = STR("Here are some words. "*2)
     756      s_split = s.split
     757      for x in _RANGE_1000:
     758          s_split()
     759  
     760  @bench('("Here are some words. "*2).rsplit()', "split whitespace (small)", 1000)
     761  def whitespace_rsplit(STR):
     762      s = STR("Here are some words. "*2)
     763      s_rsplit = s.rsplit
     764      for x in _RANGE_1000:
     765          s_rsplit()
     766  
     767  @bench('("Here are some words. "*2).split(None, 1)',
     768         "split 1 whitespace", 1000)
     769  def whitespace_split_1(STR):
     770      s = STR("Here are some words. "*2)
     771      s_split = s.split
     772      N = None
     773      for x in _RANGE_1000:
     774          s_split(N, 1)
     775  
     776  @bench('("Here are some words. "*2).rsplit(None, 1)',
     777         "split 1 whitespace", 1000)
     778  def whitespace_rsplit_1(STR):
     779      s = STR("Here are some words. "*2)
     780      s_rsplit = s.rsplit
     781      N = None
     782      for x in _RANGE_1000:
     783          s_rsplit(N, 1)
     784  
     785  @bench('("Here are some words. "*2).partition(" ")',
     786         "split 1 whitespace", 1000)
     787  def whitespace_partition(STR):
     788      sep = STR(" ")
     789      s = STR("Here are some words. "*2)
     790      s_partition = s.partition
     791      for x in _RANGE_1000:
     792          s_partition(sep)
     793  
     794  @bench('("Here are some words. "*2).rpartition(" ")',
     795         "split 1 whitespace", 1000)
     796  def whitespace_rpartition(STR):
     797      sep = STR(" ")
     798      s = STR("Here are some words. "*2)
     799      s_rpartition = s.rpartition
     800      for x in _RANGE_1000:
     801          s_rpartition(sep)
     802  
# Sample English prose, repeated 25 times, used as the "huge" whitespace-split
# input.  It is converted once here into both a byte-string and a unicode
# form so the benchmarks do not pay for the conversion on every call.
human_text = """\
Python is a dynamic object-oriented programming language that can be
used for many kinds of software development. It offers strong support
for integration with other languages and tools, comes with extensive
standard libraries, and can be learned in a few days. Many Python
programmers report substantial productivity gains and feel the language
encourages the development of higher quality, more maintainable code.

Python runs on Windows, Linux/Unix, Mac OS X, Amiga, Palm
Handhelds, and Nokia mobile phones. Python has also been ported to the
Java and .NET virtual machines.

Python is distributed under an OSI-approved open source license that
makes it free to use, even for commercial products.
"""*25
human_text_bytes = bytes_from_str(human_text)
human_text_unicode = unicode_from_str(human_text)
     820  def _get_human_text(STR):
     821      if STR is UNICODE:
     822          return human_text_unicode
     823      if STR is BYTES:
     824          return human_text_bytes
     825      raise AssertionError
     826  
     827  @bench('human_text.split()', "split whitespace (huge)", 10)
     828  def whitespace_split_huge(STR):
     829      s = _get_human_text(STR)
     830      s_split = s.split
     831      for x in _RANGE_10:
     832          s_split()
     833  
     834  @bench('human_text.rsplit()', "split whitespace (huge)", 10)
     835  def whitespace_rsplit_huge(STR):
     836      s = _get_human_text(STR)
     837      s_rsplit = s.rsplit
     838      for x in _RANGE_10:
     839          s_rsplit()
     840  
     841  
     842  
     843  @bench('"this\\nis\\na\\ntest\\n".split("\\n")', "split newlines", 1000)
     844  def newlines_split(STR):
     845      s = STR("this\nis\na\ntest\n")
     846      s_split = s.split
     847      nl = STR("\n")
     848      for x in _RANGE_1000:
     849          s_split(nl)
     850  
     851  
     852  @bench('"this\\nis\\na\\ntest\\n".rsplit("\\n")', "split newlines", 1000)
     853  def newlines_rsplit(STR):
     854      s = STR("this\nis\na\ntest\n")
     855      s_rsplit = s.rsplit
     856      nl = STR("\n")
     857      for x in _RANGE_1000:
     858          s_rsplit(nl)
     859  
     860  @bench('"this\\nis\\na\\ntest\\n".splitlines()', "split newlines", 1000)
     861  def newlines_splitlines(STR):
     862      s = STR("this\nis\na\ntest\n")
     863      s_splitlines = s.splitlines
     864      for x in _RANGE_1000:
     865          s_splitlines()
     866  
     867  ## split text with 2000 newlines
     868  
     869  def _make_2000_lines():
     870      import random
     871      r = random.Random(100)
     872      chars = list(map(chr, range(32, 128)))
     873      i = 0
     874      while i < len(chars):
     875          chars[i] = " "
     876          i += r.randrange(9)
     877      s = "".join(chars)
     878      s = s*4
     879      words = []
     880      for i in range(2000):
     881          start = r.randrange(96)
     882          n = r.randint(5, 65)
     883          words.append(s[start:start+n])
     884      return "\n".join(words)+"\n"
     885  
# Build the 2000-line sample once at module level, together with its bytes
# and unicode copies, for the benchmarks that call _get_2000_lines().
_text_with_2000_lines = _make_2000_lines()
_text_with_2000_lines_bytes = bytes_from_str(_text_with_2000_lines)
_text_with_2000_lines_unicode = unicode_from_str(_text_with_2000_lines)
     889  def _get_2000_lines(STR):
     890      if STR is UNICODE:
     891          return _text_with_2000_lines_unicode
     892      if STR is BYTES:
     893          return _text_with_2000_lines_bytes
     894      raise AssertionError
     895  
     896  
     897  @bench('"...text...".split("\\n")', "split 2000 newlines", 10)
     898  def newlines_split_2000(STR):
     899      s = _get_2000_lines(STR)
     900      s_split = s.split
     901      nl = STR("\n")
     902      for x in _RANGE_10:
     903          s_split(nl)
     904  
     905  @bench('"...text...".rsplit("\\n")', "split 2000 newlines", 10)
     906  def newlines_rsplit_2000(STR):
     907      s = _get_2000_lines(STR)
     908      s_rsplit = s.rsplit
     909      nl = STR("\n")
     910      for x in _RANGE_10:
     911          s_rsplit(nl)
     912  
     913  @bench('"...text...".splitlines()', "split 2000 newlines", 10)
     914  def newlines_splitlines_2000(STR):
     915      s = _get_2000_lines(STR)
     916      s_splitlines = s.splitlines
     917      for x in _RANGE_10:
     918          s_splitlines()
     919  
     920  
     921  ## split text on "--" characters
     922  @bench(
     923      '"this--is--a--test--of--the--emergency--broadcast--system".split("--")',
     924      "split on multicharacter separator (small)", 1000)
     925  def split_multichar_sep_small(STR):
     926      s = STR("this--is--a--test--of--the--emergency--broadcast--system")
     927      s_split = s.split
     928      pat = STR("--")
     929      for x in _RANGE_1000:
     930          s_split(pat)
     931  @bench(
     932      '"this--is--a--test--of--the--emergency--broadcast--system".rsplit("--")',
     933      "split on multicharacter separator (small)", 1000)
     934  def rsplit_multichar_sep_small(STR):
     935      s = STR("this--is--a--test--of--the--emergency--broadcast--system")
     936      s_rsplit = s.rsplit
     937      pat = STR("--")
     938      for x in _RANGE_1000:
     939          s_rsplit(pat)
     940  
     941  ## split dna text on "ACTAT" characters
     942  @bench('dna.split("ACTAT")',
     943         "split on multicharacter separator (dna)", 10)
     944  def split_multichar_sep_dna(STR):
     945      s = _get_dna(STR)
     946      s_split = s.split
     947      pat = STR("ACTAT")
     948      for x in _RANGE_10:
     949          s_split(pat)
     950  
     951  @bench('dna.rsplit("ACTAT")',
     952         "split on multicharacter separator (dna)", 10)
     953  def rsplit_multichar_sep_dna(STR):
     954      s = _get_dna(STR)
     955      s_rsplit = s.rsplit
     956      pat = STR("ACTAT")
     957      for x in _RANGE_10:
     958          s_rsplit(pat)
     959  
     960  
     961  
     962  ## split with limits
     963  
# A single record of nine tab-separated fields, used by the "tab split"
# benchmarks below (the limited variants pass a maxsplit of 8).
# NOTE(review): the name suggests a GFF3 feature line -- confirm if it matters.
GFF3_example = "\t".join([
    "I", "Genomic_canonical", "region", "357208", "396183", ".", "+", ".",
    "ID=Sequence:R119;note=Clone R119%3B Genbank AF063007;Name=R119"])
     967  
     968  @bench('GFF3_example.split("\\t")', "tab split", 1000)
     969  def tab_split_no_limit(STR):
     970      sep = STR("\t")
     971      s = STR(GFF3_example)
     972      s_split = s.split
     973      for x in _RANGE_1000:
     974          s_split(sep)
     975  
     976  @bench('GFF3_example.split("\\t", 8)', "tab split", 1000)
     977  def tab_split_limit(STR):
     978      sep = STR("\t")
     979      s = STR(GFF3_example)
     980      s_split = s.split
     981      for x in _RANGE_1000:
     982          s_split(sep, 8)
     983  
     984  @bench('GFF3_example.rsplit("\\t")', "tab split", 1000)
     985  def tab_rsplit_no_limit(STR):
     986      sep = STR("\t")
     987      s = STR(GFF3_example)
     988      s_rsplit = s.rsplit
     989      for x in _RANGE_1000:
     990          s_rsplit(sep)
     991  
     992  @bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000)
     993  def tab_rsplit_limit(STR):
     994      sep = STR("\t")
     995      s = STR(GFF3_example)
     996      s_rsplit = s.rsplit
     997      for x in _RANGE_1000:
     998          s_rsplit(sep, 8)
     999  
    1000  #### Count characters
    1001  
    1002  @bench('...text.with.2000.newlines.count("\\n")',
    1003         "count newlines", 10)
    1004  def count_newlines(STR):
    1005      s = _get_2000_lines(STR)
    1006      s_count = s.count
    1007      nl = STR("\n")
    1008      for x in _RANGE_10:
    1009          s_count(nl)
    1010  
# Orchid sequences concatenated, from Biopython
# The literal below is flattened to a single line (its line breaks are
# removed) and repeated 25 times; bytes and unicode copies are then built
# once at module level for the benchmarks that call _get_dna().
_dna = """
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGGGTT
AATCTGGAGGATCTGTTTACTTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGAATTGCCATCG
AGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGCAGTTTTGCTCCAAGTCGTT
TGACACATAATTGGTGAAGGGGGTGGCATCCTTCCCTGACCCTCCCCCAACTATTTTTTTAACAACTCTC
AGCAACGGAGACTCAGTCTTCGGCAAATGCGATAAATGGTGTGAATTGCAGAATCCCGTGCACCATCGAG
TCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCATTGCGAGTCATAT
CTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCGGATGTGAGTTTGGCCCCTTGTTCTT
TGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAGGTGGACGAACTAT
GCTACAACAAAATTGTTGTGCAGAGGCCCCGGGTTGTCGTATTAGATGGGCCACCGTAATCTGAAGACCC
TTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGCGACCCCAGGTCAG
GTGAGCAACAGCTGTCGTAACAAGGTTTCCGTAGGGTGAACTGCGGAAGGATCATTGTTGAGATCACATA
ATAATTGATCGAGTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGAC
CTAGATTTGCCATCGAGCCTCCTTGGGAGCATCCTTGTTGGCGATATCTAAACCCTCAATTTTTCCCCCA
ATCAAATTACACAAAATTGGTGGAGGGGGTGGCATTCTTCCCTTACCCTCCCCCAAATATTTTTTTAACA
ACTCTCAGCAACGGATATCTCAGCTCTTGCATCGATGAAGAACCCACCGAAATGCGATAAATGGTGTGAA
TTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACG
CCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCG
GATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGATGCATGGGCTTTTGATGGTCCTAA
ATACGGCAAGAGGTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATAAG
ATGGGCCACCGATATCTGAAGACCCTTTTGGACCCCATTGGAGCCCATCAACCCATGTCAGTTGATGGCC
ATTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGA
GTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCA
TCGAGCCTCCTTGGGAGCTTTCTTGTTGGCGATATCTAAACCCTTGCCCGGCAGAGTTTTGGGAATCCCG
TGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCAT
TGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACACACCTGTTCAGCCGGTGCGGATGTGAGTTTG
GCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAG
GTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATTAGATGGGCCACCAT
AATCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGC
GACCCAGTCAGGTGAGGGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGAG
TTAATCTGGAGGATCTGTTTACTTTGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCAT
CGAGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTTGGCGCCAAGTCA
TATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAACAACTC
TCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGAATTGC
AGAATCCCGTGAACCATCGAGTCTTTGGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCT
GCCTGGGCATTGGGAATCATATCTCTCCCCTAACGAGGCTATCCAAACATACTGTTCATCCGGTGCGGAT
GTGAGTTTGGCCCCTTGTTCTTTGGTACCGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTCAAAA
CGGCAAGAGGTGGACGAACTATGCCACAACAAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTAGATG
GGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGACCA
TTTGTTGCGACCCCAGTCAGCTGAGCAACCCGCTGAGTGGAAGGTCATTGCCGATATCACATAATAATTG
ATCGAGTTAATCTGGAGGATCTGTTTACTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGATTT
GCCATCGAGCCTCCTTGGGAGTTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTGTGCGCCA
AGTCATATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAAC
AACTCTCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGA
ATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCAC
GCCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCATCCGGTGC
GGATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTC
AAAACGGCAAGAGGTGGACGAACTATGCTACAACCAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTA
GATGGGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATG
ACCATGTGTTGCGACCCCAGTCAGCTGAGCAACGCGCTGAGCGTAACAAGGTTTCCGTAGGTGGACCTCC
GGGAGGATCATTGTTGAGATCACATAATAATTGATCGAGGTAATCTGGAGGATCTGCATATTTTGGTCAC
"""
_dna = "".join(_dna.splitlines())
_dna = _dna * 25
_dna_bytes = bytes_from_str(_dna)
_dna_unicode = unicode_from_str(_dna)
    1068  
    1069  def _get_dna(STR):
    1070      if STR is UNICODE:
    1071          return _dna_unicode
    1072      if STR is BYTES:
    1073          return _dna_bytes
    1074      raise AssertionError
    1075  
    1076  @bench('dna.count("AACT")', "count AACT substrings in DNA example", 10)
    1077  def count_aact(STR):
    1078      seq = _get_dna(STR)
    1079      seq_count = seq.count
    1080      needle = STR("AACT")
    1081      for x in _RANGE_10:
    1082          seq_count(needle)
    1083  
    1084  ##### startswith and endswith
    1085  
    1086  @bench('"Andrew".startswith("A")', 'startswith single character', 1000)
    1087  def startswith_single(STR):
    1088      s1 = STR("Andrew")
    1089      s2 = STR("A")
    1090      s1_startswith = s1.startswith
    1091      for x in _RANGE_1000:
    1092          s1_startswith(s2)
    1093  
    1094  @bench('"Andrew".startswith("Andrew")', 'startswith multiple characters',
    1095         1000)
    1096  def startswith_multiple(STR):
    1097      s1 = STR("Andrew")
    1098      s2 = STR("Andrew")
    1099      s1_startswith = s1.startswith
    1100      for x in _RANGE_1000:
    1101          s1_startswith(s2)
    1102  
    1103  @bench('"Andrew".startswith("Anders")',
    1104         'startswith multiple characters - not!', 1000)
    1105  def startswith_multiple_not(STR):
    1106      s1 = STR("Andrew")
    1107      s2 = STR("Anders")
    1108      s1_startswith = s1.startswith
    1109      for x in _RANGE_1000:
    1110          s1_startswith(s2)
    1111  
    1112  
    1113  # endswith
    1114  
    1115  @bench('"Andrew".endswith("w")', 'endswith single character', 1000)
    1116  def endswith_single(STR):
    1117      s1 = STR("Andrew")
    1118      s2 = STR("w")
    1119      s1_endswith = s1.endswith
    1120      for x in _RANGE_1000:
    1121          s1_endswith(s2)
    1122  
    1123  @bench('"Andrew".endswith("Andrew")', 'endswith multiple characters', 1000)
    1124  def endswith_multiple(STR):
    1125      s1 = STR("Andrew")
    1126      s2 = STR("Andrew")
    1127      s1_endswith = s1.endswith
    1128      for x in _RANGE_1000:
    1129          s1_endswith(s2)
    1130  
    1131  @bench('"Andrew".endswith("Anders")',
    1132         'endswith multiple characters - not!', 1000)
    1133  def endswith_multiple_not(STR):
    1134      s1 = STR("Andrew")
    1135      s2 = STR("Anders")
    1136      s1_endswith = s1.endswith
    1137      for x in _RANGE_1000:
    1138          s1_endswith(s2)
    1139  
    1140  #### Strip
    1141  
    1142  @bench('"Hello!\\n".strip()', 'strip terminal newline', 1000)
    1143  def terminal_newline_strip_right(STR):
    1144      s = STR("Hello!\n")
    1145      s_strip = s.strip
    1146      for x in _RANGE_1000:
    1147          s_strip()
    1148  
    1149  @bench('"Hello!\\n".rstrip()', 'strip terminal newline', 1000)
    1150  def terminal_newline_rstrip(STR):
    1151      s = STR("Hello!\n")
    1152      s_rstrip = s.rstrip
    1153      for x in _RANGE_1000:
    1154          s_rstrip()
    1155  
    1156  @bench('"\\nHello!".strip()', 'strip terminal newline', 1000)
    1157  def terminal_newline_strip_left(STR):
    1158      s = STR("\nHello!")
    1159      s_strip = s.strip
    1160      for x in _RANGE_1000:
    1161          s_strip()
    1162  
    1163  @bench('"\\nHello!\\n".strip()', 'strip terminal newline', 1000)
    1164  def terminal_newline_strip_both(STR):
    1165      s = STR("\nHello!\n")
    1166      s_strip = s.strip
    1167      for x in _RANGE_1000:
    1168          s_strip()
    1169  
    1170  @bench('"\\nHello!".rstrip()', 'strip terminal newline', 1000)
    1171  def terminal_newline_lstrip(STR):
    1172      s = STR("\nHello!")
    1173      s_lstrip = s.lstrip
    1174      for x in _RANGE_1000:
    1175          s_lstrip()
    1176  
    1177  @bench('s="Hello!\\n"; s[:-1] if s[-1]=="\\n" else s',
    1178         'strip terminal newline', 1000)
    1179  def terminal_newline_if_else(STR):
    1180      s = STR("Hello!\n")
    1181      NL = STR("\n")
    1182      for x in _RANGE_1000:
    1183          s[:-1] if (s[-1] == NL) else s
    1184  
    1185  
    1186  # Strip multiple spaces or tabs
    1187  
    1188  @bench('"Hello\\t   \\t".strip()', 'strip terminal spaces and tabs', 1000)
    1189  def terminal_space_strip(STR):
    1190      s = STR("Hello\t   \t!")
    1191      s_strip = s.strip
    1192      for x in _RANGE_1000:
    1193          s_strip()
    1194  
    1195  @bench('"Hello\\t   \\t".rstrip()', 'strip terminal spaces and tabs', 1000)
    1196  def terminal_space_rstrip(STR):
    1197      s = STR("Hello!\t   \t")
    1198      s_rstrip = s.rstrip
    1199      for x in _RANGE_1000:
    1200          s_rstrip()
    1201  
    1202  @bench('"\\t   \\tHello".rstrip()', 'strip terminal spaces and tabs', 1000)
    1203  def terminal_space_lstrip(STR):
    1204      s = STR("\t   \tHello!")
    1205      s_lstrip = s.lstrip
    1206      for x in _RANGE_1000:
    1207          s_lstrip()
    1208  
    1209  
    1210  #### replace
    1211  @bench('"This is a test".replace(" ", "\\t")', 'replace single character',
    1212         1000)
    1213  def replace_single_character(STR):
    1214      s = STR("This is a test!")
    1215      from_str = STR(" ")
    1216      to_str = STR("\t")
    1217      s_replace = s.replace
    1218      for x in _RANGE_1000:
    1219          s_replace(from_str, to_str)
    1220  
    1221  @uses_re
    1222  @bench('re.sub(" ", "\\t", "This is a test"', 'replace single character',
    1223         1000)
    1224  def replace_single_character_re(STR):
    1225      s = STR("This is a test!")
    1226      pat = re.compile(STR(" "))
    1227      to_str = STR("\t")
    1228      pat_sub = pat.sub
    1229      for x in _RANGE_1000:
    1230          pat_sub(to_str, s)
    1231  
    1232  @bench('"...text.with.2000.lines...replace("\\n", " ")',
    1233         'replace single character, big string', 10)
    1234  def replace_single_character_big(STR):
    1235      s = _get_2000_lines(STR)
    1236      from_str = STR("\n")
    1237      to_str = STR(" ")
    1238      s_replace = s.replace
    1239      for x in _RANGE_10:
    1240          s_replace(from_str, to_str)
    1241  
    1242  @uses_re
    1243  @bench('re.sub("\\n", " ", "...text.with.2000.lines...")',
    1244         'replace single character, big string', 10)
    1245  def replace_single_character_big_re(STR):
    1246      s = _get_2000_lines(STR)
    1247      pat = re.compile(STR("\n"))
    1248      to_str = STR(" ")
    1249      pat_sub = pat.sub
    1250      for x in _RANGE_10:
    1251          pat_sub(to_str, s)
    1252  
    1253  
    1254  @bench('dna.replace("ATC", "ATT")',
    1255         'replace multiple characters, dna', 10)
    1256  def replace_multiple_characters_dna(STR):
    1257      seq = _get_dna(STR)
    1258      from_str = STR("ATC")
    1259      to_str = STR("ATT")
    1260      seq_replace = seq.replace
    1261      for x in _RANGE_10:
    1262          seq_replace(from_str, to_str)
    1263  
    1264  # This increases the character count
    1265  @bench('"...text.with.2000.newlines...replace("\\n", "\\r\\n")',
    1266         'replace and expand multiple characters, big string', 10)
    1267  def replace_multiple_character_big(STR):
    1268      s = _get_2000_lines(STR)
    1269      from_str = STR("\n")
    1270      to_str = STR("\r\n")
    1271      s_replace = s.replace
    1272      for x in _RANGE_10:
    1273          s_replace(from_str, to_str)
    1274  
    1275  
    1276  # This decreases the character count
    1277  @bench('"When shall we three meet again?".replace("ee", "")',
    1278         'replace/remove multiple characters', 1000)
    1279  def replace_multiple_character_remove(STR):
    1280      s = STR("When shall we three meet again?")
    1281      from_str = STR("ee")
    1282      to_str = STR("")
    1283      s_replace = s.replace
    1284      for x in _RANGE_1000:
    1285          s_replace(from_str, to_str)
    1286  
    1287  
# One "A" followed by 128*1024 "Z" characters, so the only "A" sits at the
# very start of a long string.  Bytes and unicode copies are built once here
# for the "quick replace" benchmarks.
big_s = "A" + ("Z"*128*1024)
big_s_bytes = bytes_from_str(big_s)
big_s_unicode = unicode_from_str(big_s)
    1291  def _get_big_s(STR):
    1292      if STR is UNICODE: return big_s_unicode
    1293      if STR is BYTES: return big_s_bytes
    1294      raise AssertionError
    1295  
    1296  # The older replace implementation counted all matches in
    1297  # the string even when it only needed to make one replacement.
    1298  @bench('("A" + ("Z"*128*1024)).replace("A", "BB", 1)',
    1299         'quick replace single character match', 10)
    1300  def quick_replace_single_match(STR):
    1301      s = _get_big_s(STR)
    1302      from_str = STR("A")
    1303      to_str = STR("BB")
    1304      s_replace = s.replace
    1305      for x in _RANGE_10:
    1306          s_replace(from_str, to_str, 1)
    1307  
    1308  @bench('("A" + ("Z"*128*1024)).replace("AZZ", "BBZZ", 1)',
    1309         'quick replace multiple character match', 10)
    1310  def quick_replace_multiple_match(STR):
    1311      s = _get_big_s(STR)
    1312      from_str = STR("AZZ")
    1313      to_str = STR("BBZZ")
    1314      s_replace = s.replace
    1315      for x in _RANGE_10:
    1316          s_replace(from_str, to_str, 1)
    1317  
    1318  
    1319  ####
    1320  
# CCP does a lot of this, for internationalisation of ingame messages.
# A %-style template with three named placeholders and the mapping that
# fills them; bytes and unicode variants of both the template and the
# mapping (keys and values converted) are built once at module level.
_format = "The %(thing)s is %(place)s the %(location)s."
_format_dict = { "thing":"THING", "place":"PLACE", "location":"LOCATION", }
_format_bytes = bytes_from_str(_format)
_format_unicode = unicode_from_str(_format)
_format_dict_bytes = dict((bytes_from_str(k), bytes_from_str(v)) for (k,v) in _format_dict.items())
_format_dict_unicode = dict((unicode_from_str(k), unicode_from_str(v)) for (k,v) in _format_dict.items())
    1328  
    1329  def _get_format(STR):
    1330      if STR is UNICODE:
    1331          return _format_unicode
    1332      if STR is BYTES:
    1333          if sys.version_info >= (3,):
    1334              raise UnsupportedType
    1335          return _format_bytes
    1336      raise AssertionError
    1337  
    1338  def _get_format_dict(STR):
    1339      if STR is UNICODE:
    1340          return _format_dict_unicode
    1341      if STR is BYTES:
    1342          if sys.version_info >= (3,):
    1343              raise UnsupportedType
    1344          return _format_dict_bytes
    1345      raise AssertionError
    1346  
    1347  # Formatting.
    1348  @bench('"The %(k1)s is %(k2)s the %(k3)s."%{"k1":"x","k2":"y","k3":"z",}',
    1349         'formatting a string type with a dict', 1000)
    1350  def format_with_dict(STR):
    1351      s = _get_format(STR)
    1352      d = _get_format_dict(STR)
    1353      for x in _RANGE_1000:
    1354          s % d
    1355  
    1356  
    1357  #### Upper- and lower- case conversion
    1358  
    1359  @bench('("Where in the world is Carmen San Deigo?"*10).lower()',
    1360         "case conversion -- rare", 1000)
    1361  def lower_conversion_rare(STR):
    1362      s = STR("Where in the world is Carmen San Deigo?"*10)
    1363      s_lower = s.lower
    1364      for x in _RANGE_1000:
    1365          s_lower()
    1366  
    1367  @bench('("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10).lower()',
    1368         "case conversion -- dense", 1000)
    1369  def lower_conversion_dense(STR):
    1370      s = STR("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10)
    1371      s_lower = s.lower
    1372      for x in _RANGE_1000:
    1373          s_lower()
    1374  
    1375  
    1376  @bench('("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10).upper()',
    1377         "case conversion -- rare", 1000)
    1378  def upper_conversion_rare(STR):
    1379      s = STR("Where in the world is Carmen San Deigo?"*10)
    1380      s_upper = s.upper
    1381      for x in _RANGE_1000:
    1382          s_upper()
    1383  
    1384  @bench('("where in the world is carmen san deigo?"*10).upper()',
    1385         "case conversion -- dense", 1000)
    1386  def upper_conversion_dense(STR):
    1387      s = STR("where in the world is carmen san deigo?"*10)
    1388      s_upper = s.upper
    1389      for x in _RANGE_1000:
    1390          s_upper()
    1391  
    1392  
    1393  # end of benchmarks
    1394  
    1395  #################
    1396  
    1397  class ESC[4;38;5;81mBenchTimer(ESC[4;38;5;149mtimeitESC[4;38;5;149m.ESC[4;38;5;149mTimer):
    1398      def best(self, repeat=1):
    1399          for i in range(1, 10):
    1400              number = 10**i
    1401              x = self.timeit(number)
    1402              if x > 0.02:
    1403                  break
    1404          times = [x]
    1405          for i in range(1, repeat):
    1406              times.append(self.timeit(number))
    1407          return min(times) / number
    1408  
    1409  def main():
    1410      (options, test_names) = parser.parse_args()
    1411      if options.bytes_only and options.unicode_only:
    1412          raise SystemExit("Only one of --8-bit and --unicode are allowed")
    1413  
    1414      bench_functions = []
    1415      for (k,v) in globals().items():
    1416          if hasattr(v, "is_bench"):
    1417              if test_names:
    1418                  for name in test_names:
    1419                      if name in v.group:
    1420                          break
    1421                  else:
    1422                      # Not selected, ignore
    1423                      continue
    1424              if options.skip_re and hasattr(v, "uses_re"):
    1425                  continue
    1426  
    1427              bench_functions.append( (v.group, k, v) )
    1428      bench_functions.sort()
    1429  
    1430      p("bytes\tunicode")
    1431      p("(in ms)\t(in ms)\t%\tcomment")
    1432  
    1433      bytes_total = uni_total = 0.0
    1434  
    1435      for title, group in itertools.groupby(bench_functions,
    1436                                        operator.itemgetter(0)):
    1437          # Flush buffer before each group
    1438          sys.stdout.flush()
    1439          p("="*10, title)
    1440          for (_, k, v) in group:
    1441              if hasattr(v, "is_bench"):
    1442                  bytes_time = 0.0
    1443                  bytes_time_s = " - "
    1444                  if not options.unicode_only:
    1445                      try:
    1446                          bytes_time = BenchTimer("__main__.%s(__main__.BYTES)" % (k,),
    1447                                                  "import __main__").best(REPEAT)
    1448                          bytes_time_s = "%.2f" % (1000 * bytes_time)
    1449                          bytes_total += bytes_time
    1450                      except UnsupportedType:
    1451                          bytes_time_s = "N/A"
    1452                  uni_time = 0.0
    1453                  uni_time_s = " - "
    1454                  if not options.bytes_only:
    1455                      try:
    1456                          uni_time = BenchTimer("__main__.%s(__main__.UNICODE)" % (k,),
    1457                                                "import __main__").best(REPEAT)
    1458                          uni_time_s = "%.2f" % (1000 * uni_time)
    1459                          uni_total += uni_time
    1460                      except UnsupportedType:
    1461                          uni_time_s = "N/A"
    1462                  try:
    1463                      average = bytes_time/uni_time
    1464                  except (TypeError, ZeroDivisionError):
    1465                      average = 0.0
    1466                  p("%s\t%s\t%.1f\t%s (*%d)" % (
    1467                      bytes_time_s, uni_time_s, 100.*average,
    1468                      v.comment, v.repeat_count))
    1469  
    1470      if bytes_total == uni_total == 0.0:
    1471          p("That was zippy!")
    1472      else:
    1473          try:
    1474              ratio = bytes_total/uni_total
    1475          except ZeroDivisionError:
    1476              ratio = 0.0
    1477          p("%.2f\t%.2f\t%.1f\t%s" % (
    1478              1000*bytes_total, 1000*uni_total, 100.*ratio,
    1479              "TOTAL"))
    1480  
# Run the benchmarks when the file is executed as a script.
if __name__ == "__main__":
    main()