1  #
       2  # Copyright (c) 2008-2012 Stefan Krah. All rights reserved.
       3  #
       4  # Redistribution and use in source and binary forms, with or without
       5  # modification, are permitted provided that the following conditions
       6  # are met:
       7  #
       8  # 1. Redistributions of source code must retain the above copyright
       9  #    notice, this list of conditions and the following disclaimer.
      10  #
      11  # 2. Redistributions in binary form must reproduce the above copyright
      12  #    notice, this list of conditions and the following disclaimer in the
      13  #    documentation and/or other materials provided with the distribution.
      14  #
      15  # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
      16  # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      17  # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      18  # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
      19  # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      20  # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      21  # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      22  # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      23  # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      24  # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      25  # SUCH DAMAGE.
      26  #
      27  
      28  
      29  # Generate PEP-3101 format strings.
      30  
      31  
      32  import os, sys, locale, random
      33  import platform, subprocess
      34  from test.support.import_helper import import_fresh_module
      35  from shutil import which
      36  
      37  C = import_fresh_module('decimal', fresh=['_decimal'])
      38  P = import_fresh_module('decimal', blocked=['_decimal'])
      39  
      40  
      41  windows_lang_strings = [
      42    "chinese", "chinese-simplified", "chinese-traditional", "czech", "danish",
      43    "dutch", "belgian", "english", "australian", "canadian", "english-nz",
      44    "english-uk", "english-us", "finnish", "french", "french-belgian",
      45    "french-canadian", "french-swiss", "german", "german-austrian",
      46    "german-swiss", "greek", "hungarian", "icelandic", "italian", "italian-swiss",
      47    "japanese", "korean", "norwegian", "norwegian-bokmal", "norwegian-nynorsk",
      48    "polish", "portuguese", "portuguese-brazil", "russian", "slovak", "spanish",
      49    "spanish-mexican", "spanish-modern", "swedish", "turkish",
      50  ]
      51  
      52  preferred_encoding = {
      53    'cs_CZ': 'ISO8859-2',
      54    'cs_CZ.iso88592': 'ISO8859-2',
      55    'czech': 'ISO8859-2',
      56    'eesti': 'ISO8859-1',
      57    'estonian': 'ISO8859-1',
      58    'et_EE': 'ISO8859-15',
      59    'et_EE.ISO-8859-15': 'ISO8859-15',
      60    'et_EE.iso885915': 'ISO8859-15',
      61    'et_EE.iso88591': 'ISO8859-1',
      62    'fi_FI.iso88591': 'ISO8859-1',
      63    'fi_FI': 'ISO8859-15',
      64    'fi_FI@euro': 'ISO8859-15',
      65    'fi_FI.iso885915@euro': 'ISO8859-15',
      66    'finnish': 'ISO8859-1',
      67    'lv_LV': 'ISO8859-13',
      68    'lv_LV.iso885913': 'ISO8859-13',
      69    'nb_NO': 'ISO8859-1',
      70    'nb_NO.iso88591': 'ISO8859-1',
      71    'bokmal': 'ISO8859-1',
      72    'nn_NO': 'ISO8859-1',
      73    'nn_NO.iso88591': 'ISO8859-1',
      74    'no_NO': 'ISO8859-1',
      75    'norwegian': 'ISO8859-1',
      76    'nynorsk': 'ISO8859-1',
      77    'ru_RU': 'ISO8859-5',
      78    'ru_RU.iso88595': 'ISO8859-5',
      79    'russian': 'ISO8859-5',
      80    'ru_RU.KOI8-R': 'KOI8-R',
      81    'ru_RU.koi8r': 'KOI8-R',
      82    'ru_RU.CP1251': 'CP1251',
      83    'ru_RU.cp1251': 'CP1251',
      84    'sk_SK': 'ISO8859-2',
      85    'sk_SK.iso88592': 'ISO8859-2',
      86    'slovak': 'ISO8859-2',
      87    'sv_FI': 'ISO8859-1',
      88    'sv_FI.iso88591': 'ISO8859-1',
      89    'sv_FI@euro': 'ISO8859-15',
      90    'sv_FI.iso885915@euro': 'ISO8859-15',
      91    'uk_UA': 'KOI8-U',
      92    'uk_UA.koi8u': 'KOI8-U'
      93  }
      94  
      95  integers = [
      96    "",
      97    "1",
      98    "12",
      99    "123",
     100    "1234",
     101    "12345",
     102    "123456",
     103    "1234567",
     104    "12345678",
     105    "123456789",
     106    "1234567890",
     107    "12345678901",
     108    "123456789012",
     109    "1234567890123",
     110    "12345678901234",
     111    "123456789012345",
     112    "1234567890123456",
     113    "12345678901234567",
     114    "123456789012345678",
     115    "1234567890123456789",
     116    "12345678901234567890",
     117    "123456789012345678901",
     118    "1234567890123456789012",
     119  ]
     120  
     121  numbers = [
     122    "0", "-0", "+0",
     123    "0.0", "-0.0", "+0.0",
     124    "0e0", "-0e0", "+0e0",
     125    ".0", "-.0",
     126    ".1", "-.1",
     127    "1.1", "-1.1",
     128    "1e1", "-1e1"
     129  ]
     130  
     131  # Get the list of available locales.
     132  if platform.system() == 'Windows':
     133      locale_list = windows_lang_strings
     134  else:
     135      locale_list = ['C']
     136      if os.path.isfile("/var/lib/locales/supported.d/local"):
     137          # On Ubuntu, `locale -a` gives the wrong case for some locales,
     138          # so we get the correct names directly:
     139          with open("/var/lib/locales/supported.d/local") as f:
     140              locale_list = [loc.split()[0] for loc in f.readlines() \
     141                             if not loc.startswith('#')]
     142      elif which('locale'):
     143          locale_list = subprocess.Popen(["locale", "-a"],
     144                            stdout=subprocess.PIPE).communicate()[0]
     145          try:
     146              locale_list = locale_list.decode()
     147          except UnicodeDecodeError:
     148              # Some distributions insist on using latin-1 characters
     149              # in their locale names.
     150              locale_list = locale_list.decode('latin-1')
     151          locale_list = locale_list.split('\n')
     152  try:
     153      locale_list.remove('')
     154  except ValueError:
     155      pass
     156  
     157  # Debian
     158  if os.path.isfile("/etc/locale.alias"):
     159      with open("/etc/locale.alias") as f:
     160          while 1:
     161              try:
     162                  line = f.readline()
     163              except UnicodeDecodeError:
     164                  continue
     165              if line == "":
     166                  break
     167              if line.startswith('#'):
     168                  continue
     169              x = line.split()
     170              if len(x) == 2:
     171                  if x[0] in locale_list:
     172                      locale_list.remove(x[0])
     173  
     174  # FreeBSD
     175  if platform.system() == 'FreeBSD':
     176      # http://www.freebsd.org/cgi/query-pr.cgi?pr=142173
     177      # en_GB.US-ASCII has 163 as the currency symbol.
     178      for loc in ['it_CH.ISO8859-1', 'it_CH.ISO8859-15', 'it_CH.UTF-8',
     179                  'it_IT.ISO8859-1', 'it_IT.ISO8859-15', 'it_IT.UTF-8',
     180                  'sl_SI.ISO8859-2', 'sl_SI.UTF-8',
     181                  'en_GB.US-ASCII']:
     182          try:
     183              locale_list.remove(loc)
     184          except ValueError:
     185              pass
     186  
# Helpers for printing a testcase in the format of the IBM tests (for runtest.c):
     188  def get_preferred_encoding():
     189      loc = locale.setlocale(locale.LC_CTYPE)
     190      if loc in preferred_encoding:
     191          return preferred_encoding[loc]
     192      else:
     193          return locale.getpreferredencoding()
     194  
     195  def printit(testno, s, fmt, encoding=None):
     196      if not encoding:
     197          encoding = get_preferred_encoding()
     198      try:
     199          result = format(P.Decimal(s), fmt)
     200          fmt = str(fmt.encode(encoding))[2:-1]
     201          result = str(result.encode(encoding))[2:-1]
     202          if "'" in result:
     203              sys.stdout.write("xfmt%d  format  %s  '%s'  ->  \"%s\"\n"
     204                               % (testno, s, fmt, result))
     205          else:
     206              sys.stdout.write("xfmt%d  format  %s  '%s'  ->  '%s'\n"
     207                               % (testno, s, fmt, result))
     208      except Exception as err:
     209          sys.stderr.write("%s  %s  %s\n" % (err, s, fmt))
     210  
     211  
     212  # Check if an integer can be converted to a valid fill character.
     213  def check_fillchar(i):
     214      try:
     215          c = chr(i)
     216          c.encode('utf-8').decode()
     217          format(P.Decimal(0), c + '<19g')
     218          return c
     219      except:
     220          return None
     221  
     222  # Generate all unicode characters that are accepted as
     223  # fill characters by decimal.py.
     224  def all_fillchars():
     225      for i in range(0, 0x110002):
     226          c = check_fillchar(i)
     227          if c: yield c
     228  
     229  # Return random fill character.
     230  def rand_fillchar():
     231      while 1:
     232          i = random.randrange(0, 0x110002)
     233          c = check_fillchar(i)
     234          if c: return c
     235  
     236  # Generate random format strings
     237  # [[fill]align][sign][#][0][width][.precision][type]
     238  def rand_format(fill, typespec='EeGgFfn%'):
     239      active = sorted(random.sample(range(7), random.randrange(8)))
     240      have_align = 0
     241      s = ''
     242      for elem in active:
     243          if elem == 0: # fill+align
     244              s += fill
     245              s += random.choice('<>=^')
     246              have_align = 1
     247          elif elem == 1: # sign
     248              s += random.choice('+- ')
     249          elif elem == 2 and not have_align: # zeropad
     250              s += '0'
     251          elif elem == 3: # width
     252              s += str(random.randrange(1, 100))
     253          elif elem == 4: # thousands separator
     254              s += ','
     255          elif elem == 5: # prec
     256              s += '.'
     257              s += str(random.randrange(100))
     258          elif elem == 6:
     259              if 4 in active: c = typespec.replace('n', '')
     260              else: c = typespec
     261              s += random.choice(c)
     262      return s
     263  
     264  # Partially brute force all possible format strings containing a thousands
     265  # separator. Fall back to random where the runtime would become excessive.
     266  # [[fill]align][sign][#][0][width][,][.precision][type]
     267  def all_format_sep():
     268      for align in ('', '<', '>', '=', '^'):
     269          for fill in ('', 'x'):
     270              if align == '': fill = ''
     271              for sign in ('', '+', '-', ' '):
     272                  for zeropad in ('', '0'):
     273                      if align != '': zeropad = ''
     274                      for width in ['']+[str(y) for y in range(1, 15)]+['101']:
     275                          for prec in ['']+['.'+str(y) for y in range(15)]:
     276                              # for type in ('', 'E', 'e', 'G', 'g', 'F', 'f', '%'):
     277                              type = random.choice(('', 'E', 'e', 'G', 'g', 'F', 'f', '%'))
     278                              yield ''.join((fill, align, sign, zeropad, width, ',', prec, type))
     279  
     280  # Partially brute force all possible format strings with an 'n' specifier.
     281  # [[fill]align][sign][#][0][width][,][.precision][type]
     282  def all_format_loc():
     283      for align in ('', '<', '>', '=', '^'):
     284          for fill in ('', 'x'):
     285              if align == '': fill = ''
     286              for sign in ('', '+', '-', ' '):
     287                  for zeropad in ('', '0'):
     288                      if align != '': zeropad = ''
     289                      for width in ['']+[str(y) for y in range(1, 20)]+['101']:
     290                          for prec in ['']+['.'+str(y) for y in range(1, 20)]:
     291                              yield ''.join((fill, align, sign, zeropad, width, prec, 'n'))
     292  
     293  # Generate random format strings with a unicode fill character
     294  # [[fill]align][sign][#][0][width][,][.precision][type]
     295  def randfill(fill):
     296      active = sorted(random.sample(range(5), random.randrange(6)))
     297      s = ''
     298      s += str(fill)
     299      s += random.choice('<>=^')
     300      for elem in active:
     301          if elem == 0: # sign
     302              s += random.choice('+- ')
     303          elif elem == 1: # width
     304              s += str(random.randrange(1, 100))
     305          elif elem == 2: # thousands separator
     306              s += ','
     307          elif elem == 3: # prec
     308              s += '.'
     309              s += str(random.randrange(100))
     310          elif elem == 4:
     311              if 2 in active: c = 'EeGgFf%'
     312              else: c = 'EeGgFfn%'
     313              s += random.choice(c)
     314      return s
     315  
     316  # Generate random format strings with random locale setting
     317  # [[fill]align][sign][#][0][width][,][.precision][type]
     318  def rand_locale():
     319      try:
     320          loc = random.choice(locale_list)
     321          locale.setlocale(locale.LC_ALL, loc)
     322      except locale.Error as err:
     323          pass
     324      active = sorted(random.sample(range(5), random.randrange(6)))
     325      s = ''
     326      have_align = 0
     327      for elem in active:
     328          if elem == 0: # fill+align
     329              s += chr(random.randrange(32, 128))
     330              s += random.choice('<>=^')
     331              have_align = 1
     332          elif elem == 1: # sign
     333              s += random.choice('+- ')
     334          elif elem == 2 and not have_align: # zeropad
     335              s += '0'
     336          elif elem == 3: # width
     337              s += str(random.randrange(1, 100))
     338          elif elem == 4: # prec
     339              s += '.'
     340              s += str(random.randrange(100))
     341      s += 'n'
     342      return s