1 #
2 # Copyright (c) 2008-2012 Stefan Krah. All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions
6 # are met:
7 #
8 # 1. Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 #
11 # 2. Redistributions in binary form must reproduce the above copyright
12 # notice, this list of conditions and the following disclaimer in the
13 # documentation and/or other materials provided with the distribution.
14 #
15 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 # SUCH DAMAGE.
26 #
27
28
29 # Generate PEP-3101 format strings.
30
31
32 import os, sys, locale, random
33 import platform, subprocess
34 from test.support.import_helper import import_fresh_module
35 from shutil import which
36
# C: the decimal module imported afresh together with a fresh '_decimal'
# (presumably the C-accelerated implementation -- confirm against the
# test.support.import_helper documentation).
C = import_fresh_module('decimal', fresh=['_decimal'])
# P: the decimal module imported afresh with '_decimal' blocked
# (presumably leaving only the pure-Python implementation -- confirm).
# P is the module the helpers in this file use for formatting.
P = import_fresh_module('decimal', blocked=['_decimal'])
39
40
# Candidate locale names for Windows: when platform.system() reports
# 'Windows', this table is used as the list of locales instead of
# querying the system for the installed ones.
windows_lang_strings = [
    "chinese", "chinese-simplified", "chinese-traditional", "czech", "danish",
    "dutch", "belgian", "english", "australian", "canadian", "english-nz",
    "english-uk", "english-us", "finnish", "french", "french-belgian",
    "french-canadian", "french-swiss", "german", "german-austrian",
    "german-swiss", "greek", "hungarian", "icelandic", "italian", "italian-swiss",
    "japanese", "korean", "norwegian", "norwegian-bokmal", "norwegian-nynorsk",
    "polish", "portuguese", "portuguese-brazil", "russian", "slovak", "spanish",
    "spanish-mexican", "spanish-modern", "swedish", "turkish",
]
51
# Maps a locale name, as reported by locale.setlocale(locale.LC_CTYPE), to
# the name of the encoding to use for it.  get_preferred_encoding() falls
# back to locale.getpreferredencoding() for any locale not listed here.
preferred_encoding = {
    'cs_CZ': 'ISO8859-2',
    'cs_CZ.iso88592': 'ISO8859-2',
    'czech': 'ISO8859-2',
    'eesti': 'ISO8859-1',
    'estonian': 'ISO8859-1',
    'et_EE': 'ISO8859-15',
    'et_EE.ISO-8859-15': 'ISO8859-15',
    'et_EE.iso885915': 'ISO8859-15',
    'et_EE.iso88591': 'ISO8859-1',
    'fi_FI.iso88591': 'ISO8859-1',
    'fi_FI': 'ISO8859-15',
    'fi_FI@euro': 'ISO8859-15',
    'fi_FI.iso885915@euro': 'ISO8859-15',
    'finnish': 'ISO8859-1',
    'lv_LV': 'ISO8859-13',
    'lv_LV.iso885913': 'ISO8859-13',
    'nb_NO': 'ISO8859-1',
    'nb_NO.iso88591': 'ISO8859-1',
    'bokmal': 'ISO8859-1',
    'nn_NO': 'ISO8859-1',
    'nn_NO.iso88591': 'ISO8859-1',
    'no_NO': 'ISO8859-1',
    'norwegian': 'ISO8859-1',
    'nynorsk': 'ISO8859-1',
    'ru_RU': 'ISO8859-5',
    'ru_RU.iso88595': 'ISO8859-5',
    'russian': 'ISO8859-5',
    'ru_RU.KOI8-R': 'KOI8-R',
    'ru_RU.koi8r': 'KOI8-R',
    'ru_RU.CP1251': 'CP1251',
    'ru_RU.cp1251': 'CP1251',
    'sk_SK': 'ISO8859-2',
    'sk_SK.iso88592': 'ISO8859-2',
    'slovak': 'ISO8859-2',
    'sv_FI': 'ISO8859-1',
    'sv_FI.iso88591': 'ISO8859-1',
    'sv_FI@euro': 'ISO8859-15',
    'sv_FI.iso885915@euro': 'ISO8859-15',
    'uk_UA': 'KOI8-U',
    'uk_UA.koi8u': 'KOI8-U'
}
94
# Digit strings of every length from 0 (the empty string) up to 22, each
# one a prefix of the longest: "", "1", "12", ..., "1234567890123456789012".
integers = ["1234567890123456789012"[:length] for length in range(23)]
120
# Numeric strings: zeros written in integer, fractional and exponent
# notation with and without an explicit sign (including forms with no
# digit before the decimal point), plus a few small non-zero values.
numbers = [
    "0", "-0", "+0",
    "0.0", "-0.0", "+0.0",
    "0e0", "-0e0", "+0e0",
    ".0", "-.0",
    ".1", "-.1",
    "1.1", "-1.1",
    "1e1", "-1e1"
]
130
# Get the list of available locales.
if platform.system() == 'Windows':
    # Copy the table so that pruning locale_list below can never modify
    # windows_lang_strings itself.
    locale_list = list(windows_lang_strings)
else:
    locale_list = ['C']
    if os.path.isfile("/var/lib/locales/supported.d/local"):
        # On Ubuntu, `locale -a` gives the wrong case for some locales,
        # so we get the correct names directly.  The name is the first
        # whitespace-separated field of each line; comment lines and
        # blank lines (which have no first field) are skipped.
        with open("/var/lib/locales/supported.d/local") as f:
            locale_list = [loc.split()[0] for loc in f
                           if not loc.startswith('#') and loc.strip()]
    elif which('locale'):
        locale_list = subprocess.Popen(["locale", "-a"],
                          stdout=subprocess.PIPE).communicate()[0]
        try:
            locale_list = locale_list.decode()
        except UnicodeDecodeError:
            # Some distributions insist on using latin-1 characters
            # in their locale names.
            locale_list = locale_list.decode('latin-1')
        locale_list = locale_list.split('\n')

# Splitting the `locale -a` output leaves empty strings behind (at least
# one for the trailing newline).  Drop all of them, not just the first.
locale_list = [loc for loc in locale_list if loc]

# Debian: every line of /etc/locale.alias that consists of exactly two
# fields names, in its first field, an entry to drop from the list.
if os.path.isfile("/etc/locale.alias"):
    with open("/etc/locale.alias") as f:
        while True:
            try:
                line = f.readline()
            except UnicodeDecodeError:
                # Skip text that cannot be decoded and keep reading.
                continue
            if line == "":
                # readline() returns '' only at end of file.
                break
            if line.startswith('#'):
                continue
            fields = line.split()
            if len(fields) == 2 and fields[0] in locale_list:
                locale_list.remove(fields[0])

# FreeBSD
if platform.system() == 'FreeBSD':
    # http://www.freebsd.org/cgi/query-pr.cgi?pr=142173
    # en_GB.US-ASCII has 163 as the currency symbol.
    for loc in ['it_CH.ISO8859-1', 'it_CH.ISO8859-15', 'it_CH.UTF-8',
                'it_IT.ISO8859-1', 'it_IT.ISO8859-15', 'it_IT.UTF-8',
                'sl_SI.ISO8859-2', 'sl_SI.UTF-8',
                'en_GB.US-ASCII']:
        try:
            locale_list.remove(loc)
        except ValueError:
            # Locale not present on this system: nothing to remove.
            pass
186
# Helpers for printing a testcase in the format of the IBM tests (for runtest.c):
def get_preferred_encoding():
    """Return the encoding to use for the current LC_CTYPE locale.

    The locale name reported by locale.setlocale(locale.LC_CTYPE) is
    looked up in the preferred_encoding table.  If the table has no entry
    for it, the result of locale.getpreferredencoding() is returned.
    """
    current = locale.setlocale(locale.LC_CTYPE)
    if current not in preferred_encoding:
        return locale.getpreferredencoding()
    return preferred_encoding[current]
194
def printit(testno, s, fmt, encoding=None):
    """Write one testcase in the format of the IBM tests to stdout.

    The line has the shape

        xfmt<testno> format <s> '<fmt>' -> '<result>'

    where <result> is format(P.Decimal(s), fmt).  Both fmt and the result
    are encoded with `encoding` and shown in escaped form.  The result is
    wrapped in double quotes instead of single quotes when its escaped
    form contains a single quote.

    testno   -- number appended to the "xfmt" test id
    s        -- string passed to P.Decimal()
    fmt      -- format specification to apply
    encoding -- encoding name; when falsy, get_preferred_encoding() is used

    Any Exception raised while formatting, encoding or writing is reported
    on stderr as "<error> <s> <fmt>" instead of being propagated.
    """
    if not encoding:
        encoding = get_preferred_encoding()

    def escaped(text):
        # str() of a bytes object is its literal form (b'...'); cutting off
        # the first two characters and the last one leaves the payload.
        return str(text.encode(encoding))[2:-1]

    try:
        result = format(P.Decimal(s), fmt)
        fmt = escaped(fmt)
        result = escaped(result)
        if "'" in result:
            template = "xfmt%d format %s '%s' -> \"%s\"\n"
        else:
            template = "xfmt%d format %s '%s' -> '%s'\n"
        sys.stdout.write(template % (testno, s, fmt, result))
    except Exception as err:
        sys.stderr.write("%s %s %s\n" % (err, s, fmt))
210
211
# Check if an integer can be converted to a valid fill character.
def check_fillchar(i):
    """Return chr(i) if it is usable as a fill character, otherwise None.

    The character is accepted when all of the following succeed:
      - chr(i) itself,
      - encoding the character to UTF-8 and decoding it again,
      - formatting P.Decimal(0) with the spec <char> + '<19g'.

    Any Exception raised by these steps means "not usable".
    KeyboardInterrupt and SystemExit are deliberately not caught, so the
    long-running loops that call this function can still be interrupted.
    """
    try:
        c = chr(i)
        c.encode('utf-8').decode()
        format(P.Decimal(0), c + '<19g')
    except Exception:
        return None
    return c
221
# Generate all unicode characters that are accepted as
# fill characters by decimal.py.
def all_fillchars():
    """Yield every accepted fill character in ascending code point order.

    Each integer in range(0, 0x110002) is passed to check_fillchar(), and
    the results that are truthy are yielded lazily.
    """
    yield from filter(None, map(check_fillchar, range(0, 0x110002)))
228
# Return random fill character.
def rand_fillchar():
    """Return a randomly chosen character accepted by check_fillchar().

    Integers are drawn from range(0, 0x110002) until check_fillchar()
    returns a truthy result, which is then returned.
    """
    candidate = None
    while not candidate:
        candidate = check_fillchar(random.randrange(0, 0x110002))
    return candidate
235
# Generate random format strings
# [[fill]align][sign][0][width][,][.precision][type]
def rand_format(fill, typespec='EeGgFfn%'):
    """Return a random format specification.

    A random subset of the seven components (fill+align, sign, zero
    padding, width, thousands separator, precision, type) is selected and
    emitted in that order.

    fill     -- string used as the fill character when fill+align is chosen
    typespec -- characters the type may be drawn from

    Zero padding is only emitted when no fill+align was emitted.  When the
    thousands separator is selected, 'n' is removed from the candidate
    types.  If no candidate type is left (for example typespec='n'
    together with the separator, or an empty typespec), the type is
    omitted instead of raising IndexError from random.choice().
    """
    active = sorted(random.sample(range(7), random.randrange(8)))
    have_align = False
    parts = []
    for elem in active:
        if elem == 0:  # fill+align
            parts.append(fill)
            parts.append(random.choice('<>=^'))
            have_align = True
        elif elem == 1:  # sign
            parts.append(random.choice('+- '))
        elif elem == 2:  # zeropad
            if not have_align:
                parts.append('0')
        elif elem == 3:  # width
            parts.append(str(random.randrange(1, 100)))
        elif elem == 4:  # thousands separator
            parts.append(',')
        elif elem == 5:  # prec
            parts.append('.')
            parts.append(str(random.randrange(100)))
        elif elem == 6:  # type
            if 4 in active:
                candidates = typespec.replace('n', '')
            else:
                candidates = typespec
            if candidates:
                parts.append(random.choice(candidates))
    return ''.join(parts)
263
# Partially brute force all possible format strings containing a thousands
# separator. Fall back to random where the runtime would become excessive.
# [[fill]align][sign][#][0][width][,][.precision][type]
def all_format_sep():
    """Yield format specifications that all contain a ',' separator.

    Alignment, fill, sign, zero padding, width and precision are
    enumerated exhaustively.  The type is picked at random for every
    yielded string rather than enumerated, to keep the runtime down.

    The fill is forced to '' when there is no alignment, and the zero
    padding is forced to '' when there is one, so some combinations are
    visited more than once.
    """
    type_choices = ('', 'E', 'e', 'G', 'g', 'F', 'f', '%')
    widths = [''] + [str(w) for w in range(1, 15)] + ['101']
    precisions = [''] + ['.' + str(p) for p in range(15)]
    for align in ('', '<', '>', '=', '^'):
        for fill in ('', 'x'):
            if not align:
                fill = ''
            for sign in ('', '+', '-', ' '):
                for zeropad in ('', '0'):
                    if align:
                        zeropad = ''
                    head = fill + align + sign + zeropad
                    for width in widths:
                        for prec in precisions:
                            typ = random.choice(type_choices)
                            yield head + width + ',' + prec + typ
279
# Partially brute force all possible format strings with an 'n' specifier.
# [[fill]align][sign][#][0][width][,][.precision][type]
def all_format_loc():
    """Yield format specifications that all end in the 'n' type.

    Alignment, fill, sign, zero padding, width ('' / 1..19 / 101) and
    precision ('' / .1 .. .19) are enumerated exhaustively.  No thousands
    separator is emitted.

    The fill is forced to '' when there is no alignment, and the zero
    padding is forced to '' when there is one, so some combinations are
    visited more than once.
    """
    widths = [''] + [str(w) for w in range(1, 20)] + ['101']
    precisions = [''] + ['.' + str(p) for p in range(1, 20)]
    for align in ('', '<', '>', '=', '^'):
        for fill in ('', 'x'):
            if not align:
                fill = ''
            for sign in ('', '+', '-', ' '):
                for zeropad in ('', '0'):
                    if align:
                        zeropad = ''
                    head = fill + align + sign + zeropad
                    for width in widths:
                        for prec in precisions:
                            yield head + width + prec + 'n'
292
# Generate random format strings with a unicode fill character
# fill align [sign][width][,][.precision][type]
def randfill(fill):
    """Return a random format specification that starts with fill+align.

    str(fill) and a random alignment character are always emitted.  A
    random subset of sign, width, thousands separator, precision and type
    follows in that order.  The type is never 'n' when the thousands
    separator was selected.
    """
    count = random.randrange(6)
    active = sorted(random.sample(range(5), count))
    with_separator = 2 in active
    pieces = [str(fill), random.choice('<>=^')]
    for component in active:
        if component == 0:  # sign
            pieces.append(random.choice('+- '))
        elif component == 1:  # width
            pieces.append(str(random.randrange(1, 100)))
        elif component == 2:  # thousands separator
            pieces.append(',')
        elif component == 3:  # prec
            pieces.append('.' + str(random.randrange(100)))
        elif component == 4:  # type
            pieces.append(random.choice('EeGgFf%' if with_separator
                                        else 'EeGgFfn%'))
    return ''.join(pieces)
315
# Generate random format strings with random locale setting
# [[fill]align][sign][0][width][.precision]n
def rand_locale():
    """Switch to a random locale and return a random 'n' format spec.

    A name is drawn from locale_list and installed for LC_ALL.  If
    setlocale() rejects it with locale.Error, the locale is left as it is.
    A random subset of fill+align, sign, zero padding, width and precision
    is then emitted in that order, followed by the 'n' type.

    The fill is a random character with a code point in 32..127.  Zero
    padding is only emitted when no fill+align was emitted.
    """
    try:
        locale.setlocale(locale.LC_ALL, random.choice(locale_list))
    except locale.Error:
        pass
    count = random.randrange(6)
    active = sorted(random.sample(range(5), count))
    pieces = []
    aligned = False
    for component in active:
        if component == 0:  # fill+align
            pieces.append(chr(random.randrange(32, 128)))
            pieces.append(random.choice('<>=^'))
            aligned = True
        elif component == 1:  # sign
            pieces.append(random.choice('+- '))
        elif component == 2:  # zeropad
            if not aligned:
                pieces.append('0')
        elif component == 3:  # width
            pieces.append(str(random.randrange(1, 100)))
        elif component == 4:  # prec
            pieces.append('.' + str(random.randrange(100)))
    pieces.append('n')
    return ''.join(pieces)