1 """
2 Test the implementation of the PEP 540: the UTF-8 Mode.
3 """
4
5 import locale
6 import subprocess
7 import sys
8 import textwrap
9 import unittest
10 from test import support
11 from test.support.script_helper import assert_python_ok, assert_python_failure
12 from test.support import os_helper, MS_WINDOWS
13
14
# Locale names treated as "the POSIX locale": tests pass each one as LC_ALL
# to a child interpreter, and UTF8ModeTests.posix_locale() compares the
# current LC_CTYPE locale against them.
POSIX_LOCALES = ('C', 'POSIX')

# True when sys.platform reports "vxworks"; consulted by test_cmd_line to
# pick the expected decoding of a non-ASCII command line argument.
VXWORKS = sys.platform == "vxworks"
17
class UTF8ModeTests(unittest.TestCase):
    """Checks of the UTF-8 Mode, each performed in a child interpreter.

    The tests start a separate Python process with a chosen combination of
    ``-X utf8`` options and environment variables, then compare what the
    child reports with the expected value.
    """

    # Environment entries applied to every child started by get_output();
    # keyword arguments given to get_output() override these entries.
    DEFAULT_ENV = dict(
        PYTHONUTF8='',
        PYTHONLEGACYWINDOWSFSENCODING='',
        PYTHONCOERCECLOCALE='0',
    )

    def posix_locale(self):
        """Return True if the current LC_CTYPE locale is in POSIX_LOCALES."""
        # Passing None only queries the locale, it does not change it.
        current = locale.setlocale(locale.LC_CTYPE, None)
        return current in POSIX_LOCALES

    def get_output(self, *args, failure=False, **kw):
        """Run a child interpreter and return its decoded output.

        *args* are forwarded positionally to the script helper; *kw* is
        layered on top of DEFAULT_ENV and forwarded as keyword arguments
        (the tests use it to set environment variables).

        With *failure* false the run goes through assert_python_ok() and
        element 1 of its result is used; with *failure* true it goes
        through assert_python_failure() and element 2 is used (stdout and
        stderr respectively, per the test.support.script_helper docs).
        The bytes are decoded with the default codec and trailing newline
        and carriage-return characters are removed.
        """
        env = {**self.DEFAULT_ENV, **kw}
        if failure:
            runner, index = assert_python_failure, 2
        else:
            runner, index = assert_python_ok, 1
        raw = runner(*args, **env)[index]
        return raw.decode().rstrip("\n\r")

    @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale')
    def test_posix_locale(self):
        # With LC_ALL set to a POSIX locale and no explicit option, the
        # child is expected to report utf8_mode == 1.
        code = 'import sys; print(sys.flags.utf8_mode)'

        for locale_name in POSIX_LOCALES:
            with self.subTest(LC_ALL=locale_name):
                reported = self.get_output('-c', code, LC_ALL=locale_name)
                self.assertEqual(reported, '1')

    def test_xoption(self):
        code = 'import sys; print(sys.flags.utf8_mode)'

        # (-X option value, expected sys.flags.utf8_mode).
        # "utf8=1" is an undocumented but accepted syntax.
        cases = (
            ('utf8', '1'),
            ('utf8=1', '1'),
            ('utf8=0', '0'),
        )
        for xoption, expected in cases:
            reported = self.get_output('-X', xoption, '-c', code)
            self.assertEqual(reported, expected)

        if not MS_WINDOWS:
            return

        # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
        # and has the priority over -X utf8
        reported = self.get_output('-X', 'utf8', '-c', code,
                                   PYTHONLEGACYWINDOWSFSENCODING='1')
        self.assertEqual(reported, '0')

    def test_env_var(self):
        code = 'import sys; print(sys.flags.utf8_mode)'
        run = self.get_output

        # PYTHONUTF8 alone switches the mode on or off.
        self.assertEqual(run('-c', code, PYTHONUTF8='1'), '1')
        self.assertEqual(run('-c', code, PYTHONUTF8='0'), '0')

        # -X utf8 has the priority over PYTHONUTF8
        self.assertEqual(run('-X', 'utf8=0', '-c', code, PYTHONUTF8='1'),
                         '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over PYTHONUTF8
            reported = run('-X', 'utf8', '-c', code, PYTHONUTF8='1',
                           PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(reported, '0')

        # Cannot test with the POSIX locale, since the POSIX locale enables
        # the UTF-8 mode
        if not self.posix_locale():
            # PYTHONUTF8 should be ignored if -E is used
            self.assertEqual(run('-E', '-c', code, PYTHONUTF8='1'), '0')

        # An invalid value makes the child fail with a message naming the
        # offending variable.
        stderr = run('-c', code, PYTHONUTF8='xxx', failure=True)
        self.assertIn('invalid PYTHONUTF8 environment variable value',
                      stderr.rstrip())

    def test_filesystemencoding(self):
        code = textwrap.dedent('''
            import sys
            print("{}/{}".format(sys.getfilesystemencoding(),
                                 sys.getfilesystemencodeerrors()))
        ''')

        # The expected error handler depends on the platform.
        expected = ('utf-8/surrogatepass' if MS_WINDOWS
                    else 'utf-8/surrogateescape')

        reported = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(reported, expected)

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over -X utf8 and PYTHONUTF8
            reported = self.get_output('-X', 'utf8', '-c', code,
                                       PYTHONUTF8='strict',
                                       PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(reported, 'mbcs/replace')

    def test_stdio(self):
        code = textwrap.dedent('''
            import sys
            print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}")
            print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}")
            print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}")
        ''')

        # (PYTHONIOENCODING value, expected lines printed by the child).
        # A non-empty PYTHONIOENCODING has the priority over the UTF-8 Mode
        # for the part (encoding and/or error handler) that it specifies.
        cases = (
            ('',
             ['stdin: utf-8/surrogateescape',
              'stdout: utf-8/surrogateescape',
              'stderr: utf-8/backslashreplace']),
            ("latin1",
             ['stdin: iso8859-1/strict',
              'stdout: iso8859-1/strict',
              'stderr: iso8859-1/backslashreplace']),
            (":namereplace",
             ['stdin: utf-8/namereplace',
              'stdout: utf-8/namereplace',
              'stderr: utf-8/backslashreplace']),
        )
        for io_encoding, expected_lines in cases:
            reported = self.get_output('-X', 'utf8', '-c', code,
                                       PYTHONIOENCODING=io_encoding)
            self.assertEqual(reported.splitlines(), expected_lines)

    def test_io(self):
        # open() without explicit arguments, run with PYTHONUTF8=1, is
        # expected to report utf-8/strict (compared case-insensitively).
        code = textwrap.dedent('''
            import sys
            filename = sys.argv[1]
            with open(filename) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''')

        reported = self.get_output('-c', code, __file__, PYTHONUTF8='1')
        self.assertEqual(reported.lower(), 'utf-8/strict')

    def _check_io_encoding(self, module, encoding=None, errors=None):
        """Check that explicit open() arguments are honoured in a child.

        The child imports ``open`` from *module*, opens this test file
        with whichever of *encoding* / *errors* is truthy, and prints the
        resulting ``fp.encoding/fp.errors``.  It runs with PYTHONUTF8=1.
        An argument that was not given is expected to show up as 'utf-8'
        (encoding) or 'strict' (errors); the comparison is done on the
        lower-cased output.
        """
        # Only the explicitly requested arguments are passed to open().
        requested = (('encoding', encoding), ('errors', errors))
        open_kwargs = ', '.join(f'{name}={value!r}'
                                for name, value in requested
                                if value)

        template = textwrap.dedent('''
            import sys
            from %s import open
            filename = sys.argv[1]
            with open(filename, %s) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''')
        code = template % (module, open_kwargs)
        reported = self.get_output('-c', code, __file__,
                                   PYTHONUTF8='1')

        expected = f"{encoding or 'utf-8'}/{errors or 'strict'}"
        self.assertEqual(reported.lower(), expected)

    def check_io_encoding(self, module):
        """Run _check_io_encoding() for *module* with three combinations:
        encoding only, errors only, and both together."""
        combinations = (
            {'encoding': "latin1"},
            {'errors': "namereplace"},
            {'encoding': "latin1", 'errors': "namereplace"},
        )
        for open_args in combinations:
            self._check_io_encoding(module, **open_args)

    def test_io_encoding(self):
        self.check_io_encoding('io')

    def test_pyio_encoding(self):
        self.check_io_encoding('_pyio')

    def test_locale_getpreferredencoding(self):
        code = ('import locale; '
                'print(locale.getpreferredencoding(False), '
                'locale.getpreferredencoding(True))')

        # First without touching LC_ALL ...
        reported = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(reported, 'utf-8 utf-8')

        # ... then with LC_ALL set to each POSIX locale name.
        for locale_name in POSIX_LOCALES:
            with self.subTest(LC_ALL=locale_name):
                reported = self.get_output('-X', 'utf8', '-c', code,
                                           LC_ALL=locale_name)
                self.assertEqual(reported, 'utf-8 utf-8')

    @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
    def test_cmd_line(self):
        # The argument is handed to the child as UTF-8 encoded bytes.
        raw_arg = 'h\xe9\u20ac'.encode('utf-8')
        arg_utf8 = raw_arg.decode('utf-8')
        code = ('import locale, sys; '
                'print("%s:%s" % (locale.getpreferredencoding(), '
                'ascii(sys.argv[1:])))')

        def check(utf8_opt, expected, **kw):
            out = self.get_output('-X', utf8_opt, '-c', code, raw_arg, **kw)
            # Only the part after the first ':' (the argv dump) is compared;
            # the full output is used as the failure message.
            _, _, shown = out.partition(':')
            self.assertEqual(shown.rstrip(), ascii(expected), out)

        # UTF-8 Mode enabled: the argument is decoded from UTF-8, with or
        # without a POSIX locale.
        check('utf8', [arg_utf8])
        for locale_name in POSIX_LOCALES:
            with self.subTest(LC_ALL=locale_name):
                check('utf8', [arg_utf8], LC_ALL=locale_name)

        # UTF-8 Mode disabled under a POSIX locale: the expected decoding
        # depends on the platform.
        if sys.platform == 'darwin' or support.is_android or VXWORKS:
            c_arg = arg_utf8
        elif sys.platform.startswith("aix"):
            c_arg = raw_arg.decode('iso-8859-1')
        else:
            c_arg = raw_arg.decode('ascii', 'surrogateescape')
        for locale_name in POSIX_LOCALES:
            with self.subTest(LC_ALL=locale_name):
                check('utf8=0', [c_arg], LC_ALL=locale_name)

    def test_optim_level(self):
        # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
        # twice when -X utf8 requires to parse the configuration twice (when
        # the encoding changes after reading the configuration, the
        # configuration is read again with the new encoding).
        code = 'import sys; print(sys.flags.optimize)'
        for flag, level in (('-O', '1'), ('-OO', '2')):
            reported = self.get_output('-X', 'utf8', flag, '-c', code)
            self.assertEqual(reported, level)

        # Same kind of check for the -E flag.
        code = 'import sys; print(sys.flags.ignore_environment)'
        reported = self.get_output('-X', 'utf8', '-E', '-c', code)
        self.assertEqual(reported, '1')

    @unittest.skipIf(MS_WINDOWS,
                     "os.device_encoding() doesn't implement "
                     "the UTF-8 Mode on Windows")
    @support.requires_subprocess()
    def test_device_encoding(self):
        # Use stdout as TTY
        if not sys.stdout.isatty():
            self.skipTest("sys.stdout is not a TTY")

        # The child cannot report through stdout (it must stay a TTY), so
        # it writes its findings to a file that is removed afterwards.
        filename = 'out.txt'
        self.addCleanup(os_helper.unlink, filename)

        code = ('import os, sys; fd = sys.stdout.fileno(); '
                f'out = open({filename!r}, "w", encoding="utf-8"); '
                'print(os.isatty(fd), os.device_encoding(fd), file=out); '
                'out.close()')
        # The stdout TTY is inherited to the child process
        proc = subprocess.run([sys.executable, '-X', 'utf8', '-c', code],
                              text=True)
        self.assertEqual(proc.returncode, 0, proc)

        # In UTF-8 Mode, the child is expected to have recorded that fd is
        # a TTY and that device_encoding(fd) is 'utf-8'.
        with open(filename, encoding="utf8") as fp:
            recorded = fp.read().rstrip()
        self.assertEqual(recorded, 'True utf-8')
280
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()