1 """
Test the implementation of PEP 540: the UTF-8 Mode.
3 """
4
5 import locale
6 import subprocess
7 import sys
8 import textwrap
9 import unittest
10 from test import support
11 from test.support.script_helper import assert_python_ok, assert_python_failure
12 from test.support import os_helper
13
14
# Platform switches consulted by the skip decorators and by the expected
# values in the tests below.
MS_WINDOWS = sys.platform == 'win32'
VXWORKS = sys.platform == "vxworks"

# Locale names that the tests treat as "the POSIX locale".
POSIX_LOCALES = ('C', 'POSIX')
18
class UTF8ModeTests(unittest.TestCase):
    """Check how the interpreter behaves when the UTF-8 Mode is on or off.

    Most tests start a child interpreter through get_output() and compare
    what it prints with the expected value.
    """

    # Environment variables given to every child interpreter started by
    # get_output(); a test overrides an entry by passing it as a keyword
    # argument.
    DEFAULT_ENV = {
        'PYTHONUTF8': '',
        'PYTHONLEGACYWINDOWSFSENCODING': '',
        'PYTHONCOERCECLOCALE': '0',
    }

    def posix_locale(self):
        """Return True if the current LC_CTYPE locale is in POSIX_LOCALES."""
        loc = locale.setlocale(locale.LC_CTYPE, None)
        return (loc in POSIX_LOCALES)

    def get_output(self, *args, failure=False, **kw):
        """Run a child Python with *args* and return its decoded output.

        Keyword arguments are merged over DEFAULT_ENV and forwarded to
        assert_python_ok() (or assert_python_failure() when *failure* is
        true).  Item 1 of the result is returned on success and item 2 on
        failure (stdout and stderr respectively), decoded with the default
        codec and stripped of trailing newline characters.
        """
        kw = dict(self.DEFAULT_ENV, **kw)
        if failure:
            out = assert_python_failure(*args, **kw)
            out = out[2]
        else:
            out = assert_python_ok(*args, **kw)
            out = out[1]
        return out.decode().rstrip("\n\r")

    # The POSIX locale alone is expected to enable the UTF-8 Mode.
    @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale')
    def test_posix_locale(self):
        code = 'import sys; print(sys.flags.utf8_mode)'

        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                out = self.get_output('-c', code, LC_ALL=loc)
                self.assertEqual(out, '1')

    # The -X utf8 command line option controls sys.flags.utf8_mode.
    def test_xoption(self):
        code = 'import sys; print(sys.flags.utf8_mode)'

        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, '1')

        # undocumented but accepted syntax: -X utf8=1
        out = self.get_output('-X', 'utf8=1', '-c', code)
        self.assertEqual(out, '1')

        out = self.get_output('-X', 'utf8=0', '-c', code)
        self.assertEqual(out, '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
            # and has the priority over -X utf8
            out = self.get_output('-X', 'utf8', '-c', code,
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, '0')

    # The PYTHONUTF8 environment variable controls sys.flags.utf8_mode.
    def test_env_var(self):
        code = 'import sys; print(sys.flags.utf8_mode)'

        out = self.get_output('-c', code, PYTHONUTF8='1')
        self.assertEqual(out, '1')

        out = self.get_output('-c', code, PYTHONUTF8='0')
        self.assertEqual(out, '0')

        # -X utf8 has the priority over PYTHONUTF8
        out = self.get_output('-X', 'utf8=0', '-c', code, PYTHONUTF8='1')
        self.assertEqual(out, '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over PYTHONUTF8
            out = self.get_output('-X', 'utf8', '-c', code, PYTHONUTF8='1',
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, '0')

        # Cannot test with the POSIX locale, since the POSIX locale enables
        # the UTF-8 mode
        if not self.posix_locale():
            # PYTHONUTF8 should be ignored if -E is used
            out = self.get_output('-E', '-c', code, PYTHONUTF8='1')
            self.assertEqual(out, '0')

        # invalid mode: the child must fail and report the bad value
        out = self.get_output('-c', code, PYTHONUTF8='xxx', failure=True)
        self.assertIn('invalid PYTHONUTF8 environment variable value',
                      out.rstrip())

    # The filesystem encoding and its error handler under -X utf8.
    def test_filesystemencoding(self):
        code = textwrap.dedent('''
            import sys
            print("{}/{}".format(sys.getfilesystemencoding(),
                                 sys.getfilesystemencodeerrors()))
        ''')

        # Only the error handler differs between Windows and other platforms.
        if MS_WINDOWS:
            expected = 'utf-8/surrogatepass'
        else:
            expected = 'utf-8/surrogateescape'

        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, expected)

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over -X utf8 and PYTHONUTF8
            out = self.get_output('-X', 'utf8', '-c', code,
                                  PYTHONUTF8='strict',
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, 'mbcs/replace')

    # Encoding and error handler of the three standard streams under
    # -X utf8, alone and combined with PYTHONIOENCODING.
    def test_stdio(self):
        code = textwrap.dedent('''
            import sys
            print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}")
            print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}")
            print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}")
        ''')

        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING='')
        self.assertEqual(out.splitlines(),
                         ['stdin: utf-8/surrogateescape',
                          'stdout: utf-8/surrogateescape',
                          'stderr: utf-8/backslashreplace'])

        # PYTHONIOENCODING has the priority over PYTHONUTF8
        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING="latin1")
        self.assertEqual(out.splitlines(),
                         ['stdin: iso8859-1/strict',
                          'stdout: iso8859-1/strict',
                          'stderr: iso8859-1/backslashreplace'])

        # Only the error handler is given: the encoding stays utf-8.
        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING=":namereplace")
        self.assertEqual(out.splitlines(),
                         ['stdin: utf-8/namereplace',
                          'stdout: utf-8/namereplace',
                          'stderr: utf-8/backslashreplace'])

    # open() without an explicit encoding uses utf-8/strict when PYTHONUTF8=1.
    def test_io(self):
        code = textwrap.dedent('''
            import sys
            filename = sys.argv[1]
            with open(filename) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''')
        # Any existing text file will do; this test file is always present.
        filename = __file__

        out = self.get_output('-c', code, filename, PYTHONUTF8='1')
        self.assertEqual(out.lower(), 'utf-8/strict')

    def _check_io_encoding(self, module, encoding=None, errors=None):
        """Check that explicit open() arguments win over the UTF-8 Mode.

        A child interpreter run with PYTHONUTF8=1 imports open() from
        *module* and opens this file with the given *encoding* and/or
        *errors*.  The file object must report those values; an argument
        that was not given is expected to default to 'utf-8' / 'strict'.
        """
        filename = __file__

        # Encoding explicitly set
        args = []
        if encoding:
            args.append(f'encoding={encoding!r}')
        if errors:
            args.append(f'errors={errors!r}')
        code = textwrap.dedent('''
            import sys
            from %s import open
            filename = sys.argv[1]
            with open(filename, %s) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''') % (module, ', '.join(args))
        out = self.get_output('-c', code, filename,
                              PYTHONUTF8='1')

        # Fill in the defaults expected for arguments that were omitted.
        if not encoding:
            encoding = 'utf-8'
        if not errors:
            errors = 'strict'
        self.assertEqual(out.lower(), f'{encoding}/{errors}')

    def check_io_encoding(self, module):
        """Run _check_io_encoding() for encoding only, errors only, and both."""
        self._check_io_encoding(module, encoding="latin1")
        self._check_io_encoding(module, errors="namereplace")
        self._check_io_encoding(module,
                                encoding="latin1", errors="namereplace")

    # Same checks against the open() of the io module ...
    def test_io_encoding(self):
        self.check_io_encoding('io')

    # ... and against the open() of the _pyio module.
    def test_pyio_encoding(self):
        self.check_io_encoding('_pyio')

    # locale.getpreferredencoding() reports utf-8 under -X utf8, whatever
    # the do_setlocale argument and even with a POSIX locale.
    def test_locale_getpreferredencoding(self):
        code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, 'utf-8 utf-8')

        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
                self.assertEqual(out, 'utf-8 utf-8')

    # Decoding of non-ASCII command line arguments with the UTF-8 Mode
    # enabled and disabled.
    @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
    def test_cmd_line(self):
        # The argument is passed as UTF-8 encoded bytes.
        arg = 'h\xe9\u20ac'.encode('utf-8')
        arg_utf8 = arg.decode('utf-8')
        arg_ascii = arg.decode('ascii', 'surrogateescape')
        code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'

        def check(utf8_opt, expected, **kw):
            out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw)
            # Keep only what follows the "<encoding>:" prefix; the whole
            # output is used as the failure message.
            args = out.partition(':')[2].rstrip()
            self.assertEqual(args, ascii(expected), out)

        check('utf8', [arg_utf8])
        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                check('utf8', [arg_utf8], LC_ALL=loc)

        # With the UTF-8 Mode disabled, the expected decoding of the
        # argument under a POSIX locale depends on the platform.
        if sys.platform == 'darwin' or support.is_android or VXWORKS:
            c_arg = arg_utf8
        elif sys.platform.startswith("aix"):
            c_arg = arg.decode('iso-8859-1')
        else:
            c_arg = arg_ascii
        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                check('utf8=0', [c_arg], LC_ALL=loc)

    def test_optim_level(self):
        # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
        # twice when -X utf8 requires to parse the configuration twice (when
        # the encoding changes after reading the configuration, the
        # configuration is read again with the new encoding).
        code = 'import sys; print(sys.flags.optimize)'
        out = self.get_output('-X', 'utf8', '-O', '-c', code)
        self.assertEqual(out, '1')
        out = self.get_output('-X', 'utf8', '-OO', '-c', code)
        self.assertEqual(out, '2')

        # Same check for the -E flag.
        code = 'import sys; print(sys.flags.ignore_environment)'
        out = self.get_output('-X', 'utf8', '-E', '-c', code)
        self.assertEqual(out, '1')

    @unittest.skipIf(MS_WINDOWS,
                     "os.device_encoding() doesn't implement "
                     "the UTF-8 Mode on Windows")
    @support.requires_subprocess()
    def test_device_encoding(self):
        # Use stdout as TTY
        if not sys.stdout.isatty():
            self.skipTest("sys.stdout is not a TTY")

        # The child writes its result to a file because its stdout must
        # stay attached to the TTY and so cannot be captured.
        filename = 'out.txt'
        self.addCleanup(os_helper.unlink, filename)

        code = (f'import os, sys; fd = sys.stdout.fileno(); '
                f'out = open({filename!r}, "w", encoding="utf-8"); '
                f'print(os.isatty(fd), os.device_encoding(fd), file=out); '
                f'out.close()')
        cmd = [sys.executable, '-X', 'utf8', '-c', code]
        # The stdout TTY is inherited to the child process
        proc = subprocess.run(cmd, text=True)
        self.assertEqual(proc.returncode, 0, proc)

        # In UTF-8 Mode, os.device_encoding(fd) is expected to report
        # "utf-8" when fd is a TTY
        with open(filename, encoding="utf8") as fp:
            out = fp.read().rstrip()
        self.assertEqual(out, 'True utf-8')
280
281
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()