1  """
       2  Test the implementation of the PEP 540: the UTF-8 Mode.
       3  """
       4  
       5  import locale
       6  import subprocess
       7  import sys
       8  import textwrap
       9  import unittest
      10  from test import support
      11  from test.support.script_helper import assert_python_ok, assert_python_failure
      12  from test.support import os_helper, MS_WINDOWS
      13  
      14  
      15  POSIX_LOCALES = ('C', 'POSIX')
      16  VXWORKS = (sys.platform == "vxworks")
      17  
      18  class ESC[4;38;5;81mUTF8ModeTests(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      19      DEFAULT_ENV = {
      20          'PYTHONUTF8': '',
      21          'PYTHONLEGACYWINDOWSFSENCODING': '',
      22          'PYTHONCOERCECLOCALE': '0',
      23      }
      24  
      25      def posix_locale(self):
      26          loc = locale.setlocale(locale.LC_CTYPE, None)
      27          return (loc in POSIX_LOCALES)
      28  
      29      def get_output(self, *args, failure=False, **kw):
      30          kw = dict(self.DEFAULT_ENV, **kw)
      31          if failure:
      32              out = assert_python_failure(*args, **kw)
      33              out = out[2]
      34          else:
      35              out = assert_python_ok(*args, **kw)
      36              out = out[1]
      37          return out.decode().rstrip("\n\r")
      38  
      39      @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale')
      40      def test_posix_locale(self):
      41          code = 'import sys; print(sys.flags.utf8_mode)'
      42  
      43          for loc in POSIX_LOCALES:
      44              with self.subTest(LC_ALL=loc):
      45                  out = self.get_output('-c', code, LC_ALL=loc)
      46                  self.assertEqual(out, '1')
      47  
      48      def test_xoption(self):
      49          code = 'import sys; print(sys.flags.utf8_mode)'
      50  
      51          out = self.get_output('-X', 'utf8', '-c', code)
      52          self.assertEqual(out, '1')
      53  
      54          # undocumented but accepted syntax: -X utf8=1
      55          out = self.get_output('-X', 'utf8=1', '-c', code)
      56          self.assertEqual(out, '1')
      57  
      58          out = self.get_output('-X', 'utf8=0', '-c', code)
      59          self.assertEqual(out, '0')
      60  
      61          if MS_WINDOWS:
      62              # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
      63              # and has the priority over -X utf8
      64              out = self.get_output('-X', 'utf8', '-c', code,
      65                                    PYTHONLEGACYWINDOWSFSENCODING='1')
      66              self.assertEqual(out, '0')
      67  
      68      def test_env_var(self):
      69          code = 'import sys; print(sys.flags.utf8_mode)'
      70  
      71          out = self.get_output('-c', code, PYTHONUTF8='1')
      72          self.assertEqual(out, '1')
      73  
      74          out = self.get_output('-c', code, PYTHONUTF8='0')
      75          self.assertEqual(out, '0')
      76  
      77          # -X utf8 has the priority over PYTHONUTF8
      78          out = self.get_output('-X', 'utf8=0', '-c', code, PYTHONUTF8='1')
      79          self.assertEqual(out, '0')
      80  
      81          if MS_WINDOWS:
      82              # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
      83              # and has the priority over PYTHONUTF8
      84              out = self.get_output('-X', 'utf8', '-c', code, PYTHONUTF8='1',
      85                                    PYTHONLEGACYWINDOWSFSENCODING='1')
      86              self.assertEqual(out, '0')
      87  
      88          # Cannot test with the POSIX locale, since the POSIX locale enables
      89          # the UTF-8 mode
      90          if not self.posix_locale():
      91              # PYTHONUTF8 should be ignored if -E is used
      92              out = self.get_output('-E', '-c', code, PYTHONUTF8='1')
      93              self.assertEqual(out, '0')
      94  
      95          # invalid mode
      96          out = self.get_output('-c', code, PYTHONUTF8='xxx', failure=True)
      97          self.assertIn('invalid PYTHONUTF8 environment variable value',
      98                        out.rstrip())
      99  
     100      def test_filesystemencoding(self):
     101          code = textwrap.dedent('''
     102              import sys
     103              print("{}/{}".format(sys.getfilesystemencoding(),
     104                                   sys.getfilesystemencodeerrors()))
     105          ''')
     106  
     107          if MS_WINDOWS:
     108              expected = 'utf-8/surrogatepass'
     109          else:
     110              expected = 'utf-8/surrogateescape'
     111  
     112          out = self.get_output('-X', 'utf8', '-c', code)
     113          self.assertEqual(out, expected)
     114  
     115          if MS_WINDOWS:
     116              # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
     117              # and has the priority over -X utf8 and PYTHONUTF8
     118              out = self.get_output('-X', 'utf8', '-c', code,
     119                                    PYTHONUTF8='strict',
     120                                    PYTHONLEGACYWINDOWSFSENCODING='1')
     121              self.assertEqual(out, 'mbcs/replace')
     122  
     123      def test_stdio(self):
     124          code = textwrap.dedent('''
     125              import sys
     126              print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}")
     127              print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}")
     128              print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}")
     129          ''')
     130  
     131          out = self.get_output('-X', 'utf8', '-c', code,
     132                                PYTHONIOENCODING='')
     133          self.assertEqual(out.splitlines(),
     134                           ['stdin: utf-8/surrogateescape',
     135                            'stdout: utf-8/surrogateescape',
     136                            'stderr: utf-8/backslashreplace'])
     137  
     138          # PYTHONIOENCODING has the priority over PYTHONUTF8
     139          out = self.get_output('-X', 'utf8', '-c', code,
     140                                PYTHONIOENCODING="latin1")
     141          self.assertEqual(out.splitlines(),
     142                           ['stdin: iso8859-1/strict',
     143                            'stdout: iso8859-1/strict',
     144                            'stderr: iso8859-1/backslashreplace'])
     145  
     146          out = self.get_output('-X', 'utf8', '-c', code,
     147                                PYTHONIOENCODING=":namereplace")
     148          self.assertEqual(out.splitlines(),
     149                           ['stdin: utf-8/namereplace',
     150                            'stdout: utf-8/namereplace',
     151                            'stderr: utf-8/backslashreplace'])
     152  
     153      def test_io(self):
     154          code = textwrap.dedent('''
     155              import sys
     156              filename = sys.argv[1]
     157              with open(filename) as fp:
     158                  print(f"{fp.encoding}/{fp.errors}")
     159          ''')
     160          filename = __file__
     161  
     162          out = self.get_output('-c', code, filename, PYTHONUTF8='1')
     163          self.assertEqual(out.lower(), 'utf-8/strict')
     164  
     165      def _check_io_encoding(self, module, encoding=None, errors=None):
     166          filename = __file__
     167  
     168          # Encoding explicitly set
     169          args = []
     170          if encoding:
     171              args.append(f'encoding={encoding!r}')
     172          if errors:
     173              args.append(f'errors={errors!r}')
     174          code = textwrap.dedent('''
     175              import sys
     176              from %s import open
     177              filename = sys.argv[1]
     178              with open(filename, %s) as fp:
     179                  print(f"{fp.encoding}/{fp.errors}")
     180          ''') % (module, ', '.join(args))
     181          out = self.get_output('-c', code, filename,
     182                                PYTHONUTF8='1')
     183  
     184          if not encoding:
     185              encoding = 'utf-8'
     186          if not errors:
     187              errors = 'strict'
     188          self.assertEqual(out.lower(), f'{encoding}/{errors}')
     189  
     190      def check_io_encoding(self, module):
     191          self._check_io_encoding(module, encoding="latin1")
     192          self._check_io_encoding(module, errors="namereplace")
     193          self._check_io_encoding(module,
     194                                  encoding="latin1", errors="namereplace")
     195  
     196      def test_io_encoding(self):
     197          self.check_io_encoding('io')
     198  
     199      def test_pyio_encoding(self):
     200          self.check_io_encoding('_pyio')
     201  
     202      def test_locale_getpreferredencoding(self):
     203          code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
     204          out = self.get_output('-X', 'utf8', '-c', code)
     205          self.assertEqual(out, 'utf-8 utf-8')
     206  
     207          for loc in POSIX_LOCALES:
     208              with self.subTest(LC_ALL=loc):
     209                  out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
     210                  self.assertEqual(out, 'utf-8 utf-8')
     211  
     212      @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
     213      def test_cmd_line(self):
     214          arg = 'h\xe9\u20ac'.encode('utf-8')
     215          arg_utf8 = arg.decode('utf-8')
     216          arg_ascii = arg.decode('ascii', 'surrogateescape')
     217          code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'
     218  
     219          def check(utf8_opt, expected, **kw):
     220              out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw)
     221              args = out.partition(':')[2].rstrip()
     222              self.assertEqual(args, ascii(expected), out)
     223  
     224          check('utf8', [arg_utf8])
     225          for loc in POSIX_LOCALES:
     226              with self.subTest(LC_ALL=loc):
     227                  check('utf8', [arg_utf8], LC_ALL=loc)
     228  
     229          if sys.platform == 'darwin' or support.is_android or VXWORKS:
     230              c_arg = arg_utf8
     231          elif sys.platform.startswith("aix"):
     232              c_arg = arg.decode('iso-8859-1')
     233          else:
     234              c_arg = arg_ascii
     235          for loc in POSIX_LOCALES:
     236              with self.subTest(LC_ALL=loc):
     237                  check('utf8=0', [c_arg], LC_ALL=loc)
     238  
     239      def test_optim_level(self):
     240          # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
     241          # twice when -X utf8 requires to parse the configuration twice (when
     242          # the encoding changes after reading the configuration, the
     243          # configuration is read again with the new encoding).
     244          code = 'import sys; print(sys.flags.optimize)'
     245          out = self.get_output('-X', 'utf8', '-O', '-c', code)
     246          self.assertEqual(out, '1')
     247          out = self.get_output('-X', 'utf8', '-OO', '-c', code)
     248          self.assertEqual(out, '2')
     249  
     250          code = 'import sys; print(sys.flags.ignore_environment)'
     251          out = self.get_output('-X', 'utf8', '-E', '-c', code)
     252          self.assertEqual(out, '1')
     253  
     254      @unittest.skipIf(MS_WINDOWS,
     255                       "os.device_encoding() doesn't implement "
     256                       "the UTF-8 Mode on Windows")
     257      @support.requires_subprocess()
     258      def test_device_encoding(self):
     259          # Use stdout as TTY
     260          if not sys.stdout.isatty():
     261              self.skipTest("sys.stdout is not a TTY")
     262  
     263          filename = 'out.txt'
     264          self.addCleanup(os_helper.unlink, filename)
     265  
     266          code = (f'import os, sys; fd = sys.stdout.fileno(); '
     267                  f'out = open({filename!r}, "w", encoding="utf-8"); '
     268                  f'print(os.isatty(fd), os.device_encoding(fd), file=out); '
     269                  f'out.close()')
     270          cmd = [sys.executable, '-X', 'utf8', '-c', code]
     271          # The stdout TTY is inherited to the child process
     272          proc = subprocess.run(cmd, text=True)
     273          self.assertEqual(proc.returncode, 0, proc)
     274  
     275          # In UTF-8 Mode, device_encoding(fd) returns "UTF-8" if fd is a TTY
     276          with open(filename, encoding="utf8") as fp:
     277              out = fp.read().rstrip()
     278          self.assertEqual(out, 'True utf-8')
     279  
     280  
# Run the tests with unittest when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()