(root)/
Python-3.12.0/
Lib/
test/
test_codecencodings_cn.py
       1  #
       2  # test_codecencodings_cn.py
       3  #   Codec encoding tests for PRC encodings.
       4  #
       5  
       6  from test import multibytecodec_support
       7  import unittest
       8  
       9  class ESC[4;38;5;81mTest_GB2312(ESC[4;38;5;149mmultibytecodec_supportESC[4;38;5;149m.ESC[4;38;5;149mTestBase, ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      10      encoding = 'gb2312'
      11      tstring = multibytecodec_support.load_teststring('gb2312')
      12      codectests = (
      13          # invalid bytes
      14          (b"abc\x81\x81\xc1\xc4", "strict",  None),
      15          (b"abc\xc8", "strict",  None),
      16          (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
      17          (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
      18          (b"abc\x81\x81\xc1\xc4", "ignore",  "abc\u804a"),
      19          (b"\xc1\x64", "strict", None),
      20      )
      21  
      22  class ESC[4;38;5;81mTest_GBK(ESC[4;38;5;149mmultibytecodec_supportESC[4;38;5;149m.ESC[4;38;5;149mTestBase, ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      23      encoding = 'gbk'
      24      tstring = multibytecodec_support.load_teststring('gbk')
      25      codectests = (
      26          # invalid bytes
      27          (b"abc\x80\x80\xc1\xc4", "strict",  None),
      28          (b"abc\xc8", "strict",  None),
      29          (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
      30          (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
      31          (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
      32          (b"\x83\x34\x83\x31", "strict", None),
      33          ("\u30fb", "strict", None),
      34      )
      35  
      36  class ESC[4;38;5;81mTest_GB18030(ESC[4;38;5;149mmultibytecodec_supportESC[4;38;5;149m.ESC[4;38;5;149mTestBase, ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      37      encoding = 'gb18030'
      38      tstring = multibytecodec_support.load_teststring('gb18030')
      39      codectests = (
      40          # invalid bytes
      41          (b"abc\x80\x80\xc1\xc4", "strict",  None),
      42          (b"abc\xc8", "strict",  None),
      43          (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
      44          (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
      45          (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
      46          (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
      47          ("\u30fb", "strict", b"\x819\xa79"),
      48          (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
      49          (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
      50          (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
      51          # issue29990
      52          (b"\xff\x30\x81\x30", "strict", None),
      53          (b"\x81\x30\xff\x30", "strict", None),
      54          (b"abc\x81\x39\xff\x39\xc1\xc4", "replace", "abc\ufffd\x39\ufffd\x39\u804a"),
      55          (b"abc\xab\x36\xff\x30def", "replace", 'abc\ufffd\x36\ufffd\x30def'),
      56          (b"abc\xbf\x38\xff\x32\xc1\xc4", "ignore",  "abc\x38\x32\u804a"),
      57      )
      58      has_iso10646 = True
      59  
      60  class ESC[4;38;5;81mTest_HZ(ESC[4;38;5;149mmultibytecodec_supportESC[4;38;5;149m.ESC[4;38;5;149mTestBase, ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      61      encoding = 'hz'
      62      tstring = multibytecodec_support.load_teststring('hz')
      63      codectests = (
      64          # test '~\n' (3 lines)
      65          (b'This sentence is in ASCII.\n'
      66           b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
      67           b'~{NpJ)l6HK!#~}Bye.\n',
      68           'strict',
      69           'This sentence is in ASCII.\n'
      70           'The next sentence is in GB.'
      71           '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
      72           'Bye.\n'),
      73          # test '~\n' (4 lines)
      74          (b'This sentence is in ASCII.\n'
      75           b'The next sentence is in GB.~\n'
      76           b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
      77           b'Bye.\n',
      78           'strict',
      79           'This sentence is in ASCII.\n'
      80           'The next sentence is in GB.'
      81           '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
      82           'Bye.\n'),
      83          # invalid bytes
      84          (b'ab~cd', 'replace', 'ab\uFFFDcd'),
      85          (b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
      86          (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
      87          (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
      88          (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
      89          # issue 30003
      90          ('ab~cd', 'strict',  b'ab~~cd'),  # escape ~
      91          (b'~{Dc~~:C~}', 'strict', None),  # ~~ only in ASCII mode
      92          (b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
      93      )
      94  
      95  if __name__ == "__main__":
      96      unittest.main()