#
# test_codecencodings_cn.py
# Codec encoding tests for PRC encodings.
#
5
6 from test import multibytecodec_support
7 import unittest
8
class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gb2312' encoding.

    The class only supplies data; the test methods themselves come from
    ``multibytecodec_support.TestBase``.
    """

    # Name of the codec under test.
    encoding = 'gb2312'
    # Sample data loaded by the support module for this codec.
    tstring = multibytecodec_support.load_teststring('gb2312')
    # Each entry is (input, error handler name, expected result).
    # Every input here is bytes and every non-None result is str.
    # NOTE(review): a None result presumably means the conversion is
    # expected to fail -- confirm against multibytecodec_support.TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x81\x81\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
        (b"\xc1\x64", "strict", None),
    )
21
class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gbk' encoding.

    The class only supplies data; the test methods themselves come from
    ``multibytecodec_support.TestBase``.
    """

    # Name of the codec under test.
    encoding = 'gbk'
    # Sample data loaded by the support module for this codec.
    tstring = multibytecodec_support.load_teststring('gbk')
    # Each entry is (input, error handler name, expected result).
    # bytes inputs pair with str results; the single str input ("\u30fb")
    # has no expected result.
    # NOTE(review): a None result presumably means the conversion is
    # expected to fail -- confirm against multibytecodec_support.TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
        (b"\x83\x34\x83\x31", "strict", None),
        ("\u30fb", "strict", None),
    )
35
class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gb18030' encoding.

    The class only supplies data; the test methods themselves come from
    ``multibytecodec_support.TestBase``.
    """

    # Name of the codec under test.
    encoding = 'gb18030'
    # Sample data loaded by the support module for this codec.
    tstring = multibytecodec_support.load_teststring('gb18030')
    # Each entry is (input, error handler name, expected result).
    # bytes inputs pair with str results and the str input ("\u30fb")
    # pairs with a bytes result.
    # NOTE(review): a None result presumably means the conversion is
    # expected to fail -- confirm against multibytecodec_support.TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
        # In the four-byte cases below, each rejected lead byte becomes
        # U+FFFD while the ASCII digit that follows it is kept as-is.
        (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
        ("\u30fb", "strict", b"\x819\xa79"),
        (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
        # A well-formed four-byte sequence: decodes to U+0080.
        (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
        (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
        # issue29990
        (b"\xff\x30\x81\x30", "strict", None),
        (b"\x81\x30\xff\x30", "strict", None),
        (b"abc\x81\x39\xff\x39\xc1\xc4", "replace", "abc\ufffd\x39\ufffd\x39\u804a"),
        (b"abc\xab\x36\xff\x30def", "replace", 'abc\ufffd\x36\ufffd\x30def'),
        (b"abc\xbf\x38\xff\x32\xc1\xc4", "ignore", "abc\x38\x32\u804a"),
    )
    # NOTE(review): flag presumably read by the base class to enable
    # ISO 10646 specific checks -- confirm against multibytecodec_support.
    has_iso10646 = True
59
class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'hz' encoding.

    The class only supplies data; the test methods themselves come from
    ``multibytecodec_support.TestBase``.
    """

    # Name of the codec under test.
    encoding = 'hz'
    # Sample data loaded by the support module for this codec.
    tstring = multibytecodec_support.load_teststring('hz')
    # Each entry is (input, error handler name, expected result).
    # bytes inputs pair with str results and the str input ('ab~cd')
    # pairs with a bytes result.
    # NOTE(review): a None result presumably means the conversion is
    # expected to fail -- confirm against multibytecodec_support.TestBase.
    codectests = (
        # test '~\n' (3 lines)
        # The '~\n' pairs in the input do not appear in the expected text.
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
         b'~{NpJ)l6HK!#~}Bye.\n',
         'strict',
         'This sentence is in ASCII.\n'
         'The next sentence is in GB.'
         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         'Bye.\n'),
        # test '~\n' (4 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~\n'
         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
         b'Bye.\n',
         'strict',
         'This sentence is in ASCII.\n'
         'The next sentence is in GB.'
         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         'Bye.\n'),
        # invalid bytes
        (b'ab~cd', 'replace', 'ab\uFFFDcd'),
        (b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
        (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
        (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
        # issue 30003
        ('ab~cd', 'strict', b'ab~~cd'),  # escape ~
        (b'~{Dc~~:C~}', 'strict', None),  # ~~ only in ASCII mode
        (b'~{Dc~\n:C~}', 'strict', None),  # ~\n only in ASCII mode
    )
94
# Run this module's test cases when the file is executed as a script;
# importing the module does not start the test runner.
if __name__ == "__main__":
    unittest.main()