#
# test_codecencodings_jp.py
#   Codec encoding tests for Japanese encodings.
#
5
6 from test import multibytecodec_support
7 import unittest
8
class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case for the 'cp932' encoding, fed the 'shift_jis' test string."""

    encoding = 'cp932'
    tstring = multibytecodec_support.load_teststring('shift_jis')

    # Each entry is (input, error-handler name, expected result).
    # NOTE(review): an expected value of None presumably marks input that must
    # be rejected; the exact semantics live in multibytecodec_support.TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x81\x00\x81\x00\x82\x84", "strict", None),
        (b"abc\xf8", "strict", None),
        (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"),
        (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"),
        (b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"),
        (b"ab\xEBxy", "replace", "ab\uFFFDxy"),
        (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"),
        (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'),
        # sjis vs cp932
        (b"\\\x7e", "replace", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
    )
26
# Cases shared by the EUC-family test classes in this module.
# Each entry is (input bytes, error-handler name, expected text); entries whose
# expected value is None are inputs that fail to decode under that handler.
euc_commontests = (
    # invalid bytes
    (b"abc\x80\x80\xc1\xc4", "strict", None),
    (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"),
    (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"),
    (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
    (b"abc\xc8", "strict", None),
    (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"),
    (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"),
    (b"\xc1\x64", "strict", None),
    (b"\xa1\xc0", "strict", "\uff3c"),
    (b"\xa1\xc0\\", "strict", "\uff3c\\"),
    (b"\x8eXY", "replace", "\ufffdXY"),
)
41
class Test_EUC_JIS_2004(multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec test case for 'euc_jis_2004', fed the 'euc_jisx0213' test string."""

    encoding = 'euc_jis_2004'
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = euc_commontests

    # (text, expected bytes). The expected bytes spell four of the characters
    # as XML/HTML references (&real;, &lang;, &#4660;, &rang;); they must stay
    # as ASCII references because a bytes literal cannot hold non-ASCII
    # characters.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )
51
class Test_EUC_JISX0213(multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec test case for 'euc_jisx0213', fed the 'euc_jisx0213' test string."""

    encoding = 'euc_jisx0213'
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = euc_commontests

    # (text, expected bytes). The expected bytes spell four of the characters
    # as XML/HTML references (&real;, &lang;, &#4660;, &rang;); they must stay
    # as ASCII references because a bytes literal cannot hold non-ASCII
    # characters.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )
61
class Test_EUC_JP_COMPAT(multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Codec test case for 'euc_jp': the shared EUC cases plus two extras."""

    encoding = 'euc_jp'
    tstring = multibytecodec_support.load_teststring('euc_jp')

    # The two extra entries have str input and bytes expectations (the reverse
    # of the shared cases): U+00A5 and U+203E are paired with the single ASCII
    # bytes 0x5C and 0x7E.
    codectests = euc_commontests + (
        ("\xa5", "strict", b"\x5c"),
        ("\u203e", "strict", b"\x7e"),
    )
70
# Cases shared by the Shift_JIS-family test classes in this module.
# Each entry is (input bytes, error-handler name, expected text); entries whose
# expected value is None are inputs that fail to decode under that handler.
shiftjis_commonenctests = (
    (b"abc\x80\x80\x82\x84", "strict", None),
    (b"abc\xf8", "strict", None),
    (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
)
76
class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case for 'shift_jis': the shared Shift_JIS cases plus extras."""

    encoding = 'shift_jis'
    tstring = multibytecodec_support.load_teststring('shift_jis')

    # Entries are (input bytes, error-handler name, expected text).
    codectests = shiftjis_commonenctests + (
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        (b"\\\x7e", "strict", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
        (b"abc\x81\x39", "replace", "abc\ufffd9"),
        (b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"),
        (b"abc\xFF\x58", "replace", "abc\ufffdX"),
    )
90
class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case for 'shift_jis_2004', fed the 'shift_jis' test string."""

    encoding = 'shift_jis_2004'
    tstring = multibytecodec_support.load_teststring('shift_jis')

    # Entries are (input bytes, error-handler name, expected text).
    codectests = shiftjis_commonenctests + (
        (b"\\\x7e", "strict", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
        (b"abc\xEA\xFC", "strict", "abc\u64bf"),
        (b"\x81\x39xy", "replace", "\ufffd9xy"),
        (b"\xFF\x58xy", "replace", "\ufffdXxy"),
        (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
        (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
        (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'),
    )

    # (text, expected bytes). The expected bytes spell four of the characters
    # as XML/HTML references (&real;, &lang;, &#4660;, &rang;); they must stay
    # as ASCII references because a bytes literal cannot hold non-ASCII
    # characters.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )
108
class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case for 'shift_jisx0213', fed the 'shift_jisx0213' test string."""

    encoding = 'shift_jisx0213'
    tstring = multibytecodec_support.load_teststring('shift_jisx0213')

    # Entries are (input bytes, error-handler name, expected text).
    codectests = shiftjis_commonenctests + (
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        # sjis vs cp932
        (b"\\\x7e", "replace", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
    )

    # (text, expected bytes). The expected bytes spell four of the characters
    # as XML/HTML references (&real;, &lang;, &#4660;, &rang;); they must stay
    # as ASCII references because a bytes literal cannot hold non-ASCII
    # characters.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )
124
# Run this module's test cases when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()