# python (3.12.0)
1 import unittest
2 import sys
3 from test.support import import_helper
4
# Module exposing the C-level wrappers (unicode_decode, unicode_asutf8string,
# ...) that every test below calls.
# NOTE(review): import_helper.import_module() presumably skips these tests
# when _testcapi is not built -- confirm against the test.support docs.
_testcapi = import_helper.import_module('_testcapi')

# Readable alias used by the tests wherever an argument is deliberately
# "absent"; presumably the _testcapi wrappers translate None into a C NULL
# pointer -- confirm against the wrapper implementation.
NULL = None
8
9
class CAPITest(unittest.TestCase):
    """Tests for the codec-related PyUnicode_* C API functions.

    Each test calls a ``_testcapi.unicode_*`` wrapper and checks the value
    it returns or the exception it raises.  ``NULL`` (an alias of ``None``)
    is passed where a test deliberately supplies no object, encoding or
    mapping.  Calls noted as ``# CRASHES`` are intentionally not executed.
    """

    # TODO: Test the following functions:
    #
    #   PyUnicode_BuildEncodingMap
    #   PyUnicode_FSConverter
    #   PyUnicode_FSDecoder
    #   PyUnicode_DecodeMBCS
    #   PyUnicode_DecodeMBCSStateful
    #   PyUnicode_DecodeCodePageStateful
    #   PyUnicode_AsMBCSString
    #   PyUnicode_EncodeCodePage
    #   PyUnicode_DecodeLocaleAndSize
    #   PyUnicode_DecodeLocale
    #   PyUnicode_EncodeLocale
    #   PyUnicode_DecodeFSDefault
    #   PyUnicode_DecodeFSDefaultAndSize
    #   PyUnicode_EncodeFSDefault

    def test_fromencodedobject(self):
        """Test PyUnicode_FromEncodedObject()"""
        fromencodedobject = _testcapi.unicode_fromencodedobject

        self.assertEqual(fromencodedobject(b'abc', NULL), 'abc')
        self.assertEqual(fromencodedobject(b'abc', 'ascii'), 'abc')
        # The same text as UTF-8 bytes (b) and as a str (s).
        b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80'
        s = 'a\xa1\u4f60\U0001f600'
        self.assertEqual(fromencodedobject(b, NULL), s)
        self.assertEqual(fromencodedobject(b, 'utf-8'), s)
        self.assertEqual(fromencodedobject(b, 'latin1'), b.decode('latin1'))
        self.assertRaises(UnicodeDecodeError, fromencodedobject, b, 'ascii')
        self.assertEqual(fromencodedobject(b, 'ascii', 'replace'),
                         'a' + '\ufffd'*9)
        self.assertEqual(fromencodedobject(bytearray(b), NULL), s)
        self.assertEqual(fromencodedobject(bytearray(b), 'utf-8'), s)
        # Unknown encoding / unknown error handler.
        self.assertRaises(LookupError, fromencodedobject, b'abc', 'foo')
        self.assertRaises(LookupError, fromencodedobject, b, 'ascii', 'foo')
        # str and non-bytes-like objects are rejected.
        self.assertRaises(TypeError, fromencodedobject, 'abc', NULL)
        self.assertRaises(TypeError, fromencodedobject, 'abc', 'ascii')
        self.assertRaises(TypeError, fromencodedobject, [], NULL)
        self.assertRaises(TypeError, fromencodedobject, [], 'ascii')
        self.assertRaises(SystemError, fromencodedobject, NULL, NULL)
        self.assertRaises(SystemError, fromencodedobject, NULL, 'ascii')

    def test_decode(self):
        """Test PyUnicode_Decode()"""
        decode = _testcapi.unicode_decode

        self.assertEqual(decode(b'[\xe2\x82\xac]', 'utf-8'), '[\u20ac]')
        self.assertEqual(decode(b'[\xa4]', 'iso8859-15'), '[\u20ac]')
        self.assertEqual(decode(b'[\xa4]', 'iso8859-15', 'strict'), '[\u20ac]')
        self.assertRaises(UnicodeDecodeError, decode, b'[\xa4]', 'utf-8')
        self.assertEqual(decode(b'[\xa4]', 'utf-8', 'replace'), '[\ufffd]')

        # With no encoding given, the data is decoded as UTF-8.
        self.assertEqual(decode(b'[\xe2\x82\xac]', NULL), '[\u20ac]')
        self.assertEqual(decode(b'[\xa4]', NULL, 'replace'), '[\ufffd]')

        self.assertRaises(LookupError, decode, b'\xa4', 'foo')
        self.assertRaises(LookupError, decode, b'\xa4', 'utf-8', 'foo')
        # TODO: Test PyUnicode_Decode() with NULL as data and
        # negative size.

    def test_asencodedstring(self):
        """Test PyUnicode_AsEncodedString()"""
        asencodedstring = _testcapi.unicode_asencodedstring

        self.assertEqual(asencodedstring('abc', NULL), b'abc')
        self.assertEqual(asencodedstring('abc', 'ascii'), b'abc')
        # The same text as a str (s) and as UTF-8 bytes (b).
        s = 'a\xa1\u4f60\U0001f600'
        b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80'
        self.assertEqual(asencodedstring(s, NULL), b)
        self.assertEqual(asencodedstring(s, 'utf-8'), b)
        self.assertEqual(asencodedstring('\xa1\xa2', 'latin1'), b'\xa1\xa2')
        self.assertRaises(UnicodeEncodeError, asencodedstring, '\xa1\xa2', 'ascii')
        self.assertEqual(asencodedstring(s, 'ascii', 'replace'), b'a???')

        self.assertRaises(LookupError, asencodedstring, 'abc', 'foo')
        self.assertRaises(LookupError, asencodedstring, s, 'ascii', 'foo')
        self.assertRaises(TypeError, asencodedstring, b'abc', NULL)
        self.assertRaises(TypeError, asencodedstring, b'abc', 'ascii')
        self.assertRaises(TypeError, asencodedstring, [], NULL)
        self.assertRaises(TypeError, asencodedstring, [], 'ascii')
        # CRASHES asencodedstring(NULL, NULL)
        # CRASHES asencodedstring(NULL, 'ascii')

    def test_decodeutf8(self):
        """Test PyUnicode_DecodeUTF8()"""
        decodeutf8 = _testcapi.unicode_decodeutf8

        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
            b = s.encode('utf-8')
            self.assertEqual(decodeutf8(b), s)
            self.assertEqual(decodeutf8(b, 'strict'), s)

        self.assertRaises(UnicodeDecodeError, decodeutf8, b'\x80')
        self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xc0')
        self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xff')
        # A truncated multi-byte sequence is an error in the non-stateful API.
        self.assertRaises(UnicodeDecodeError, decodeutf8, b'a\xf0\x9f')
        self.assertEqual(decodeutf8(b'a\xf0\x9f', 'replace'), 'a\ufffd')
        self.assertEqual(decodeutf8(b'a\xf0\x9fb', 'replace'), 'a\ufffdb')

        self.assertRaises(LookupError, decodeutf8, b'a\x80', 'foo')
        # TODO: Test PyUnicode_DecodeUTF8() with NULL as data and
        # negative size.

    def test_decodeutf8stateful(self):
        """Test PyUnicode_DecodeUTF8Stateful()"""
        decodeutf8stateful = _testcapi.unicode_decodeutf8stateful

        # The wrapper returns (decoded text, number of bytes consumed).
        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
            b = s.encode('utf-8')
            self.assertEqual(decodeutf8stateful(b), (s, len(b)))
            self.assertEqual(decodeutf8stateful(b, 'strict'), (s, len(b)))

        self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\x80')
        self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xc0')
        self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xff')
        # A truncated trailing sequence is left unconsumed rather than
        # reported as an error, whatever the error handler.
        self.assertEqual(decodeutf8stateful(b'a\xf0\x9f'), ('a', 1))
        self.assertEqual(decodeutf8stateful(b'a\xf0\x9f', 'replace'), ('a', 1))
        self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'a\xf0\x9fb')
        self.assertEqual(decodeutf8stateful(b'a\xf0\x9fb', 'replace'), ('a\ufffdb', 4))

        self.assertRaises(LookupError, decodeutf8stateful, b'a\x80', 'foo')
        # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as data and
        # negative size.
        # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as the address of
        # "consumed".

    def test_asutf8string(self):
        """Test PyUnicode_AsUTF8String()"""
        asutf8string = _testcapi.unicode_asutf8string

        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
            self.assertEqual(asutf8string(s), s.encode('utf-8'))

        # A lone surrogate cannot be encoded.
        self.assertRaises(UnicodeEncodeError, asutf8string, '\ud8ff')
        self.assertRaises(TypeError, asutf8string, b'abc')
        self.assertRaises(TypeError, asutf8string, [])
        # CRASHES asutf8string(NULL)

    def test_decodeutf16(self):
        """Test PyUnicode_DecodeUTF16()"""
        decodeutf16 = _testcapi.unicode_decodeutf16

        # The wrapper takes (byteorder, data[, errors]) and returns
        # (byteorder, text).  -1 is little-endian, 1 is big-endian; with 0
        # the returned byte order reflects a leading BOM when one is present.
        naturalbyteorder = -1 if sys.byteorder == 'little' else 1
        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
            b = s.encode('utf-16')
            self.assertEqual(decodeutf16(0, b), (naturalbyteorder, s))
            b = s.encode('utf-16le')
            self.assertEqual(decodeutf16(-1, b), (-1, s))
            self.assertEqual(decodeutf16(0, b'\xff\xfe'+b), (-1, s))
            b = s.encode('utf-16be')
            self.assertEqual(decodeutf16(1, b), (1, s))
            self.assertEqual(decodeutf16(0, b'\xfe\xff'+b), (1, s))

        # Odd number of bytes.
        self.assertRaises(UnicodeDecodeError, decodeutf16, -1, b'a')
        self.assertRaises(UnicodeDecodeError, decodeutf16, 1, b'a')
        self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xff\xfea')
        self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xfe\xffa')

        # Lone low surrogate.
        self.assertRaises(UnicodeDecodeError, decodeutf16, -1, b'\x00\xde')
        self.assertRaises(UnicodeDecodeError, decodeutf16, 1, b'\xde\x00')
        self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xde\xde')
        self.assertEqual(decodeutf16(-1, b'\x00\xde', 'replace'), (-1, '\ufffd'))
        self.assertEqual(decodeutf16(1, b'\xde\x00', 'replace'), (1, '\ufffd'))
        self.assertEqual(decodeutf16(0, b'\xde\xde', 'replace'), (0, '\ufffd'))
        self.assertEqual(decodeutf16(0, b'\xff\xfe\x00\xde', 'replace'), (-1, '\ufffd'))
        self.assertEqual(decodeutf16(0, b'\xfe\xff\xde\x00', 'replace'), (1, '\ufffd'))

        # Lone high surrogate at the end of the data.
        self.assertRaises(UnicodeDecodeError, decodeutf16, -1, b'\x3d\xd8')
        self.assertRaises(UnicodeDecodeError, decodeutf16, 1, b'\xd8\x3d')
        self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xd8\xd8')
        self.assertEqual(decodeutf16(-1, b'\x3d\xd8', 'replace'), (-1, '\ufffd'))
        self.assertEqual(decodeutf16(1, b'\xd8\x3d', 'replace'), (1, '\ufffd'))
        self.assertEqual(decodeutf16(0, b'\xd8\xd8', 'replace'), (0, '\ufffd'))
        self.assertEqual(decodeutf16(0, b'\xff\xfe\x3d\xd8', 'replace'), (-1, '\ufffd'))
        self.assertEqual(decodeutf16(0, b'\xfe\xff\xd8\x3d', 'replace'), (1, '\ufffd'))

        self.assertRaises(LookupError, decodeutf16, -1, b'\x00\xde', 'foo')
        self.assertRaises(LookupError, decodeutf16, 1, b'\xde\x00', 'foo')
        self.assertRaises(LookupError, decodeutf16, 0, b'\xde\xde', 'foo')
        # TODO: Test PyUnicode_DecodeUTF16() with NULL as data and
        # negative size.

    def test_decodeutf16stateful(self):
        """Test PyUnicode_DecodeUTF16Stateful()"""
        decodeutf16stateful = _testcapi.unicode_decodeutf16stateful

        # The wrapper returns (byteorder, text, number of bytes consumed).
        naturalbyteorder = -1 if sys.byteorder == 'little' else 1
        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
            b = s.encode('utf-16')
            self.assertEqual(decodeutf16stateful(0, b), (naturalbyteorder, s, len(b)))
            b = s.encode('utf-16le')
            self.assertEqual(decodeutf16stateful(-1, b), (-1, s, len(b)))
            self.assertEqual(decodeutf16stateful(0, b'\xff\xfe'+b), (-1, s, len(b)+2))
            b = s.encode('utf-16be')
            self.assertEqual(decodeutf16stateful(1, b), (1, s, len(b)))
            self.assertEqual(decodeutf16stateful(0, b'\xfe\xff'+b), (1, s, len(b)+2))

        # Incomplete trailing data is left unconsumed instead of raising.
        self.assertEqual(decodeutf16stateful(-1, b'\x61\x00\x3d'), (-1, 'a', 2))
        self.assertEqual(decodeutf16stateful(-1, b'\x61\x00\x3d\xd8'), (-1, 'a', 2))
        self.assertEqual(decodeutf16stateful(-1, b'\x61\x00\x3d\xd8\x00'), (-1, 'a', 2))
        self.assertEqual(decodeutf16stateful(1, b'\x00\x61\xd8'), (1, 'a', 2))
        self.assertEqual(decodeutf16stateful(1, b'\x00\x61\xd8\x3d'), (1, 'a', 2))
        self.assertEqual(decodeutf16stateful(1, b'\x00\x61\xd8\x3d\xde'), (1, 'a', 2))
        self.assertEqual(decodeutf16stateful(0, b'\xff\xfe\x61\x00\x3d\xd8\x00'), (-1, 'a', 4))
        self.assertEqual(decodeutf16stateful(0, b'\xfe\xff\x00\x61\xd8\x3d\xde'), (1, 'a', 4))

        # Lone low surrogate.
        self.assertRaises(UnicodeDecodeError, decodeutf16stateful, -1, b'\x00\xde')
        self.assertRaises(UnicodeDecodeError, decodeutf16stateful, 1, b'\xde\x00')
        self.assertRaises(UnicodeDecodeError, decodeutf16stateful, 0, b'\xde\xde')
        self.assertEqual(decodeutf16stateful(-1, b'\x00\xde', 'replace'), (-1, '\ufffd', 2))
        self.assertEqual(decodeutf16stateful(1, b'\xde\x00', 'replace'), (1, '\ufffd', 2))
        self.assertEqual(decodeutf16stateful(0, b'\xde\xde', 'replace'), (0, '\ufffd', 2))
        self.assertEqual(decodeutf16stateful(0, b'\xff\xfe\x00\xde', 'replace'), (-1, '\ufffd', 4))
        self.assertEqual(decodeutf16stateful(0, b'\xfe\xff\xde\x00', 'replace'), (1, '\ufffd', 4))

        # High surrogate followed by a non-surrogate code unit.
        self.assertRaises(UnicodeDecodeError, decodeutf16stateful, -1, b'\x3d\xd8\x61\x00')
        self.assertEqual(decodeutf16stateful(-1, b'\x3d\xd8\x61\x00', 'replace'), (-1, '\ufffda', 4))
        self.assertRaises(UnicodeDecodeError, decodeutf16stateful, 1, b'\xd8\x3d\x00\x61')
        self.assertEqual(decodeutf16stateful(1, b'\xd8\x3d\x00\x61', 'replace'), (1, '\ufffda', 4))

        self.assertRaises(LookupError, decodeutf16stateful, -1, b'\x00\xde', 'foo')
        self.assertRaises(LookupError, decodeutf16stateful, 1, b'\xde\x00', 'foo')
        self.assertRaises(LookupError, decodeutf16stateful, 0, b'\xde\xde', 'foo')
        # TODO: Test PyUnicode_DecodeUTF16Stateful() with NULL as data and
        # negative size.
        # TODO: Test PyUnicode_DecodeUTF16Stateful() with NULL as the address of
        # "consumed".

    def test_asutf16string(self):
        """Test PyUnicode_AsUTF16String()"""
        asutf16string = _testcapi.unicode_asutf16string

        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
            self.assertEqual(asutf16string(s), s.encode('utf-16'))

        # A lone surrogate cannot be encoded.
        self.assertRaises(UnicodeEncodeError, asutf16string, '\ud8ff')
        self.assertRaises(TypeError, asutf16string, b'abc')
        self.assertRaises(TypeError, asutf16string, [])
        # CRASHES asutf16string(NULL)

    def test_decodeutf32(self):
        """Test PyUnicode_DecodeUTF32()"""
        decodeutf32 = _testcapi.unicode_decodeutf32

        # The wrapper takes (byteorder, data[, errors]) and returns
        # (byteorder, text); byte order values as in test_decodeutf16.
        naturalbyteorder = -1 if sys.byteorder == 'little' else 1
        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
            b = s.encode('utf-32')
            self.assertEqual(decodeutf32(0, b), (naturalbyteorder, s))
            b = s.encode('utf-32le')
            self.assertEqual(decodeutf32(-1, b), (-1, s))
            self.assertEqual(decodeutf32(0, b'\xff\xfe\x00\x00'+b), (-1, s))
            b = s.encode('utf-32be')
            self.assertEqual(decodeutf32(1, b), (1, s))
            self.assertEqual(decodeutf32(0, b'\x00\x00\xfe\xff'+b), (1, s))

        # Data length is not a multiple of 4.
        self.assertRaises(UnicodeDecodeError, decodeutf32, -1, b'\x61\x00\x00\x00\x00')
        self.assertRaises(UnicodeDecodeError, decodeutf32, 1, b'\x00\x00\x00\x61\x00')
        self.assertRaises(UnicodeDecodeError, decodeutf32, 0, b'\xff\xfe\x00\x00\x61\x00\x00\x00\x00')
        self.assertRaises(UnicodeDecodeError, decodeutf32, 0, b'\x00\x00\xfe\xff\x00\x00\x00\x61\x00')

        # Code point out of range.
        self.assertRaises(UnicodeDecodeError, decodeutf32, -1, b'\xff\xff\xff\xff')
        self.assertRaises(UnicodeDecodeError, decodeutf32, 1, b'\xff\xff\xff\xff')
        self.assertRaises(UnicodeDecodeError, decodeutf32, 0, b'\xff\xff\xff\xff')
        self.assertEqual(decodeutf32(-1, b'\xff\xff\xff\xff', 'replace'), (-1, '\ufffd'))
        self.assertEqual(decodeutf32(1, b'\xff\xff\xff\xff', 'replace'), (1, '\ufffd'))
        self.assertEqual(decodeutf32(0, b'\xff\xff\xff\xff', 'replace'), (0, '\ufffd'))
        self.assertEqual(decodeutf32(0, b'\xff\xfe\x00\x00\xff\xff\xff\xff', 'replace'), (-1, '\ufffd'))
        self.assertEqual(decodeutf32(0, b'\x00\x00\xfe\xff\xff\xff\xff\xff', 'replace'), (1, '\ufffd'))

        # Surrogate code point.
        self.assertRaises(UnicodeDecodeError, decodeutf32, -1, b'\x3d\xd8\x00\x00')
        self.assertEqual(decodeutf32(-1, b'\x3d\xd8\x00\x00', 'replace'), (-1, '\ufffd'))
        self.assertRaises(UnicodeDecodeError, decodeutf32, 1, b'\x00\x00\xd8\x3d')
        self.assertEqual(decodeutf32(1, b'\x00\x00\xd8\x3d', 'replace'), (1, '\ufffd'))

        self.assertRaises(LookupError, decodeutf32, -1, b'\xff\xff\xff\xff', 'foo')
        self.assertRaises(LookupError, decodeutf32, 1, b'\xff\xff\xff\xff', 'foo')
        self.assertRaises(LookupError, decodeutf32, 0, b'\xff\xff\xff\xff', 'foo')
        # TODO: Test PyUnicode_DecodeUTF32() with NULL as data and
        # negative size.

    def test_decodeutf32stateful(self):
        """Test PyUnicode_DecodeUTF32Stateful()"""
        decodeutf32stateful = _testcapi.unicode_decodeutf32stateful

        # The wrapper returns (byteorder, text, number of bytes consumed).
        naturalbyteorder = -1 if sys.byteorder == 'little' else 1
        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
            b = s.encode('utf-32')
            self.assertEqual(decodeutf32stateful(0, b), (naturalbyteorder, s, len(b)))
            b = s.encode('utf-32le')
            self.assertEqual(decodeutf32stateful(-1, b), (-1, s, len(b)))
            self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00'+b), (-1, s, len(b)+4))
            b = s.encode('utf-32be')
            self.assertEqual(decodeutf32stateful(1, b), (1, s, len(b)))
            self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff'+b), (1, s, len(b)+4))

        # Incomplete trailing data is left unconsumed instead of raising.
        self.assertEqual(decodeutf32stateful(-1, b'\x61\x00\x00\x00\x00'), (-1, 'a', 4))
        self.assertEqual(decodeutf32stateful(-1, b'\x61\x00\x00\x00\x00\xf6'), (-1, 'a', 4))
        self.assertEqual(decodeutf32stateful(-1, b'\x61\x00\x00\x00\x00\xf6\x01'), (-1, 'a', 4))
        self.assertEqual(decodeutf32stateful(1, b'\x00\x00\x00\x61\x00'), (1, 'a', 4))
        self.assertEqual(decodeutf32stateful(1, b'\x00\x00\x00\x61\x00\x01'), (1, 'a', 4))
        self.assertEqual(decodeutf32stateful(1, b'\x00\x00\x00\x61\x00\x01\xf6'), (1, 'a', 4))
        self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00\x61\x00\x00\x00\x00\xf6\x01'), (-1, 'a', 8))
        self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff\x00\x00\x00\x61\x00\x01\xf6'), (1, 'a', 8))

        # Fewer than 4 bytes of payload: nothing is decoded or consumed
        # (apart from a BOM, when present).
        for b in b'\xff', b'\xff\xff', b'\xff\xff\xff':
            self.assertEqual(decodeutf32stateful(-1, b), (-1, '', 0))
            self.assertEqual(decodeutf32stateful(1, b), (1, '', 0))
            self.assertEqual(decodeutf32stateful(0, b), (0, '', 0))
            self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00'+b), (-1, '', 4))
            self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff'+b), (1, '', 4))
        self.assertRaises(UnicodeDecodeError, decodeutf32stateful, -1, b'\xff\xff\xff\xff')
        self.assertRaises(UnicodeDecodeError, decodeutf32stateful, 1, b'\xff\xff\xff\xff')
        self.assertRaises(UnicodeDecodeError, decodeutf32stateful, 0, b'\xff\xff\xff\xff')
        self.assertEqual(decodeutf32stateful(-1, b'\xff\xff\xff\xff', 'replace'), (-1, '\ufffd', 4))
        self.assertEqual(decodeutf32stateful(1, b'\xff\xff\xff\xff', 'replace'), (1, '\ufffd', 4))
        self.assertEqual(decodeutf32stateful(0, b'\xff\xff\xff\xff', 'replace'), (0, '\ufffd', 4))
        self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00\xff\xff\xff\xff', 'replace'), (-1, '\ufffd', 8))
        self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff\xff\xff\xff\xff', 'replace'), (1, '\ufffd', 8))

        # Surrogate code point.
        self.assertRaises(UnicodeDecodeError, decodeutf32stateful, -1, b'\x3d\xd8\x00\x00')
        self.assertEqual(decodeutf32stateful(-1, b'\x3d\xd8\x00\x00', 'replace'), (-1, '\ufffd', 4))
        self.assertRaises(UnicodeDecodeError, decodeutf32stateful, 1, b'\x00\x00\xd8\x3d')
        self.assertEqual(decodeutf32stateful(1, b'\x00\x00\xd8\x3d', 'replace'), (1, '\ufffd', 4))

        self.assertRaises(LookupError, decodeutf32stateful, -1, b'\xff\xff\xff\xff', 'foo')
        self.assertRaises(LookupError, decodeutf32stateful, 1, b'\xff\xff\xff\xff', 'foo')
        self.assertRaises(LookupError, decodeutf32stateful, 0, b'\xff\xff\xff\xff', 'foo')
        # TODO: Test PyUnicode_DecodeUTF32Stateful() with NULL as data and
        # negative size.
        # TODO: Test PyUnicode_DecodeUTF32Stateful() with NULL as the address of
        # "consumed".

    def test_asutf32string(self):
        """Test PyUnicode_AsUTF32String()"""
        asutf32string = _testcapi.unicode_asutf32string

        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
            self.assertEqual(asutf32string(s), s.encode('utf-32'))

        # A lone surrogate cannot be encoded.
        self.assertRaises(UnicodeEncodeError, asutf32string, '\ud8ff')
        self.assertRaises(TypeError, asutf32string, b'abc')
        self.assertRaises(TypeError, asutf32string, [])
        # CRASHES asutf32string(NULL)

    def test_decodelatin1(self):
        """Test PyUnicode_DecodeLatin1()"""
        decodelatin1 = _testcapi.unicode_decodelatin1

        self.assertEqual(decodelatin1(b'abc'), 'abc')
        self.assertEqual(decodelatin1(b'abc', 'strict'), 'abc')
        self.assertEqual(decodelatin1(b'\xa1\xa2'), '\xa1\xa2')
        self.assertEqual(decodelatin1(b'\xa1\xa2', 'strict'), '\xa1\xa2')
        # TODO: Test PyUnicode_DecodeLatin1() with NULL as data and
        # negative size.

    def test_aslatin1string(self):
        """Test PyUnicode_AsLatin1String()"""
        aslatin1string = _testcapi.unicode_aslatin1string

        self.assertEqual(aslatin1string('abc'), b'abc')
        self.assertEqual(aslatin1string('\xa1\xa2'), b'\xa1\xa2')

        self.assertRaises(UnicodeEncodeError, aslatin1string, '\u4f60')
        self.assertRaises(TypeError, aslatin1string, b'abc')
        self.assertRaises(TypeError, aslatin1string, [])
        # CRASHES aslatin1string(NULL)

    def test_decodeascii(self):
        """Test PyUnicode_DecodeASCII()"""
        decodeascii = _testcapi.unicode_decodeascii

        self.assertEqual(decodeascii(b'abc'), 'abc')
        self.assertEqual(decodeascii(b'abc', 'strict'), 'abc')

        self.assertRaises(UnicodeDecodeError, decodeascii, b'\xff')
        self.assertEqual(decodeascii(b'a\xff', 'replace'), 'a\ufffd')
        self.assertEqual(decodeascii(b'a\xffb', 'replace'), 'a\ufffdb')

        self.assertRaises(LookupError, decodeascii, b'a\xff', 'foo')
        # TODO: Test PyUnicode_DecodeASCII() with NULL as data and
        # negative size.

    def test_asasciistring(self):
        """Test PyUnicode_AsASCIIString()"""
        asasciistring = _testcapi.unicode_asasciistring

        self.assertEqual(asasciistring('abc'), b'abc')

        self.assertRaises(UnicodeEncodeError, asasciistring, '\x80')
        self.assertRaises(TypeError, asasciistring, b'abc')
        self.assertRaises(TypeError, asasciistring, [])
        # CRASHES asasciistring(NULL)

    def test_decodecharmap(self):
        """Test PyUnicode_DecodeCharmap()"""
        decodecharmap = _testcapi.unicode_decodecharmap

        # The mapping may be a dict, a list or a str indexed by byte value.
        self.assertEqual(decodecharmap(b'\3\0\7', {0: 'a', 3: 'b', 7: 'c'}), 'bac')
        self.assertEqual(decodecharmap(b'\1\0\2', ['a', 'b', 'c']), 'bac')
        self.assertEqual(decodecharmap(b'\1\0\2', 'abc'), 'bac')
        self.assertEqual(decodecharmap(b'\1\0\2', ['\xa1', '\xa2', '\xa3']), '\xa2\xa1\xa3')
        self.assertEqual(decodecharmap(b'\1\0\2', ['\u4f60', '\u597d', '\u4e16']), '\u597d\u4f60\u4e16')
        self.assertEqual(decodecharmap(b'\1\0\2', ['\U0001f600', '\U0001f601', '\U0001f602']), '\U0001f601\U0001f600\U0001f602')

        # Mapped values may be code points or strings of any length.
        self.assertEqual(decodecharmap(b'\1\0\2', [97, 98, 99]), 'bac')
        self.assertEqual(decodecharmap(b'\1\0\2', ['', 'b', 'cd']), 'bcd')

        # A missing entry or a None entry is a decoding error.
        self.assertRaises(UnicodeDecodeError, decodecharmap, b'\0', {})
        self.assertRaises(UnicodeDecodeError, decodecharmap, b'\0', {0: None})
        self.assertEqual(decodecharmap(b'\1\0\2', [None, 'b', 'c'], 'replace'), 'b\ufffdc')
        # With no mapping, each byte maps to the code point of the same value.
        self.assertEqual(decodecharmap(b'\1\0\2\xff', NULL), '\1\0\2\xff')
        self.assertRaises(TypeError, decodecharmap, b'\0', 42)

        # TODO: Test PyUnicode_DecodeCharmap() with NULL as data and
        # negative size.

    def test_ascharmapstring(self):
        """Test PyUnicode_AsCharmapString()"""
        ascharmapstring = _testcapi.unicode_ascharmapstring

        self.assertEqual(ascharmapstring('abc', {97: 3, 98: 0, 99: 7}), b'\3\0\7')
        self.assertEqual(ascharmapstring('\xa1\xa2\xa3', {0xa1: 3, 0xa2: 0, 0xa3: 7}), b'\3\0\7')
        self.assertEqual(ascharmapstring('\u4f60\u597d\u4e16', {0x4f60: 3, 0x597d: 0, 0x4e16: 7}), b'\3\0\7')
        self.assertEqual(ascharmapstring('\U0001f600\U0001f601\U0001f602', {0x1f600: 3, 0x1f601: 0, 0x1f602: 7}), b'\3\0\7')
        # Mapped values may also be bytes objects of any length.
        self.assertEqual(ascharmapstring('abc', {97: 3, 98: b'', 99: b'spam'}), b'\3spam')

        # A missing entry or a None entry is an encoding error.
        self.assertRaises(UnicodeEncodeError, ascharmapstring, 'a', {})
        self.assertRaises(UnicodeEncodeError, ascharmapstring, 'a', {97: None})
        self.assertRaises(TypeError, ascharmapstring, b'a', {})
        self.assertRaises(TypeError, ascharmapstring, [], {})
        self.assertRaises(TypeError, ascharmapstring, 'a', NULL)
        # CRASHES ascharmapstring(NULL, {})

    def test_decodeunicodeescape(self):
        """Test PyUnicode_DecodeUnicodeEscape()"""
        decodeunicodeescape = _testcapi.unicode_decodeunicodeescape

        self.assertEqual(decodeunicodeescape(b'abc'), 'abc')
        self.assertEqual(decodeunicodeescape(br'\t\n\r\x0b\x0c\x00\\'), '\t\n\r\v\f\0\\')
        self.assertEqual(decodeunicodeescape(b'\t\n\r\x0b\x0c\x00'), '\t\n\r\v\f\0')
        self.assertEqual(decodeunicodeescape(br'\xa1\xa2'), '\xa1\xa2')
        self.assertEqual(decodeunicodeescape(b'\xa1\xa2'), '\xa1\xa2')
        self.assertEqual(decodeunicodeescape(br'\u4f60\u597d'), '\u4f60\u597d')
        self.assertEqual(decodeunicodeescape(br'\U0001f600'), '\U0001f600')
        # An unrecognized escape is kept verbatim but emits a warning.
        with self.assertWarns(DeprecationWarning):
            self.assertEqual(decodeunicodeescape(br'\z'), r'\z')

        # Truncated escape sequences.
        for b in b'\\', br'\xa', br'\u4f6', br'\U0001f60':
            self.assertRaises(UnicodeDecodeError, decodeunicodeescape, b)
            self.assertRaises(UnicodeDecodeError, decodeunicodeescape, b, 'strict')
        self.assertEqual(decodeunicodeescape(br'x\U0001f60', 'replace'), 'x\ufffd')
        self.assertEqual(decodeunicodeescape(br'x\U0001f60y', 'replace'), 'x\ufffdy')

        self.assertRaises(LookupError, decodeunicodeescape, b'\\', 'foo')
        # TODO: Test PyUnicode_DecodeUnicodeEscape() with NULL as data and
        # negative size.

    def test_asunicodeescapestring(self):
        """Test PyUnicode_AsUnicodeEscapeString()"""
        asunicodeescapestring = _testcapi.unicode_asunicodeescapestring

        self.assertEqual(asunicodeescapestring('abc'), b'abc')
        self.assertEqual(asunicodeescapestring('\t\n\r\v\f\0\\'), br'\t\n\r\x0b\x0c\x00\\')
        self.assertEqual(asunicodeescapestring('\xa1\xa2'), br'\xa1\xa2')
        self.assertEqual(asunicodeescapestring('\u4f60\u597d'), br'\u4f60\u597d')
        self.assertEqual(asunicodeescapestring('\U0001f600'), br'\U0001f600')

        self.assertRaises(TypeError, asunicodeescapestring, b'abc')
        self.assertRaises(TypeError, asunicodeescapestring, [])
        # CRASHES asunicodeescapestring(NULL)

    def test_decoderawunicodeescape(self):
        """Test PyUnicode_DecodeRawUnicodeEscape()"""
        decoderawunicodeescape = _testcapi.unicode_decoderawunicodeescape

        self.assertEqual(decoderawunicodeescape(b'abc'), 'abc')
        self.assertEqual(decoderawunicodeescape(b'\t\n\r\v\f\0\\'), '\t\n\r\v\f\0\\')
        self.assertEqual(decoderawunicodeescape(b'\xa1\xa2'), '\xa1\xa2')
        self.assertEqual(decoderawunicodeescape(br'\u4f60\u597d'), '\u4f60\u597d')
        self.assertEqual(decoderawunicodeescape(br'\U0001f600'), '\U0001f600')
        # Escapes other than \u and \U are passed through unchanged.
        self.assertEqual(decoderawunicodeescape(br'\xa1\xa2'), r'\xa1\xa2')
        self.assertEqual(decoderawunicodeescape(br'\z'), r'\z')

        # Truncated escape sequences.
        for b in br'\u4f6', br'\U0001f60':
            self.assertRaises(UnicodeDecodeError, decoderawunicodeescape, b)
            self.assertRaises(UnicodeDecodeError, decoderawunicodeescape, b, 'strict')
        self.assertEqual(decoderawunicodeescape(br'x\U0001f60', 'replace'), 'x\ufffd')
        self.assertEqual(decoderawunicodeescape(br'x\U0001f60y', 'replace'), 'x\ufffdy')

        self.assertRaises(LookupError, decoderawunicodeescape, br'\U0001f60', 'foo')
        # TODO: Test PyUnicode_DecodeRawUnicodeEscape() with NULL as data and
        # negative size.

    def test_asrawunicodeescapestring(self):
        """Test PyUnicode_AsRawUnicodeEscapeString()"""
        asrawunicodeescapestring = _testcapi.unicode_asrawunicodeescapestring

        self.assertEqual(asrawunicodeescapestring('abc'), b'abc')
        self.assertEqual(asrawunicodeescapestring('\t\n\r\v\f\0\\'), b'\t\n\r\v\f\0\\')
        self.assertEqual(asrawunicodeescapestring('\xa1\xa2'), b'\xa1\xa2')
        self.assertEqual(asrawunicodeescapestring('\u4f60\u597d'), br'\u4f60\u597d')
        self.assertEqual(asrawunicodeescapestring('\U0001f600'), br'\U0001f600')

        self.assertRaises(TypeError, asrawunicodeescapestring, b'abc')
        self.assertRaises(TypeError, asrawunicodeescapestring, [])
        # CRASHES asrawunicodeescapestring(NULL)
517
518
# Run the tests in this file when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()