import unittest
from test.support import import_helper

# The C helpers exercised in this file are looked up on the _testcapi
# extension module.  It is loaded through import_helper.import_module()
# instead of a plain ``import`` statement; per the test.support
# documentation that helper raises unittest.SkipTest when the module
# cannot be imported, so the file is skipped rather than reported as an
# error on builds without the extension.
_testcapi = import_helper.import_module('_testcapi')


7 class ESC[4;38;5;81mCAPITest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
8
9 def test_decodeutf8(self):
10 """Test PyUnicode_DecodeUTF8()"""
11 decodeutf8 = _testcapi.unicode_decodeutf8
12
13 for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
14 b = s.encode('utf-8')
15 self.assertEqual(decodeutf8(b), s)
16 self.assertEqual(decodeutf8(b, 'strict'), s)
17
18 self.assertRaises(UnicodeDecodeError, decodeutf8, b'\x80')
19 self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xc0')
20 self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xff')
21 self.assertRaises(UnicodeDecodeError, decodeutf8, b'a\xf0\x9f')
22 self.assertEqual(decodeutf8(b'a\xf0\x9f', 'replace'), 'a\ufffd')
23 self.assertEqual(decodeutf8(b'a\xf0\x9fb', 'replace'), 'a\ufffdb')
24
25 self.assertRaises(LookupError, decodeutf8, b'a\x80', 'foo')
26 # TODO: Test PyUnicode_DecodeUTF8() with NULL as data and
27 # negative size.
28
29 def test_decodeutf8stateful(self):
30 """Test PyUnicode_DecodeUTF8Stateful()"""
31 decodeutf8stateful = _testcapi.unicode_decodeutf8stateful
32
33 for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
34 b = s.encode('utf-8')
35 self.assertEqual(decodeutf8stateful(b), (s, len(b)))
36 self.assertEqual(decodeutf8stateful(b, 'strict'), (s, len(b)))
37
38 self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\x80')
39 self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xc0')
40 self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xff')
41 self.assertEqual(decodeutf8stateful(b'a\xf0\x9f'), ('a', 1))
42 self.assertEqual(decodeutf8stateful(b'a\xf0\x9f', 'replace'), ('a', 1))
43 self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'a\xf0\x9fb')
44 self.assertEqual(decodeutf8stateful(b'a\xf0\x9fb', 'replace'), ('a\ufffdb', 4))
45
46 self.assertRaises(LookupError, decodeutf8stateful, b'a\x80', 'foo')
47 # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as data and
48 # negative size.
49 # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as the address of
50 # "consumed".
51
52
# Run the tests in this file when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()