python (3.12.0)

(root)/
lib/
python3.12/
test/
test_capi/
test_codecs.py
       1  import unittest
       2  import sys
       3  from test.support import import_helper
       4  
# C helper module whose unicode_* functions are exercised by the tests below.
# NOTE(review): import_helper.import_module() presumably skips the test file
# when the module is unavailable -- confirm against test.support.
_testcapi = import_helper.import_module('_testcapi')

# Passed wherever a test exercises a missing argument; presumably the
# _testcapi wrappers translate None into a C NULL pointer.
NULL = None
       8  
       9  
      10  class CAPITest(unittest.TestCase):
      11      # TODO: Test the following functions:
      12      #
      13      #   PyUnicode_BuildEncodingMap
      14      #   PyUnicode_FSConverter
      15      #   PyUnicode_FSDecoder
      16      #   PyUnicode_DecodeMBCS
      17      #   PyUnicode_DecodeMBCSStateful
      18      #   PyUnicode_DecodeCodePageStateful
      19      #   PyUnicode_AsMBCSString
      20      #   PyUnicode_EncodeCodePage
      21      #   PyUnicode_DecodeLocaleAndSize
      22      #   PyUnicode_DecodeLocale
      23      #   PyUnicode_EncodeLocale
      24      #   PyUnicode_DecodeFSDefault
      25      #   PyUnicode_DecodeFSDefaultAndSize
      26      #   PyUnicode_EncodeFSDefault
      27  
      28      def test_fromencodedobject(self):
      29          """Test PyUnicode_FromEncodedObject()"""
      30          fromencodedobject = _testcapi.unicode_fromencodedobject
      31  
      32          self.assertEqual(fromencodedobject(b'abc', NULL), 'abc')
      33          self.assertEqual(fromencodedobject(b'abc', 'ascii'), 'abc')
      34          b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80'
      35          s = 'a\xa1\u4f60\U0001f600'
      36          self.assertEqual(fromencodedobject(b, NULL), s)
      37          self.assertEqual(fromencodedobject(b, 'utf-8'), s)
      38          self.assertEqual(fromencodedobject(b, 'latin1'), b.decode('latin1'))
      39          self.assertRaises(UnicodeDecodeError, fromencodedobject, b, 'ascii')
      40          self.assertEqual(fromencodedobject(b, 'ascii', 'replace'),
      41                           'a' + '\ufffd'*9)
      42          self.assertEqual(fromencodedobject(bytearray(b), NULL), s)
      43          self.assertEqual(fromencodedobject(bytearray(b), 'utf-8'), s)
      44          self.assertRaises(LookupError, fromencodedobject, b'abc', 'foo')
      45          self.assertRaises(LookupError, fromencodedobject, b, 'ascii', 'foo')
      46          self.assertRaises(TypeError, fromencodedobject, 'abc', NULL)
      47          self.assertRaises(TypeError, fromencodedobject, 'abc', 'ascii')
      48          self.assertRaises(TypeError, fromencodedobject, [], NULL)
      49          self.assertRaises(TypeError, fromencodedobject, [], 'ascii')
      50          self.assertRaises(SystemError, fromencodedobject, NULL, NULL)
      51          self.assertRaises(SystemError, fromencodedobject, NULL, 'ascii')
      52  
      53      def test_decode(self):
      54          """Test PyUnicode_Decode()"""
      55          decode = _testcapi.unicode_decode
      56  
      57          self.assertEqual(decode(b'[\xe2\x82\xac]', 'utf-8'), '[\u20ac]')
      58          self.assertEqual(decode(b'[\xa4]', 'iso8859-15'), '[\u20ac]')
      59          self.assertEqual(decode(b'[\xa4]', 'iso8859-15', 'strict'), '[\u20ac]')
      60          self.assertRaises(UnicodeDecodeError, decode, b'[\xa4]', 'utf-8')
      61          self.assertEqual(decode(b'[\xa4]', 'utf-8', 'replace'), '[\ufffd]')
      62  
      63          self.assertEqual(decode(b'[\xe2\x82\xac]', NULL), '[\u20ac]')
      64          self.assertEqual(decode(b'[\xa4]', NULL, 'replace'), '[\ufffd]')
      65  
      66          self.assertRaises(LookupError, decode, b'\xa4', 'foo')
      67          self.assertRaises(LookupError, decode, b'\xa4', 'utf-8', 'foo')
      68          # TODO: Test PyUnicode_Decode() with NULL as data and
      69          # negative size.
      70  
      71      def test_asencodedstring(self):
      72          """Test PyUnicode_AsEncodedString()"""
      73          asencodedstring = _testcapi.unicode_asencodedstring
      74  
      75          self.assertEqual(asencodedstring('abc', NULL), b'abc')
      76          self.assertEqual(asencodedstring('abc', 'ascii'), b'abc')
      77          s = 'a\xa1\u4f60\U0001f600'
      78          b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80'
      79          self.assertEqual(asencodedstring(s, NULL), b)
      80          self.assertEqual(asencodedstring(s, 'utf-8'), b)
      81          self.assertEqual(asencodedstring('\xa1\xa2', 'latin1'), b'\xa1\xa2')
      82          self.assertRaises(UnicodeEncodeError, asencodedstring, '\xa1\xa2', 'ascii')
      83          self.assertEqual(asencodedstring(s, 'ascii', 'replace'), b'a???')
      84  
      85          self.assertRaises(LookupError, asencodedstring, 'abc', 'foo')
      86          self.assertRaises(LookupError, asencodedstring, s, 'ascii', 'foo')
      87          self.assertRaises(TypeError, asencodedstring, b'abc', NULL)
      88          self.assertRaises(TypeError, asencodedstring, b'abc', 'ascii')
      89          self.assertRaises(TypeError, asencodedstring, [], NULL)
      90          self.assertRaises(TypeError, asencodedstring, [], 'ascii')
      91          # CRASHES asencodedstring(NULL, NULL)
      92          # CRASHES asencodedstring(NULL, 'ascii')
      93  
      94      def test_decodeutf8(self):
      95          """Test PyUnicode_DecodeUTF8()"""
      96          decodeutf8 = _testcapi.unicode_decodeutf8
      97  
      98          for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
      99              b = s.encode('utf-8')
     100              self.assertEqual(decodeutf8(b), s)
     101              self.assertEqual(decodeutf8(b, 'strict'), s)
     102  
     103          self.assertRaises(UnicodeDecodeError, decodeutf8, b'\x80')
     104          self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xc0')
     105          self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xff')
     106          self.assertRaises(UnicodeDecodeError, decodeutf8, b'a\xf0\x9f')
     107          self.assertEqual(decodeutf8(b'a\xf0\x9f', 'replace'), 'a\ufffd')
     108          self.assertEqual(decodeutf8(b'a\xf0\x9fb', 'replace'), 'a\ufffdb')
     109  
     110          self.assertRaises(LookupError, decodeutf8, b'a\x80', 'foo')
     111          # TODO: Test PyUnicode_DecodeUTF8() with NULL as data and
     112          # negative size.
     113  
     114      def test_decodeutf8stateful(self):
     115          """Test PyUnicode_DecodeUTF8Stateful()"""
     116          decodeutf8stateful = _testcapi.unicode_decodeutf8stateful
     117  
     118          for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
     119              b = s.encode('utf-8')
     120              self.assertEqual(decodeutf8stateful(b), (s, len(b)))
     121              self.assertEqual(decodeutf8stateful(b, 'strict'), (s, len(b)))
     122  
     123          self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\x80')
     124          self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xc0')
     125          self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xff')
     126          self.assertEqual(decodeutf8stateful(b'a\xf0\x9f'), ('a', 1))
     127          self.assertEqual(decodeutf8stateful(b'a\xf0\x9f', 'replace'), ('a', 1))
     128          self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'a\xf0\x9fb')
     129          self.assertEqual(decodeutf8stateful(b'a\xf0\x9fb', 'replace'), ('a\ufffdb', 4))
     130  
     131          self.assertRaises(LookupError, decodeutf8stateful, b'a\x80', 'foo')
     132          # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as data and
     133          # negative size.
     134          # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as the address of
     135          # "consumed".
     136  
     137      def test_asutf8string(self):
     138          """Test PyUnicode_AsUTF8String()"""
     139          asutf8string = _testcapi.unicode_asutf8string
     140  
     141          for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
     142              self.assertEqual(asutf8string(s), s.encode('utf-8'))
     143  
     144          self.assertRaises(UnicodeEncodeError, asutf8string, '\ud8ff')
     145          self.assertRaises(TypeError, asutf8string, b'abc')
     146          self.assertRaises(TypeError, asutf8string, [])
     147          # CRASHES asutf8string(NULL)
     148  
     149      def test_decodeutf16(self):
     150          """Test PyUnicode_DecodeUTF16()"""
     151          decodeutf16 = _testcapi.unicode_decodeutf16
     152  
     153          naturalbyteorder = -1 if sys.byteorder == 'little' else 1
     154          for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
     155              b = s.encode('utf-16')
     156              self.assertEqual(decodeutf16(0, b), (naturalbyteorder, s))
     157              b = s.encode('utf-16le')
     158              self.assertEqual(decodeutf16(-1, b), (-1, s))
     159              self.assertEqual(decodeutf16(0, b'\xff\xfe'+b), (-1, s))
     160              b = s.encode('utf-16be')
     161              self.assertEqual(decodeutf16(1, b), (1, s))
     162              self.assertEqual(decodeutf16(0, b'\xfe\xff'+b), (1, s))
     163  
     164          self.assertRaises(UnicodeDecodeError, decodeutf16, -1, b'a')
     165          self.assertRaises(UnicodeDecodeError, decodeutf16, 1, b'a')
     166          self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xff\xfea')
     167          self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xfe\xffa')
     168  
     169          self.assertRaises(UnicodeDecodeError, decodeutf16, -1, b'\x00\xde')
     170          self.assertRaises(UnicodeDecodeError, decodeutf16, 1, b'\xde\x00')
     171          self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xde\xde')
     172          self.assertEqual(decodeutf16(-1, b'\x00\xde', 'replace'), (-1, '\ufffd'))
     173          self.assertEqual(decodeutf16(1, b'\xde\x00', 'replace'), (1, '\ufffd'))
     174          self.assertEqual(decodeutf16(0, b'\xde\xde', 'replace'), (0, '\ufffd'))
     175          self.assertEqual(decodeutf16(0, b'\xff\xfe\x00\xde', 'replace'), (-1, '\ufffd'))
     176          self.assertEqual(decodeutf16(0, b'\xfe\xff\xde\x00', 'replace'), (1, '\ufffd'))
     177  
     178          self.assertRaises(UnicodeDecodeError, decodeutf16, -1, b'\x3d\xd8')
     179          self.assertRaises(UnicodeDecodeError, decodeutf16, 1, b'\xd8\x3d')
     180          self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xd8\xd8')
     181          self.assertEqual(decodeutf16(-1, b'\x3d\xd8', 'replace'), (-1, '\ufffd'))
     182          self.assertEqual(decodeutf16(1, b'\xd8\x3d', 'replace'), (1, '\ufffd'))
     183          self.assertEqual(decodeutf16(0, b'\xd8\xd8', 'replace'), (0, '\ufffd'))
     184          self.assertEqual(decodeutf16(0, b'\xff\xfe\x3d\xd8', 'replace'), (-1, '\ufffd'))
     185          self.assertEqual(decodeutf16(0, b'\xfe\xff\xd8\x3d', 'replace'), (1, '\ufffd'))
     186  
     187          self.assertRaises(LookupError, decodeutf16, -1, b'\x00\xde', 'foo')
     188          self.assertRaises(LookupError, decodeutf16, 1, b'\xde\x00', 'foo')
     189          self.assertRaises(LookupError, decodeutf16, 0, b'\xde\xde', 'foo')
     190          # TODO: Test PyUnicode_DecodeUTF16() with NULL as data and
     191          # negative size.
     192  
     193      def test_decodeutf16stateful(self):
     194          """Test PyUnicode_DecodeUTF16Stateful()"""
     195          decodeutf16stateful = _testcapi.unicode_decodeutf16stateful
     196  
     197          naturalbyteorder = -1 if sys.byteorder == 'little' else 1
     198          for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
     199              b = s.encode('utf-16')
     200              self.assertEqual(decodeutf16stateful(0, b), (naturalbyteorder, s, len(b)))
     201              b = s.encode('utf-16le')
     202              self.assertEqual(decodeutf16stateful(-1, b), (-1, s, len(b)))
     203              self.assertEqual(decodeutf16stateful(0, b'\xff\xfe'+b), (-1, s, len(b)+2))
     204              b = s.encode('utf-16be')
     205              self.assertEqual(decodeutf16stateful(1, b), (1, s, len(b)))
     206              self.assertEqual(decodeutf16stateful(0, b'\xfe\xff'+b), (1, s, len(b)+2))
     207  
     208          self.assertEqual(decodeutf16stateful(-1, b'\x61\x00\x3d'), (-1, 'a', 2))
     209          self.assertEqual(decodeutf16stateful(-1, b'\x61\x00\x3d\xd8'), (-1, 'a', 2))
     210          self.assertEqual(decodeutf16stateful(-1, b'\x61\x00\x3d\xd8\x00'), (-1, 'a', 2))
     211          self.assertEqual(decodeutf16stateful(1, b'\x00\x61\xd8'), (1, 'a', 2))
     212          self.assertEqual(decodeutf16stateful(1, b'\x00\x61\xd8\x3d'), (1, 'a', 2))
     213          self.assertEqual(decodeutf16stateful(1, b'\x00\x61\xd8\x3d\xde'), (1, 'a', 2))
     214          self.assertEqual(decodeutf16stateful(0, b'\xff\xfe\x61\x00\x3d\xd8\x00'), (-1, 'a', 4))
     215          self.assertEqual(decodeutf16stateful(0, b'\xfe\xff\x00\x61\xd8\x3d\xde'), (1, 'a', 4))
     216  
     217          self.assertRaises(UnicodeDecodeError, decodeutf16stateful, -1, b'\x00\xde')
     218          self.assertRaises(UnicodeDecodeError, decodeutf16stateful, 1, b'\xde\x00')
     219          self.assertRaises(UnicodeDecodeError, decodeutf16stateful, 0, b'\xde\xde')
     220          self.assertEqual(decodeutf16stateful(-1, b'\x00\xde', 'replace'), (-1, '\ufffd', 2))
     221          self.assertEqual(decodeutf16stateful(1, b'\xde\x00', 'replace'), (1, '\ufffd', 2))
     222          self.assertEqual(decodeutf16stateful(0, b'\xde\xde', 'replace'), (0, '\ufffd', 2))
     223          self.assertEqual(decodeutf16stateful(0, b'\xff\xfe\x00\xde', 'replace'), (-1, '\ufffd', 4))
     224          self.assertEqual(decodeutf16stateful(0, b'\xfe\xff\xde\x00', 'replace'), (1, '\ufffd', 4))
     225  
     226          self.assertRaises(UnicodeDecodeError, decodeutf16stateful, -1, b'\x3d\xd8\x61\x00')
     227          self.assertEqual(decodeutf16stateful(-1, b'\x3d\xd8\x61\x00', 'replace'), (-1, '\ufffda', 4))
     228          self.assertRaises(UnicodeDecodeError, decodeutf16stateful, 1, b'\xd8\x3d\x00\x61')
     229          self.assertEqual(decodeutf16stateful(1, b'\xd8\x3d\x00\x61', 'replace'), (1, '\ufffda', 4))
     230  
     231          self.assertRaises(LookupError, decodeutf16stateful, -1, b'\x00\xde', 'foo')
     232          self.assertRaises(LookupError, decodeutf16stateful, 1, b'\xde\x00', 'foo')
     233          self.assertRaises(LookupError, decodeutf16stateful, 0, b'\xde\xde', 'foo')
     234          # TODO: Test PyUnicode_DecodeUTF16Stateful() with NULL as data and
     235          # negative size.
     236          # TODO: Test PyUnicode_DecodeUTF16Stateful() with NULL as the address of
     237          # "consumed".
     238  
     239      def test_asutf16string(self):
     240          """Test PyUnicode_AsUTF16String()"""
     241          asutf16string = _testcapi.unicode_asutf16string
     242  
     243          for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
     244              self.assertEqual(asutf16string(s), s.encode('utf-16'))
     245  
     246          self.assertRaises(UnicodeEncodeError, asutf16string, '\ud8ff')
     247          self.assertRaises(TypeError, asutf16string, b'abc')
     248          self.assertRaises(TypeError, asutf16string, [])
     249          # CRASHES asutf16string(NULL)
     250  
     251      def test_decodeutf32(self):
     252          """Test PyUnicode_DecodeUTF8()"""
     253          decodeutf32 = _testcapi.unicode_decodeutf32
     254  
     255          naturalbyteorder = -1 if sys.byteorder == 'little' else 1
     256          for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
     257              b = s.encode('utf-32')
     258              self.assertEqual(decodeutf32(0, b), (naturalbyteorder, s))
     259              b = s.encode('utf-32le')
     260              self.assertEqual(decodeutf32(-1, b), (-1, s))
     261              self.assertEqual(decodeutf32(0, b'\xff\xfe\x00\x00'+b), (-1, s))
     262              b = s.encode('utf-32be')
     263              self.assertEqual(decodeutf32(1, b), (1, s))
     264              self.assertEqual(decodeutf32(0, b'\x00\x00\xfe\xff'+b), (1, s))
     265  
     266          self.assertRaises(UnicodeDecodeError, decodeutf32, -1, b'\x61\x00\x00\x00\x00')
     267          self.assertRaises(UnicodeDecodeError, decodeutf32, 1, b'\x00\x00\x00\x61\x00')
     268          self.assertRaises(UnicodeDecodeError, decodeutf32, 0, b'\xff\xfe\x00\x00\x61\x00\x00\x00\x00')
     269          self.assertRaises(UnicodeDecodeError, decodeutf32, 0, b'\x00\x00\xfe\xff\x00\x00\x00\x61\x00')
     270  
     271          self.assertRaises(UnicodeDecodeError, decodeutf32, -1, b'\xff\xff\xff\xff')
     272          self.assertRaises(UnicodeDecodeError, decodeutf32, 1, b'\xff\xff\xff\xff')
     273          self.assertRaises(UnicodeDecodeError, decodeutf32, 0, b'\xff\xff\xff\xff')
     274          self.assertEqual(decodeutf32(-1, b'\xff\xff\xff\xff', 'replace'), (-1, '\ufffd'))
     275          self.assertEqual(decodeutf32(1, b'\xff\xff\xff\xff', 'replace'), (1, '\ufffd'))
     276          self.assertEqual(decodeutf32(0, b'\xff\xff\xff\xff', 'replace'), (0, '\ufffd'))
     277          self.assertEqual(decodeutf32(0, b'\xff\xfe\x00\x00\xff\xff\xff\xff', 'replace'), (-1, '\ufffd'))
     278          self.assertEqual(decodeutf32(0, b'\x00\x00\xfe\xff\xff\xff\xff\xff', 'replace'), (1, '\ufffd'))
     279  
     280          self.assertRaises(UnicodeDecodeError, decodeutf32, -1, b'\x3d\xd8\x00\x00')
     281          self.assertEqual(decodeutf32(-1, b'\x3d\xd8\x00\x00', 'replace'), (-1, '\ufffd'))
     282          self.assertRaises(UnicodeDecodeError, decodeutf32, 1, b'\x00\x00\xd8\x3d')
     283          self.assertEqual(decodeutf32(1, b'\x00\x00\xd8\x3d', 'replace'), (1, '\ufffd'))
     284  
     285          self.assertRaises(LookupError, decodeutf32, -1, b'\xff\xff\xff\xff', 'foo')
     286          self.assertRaises(LookupError, decodeutf32, 1, b'\xff\xff\xff\xff', 'foo')
     287          self.assertRaises(LookupError, decodeutf32, 0, b'\xff\xff\xff\xff', 'foo')
     288          # TODO: Test PyUnicode_DecodeUTF32() with NULL as data and
     289          # negative size.
     290  
     291      def test_decodeutf32stateful(self):
     292          """Test PyUnicode_DecodeUTF32Stateful()"""
     293          decodeutf32stateful = _testcapi.unicode_decodeutf32stateful
     294  
     295          naturalbyteorder = -1 if sys.byteorder == 'little' else 1
     296          for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
     297              b = s.encode('utf-32')
     298              self.assertEqual(decodeutf32stateful(0, b), (naturalbyteorder, s, len(b)))
     299              b = s.encode('utf-32le')
     300              self.assertEqual(decodeutf32stateful(-1, b), (-1, s, len(b)))
     301              self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00'+b), (-1, s, len(b)+4))
     302              b = s.encode('utf-32be')
     303              self.assertEqual(decodeutf32stateful(1, b), (1, s, len(b)))
     304              self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff'+b), (1, s, len(b)+4))
     305  
     306          self.assertEqual(decodeutf32stateful(-1, b'\x61\x00\x00\x00\x00'), (-1, 'a', 4))
     307          self.assertEqual(decodeutf32stateful(-1, b'\x61\x00\x00\x00\x00\xf6'), (-1, 'a', 4))
     308          self.assertEqual(decodeutf32stateful(-1, b'\x61\x00\x00\x00\x00\xf6\x01'), (-1, 'a', 4))
     309          self.assertEqual(decodeutf32stateful(1, b'\x00\x00\x00\x61\x00'), (1, 'a', 4))
     310          self.assertEqual(decodeutf32stateful(1, b'\x00\x00\x00\x61\x00\x01'), (1, 'a', 4))
     311          self.assertEqual(decodeutf32stateful(1, b'\x00\x00\x00\x61\x00\x01\xf6'), (1, 'a', 4))
     312          self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00\x61\x00\x00\x00\x00\xf6\x01'), (-1, 'a', 8))
     313          self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff\x00\x00\x00\x61\x00\x01\xf6'), (1, 'a', 8))
     314  
     315          for b in b'\xff', b'\xff\xff', b'\xff\xff\xff':
     316              self.assertEqual(decodeutf32stateful(-1, b), (-1, '', 0))
     317              self.assertEqual(decodeutf32stateful(1, b), (1, '', 0))
     318              self.assertEqual(decodeutf32stateful(0, b), (0, '', 0))
     319              self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00'+b), (-1, '', 4))
     320              self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff'+b), (1, '', 4))
     321          self.assertRaises(UnicodeDecodeError, decodeutf32stateful, -1, b'\xff\xff\xff\xff')
     322          self.assertRaises(UnicodeDecodeError, decodeutf32stateful, 1, b'\xff\xff\xff\xff')
     323          self.assertRaises(UnicodeDecodeError, decodeutf32stateful, 0, b'\xff\xff\xff\xff')
     324          self.assertEqual(decodeutf32stateful(-1, b'\xff\xff\xff\xff', 'replace'), (-1, '\ufffd', 4))
     325          self.assertEqual(decodeutf32stateful(1, b'\xff\xff\xff\xff', 'replace'), (1, '\ufffd', 4))
     326          self.assertEqual(decodeutf32stateful(0, b'\xff\xff\xff\xff', 'replace'), (0, '\ufffd', 4))
     327          self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00\xff\xff\xff\xff', 'replace'), (-1, '\ufffd', 8))
     328          self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff\xff\xff\xff\xff', 'replace'), (1, '\ufffd', 8))
     329  
     330          self.assertRaises(UnicodeDecodeError, decodeutf32stateful, -1, b'\x3d\xd8\x00\x00')
     331          self.assertEqual(decodeutf32stateful(-1, b'\x3d\xd8\x00\x00', 'replace'), (-1, '\ufffd', 4))
     332          self.assertRaises(UnicodeDecodeError, decodeutf32stateful, 1, b'\x00\x00\xd8\x3d')
     333          self.assertEqual(decodeutf32stateful(1, b'\x00\x00\xd8\x3d', 'replace'), (1, '\ufffd', 4))
     334  
     335          self.assertRaises(LookupError, decodeutf32stateful, -1, b'\xff\xff\xff\xff', 'foo')
     336          self.assertRaises(LookupError, decodeutf32stateful, 1, b'\xff\xff\xff\xff', 'foo')
     337          self.assertRaises(LookupError, decodeutf32stateful, 0, b'\xff\xff\xff\xff', 'foo')
     338          # TODO: Test PyUnicode_DecodeUTF32Stateful() with NULL as data and
     339          # negative size.
     340          # TODO: Test PyUnicode_DecodeUTF32Stateful() with NULL as the address of
     341          # "consumed".
     342  
     343      def test_asutf32string(self):
     344          """Test PyUnicode_AsUTF32String()"""
     345          asutf32string = _testcapi.unicode_asutf32string
     346  
     347          for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
     348              self.assertEqual(asutf32string(s), s.encode('utf-32'))
     349  
     350          self.assertRaises(UnicodeEncodeError, asutf32string, '\ud8ff')
     351          self.assertRaises(TypeError, asutf32string, b'abc')
     352          self.assertRaises(TypeError, asutf32string, [])
     353          # CRASHES asutf32string(NULL)
     354  
     355      def test_decodelatin1(self):
     356          """Test PyUnicode_DecodeLatin1()"""
     357          decodelatin1 = _testcapi.unicode_decodelatin1
     358  
     359          self.assertEqual(decodelatin1(b'abc'), 'abc')
     360          self.assertEqual(decodelatin1(b'abc', 'strict'), 'abc')
     361          self.assertEqual(decodelatin1(b'\xa1\xa2'), '\xa1\xa2')
     362          self.assertEqual(decodelatin1(b'\xa1\xa2', 'strict'), '\xa1\xa2')
     363          # TODO: Test PyUnicode_DecodeLatin1() with NULL as data and
     364          # negative size.
     365  
     366      def test_aslatin1string(self):
     367          """Test PyUnicode_AsLatin1String()"""
     368          aslatin1string = _testcapi.unicode_aslatin1string
     369  
     370          self.assertEqual(aslatin1string('abc'), b'abc')
     371          self.assertEqual(aslatin1string('\xa1\xa2'), b'\xa1\xa2')
     372  
     373          self.assertRaises(UnicodeEncodeError, aslatin1string, '\u4f60')
     374          self.assertRaises(TypeError, aslatin1string, b'abc')
     375          self.assertRaises(TypeError, aslatin1string, [])
     376          # CRASHES aslatin1string(NULL)
     377  
     378      def test_decodeascii(self):
     379          """Test PyUnicode_DecodeASCII()"""
     380          decodeascii = _testcapi.unicode_decodeascii
     381  
     382          self.assertEqual(decodeascii(b'abc'), 'abc')
     383          self.assertEqual(decodeascii(b'abc', 'strict'), 'abc')
     384  
     385          self.assertRaises(UnicodeDecodeError, decodeascii, b'\xff')
     386          self.assertEqual(decodeascii(b'a\xff', 'replace'), 'a\ufffd')
     387          self.assertEqual(decodeascii(b'a\xffb', 'replace'), 'a\ufffdb')
     388  
     389          self.assertRaises(LookupError, decodeascii, b'a\xff', 'foo')
     390          # TODO: Test PyUnicode_DecodeASCII() with NULL as data and
     391          # negative size.
     392  
     393      def test_asasciistring(self):
     394          """Test PyUnicode_AsASCIIString()"""
     395          asasciistring = _testcapi.unicode_asasciistring
     396  
     397          self.assertEqual(asasciistring('abc'), b'abc')
     398  
     399          self.assertRaises(UnicodeEncodeError, asasciistring, '\x80')
     400          self.assertRaises(TypeError, asasciistring, b'abc')
     401          self.assertRaises(TypeError, asasciistring, [])
     402          # CRASHES asasciistring(NULL)
     403  
     404      def test_decodecharmap(self):
     405          """Test PyUnicode_DecodeCharmap()"""
     406          decodecharmap = _testcapi.unicode_decodecharmap
     407  
     408          self.assertEqual(decodecharmap(b'\3\0\7', {0: 'a', 3: 'b', 7: 'c'}), 'bac')
     409          self.assertEqual(decodecharmap(b'\1\0\2', ['a', 'b', 'c']), 'bac')
     410          self.assertEqual(decodecharmap(b'\1\0\2', 'abc'), 'bac')
     411          self.assertEqual(decodecharmap(b'\1\0\2', ['\xa1', '\xa2', '\xa3']), '\xa2\xa1\xa3')
     412          self.assertEqual(decodecharmap(b'\1\0\2', ['\u4f60', '\u597d', '\u4e16']), '\u597d\u4f60\u4e16')
     413          self.assertEqual(decodecharmap(b'\1\0\2', ['\U0001f600', '\U0001f601', '\U0001f602']), '\U0001f601\U0001f600\U0001f602')
     414  
     415          self.assertEqual(decodecharmap(b'\1\0\2', [97, 98, 99]), 'bac')
     416          self.assertEqual(decodecharmap(b'\1\0\2', ['', 'b', 'cd']), 'bcd')
     417  
     418          self.assertRaises(UnicodeDecodeError, decodecharmap, b'\0', {})
     419          self.assertRaises(UnicodeDecodeError, decodecharmap, b'\0', {0: None})
     420          self.assertEqual(decodecharmap(b'\1\0\2', [None, 'b', 'c'], 'replace'), 'b\ufffdc')
     421          self.assertEqual(decodecharmap(b'\1\0\2\xff', NULL), '\1\0\2\xff')
     422          self.assertRaises(TypeError, decodecharmap, b'\0', 42)
     423  
     424          # TODO: Test PyUnicode_DecodeCharmap() with NULL as data and
     425          # negative size.
     426  
     427      def test_ascharmapstring(self):
     428          """Test PyUnicode_AsCharmapString()"""
     429          ascharmapstring = _testcapi.unicode_ascharmapstring
     430  
     431          self.assertEqual(ascharmapstring('abc', {97: 3, 98: 0, 99: 7}), b'\3\0\7')
     432          self.assertEqual(ascharmapstring('\xa1\xa2\xa3', {0xa1: 3, 0xa2: 0, 0xa3: 7}), b'\3\0\7')
     433          self.assertEqual(ascharmapstring('\u4f60\u597d\u4e16', {0x4f60: 3, 0x597d: 0, 0x4e16: 7}), b'\3\0\7')
     434          self.assertEqual(ascharmapstring('\U0001f600\U0001f601\U0001f602', {0x1f600: 3, 0x1f601: 0, 0x1f602: 7}), b'\3\0\7')
     435          self.assertEqual(ascharmapstring('abc', {97: 3, 98: b'', 99: b'spam'}), b'\3spam')
     436  
     437          self.assertRaises(UnicodeEncodeError, ascharmapstring, 'a', {})
     438          self.assertRaises(UnicodeEncodeError, ascharmapstring, 'a', {97: None})
     439          self.assertRaises(TypeError, ascharmapstring, b'a', {})
     440          self.assertRaises(TypeError, ascharmapstring, [], {})
     441          self.assertRaises(TypeError, ascharmapstring, 'a', NULL)
     442          # CRASHES ascharmapstring(NULL, {})
     443  
     444      def test_decodeunicodeescape(self):
     445          """Test PyUnicode_DecodeUnicodeEscape()"""
     446          decodeunicodeescape = _testcapi.unicode_decodeunicodeescape
     447  
     448          self.assertEqual(decodeunicodeescape(b'abc'), 'abc')
     449          self.assertEqual(decodeunicodeescape(br'\t\n\r\x0b\x0c\x00\\'), '\t\n\r\v\f\0\\')
     450          self.assertEqual(decodeunicodeescape(b'\t\n\r\x0b\x0c\x00'), '\t\n\r\v\f\0')
     451          self.assertEqual(decodeunicodeescape(br'\xa1\xa2'), '\xa1\xa2')
     452          self.assertEqual(decodeunicodeescape(b'\xa1\xa2'), '\xa1\xa2')
     453          self.assertEqual(decodeunicodeescape(br'\u4f60\u597d'), '\u4f60\u597d')
     454          self.assertEqual(decodeunicodeescape(br'\U0001f600'), '\U0001f600')
     455          with self.assertWarns(DeprecationWarning):
     456              self.assertEqual(decodeunicodeescape(br'\z'), r'\z')
     457  
     458          for b in b'\\', br'\xa', br'\u4f6', br'\U0001f60':
     459              self.assertRaises(UnicodeDecodeError, decodeunicodeescape, b)
     460              self.assertRaises(UnicodeDecodeError, decodeunicodeescape, b, 'strict')
     461          self.assertEqual(decodeunicodeescape(br'x\U0001f60', 'replace'), 'x\ufffd')
     462          self.assertEqual(decodeunicodeescape(br'x\U0001f60y', 'replace'), 'x\ufffdy')
     463  
     464          self.assertRaises(LookupError, decodeunicodeescape, b'\\', 'foo')
     465          # TODO: Test PyUnicode_DecodeUnicodeEscape() with NULL as data and
     466          # negative size.
     467  
     468      def test_asunicodeescapestring(self):
     469          """Test PyUnicode_AsUnicodeEscapeString()"""
     470          asunicodeescapestring = _testcapi.unicode_asunicodeescapestring
     471  
     472          self.assertEqual(asunicodeescapestring('abc'), b'abc')
     473          self.assertEqual(asunicodeescapestring('\t\n\r\v\f\0\\'), br'\t\n\r\x0b\x0c\x00\\')
     474          self.assertEqual(asunicodeescapestring('\xa1\xa2'), br'\xa1\xa2')
     475          self.assertEqual(asunicodeescapestring('\u4f60\u597d'), br'\u4f60\u597d')
     476          self.assertEqual(asunicodeescapestring('\U0001f600'), br'\U0001f600')
     477  
     478          self.assertRaises(TypeError, asunicodeescapestring, b'abc')
     479          self.assertRaises(TypeError, asunicodeescapestring, [])
     480          # CRASHES asunicodeescapestring(NULL)
     481  
     482      def test_decoderawunicodeescape(self):
     483          """Test PyUnicode_DecodeRawUnicodeEscape()"""
     484          decoderawunicodeescape = _testcapi.unicode_decoderawunicodeescape
     485  
     486          self.assertEqual(decoderawunicodeescape(b'abc'), 'abc')
     487          self.assertEqual(decoderawunicodeescape(b'\t\n\r\v\f\0\\'), '\t\n\r\v\f\0\\')
     488          self.assertEqual(decoderawunicodeescape(b'\xa1\xa2'), '\xa1\xa2')
     489          self.assertEqual(decoderawunicodeescape(br'\u4f60\u597d'), '\u4f60\u597d')
     490          self.assertEqual(decoderawunicodeescape(br'\U0001f600'), '\U0001f600')
     491          self.assertEqual(decoderawunicodeescape(br'\xa1\xa2'), r'\xa1\xa2')
     492          self.assertEqual(decoderawunicodeescape(br'\z'), r'\z')
     493  
     494          for b in br'\u4f6', br'\U0001f60':
     495              self.assertRaises(UnicodeDecodeError, decoderawunicodeescape, b)
     496              self.assertRaises(UnicodeDecodeError, decoderawunicodeescape, b, 'strict')
     497          self.assertEqual(decoderawunicodeescape(br'x\U0001f60', 'replace'), 'x\ufffd')
     498          self.assertEqual(decoderawunicodeescape(br'x\U0001f60y', 'replace'), 'x\ufffdy')
     499  
     500          self.assertRaises(LookupError, decoderawunicodeescape, br'\U0001f60', 'foo')
     501          # TODO: Test PyUnicode_DecodeRawUnicodeEscape() with NULL as data and
     502          # negative size.
     503  
     504      def test_asrawunicodeescapestring(self):
     505          """Test PyUnicode_AsRawUnicodeEscapeString()"""
     506          asrawunicodeescapestring = _testcapi.unicode_asrawunicodeescapestring
     507  
     508          self.assertEqual(asrawunicodeescapestring('abc'), b'abc')
     509          self.assertEqual(asrawunicodeescapestring('\t\n\r\v\f\0\\'), b'\t\n\r\v\f\0\\')
     510          self.assertEqual(asrawunicodeescapestring('\xa1\xa2'), b'\xa1\xa2')
     511          self.assertEqual(asrawunicodeescapestring('\u4f60\u597d'), br'\u4f60\u597d')
     512          self.assertEqual(asrawunicodeescapestring('\U0001f600'), br'\U0001f600')
     513  
     514          self.assertRaises(TypeError, asrawunicodeescapestring, b'abc')
     515          self.assertRaises(TypeError, asrawunicodeescapestring, [])
     516          # CRASHES asrawunicodeescapestring(NULL)
     517  
     518  
# Run the tests in this file when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()