python (3.12.0)

(root)/
lib/
python3.12/
site-packages/
pip/
_vendor/
chardet/
escsm.py
       1  ######################## BEGIN LICENSE BLOCK ########################
       2  # The Original Code is mozilla.org code.
       3  #
       4  # The Initial Developer of the Original Code is
       5  # Netscape Communications Corporation.
       6  # Portions created by the Initial Developer are Copyright (C) 1998
       7  # the Initial Developer. All Rights Reserved.
       8  #
       9  # Contributor(s):
      10  #   Mark Pilgrim - port to Python
      11  #
      12  # This library is free software; you can redistribute it and/or
      13  # modify it under the terms of the GNU Lesser General Public
      14  # License as published by the Free Software Foundation; either
      15  # version 2.1 of the License,  or (at your option) any later version.
      16  #
      17  # This library is distributed in the hope that it will be useful,
      18  # but WITHOUT ANY WARRANTY; without even the implied warranty of
      19  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      20  # Lesser General Public License for more details.
      21  #
      22  # You should have received a copy of the GNU Lesser General Public
      23  # License along with this library; if not,  write to the Free Software
      24  # Foundation,  Inc.,  51 Franklin St,  Fifth Floor,  Boston,  MA
      25  # 02110-1301  USA
      26  ######################### END LICENSE BLOCK #########################
      27  
      28  from .codingstatemachinedict import CodingStateMachineDict
      29  from .enums import MachineState
      30  
      31  # fmt: off
      32  HZ_CLS = (  # HZ-GB-2312 byte-class table: 256 entries, one per byte value; row comments give the hex byte range
      33      1, 0, 0, 0, 0, 0, 0, 0,  # 00 - 07 (0x00 -> class 1)
      34      0, 0, 0, 0, 0, 0, 0, 0,  # 08 - 0f
      35      0, 0, 0, 0, 0, 0, 0, 0,  # 10 - 17
      36      0, 0, 0, 1, 0, 0, 0, 0,  # 18 - 1f (0x1b ESC -> class 1)
      37      0, 0, 0, 0, 0, 0, 0, 0,  # 20 - 27
      38      0, 0, 0, 0, 0, 0, 0, 0,  # 28 - 2f
      39      0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 37
      40      0, 0, 0, 0, 0, 0, 0, 0,  # 38 - 3f
      41      0, 0, 0, 0, 0, 0, 0, 0,  # 40 - 47
      42      0, 0, 0, 0, 0, 0, 0, 0,  # 48 - 4f
      43      0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 57
      44      0, 0, 0, 0, 0, 0, 0, 0,  # 58 - 5f
      45      0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 67
      46      0, 0, 0, 0, 0, 0, 0, 0,  # 68 - 6f
      47      0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 77
      48      0, 0, 0, 4, 0, 5, 2, 0,  # 78 - 7f (0x7b '{' -> 4, 0x7d '}' -> 5, 0x7e '~' -> 2)
      49      1, 1, 1, 1, 1, 1, 1, 1,  # 80 - 87 (every byte 0x80-0xff -> class 1)
      50      1, 1, 1, 1, 1, 1, 1, 1,  # 88 - 8f
      51      1, 1, 1, 1, 1, 1, 1, 1,  # 90 - 97
      52      1, 1, 1, 1, 1, 1, 1, 1,  # 98 - 9f
      53      1, 1, 1, 1, 1, 1, 1, 1,  # a0 - a7
      54      1, 1, 1, 1, 1, 1, 1, 1,  # a8 - af
      55      1, 1, 1, 1, 1, 1, 1, 1,  # b0 - b7
      56      1, 1, 1, 1, 1, 1, 1, 1,  # b8 - bf
      57      1, 1, 1, 1, 1, 1, 1, 1,  # c0 - c7
      58      1, 1, 1, 1, 1, 1, 1, 1,  # c8 - cf
      59      1, 1, 1, 1, 1, 1, 1, 1,  # d0 - d7
      60      1, 1, 1, 1, 1, 1, 1, 1,  # d8 - df
      61      1, 1, 1, 1, 1, 1, 1, 1,  # e0 - e7
      62      1, 1, 1, 1, 1, 1, 1, 1,  # e8 - ef
      63      1, 1, 1, 1, 1, 1, 1, 1,  # f0 - f7
      64      1, 1, 1, 1, 1, 1, 1, 1,  # f8 - ff
      65  )  # classes assigned in this table: 0, 1, 2, 4, 5 (class 3 is never assigned)
      66  
      67  HZ_ST = (  # flat state table for HZ-GB-2312: 48 entries, 8 per physical line; trailing comments are hex index ranges into the tuple
      68  MachineState.START, MachineState.ERROR,      3, MachineState.START, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, # 00-07
      69  MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 08-0f
      70  MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START,      4, MachineState.ERROR, # 10-17
      71       5, MachineState.ERROR,      6, MachineState.ERROR,      5,      5,      4, MachineState.ERROR, # 18-1f
      72       4, MachineState.ERROR,      4,      4,      4, MachineState.ERROR,      4, MachineState.ERROR, # 20-27
      73       4, MachineState.ITS_ME, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 28-2f
      74  )  # NOTE(review): bare ints 3-6 are presumably intermediate state numbers, and lookup is presumably state * class_factor (6) + class, giving 8 groups of 6 - confirm against the consumer of CodingStateMachineDict
      75  # fmt: on
      76  
      77  HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)  # six zeros, same length as class_factor (6) in HZ_SM_MODEL; presumably one entry per character class - TODO confirm
      78  
      79  HZ_SM_MODEL: CodingStateMachineDict = {  # bundles the HZ-GB-2312 tables defined above with their metadata
      80      "class_table": HZ_CLS,  # 256-entry tuple mapping byte value -> class number
      81      "class_factor": 6,  # matches len(HZ_CHAR_LEN_TABLE); HZ_ST holds 48 = 8 * 6 entries
      82      "state_table": HZ_ST,  # flat tuple of MachineState members and ints 3-6
      83      "char_len_table": HZ_CHAR_LEN_TABLE,  # all zeros
      84      "name": "HZ-GB-2312",  # encoding name carried by this model
      85      "language": "Chinese",  # language label carried by this model
      86  }
      87  
      88  # fmt: off
      89  ISO2022CN_CLS = (  # ISO-2022-CN byte-class table: 256 entries, one per byte value; row comments give the hex byte range
      90      2, 0, 0, 0, 0, 0, 0, 0,  # 00 - 07 (0x00 -> class 2)
      91      0, 0, 0, 0, 0, 0, 0, 0,  # 08 - 0f
      92      0, 0, 0, 0, 0, 0, 0, 0,  # 10 - 17
      93      0, 0, 0, 1, 0, 0, 0, 0,  # 18 - 1f (0x1b ESC -> class 1)
      94      0, 0, 0, 0, 0, 0, 0, 0,  # 20 - 27
      95      0, 3, 0, 0, 0, 0, 0, 0,  # 28 - 2f (0x29 ')' -> class 3)
      96      0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 37
      97      0, 0, 0, 0, 0, 0, 0, 0,  # 38 - 3f
      98      0, 0, 0, 4, 0, 0, 0, 0,  # 40 - 47 (0x43 'C' -> class 4)
      99      0, 0, 0, 0, 0, 0, 0, 0,  # 48 - 4f
     100      0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 57
     101      0, 0, 0, 0, 0, 0, 0, 0,  # 58 - 5f
     102      0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 67
     103      0, 0, 0, 0, 0, 0, 0, 0,  # 68 - 6f
     104      0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 77
     105      0, 0, 0, 0, 0, 0, 0, 0,  # 78 - 7f
     106      2, 2, 2, 2, 2, 2, 2, 2,  # 80 - 87 (every byte 0x80-0xff -> class 2)
     107      2, 2, 2, 2, 2, 2, 2, 2,  # 88 - 8f
     108      2, 2, 2, 2, 2, 2, 2, 2,  # 90 - 97
     109      2, 2, 2, 2, 2, 2, 2, 2,  # 98 - 9f
     110      2, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
     111      2, 2, 2, 2, 2, 2, 2, 2,  # a8 - af
     112      2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
     113      2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
     114      2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
     115      2, 2, 2, 2, 2, 2, 2, 2,  # c8 - cf
     116      2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
     117      2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
     118      2, 2, 2, 2, 2, 2, 2, 2,  # e0 - e7
     119      2, 2, 2, 2, 2, 2, 2, 2,  # e8 - ef
     120      2, 2, 2, 2, 2, 2, 2, 2,  # f0 - f7
     121      2, 2, 2, 2, 2, 2, 2, 2,  # f8 - ff
     122  )  # classes assigned in this table: 0, 1, 2, 3, 4
     123  
     124  ISO2022CN_ST = (  # flat state table for ISO-2022-CN: 64 entries, 8 per physical line; trailing comments are hex index ranges into the tuple
     125      MachineState.START,      3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 00-07
     126      MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 08-0f
     127      MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 10-17
     128      MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,      4, MachineState.ERROR, # 18-1f
     129      MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 20-27
     130          5,      6, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 28-2f
     131      MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 30-37
     132      MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.START, # 38-3f
     133  )  # NOTE(review): physical lines are not state boundaries - 64 is not a multiple of class_factor (9); bare ints 3-6 are presumably intermediate state numbers - confirm the consumer's indexing stays in range
     134  # fmt: on
     135  
     136  ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)  # nine zeros, same length as class_factor (9) in ISO2022CN_SM_MODEL; presumably one entry per character class - TODO confirm
     137  
     138  ISO2022CN_SM_MODEL: CodingStateMachineDict = {  # bundles the ISO-2022-CN tables defined above with their metadata
     139      "class_table": ISO2022CN_CLS,  # 256-entry tuple mapping byte value -> class number
     140      "class_factor": 9,  # matches len(ISO2022CN_CHAR_LEN_TABLE); NOTE(review): ISO2022CN_CLS only assigns classes 0-4 - confirm 9 is intended
     141      "state_table": ISO2022CN_ST,  # flat 64-entry tuple of MachineState members and ints 3-6
     142      "char_len_table": ISO2022CN_CHAR_LEN_TABLE,  # all zeros
     143      "name": "ISO-2022-CN",  # encoding name carried by this model
     144      "language": "Chinese",  # language label carried by this model
     145  }
     146  
     147  # fmt: off
     148  ISO2022JP_CLS = (  # ISO-2022-JP byte-class table: 256 entries, one per byte value; row comments give the hex byte range
     149      2, 0, 0, 0, 0, 0, 0, 0,  # 00 - 07 (0x00 -> class 2)
     150      0, 0, 0, 0, 0, 0, 2, 2,  # 08 - 0f (0x0e SO and 0x0f SI -> class 2)
     151      0, 0, 0, 0, 0, 0, 0, 0,  # 10 - 17
     152      0, 0, 0, 1, 0, 0, 0, 0,  # 18 - 1f (0x1b ESC -> class 1)
     153      0, 0, 0, 0, 7, 0, 0, 0,  # 20 - 27 (0x24 '$' -> class 7)
     154      3, 0, 0, 0, 0, 0, 0, 0,  # 28 - 2f (0x28 '(' -> class 3)
     155      0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 37
     156      0, 0, 0, 0, 0, 0, 0, 0,  # 38 - 3f
     157      6, 0, 4, 0, 8, 0, 0, 0,  # 40 - 47 (0x40 '@' -> 6, 0x42 'B' -> 4, 0x44 'D' -> 8)
     158      0, 9, 5, 0, 0, 0, 0, 0,  # 48 - 4f (0x49 'I' -> 9, 0x4a 'J' -> 5)
     159      0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 57
     160      0, 0, 0, 0, 0, 0, 0, 0,  # 58 - 5f
     161      0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 67
     162      0, 0, 0, 0, 0, 0, 0, 0,  # 68 - 6f
     163      0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 77
     164      0, 0, 0, 0, 0, 0, 0, 0,  # 78 - 7f
     165      2, 2, 2, 2, 2, 2, 2, 2,  # 80 - 87 (every byte 0x80-0xff -> class 2)
     166      2, 2, 2, 2, 2, 2, 2, 2,  # 88 - 8f
     167      2, 2, 2, 2, 2, 2, 2, 2,  # 90 - 97
     168      2, 2, 2, 2, 2, 2, 2, 2,  # 98 - 9f
     169      2, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
     170      2, 2, 2, 2, 2, 2, 2, 2,  # a8 - af
     171      2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
     172      2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
     173      2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
     174      2, 2, 2, 2, 2, 2, 2, 2,  # c8 - cf
     175      2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
     176      2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
     177      2, 2, 2, 2, 2, 2, 2, 2,  # e0 - e7
     178      2, 2, 2, 2, 2, 2, 2, 2,  # e8 - ef
     179      2, 2, 2, 2, 2, 2, 2, 2,  # f0 - f7
     180      2, 2, 2, 2, 2, 2, 2, 2,  # f8 - ff
     181  )  # classes assigned in this table: 0 through 9
     182  
     183  ISO2022JP_ST = (  # flat state table for ISO-2022-JP: 72 entries, 8 per physical line; trailing comments are hex index ranges into the tuple
     184      MachineState.START,      3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 00-07
     185      MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 08-0f
     186      MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 10-17
     187      MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, # 18-1f
     188      MachineState.ERROR,      5, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,      4, MachineState.ERROR, MachineState.ERROR, # 20-27
     189      MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,      6, MachineState.ITS_ME, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, # 28-2f
     190      MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, # 30-37
     191      MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 38-3f
     192      MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.START, MachineState.START, # 40-47
     193  )  # NOTE(review): physical lines are not state boundaries - 72 is not a multiple of class_factor (10); bare ints 3-6 are presumably intermediate state numbers - confirm the consumer's indexing stays in range
     194  # fmt: on
     195  
     196  ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)  # ten zeros, same length as class_factor (10) in ISO2022JP_SM_MODEL; presumably one entry per character class - TODO confirm
     197  
     198  ISO2022JP_SM_MODEL: CodingStateMachineDict = {  # bundles the ISO-2022-JP tables defined above with their metadata
     199      "class_table": ISO2022JP_CLS,  # 256-entry tuple mapping byte value -> class number (classes 0-9)
     200      "class_factor": 10,  # matches len(ISO2022JP_CHAR_LEN_TABLE) and the number of distinct classes in ISO2022JP_CLS
     201      "state_table": ISO2022JP_ST,  # flat 72-entry tuple of MachineState members and ints 3-6
     202      "char_len_table": ISO2022JP_CHAR_LEN_TABLE,  # all zeros
     203      "name": "ISO-2022-JP",  # encoding name carried by this model
     204      "language": "Japanese",  # language label carried by this model
     205  }
     206  
     207  # fmt: off
     208  ISO2022KR_CLS = (  # ISO-2022-KR byte-class table: 256 entries, one per byte value; row comments give the hex byte range
     209      2, 0, 0, 0, 0, 0, 0, 0,  # 00 - 07 (0x00 -> class 2)
     210      0, 0, 0, 0, 0, 0, 0, 0,  # 08 - 0f
     211      0, 0, 0, 0, 0, 0, 0, 0,  # 10 - 17
     212      0, 0, 0, 1, 0, 0, 0, 0,  # 18 - 1f (0x1b ESC -> class 1)
     213      0, 0, 0, 0, 3, 0, 0, 0,  # 20 - 27 (0x24 '$' -> class 3)
     214      0, 4, 0, 0, 0, 0, 0, 0,  # 28 - 2f (0x29 ')' -> class 4)
     215      0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 37
     216      0, 0, 0, 0, 0, 0, 0, 0,  # 38 - 3f
     217      0, 0, 0, 5, 0, 0, 0, 0,  # 40 - 47 (0x43 'C' -> class 5)
     218      0, 0, 0, 0, 0, 0, 0, 0,  # 48 - 4f
     219      0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 57
     220      0, 0, 0, 0, 0, 0, 0, 0,  # 58 - 5f
     221      0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 67
     222      0, 0, 0, 0, 0, 0, 0, 0,  # 68 - 6f
     223      0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 77
     224      0, 0, 0, 0, 0, 0, 0, 0,  # 78 - 7f
     225      2, 2, 2, 2, 2, 2, 2, 2,  # 80 - 87 (every byte 0x80-0xff -> class 2)
     226      2, 2, 2, 2, 2, 2, 2, 2,  # 88 - 8f
     227      2, 2, 2, 2, 2, 2, 2, 2,  # 90 - 97
     228      2, 2, 2, 2, 2, 2, 2, 2,  # 98 - 9f
     229      2, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
     230      2, 2, 2, 2, 2, 2, 2, 2,  # a8 - af
     231      2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
     232      2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
     233      2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
     234      2, 2, 2, 2, 2, 2, 2, 2,  # c8 - cf
     235      2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
     236      2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
     237      2, 2, 2, 2, 2, 2, 2, 2,  # e0 - e7
     238      2, 2, 2, 2, 2, 2, 2, 2,  # e8 - ef
     239      2, 2, 2, 2, 2, 2, 2, 2,  # f0 - f7
     240      2, 2, 2, 2, 2, 2, 2, 2,  # f8 - ff
     241  )  # classes assigned in this table: 0 through 5
     242  
     243  ISO2022KR_ST = (  # flat state table for ISO-2022-KR: 40 entries, 8 per physical line; trailing comments are hex index ranges into the tuple
     244      MachineState.START,      3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, # 00-07
     245      MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 08-0f
     246      MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,      4, MachineState.ERROR, MachineState.ERROR, # 10-17
     247      MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,      5, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 18-1f
     248      MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 20-27
     249  )  # NOTE(review): physical lines are not state boundaries - 40 is not a multiple of class_factor (6); bare ints 3-5 are presumably intermediate state numbers - confirm the consumer's indexing stays in range
     250  # fmt: on
     251  
     252  ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)  # six zeros, same length as class_factor (6) in ISO2022KR_SM_MODEL; presumably one entry per character class - TODO confirm
     253  
     254  ISO2022KR_SM_MODEL: CodingStateMachineDict = {  # bundles the ISO-2022-KR tables defined above with their metadata
     255      "class_table": ISO2022KR_CLS,  # 256-entry tuple mapping byte value -> class number (classes 0-5)
     256      "class_factor": 6,  # matches len(ISO2022KR_CHAR_LEN_TABLE) and the number of distinct classes in ISO2022KR_CLS
     257      "state_table": ISO2022KR_ST,  # flat 40-entry tuple of MachineState members and ints 3-5
     258      "char_len_table": ISO2022KR_CHAR_LEN_TABLE,  # all zeros
     259      "name": "ISO-2022-KR",  # encoding name carried by this model
     260      "language": "Korean",  # language label carried by this model
     261  }