python (3.11.7)
       1  ######################## BEGIN LICENSE BLOCK ########################
       2  # The Original Code is Mozilla Communicator client code.
       3  #
       4  # The Initial Developer of the Original Code is
       5  # Netscape Communications Corporation.
       6  # Portions created by the Initial Developer are Copyright (C) 1998
       7  # the Initial Developer. All Rights Reserved.
       8  #
       9  # Contributor(s):
      10  #   Mark Pilgrim - port to Python
      11  #
      12  # This library is free software; you can redistribute it and/or
      13  # modify it under the terms of the GNU Lesser General Public
      14  # License as published by the Free Software Foundation; either
      15  # version 2.1 of the License, or (at your option) any later version.
      16  #
      17  # This library is distributed in the hope that it will be useful,
      18  # but WITHOUT ANY WARRANTY; without even the implied warranty of
      19  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      20  # Lesser General Public License for more details.
      21  #
      22  # You should have received a copy of the GNU Lesser General Public
      23  # License along with this library; if not, write to the Free Software
      24  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
      25  # 02110-1301  USA
      26  ######################### END LICENSE BLOCK #########################
      27  
      28  # Sampling from about 20M text materials include literature and computer technology
      29  
      30  # 128  --> 0.79
      31  # 256  --> 0.92
      32  # 512  --> 0.986
      33  # 1024 --> 0.99944
      34  # 2048 --> 0.99999
      35  #
      36  # Idea Distribution Ratio = 0.98653 / (1-0.98653) = 73.24
      37  # Random Distribution Ration = 512 / (2350-512) = 0.279.
      38  #
      39  # Typical Distribution Ratio
      40  
      41  EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0
      42  
      43  EUCKR_TABLE_SIZE = 2352
      44  
      45  # Char to FreqOrder table ,
      46  # fmt: off
      47  EUCKR_CHAR_TO_FREQ_ORDER = (
      48    13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722,  87,
      49  1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398,
      50  1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488,  20,1733,1269,1734,
      51   945,1400,1735,  47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739,
      52   116, 987, 813,1401, 683,  75,1204, 145,1740,1741,1742,1743,  16, 847, 667, 622,
      53   708,1744,1745,1746, 966, 787, 304, 129,1747,  60, 820, 123, 676,1748,1749,1750,
      54  1751, 617,1752, 626,1753,1754,1755,1756, 653,1757,1758,1759,1760,1761,1762, 856,
      55   344,1763,1764,1765,1766,  89, 401, 418, 806, 905, 848,1767,1768,1769, 946,1205,
      56   709,1770,1118,1771, 241,1772,1773,1774,1271,1775, 569,1776, 999,1777,1778,1779,
      57  1780, 337, 751,1058,  28, 628, 254,1781, 177, 906, 270, 349, 891,1079,1782,  19,
      58  1783, 379,1784, 315,1785, 629, 754,1402, 559,1786, 636, 203,1206,1787, 710, 567,
      59  1788, 935, 814,1789,1790,1207, 766, 528,1791,1792,1208,1793,1794,1795,1796,1797,
      60  1403,1798,1799, 533,1059,1404,1405,1156,1406, 936, 884,1080,1800, 351,1801,1802,
      61  1803,1804,1805, 801,1806,1807,1808,1119,1809,1157, 714, 474,1407,1810, 298, 899,
      62   885,1811,1120, 802,1158,1812, 892,1813,1814,1408, 659,1815,1816,1121,1817,1818,
      63  1819,1820,1821,1822, 319,1823, 594, 545,1824, 815, 937,1209,1825,1826, 573,1409,
      64  1022,1827,1210,1828,1829,1830,1831,1832,1833, 556, 722, 807,1122,1060,1834, 697,
      65  1835, 900, 557, 715,1836,1410, 540,1411, 752,1159, 294, 597,1211, 976, 803, 770,
      66  1412,1837,1838,  39, 794,1413, 358,1839, 371, 925,1840, 453, 661, 788, 531, 723,
      67   544,1023,1081, 869,  91,1841, 392, 430, 790, 602,1414, 677,1082, 457,1415,1416,
      68  1842,1843, 475, 327,1024,1417, 795, 121,1844, 733, 403,1418,1845,1846,1847, 300,
      69   119, 711,1212, 627,1848,1272, 207,1849,1850, 796,1213, 382,1851, 519,1852,1083,
      70   893,1853,1854,1855, 367, 809, 487, 671,1856, 663,1857,1858, 956, 471, 306, 857,
      71  1859,1860,1160,1084,1861,1862,1863,1864,1865,1061,1866,1867,1868,1869,1870,1871,
      72   282,  96, 574,1872, 502,1085,1873,1214,1874, 907,1875,1876, 827, 977,1419,1420,
      73  1421, 268,1877,1422,1878,1879,1880, 308,1881,   2, 537,1882,1883,1215,1884,1885,
      74   127, 791,1886,1273,1423,1887,  34, 336, 404, 643,1888, 571, 654, 894, 840,1889,
      75     0, 886,1274, 122, 575, 260, 908, 938,1890,1275, 410, 316,1891,1892, 100,1893,
      76  1894,1123,  48,1161,1124,1025,1895, 633, 901,1276,1896,1897, 115, 816,1898, 317,
      77  1899, 694,1900, 909, 734,1424, 572, 866,1425, 691,  85, 524,1010, 543, 394, 841,
      78  1901,1902,1903,1026,1904,1905,1906,1907,1908,1909,  30, 451, 651, 988, 310,1910,
      79  1911,1426, 810,1216,  93,1912,1913,1277,1217,1914, 858, 759,  45,  58, 181, 610,
      80   269,1915,1916, 131,1062, 551, 443,1000, 821,1427, 957, 895,1086,1917,1918, 375,
      81  1919, 359,1920, 687,1921, 822,1922, 293,1923,1924,  40, 662, 118, 692,  29, 939,
      82   887, 640, 482, 174,1925,  69,1162, 728,1428, 910,1926,1278,1218,1279, 386, 870,
      83   217, 854,1163, 823,1927,1928,1929,1930, 834,1931,  78,1932, 859,1933,1063,1934,
      84  1935,1936,1937, 438,1164, 208, 595,1938,1939,1940,1941,1219,1125,1942, 280, 888,
      85  1429,1430,1220,1431,1943,1944,1945,1946,1947,1280, 150, 510,1432,1948,1949,1950,
      86  1951,1952,1953,1954,1011,1087,1955,1433,1043,1956, 881,1957, 614, 958,1064,1065,
      87  1221,1958, 638,1001, 860, 967, 896,1434, 989, 492, 553,1281,1165,1959,1282,1002,
      88  1283,1222,1960,1961,1962,1963,  36, 383, 228, 753, 247, 454,1964, 876, 678,1965,
      89  1966,1284, 126, 464, 490, 835, 136, 672, 529, 940,1088,1435, 473,1967,1968, 467,
      90    50, 390, 227, 587, 279, 378, 598, 792, 968, 240, 151, 160, 849, 882,1126,1285,
      91   639,1044, 133, 140, 288, 360, 811, 563,1027, 561, 142, 523,1969,1970,1971,   7,
      92   103, 296, 439, 407, 506, 634, 990,1972,1973,1974,1975, 645,1976,1977,1978,1979,
      93  1980,1981, 236,1982,1436,1983,1984,1089, 192, 828, 618, 518,1166, 333,1127,1985,
      94   818,1223,1986,1987,1988,1989,1990,1991,1992,1993, 342,1128,1286, 746, 842,1994,
      95  1995, 560, 223,1287,  98,   8, 189, 650, 978,1288,1996,1437,1997,  17, 345, 250,
      96   423, 277, 234, 512, 226,  97, 289,  42, 167,1998, 201,1999,2000, 843, 836, 824,
      97   532, 338, 783,1090, 182, 576, 436,1438,1439, 527, 500,2001, 947, 889,2002,2003,
      98  2004,2005, 262, 600, 314, 447,2006, 547,2007, 693, 738,1129,2008,  71,1440, 745,
      99   619, 688,2009, 829,2010,2011, 147,2012,  33, 948,2013,2014,  74, 224,2015,  61,
     100   191, 918, 399, 637,2016,1028,1130, 257, 902,2017,2018,2019,2020,2021,2022,2023,
     101  2024,2025,2026, 837,2027,2028,2029,2030, 179, 874, 591,  52, 724, 246,2031,2032,
     102  2033,2034,1167, 969,2035,1289, 630, 605, 911,1091,1168,2036,2037,2038,1441, 912,
     103  2039, 623,2040,2041, 253,1169,1290,2042,1442, 146, 620, 611, 577, 433,2043,1224,
     104   719,1170, 959, 440, 437, 534,  84, 388, 480,1131, 159, 220, 198, 679,2044,1012,
     105   819,1066,1443, 113,1225, 194, 318,1003,1029,2045,2046,2047,2048,1067,2049,2050,
     106  2051,2052,2053,  59, 913, 112,2054, 632,2055, 455, 144, 739,1291,2056, 273, 681,
     107   499,2057, 448,2058,2059, 760,2060,2061, 970, 384, 169, 245,1132,2062,2063, 414,
     108  1444,2064,2065,  41, 235,2066, 157, 252, 877, 568, 919, 789, 580,2067, 725,2068,
     109  2069,1292,2070,2071,1445,2072,1446,2073,2074,  55, 588,  66,1447, 271,1092,2075,
     110  1226,2076, 960,1013, 372,2077,2078,2079,2080,2081,1293,2082,2083,2084,2085, 850,
     111  2086,2087,2088,2089,2090, 186,2091,1068, 180,2092,2093,2094, 109,1227, 522, 606,
     112  2095, 867,1448,1093, 991,1171, 926, 353,1133,2096, 581,2097,2098,2099,1294,1449,
     113  1450,2100, 596,1172,1014,1228,2101,1451,1295,1173,1229,2102,2103,1296,1134,1452,
     114   949,1135,2104,2105,1094,1453,1454,1455,2106,1095,2107,2108,2109,2110,2111,2112,
     115  2113,2114,2115,2116,2117, 804,2118,2119,1230,1231, 805,1456, 405,1136,2120,2121,
     116  2122,2123,2124, 720, 701,1297, 992,1457, 927,1004,2125,2126,2127,2128,2129,2130,
     117    22, 417,2131, 303,2132, 385,2133, 971, 520, 513,2134,1174,  73,1096, 231, 274,
     118   962,1458, 673,2135,1459,2136, 152,1137,2137,2138,2139,2140,1005,1138,1460,1139,
     119  2141,2142,2143,2144,  11, 374, 844,2145, 154,1232,  46,1461,2146, 838, 830, 721,
     120  1233, 106,2147,  90, 428, 462, 578, 566,1175, 352,2148,2149, 538,1234, 124,1298,
     121  2150,1462, 761, 565,2151, 686,2152, 649,2153,  72, 173,2154, 460, 415,2155,1463,
     122  2156,1235, 305,2157,2158,2159,2160,2161,2162, 579,2163,2164,2165,2166,2167, 747,
     123  2168,2169,2170,2171,1464, 669,2172,2173,2174,2175,2176,1465,2177,  23, 530, 285,
     124  2178, 335, 729,2179, 397,2180,2181,2182,1030,2183,2184, 698,2185,2186, 325,2187,
     125  2188, 369,2189, 799,1097,1015, 348,2190,1069, 680,2191, 851,1466,2192,2193,  10,
     126  2194, 613, 424,2195, 979, 108, 449, 589,  27, 172,  81,1031,  80, 774, 281, 350,
     127  1032, 525, 301, 582,1176,2196, 674,1045,2197,2198,1467, 730, 762,2199,2200,2201,
     128  2202,1468,2203, 993,2204,2205, 266,1070, 963,1140,2206,2207,2208, 664,1098, 972,
     129  2209,2210,2211,1177,1469,1470, 871,2212,2213,2214,2215,2216,1471,2217,2218,2219,
     130  2220,2221,2222,2223,2224,2225,2226,2227,1472,1236,2228,2229,2230,2231,2232,2233,
     131  2234,2235,1299,2236,2237, 200,2238, 477, 373,2239,2240, 731, 825, 777,2241,2242,
     132  2243, 521, 486, 548,2244,2245,2246,1473,1300,  53, 549, 137, 875,  76, 158,2247,
     133  1301,1474, 469, 396,1016, 278, 712,2248, 321, 442, 503, 767, 744, 941,1237,1178,
     134  1475,2249,  82, 178,1141,1179, 973,2250,1302,2251, 297,2252,2253, 570,2254,2255,
     135  2256,  18, 450, 206,2257, 290, 292,1142,2258, 511, 162,  99, 346, 164, 735,2259,
     136  1476,1477,   4, 554, 343, 798,1099,2260,1100,2261,  43, 171,1303, 139, 215,2262,
     137  2263, 717, 775,2264,1033, 322, 216,2265, 831,2266, 149,2267,1304,2268,2269, 702,
     138  1238, 135, 845, 347, 309,2270, 484,2271, 878, 655, 238,1006,1478,2272,  67,2273,
     139   295,2274,2275, 461,2276, 478, 942, 412,2277,1034,2278,2279,2280, 265,2281, 541,
     140  2282,2283,2284,2285,2286,  70, 852,1071,2287,2288,2289,2290,  21,  56, 509, 117,
     141   432,2291,2292, 331, 980, 552,1101, 148, 284, 105, 393,1180,1239, 755,2293, 187,
     142  2294,1046,1479,2295, 340,2296,  63,1047, 230,2297,2298,1305, 763,1306, 101, 800,
     143   808, 494,2299,2300,2301, 903,2302,  37,1072,  14,   5,2303,  79, 675,2304, 312,
     144  2305,2306,2307,2308,2309,1480,   6,1307,2310,2311,2312,   1, 470,  35,  24, 229,
     145  2313, 695, 210,  86, 778,  15, 784, 592, 779,  32,  77, 855, 964,2314, 259,2315,
     146   501, 380,2316,2317,  83, 981, 153, 689,1308,1481,1482,1483,2318,2319, 716,1484,
     147  2320,2321,2322,2323,2324,2325,1485,2326,2327, 128,  57,  68, 261,1048, 211, 170,
     148  1240,  31,2328,  51, 435, 742,2329,2330,2331, 635,2332, 264, 456,2333,2334,2335,
     149   425,2336,1486, 143, 507, 263, 943,2337, 363, 920,1487, 256,1488,1102, 243, 601,
     150  1489,2338,2339,2340,2341,2342,2343,2344, 861,2345,2346,2347,2348,2349,2350, 395,
     151  2351,1490,1491,  62, 535, 166, 225,2352,2353, 668, 419,1241, 138, 604, 928,2354,
     152  1181,2355,1492,1493,2356,2357,2358,1143,2359, 696,2360, 387, 307,1309, 682, 476,
     153  2361,2362, 332,  12, 222, 156,2363, 232,2364, 641, 276, 656, 517,1494,1495,1035,
     154   416, 736,1496,2365,1017, 586,2366,2367,2368,1497,2369, 242,2370,2371,2372,1498,
     155  2373, 965, 713,2374,2375,2376,2377, 740, 982,1499, 944,1500,1007,2378,2379,1310,
     156  1501,2380,2381,2382, 785, 329,2383,2384,1502,2385,2386,2387, 932,2388,1503,2389,
     157  2390,2391,2392,1242,2393,2394,2395,2396,2397, 994, 950,2398,2399,2400,2401,1504,
     158  1311,2402,2403,2404,2405,1049, 749,2406,2407, 853, 718,1144,1312,2408,1182,1505,
     159  2409,2410, 255, 516, 479, 564, 550, 214,1506,1507,1313, 413, 239, 444, 339,1145,
     160  1036,1508,1509,1314,1037,1510,1315,2411,1511,2412,2413,2414, 176, 703, 497, 624,
     161   593, 921, 302,2415, 341, 165,1103,1512,2416,1513,2417,2418,2419, 376,2420, 700,
     162  2421,2422,2423, 258, 768,1316,2424,1183,2425, 995, 608,2426,2427,2428,2429, 221,
     163  2430,2431,2432,2433,2434,2435,2436,2437, 195, 323, 726, 188, 897, 983,1317, 377,
     164   644,1050, 879,2438, 452,2439,2440,2441,2442,2443,2444, 914,2445,2446,2447,2448,
     165   915, 489,2449,1514,1184,2450,2451, 515,  64, 427, 495,2452, 583,2453, 483, 485,
     166  1038, 562, 213,1515, 748, 666,2454,2455,2456,2457, 334,2458, 780, 996,1008, 705,
     167  1243,2459,2460,2461,2462,2463, 114,2464, 493,1146, 366, 163,1516, 961,1104,2465,
     168   291,2466,1318,1105,2467,1517, 365,2468, 355, 951,1244,2469,1319,2470, 631,2471,
     169  2472, 218,1320, 364, 320, 756,1518,1519,1321,1520,1322,2473,2474,2475,2476, 997,
     170  2477,2478,2479,2480, 665,1185,2481, 916,1521,2482,2483,2484, 584, 684,2485,2486,
     171   797,2487,1051,1186,2488,2489,2490,1522,2491,2492, 370,2493,1039,1187,  65,2494,
     172   434, 205, 463,1188,2495, 125, 812, 391, 402, 826, 699, 286, 398, 155, 781, 771,
     173   585,2496, 590, 505,1073,2497, 599, 244, 219, 917,1018, 952, 646,1523,2498,1323,
     174  2499,2500,  49, 984, 354, 741,2501, 625,2502,1324,2503,1019, 190, 357, 757, 491,
     175    95, 782, 868,2504,2505,2506,2507,2508,2509, 134,1524,1074, 422,1525, 898,2510,
     176   161,2511,2512,2513,2514, 769,2515,1526,2516,2517, 411,1325,2518, 472,1527,2519,
     177  2520,2521,2522,2523,2524, 985,2525,2526,2527,2528,2529,2530, 764,2531,1245,2532,
     178  2533,  25, 204, 311,2534, 496,2535,1052,2536,2537,2538,2539,2540,2541,2542, 199,
     179   704, 504, 468, 758, 657,1528, 196,  44, 839,1246, 272, 750,2543, 765, 862,2544,
     180  2545,1326,2546, 132, 615, 933,2547, 732,2548,2549,2550,1189,1529,2551, 283,1247,
     181  1053, 607, 929,2552,2553,2554, 930, 183, 872, 616,1040,1147,2555,1148,1020, 441,
     182   249,1075,2556,2557,2558, 466, 743,2559,2560,2561,  92, 514, 426, 420, 526,2562,
     183  2563,2564,2565,2566,2567,2568, 185,2569,2570,2571,2572, 776,1530, 658,2573, 362,
     184  2574, 361, 922,1076, 793,2575,2576,2577,2578,2579,2580,1531, 251,2581,2582,2583,
     185  2584,1532,  54, 612, 237,1327,2585,2586, 275, 408, 647, 111,2587,1533,1106, 465,
     186     3, 458,   9,  38,2588, 107, 110, 890, 209,  26, 737, 498,2589,1534,2590, 431,
     187   202,  88,1535, 356, 287,1107, 660,1149,2591, 381,1536, 986,1150, 445,1248,1151,
     188   974,2592,2593, 846,2594, 446, 953, 184,1249,1250, 727,2595, 923, 193, 883,2596,
     189  2597,2598, 102, 324, 539, 817,2599, 421,1041,2600, 832,2601,  94, 175, 197, 406,
     190  2602, 459,2603,2604,2605,2606,2607, 330, 555,2608,2609,2610, 706,1108, 389,2611,
     191  2612,2613,2614, 233,2615, 833, 558, 931, 954,1251,2616,2617,1537, 546,2618,2619,
     192  1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628,
     193  2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042,
     194   670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642,  # 512, 256
     195  )
     196  # fmt: on