1 /*****************************************************************************/
2 /* LibreDWG - free implementation of the DWG file format */
3 /* */
4 /* Copyright (C) 2023 Free Software Foundation, Inc. */
5 /* */
6 /* This library is free software, licensed under the terms of the GNU */
7 /* General Public License as published by the Free Software Foundation, */
8 /* either version 3 of the License, or (at your option) any later version. */
9 /* You should have received a copy of the GNU General Public License */
10 /* along with this program. If not, see <http://www.gnu.org/licenses/>. */
11 /*****************************************************************************/
12
13 /*
 * codepages.h: preR2007 codepages support via iconv
15 * written by Reini Urban
16 */
17
18 #ifndef CODEPAGES_H
19 #define CODEPAGES_H
20
21 #include "config.h"
22 #include <stdint.h>
23 #include <stdbool.h>
/* Make sure wchar_t is declared before the prototypes further down use it. */
#if defined HAVE_WCHAR_H
#  include <wchar.h>
#elif defined __clang__
/* clang's builtin stddef.h carries `typedef __WCHAR_TYPE__ wchar_t`
   (clang/<ver>/include/stddef.h), so presumably a second typedef here would
   clash with it.  Pull that header in explicitly rather than relying on an
   earlier include having done so.  */
#  include <stddef.h>
#else
/* HAVE_WCHAR_H not set and not clang: provide a 32-bit fallback type. */
typedef uint32_t wchar_t;
#endif
31 #include "dwg.h"
32
/* Codepage identifiers.
   The numeric values may not be changed, as they map directly to the
   dwg->header.codepage number.  Every enumerator therefore spells out its
   value, so that adding or moving an entry can never silently renumber the
   ones after it.  */
typedef enum _dwg_codepage
{
  CP_UTF8 = 0,
  CP_US_ASCII = 1,
  CP_ISO_8859_1 = 2,
  CP_ISO_8859_2 = 3,
  CP_ISO_8859_3 = 4,
  CP_ISO_8859_4 = 5,
  CP_ISO_8859_5 = 6,
  CP_ISO_8859_6 = 7,
  CP_ISO_8859_7 = 8,
  CP_ISO_8859_8 = 9,
  CP_ISO_8859_9 = 10,
  CP_CP437 = 11,      // DOS English
  CP_CP850 = 12,      // DOS Latin-1
  CP_CP852 = 13,      // DOS Central European
  CP_CP855 = 14,      // DOS Cyrillic
  CP_CP857 = 15,      // DOS Turkish
  CP_CP860 = 16,      // DOS Portuguese
  CP_CP861 = 17,      // DOS Icelandic
  CP_CP863 = 18,      // DOS Canadian French (DOS Hebrew would be CP862)
  CP_CP864 = 19,      // DOS Arabic (IBM)
  CP_CP865 = 20,      // DOS Nordic
  CP_CP869 = 21,      // DOS Greek
  CP_CP932 = 22,      // DOS Japanese (shiftjis)
  CP_MACINTOSH = 23,
  CP_BIG5 = 24,
  CP_CP949 = 25,      // Korean (Wansung + Johab)
  CP_JOHAB = 26,      // Johab?
  CP_CP866 = 27,      // Russian
  CP_ANSI_1250 = 28,  // Central + Eastern European
  CP_ANSI_1251 = 29,  // Cyrillic
  CP_ANSI_1252 = 30,  // Western European
  CP_GB2312 = 31,     // EUC-CN Chinese
  CP_ANSI_1253 = 32,  // Greek
  CP_ANSI_1254 = 33,  // Turkish
  CP_ANSI_1255 = 34,  // Hebrew
  CP_ANSI_1256 = 35,  // Arabic
  CP_ANSI_1257 = 36,  // Baltic
  CP_ANSI_874 = 37,   // Thai
  CP_ANSI_932 = 38,   // Japanese (extended shiftjis, windows-31j)
  CP_ANSI_936 = 39,   // Simplified Chinese
  CP_ANSI_949 = 40,   // Korean Wansung
  CP_ANSI_950 = 41,   // Trad Chinese
  CP_ANSI_1361 = 42,  // Korean Johab
  CP_UTF16 = 43,
  CP_ANSI_1258 = 44,  // Vietnamese
  CP_UNDEFINED = 0xff // mostly R11
} Dwg_Codepage;
84
85 #ifdef HAVE_ICONV
86 const char *dwg_codepage_iconvstr (Dwg_Codepage cp);
87 #endif
88 const char *dwg_codepage_dxfstr (Dwg_Codepage cp);
89 Dwg_Codepage dwg_codepage_int (const char *s); // dxfstr
90
91 // returns the matching unicode codepoint,
92 // or 0 if the codepage does not contain the character
93 wchar_t dwg_codepage_uc (Dwg_Codepage cp, unsigned char c);
94
95 // returns the matching codepoint,
96 // or 0 if the codepage does not contain the wide character
97 unsigned char dwg_codepage_c (Dwg_Codepage cp, wchar_t wc);
98 // for wide asian chars
99 uint16_t dwg_codepage_wc (Dwg_Codepage cp, wchar_t wc);
100
101 #ifndef COMMON_TEST_C
102 // for wide asian chars
103 EXPORT wchar_t dwg_codepage_uwc (Dwg_Codepage cp, uint16_t c);
104 // these old codepages use 2-byte chars for some 0x8* bytes, all others only
105 // one byte.
106 EXPORT bool dwg_codepage_isasian (const Dwg_Codepage cp);
107 EXPORT bool dwg_codepage_is_twobyte (const Dwg_Codepage cp,
108 const unsigned char c);
109 #else
110 extern wchar_t dwg_codepage_uwc (Dwg_Codepage cp, uint16_t c);
111 extern bool dwg_codepage_isasian (const Dwg_Codepage cp);
112 extern bool dwg_codepage_is_twobyte (const Dwg_Codepage cp,
113 const unsigned char c);
114 #endif
115
116 #endif