1 /* Charset handling while reading PO files.
2 Copyright (C) 2001-2003, 2006, 2021 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifndef _PO_CHARSET_H
19 #define _PO_CHARSET_H
20
21 #include <stdbool.h>
22 #include <stddef.h>
23
24 #if HAVE_ICONV
25 #include <iconv.h>
26 #endif
27
28
29 #ifdef __cplusplus
30 extern "C" {
31 #endif
32
33
34 /* Canonicalize the encoding name CHARSET.
35 Return NULL if CHARSET is not a valid encoding name.
36 Otherwise the result is statically allocated (so it must not be freed) and
37 can be compared with other canonicalized names using ==. */
38 extern const char *po_charset_canonicalize (const char *charset);
39
40 /* The canonicalized encoding name for ASCII; comparable using ==. */
41 extern DLL_VARIABLE const char *po_charset_ascii;
42
43 /* The canonicalized encoding name for UTF-8; comparable using ==. */
44 extern DLL_VARIABLE const char *po_charset_utf8;
45
46 /* Test whether the encoding named CANON_CHARSET is ASCII compatible. */
47 extern bool po_charset_ascii_compatible (const char *canon_charset);
48
49 /* Test whether CANON_CHARSET is a weird encoding, i.e. an encoding which has
50 double-byte characters ending in 0x5C (the ASCII code of the backslash). */
51 extern bool po_is_charset_weird (const char *canon_charset);
52
53 /* Test whether CANON_CHARSET is a weird CJK encoding, i.e. a weird encoding
54 with CJK structure. An encoding has CJK structure if every valid character
55 stream is composed of single bytes in the range 0x{00..7F} and of byte pairs
56 in the range 0x{80..FF}{30..FF}. */
57 extern bool po_is_charset_weird_cjk (const char *canon_charset);
58
59 /* Returns a character iterator for the encoding CANON_CHARSET.
60 Given a pointer S into a string, the iterator returns the number of bytes
61 occupied by the next single character. If the piece of string is not valid
62 or if *S == '\0', it returns 1. */
63 typedef size_t (*character_iterator_t) (const char *s);
64 extern character_iterator_t po_charset_character_iterator (const char *canon_charset);
65
66
67 /* The encoding of the PO file being read, as specified in its header entry. */
68 extern DLL_VARIABLE const char *po_lex_charset;
69
70 /* The representation of U+2068 FIRST STRONG ISOLATE (FSI) as a string in the
71 PO file's encoding, or NULL if not available. */
72 extern DLL_VARIABLE const char *po_lex_isolate_start;
73 /* The representation of U+2069 POP DIRECTIONAL ISOLATE (PDI) as a string in the
74 PO file's encoding, or NULL if not available. */
75 extern DLL_VARIABLE const char *po_lex_isolate_end;
76
77 #if HAVE_ICONV
78 /* Converter from the PO file's encoding to UTF-8 (declared only if HAVE_ICONV is nonzero). */
79 extern DLL_VARIABLE iconv_t po_lex_iconv;
80 #endif
81 /* If no converter is available, some information about the structure of the
82 PO file's encoding (presumably whether it is a weird CJK encoding; confirm). */
83 extern DLL_VARIABLE bool po_lex_weird_cjk;
84
85 /* Initialize the PO file's encoding (presumably resets the po_lex_* variables above; confirm). */
86 extern void po_lex_charset_init (void);
87
88 /* Set the PO file's encoding from the header entry HEADER_ENTRY (FILENAME: presumably the PO file's name; confirm). */
89 extern void po_lex_charset_set (const char *header_entry,
90 const char *filename);
91
92 /* Finish up with the PO file's encoding (presumably releases the converter, if any; confirm). */
93 extern void po_lex_charset_close (void);
94
95
96 #ifdef __cplusplus
97 }
98 #endif
99
100
101 #endif /* _PO_CHARSET_H */