(root)/
tar-1.35/
gnu/
unicase.in.h
       1  /* Unicode character case mappings.
       2     Copyright (C) 2002, 2009-2023 Free Software Foundation, Inc.
       3  
       4     This file is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU Lesser General Public License as
       6     published by the Free Software Foundation; either version 2.1 of the
       7     License, or (at your option) any later version.
       8  
       9     This file is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU Lesser General Public License for more details.
      13  
      14     You should have received a copy of the GNU Lesser General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  #ifndef _UNICASE_H
      18  #define _UNICASE_H
      19  
      20  #include "unitypes.h"
      21  
      22  /* Get bool.  */
      23  #include <stdbool.h>
      24  
      25  /* Get size_t.  */
      26  #include <stddef.h>
      27  
      28  /* Get uninorm_t.  */
      29  #include "uninorm.h"
      30  
      31  #if @HAVE_UNISTRING_WOE32DLL_H@
      32  # include <unistring/woe32dll.h>
      33  #else
      34  # define LIBUNISTRING_DLL_VARIABLE
      35  #endif
      36  
      37  #ifdef __cplusplus
      38  extern "C" {
      39  #endif
      40  
      41  /* ========================================================================= */
      42  
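      43  /* Character case mappings.
      44     These mappings are locale and context independent.
      45     WARNING! These functions are not sufficient for languages such as German.
             (For example, the uppercase of U+00DF LATIN SMALL LETTER SHARP S is the
             two-character string "SS", which a character-to-character mapping
             cannot express.)
      46     Better use the functions below that treat an entire string at once and are
      47     language aware.  */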
      48  
      49  /* Return the uppercase mapping of the Unicode character UC.
             The mapping is locale and context independent.  */
      50  extern ucs4_t
      51         uc_toupper (ucs4_t uc)
      52         _UC_ATTRIBUTE_CONST;
      53  
      54  /* Return the lowercase mapping of the Unicode character UC.
             The mapping is locale and context independent.  */
      55  extern ucs4_t
      56         uc_tolower (ucs4_t uc)
      57         _UC_ATTRIBUTE_CONST;
      58  
      59  /* Return the titlecase mapping of the Unicode character UC.
             The mapping is locale and context independent.  */
      60  extern ucs4_t
      61         uc_totitle (ucs4_t uc)
      62         _UC_ATTRIBUTE_CONST;
      63  
      64  /* ========================================================================= */
      65  
      66  /* String case mappings.  */
      67  
      68  /* These functions are locale dependent.  The iso639_language argument
      69     identifies the language (e.g. "tr" for Turkish).  NULL means to use
      70     locale independent case mappings.  */
      71  
      72  /* Return the ISO 639 language code of the current locale.
      73     Return "" if it is unknown, or in the "C" locale.
             The code identifies a language in the same way as the iso639_language
             argument of the string functions below (e.g. "tr" for Turkish).
             NOTE(review): this header does not say how those functions treat ""
             as opposed to NULL -- confirm before passing the result through.  */
      74  extern const char *
      75         uc_locale_language (void)
      76         _UC_ATTRIBUTE_PURE;
      77  
      78  /* Conventions:
      79  
      80     All functions prefixed with u8_ operate on UTF-8 encoded strings.
      81     Their unit is a uint8_t (1 byte).
      82  
      83     All functions prefixed with u16_ operate on UTF-16 encoded strings.
      84     Their unit is a uint16_t (a 2-byte word).
      85  
      86     All functions prefixed with u32_ operate on UCS-4 encoded strings.
      87     Their unit is a uint32_t (a 4-byte word).
      88  
      89     All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
      90     n units.
      91  
      92     Functions returning a string result take a (resultbuf, lengthp) argument
      93     pair.  If resultbuf is not NULL and the result fits into *lengthp units,
      94     it is put in resultbuf, and resultbuf is returned.  Otherwise, a freshly
      95     allocated string is returned.  In both cases, *lengthp is set to the
      96     length (number of units) of the returned string.  In case of error,
      97     NULL is returned and errno is set.  */
      98  
      99  /* Return the uppercase mapping of a string.
     100     The nf argument identifies the normalization form to apply after the
     101     case-mapping.  It can also be NULL, for no normalization.
             S[0..N-1] is the input string.  ISO639_LANGUAGE selects the language
             whose case rules apply; NULL requests locale independent mappings.
             The result is delivered through (RESULTBUF, LENGTHP) as described
             under "Conventions" above; upon error, NULL is returned and errno
             is set.  */
     102  extern uint8_t *
     103         u8_toupper (const uint8_t *s, size_t n, const char *iso639_language,
     104                     uninorm_t nf,
     105                     uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     106  extern uint16_t *
     107         u16_toupper (const uint16_t *s, size_t n, const char *iso639_language,
     108                      uninorm_t nf,
     109                      uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     110  extern uint32_t *
     111         u32_toupper (const uint32_t *s, size_t n, const char *iso639_language,
     112                      uninorm_t nf,
     113                      uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     114  
     115  /* Return the lowercase mapping of a string.
     116     The nf argument identifies the normalization form to apply after the
     117     case-mapping.  It can also be NULL, for no normalization.
             S[0..N-1] is the input string.  ISO639_LANGUAGE selects the language
             whose case rules apply; NULL requests locale independent mappings.
             The result is delivered through (RESULTBUF, LENGTHP) as described
             under "Conventions" above; upon error, NULL is returned and errno
             is set.  */
     118  extern uint8_t *
     119         u8_tolower (const uint8_t *s, size_t n, const char *iso639_language,
     120                     uninorm_t nf,
     121                     uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     122  extern uint16_t *
     123         u16_tolower (const uint16_t *s, size_t n, const char *iso639_language,
     124                      uninorm_t nf,
     125                      uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     126  extern uint32_t *
     127         u32_tolower (const uint32_t *s, size_t n, const char *iso639_language,
     128                      uninorm_t nf,
     129                      uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     130  
     131  /* Return the titlecase mapping of a string.
     132     The nf argument identifies the normalization form to apply after the
     133     case-mapping.  It can also be NULL, for no normalization.
             S[0..N-1] is the input string.  ISO639_LANGUAGE selects the language
             whose case rules apply; NULL requests locale independent mappings.
             The result is delivered through (RESULTBUF, LENGTHP) as described
             under "Conventions" above; upon error, NULL is returned and errno
             is set.  */
     134  extern uint8_t *
     135         u8_totitle (const uint8_t *s, size_t n, const char *iso639_language,
     136                     uninorm_t nf,
     137                     uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     138  extern uint16_t *
     139         u16_totitle (const uint16_t *s, size_t n, const char *iso639_language,
     140                      uninorm_t nf,
     141                      uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     142  extern uint32_t *
     143         u32_totitle (const uint32_t *s, size_t n, const char *iso639_language,
     144                      uninorm_t nf,
     145                      uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     146  
     147  /* The case-mapping context given by a prefix string.
             It summarizes the text that precedes a string being case-mapped.
             Values are produced by the uN_casing_prefix_context and
             uN_casing_prefixes_context functions and consumed by the uN_ct_toupper
             family of functions; they are passed and returned by value.  */
     148  typedef struct casing_prefix_context
     149          {
     150            /* These fields are private, undocumented.  Callers should treat
                       the structure as opaque.  */
     151            uint32_t last_char_except_ignorable;
     152            uint32_t last_char_normal_or_above;
     153          }
     154          casing_prefix_context_t;
     155  /* The case-mapping context of the empty prefix string.
             Use it where no text precedes the string to be case-mapped, or as the
             starting context for uN_casing_prefixes_context.
             The @...@ token in the declaration is a placeholder that is
             substituted when this .in.h template is turned into the real header.
             NOTE(review): it presumably expands to LIBUNISTRING_DLL_VARIABLE or
             to nothing -- confirm against the build rules.  */
     156  extern @GNULIB_UNICASE_EMPTY_PREFIX_CONTEXT_DLL_VARIABLE@ const casing_prefix_context_t unicase_empty_prefix_context;
     157  /* Return the case-mapping context of a given prefix string.
             S[0..N-1] is the prefix, i.e. the text that precedes the string to be
             case-mapped, in the encoding indicated by the function name.  */
     158  extern casing_prefix_context_t
     159         u8_casing_prefix_context (const uint8_t *s, size_t n);
     160  extern casing_prefix_context_t
     161         u16_casing_prefix_context (const uint16_t *s, size_t n);
     162  extern casing_prefix_context_t
     163         u32_casing_prefix_context (const uint32_t *s, size_t n);
     164  /* Return the case-mapping context of the prefix concat(A, S), given the
     165     case-mapping context of the prefix A.
             A_CONTEXT is the context of A, and S[0..N-1] is the text that follows
             A.  This lets the context of a long prefix be computed piece by piece,
             starting from unicase_empty_prefix_context.  */
     166  extern casing_prefix_context_t
     167         u8_casing_prefixes_context (const uint8_t *s, size_t n,
     168                                     casing_prefix_context_t a_context);
     169  extern casing_prefix_context_t
     170         u16_casing_prefixes_context (const uint16_t *s, size_t n,
     171                                      casing_prefix_context_t a_context);
     172  extern casing_prefix_context_t
     173         u32_casing_prefixes_context (const uint32_t *s, size_t n,
     174                                      casing_prefix_context_t a_context);
     175  
     176  /* The case-mapping context given by a suffix string.
             It summarizes the text that follows a string being case-mapped.
             Values are produced by the uN_casing_suffix_context and
             uN_casing_suffixes_context functions and consumed by the uN_ct_toupper
             family of functions; they are passed and returned by value.  */
     177  typedef struct casing_suffix_context
     178          {
     179            /* These fields are private, undocumented.  Callers should treat
                       the structure as opaque.  */
     180            uint32_t first_char_except_ignorable;
     181            uint32_t bits;
     182          }
     183          casing_suffix_context_t;
     184  /* The case-mapping context of the empty suffix string.
             Use it where no text follows the string to be case-mapped, or as the
             starting context for uN_casing_suffixes_context.
             The @...@ token in the declaration is a placeholder that is
             substituted when this .in.h template is turned into the real header.
             NOTE(review): it presumably expands to LIBUNISTRING_DLL_VARIABLE or
             to nothing -- confirm against the build rules.  */
     185  extern @GNULIB_UNICASE_EMPTY_SUFFIX_CONTEXT_DLL_VARIABLE@ const casing_suffix_context_t unicase_empty_suffix_context;
     186  /* Return the case-mapping context of a given suffix string.
             S[0..N-1] is the suffix, i.e. the text that follows the string to be
             case-mapped, in the encoding indicated by the function name.  */
     187  extern casing_suffix_context_t
     188         u8_casing_suffix_context (const uint8_t *s, size_t n);
     189  extern casing_suffix_context_t
     190         u16_casing_suffix_context (const uint16_t *s, size_t n);
     191  extern casing_suffix_context_t
     192         u32_casing_suffix_context (const uint32_t *s, size_t n);
     193  /* Return the case-mapping context of the suffix concat(S, A), given the
     194     case-mapping context of the suffix A.
             A_CONTEXT is the context of A, and S[0..N-1] is the text that comes
             before A.  This lets the context of a long suffix be computed piece by
             piece from its end, starting from unicase_empty_suffix_context.  */
     195  extern casing_suffix_context_t
     196         u8_casing_suffixes_context (const uint8_t *s, size_t n,
     197                                     casing_suffix_context_t a_context);
     198  extern casing_suffix_context_t
     199         u16_casing_suffixes_context (const uint16_t *s, size_t n,
     200                                      casing_suffix_context_t a_context);
     201  extern casing_suffix_context_t
     202         u32_casing_suffixes_context (const uint32_t *s, size_t n,
     203                                      casing_suffix_context_t a_context);
     204  
     205  /* Return the uppercase mapping of a string that is surrounded by a prefix
     206     and a suffix.
             PREFIX_CONTEXT and SUFFIX_CONTEXT are the case-mapping contexts of the
             text before and after S[0..N-1]; unicase_empty_prefix_context and
             unicase_empty_suffix_context stand for "no such text".
             ISO639_LANGUAGE, NF, RESULTBUF and LENGTHP have the same meaning as
             for uN_toupper; upon error, NULL is returned and errno is set.  */
     207  extern uint8_t *
     208         u8_ct_toupper (const uint8_t *s, size_t n,
     209                        casing_prefix_context_t prefix_context,
     210                        casing_suffix_context_t suffix_context,
     211                        const char *iso639_language,
     212                        uninorm_t nf,
     213                        uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     214  extern uint16_t *
     215         u16_ct_toupper (const uint16_t *s, size_t n,
     216                        casing_prefix_context_t prefix_context,
     217                        casing_suffix_context_t suffix_context,
     218                        const char *iso639_language,
     219                        uninorm_t nf,
     220                        uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     221  extern uint32_t *
     222         u32_ct_toupper (const uint32_t *s, size_t n,
     223                        casing_prefix_context_t prefix_context,
     224                        casing_suffix_context_t suffix_context,
     225                        const char *iso639_language,
     226                        uninorm_t nf,
     227                        uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     228  
     229  /* Return the lowercase mapping of a string that is surrounded by a prefix
     230     and a suffix.
             PREFIX_CONTEXT and SUFFIX_CONTEXT are the case-mapping contexts of the
             text before and after S[0..N-1]; unicase_empty_prefix_context and
             unicase_empty_suffix_context stand for "no such text".
             ISO639_LANGUAGE, NF, RESULTBUF and LENGTHP have the same meaning as
             for uN_tolower; upon error, NULL is returned and errno is set.  */
     231  extern uint8_t *
     232         u8_ct_tolower (const uint8_t *s, size_t n,
     233                        casing_prefix_context_t prefix_context,
     234                        casing_suffix_context_t suffix_context,
     235                        const char *iso639_language,
     236                        uninorm_t nf,
     237                        uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     238  extern uint16_t *
     239         u16_ct_tolower (const uint16_t *s, size_t n,
     240                        casing_prefix_context_t prefix_context,
     241                        casing_suffix_context_t suffix_context,
     242                        const char *iso639_language,
     243                        uninorm_t nf,
     244                        uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     245  extern uint32_t *
     246         u32_ct_tolower (const uint32_t *s, size_t n,
     247                        casing_prefix_context_t prefix_context,
     248                        casing_suffix_context_t suffix_context,
     249                        const char *iso639_language,
     250                        uninorm_t nf,
     251                        uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     252  
     253  /* Return the titlecase mapping of a string that is surrounded by a prefix
     254     and a suffix.
             PREFIX_CONTEXT and SUFFIX_CONTEXT are the case-mapping contexts of the
             text before and after S[0..N-1]; unicase_empty_prefix_context and
             unicase_empty_suffix_context stand for "no such text".
             ISO639_LANGUAGE, NF, RESULTBUF and LENGTHP have the same meaning as
             for uN_totitle; upon error, NULL is returned and errno is set.  */
     255  extern uint8_t *
     256         u8_ct_totitle (const uint8_t *s, size_t n,
     257                        casing_prefix_context_t prefix_context,
     258                        casing_suffix_context_t suffix_context,
     259                        const char *iso639_language,
     260                        uninorm_t nf,
     261                        uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     262  extern uint16_t *
     263         u16_ct_totitle (const uint16_t *s, size_t n,
     264                        casing_prefix_context_t prefix_context,
     265                        casing_suffix_context_t suffix_context,
     266                        const char *iso639_language,
     267                        uninorm_t nf,
     268                        uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     269  extern uint32_t *
     270         u32_ct_totitle (const uint32_t *s, size_t n,
     271                        casing_prefix_context_t prefix_context,
     272                        casing_suffix_context_t suffix_context,
     273                        const char *iso639_language,
     274                        uninorm_t nf,
     275                        uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     276  
     277  /* Return the case folded string.
     278     Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent
     279     to comparing S1 and S2 with uN_casecmp().
     280     The nf argument identifies the normalization form to apply after the
     281     case-mapping.  It can also be NULL, for no normalization.
             S[0..N-1] is the input string.  ISO639_LANGUAGE selects the language
             whose case rules apply; NULL requests locale independent mappings.
             The result is delivered through (RESULTBUF, LENGTHP) as described
             under "Conventions" above; upon error, NULL is returned and errno
             is set.  */
     282  extern uint8_t *
     283         u8_casefold (const uint8_t *s, size_t n, const char *iso639_language,
     284                      uninorm_t nf,
     285                      uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     286  extern uint16_t *
     287         u16_casefold (const uint16_t *s, size_t n, const char *iso639_language,
     288                       uninorm_t nf,
     289                       uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     290  extern uint32_t *
     291         u32_casefold (const uint32_t *s, size_t n, const char *iso639_language,
     292                       uninorm_t nf,
     293                       uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     294  /* Likewise, for a string that is surrounded by a prefix and a suffix.
             That is, return the case folded form of S[0..N-1], where
             PREFIX_CONTEXT and SUFFIX_CONTEXT are the case-mapping contexts of the
             text before and after it.  ISO639_LANGUAGE, NF, RESULTBUF and LENGTHP
             have the same meaning as for uN_casefold.  */
     295  extern uint8_t *
     296         u8_ct_casefold (const uint8_t *s, size_t n,
     297                         casing_prefix_context_t prefix_context,
     298                         casing_suffix_context_t suffix_context,
     299                         const char *iso639_language,
     300                         uninorm_t nf,
     301                         uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     302  extern uint16_t *
     303         u16_ct_casefold (const uint16_t *s, size_t n,
     304                          casing_prefix_context_t prefix_context,
     305                          casing_suffix_context_t suffix_context,
     306                          const char *iso639_language,
     307                          uninorm_t nf,
     308                          uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     309  extern uint32_t *
     310         u32_ct_casefold (const uint32_t *s, size_t n,
     311                          casing_prefix_context_t prefix_context,
     312                          casing_suffix_context_t suffix_context,
     313                          const char *iso639_language,
     314                          uninorm_t nf,
     315                          uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
     316  
     317  /* Compare S1 and S2, ignoring differences in case and normalization.
     318     The nf argument identifies the normalization form to apply after the
     319     case-mapping.  It can also be NULL, for no normalization.
     320     If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
     321     return 0.  Upon failure, return -1 with errno set.
             Note that the return value only reports success or failure; the
             outcome of the comparison is stored in *RESULTP.
             S1[0..N1-1] and S2[0..N2-1] are the two strings; ISO639_LANGUAGE
             selects the language (NULL for locale independent mappings).
             NOTE(review): the ulc_ variant takes char strings, and the ulc_
             prefix is not covered by the "Conventions" comment in this header --
             presumably these are strings in the locale encoding; confirm.  */
     322  extern int
     323         u8_casecmp (const uint8_t *s1, size_t n1,
     324                     const uint8_t *s2, size_t n2,
     325                     const char *iso639_language, uninorm_t nf, int *resultp);
     326  extern int
     327         u16_casecmp (const uint16_t *s1, size_t n1,
     328                      const uint16_t *s2, size_t n2,
     329                      const char *iso639_language, uninorm_t nf, int *resultp);
     330  extern int
     331         u32_casecmp (const uint32_t *s1, size_t n1,
     332                      const uint32_t *s2, size_t n2,
     333                      const char *iso639_language, uninorm_t nf, int *resultp);
     334  extern int
     335         ulc_casecmp (const char *s1, size_t n1,
     336                      const char *s2, size_t n2,
     337                      const char *iso639_language, uninorm_t nf, int *resultp);
     338  
     339  /* Convert the string S of length N to a NUL-terminated byte sequence, in such
     340     a way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with the gnulib
     341     function memcmp2() is equivalent to comparing S1 and S2 with uN_casecoll().
     342     NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization.
             ISO639_LANGUAGE selects the language (NULL for locale independent
             mappings).  The byte sequence is delivered through (RESULTBUF,
             LENGTHP) in the manner described under "Conventions" above.
             NOTE(review): this header does not say whether *LENGTHP counts the
             terminating NUL, nor what encoding the ulc_ variant expects
             (presumably the locale encoding) -- confirm both.  */
     343  extern char *
     344         u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language,
     345                      uninorm_t nf,
     346                      char *_UC_RESTRICT resultbuf, size_t *lengthp);
     347  extern char *
     348         u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language,
     349                       uninorm_t nf,
     350                       char *_UC_RESTRICT resultbuf, size_t *lengthp);
     351  extern char *
     352         u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language,
     353                       uninorm_t nf,
     354                       char *_UC_RESTRICT resultbuf, size_t *lengthp);
     355  extern char *
     356         ulc_casexfrm (const char *s, size_t n, const char *iso639_language,
     357                       uninorm_t nf,
     358                       char *_UC_RESTRICT resultbuf, size_t *lengthp);
     359  
     360  /* Compare S1 and S2, ignoring differences in case and normalization, using the
     361     collation rules of the current locale.
     362     The nf argument identifies the normalization form to apply after the
     363     case-mapping.  It must be either UNINORM_NFC or UNINORM_NFKC.  It can also
     364     be NULL, for no normalization.
     365     If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
     366     return 0.  Upon failure, return -1 with errno set.
             Note that the return value only reports success or failure; the
             outcome of the comparison is stored in *RESULTP.
             S1[0..N1-1] and S2[0..N2-1] are the two strings; ISO639_LANGUAGE
             selects the language (NULL for locale independent mappings).
             NOTE(review): the ulc_ variant takes char strings, and the ulc_
             prefix is not covered by the "Conventions" comment in this header --
             presumably these are strings in the locale encoding; confirm.  */
     367  extern int
     368         u8_casecoll (const uint8_t *s1, size_t n1,
     369                      const uint8_t *s2, size_t n2,
     370                      const char *iso639_language, uninorm_t nf, int *resultp);
     371  extern int
     372         u16_casecoll (const uint16_t *s1, size_t n1,
     373                       const uint16_t *s2, size_t n2,
     374                       const char *iso639_language, uninorm_t nf, int *resultp);
     375  extern int
     376         u32_casecoll (const uint32_t *s1, size_t n1,
     377                       const uint32_t *s2, size_t n2,
     378                       const char *iso639_language, uninorm_t nf, int *resultp);
     379  extern int
     380         ulc_casecoll (const char *s1, size_t n1,
     381                       const char *s2, size_t n2,
     382                       const char *iso639_language, uninorm_t nf, int *resultp);
     383  
     384  
     385  /* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false
     386     otherwise, and return 0.  Upon failure, return -1 with errno set.
             S[0..N-1] is the string to test; ISO639_LANGUAGE selects the language
             (NULL for locale independent mappings).  The return value only
             reports success or failure; the answer is stored in *RESULTP.  */
     387  extern int
     388         u8_is_uppercase (const uint8_t *s, size_t n,
     389                          const char *iso639_language,
     390                          bool *resultp);
     391  extern int
     392         u16_is_uppercase (const uint16_t *s, size_t n,
     393                           const char *iso639_language,
     394                           bool *resultp);
     395  extern int
     396         u32_is_uppercase (const uint32_t *s, size_t n,
     397                           const char *iso639_language,
     398                           bool *resultp);
     399  
     400  /* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false
     401     otherwise, and return 0.  Upon failure, return -1 with errno set.
             S[0..N-1] is the string to test; ISO639_LANGUAGE selects the language
             (NULL for locale independent mappings).  The return value only
             reports success or failure; the answer is stored in *RESULTP.  */
     402  extern int
     403         u8_is_lowercase (const uint8_t *s, size_t n,
     404                          const char *iso639_language,
     405                          bool *resultp);
     406  extern int
     407         u16_is_lowercase (const uint16_t *s, size_t n,
     408                           const char *iso639_language,
     409                           bool *resultp);
     410  extern int
     411         u32_is_lowercase (const uint32_t *s, size_t n,
     412                           const char *iso639_language,
     413                           bool *resultp);
     414  
     415  /* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false
     416     otherwise, and return 0.  Upon failure, return -1 with errno set.
             S[0..N-1] is the string to test; ISO639_LANGUAGE selects the language
             (NULL for locale independent mappings).  The return value only
             reports success or failure; the answer is stored in *RESULTP.  */
     417  extern int
     418         u8_is_titlecase (const uint8_t *s, size_t n,
     419                          const char *iso639_language,
     420                          bool *resultp);
     421  extern int
     422         u16_is_titlecase (const uint16_t *s, size_t n,
     423                           const char *iso639_language,
     424                           bool *resultp);
     425  extern int
     426         u32_is_titlecase (const uint32_t *s, size_t n,
     427                           const char *iso639_language,
     428                           bool *resultp);
     429  
     430  /* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to
     431     false otherwise, and return 0.  Upon failure, return -1 with errno set.
             S[0..N-1] is the string to test; ISO639_LANGUAGE selects the language
             (NULL for locale independent mappings).  The return value only
             reports success or failure; the answer is stored in *RESULTP.  */
     432  extern int
     433         u8_is_casefolded (const uint8_t *s, size_t n,
     434                           const char *iso639_language,
     435                           bool *resultp);
     436  extern int
     437         u16_is_casefolded (const uint16_t *s, size_t n,
     438                            const char *iso639_language,
     439                            bool *resultp);
     440  extern int
     441         u32_is_casefolded (const uint32_t *s, size_t n,
     442                            const char *iso639_language,
     443                            bool *resultp);
     444  
     445  /* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to
     446     either upper case or lower case or title case is not a no-op.
     447     Set *RESULTP to false if NFD(S) maps to itself under the upper case mapping,
     448     under the lower case mapping, and under the title case mapping; in other
     449     words, when NFD(S) consists entirely of caseless characters.
     450     Upon failure, return -1 with errno set.
             NOTE(review): the success return value is not spelled out here.  The
             sibling uN_is_uppercase, uN_is_lowercase, uN_is_titlecase and
             uN_is_casefolded declarations document "return 0" on success, and
             this function presumably does the same -- confirm.
             S[0..N-1] is the string to test; ISO639_LANGUAGE selects the language
             (NULL for locale independent mappings).  */
     451  extern int
     452         u8_is_cased (const uint8_t *s, size_t n,
     453                      const char *iso639_language,
     454                      bool *resultp);
     455  extern int
     456         u16_is_cased (const uint16_t *s, size_t n,
     457                       const char *iso639_language,
     458                       bool *resultp);
     459  extern int
     460         u32_is_cased (const uint32_t *s, size_t n,
     461                       const char *iso639_language,
     462                       bool *resultp);
     463  
     464  
     465  /* ========================================================================= */
     466  
     467  #ifdef __cplusplus
     468  }
     469  #endif
     470  
     471  #endif /* _UNICASE_H */