(root)/
gettext-0.22.4/
gettext-tools/
libgettextpo/
striconv.c
       1  /* Charset conversion.
       2     Copyright (C) 2001-2007, 2010-2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible and Simon Josefsson.
       4  
       5     This file is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU Lesser General Public License as
       7     published by the Free Software Foundation; either version 2.1 of the
       8     License, or (at your option) any later version.
       9  
      10     This file is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #include <config.h>
      19  
      20  /* Specification.  */
      21  #include "striconv.h"
      22  
      23  #include <errno.h>
      24  #include <stdlib.h>
      25  #include <string.h>
      26  
      27  #if HAVE_ICONV
      28  # include <iconv.h>
      29  /* Get MB_LEN_MAX, CHAR_BIT.  */
      30  # include <limits.h>
      31  #endif
      32  
      33  #include "c-strcase.h"
      34  
      35  #ifndef SIZE_MAX
      36  # define SIZE_MAX ((size_t) -1)
      37  #endif
      38  
      39  
      40  #if HAVE_ICONV
      41  
      42  int
      43  mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
      44                char **resultp, size_t *lengthp)
      45  {
      46  # define tmpbufsize 4096
      47    size_t length;
      48    char *result;
      49  
      50    /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
      51  # if defined _LIBICONV_VERSION \
      52       || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
      53            || defined __sun)
      54    /* Set to the initial state.  */
      55    iconv (cd, NULL, NULL, NULL, NULL);
      56  # endif
      57  
      58    /* Determine the length we need.  */
      59    {
      60      size_t count = 0;
      61      /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
      62         libiconv's UCS-4-INTERNAL encoding.  */
      63      union { unsigned int align; char buf[tmpbufsize]; } tmp;
      64  # define tmpbuf tmp.buf
      65      const char *inptr = src;
      66      size_t insize = srclen;
      67  
      68      while (insize > 0)
      69        {
      70          char *outptr = tmpbuf;
      71          size_t outsize = tmpbufsize;
      72          size_t res = iconv (cd,
      73                              (ICONV_CONST char **) &inptr, &insize,
      74                              &outptr, &outsize);
      75  
      76          if (res == (size_t)(-1))
      77            {
      78              if (errno == E2BIG)
      79                ;
      80              else if (errno == EINVAL)
      81                break;
      82              else
      83                return -1;
      84            }
      85  # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
      86          /* Irix iconv() inserts a NUL byte if it cannot convert.
      87             NetBSD iconv() inserts a question mark if it cannot convert.
      88             Only GNU libiconv and GNU libc are known to prefer to fail rather
      89             than doing a lossy conversion.  */
      90          else if (res > 0)
      91            {
      92              errno = EILSEQ;
      93              return -1;
      94            }
      95  # endif
      96          count += outptr - tmpbuf;
      97        }
      98      /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
      99  # if defined _LIBICONV_VERSION \
     100       || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     101            || defined __sun)
     102      {
     103        char *outptr = tmpbuf;
     104        size_t outsize = tmpbufsize;
     105        size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
     106  
     107        if (res == (size_t)(-1))
     108          return -1;
     109        count += outptr - tmpbuf;
     110      }
     111  # endif
     112      length = count;
     113  # undef tmpbuf
     114    }
     115  
     116    if (length == 0)
     117      {
     118        *lengthp = 0;
     119        return 0;
     120      }
     121    if (*resultp != NULL && *lengthp >= length)
     122      result = *resultp;
     123    else
     124      {
     125        result = (char *) malloc (length);
     126        if (result == NULL)
     127          {
     128            errno = ENOMEM;
     129            return -1;
     130          }
     131      }
     132  
     133    /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
     134  # if defined _LIBICONV_VERSION \
     135       || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     136            || defined __sun)
     137    /* Return to the initial state.  */
     138    iconv (cd, NULL, NULL, NULL, NULL);
     139  # endif
     140  
     141    /* Do the conversion for real.  */
     142    {
     143      const char *inptr = src;
     144      size_t insize = srclen;
     145      char *outptr = result;
     146      size_t outsize = length;
     147  
     148      while (insize > 0)
     149        {
     150          size_t res = iconv (cd,
     151                              (ICONV_CONST char **) &inptr, &insize,
     152                              &outptr, &outsize);
     153  
     154          if (res == (size_t)(-1))
     155            {
     156              if (errno == EINVAL)
     157                break;
     158              else
     159                goto fail;
     160            }
     161  # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
     162          /* Irix iconv() inserts a NUL byte if it cannot convert.
     163             NetBSD iconv() inserts a question mark if it cannot convert.
     164             Only GNU libiconv and GNU libc are known to prefer to fail rather
     165             than doing a lossy conversion.  */
     166          else if (res > 0)
     167            {
     168              errno = EILSEQ;
     169              goto fail;
     170            }
     171  # endif
     172        }
     173      /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
     174  # if defined _LIBICONV_VERSION \
     175       || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     176            || defined __sun)
     177      {
     178        size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
     179  
     180        if (res == (size_t)(-1))
     181          goto fail;
     182      }
     183  # endif
     184      if (outsize != 0)
     185        abort ();
     186    }
     187  
     188    *resultp = result;
     189    *lengthp = length;
     190  
     191    return 0;
     192  
     193   fail:
     194    {
     195      if (result != *resultp)
     196        free (result);
     197      return -1;
     198    }
     199  # undef tmpbufsize
     200  }
     201  
     202  char *
     203  str_cd_iconv (const char *src, iconv_t cd)
     204  {
     205    /* For most encodings, a trailing NUL byte in the input will be converted
     206       to a trailing NUL byte in the output.  But not for UTF-7.  So that this
     207       function is usable for UTF-7, we have to exclude the NUL byte from the
     208       conversion and add it by hand afterwards.  */
     209  # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
     210    /* Irix iconv() inserts a NUL byte if it cannot convert.
     211       NetBSD iconv() inserts a question mark if it cannot convert.
     212       Only GNU libiconv and GNU libc are known to prefer to fail rather
     213       than doing a lossy conversion.  For other iconv() implementations,
     214       we have to look at the number of irreversible conversions returned;
     215       but this information is lost when iconv() returns for an E2BIG reason.
     216       Therefore we cannot use the second, faster algorithm.  */
     217  
     218    char *result = NULL;
     219    size_t length = 0;
     220    int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
     221    char *final_result;
     222  
     223    if (retval < 0)
     224      {
     225        if (result != NULL)
     226          abort ();
     227        return NULL;
     228      }
     229  
     230    /* Add the terminating NUL byte.  */
     231    final_result =
     232      (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
     233    if (final_result == NULL)
     234      {
     235        free (result);
     236        errno = ENOMEM;
     237        return NULL;
     238      }
     239    final_result[length] = '\0';
     240  
     241    return final_result;
     242  
     243  # else
     244    /* This algorithm is likely faster than the one above.  But it may produce
     245       iconv() returns for an E2BIG reason, when the output size guess is too
     246       small.  Therefore it can only be used when we don't need the number of
     247       irreversible conversions performed.  */
     248    char *result;
     249    size_t result_size;
     250    size_t length;
     251    const char *inptr = src;
     252    size_t inbytes_remaining = strlen (src);
     253  
     254    /* Make a guess for the worst-case output size, in order to avoid a
     255       realloc.  It's OK if the guess is wrong as long as it is not zero and
     256       doesn't lead to an integer overflow.  */
     257    result_size = inbytes_remaining;
     258    {
     259      size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
     260      if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
     261        result_size *= MB_LEN_MAX;
     262    }
     263    result_size += 1; /* for the terminating NUL */
     264  
     265    result = (char *) malloc (result_size);
     266    if (result == NULL)
     267      {
     268        errno = ENOMEM;
     269        return NULL;
     270      }
     271  
     272    /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
     273  # if defined _LIBICONV_VERSION \
     274       || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     275            || defined __sun)
     276    /* Set to the initial state.  */
     277    iconv (cd, NULL, NULL, NULL, NULL);
     278  # endif
     279  
     280    /* Do the conversion.  */
     281    {
     282      char *outptr = result;
     283      size_t outbytes_remaining = result_size - 1;
     284  
     285      for (;;)
     286        {
     287          /* Here inptr + inbytes_remaining = src + strlen (src),
     288                  outptr + outbytes_remaining = result + result_size - 1.  */
     289          size_t res = iconv (cd,
     290                              (ICONV_CONST char **) &inptr, &inbytes_remaining,
     291                              &outptr, &outbytes_remaining);
     292  
     293          if (res == (size_t)(-1))
     294            {
     295              if (errno == EINVAL)
     296                break;
     297              else if (errno == E2BIG)
     298                {
     299                  size_t used = outptr - result;
     300                  size_t newsize = result_size * 2;
     301                  char *newresult;
     302  
     303                  if (!(newsize > result_size))
     304                    {
     305                      errno = ENOMEM;
     306                      goto failed;
     307                    }
     308                  newresult = (char *) realloc (result, newsize);
     309                  if (newresult == NULL)
     310                    {
     311                      errno = ENOMEM;
     312                      goto failed;
     313                    }
     314                  result = newresult;
     315                  result_size = newsize;
     316                  outptr = result + used;
     317                  outbytes_remaining = result_size - 1 - used;
     318                }
     319              else
     320                goto failed;
     321            }
     322          else
     323            break;
     324        }
     325      /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
     326  # if defined _LIBICONV_VERSION \
     327       || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     328            || defined __sun)
     329      for (;;)
     330        {
     331          /* Here outptr + outbytes_remaining = result + result_size - 1.  */
     332          size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);
     333  
     334          if (res == (size_t)(-1))
     335            {
     336              if (errno == E2BIG)
     337                {
     338                  size_t used = outptr - result;
     339                  size_t newsize = result_size * 2;
     340                  char *newresult;
     341  
     342                  if (!(newsize > result_size))
     343                    {
     344                      errno = ENOMEM;
     345                      goto failed;
     346                    }
     347                  newresult = (char *) realloc (result, newsize);
     348                  if (newresult == NULL)
     349                    {
     350                      errno = ENOMEM;
     351                      goto failed;
     352                    }
     353                  result = newresult;
     354                  result_size = newsize;
     355                  outptr = result + used;
     356                  outbytes_remaining = result_size - 1 - used;
     357                }
     358              else
     359                goto failed;
     360            }
     361          else
     362            break;
     363        }
     364  # endif
     365  
     366      /* Add the terminating NUL byte.  */
     367      *outptr++ = '\0';
     368  
     369      length = outptr - result;
     370    }
     371  
     372    /* Give away unused memory.  */
     373    if (length < result_size)
     374      {
     375        char *smaller_result = (char *) realloc (result, length);
     376  
     377        if (smaller_result != NULL)
     378          result = smaller_result;
     379      }
     380  
     381    return result;
     382  
     383   failed:
     384    free (result);
     385    return NULL;
     386  
     387  # endif
     388  }
     389  
     390  #endif
     391  
     392  char *
     393  str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
     394  {
     395    if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
     396      {
     397        char *result = strdup (src);
     398  
     399        if (result == NULL)
     400          errno = ENOMEM;
     401        return result;
     402      }
     403    else
     404      {
     405  #if HAVE_ICONV
     406        iconv_t cd;
     407        char *result;
     408  
     409        /* Avoid glibc-2.1 bug with EUC-KR.  */
     410  # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     411       && !defined _LIBICONV_VERSION
     412        if (c_strcasecmp (from_codeset, "EUC-KR") == 0
     413            || c_strcasecmp (to_codeset, "EUC-KR") == 0)
     414          {
     415            errno = EINVAL;
     416            return NULL;
     417          }
     418  # endif
     419        cd = iconv_open (to_codeset, from_codeset);
     420        if (cd == (iconv_t) -1)
     421          return NULL;
     422  
     423        result = str_cd_iconv (src, cd);
     424  
     425        if (result == NULL)
     426          {
     427            /* Close cd, but preserve the errno from str_cd_iconv.  */
     428            int saved_errno = errno;
     429            iconv_close (cd);
     430            errno = saved_errno;
     431          }
     432        else
     433          {
     434            if (iconv_close (cd) < 0)
     435              {
     436                free (result);
     437                return NULL;
     438              }
     439          }
     440        return result;
     441  #else
     442        /* This is a different error code than if iconv_open existed but didn't
     443           support from_codeset and to_codeset, so that the caller can emit
     444           an error message such as
     445             "iconv() is not supported. Installing GNU libiconv and
     446              then reinstalling this package would fix this."  */
     447        errno = ENOSYS;
     448        return NULL;
     449  #endif
     450      }
     451  }