1  /* Test of conversion to UTF-8 from legacy encodings.
       2     Copyright (C) 2007-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Written by Bruno Haible <bruno@clisp.org>, 2007.  */
      18  
      19  #include <config.h>
      20  
      21  #include "uniconv.h"
      22  
      23  #include <stdlib.h>
      24  #include <string.h>
      25  
      26  #include "unistr.h"
      27  #include "macros.h"
      28  extern int iconv_supports_encoding (const char *encoding);
      29  
      30  /* Magic number for detecting bounds violations.  */
      31  #define MAGIC 0x1983EFF1
      32  
      33  static size_t *
      34  new_offsets (size_t n)
      35  {
      36    size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
      37    offsets[n] = MAGIC;
      38    return offsets;
      39  }
      40  
      41  int
      42  main ()
      43  {
      44  #if HAVE_ICONV
      45    static enum iconv_ilseq_handler handlers[] =
      46      { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
      47    size_t h;
      48    size_t o;
      49    size_t i;
      50  
      51    /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
      52       ISO-8859-2, and UTF-8.  */
      53  
      54    /* Test conversion from ISO-8859-1 to UTF-8 with no errors.  */
      55    for (h = 0; h < SIZEOF (handlers); h++)
      56      {
      57        enum iconv_ilseq_handler handler = handlers[h];
      58        static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
      59        static const uint8_t expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
      60        for (o = 0; o < 2; o++)
      61          {
      62            size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
      63            size_t length;
      64            uint8_t *result = u8_conv_from_encoding ("ISO-8859-1", handler,
      65                                                     input, strlen (input),
      66                                                     offsets,
      67                                                     NULL, &length);
      68            ASSERT (result != NULL);
      69            ASSERT (length == u8_strlen (expected));
      70            ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
      71            if (o)
      72              {
      73                for (i = 0; i < 37; i++)
      74                  ASSERT (offsets[i] == (i < 1 ? i :
      75                                         i < 12 ? i + 1 :
      76                                         i < 18 ? i + 2 :
      77                                         i + 3));
      78                ASSERT (offsets[37] == MAGIC);
      79                free (offsets);
      80              }
      81            free (result);
      82          }
      83      }
      84  
      85    /* Test conversion from ISO-8859-2 to UTF-8 with no errors.  */
      86    for (h = 0; h < SIZEOF (handlers); h++)
      87      {
      88        enum iconv_ilseq_handler handler = handlers[h];
      89        static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
      90        static const uint8_t expected[] = "Rafa\305\202 Maszkowski";
      91        for (o = 0; o < 2; o++)
      92          {
      93            size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
      94            size_t length;
      95            uint8_t *result = u8_conv_from_encoding ("ISO-8859-2", handler,
      96                                                     input, strlen (input),
      97                                                     offsets,
      98                                                     NULL, &length);
      99            ASSERT (result != NULL);
     100            ASSERT (length == u8_strlen (expected));
     101            ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
     102            if (o)
     103              {
     104                for (i = 0; i < 16; i++)
     105                  ASSERT (offsets[i] == (i < 5 ? i :
     106                                         i + 1));
     107                ASSERT (offsets[16] == MAGIC);
     108                free (offsets);
     109              }
     110            free (result);
     111          }
     112      }
     113  
     114    /* autodetect_jp is only supported when iconv() support ISO-2022-JP-2.  */
     115  # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
     116    if (iconv_supports_encoding ("ISO-2022-JP-2"))
     117      {
     118        /* Test conversions from autodetect_jp to UTF-8.  */
     119        for (h = 0; h < SIZEOF (handlers); h++)
     120          {
     121            enum iconv_ilseq_handler handler = handlers[h];
     122            static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
     123            static const uint8_t expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
     124            for (o = 0; o < 2; o++)
     125              {
     126                size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
     127                size_t length;
     128                uint8_t *result = u8_conv_from_encoding ("autodetect_jp", handler,
     129                                                         input, strlen (input),
     130                                                         offsets,
     131                                                         NULL, &length);
     132                ASSERT (result != NULL);
     133                ASSERT (length == u8_strlen (expected));
     134                ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
     135                if (o)
     136                  {
     137                    for (i = 0; i < 10; i++)
     138                      ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
     139                    ASSERT (offsets[10] == MAGIC);
     140                    free (offsets);
     141                  }
     142                free (result);
     143              }
     144          }
     145        for (h = 0; h < SIZEOF (handlers); h++)
     146          {
     147            enum iconv_ilseq_handler handler = handlers[h];
     148            static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
     149            static const uint8_t expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
     150            for (o = 0; o < 2; o++)
     151              {
     152                size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
     153                size_t length;
     154                uint8_t *result = u8_conv_from_encoding ("autodetect_jp", handler,
     155                                                         input, strlen (input),
     156                                                         offsets,
     157                                                         NULL, &length);
     158                ASSERT (result != NULL);
     159                ASSERT (length == u8_strlen (expected));
     160                ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
     161                if (o)
     162                  {
     163                    for (i = 0; i < 10; i++)
     164                      ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
     165                    ASSERT (offsets[10] == MAGIC);
     166                    free (offsets);
     167                  }
     168                free (result);
     169              }
     170          }
     171        for (h = 0; h < SIZEOF (handlers); h++)
     172          {
     173            enum iconv_ilseq_handler handler = handlers[h];
     174            static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
     175            static const uint8_t expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
     176            for (o = 0; o < 2; o++)
     177              {
     178                size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
     179                size_t length;
     180                uint8_t *result = u8_conv_from_encoding ("autodetect_jp", handler,
     181                                                         input, strlen (input),
     182                                                         offsets,
     183                                                         NULL, &length);
     184                ASSERT (result != NULL);
     185                ASSERT (length == u8_strlen (expected));
     186                ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
     187                if (o)
     188                  {
     189                    for (i = 0; i < 16; i++)
     190                      ASSERT (offsets[i] == (i == 0 ? 0 :
     191                                             i == 5 ? 3 :
     192                                             i == 7 ? 6 :
     193                                             i == 9 ? 9 :
     194                                             i == 11 ? 12 :
     195                                             i == 13 ? 15 :
     196                                             (size_t)(-1)));
     197                    ASSERT (offsets[16] == MAGIC);
     198                    free (offsets);
     199                  }
     200                free (result);
     201              }
     202          }
     203      }
     204  # endif
     205  
     206  #endif
     207  
     208    return 0;
     209  }