(root)/
gettext-0.22.4/
gettext-tools/
libgettextpo/
unilbrk/
u8-width-linebreaks.c
       1  /* Line breaking of UTF-8 strings.
       2     Copyright (C) 2001-2003, 2006-2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <bruno@clisp.org>, 2001.
       4  
       5     This file is free software.
       6     It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
       7     You can redistribute it and/or modify it under either
       8       - the terms of the GNU Lesser General Public License as published
       9         by the Free Software Foundation, either version 3, or (at your
      10         option) any later version, or
      11       - the terms of the GNU General Public License as published by the
      12         Free Software Foundation; either version 2, or (at your option)
      13         any later version, or
      14       - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
      15  
      16     This file is distributed in the hope that it will be useful,
      17     but WITHOUT ANY WARRANTY; without even the implied warranty of
      18     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      19     Lesser General Public License and the GNU General Public License
      20     for more details.
      21  
      22     You should have received a copy of the GNU Lesser General Public
      23     License and of the GNU General Public License along with this
      24     program.  If not, see <https://www.gnu.org/licenses/>.  */
      25  
      26  #include <config.h>
      27  
      28  /* Specification.  */
      29  #include "unilbrk.h"
      30  #include "unilbrk/internal.h"
      31  
      32  #include "unilbrk/lbrktables.h"
      33  #include "unistr.h"
      34  #include "uniwidth.h"
      35  
      36  int
      37  u8_width_linebreaks_internal (const uint8_t *s, size_t n,
      38                                int width, int start_column, int at_end_columns,
      39                                const char *o, const char *encoding, int cr,
      40                                char *p)
      41  {
      42    const uint8_t *s_end;
      43    char *last_p;
      44    int last_column;
      45    int piece_width;
      46  
      47    u8_possible_linebreaks_loop (s, n, encoding, cr, p);
      48  
      49    s_end = s + n;
      50    last_p = NULL;
      51    last_column = start_column;
      52    piece_width = 0;
      53    while (s < s_end)
      54      {
      55        ucs4_t uc;
      56        int count = u8_mbtouc_unsafe (&uc, s, s_end - s);
      57  
      58        /* Respect the override.  */
      59        if (o != NULL && *o != UC_BREAK_UNDEFINED)
      60          *p = *o;
      61  
      62        if (*p == UC_BREAK_POSSIBLE
      63            || *p == UC_BREAK_MANDATORY || *p == UC_BREAK_CR_BEFORE_LF)
      64          {
      65            /* An atomic piece of text ends here.  */
      66            if (last_p != NULL && last_column + piece_width > width)
      67              {
      68                /* Insert a line break.  */
      69                *last_p = UC_BREAK_POSSIBLE;
      70                last_column = 0;
      71              }
      72          }
      73  
      74        if (*p == UC_BREAK_MANDATORY || *p == UC_BREAK_CR_BEFORE_LF)
      75          {
      76            /* uc is a line break character.  */
      77            /* Start a new piece at column 0.  */
      78            last_p = NULL;
      79            last_column = 0;
      80            piece_width = 0;
      81          }
      82        else
      83          {
      84            /* uc is not a line break character.  */
      85            int w;
      86  
      87            if (*p == UC_BREAK_POSSIBLE)
      88              {
      89                /* Start a new piece.  */
      90                last_p = p;
      91                last_column += piece_width;
      92                piece_width = 0;
      93                /* No line break for the moment, may be turned into
      94                   UC_BREAK_POSSIBLE later, via last_p. */
      95              }
      96  
      97            *p = UC_BREAK_PROHIBITED;
      98  
      99            w = uc_width (uc, encoding);
     100            if (w >= 0) /* ignore control characters in the string */
     101              piece_width += w;
     102          }
     103  
     104        s += count;
     105        p += count;
     106        if (o != NULL)
     107          o += count;
     108      }
     109  
     110    /* The last atomic piece of text ends here.  */
     111    if (last_p != NULL && last_column + piece_width + at_end_columns > width)
     112      {
     113        /* Insert a line break.  */
     114        *last_p = UC_BREAK_POSSIBLE;
     115        last_column = 0;
     116      }
     117  
     118    return last_column + piece_width;
     119  }
     120  
     121  #if defined IN_LIBUNISTRING
     122  /* For backward compatibility with older versions of libunistring.  */
     123  
     124  # undef u8_width_linebreaks
     125  
     126  int
     127  u8_width_linebreaks (const uint8_t *s, size_t n,
     128                       int width, int start_column, int at_end_columns,
     129                       const char *o, const char *encoding,
     130                       char *p)
     131  {
     132    return u8_width_linebreaks_internal (s, n,
     133                                         width, start_column, at_end_columns,
     134                                         o, encoding, -1, p);
     135  }
     136  
     137  #endif
     138  
     139  int
     140  u8_width_linebreaks_v2 (const uint8_t *s, size_t n,
     141                          int width, int start_column, int at_end_columns,
     142                          const char *o, const char *encoding,
     143                          char *p)
     144  {
     145    return u8_width_linebreaks_internal (s, n,
     146                                         width, start_column, at_end_columns,
     147                                         o, encoding, LBP_CR, p);
     148  }
     149  
     150  
     151  #ifdef TEST
     152  
     153  #include <stdio.h>
     154  #include <stdlib.h>
     155  #include <string.h>
     156  
     157  /* Read the contents of an input stream, and return it, terminated with a NUL
     158     byte. */
     159  char *
     160  read_file (FILE *stream)
     161  {
     162  #define BUFSIZE 4096
     163    char *buf = NULL;
     164    int alloc = 0;
     165    int size = 0;
     166    int count;
     167  
     168    while (! feof (stream))
     169      {
     170        if (size + BUFSIZE > alloc)
     171          {
     172            alloc = alloc + alloc / 2;
     173            if (alloc < size + BUFSIZE)
     174              alloc = size + BUFSIZE;
     175            buf = realloc (buf, alloc);
     176            if (buf == NULL)
     177              {
     178                fprintf (stderr, "out of memory\n");
     179                exit (1);
     180              }
     181          }
     182        count = fread (buf + size, 1, BUFSIZE, stream);
     183        if (count == 0)
     184          {
     185            if (ferror (stream))
     186              {
     187                perror ("fread");
     188                exit (1);
     189              }
     190          }
     191        else
     192          size += count;
     193      }
     194    buf = realloc (buf, size + 1);
     195    if (buf == NULL)
     196      {
     197        fprintf (stderr, "out of memory\n");
     198        exit (1);
     199      }
     200    buf[size] = '\0';
     201    return buf;
     202  #undef BUFSIZE
     203  }
     204  
     205  int
     206  main (int argc, char * argv[])
     207  {
     208    if (argc == 2)
     209      {
     210        /* Insert line breaks for a given width.  */
     211        int width = atoi (argv[1]);
     212        char *input = read_file (stdin);
     213        int length = strlen (input);
     214        char *breaks = malloc (length);
     215        int i;
     216  
     217        u8_width_linebreaks_v2 ((uint8_t *) input, length, width, 0, 0, NULL, "UTF-8", breaks);
     218  
     219        for (i = 0; i < length; i++)
     220          {
     221            switch (breaks[i])
     222              {
     223              case UC_BREAK_POSSIBLE:
     224                putc ('\n', stdout);
     225                break;
     226              case UC_BREAK_MANDATORY:
     227                break;
     228              case UC_BREAK_CR_BEFORE_LF:
     229                break;
     230              case UC_BREAK_PROHIBITED:
     231                break;
     232              default:
     233                abort ();
     234              }
     235            putc (input[i], stdout);
     236          }
     237  
     238        free (breaks);
     239  
     240        return 0;
     241      }
     242    else
     243      return 1;
     244  }
     245  
     246  #endif /* TEST */