(root)/
coreutils-9.4/
lib/
mbsalign.c
       1  /* Align/Truncate a string in a given screen width
       2     Copyright (C) 2009-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Written by Pádraig Brady.  */
      18  
      19  #include <config.h>
      20  #include "mbsalign.h"
      21  
      22  #include "minmax.h"
      23  
      24  #include <stdlib.h>
      25  #include <string.h>
      26  #include <stdio.h>
      27  #include <stdint.h>
      28  #include <limits.h>
      29  #include <wchar.h>
      30  #include <wctype.h>
      31  
      32  /* Replace non printable chars.
      33     Note \t and \n etc. are non printable.
      34     Return 1 if replacement made, 0 otherwise.  */
      35  
      36  static bool
      37  wc_ensure_printable (wchar_t *wchars)
      38  {
      39    bool replaced = false;
      40    wchar_t *wc = wchars;
      41    while (*wc)
      42      {
      43        if (!iswprint ((wint_t) *wc))
      44          {
      45            *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
      46            replaced = true;
      47          }
      48        wc++;
      49      }
      50    return replaced;
      51  }
      52  
      53  /* Truncate wchar string to width cells.
      54   * Returns number of cells used.  */
      55  
      56  static size_t
      57  wc_truncate (wchar_t *wc, size_t width)
      58  {
      59    size_t cells = 0;
      60    int next_cells = 0;
      61  
      62    while (*wc)
      63      {
      64        next_cells = wcwidth (*wc);
      65        if (next_cells == -1) /* non printable */
      66          {
      67            *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
      68            next_cells = 1;
      69          }
      70        if (cells + next_cells > width)
      71          break;
      72        cells += next_cells;
      73        wc++;
      74      }
      75    *wc = L'\0';
      76    return cells;
      77  }
      78  
      79  /* Write N_SPACES space characters to DEST while ensuring
      80     nothing is written beyond DEST_END. A terminating NUL
      81     is always added to DEST.
      82     A pointer to the terminating NUL is returned.  */
      83  
      84  static char *
      85  mbs_align_pad (char *dest, char const *dest_end, size_t n_spaces)
      86  {
      87    /* FIXME: Should we pad with "figure space" (\u2007)
      88       if non ascii data present?  */
      89    while (n_spaces-- && (dest < dest_end))
      90      *dest++ = ' ';
      91    *dest = '\0';
      92    return dest;
      93  }
      94  
      95  /* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
      96     characters; write the result into the DEST_SIZE-byte buffer, DEST.
      97     ALIGNMENT specifies whether to left- or right-justify or to center.
      98     If SRC requires more than *WIDTH columns, truncate it to fit.
      99     When centering, the number of trailing spaces may be one less than the
     100     number of leading spaces.
     101     Return the length in bytes required for the final result, not counting
     102     the trailing NUL.  A return value of DEST_SIZE or larger means there
     103     wasn't enough space.  DEST will be NUL terminated in any case.
     104     Return SIZE_MAX upon error (invalid multi-byte sequence in SRC,
     105     or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
     106     Update *WIDTH to indicate how many columns were used before padding.  */
     107  
     108  size_t
     109  mbsalign (char const *src, char *dest, size_t dest_size,
     110            size_t *width, mbs_align_t align, int flags)
     111  {
     112    size_t ret = SIZE_MAX;
     113    size_t src_size = strlen (src) + 1;
     114    char *newstr = nullptr;
     115    wchar_t *str_wc = nullptr;
     116    char const *str_to_print = src;
     117    size_t n_cols = src_size - 1;
     118    size_t n_used_bytes = n_cols; /* Not including NUL */
     119    size_t n_spaces = 0;
     120    bool conversion = false;
     121    bool wc_enabled = false;
     122  
     123    /* In multi-byte locales convert to wide characters
     124       to allow easy truncation. Also determine number
     125       of screen columns used.  */
     126    if (!(flags & MBA_UNIBYTE_ONLY) && MB_CUR_MAX > 1)
     127      {
     128        size_t src_chars = mbstowcs (nullptr, src, 0);
     129        if (src_chars == SIZE_MAX)
     130          {
     131            if (flags & MBA_UNIBYTE_FALLBACK)
     132              goto mbsalign_unibyte;
     133            else
     134              goto mbsalign_cleanup;
     135          }
     136        src_chars += 1; /* make space for NUL */
     137        str_wc = malloc (src_chars * sizeof (wchar_t));
     138        if (str_wc == nullptr)
     139          {
     140            if (flags & MBA_UNIBYTE_FALLBACK)
     141              goto mbsalign_unibyte;
     142            else
     143              goto mbsalign_cleanup;
     144          }
     145        if (mbstowcs (str_wc, src, src_chars) != 0)
     146          {
     147            str_wc[src_chars - 1] = L'\0';
     148            wc_enabled = true;
     149            conversion = wc_ensure_printable (str_wc);
     150            n_cols = wcswidth (str_wc, src_chars);
     151          }
     152      }
     153  
     154    /* If we transformed or need to truncate the source string
     155       then create a modified copy of it.  */
     156    if (wc_enabled && (conversion || (n_cols > *width)))
     157      {
     158          if (conversion)
     159            {
     160               /* May have increased the size by converting
     161                  \t to \uFFFD for example.  */
     162              src_size = wcstombs (nullptr, str_wc, 0) + 1;
     163            }
     164          newstr = malloc (src_size);
     165          if (newstr == nullptr)
     166          {
     167            if (flags & MBA_UNIBYTE_FALLBACK)
     168              goto mbsalign_unibyte;
     169            else
     170              goto mbsalign_cleanup;
     171          }
     172          str_to_print = newstr;
     173          n_cols = wc_truncate (str_wc, *width);
     174          n_used_bytes = wcstombs (newstr, str_wc, src_size);
     175      }
     176  
     177  mbsalign_unibyte:
     178  
     179    if (n_cols > *width) /* Unibyte truncation required.  */
     180      {
     181        n_cols = *width;
     182        n_used_bytes = n_cols;
     183      }
     184  
     185    if (*width > n_cols) /* Padding required.  */
     186      n_spaces = *width - n_cols;
     187  
     188    /* indicate to caller how many cells needed (not including padding).  */
     189    *width = n_cols;
     190  
     191    {
     192      size_t start_spaces, end_spaces;
     193  
     194      switch (align)
     195        {
     196        case MBS_ALIGN_LEFT:
     197          start_spaces = 0;
     198          end_spaces = n_spaces;
     199          break;
     200        case MBS_ALIGN_RIGHT:
     201          start_spaces = n_spaces;
     202          end_spaces = 0;
     203          break;
     204        case MBS_ALIGN_CENTER:
     205        default:
     206          start_spaces = n_spaces / 2 + n_spaces % 2;
     207          end_spaces = n_spaces / 2;
     208          break;
     209        }
     210  
     211        if (flags & MBA_NO_LEFT_PAD)
     212          start_spaces = 0;
     213        if (flags & MBA_NO_RIGHT_PAD)
     214          end_spaces = 0;
     215  
     216        /* Write as much NUL terminated output to DEST as possible.  */
     217        if (dest_size != 0)
     218          {
     219            size_t space_left;
     220            char *dest_end = dest + dest_size - 1;
     221  
     222            dest = mbs_align_pad (dest, dest_end, start_spaces);
     223            space_left = dest_end - dest;
     224            dest = mempcpy (dest, str_to_print, MIN (n_used_bytes, space_left));
     225            mbs_align_pad (dest, dest_end, end_spaces);
     226          }
     227  
     228      /* indicate to caller how many bytes needed (not including NUL).  */
     229      ret = n_used_bytes + ((start_spaces + end_spaces) * 1);
     230    }
     231  
     232  mbsalign_cleanup:
     233  
     234    free (str_wc);
     235    free (newstr);
     236  
     237    return ret;
     238  }
     239  
     240  /* A wrapper around mbsalign() to dynamically allocate the
     241     minimum amount of memory to store the result.
     242     Return nullptr on failure.  */
     243  
     244  char *
     245  ambsalign (char const *src, size_t *width, mbs_align_t align, int flags)
     246  {
     247    size_t orig_width = *width;
     248    size_t size = *width;         /* Start with enough for unibyte mode.  */
     249    size_t req = size;
     250    char *buf = nullptr;
     251  
     252    while (req >= size)
     253      {
     254        char *nbuf;
     255        size = req + 1;           /* Space for NUL.  */
     256        nbuf = realloc (buf, size);
     257        if (nbuf == nullptr)
     258          {
     259            free (buf);
     260            buf = nullptr;
     261            break;
     262          }
     263        buf = nbuf;
     264        *width = orig_width;
     265        req = mbsalign (src, buf, size, width, align, flags);
     266        if (req == SIZE_MAX)
     267          {
     268            free (buf);
     269            buf = nullptr;
     270            break;
     271          }
     272      }
     273  
     274    return buf;
     275  }