(root)/
util-linux-2.39/
lib/
mbsalign.c
       1  /* Align/Truncate a string in a given screen width
       2     Copyright (C) 2009-2010 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU Lesser General Public License as published by
       6     the Free Software Foundation, either version 2.1 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
      16  
      17  /* Written by Pádraig Brady.  */
      18  
      19  #include <stdlib.h>
      20  #include <string.h>
      21  #include <stdio.h>
      22  #include <stdbool.h>
      23  #include <limits.h>
      24  #include <ctype.h>
      25  
      26  #include "c.h"
      27  #include "mbsalign.h"
      28  #include "strutils.h"
      29  #include "widechar.h"
      30  
      31  /*
      32   * Counts number of cells in multibyte string. All control and
      33   * non-printable chars are ignored.
      34   *
      35   * Returns: number of cells.
      36   */
      37  size_t mbs_nwidth(const char *buf, size_t bufsz)
      38  {
      39  	const char *p = buf, *last = buf;
      40  	size_t width = 0;
      41  
      42  #ifdef HAVE_WIDECHAR
      43  	mbstate_t st;
      44  	memset(&st, 0, sizeof(st));
      45  #endif
      46  	if (p && *p && bufsz)
      47  		last = p + (bufsz - 1);
      48  
      49  	while (p && *p && p <= last) {
      50  		if (iscntrl((unsigned char) *p)) {
      51  			p++;
      52  
      53  			/* try detect "\e[x;ym" and skip on success */
      54  			if (*p && *p == '[') {
      55  				const char *e = p;
      56  				while (*e && e < last && *e != 'm')
      57  					e++;
      58  				if (*e == 'm')
      59  					p = e + 1;
      60  			}
      61  			continue;
      62  		}
      63  #ifdef HAVE_WIDECHAR
      64  		wchar_t wc;
      65  		size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
      66  
      67  		if (len == 0)
      68  			break;
      69  		if (len > 0 && iswprint(wc)) {
      70  			int x = wcwidth(wc);
      71  			if (x > 0)
      72  				width += x;
      73  		} else if (len == (size_t) -1 || len == (size_t) -2)
      74  			len = 1;
      75  		p += len;
      76  #else
      77  		if (isprint((unsigned char) *p))
      78  			width++;
      79  		p++;
      80  #endif
      81  	}
      82  
      83  	return width;
      84  }
      85  
      86  size_t mbs_width(const char *s)
      87  {
      88  	if (!s || !*s)
      89  		return 0;
      90  	return mbs_nwidth(s, strlen(s));
      91  }
      92  
      93  /*
      94   * Counts number of cells in multibyte string. For all control and
      95   * non-printable chars is the result width enlarged to store \x?? hex
      96   * sequence. See mbs_safe_encode().
      97   *
      98   * Returns: number of cells, @sz returns number of bytes.
      99   */
     100  size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
     101  {
     102  	const char *p = buf, *last = buf;
     103  	size_t width = 0, bytes = 0;
     104  
     105  #ifdef HAVE_WIDECHAR
     106  	mbstate_t st;
     107  	memset(&st, 0, sizeof(st));
     108  #endif
     109  	if (p && *p && bufsz)
     110  		last = p + (bufsz - 1);
     111  
     112  	while (p && *p && p <= last) {
     113  		if ((p < last && *p == '\\' && *(p + 1) == 'x')
     114  		    || iscntrl((unsigned char) *p)) {
     115  			width += 4, bytes += 4;		/* *p encoded to \x?? */
     116  			p++;
     117  		}
     118  #ifdef HAVE_WIDECHAR
     119  		else {
     120  			wchar_t wc;
     121  			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
     122  
     123  			if (len == 0)
     124  				break;
     125  
     126  			if (len == (size_t) -1 || len == (size_t) -2) {
     127  				len = 1;
     128  				if (isprint((unsigned char) *p))
     129  					width += 1, bytes += 1;
     130  				else
     131  					width += 4, bytes += 4;
     132  
     133  			} else if (!iswprint(wc)) {
     134  				width += len * 4;	/* hex encode whole sequence */
     135  				bytes += len * 4;
     136  			} else {
     137  				width += wcwidth(wc);	/* number of cells */
     138  				bytes += len;		/* number of bytes */
     139  			}
     140  			p += len;
     141  		}
     142  #else
     143  		else if (!isprint((unsigned char) *p)) {
     144  			width += 4, bytes += 4;		/* *p encoded to \x?? */
     145  			p++;
     146  		} else {
     147  			width++, bytes++;
     148  			p++;
     149  		}
     150  #endif
     151  	}
     152  
     153  	if (sz)
     154  		*sz = bytes;
     155  	return width;
     156  }
     157  
     158  size_t mbs_safe_width(const char *s)
     159  {
     160  	if (!s || !*s)
     161  		return 0;
     162  	return mbs_safe_nwidth(s, strlen(s), NULL);
     163  }
     164  
     165  /*
     166   * Copy @s to @buf and replace control and non-printable chars with
     167   * \x?? hex sequence. The @width returns number of cells. The @safechars
     168   * are not encoded.
     169   *
     170   * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
     171   * bytes.
     172   */
     173  char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars)
     174  {
     175  	const char *p = s;
     176  	char *r;
     177  	size_t sz = s ? strlen(s) : 0;
     178  
     179  #ifdef HAVE_WIDECHAR
     180  	mbstate_t st;
     181  	memset(&st, 0, sizeof(st));
     182  #endif
     183  	if (!sz || !buf)
     184  		return NULL;
     185  
     186  	r = buf;
     187  	*width = 0;
     188  
     189  	while (p && *p) {
     190  		if (safechars && strchr(safechars, *p)) {
     191  			*r++ = *p++;
     192  			continue;
     193  		}
     194  
     195  		if ((*p == '\\' && *(p + 1) == 'x')
     196  		    || iscntrl((unsigned char) *p)) {
     197  			sprintf(r, "\\x%02x", (unsigned char) *p);
     198  			r += 4;
     199  			*width += 4;
     200  			p++;
     201  		}
     202  #ifdef HAVE_WIDECHAR
     203  		else {
     204  			wchar_t wc;
     205  			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
     206  
     207  			if (len == 0)
     208  				break;		/* end of string */
     209  
     210  			if (len == (size_t) -1 || len == (size_t) -2) {
     211  				len = 1;
     212  				/*
     213  				 * Not valid multibyte sequence -- maybe it's
     214  				 * printable char according to the current locales.
     215  				 */
     216  				if (!isprint((unsigned char) *p)) {
     217  					sprintf(r, "\\x%02x", (unsigned char) *p);
     218  					r += 4;
     219  					*width += 4;
     220  				} else {
     221  					(*width)++;
     222  					*r++ = *p;
     223  				}
     224  			} else if (!iswprint(wc)) {
     225  				size_t i;
     226  				for (i = 0; i < len; i++) {
     227  					sprintf(r, "\\x%02x", (unsigned char) p[i]);
     228  					r += 4;
     229  					*width += 4;
     230  				}
     231  			} else {
     232  				memcpy(r, p, len);
     233  				r += len;
     234  				*width += wcwidth(wc);
     235  			}
     236  			p += len;
     237  		}
     238  #else
     239  		else if (!isprint((unsigned char) *p)) {
     240  			sprintf(r, "\\x%02x", (unsigned char) *p);
     241  			p++;
     242  			r += 4;
     243  			*width += 4;
     244  		} else {
     245  			*r++ = *p++;
     246  			(*width)++;
     247  		}
     248  #endif
     249  	}
     250  
     251  	*r = '\0';
     252  	return buf;
     253  }
     254  
     255  /*
     256   * Copy @s to @buf and replace broken sequences to \x?? hex sequence. The
     257   * @width returns number of cells. The @safechars are not encoded.
     258   *
     259   * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
     260   * bytes.
     261   */
     262  char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
     263  {
     264  	const char *p = s;
     265  	char *r;
     266  	size_t sz = s ? strlen(s) : 0;
     267  
     268  #ifdef HAVE_WIDECHAR
     269  	mbstate_t st;
     270  	memset(&st, 0, sizeof(st));
     271  #endif
     272  	if (!sz || !buf)
     273  		return NULL;
     274  
     275  	r = buf;
     276  	*width = 0;
     277  
     278  	while (p && *p) {
     279  #ifdef HAVE_WIDECHAR
     280  		wchar_t wc;
     281  		size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
     282  #else
     283  		size_t len = 1;
     284  #endif
     285  
     286  		if (len == 0)
     287  			break;		/* end of string */
     288  
     289  		if (len == (size_t) -1 || len == (size_t) -2) {
     290  			len = 1;
     291  			/*
     292  			 * Not valid multibyte sequence -- maybe it's
     293  			 * printable char according to the current locales.
     294  			 */
     295  			if (!isprint((unsigned char) *p)) {
     296  				sprintf(r, "\\x%02x", (unsigned char) *p);
     297  				r += 4;
     298  				*width += 4;
     299  			} else {
     300  				(*width)++;
     301  				*r++ = *p;
     302  			}
     303  		} else if (*p == '\\' && *(p + 1) == 'x') {
     304  			sprintf(r, "\\x%02x", (unsigned char) *p);
     305  			r += 4;
     306  			*width += 4;
     307  		} else {
     308  			r = mempcpy(r, p, len);
     309  			*width += wcwidth(wc);
     310  		}
     311  		p += len;
     312  	}
     313  
     314  	*r = '\0';
     315  	return buf;
     316  }
     317  
     318  size_t mbs_safe_encode_size(size_t bytes)
     319  {
     320  	return (bytes * 4) + 1;
     321  }
     322  
     323  /*
     324   * Returns allocated string where all control and non-printable chars are
     325   * replaced with \x?? hex sequence.
     326   */
     327  char *mbs_safe_encode(const char *s, size_t *width)
     328  {
     329  	size_t sz = s ? strlen(s) : 0;
     330  	char *buf, *ret = NULL;
     331  
     332  	if (!sz)
     333  		return NULL;
     334  	buf = malloc(mbs_safe_encode_size(sz));
     335  	if (buf)
     336  		ret = mbs_safe_encode_to_buffer(s, width, buf, NULL);
     337  	if (!ret)
     338  		free(buf);
     339  	return ret;
     340  }
     341  
     342  /*
     343   * Returns allocated string where all broken widechars chars are
     344   * replaced with \x?? hex sequence.
     345   */
     346  char *mbs_invalid_encode(const char *s, size_t *width)
     347  {
     348  	size_t sz = s ? strlen(s) : 0;
     349  	char *buf, *ret = NULL;
     350  
     351  	if (!sz)
     352  		return NULL;
     353  	buf = malloc(mbs_safe_encode_size(sz));
     354  	if (buf)
     355  		ret = mbs_invalid_encode_to_buffer(s, width, buf);
     356  	if (!ret)
     357  		free(buf);
     358  	return ret;
     359  }
     360  
     361  #ifdef HAVE_WIDECHAR
     362  
     363  static bool
     364  wc_ensure_printable (wchar_t *wchars)
     365  {
     366    bool replaced = false;
     367    wchar_t *wc = wchars;
     368    while (*wc)
     369      {
     370        if (!iswprint ((wint_t) *wc))
     371          {
     372            *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
     373            replaced = true;
     374          }
     375        wc++;
     376      }
     377    return replaced;
     378  }
     379  
     380  /* Truncate wchar string to width cells.
     381   * Returns number of cells used.  */
     382  
     383  static size_t
     384  wc_truncate (wchar_t *wc, size_t width)
     385  {
     386    size_t cells = 0;
     387    int next_cells = 0;
     388  
     389    while (*wc)
     390      {
     391        next_cells = wcwidth (*wc);
     392        if (next_cells == -1) /* non printable */
     393          {
     394            *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
     395            next_cells = 1;
     396          }
     397        if (cells + next_cells > width)
     398          break;
     399  
     400        cells += next_cells;
     401        wc++;
     402      }
     403    *wc = L'\0';
     404    return cells;
     405  }
     406  
     407  static int
     408  rpl_wcswidth (const wchar_t *s, size_t n)
     409  {
     410    int ret = 0;
     411  
     412    while (n-- > 0 && *s != L'\0')
     413      {
     414        int nwidth = wcwidth (*s++);
     415        if (nwidth == -1)             /* non printable */
     416          return -1;
     417        if (ret > (INT_MAX - nwidth)) /* overflow */
     418          return -1;
     419        ret += nwidth;
     420      }
     421  
     422    return ret;
     423  }
     424  #endif /* HAVE_WIDECHAR */
     425  
     426  /* Truncate multi-byte string to @width and returns number of
     427   * bytes of the new string @str, and in @width returns number
     428   * of cells.
     429   */
     430  size_t
     431  mbs_truncate(char *str, size_t *width)
     432  {
     433  	ssize_t bytes = strlen(str);
     434  #ifdef HAVE_WIDECHAR
     435  	ssize_t sz = mbstowcs(NULL, str, 0);
     436  	wchar_t *wcs = NULL;
     437  
     438  	if (sz == (ssize_t) -1)
     439  		goto done;
     440  
     441  	wcs = calloc(1, (sz + 1) * sizeof(wchar_t));
     442  	if (!wcs)
     443  		goto done;
     444  
     445  	if (!mbstowcs(wcs, str, sz))
     446  		goto done;
     447  	*width = wc_truncate(wcs, *width);
     448  	bytes = wcstombs(str, wcs, bytes);
     449  done:
     450  	free(wcs);
     451  #else
     452  	if (bytes >= 0 && *width < (size_t) bytes)
     453  		bytes = *width;
     454  #endif
     455  	if (bytes >= 0)
     456  		str[bytes] = '\0';
     457  	return bytes;
     458  }
     459  
     460  /* Write N_SPACES space characters to DEST while ensuring
     461     nothing is written beyond DEST_END. A terminating NUL
     462     is always added to DEST.
     463     A pointer to the terminating NUL is returned.  */
     464  
     465  static char*
     466  mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces, int padchar)
     467  {
     468    for (/* nothing */; n_spaces && (dest < dest_end); n_spaces--)
     469      *dest++ = padchar;
     470    *dest = '\0';
     471    return dest;
     472  }
     473  
     474  size_t
     475  mbsalign (const char *src, char *dest, size_t dest_size,
     476            size_t *width, mbs_align_t align, int flags)
     477  {
     478  	return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
     479  }
     480  
     481  /* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
     482     characters; write the result into the DEST_SIZE-byte buffer, DEST.
     483     ALIGNMENT specifies whether to left- or right-justify or to center.
     484     If SRC requires more than *WIDTH columns, truncate it to fit.
     485     When centering, the number of trailing spaces may be one less than the
     486     number of leading spaces. The FLAGS parameter is unused at present.
     487     Return the length in bytes required for the final result, not counting
     488     the trailing NUL.  A return value of DEST_SIZE or larger means there
     489     wasn't enough space.  DEST will be NUL terminated in any case.
     490     Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
     491     or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
     492     Update *WIDTH to indicate how many columns were used before padding.  */
     493  
     494  size_t
     495  mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
     496  	               size_t *width, mbs_align_t align,
     497  #ifdef HAVE_WIDECHAR
     498  		       int flags,
     499  #else
     500  		       int flags __attribute__((__unused__)),
     501  #endif
     502  		       int padchar)
     503  {
     504    size_t ret = -1;
     505    size_t src_size = strlen (src) + 1;
     506    char *newstr = NULL;
     507    wchar_t *str_wc = NULL;
     508    const char *str_to_print = src;
     509    size_t n_cols = src_size - 1;
     510    size_t n_used_bytes = n_cols; /* Not including NUL */
     511    size_t n_spaces = 0, space_left;
     512  
     513  #ifdef HAVE_WIDECHAR
     514    bool conversion = false;
     515    bool wc_enabled = false;
     516  
     517    /* In multi-byte locales convert to wide characters
     518       to allow easy truncation. Also determine number
     519       of screen columns used.  */
     520    if (MB_CUR_MAX > 1)
     521      {
     522        size_t src_chars = mbstowcs (NULL, src, 0);
     523        if (src_chars == (size_t) -1)
     524          {
     525            if (flags & MBA_UNIBYTE_FALLBACK)
     526              goto mbsalign_unibyte;
     527            else
     528              goto mbsalign_cleanup;
     529          }
     530        src_chars += 1; /* make space for NUL */
     531        str_wc = malloc (src_chars * sizeof (wchar_t));
     532        if (str_wc == NULL)
     533          {
     534            if (flags & MBA_UNIBYTE_FALLBACK)
     535              goto mbsalign_unibyte;
     536            else
     537              goto mbsalign_cleanup;
     538          }
     539        if (mbstowcs (str_wc, src, src_chars) != 0)
     540          {
     541            str_wc[src_chars - 1] = L'\0';
     542            wc_enabled = true;
     543            conversion = wc_ensure_printable (str_wc);
     544            n_cols = rpl_wcswidth (str_wc, src_chars);
     545          }
     546      }
     547  
     548    /* If we transformed or need to truncate the source string
     549       then create a modified copy of it.  */
     550    if (wc_enabled && (conversion || (n_cols > *width)))
     551      {
     552          if (conversion)
     553            {
     554               /* May have increased the size by converting
     555                  \t to \uFFFD for example.  */
     556              src_size = wcstombs(NULL, str_wc, 0) + 1;
     557            }
     558          newstr = malloc (src_size);
     559          if (newstr == NULL)
     560          {
     561            if (flags & MBA_UNIBYTE_FALLBACK)
     562              goto mbsalign_unibyte;
     563            else
     564              goto mbsalign_cleanup;
     565          }
     566          str_to_print = newstr;
     567          n_cols = wc_truncate (str_wc, *width);
     568          n_used_bytes = wcstombs (newstr, str_wc, src_size);
     569      }
     570  
     571  mbsalign_unibyte:
     572  #endif
     573  
     574    if (n_cols > *width) /* Unibyte truncation required.  */
     575      {
     576        n_cols = *width;
     577        n_used_bytes = n_cols;
     578      }
     579  
     580    if (*width > n_cols) /* Padding required.  */
     581      n_spaces = *width - n_cols;
     582  
     583    /* indicate to caller how many cells needed (not including padding).  */
     584    *width = n_cols;
     585  
     586    /* indicate to caller how many bytes needed (not including NUL).  */
     587    ret = n_used_bytes + (n_spaces * 1);
     588  
     589    /* Write as much NUL terminated output to DEST as possible.  */
     590    if (dest_size != 0)
     591      {
     592        char *dest_end = dest + dest_size - 1;
     593        size_t start_spaces;
     594        size_t end_spaces;
     595  
     596        switch (align)
     597          {
     598          case MBS_ALIGN_CENTER:
     599            start_spaces = n_spaces / 2 + n_spaces % 2;
     600            end_spaces = n_spaces / 2;
     601            break;
     602          case MBS_ALIGN_LEFT:
     603            start_spaces = 0;
     604            end_spaces = n_spaces;
     605            break;
     606          case MBS_ALIGN_RIGHT:
     607            start_spaces = n_spaces;
     608            end_spaces = 0;
     609            break;
     610  	default:
     611  	  abort();
     612          }
     613  
     614        dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
     615        space_left = dest_end - dest;
     616        dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
     617        mbs_align_pad (dest, dest_end, end_spaces, padchar);
     618      }
     619  #ifdef HAVE_WIDECHAR
     620  mbsalign_cleanup:
     621  #endif
     622    free (str_wc);
     623    free (newstr);
     624  
     625    return ret;
     626  }