1  /* human.c -- print human readable file size
       2  
       3     Copyright (C) 1996-2007, 2009-2023 Free Software Foundation, Inc.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation, either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  /* Written by Paul Eggert and Larry McVoy.  */
      19  
      20  #include <config.h>
      21  
      22  #include "human.h"
      23  
      24  #include <locale.h>
      25  #include <stdio.h>
      26  #include <stdlib.h>
      27  #include <string.h>
      28  
      29  #include <argmatch.h>
      30  #include <error.h>
      31  #include <intprops.h>
      32  
      33  /* The maximum length of a suffix like "KiB".  */
      34  #define HUMAN_READABLE_SUFFIX_LENGTH_MAX 3
      35  
      36  static const char power_letter[] =
      37  {
      38    0,    /* not used */
      39    'K',  /* kibi ('k' for kilo is a special case) */
      40    'M',  /* mega or mebi */
      41    'G',  /* giga or gibi */
      42    'T',  /* tera or tebi */
      43    'P',  /* peta or pebi */
      44    'E',  /* exa or exbi */
      45    'Z',  /* zetta or 2**70 */
      46    'Y',  /* yotta or 2**80 */
      47    'R',  /* ronna or 2**90 */
      48    'Q'   /* quetta or 2**100 */
      49  };
      50  
      51  
      52  /* If INEXACT_STYLE is not human_round_to_nearest, and if easily
      53     possible, adjust VALUE according to the style.  */
      54  
      55  static long double
      56  adjust_value (int inexact_style, long double value)
      57  {
      58    /* Do not use the floorl or ceill functions, as that would mean
      59       checking for their presence and possibly linking with the
      60       standard math library, which is a porting pain.  So leave the
      61       value alone if it is too large to easily round.  */
      62    if (inexact_style != human_round_to_nearest && value < UINTMAX_MAX)
      63      {
      64        uintmax_t u = value;
      65        value = u + (inexact_style == human_ceiling && u != value);
      66      }
      67  
      68    return value;
      69  }
      70  
      71  /* Group the digits of NUMBER according to the grouping rules of the
      72     current locale.  NUMBER contains NUMBERLEN digits.  Modify the
      73     bytes pointed to by NUMBER in place, subtracting 1 from NUMBER for
      74     each byte inserted.  Return the starting address of the modified
      75     number.
      76  
      77     To group the digits, use GROUPING and THOUSANDS_SEP as in 'struct
      78     lconv' from <locale.h>.  */
      79  
      80  static char *
      81  group_number (char *number, size_t numberlen,
      82                char const *grouping, char const *thousands_sep)
      83  {
      84    register char *d;
      85    size_t grouplen = SIZE_MAX;
      86    size_t thousands_seplen = strlen (thousands_sep);
      87    size_t i = numberlen;
      88  
      89    /* The maximum possible value for NUMBERLEN is the number of digits
      90       in the square of the largest uintmax_t, so double the size needed.  */
      91    char buf[2 * INT_STRLEN_BOUND (uintmax_t) + 1];
      92  
      93    memcpy (buf, number, numberlen);
      94    d = number + numberlen;
      95  
      96    for (;;)
      97      {
      98        unsigned char g = *grouping;
      99  
     100        if (g)
     101          {
     102            grouplen = g < CHAR_MAX ? g : i;
     103            grouping++;
     104          }
     105  
     106        if (i < grouplen)
     107          grouplen = i;
     108  
     109        d -= grouplen;
     110        i -= grouplen;
     111        memcpy (d, buf + i, grouplen);
     112        if (i == 0)
     113          return d;
     114  
     115        d -= thousands_seplen;
     116        memcpy (d, thousands_sep, thousands_seplen);
     117      }
     118  }
     119  
     120  /* Convert N to a human readable format in BUF, using the options OPTS.
     121  
     122     N is expressed in units of FROM_BLOCK_SIZE.  FROM_BLOCK_SIZE must
     123     be nonnegative.
     124  
     125     Use units of TO_BLOCK_SIZE in the output number.  TO_BLOCK_SIZE
     126     must be positive.
     127  
     128     Use (OPTS & (human_round_to_nearest | human_floor | human_ceiling))
     129     to determine whether to take the ceiling or floor of any result
     130     that cannot be expressed exactly.
     131  
     132     If (OPTS & human_group_digits), group the thousands digits
     133     according to the locale, e.g., "1,000,000" in an American English
     134     locale.
     135  
     136     If (OPTS & human_autoscale), deduce the output block size
     137     automatically; TO_BLOCK_SIZE must be 1 but it has no effect on the
     138     output.  Use powers of 1024 if (OPTS & human_base_1024), and powers
     139     of 1000 otherwise.  For example, assuming powers of 1024, 8500
     140     would be converted to 8.3, 133456345 to 127, 56990456345 to 53, and
     141     so on.  Numbers smaller than the power aren't modified.
     142     human_autoscale is normally used together with human_SI.
     143  
     144     If (OPTS & human_space_before_unit), use a space to separate the
     145     number from any suffix that is appended as described below.
     146  
     147     If (OPTS & human_SI), append an SI prefix indicating which power is
     148     being used.  If in addition (OPTS & human_B), append "B" (if base
     149     1000) or "iB" (if base 1024) to the SI prefix.  When ((OPTS &
     150     human_SI) && ! (OPTS & human_autoscale)), TO_BLOCK_SIZE must be a
     151     power of 1024 or of 1000, depending on (OPTS &
     152     human_base_1024).  */
     153  
     154  char *
     155  human_readable (uintmax_t n, char *buf, int opts,
     156                  uintmax_t from_block_size, uintmax_t to_block_size)
     157  {
     158    int inexact_style =
     159      opts & (human_round_to_nearest | human_floor | human_ceiling);
     160    unsigned int base = opts & human_base_1024 ? 1024 : 1000;
     161    uintmax_t amt;
     162    int tenths;
     163    int exponent = -1;
     164    int exponent_max = sizeof power_letter - 1;
     165    char *p;
     166    char *psuffix;
     167    char const *integerlim;
     168  
     169    /* 0 means adjusted N == AMT.TENTHS;
     170       1 means AMT.TENTHS < adjusted N < AMT.TENTHS + 0.05;
     171       2 means adjusted N == AMT.TENTHS + 0.05;
     172       3 means AMT.TENTHS + 0.05 < adjusted N < AMT.TENTHS + 0.1.  */
     173    int rounding;
     174  
     175    char const *decimal_point = ".";
     176    size_t decimal_pointlen = 1;
     177    char const *grouping = "";
     178    char const *thousands_sep = "";
     179    struct lconv const *l = localeconv ();
     180    size_t pointlen = strlen (l->decimal_point);
     181    if (0 < pointlen && pointlen <= MB_LEN_MAX)
     182      {
     183        decimal_point = l->decimal_point;
     184        decimal_pointlen = pointlen;
     185      }
     186    grouping = l->grouping;
     187    if (strlen (l->thousands_sep) <= MB_LEN_MAX)
     188      thousands_sep = l->thousands_sep;
     189  
     190    /* Leave room for a trailing space and following suffix.  */
     191    psuffix = buf + LONGEST_HUMAN_READABLE - 1 - HUMAN_READABLE_SUFFIX_LENGTH_MAX;
     192    p = psuffix;
     193  
     194    /* Adjust AMT out of FROM_BLOCK_SIZE units and into TO_BLOCK_SIZE
     195       units.  If this can be done exactly with integer arithmetic, do
     196       not use floating point operations.  */
     197    if (to_block_size <= from_block_size)
     198      {
     199        if (from_block_size % to_block_size == 0)
     200          {
     201            uintmax_t multiplier = from_block_size / to_block_size;
     202            amt = n * multiplier;
     203            if (amt / multiplier == n)
     204              {
     205                tenths = 0;
     206                rounding = 0;
     207                goto use_integer_arithmetic;
     208              }
     209          }
     210      }
     211    else if (from_block_size != 0 && to_block_size % from_block_size == 0)
     212      {
     213        uintmax_t divisor = to_block_size / from_block_size;
     214        uintmax_t r10 = (n % divisor) * 10;
     215        uintmax_t r2 = (r10 % divisor) * 2;
     216        amt = n / divisor;
     217        tenths = r10 / divisor;
     218        rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2);
     219        goto use_integer_arithmetic;
     220      }
     221  
     222    {
     223      /* Either the result cannot be computed easily using uintmax_t,
     224         or from_block_size is zero.  Fall back on floating point.
     225         FIXME: This can yield answers that are slightly off.  */
     226  
     227      long double dto_block_size = to_block_size;
     228      long double damt = n * (from_block_size / dto_block_size);
     229      size_t buflen;
     230      size_t nonintegerlen;
     231  
     232      if (! (opts & human_autoscale))
     233        {
     234          sprintf (buf, "%.0Lf", adjust_value (inexact_style, damt));
     235          buflen = strlen (buf);
     236          nonintegerlen = 0;
     237        }
     238      else
     239        {
     240          long double e = 1;
     241          exponent = 0;
     242  
     243          do
     244            {
     245              e *= base;
     246              exponent++;
     247            }
     248          while (e * base <= damt && exponent < exponent_max);
     249  
     250          damt /= e;
     251  
     252          sprintf (buf, "%.1Lf", adjust_value (inexact_style, damt));
     253          buflen = strlen (buf);
     254          nonintegerlen = decimal_pointlen + 1;
     255  
     256          if (1 + nonintegerlen + ! (opts & human_base_1024) < buflen
     257              || ((opts & human_suppress_point_zero)
     258                  && buf[buflen - 1] == '0'))
     259            {
     260              sprintf (buf, "%.0Lf",
     261                       adjust_value (inexact_style, damt * 10) / 10);
     262              buflen = strlen (buf);
     263              nonintegerlen = 0;
     264            }
     265        }
     266  
     267      p = psuffix - buflen;
     268      memmove (p, buf, buflen);
     269      integerlim = p + buflen - nonintegerlen;
     270    }
     271    goto do_grouping;
     272  
     273   use_integer_arithmetic:
     274    {
     275      /* The computation can be done exactly, with integer arithmetic.
     276  
     277         Use power of BASE notation if requested and if adjusted AMT is
     278         large enough.  */
     279  
     280      if (opts & human_autoscale)
     281        {
     282          exponent = 0;
     283  
     284          if (base <= amt)
     285            {
     286              do
     287                {
     288                  unsigned int r10 = (amt % base) * 10 + tenths;
     289                  unsigned int r2 = (r10 % base) * 2 + (rounding >> 1);
     290                  amt /= base;
     291                  tenths = r10 / base;
     292                  rounding = (r2 < base
     293                              ? (r2 + rounding) != 0
     294                              : 2 + (base < r2 + rounding));
     295                  exponent++;
     296                }
     297              while (base <= amt && exponent < exponent_max);
     298  
     299              if (amt < 10)
     300                {
     301                  if (inexact_style == human_round_to_nearest
     302                      ? 2 < rounding + (tenths & 1)
     303                      : inexact_style == human_ceiling && 0 < rounding)
     304                    {
     305                      tenths++;
     306                      rounding = 0;
     307  
     308                      if (tenths == 10)
     309                        {
     310                          amt++;
     311                          tenths = 0;
     312                        }
     313                    }
     314  
     315                  if (amt < 10
     316                      && (tenths || ! (opts & human_suppress_point_zero)))
     317                    {
     318                      *--p = '0' + tenths;
     319                      p -= decimal_pointlen;
     320                      memcpy (p, decimal_point, decimal_pointlen);
     321                      tenths = rounding = 0;
     322                    }
     323                }
     324            }
     325        }
     326  
     327      if (inexact_style == human_round_to_nearest
     328          ? 5 < tenths + (0 < rounding + (amt & 1))
     329          : inexact_style == human_ceiling && 0 < tenths + rounding)
     330        {
     331          amt++;
     332  
     333          if ((opts & human_autoscale)
     334              && amt == base && exponent < exponent_max)
     335            {
     336              exponent++;
     337              if (! (opts & human_suppress_point_zero))
     338                {
     339                  *--p = '0';
     340                  p -= decimal_pointlen;
     341                  memcpy (p, decimal_point, decimal_pointlen);
     342                }
     343              amt = 1;
     344            }
     345        }
     346  
     347      integerlim = p;
     348  
     349      do
     350        {
     351          int digit = amt % 10;
     352          *--p = digit + '0';
     353        }
     354      while ((amt /= 10) != 0);
     355    }
     356  
     357   do_grouping:
     358    if (opts & human_group_digits)
     359      p = group_number (p, integerlim - p, grouping, thousands_sep);
     360  
     361    if (opts & human_SI)
     362      {
     363        if (exponent < 0)
     364          {
     365            uintmax_t power;
     366            exponent = 0;
     367            for (power = 1; power < to_block_size; power *= base)
     368              if (++exponent == exponent_max)
     369                break;
     370          }
     371  
     372        if ((exponent | (opts & human_B)) && (opts & human_space_before_unit))
     373          *psuffix++ = ' ';
     374  
     375        if (exponent)
     376          *psuffix++ = (! (opts & human_base_1024) && exponent == 1
     377                        ? 'k'
     378                        : power_letter[exponent]);
     379  
     380        if (opts & human_B)
     381          {
     382            if ((opts & human_base_1024) && exponent)
     383              *psuffix++ = 'i';
     384            *psuffix++ = 'B';
     385          }
     386      }
     387  
     388    *psuffix = '\0';
     389  
     390    return p;
     391  }
     392  
     393  
     394  /* The default block size used for output.  This number may change in
     395     the future as disks get larger.  */
     396  #ifndef DEFAULT_BLOCK_SIZE
     397  # define DEFAULT_BLOCK_SIZE 1024
     398  #endif
     399  
     400  static char const *const block_size_args[] = { "human-readable", "si", 0 };
     401  static int const block_size_opts[] =
     402    {
     403      human_autoscale + human_SI + human_base_1024,
     404      human_autoscale + human_SI
     405    };
     406  
     407  static uintmax_t
     408  default_block_size (void)
     409  {
     410    return getenv ("POSIXLY_CORRECT") ? 512 : DEFAULT_BLOCK_SIZE;
     411  }
     412  
     413  static strtol_error
     414  humblock (char const *spec, uintmax_t *block_size, int *options)
     415  {
     416    int i;
     417    int opts = 0;
     418  
     419    if (! spec
     420        && ! (spec = getenv ("BLOCK_SIZE"))
     421        && ! (spec = getenv ("BLOCKSIZE")))
     422      *block_size = default_block_size ();
     423    else
     424      {
     425        if (*spec == '\'')
     426          {
     427            opts |= human_group_digits;
     428            spec++;
     429          }
     430  
     431        if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_opts)))
     432          {
     433            opts |= block_size_opts[i];
     434            *block_size = 1;
     435          }
     436        else
     437          {
     438            char *ptr;
     439            strtol_error e = xstrtoumax (spec, &ptr, 0, block_size,
     440                                         "eEgGkKmMpPtTyYzZ0");
     441            if (e != LONGINT_OK)
     442              {
     443                *options = 0;
     444                return e;
     445              }
     446            for (; ! ('0' <= *spec && *spec <= '9'); spec++)
     447              if (spec == ptr)
     448                {
     449                  opts |= human_SI;
     450                  if (ptr[-1] == 'B')
     451                    opts |= human_B;
     452                  if (ptr[-1] != 'B' || ptr[-2] == 'i')
     453                    opts |= human_base_1024;
     454                  break;
     455                }
     456          }
     457      }
     458  
     459    *options = opts;
     460    return LONGINT_OK;
     461  }
     462  
     463  enum strtol_error
     464  human_options (char const *spec, int *opts, uintmax_t *block_size)
     465  {
     466    strtol_error e = humblock (spec, block_size, opts);
     467    if (*block_size == 0)
     468      {
     469        *block_size = default_block_size ();
     470        e = LONGINT_INVALID;
     471      }
     472    return e;
     473  }