1  /* human.c -- print human readable file size
       2  
       3     Copyright (C) 1996-2007, 2009-2022 Free Software Foundation, Inc.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation, either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  /* Written by Paul Eggert and Larry McVoy.  */
      19  
      20  #include <config.h>
      21  
      22  #include "human.h"
      23  
      24  #include <locale.h>
      25  #include <stdio.h>
      26  #include <stdlib.h>
      27  #include <string.h>
      28  
      29  #include <argmatch.h>
      30  #include <error.h>
      31  #include <intprops.h>
      32  
      33  /* The maximum length of a suffix like "KiB".  */
      34  #define HUMAN_READABLE_SUFFIX_LENGTH_MAX 3
      35  
      36  static const char power_letter[] =
      37  {
      38    0,    /* not used */
      39    'K',  /* kibi ('k' for kilo is a special case) */
      40    'M',  /* mega or mebi */
      41    'G',  /* giga or gibi */
      42    'T',  /* tera or tebi */
      43    'P',  /* peta or pebi */
      44    'E',  /* exa or exbi */
      45    'Z',  /* zetta or 2**70 */
      46    'Y'   /* yotta or 2**80 */
      47  };
      48  
      49  
      50  /* If INEXACT_STYLE is not human_round_to_nearest, and if easily
      51     possible, adjust VALUE according to the style.  */
      52  
      53  static long double
      54  adjust_value (int inexact_style, long double value)
      55  {
      56    /* Do not use the floorl or ceill functions, as that would mean
      57       checking for their presence and possibly linking with the
      58       standard math library, which is a porting pain.  So leave the
      59       value alone if it is too large to easily round.  */
      60    if (inexact_style != human_round_to_nearest && value < UINTMAX_MAX)
      61      {
      62        uintmax_t u = value;
      63        value = u + (inexact_style == human_ceiling && u != value);
      64      }
      65  
      66    return value;
      67  }
      68  
      69  /* Group the digits of NUMBER according to the grouping rules of the
      70     current locale.  NUMBER contains NUMBERLEN digits.  Modify the
      71     bytes pointed to by NUMBER in place, subtracting 1 from NUMBER for
      72     each byte inserted.  Return the starting address of the modified
      73     number.
      74  
      75     To group the digits, use GROUPING and THOUSANDS_SEP as in 'struct
      76     lconv' from <locale.h>.  */
      77  
      78  static char *
      79  group_number (char *number, size_t numberlen,
      80                char const *grouping, char const *thousands_sep)
      81  {
      82    register char *d;
      83    size_t grouplen = SIZE_MAX;
      84    size_t thousands_seplen = strlen (thousands_sep);
      85    size_t i = numberlen;
      86  
      87    /* The maximum possible value for NUMBERLEN is the number of digits
      88       in the square of the largest uintmax_t, so double the size needed.  */
      89    char buf[2 * INT_STRLEN_BOUND (uintmax_t) + 1];
      90  
      91    memcpy (buf, number, numberlen);
      92    d = number + numberlen;
      93  
      94    for (;;)
      95      {
      96        unsigned char g = *grouping;
      97  
      98        if (g)
      99          {
     100            grouplen = g < CHAR_MAX ? g : i;
     101            grouping++;
     102          }
     103  
     104        if (i < grouplen)
     105          grouplen = i;
     106  
     107        d -= grouplen;
     108        i -= grouplen;
     109        memcpy (d, buf + i, grouplen);
     110        if (i == 0)
     111          return d;
     112  
     113        d -= thousands_seplen;
     114        memcpy (d, thousands_sep, thousands_seplen);
     115      }
     116  }
     117  
     118  /* Convert N to a human readable format in BUF, using the options OPTS.
     119  
     120     N is expressed in units of FROM_BLOCK_SIZE.  FROM_BLOCK_SIZE must
     121     be nonnegative.
     122  
     123     Use units of TO_BLOCK_SIZE in the output number.  TO_BLOCK_SIZE
     124     must be positive.
     125  
     126     Use (OPTS & (human_round_to_nearest | human_floor | human_ceiling))
     127     to determine whether to take the ceiling or floor of any result
     128     that cannot be expressed exactly.
     129  
     130     If (OPTS & human_group_digits), group the thousands digits
     131     according to the locale, e.g., "1,000,000" in an American English
     132     locale.
     133  
     134     If (OPTS & human_autoscale), deduce the output block size
     135     automatically; TO_BLOCK_SIZE must be 1 but it has no effect on the
     136     output.  Use powers of 1024 if (OPTS & human_base_1024), and powers
     137     of 1000 otherwise.  For example, assuming powers of 1024, 8500
     138     would be converted to 8.3, 133456345 to 127, 56990456345 to 53, and
     139     so on.  Numbers smaller than the power aren't modified.
     140     human_autoscale is normally used together with human_SI.
     141  
     142     If (OPTS & human_space_before_unit), use a space to separate the
     143     number from any suffix that is appended as described below.
     144  
     145     If (OPTS & human_SI), append an SI prefix indicating which power is
     146     being used.  If in addition (OPTS & human_B), append "B" (if base
     147     1000) or "iB" (if base 1024) to the SI prefix.  When ((OPTS &
     148     human_SI) && ! (OPTS & human_autoscale)), TO_BLOCK_SIZE must be a
     149     power of 1024 or of 1000, depending on (OPTS &
     150     human_base_1024).  */
     151  
     152  char *
     153  human_readable (uintmax_t n, char *buf, int opts,
     154                  uintmax_t from_block_size, uintmax_t to_block_size)
     155  {
     156    int inexact_style =
     157      opts & (human_round_to_nearest | human_floor | human_ceiling);
     158    unsigned int base = opts & human_base_1024 ? 1024 : 1000;
     159    uintmax_t amt;
     160    int tenths;
     161    int exponent = -1;
     162    int exponent_max = sizeof power_letter - 1;
     163    char *p;
     164    char *psuffix;
     165    char const *integerlim;
     166  
     167    /* 0 means adjusted N == AMT.TENTHS;
     168       1 means AMT.TENTHS < adjusted N < AMT.TENTHS + 0.05;
     169       2 means adjusted N == AMT.TENTHS + 0.05;
     170       3 means AMT.TENTHS + 0.05 < adjusted N < AMT.TENTHS + 0.1.  */
     171    int rounding;
     172  
     173    char const *decimal_point = ".";
     174    size_t decimal_pointlen = 1;
     175    char const *grouping = "";
     176    char const *thousands_sep = "";
     177    struct lconv const *l = localeconv ();
     178    size_t pointlen = strlen (l->decimal_point);
     179    if (0 < pointlen && pointlen <= MB_LEN_MAX)
     180      {
     181        decimal_point = l->decimal_point;
     182        decimal_pointlen = pointlen;
     183      }
     184    grouping = l->grouping;
     185    if (strlen (l->thousands_sep) <= MB_LEN_MAX)
     186      thousands_sep = l->thousands_sep;
     187  
     188    /* Leave room for a trailing space and following suffix.  */
     189    psuffix = buf + LONGEST_HUMAN_READABLE - 1 - HUMAN_READABLE_SUFFIX_LENGTH_MAX;
     190    p = psuffix;
     191  
     192    /* Adjust AMT out of FROM_BLOCK_SIZE units and into TO_BLOCK_SIZE
     193       units.  If this can be done exactly with integer arithmetic, do
     194       not use floating point operations.  */
     195    if (to_block_size <= from_block_size)
     196      {
     197        if (from_block_size % to_block_size == 0)
     198          {
     199            uintmax_t multiplier = from_block_size / to_block_size;
     200            amt = n * multiplier;
     201            if (amt / multiplier == n)
     202              {
     203                tenths = 0;
     204                rounding = 0;
     205                goto use_integer_arithmetic;
     206              }
     207          }
     208      }
     209    else if (from_block_size != 0 && to_block_size % from_block_size == 0)
     210      {
     211        uintmax_t divisor = to_block_size / from_block_size;
     212        uintmax_t r10 = (n % divisor) * 10;
     213        uintmax_t r2 = (r10 % divisor) * 2;
     214        amt = n / divisor;
     215        tenths = r10 / divisor;
     216        rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2);
     217        goto use_integer_arithmetic;
     218      }
     219  
     220    {
     221      /* Either the result cannot be computed easily using uintmax_t,
     222         or from_block_size is zero.  Fall back on floating point.
     223         FIXME: This can yield answers that are slightly off.  */
     224  
     225      long double dto_block_size = to_block_size;
     226      long double damt = n * (from_block_size / dto_block_size);
     227      size_t buflen;
     228      size_t nonintegerlen;
     229  
     230      if (! (opts & human_autoscale))
     231        {
     232          sprintf (buf, "%.0Lf", adjust_value (inexact_style, damt));
     233          buflen = strlen (buf);
     234          nonintegerlen = 0;
     235        }
     236      else
     237        {
     238          long double e = 1;
     239          exponent = 0;
     240  
     241          do
     242            {
     243              e *= base;
     244              exponent++;
     245            }
     246          while (e * base <= damt && exponent < exponent_max);
     247  
     248          damt /= e;
     249  
     250          sprintf (buf, "%.1Lf", adjust_value (inexact_style, damt));
     251          buflen = strlen (buf);
     252          nonintegerlen = decimal_pointlen + 1;
     253  
     254          if (1 + nonintegerlen + ! (opts & human_base_1024) < buflen
     255              || ((opts & human_suppress_point_zero)
     256                  && buf[buflen - 1] == '0'))
     257            {
     258              sprintf (buf, "%.0Lf",
     259                       adjust_value (inexact_style, damt * 10) / 10);
     260              buflen = strlen (buf);
     261              nonintegerlen = 0;
     262            }
     263        }
     264  
     265      p = psuffix - buflen;
     266      memmove (p, buf, buflen);
     267      integerlim = p + buflen - nonintegerlen;
     268    }
     269    goto do_grouping;
     270  
     271   use_integer_arithmetic:
     272    {
     273      /* The computation can be done exactly, with integer arithmetic.
     274  
     275         Use power of BASE notation if requested and if adjusted AMT is
     276         large enough.  */
     277  
     278      if (opts & human_autoscale)
     279        {
     280          exponent = 0;
     281  
     282          if (base <= amt)
     283            {
     284              do
     285                {
     286                  unsigned int r10 = (amt % base) * 10 + tenths;
     287                  unsigned int r2 = (r10 % base) * 2 + (rounding >> 1);
     288                  amt /= base;
     289                  tenths = r10 / base;
     290                  rounding = (r2 < base
     291                              ? (r2 + rounding) != 0
     292                              : 2 + (base < r2 + rounding));
     293                  exponent++;
     294                }
     295              while (base <= amt && exponent < exponent_max);
     296  
     297              if (amt < 10)
     298                {
     299                  if (inexact_style == human_round_to_nearest
     300                      ? 2 < rounding + (tenths & 1)
     301                      : inexact_style == human_ceiling && 0 < rounding)
     302                    {
     303                      tenths++;
     304                      rounding = 0;
     305  
     306                      if (tenths == 10)
     307                        {
     308                          amt++;
     309                          tenths = 0;
     310                        }
     311                    }
     312  
     313                  if (amt < 10
     314                      && (tenths || ! (opts & human_suppress_point_zero)))
     315                    {
     316                      *--p = '0' + tenths;
     317                      p -= decimal_pointlen;
     318                      memcpy (p, decimal_point, decimal_pointlen);
     319                      tenths = rounding = 0;
     320                    }
     321                }
     322            }
     323        }
     324  
     325      if (inexact_style == human_round_to_nearest
     326          ? 5 < tenths + (0 < rounding + (amt & 1))
     327          : inexact_style == human_ceiling && 0 < tenths + rounding)
     328        {
     329          amt++;
     330  
     331          if ((opts & human_autoscale)
     332              && amt == base && exponent < exponent_max)
     333            {
     334              exponent++;
     335              if (! (opts & human_suppress_point_zero))
     336                {
     337                  *--p = '0';
     338                  p -= decimal_pointlen;
     339                  memcpy (p, decimal_point, decimal_pointlen);
     340                }
     341              amt = 1;
     342            }
     343        }
     344  
     345      integerlim = p;
     346  
     347      do
     348        {
     349          int digit = amt % 10;
     350          *--p = digit + '0';
     351        }
     352      while ((amt /= 10) != 0);
     353    }
     354  
     355   do_grouping:
     356    if (opts & human_group_digits)
     357      p = group_number (p, integerlim - p, grouping, thousands_sep);
     358  
     359    if (opts & human_SI)
     360      {
     361        if (exponent < 0)
     362          {
     363            uintmax_t power;
     364            exponent = 0;
     365            for (power = 1; power < to_block_size; power *= base)
     366              if (++exponent == exponent_max)
     367                break;
     368          }
     369  
     370        if ((exponent | (opts & human_B)) && (opts & human_space_before_unit))
     371          *psuffix++ = ' ';
     372  
     373        if (exponent)
     374          *psuffix++ = (! (opts & human_base_1024) && exponent == 1
     375                        ? 'k'
     376                        : power_letter[exponent]);
     377  
     378        if (opts & human_B)
     379          {
     380            if ((opts & human_base_1024) && exponent)
     381              *psuffix++ = 'i';
     382            *psuffix++ = 'B';
     383          }
     384      }
     385  
     386    *psuffix = '\0';
     387  
     388    return p;
     389  }
     390  
     391  
     392  /* The default block size used for output.  This number may change in
     393     the future as disks get larger.  */
     394  #ifndef DEFAULT_BLOCK_SIZE
     395  # define DEFAULT_BLOCK_SIZE 1024
     396  #endif
     397  
     398  static char const *const block_size_args[] = { "human-readable", "si", 0 };
     399  static int const block_size_opts[] =
     400    {
     401      human_autoscale + human_SI + human_base_1024,
     402      human_autoscale + human_SI
     403    };
     404  
     405  static uintmax_t
     406  default_block_size (void)
     407  {
     408    return getenv ("POSIXLY_CORRECT") ? 512 : DEFAULT_BLOCK_SIZE;
     409  }
     410  
     411  static strtol_error
     412  humblock (char const *spec, uintmax_t *block_size, int *options)
     413  {
     414    int i;
     415    int opts = 0;
     416  
     417    if (! spec
     418        && ! (spec = getenv ("BLOCK_SIZE"))
     419        && ! (spec = getenv ("BLOCKSIZE")))
     420      *block_size = default_block_size ();
     421    else
     422      {
     423        if (*spec == '\'')
     424          {
     425            opts |= human_group_digits;
     426            spec++;
     427          }
     428  
     429        if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_opts)))
     430          {
     431            opts |= block_size_opts[i];
     432            *block_size = 1;
     433          }
     434        else
     435          {
     436            char *ptr;
     437            strtol_error e = xstrtoumax (spec, &ptr, 0, block_size,
     438                                         "eEgGkKmMpPtTyYzZ0");
     439            if (e != LONGINT_OK)
     440              {
     441                *options = 0;
     442                return e;
     443              }
     444            for (; ! ('0' <= *spec && *spec <= '9'); spec++)
     445              if (spec == ptr)
     446                {
     447                  opts |= human_SI;
     448                  if (ptr[-1] == 'B')
     449                    opts |= human_B;
     450                  if (ptr[-1] != 'B' || ptr[-2] == 'i')
     451                    opts |= human_base_1024;
     452                  break;
     453                }
     454          }
     455      }
     456  
     457    *options = opts;
     458    return LONGINT_OK;
     459  }
     460  
     461  enum strtol_error
     462  human_options (char const *spec, int *opts, uintmax_t *block_size)
     463  {
     464    strtol_error e = humblock (spec, block_size, opts);
     465    if (*block_size == 0)
     466      {
     467        *block_size = default_block_size ();
     468        e = LONGINT_INVALID;
     469      }
     470    return e;
     471  }