(root)/
coreutils-9.4/
lib/
base32.c
       1  /* base32.c -- Encode binary data using printable characters.
       2     Copyright (C) 1999-2001, 2004-2006, 2009-2023 Free Software Foundation, Inc.
       3  
       4     This file is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU Lesser General Public License as
       6     published by the Free Software Foundation; either version 2.1 of the
       7     License, or (at your option) any later version.
       8  
       9     This file is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU Lesser General Public License for more details.
      13  
      14     You should have received a copy of the GNU Lesser General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Adapted from Simon Josefsson's base64 code by Gijs van Tulder.
      18   *
      19   * See also RFC 4648 <https://www.ietf.org/rfc/rfc4648.txt>.
      20   *
      21   * Be careful with error checking.  Here is how you would typically
      22   * use these functions:
      23   *
      24   * bool ok = base32_decode_alloc (in, inlen, &out, &outlen);
      25   * if (!ok)
      26   *   FAIL: input was not valid base32
      27   * if (out == NULL)
      28   *   FAIL: memory allocation error
      29   * OK: data in OUT/OUTLEN
      30   *
      31   * idx_t outlen = base32_encode_alloc (in, inlen, &out);
      32   * if (out == NULL && outlen == 0 && inlen != 0)
      33   *   FAIL: input too long
      34   * if (out == NULL)
      35   *   FAIL: memory allocation error
      36   * OK: data in OUT/OUTLEN.
      37   *
      38   */
      39  
      40  #include <config.h>
      41  
      42  /* Get prototype. */
      43  #include "base32.h"
      44  
      45  /* Get imalloc. */
      46  #include <ialloc.h>
      47  
      48  #include <intprops.h>
      49  
      50  /* Get UCHAR_MAX. */
      51  #include <limits.h>
      52  
      53  #include <string.h>
      54  
      55  /* Convert 'char' to 'unsigned char' without casting.  */
      56  static unsigned char
      57  to_uchar (char ch)
      58  {
      59    return ch;
      60  }
      61  
      62  /* Base32 encode IN array of size INLEN into OUT array of size OUTLEN.
      63     If OUTLEN is less than BASE32_LENGTH(INLEN), write as many bytes as
      64     possible.  If OUTLEN is larger than BASE32_LENGTH(INLEN), also zero
      65     terminate the output buffer. */
      66  void
      67  base32_encode (const char *restrict in, idx_t inlen,
      68                 char *restrict out, idx_t outlen)
      69  {
      70    static const char b32str[32] =
      71      "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
      72  
      73    while (inlen && outlen)
      74      {
      75        *out++ = b32str[(to_uchar (in[0]) >> 3) & 0x1f];
      76        if (!--outlen)
      77          break;
      78        *out++ = b32str[((to_uchar (in[0]) << 2)
      79                         + (--inlen ? to_uchar (in[1]) >> 6 : 0))
      80                        & 0x1f];
      81        if (!--outlen)
      82          break;
      83        *out++ =
      84          (inlen
      85           ? b32str[(to_uchar (in[1]) >> 1) & 0x1f]
      86           : '=');
      87        if (!--outlen)
      88          break;
      89        *out++ =
      90          (inlen
      91           ? b32str[((to_uchar (in[1]) << 4)
      92                     + (--inlen ? to_uchar (in[2]) >> 4 : 0))
      93                    & 0x1f]
      94           : '=');
      95        if (!--outlen)
      96          break;
      97        *out++ =
      98          (inlen
      99           ? b32str[((to_uchar (in[2]) << 1)
     100                     + (--inlen ? to_uchar (in[3]) >> 7 : 0))
     101                    & 0x1f]
     102           : '=');
     103        if (!--outlen)
     104          break;
     105        *out++ =
     106          (inlen
     107           ? b32str[(to_uchar (in[3]) >> 2) & 0x1f]
     108           : '=');
     109        if (!--outlen)
     110          break;
     111        *out++ =
     112          (inlen
     113           ? b32str[((to_uchar (in[3]) << 3)
     114                     + (--inlen ? to_uchar (in[4]) >> 5 : 0))
     115                    & 0x1f]
     116           : '=');
     117        if (!--outlen)
     118          break;
     119        *out++ = inlen ? b32str[to_uchar (in[4]) & 0x1f] : '=';
     120        if (!--outlen)
     121          break;
     122        if (inlen)
     123          inlen--;
     124        if (inlen)
     125          in += 5;
     126      }
     127  
     128    if (outlen)
     129      *out = '\0';
     130  }
     131  
     132  /* Allocate a buffer and store zero terminated base32 encoded data
     133     from array IN of size INLEN, returning BASE32_LENGTH(INLEN), i.e.,
     134     the length of the encoded data, excluding the terminating zero.  On
     135     return, the OUT variable will hold a pointer to newly allocated
     136     memory that must be deallocated by the caller.  If output string
     137     length would overflow, 0 is returned and OUT is set to NULL.  If
     138     memory allocation failed, OUT is set to NULL, and the return value
     139     indicates length of the requested memory block, i.e.,
     140     BASE32_LENGTH(inlen) + 1. */
     141  idx_t
     142  base32_encode_alloc (const char *in, idx_t inlen, char **out)
     143  {
     144    /* Check for overflow in outlen computation.
     145       Treat negative INLEN as overflow, for better compatibility with
     146       pre-2021-08-27 API, which used size_t.  */
     147    idx_t in_over_5 = inlen / 5 + (inlen % 5 != 0), outlen;
     148    if (! INT_MULTIPLY_OK (in_over_5, 8, &outlen) || inlen < 0)
     149      {
     150        *out = NULL;
     151        return 0;
     152      }
     153    outlen++;
     154  
     155    *out = imalloc (outlen);
     156    if (!*out)
     157      return outlen;
     158  
     159    base32_encode (in, inlen, *out, outlen);
     160  
     161    return outlen - 1;
     162  }
     163  
     164  /* With this approach this file works independent of the charset used
     165     (think EBCDIC).  However, it does assume that the characters in the
     166     Base32 alphabet (A-Z2-7) are encoded in 0..255.  POSIX
     167     1003.1-2001 require that char and unsigned char are 8-bit
     168     quantities, though, taking care of that problem.  But this may be a
     169     potential problem on non-POSIX C99 platforms.
     170  
     171     IBM C V6 for AIX mishandles "#define B32(x) ...'x'...", so use "_"
     172     as the formal parameter rather than "x".  */
     173  #define B32(_)                                  \
     174    ((_) == 'A' ? 0                               \
     175     : (_) == 'B' ? 1                             \
     176     : (_) == 'C' ? 2                             \
     177     : (_) == 'D' ? 3                             \
     178     : (_) == 'E' ? 4                             \
     179     : (_) == 'F' ? 5                             \
     180     : (_) == 'G' ? 6                             \
     181     : (_) == 'H' ? 7                             \
     182     : (_) == 'I' ? 8                             \
     183     : (_) == 'J' ? 9                             \
     184     : (_) == 'K' ? 10                            \
     185     : (_) == 'L' ? 11                            \
     186     : (_) == 'M' ? 12                            \
     187     : (_) == 'N' ? 13                            \
     188     : (_) == 'O' ? 14                            \
     189     : (_) == 'P' ? 15                            \
     190     : (_) == 'Q' ? 16                            \
     191     : (_) == 'R' ? 17                            \
     192     : (_) == 'S' ? 18                            \
     193     : (_) == 'T' ? 19                            \
     194     : (_) == 'U' ? 20                            \
     195     : (_) == 'V' ? 21                            \
     196     : (_) == 'W' ? 22                            \
     197     : (_) == 'X' ? 23                            \
     198     : (_) == 'Y' ? 24                            \
     199     : (_) == 'Z' ? 25                            \
     200     : (_) == '2' ? 26                            \
     201     : (_) == '3' ? 27                            \
     202     : (_) == '4' ? 28                            \
     203     : (_) == '5' ? 29                            \
     204     : (_) == '6' ? 30                            \
     205     : (_) == '7' ? 31                            \
     206     : -1)
     207  
     208  static const signed char b32[0x100] = {
     209    B32 (0), B32 (1), B32 (2), B32 (3),
     210    B32 (4), B32 (5), B32 (6), B32 (7),
     211    B32 (8), B32 (9), B32 (10), B32 (11),
     212    B32 (12), B32 (13), B32 (14), B32 (15),
     213    B32 (16), B32 (17), B32 (18), B32 (19),
     214    B32 (20), B32 (21), B32 (22), B32 (23),
     215    B32 (24), B32 (25), B32 (26), B32 (27),
     216    B32 (28), B32 (29), B32 (30), B32 (31),
     217    B32 (32), B32 (33), B32 (34), B32 (35),
     218    B32 (36), B32 (37), B32 (38), B32 (39),
     219    B32 (40), B32 (41), B32 (42), B32 (43),
     220    B32 (44), B32 (45), B32 (46), B32 (47),
     221    B32 (48), B32 (49), B32 (50), B32 (51),
     222    B32 (52), B32 (53), B32 (54), B32 (55),
     223    B32 (56), B32 (57), B32 (58), B32 (59),
     224    B32 (60), B32 (61), B32 (62), B32 (63),
     225    B32 (32), B32 (65), B32 (66), B32 (67),
     226    B32 (68), B32 (69), B32 (70), B32 (71),
     227    B32 (72), B32 (73), B32 (74), B32 (75),
     228    B32 (76), B32 (77), B32 (78), B32 (79),
     229    B32 (80), B32 (81), B32 (82), B32 (83),
     230    B32 (84), B32 (85), B32 (86), B32 (87),
     231    B32 (88), B32 (89), B32 (90), B32 (91),
     232    B32 (92), B32 (93), B32 (94), B32 (95),
     233    B32 (96), B32 (97), B32 (98), B32 (99),
     234    B32 (100), B32 (101), B32 (102), B32 (103),
     235    B32 (104), B32 (105), B32 (106), B32 (107),
     236    B32 (108), B32 (109), B32 (110), B32 (111),
     237    B32 (112), B32 (113), B32 (114), B32 (115),
     238    B32 (116), B32 (117), B32 (118), B32 (119),
     239    B32 (120), B32 (121), B32 (122), B32 (123),
     240    B32 (124), B32 (125), B32 (126), B32 (127),
     241    B32 (128), B32 (129), B32 (130), B32 (131),
     242    B32 (132), B32 (133), B32 (134), B32 (135),
     243    B32 (136), B32 (137), B32 (138), B32 (139),
     244    B32 (140), B32 (141), B32 (142), B32 (143),
     245    B32 (144), B32 (145), B32 (146), B32 (147),
     246    B32 (148), B32 (149), B32 (150), B32 (151),
     247    B32 (152), B32 (153), B32 (154), B32 (155),
     248    B32 (156), B32 (157), B32 (158), B32 (159),
     249    B32 (160), B32 (161), B32 (162), B32 (163),
     250    B32 (132), B32 (165), B32 (166), B32 (167),
     251    B32 (168), B32 (169), B32 (170), B32 (171),
     252    B32 (172), B32 (173), B32 (174), B32 (175),
     253    B32 (176), B32 (177), B32 (178), B32 (179),
     254    B32 (180), B32 (181), B32 (182), B32 (183),
     255    B32 (184), B32 (185), B32 (186), B32 (187),
     256    B32 (188), B32 (189), B32 (190), B32 (191),
     257    B32 (192), B32 (193), B32 (194), B32 (195),
     258    B32 (196), B32 (197), B32 (198), B32 (199),
     259    B32 (200), B32 (201), B32 (202), B32 (203),
     260    B32 (204), B32 (205), B32 (206), B32 (207),
     261    B32 (208), B32 (209), B32 (210), B32 (211),
     262    B32 (212), B32 (213), B32 (214), B32 (215),
     263    B32 (216), B32 (217), B32 (218), B32 (219),
     264    B32 (220), B32 (221), B32 (222), B32 (223),
     265    B32 (224), B32 (225), B32 (226), B32 (227),
     266    B32 (228), B32 (229), B32 (230), B32 (231),
     267    B32 (232), B32 (233), B32 (234), B32 (235),
     268    B32 (236), B32 (237), B32 (238), B32 (239),
     269    B32 (240), B32 (241), B32 (242), B32 (243),
     270    B32 (244), B32 (245), B32 (246), B32 (247),
     271    B32 (248), B32 (249), B32 (250), B32 (251),
     272    B32 (252), B32 (253), B32 (254), B32 (255)
     273  };
     274  
     275  #if UCHAR_MAX == 255
     276  # define uchar_in_range(c) true
     277  #else
     278  # define uchar_in_range(c) ((c) <= 255)
     279  #endif
     280  
     281  /* Return true if CH is a character from the Base32 alphabet, and
     282     false otherwise.  Note that '=' is padding and not considered to be
     283     part of the alphabet.  */
     284  bool
     285  isbase32 (char ch)
     286  {
     287    return uchar_in_range (to_uchar (ch)) && 0 <= b32[to_uchar (ch)];
     288  }
     289  
     290  /* Initialize decode-context buffer, CTX.  */
     291  void
     292  base32_decode_ctx_init (struct base32_decode_context *ctx)
     293  {
     294    ctx->i = 0;
     295  }
     296  
     297  /* If CTX->i is 0 or 8, there are eight or more bytes in [*IN..IN_END), and
     298     none of those eight is a newline, then return *IN.  Otherwise, copy up to
     299     4 - CTX->i non-newline bytes from that range into CTX->buf, starting at
     300     index CTX->i and setting CTX->i to reflect the number of bytes copied,
     301     and return CTX->buf.  In either case, advance *IN to point to the byte
     302     after the last one processed, and set *N_NON_NEWLINE to the number of
     303     verified non-newline bytes accessible through the returned pointer.  */
     304  static char *
     305  get_8 (struct base32_decode_context *ctx,
     306         char const *restrict *in, char const *restrict in_end,
     307         idx_t *n_non_newline)
     308  {
     309    if (ctx->i == 8)
     310      ctx->i = 0;
     311  
     312    if (ctx->i == 0)
     313      {
     314        char const *t = *in;
     315        if (8 <= in_end - *in && memchr (t, '\n', 8) == NULL)
     316          {
     317            /* This is the common case: no newline.  */
     318            *in += 8;
     319            *n_non_newline = 8;
     320            return (char *) t;
     321          }
     322      }
     323  
     324    {
     325      /* Copy non-newline bytes into BUF.  */
     326      char const *p = *in;
     327      while (p < in_end)
     328        {
     329          char c = *p++;
     330          if (c != '\n')
     331            {
     332              ctx->buf[ctx->i++] = c;
     333              if (ctx->i == 8)
     334                break;
     335            }
     336        }
     337  
     338      *in = p;
     339      *n_non_newline = ctx->i;
     340      return ctx->buf;
     341    }
     342  }
     343  
     344  #define return_false                            \
     345    do                                            \
     346      {                                           \
     347        *outp = out;                              \
     348        return false;                             \
     349      }                                           \
     350    while (false)
     351  
     352  /* Decode eight bytes of base32-encoded data, IN, of length INLEN
     353     into the output buffer, *OUT, of size *OUTLEN bytes.  Return true if
     354     decoding is successful, false otherwise.  If *OUTLEN is too small,
     355     as many bytes as possible are written to *OUT.  On return, advance
     356     *OUT to point to the byte after the last one written, and decrement
     357     *OUTLEN to reflect the number of bytes remaining in *OUT.  */
     358  static bool
     359  decode_8 (char const *restrict in, idx_t inlen,
     360            char *restrict *outp, idx_t *outleft)
     361  {
     362    char *out = *outp;
     363    if (inlen < 8)
     364      return false;
     365  
     366    if (!isbase32 (in[0]) || !isbase32 (in[1]))
     367      return false;
     368  
     369    if (*outleft)
     370      {
     371        *out++ = ((b32[to_uchar (in[0])] << 3)
     372                  | (b32[to_uchar (in[1])] >> 2));
     373        --*outleft;
     374      }
     375  
     376    if (in[2] == '=')
     377      {
     378        if (in[3] != '=' || in[4] != '=' || in[5] != '='
     379            || in[6] != '=' || in[7] != '=')
     380          return_false;
     381      }
     382    else
     383      {
     384        if (!isbase32 (in[2]) || !isbase32 (in[3]))
     385          return_false;
     386  
     387        if (*outleft)
     388          {
     389            *out++ = ((b32[to_uchar (in[1])] << 6)
     390                      | (b32[to_uchar (in[2])] << 1)
     391                      | (b32[to_uchar (in[3])] >> 4));
     392            --*outleft;
     393          }
     394  
     395        if (in[4] == '=')
     396          {
     397            if (in[5] != '=' || in[6] != '=' || in[7] != '=')
     398              return_false;
     399          }
     400        else
     401          {
     402            if (!isbase32 (in[4]))
     403              return_false;
     404  
     405            if (*outleft)
     406              {
     407                *out++ = ((b32[to_uchar (in[3])] << 4)
     408                          | (b32[to_uchar (in[4])] >> 1));
     409                --*outleft;
     410              }
     411  
     412            if (in[5] == '=')
     413              {
     414                if (in[6] != '=' || in[7] != '=')
     415                  return_false;
     416              }
     417            else
     418              {
     419                if (!isbase32 (in[5]) || !isbase32 (in[6]))
     420                  return_false;
     421  
     422                if (*outleft)
     423                  {
     424                    *out++ = ((b32[to_uchar (in[4])] << 7)
     425                              | (b32[to_uchar (in[5])] << 2)
     426                              | (b32[to_uchar (in[6])] >> 3));
     427                    --*outleft;
     428                  }
     429  
     430                if (in[7] != '=')
     431                  {
     432                    if (!isbase32 (in[7]))
     433                      return_false;
     434  
     435                    if (*outleft)
     436                      {
     437                        *out++ = ((b32[to_uchar (in[6])] << 5)
     438                                  | (b32[to_uchar (in[7])]));
     439                        --*outleft;
     440                      }
     441                  }
     442              }
     443          }
     444      }
     445  
     446    *outp = out;
     447    return true;
     448  }
     449  
     450  /* Decode base32-encoded input array IN of length INLEN to output array
     451     OUT that can hold *OUTLEN bytes.  The input data may be interspersed
     452     with newlines.  Return true if decoding was successful, i.e. if the
     453     input was valid base32 data, false otherwise.  If *OUTLEN is too
     454     small, as many bytes as possible will be written to OUT.  On return,
     455     *OUTLEN holds the length of decoded bytes in OUT.  Note that as soon
     456     as any non-alphabet, non-newline character is encountered, decoding
     457     is stopped and false is returned.  If INLEN is zero, then process
     458     only whatever data is stored in CTX.
     459  
     460     Initially, CTX must have been initialized via base32_decode_ctx_init.
     461     Subsequent calls to this function must reuse whatever state is recorded
     462     in that buffer.  It is necessary for when a octuple of base32 input
     463     bytes spans two input buffers.
     464  
     465     If CTX is NULL then newlines are treated as garbage and the input
     466     buffer is processed as a unit.  */
     467  
     468  bool
     469  base32_decode_ctx (struct base32_decode_context *ctx,
     470                     const char *restrict in, idx_t inlen,
     471                     char *restrict out, idx_t *outlen)
     472  {
     473    idx_t outleft = *outlen;
     474    bool ignore_newlines = ctx != NULL;
     475    bool flush_ctx = false;
     476    unsigned int ctx_i = 0;
     477  
     478    if (ignore_newlines)
     479      {
     480        ctx_i = ctx->i;
     481        flush_ctx = inlen == 0;
     482      }
     483  
     484  
     485    while (true)
     486      {
     487        idx_t outleft_save = outleft;
     488        if (ctx_i == 0 && !flush_ctx)
     489          {
     490            while (true)
     491              {
     492                /* Save a copy of outleft, in case we need to re-parse this
     493                   block of four bytes.  */
     494                outleft_save = outleft;
     495                if (!decode_8 (in, inlen, &out, &outleft))
     496                  break;
     497  
     498                in += 8;
     499                inlen -= 8;
     500              }
     501          }
     502  
     503        if (inlen == 0 && !flush_ctx)
     504          break;
     505  
     506        /* Handle the common case of 72-byte wrapped lines.
     507           This also handles any other multiple-of-8-byte wrapping.  */
     508        if (inlen && *in == '\n' && ignore_newlines)
     509          {
     510            ++in;
     511            --inlen;
     512            continue;
     513          }
     514  
     515        /* Restore OUT and OUTLEFT.  */
     516        out -= outleft_save - outleft;
     517        outleft = outleft_save;
     518  
     519        {
     520          char const *in_end = in + inlen;
     521          char const *non_nl;
     522  
     523          if (ignore_newlines)
     524            non_nl = get_8 (ctx, &in, in_end, &inlen);
     525          else
     526            non_nl = in;  /* Might have nl in this case. */
     527  
     528          /* If the input is empty or consists solely of newlines (0 non-newlines),
     529             then we're done.  Likewise if there are fewer than 8 bytes when not
     530             flushing context and not treating newlines as garbage.  */
     531          if (inlen == 0 || (inlen < 8 && !flush_ctx && ignore_newlines))
     532            {
     533              inlen = 0;
     534              break;
     535            }
     536          if (!decode_8 (non_nl, inlen, &out, &outleft))
     537            break;
     538  
     539          inlen = in_end - in;
     540        }
     541      }
     542  
     543    *outlen -= outleft;
     544  
     545    return inlen == 0;
     546  }
     547  
     548  /* Allocate an output buffer in *OUT, and decode the base32 encoded
     549     data stored in IN of size INLEN to the *OUT buffer.  On return, the
     550     size of the decoded data is stored in *OUTLEN.  OUTLEN may be NULL,
     551     if the caller is not interested in the decoded length.  *OUT may be
     552     NULL to indicate an out of memory error, in which case *OUTLEN
     553     contains the size of the memory block needed.  The function returns
     554     true on successful decoding and memory allocation errors.  (Use the
     555     *OUT and *OUTLEN parameters to differentiate between successful
     556     decoding and memory error.)  The function returns false if the
     557     input was invalid, in which case *OUT is NULL and *OUTLEN is
     558     undefined. */
     559  bool
     560  base32_decode_alloc_ctx (struct base32_decode_context *ctx,
     561                           const char *in, idx_t inlen, char **out,
     562                           idx_t *outlen)
     563  {
     564    /* This may allocate a few bytes too many, depending on input,
     565       but it's not worth the extra CPU time to compute the exact size.
     566       The exact size is 5 * inlen / 8, minus one or more bytes if the
     567       input is padded with one or more "=".
     568       Shifting before multiplying avoids the possibility of overflow.  */
     569    idx_t needlen = 5 * ((inlen >> 3) + 1);
     570  
     571    *out = imalloc (needlen);
     572    if (!*out)
     573      return true;
     574  
     575    if (!base32_decode_ctx (ctx, in, inlen, *out, &needlen))
     576      {
     577        free (*out);
     578        *out = NULL;
     579        return false;
     580      }
     581  
     582    if (outlen)
     583      *outlen = needlen;
     584  
     585    return true;
     586  }