(root)/
coreutils-9.4/
lib/
base64.c
       1  /* base64.c -- Encode binary data using printable characters.
       2     Copyright (C) 1999-2001, 2004-2006, 2009-2023 Free Software Foundation, Inc.
       3  
       4     This file is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU Lesser General Public License as
       6     published by the Free Software Foundation; either version 2.1 of the
       7     License, or (at your option) any later version.
       8  
       9     This file is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU Lesser General Public License for more details.
      13  
      14     You should have received a copy of the GNU Lesser General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Written by Simon Josefsson.  Partially adapted from GNU MailUtils
      18   * (mailbox/filter_trans.c, as of 2004-11-28).  Improved by review
      19   * from Paul Eggert, Bruno Haible, and Stepan Kasal.
      20   *
      21   * See also RFC 4648 <https://www.ietf.org/rfc/rfc4648.txt>.
      22   *
      23   * Be careful with error checking.  Here is how you would typically
      24   * use these functions:
      25   *
      26   * bool ok = base64_decode_alloc (in, inlen, &out, &outlen);
      27   * if (!ok)
      28   *   FAIL: input was not valid base64
      29   * if (out == NULL)
      30   *   FAIL: memory allocation error
      31   * OK: data in OUT/OUTLEN
      32   *
      33   * idx_t outlen = base64_encode_alloc (in, inlen, &out);
      34   * if (out == NULL && outlen == 0 && inlen != 0)
      35   *   FAIL: input too long
      36   * if (out == NULL)
      37   *   FAIL: memory allocation error
      38   * OK: data in OUT/OUTLEN.
      39   *
      40   */
      41  
      42  #include <config.h>
      43  
      44  /* Get prototype. */
      45  #include "base64.h"
      46  
      47  /* Get imalloc. */
      48  #include <ialloc.h>
      49  
      50  #include <intprops.h>
      51  
      52  /* Get UCHAR_MAX. */
      53  #include <limits.h>
      54  
      55  #include <string.h>
      56  
      57  /* Convert 'char' to 'unsigned char' without casting.  */
      58  static unsigned char
      59  to_uchar (char ch)
      60  {
      61    return ch;
      62  }
      63  
      64  static const char b64c[64] =
      65    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
      66  
      67  /* Base64 encode IN array of size INLEN into OUT array. OUT needs
      68     to be of length >= BASE64_LENGTH(INLEN), and INLEN needs to be
      69     a multiple of 3.  */
      70  static void
      71  base64_encode_fast (const char *restrict in, idx_t inlen, char *restrict out)
      72  {
      73    while (inlen)
      74      {
      75        *out++ = b64c[(to_uchar (in[0]) >> 2) & 0x3f];
      76        *out++ = b64c[((to_uchar (in[0]) << 4) + (to_uchar (in[1]) >> 4)) & 0x3f];
      77        *out++ = b64c[((to_uchar (in[1]) << 2) + (to_uchar (in[2]) >> 6)) & 0x3f];
      78        *out++ = b64c[to_uchar (in[2]) & 0x3f];
      79  
      80        inlen -= 3;
      81        in += 3;
      82      }
      83  }
      84  
      85  /* Base64 encode IN array of size INLEN into OUT array of size OUTLEN.
      86     If OUTLEN is less than BASE64_LENGTH(INLEN), write as many bytes as
      87     possible.  If OUTLEN is larger than BASE64_LENGTH(INLEN), also zero
      88     terminate the output buffer. */
      89  void
      90  base64_encode (const char *restrict in, idx_t inlen,
      91                 char *restrict out, idx_t outlen)
      92  {
      93    /* Note this outlen constraint can be enforced at compile time.
      94       I.E. that the output buffer is exactly large enough to hold
      95       the encoded inlen bytes.  The inlen constraints (of corresponding
      96       to outlen, and being a multiple of 3) can change at runtime
      97       at the end of input.  However the common case when reading
      98       large inputs is to have both constraints satisfied, so we depend
      99       on both in base_encode_fast().  */
     100    if (outlen % 4 == 0 && inlen == (outlen >> 2) * 3)
     101      {
     102        base64_encode_fast (in, inlen, out);
     103        return;
     104      }
     105  
     106    while (inlen && outlen)
     107      {
     108        *out++ = b64c[(to_uchar (in[0]) >> 2) & 0x3f];
     109        if (!--outlen)
     110          break;
     111        *out++ = b64c[((to_uchar (in[0]) << 4)
     112                         + (--inlen ? to_uchar (in[1]) >> 4 : 0))
     113                        & 0x3f];
     114        if (!--outlen)
     115          break;
     116        *out++ =
     117          (inlen
     118           ? b64c[((to_uchar (in[1]) << 2)
     119                     + (--inlen ? to_uchar (in[2]) >> 6 : 0))
     120                    & 0x3f]
     121           : '=');
     122        if (!--outlen)
     123          break;
     124        *out++ = inlen ? b64c[to_uchar (in[2]) & 0x3f] : '=';
     125        if (!--outlen)
     126          break;
     127        if (inlen)
     128          inlen--;
     129        if (inlen)
     130          in += 3;
     131      }
     132  
     133    if (outlen)
     134      *out = '\0';
     135  }
     136  
     137  /* Allocate a buffer and store zero terminated base64 encoded data
     138     from array IN of size INLEN, returning BASE64_LENGTH(INLEN), i.e.,
     139     the length of the encoded data, excluding the terminating zero.  On
     140     return, the OUT variable will hold a pointer to newly allocated
     141     memory that must be deallocated by the caller.  If output string
     142     length would overflow, 0 is returned and OUT is set to NULL.  If
     143     memory allocation failed, OUT is set to NULL, and the return value
     144     indicates length of the requested memory block, i.e.,
     145     BASE64_LENGTH(inlen) + 1. */
     146  idx_t
     147  base64_encode_alloc (const char *in, idx_t inlen, char **out)
     148  {
     149    /* Check for overflow in outlen computation.
     150       Treat negative INLEN as overflow, for better compatibility with
     151       pre-2021-08-27 API, which used size_t.  */
     152    idx_t in_over_3 = inlen / 3 + (inlen % 3 != 0), outlen;
     153    if (! INT_MULTIPLY_OK (in_over_3, 4, &outlen) || inlen < 0)
     154      {
     155        *out = NULL;
     156        return 0;
     157      }
     158    outlen++;
     159  
     160    *out = imalloc (outlen);
     161    if (!*out)
     162      return outlen;
     163  
     164    base64_encode (in, inlen, *out, outlen);
     165  
     166    return outlen - 1;
     167  }
     168  
/* With this approach this file works independent of the charset used
   (think EBCDIC).  However, it does assume that the characters in the
   Base64 alphabet (A-Za-z0-9+/) are encoded in 0..255.  POSIX
   1003.1-2001 requires that char and unsigned char are 8-bit
   quantities, though, taking care of that problem.  But this may be a
   potential problem on non-POSIX C99 platforms.

   B64 (C) expands to a constant expression: the 6-bit value (0..63) of
   the Base64 alphabet character whose character code is C, or -1 if C
   is not the code of any alphabet character.  Note that '=' yields -1.

   IBM C V6 for AIX mishandles "#define B64(x) ...'x'...", so use "_"
   as the formal parameter rather than "x".  */
#define B64(_)                                  \
  ((_) == 'A' ? 0                               \
   : (_) == 'B' ? 1                             \
   : (_) == 'C' ? 2                             \
   : (_) == 'D' ? 3                             \
   : (_) == 'E' ? 4                             \
   : (_) == 'F' ? 5                             \
   : (_) == 'G' ? 6                             \
   : (_) == 'H' ? 7                             \
   : (_) == 'I' ? 8                             \
   : (_) == 'J' ? 9                             \
   : (_) == 'K' ? 10                            \
   : (_) == 'L' ? 11                            \
   : (_) == 'M' ? 12                            \
   : (_) == 'N' ? 13                            \
   : (_) == 'O' ? 14                            \
   : (_) == 'P' ? 15                            \
   : (_) == 'Q' ? 16                            \
   : (_) == 'R' ? 17                            \
   : (_) == 'S' ? 18                            \
   : (_) == 'T' ? 19                            \
   : (_) == 'U' ? 20                            \
   : (_) == 'V' ? 21                            \
   : (_) == 'W' ? 22                            \
   : (_) == 'X' ? 23                            \
   : (_) == 'Y' ? 24                            \
   : (_) == 'Z' ? 25                            \
   : (_) == 'a' ? 26                            \
   : (_) == 'b' ? 27                            \
   : (_) == 'c' ? 28                            \
   : (_) == 'd' ? 29                            \
   : (_) == 'e' ? 30                            \
   : (_) == 'f' ? 31                            \
   : (_) == 'g' ? 32                            \
   : (_) == 'h' ? 33                            \
   : (_) == 'i' ? 34                            \
   : (_) == 'j' ? 35                            \
   : (_) == 'k' ? 36                            \
   : (_) == 'l' ? 37                            \
   : (_) == 'm' ? 38                            \
   : (_) == 'n' ? 39                            \
   : (_) == 'o' ? 40                            \
   : (_) == 'p' ? 41                            \
   : (_) == 'q' ? 42                            \
   : (_) == 'r' ? 43                            \
   : (_) == 's' ? 44                            \
   : (_) == 't' ? 45                            \
   : (_) == 'u' ? 46                            \
   : (_) == 'v' ? 47                            \
   : (_) == 'w' ? 48                            \
   : (_) == 'x' ? 49                            \
   : (_) == 'y' ? 50                            \
   : (_) == 'z' ? 51                            \
   : (_) == '0' ? 52                            \
   : (_) == '1' ? 53                            \
   : (_) == '2' ? 54                            \
   : (_) == '3' ? 55                            \
   : (_) == '4' ? 56                            \
   : (_) == '5' ? 57                            \
   : (_) == '6' ? 58                            \
   : (_) == '7' ? 59                            \
   : (_) == '8' ? 60                            \
   : (_) == '9' ? 61                            \
   : (_) == '+' ? 62                            \
   : (_) == '/' ? 63                            \
   : -1)
     244  
/* Decoding table indexed by character code 0..255.  Each entry is
   B64 of its own index: the 6-bit value of the Base64 alphabet
   character with that code, or -1 for every other code (including
   the padding character '=').  Building the entries with B64 keeps
   the table independent of the execution character set.  */
static const signed char b64[0x100] = {
  B64 (0), B64 (1), B64 (2), B64 (3),
  B64 (4), B64 (5), B64 (6), B64 (7),
  B64 (8), B64 (9), B64 (10), B64 (11),
  B64 (12), B64 (13), B64 (14), B64 (15),
  B64 (16), B64 (17), B64 (18), B64 (19),
  B64 (20), B64 (21), B64 (22), B64 (23),
  B64 (24), B64 (25), B64 (26), B64 (27),
  B64 (28), B64 (29), B64 (30), B64 (31),
  B64 (32), B64 (33), B64 (34), B64 (35),
  B64 (36), B64 (37), B64 (38), B64 (39),
  B64 (40), B64 (41), B64 (42), B64 (43),
  B64 (44), B64 (45), B64 (46), B64 (47),
  B64 (48), B64 (49), B64 (50), B64 (51),
  B64 (52), B64 (53), B64 (54), B64 (55),
  B64 (56), B64 (57), B64 (58), B64 (59),
  B64 (60), B64 (61), B64 (62), B64 (63),
  B64 (64), B64 (65), B64 (66), B64 (67),
  B64 (68), B64 (69), B64 (70), B64 (71),
  B64 (72), B64 (73), B64 (74), B64 (75),
  B64 (76), B64 (77), B64 (78), B64 (79),
  B64 (80), B64 (81), B64 (82), B64 (83),
  B64 (84), B64 (85), B64 (86), B64 (87),
  B64 (88), B64 (89), B64 (90), B64 (91),
  B64 (92), B64 (93), B64 (94), B64 (95),
  B64 (96), B64 (97), B64 (98), B64 (99),
  B64 (100), B64 (101), B64 (102), B64 (103),
  B64 (104), B64 (105), B64 (106), B64 (107),
  B64 (108), B64 (109), B64 (110), B64 (111),
  B64 (112), B64 (113), B64 (114), B64 (115),
  B64 (116), B64 (117), B64 (118), B64 (119),
  B64 (120), B64 (121), B64 (122), B64 (123),
  B64 (124), B64 (125), B64 (126), B64 (127),
  B64 (128), B64 (129), B64 (130), B64 (131),
  B64 (132), B64 (133), B64 (134), B64 (135),
  B64 (136), B64 (137), B64 (138), B64 (139),
  B64 (140), B64 (141), B64 (142), B64 (143),
  B64 (144), B64 (145), B64 (146), B64 (147),
  B64 (148), B64 (149), B64 (150), B64 (151),
  B64 (152), B64 (153), B64 (154), B64 (155),
  B64 (156), B64 (157), B64 (158), B64 (159),
  B64 (160), B64 (161), B64 (162), B64 (163),
  B64 (164), B64 (165), B64 (166), B64 (167),
  B64 (168), B64 (169), B64 (170), B64 (171),
  B64 (172), B64 (173), B64 (174), B64 (175),
  B64 (176), B64 (177), B64 (178), B64 (179),
  B64 (180), B64 (181), B64 (182), B64 (183),
  B64 (184), B64 (185), B64 (186), B64 (187),
  B64 (188), B64 (189), B64 (190), B64 (191),
  B64 (192), B64 (193), B64 (194), B64 (195),
  B64 (196), B64 (197), B64 (198), B64 (199),
  B64 (200), B64 (201), B64 (202), B64 (203),
  B64 (204), B64 (205), B64 (206), B64 (207),
  B64 (208), B64 (209), B64 (210), B64 (211),
  B64 (212), B64 (213), B64 (214), B64 (215),
  B64 (216), B64 (217), B64 (218), B64 (219),
  B64 (220), B64 (221), B64 (222), B64 (223),
  B64 (224), B64 (225), B64 (226), B64 (227),
  B64 (228), B64 (229), B64 (230), B64 (231),
  B64 (232), B64 (233), B64 (234), B64 (235),
  B64 (236), B64 (237), B64 (238), B64 (239),
  B64 (240), B64 (241), B64 (242), B64 (243),
  B64 (244), B64 (245), B64 (246), B64 (247),
  B64 (248), B64 (249), B64 (250), B64 (251),
  B64 (252), B64 (253), B64 (254), B64 (255)
};
     311  
     312  #if UCHAR_MAX == 255
     313  # define uchar_in_range(c) true
     314  #else
     315  # define uchar_in_range(c) ((c) <= 255)
     316  #endif
     317  
     318  /* Return true if CH is a character from the Base64 alphabet, and
     319     false otherwise.  Note that '=' is padding and not considered to be
     320     part of the alphabet.  */
     321  bool
     322  isbase64 (char ch)
     323  {
     324    return uchar_in_range (to_uchar (ch)) && 0 <= b64[to_uchar (ch)];
     325  }
     326  
/* Initialize decode-context buffer, CTX, by resetting CTX->i, the
   count of input bytes currently buffered in CTX->buf, to zero.
   No other member of CTX is touched.  */
void
base64_decode_ctx_init (struct base64_decode_context *ctx)
{
  ctx->i = 0;
}
     333  
     334  /* If CTX->i is 0 or 4, there are four or more bytes in [*IN..IN_END), and
     335     none of those four is a newline, then return *IN.  Otherwise, copy up to
     336     4 - CTX->i non-newline bytes from that range into CTX->buf, starting at
     337     index CTX->i and setting CTX->i to reflect the number of bytes copied,
     338     and return CTX->buf.  In either case, advance *IN to point to the byte
     339     after the last one processed, and set *N_NON_NEWLINE to the number of
     340     verified non-newline bytes accessible through the returned pointer.  */
     341  static char *
     342  get_4 (struct base64_decode_context *ctx,
     343         char const *restrict *in, char const *restrict in_end,
     344         idx_t *n_non_newline)
     345  {
     346    if (ctx->i == 4)
     347      ctx->i = 0;
     348  
     349    if (ctx->i == 0)
     350      {
     351        char const *t = *in;
     352        if (4 <= in_end - *in && memchr (t, '\n', 4) == NULL)
     353          {
     354            /* This is the common case: no newline.  */
     355            *in += 4;
     356            *n_non_newline = 4;
     357            return (char *) t;
     358          }
     359      }
     360  
     361    {
     362      /* Copy non-newline bytes into BUF.  */
     363      char const *p = *in;
     364      while (p < in_end)
     365        {
     366          char c = *p++;
     367          if (c != '\n')
     368            {
     369              ctx->buf[ctx->i++] = c;
     370              if (ctx->i == 4)
     371                break;
     372            }
     373        }
     374  
     375      *in = p;
     376      *n_non_newline = ctx->i;
     377      return ctx->buf;
     378    }
     379  }
     380  
/* Failure exit for decode_4, for use once output bytes may already
   have been written: store the local write cursor OUT back through
   OUTP, then return false from the enclosing function.  It expands to
   a single statement and requires variables named 'out' and 'outp' to
   be in scope where it is used.  */
#define return_false                            \
  do                                            \
    {                                           \
      *outp = out;                              \
      return false;                             \
    }                                           \
  while (false)
     388  
     389  /* Decode up to four bytes of base64-encoded data, IN, of length INLEN
     390     into the output buffer, *OUT, of size *OUTLEN bytes.  Return true if
     391     decoding is successful, false otherwise.  If *OUTLEN is too small,
     392     as many bytes as possible are written to *OUT.  On return, advance
     393     *OUT to point to the byte after the last one written, and decrement
     394     *OUTLEN to reflect the number of bytes remaining in *OUT.  */
     395  static bool
     396  decode_4 (char const *restrict in, idx_t inlen,
     397            char *restrict *outp, idx_t *outleft)
     398  {
     399    char *out = *outp;
     400    if (inlen < 2)
     401      return false;
     402  
     403    if (!isbase64 (in[0]) || !isbase64 (in[1]))
     404      return false;
     405  
     406    if (*outleft)
     407      {
     408        *out++ = ((b64[to_uchar (in[0])] << 2)
     409                  | (b64[to_uchar (in[1])] >> 4));
     410        --*outleft;
     411      }
     412  
     413    if (inlen == 2)
     414      return_false;
     415  
     416    if (in[2] == '=')
     417      {
     418        if (inlen != 4)
     419          return_false;
     420  
     421        if (in[3] != '=')
     422          return_false;
     423      }
     424    else
     425      {
     426        if (!isbase64 (in[2]))
     427          return_false;
     428  
     429        if (*outleft)
     430          {
     431            *out++ = (((b64[to_uchar (in[1])] << 4) & 0xf0)
     432                      | (b64[to_uchar (in[2])] >> 2));
     433            --*outleft;
     434          }
     435  
     436        if (inlen == 3)
     437          return_false;
     438  
     439        if (in[3] == '=')
     440          {
     441            if (inlen != 4)
     442              return_false;
     443          }
     444        else
     445          {
     446            if (!isbase64 (in[3]))
     447              return_false;
     448  
     449            if (*outleft)
     450              {
     451                *out++ = (((b64[to_uchar (in[2])] << 6) & 0xc0)
     452                          | b64[to_uchar (in[3])]);
     453                --*outleft;
     454              }
     455          }
     456      }
     457  
     458    *outp = out;
     459    return true;
     460  }
     461  
/* Decode base64-encoded input array IN of length INLEN to output array
   OUT that can hold *OUTLEN bytes.  The input data may be interspersed
   with newlines.  Return true if decoding was successful, i.e. if the
   input was valid base64 data, false otherwise.  If *OUTLEN is too
   small, as many bytes as possible will be written to OUT.  On return,
   *OUTLEN holds the length of decoded bytes in OUT.  Note that as soon
   as any non-alphabet, non-newline character is encountered, decoding
   is stopped and false is returned.  If INLEN is zero, then process
   only whatever data is stored in CTX.

   Initially, CTX must have been initialized via base64_decode_ctx_init.
   Subsequent calls to this function must reuse whatever state is recorded
   in that buffer.  It is necessary for when a quadruple of base64 input
   bytes spans two input buffers.

   If CTX is NULL then newlines are treated as garbage and the input
   buffer is processed as a unit.  */

bool
base64_decode_ctx (struct base64_decode_context *ctx,
                   const char *restrict in, idx_t inlen,
                   char *restrict out, idx_t *outlen)
{
  /* Output space still available; it counts down as bytes are written.  */
  idx_t outleft = *outlen;
  bool ignore_newlines = ctx != NULL;
  /* Set when a context is supplied together with empty input: the
     caller wants whatever is still buffered in CTX to be decoded.  */
  bool flush_ctx = false;
  /* Number of bytes buffered in CTX on entry; stays 0 when CTX is NULL.
     It is sampled once and not updated inside the loop.  */
  unsigned int ctx_i = 0;

  if (ignore_newlines)
    {
      ctx_i = ctx->i;
      flush_ctx = inlen == 0;
    }


  while (true)
    {
      idx_t outleft_save = outleft;
      /* Fast path, used only if CTX held no bytes on entry and we are
         not flushing: decode quadruples straight from IN until
         decode_4 rejects one.  */
      if (ctx_i == 0 && !flush_ctx)
        {
          while (true)
            {
              /* Save a copy of outleft, in case we need to re-parse this
                 block of four bytes.  */
              outleft_save = outleft;
              if (!decode_4 (in, inlen, &out, &outleft))
                break;

              in += 4;
              inlen -= 4;
            }
        }

      /* All input consumed and nothing to flush: success.  */
      if (inlen == 0 && !flush_ctx)
        break;

      /* Handle the common case of 72-byte wrapped lines.
         This also handles any other multiple-of-4-byte wrapping.  */
      if (inlen && *in == '\n' && ignore_newlines)
        {
          ++in;
          --inlen;
          continue;
        }

      /* Restore OUT and OUTLEFT.  This discards any bytes that the
         failed decode_4 call above wrote before it detected the
         problem.  */
      out -= outleft_save - outleft;
      outleft = outleft_save;

      {
        char const *in_end = in + inlen;
        char const *non_nl;

        /* Slow path.  With a context, get_4 advances IN and replaces
           INLEN with the number of non-newline bytes available at
           NON_NL (at most 4).  */
        if (ignore_newlines)
          non_nl = get_4 (ctx, &in, in_end, &inlen);
        else
          non_nl = in;  /* Might have nl in this case. */

        /* If the input is empty or consists solely of newlines (0 non-newlines),
           then we're done.  Likewise if there are fewer than 4 bytes when not
           flushing context and not treating newlines as garbage.  */
        if (inlen == 0 || (inlen < 4 && !flush_ctx && ignore_newlines))
          {
            /* Zeroing INLEN makes the final test report success.  */
            inlen = 0;
            break;
          }
        /* INLEN is nonzero here, so breaking out on a decode failure
           makes the function return false.  */
        if (!decode_4 (non_nl, inlen, &out, &outleft))
          break;

        /* Recompute how much raw input is left after IN.  */
        inlen = in_end - in;
      }
    }

  /* Turn the initial capacity into the number of bytes written.  */
  *outlen -= outleft;

  return inlen == 0;
}
     559  
     560  /* Allocate an output buffer in *OUT, and decode the base64 encoded
     561     data stored in IN of size INLEN to the *OUT buffer.  On return, the
     562     size of the decoded data is stored in *OUTLEN.  OUTLEN may be NULL,
     563     if the caller is not interested in the decoded length.  *OUT may be
     564     NULL to indicate an out of memory error, in which case *OUTLEN
     565     contains the size of the memory block needed.  The function returns
     566     true on successful decoding and memory allocation errors.  (Use the
     567     *OUT and *OUTLEN parameters to differentiate between successful
     568     decoding and memory error.)  The function returns false if the
     569     input was invalid, in which case *OUT is NULL and *OUTLEN is
     570     undefined. */
     571  bool
     572  base64_decode_alloc_ctx (struct base64_decode_context *ctx,
     573                           const char *in, idx_t inlen, char **out,
     574                           idx_t *outlen)
     575  {
     576    /* This may allocate a few bytes too many, depending on input,
     577       but it's not worth the extra CPU time to compute the exact size.
     578       The exact size is 3 * (inlen + (ctx ? ctx->i : 0)) / 4, minus 1 if the
     579       input ends with "=" and minus another 1 if the input ends with "==".
     580       Shifting before multiplying avoids the possibility of overflow.  */
     581    idx_t needlen = 3 * ((inlen >> 2) + 1);
     582  
     583    *out = imalloc (needlen);
     584    if (!*out)
     585      return true;
     586  
     587    if (!base64_decode_ctx (ctx, in, inlen, *out, &needlen))
     588      {
     589        free (*out);
     590        *out = NULL;
     591        return false;
     592      }
     593  
     594    if (outlen)
     595      *outlen = needlen;
     596  
     597    return true;
     598  }