1  /* gbase64.c - Base64 encoding/decoding
       2   *
       3   *  Copyright (C) 2006 Alexander Larsson <alexl@redhat.com>
       4   *  Copyright (C) 2000-2003 Ximian Inc.
       5   *
       6   * SPDX-License-Identifier: LGPL-2.1-or-later
       7   *
       8   * This library is free software; you can redistribute it and/or
       9   * modify it under the terms of the GNU Lesser General Public
      10   * License as published by the Free Software Foundation; either
      11   * version 2.1 of the License, or (at your option) any later version.
      12   *
      13   * This library is distributed in the hope that it will be useful,
      14   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16   * Lesser General Public License for more details.
      17   *
      18   * You should have received a copy of the GNU Lesser General Public License
      19   * along with this library; if not, see <http://www.gnu.org/licenses/>.
      20   *
      21   * This is based on code in camel, written by:
      22   *    Michael Zucchi <notzed@ximian.com>
      23   *    Jeffrey Stedfast <fejj@ximian.com>
      24   */
      25  
      26  #include "config.h"
      27  
      28  #include <string.h>
      29  
      30  #include "gbase64.h"
      31  #include "gtestutils.h"
      32  #include "glibintl.h"
      33  
      34  static const char base64_alphabet[] =
      35          "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
      36  
      37  /**
      38   * g_base64_encode_step:
      39   * @in: (array length=len) (element-type guint8): the binary data to encode
      40   * @len: the length of @in
      41   * @break_lines: whether to break long lines
      42   * @out: (out) (array) (element-type guint8): pointer to destination buffer
      43   * @state: (inout): Saved state between steps, initialize to 0
      44   * @save: (inout): Saved state between steps, initialize to 0
      45   *
      46   * Incrementally encode a sequence of binary data into its Base-64 stringified
      47   * representation. By calling this function multiple times you can convert
      48   * data in chunks to avoid having to have the full encoded data in memory.
      49   *
      50   * When all of the data has been converted you must call
      51   * g_base64_encode_close() to flush the saved state.
      52   *
      53   * The output buffer must be large enough to fit all the data that will
      54   * be written to it. Due to the way base64 encodes you will need
      55   * at least: (@len / 3 + 1) * 4 + 4 bytes (+ 4 may be needed in case of
      56   * non-zero state). If you enable line-breaking you will need at least:
      57   * ((@len / 3 + 1) * 4 + 4) / 76 + 1 bytes of extra space.
      58   *
      59   * @break_lines is typically used when putting base64-encoded data in emails.
      60   * It breaks the lines at 76 columns instead of putting all of the text on
      61   * the same line. This avoids problems with long lines in the email system.
      62   * Note however that it breaks the lines with `LF` characters, not
      63   * `CR LF` sequences, so the result cannot be passed directly to SMTP
      64   * or certain other protocols.
      65   *
      66   * Returns: The number of bytes of output that was written
      67   *
      68   * Since: 2.12
      69   */
      70  gsize
      71  g_base64_encode_step (const guchar *in,
      72                        gsize         len,
      73                        gboolean      break_lines,
      74                        gchar        *out,
      75                        gint         *state,
      76                        gint         *save)
      77  {
      78    char *outptr;
      79    const guchar *inptr;
      80  
      81    g_return_val_if_fail (in != NULL || len == 0, 0);
      82    g_return_val_if_fail (out != NULL, 0);
      83    g_return_val_if_fail (state != NULL, 0);
      84    g_return_val_if_fail (save != NULL, 0);
      85  
      86    if (len == 0)
      87      return 0;
      88  
      89    inptr = in;
      90    outptr = out;
      91  
      92    if (len + ((char *) save) [0] > 2)
      93      {
      94        const guchar *inend = in+len-2;
      95        int c1, c2, c3;
      96        int already;
      97  
      98        already = *state;
      99  
     100        switch (((char *) save) [0])
     101          {
     102          case 1:
     103            c1 = ((unsigned char *) save) [1];
     104            goto skip1;
     105          case 2:
     106            c1 = ((unsigned char *) save) [1];
     107            c2 = ((unsigned char *) save) [2];
     108            goto skip2;
     109          }
     110  
     111        /*
     112         * yes, we jump into the loop, no i'm not going to change it,
     113         * it's beautiful!
     114         */
     115        while (inptr < inend)
     116          {
     117            c1 = *inptr++;
     118          skip1:
     119            c2 = *inptr++;
     120          skip2:
     121            c3 = *inptr++;
     122            *outptr++ = base64_alphabet [ c1 >> 2 ];
     123            *outptr++ = base64_alphabet [ c2 >> 4 |
     124                                          ((c1&0x3) << 4) ];
     125            *outptr++ = base64_alphabet [ ((c2 &0x0f) << 2) |
     126                                          (c3 >> 6) ];
     127            *outptr++ = base64_alphabet [ c3 & 0x3f ];
     128            /* this is a bit ugly ... */
     129            if (break_lines && (++already) >= 19)
     130              {
     131                *outptr++ = '\n';
     132                already = 0;
     133              }
     134          }
     135  
     136        ((char *)save)[0] = 0;
     137        len = 2 - (inptr - inend);
     138        *state = already;
     139      }
     140  
     141    g_assert (len == 0 || len == 1 || len == 2);
     142  
     143      {
     144        char *saveout;
     145  
     146        /* points to the slot for the next char to save */
     147        saveout = & (((char *)save)[1]) + ((char *)save)[0];
     148  
     149        /* len can only be 0 1 or 2 */
     150        switch(len)
     151          {
     152          case 2:
     153            *saveout++ = *inptr++;
     154            G_GNUC_FALLTHROUGH;
     155          case 1:
     156            *saveout++ = *inptr++;
     157          }
     158        ((char *)save)[0] += len;
     159      }
     160  
     161    return outptr - out;
     162  }
     163  
     164  /**
     165   * g_base64_encode_close:
     166   * @break_lines: whether to break long lines
     167   * @out: (out) (array) (element-type guint8): pointer to destination buffer
     168   * @state: (inout): Saved state from g_base64_encode_step()
     169   * @save: (inout): Saved state from g_base64_encode_step()
     170   *
     171   * Flush the status from a sequence of calls to g_base64_encode_step().
     172   *
     173   * The output buffer must be large enough to fit all the data that will
     174   * be written to it. It will need up to 4 bytes, or up to 5 bytes if
     175   * line-breaking is enabled.
     176   *
     177   * The @out array will not be automatically nul-terminated.
     178   *
     179   * Returns: The number of bytes of output that was written
     180   *
     181   * Since: 2.12
     182   */
     183  gsize
     184  g_base64_encode_close (gboolean  break_lines,
     185                         gchar    *out,
     186                         gint     *state,
     187                         gint     *save)
     188  {
     189    int c1, c2;
     190    char *outptr = out;
     191  
     192    g_return_val_if_fail (out != NULL, 0);
     193    g_return_val_if_fail (state != NULL, 0);
     194    g_return_val_if_fail (save != NULL, 0);
     195  
     196    c1 = ((unsigned char *) save) [1];
     197    c2 = ((unsigned char *) save) [2];
     198  
     199    switch (((char *) save) [0])
     200      {
     201      case 2:
     202        outptr [2] = base64_alphabet[ ( (c2 &0x0f) << 2 ) ];
     203        g_assert (outptr [2] != 0);
     204        goto skip;
     205      case 1:
     206        outptr[2] = '=';
     207        c2 = 0;  /* saved state here is not relevant */
     208      skip:
     209        outptr [0] = base64_alphabet [ c1 >> 2 ];
     210        outptr [1] = base64_alphabet [ c2 >> 4 | ( (c1&0x3) << 4 )];
     211        outptr [3] = '=';
     212        outptr += 4;
     213        break;
     214      }
     215    if (break_lines)
     216      *outptr++ = '\n';
     217  
     218    *save = 0;
     219    *state = 0;
     220  
     221    return outptr - out;
     222  }
     223  
     224  /**
     225   * g_base64_encode:
     226   * @data: (array length=len) (element-type guint8) (nullable): the binary data to encode
     227   * @len: the length of @data
     228   *
     229   * Encode a sequence of binary data into its Base-64 stringified
     230   * representation.
     231   *
     232   * Returns: (transfer full): a newly allocated, zero-terminated Base-64
     233   *               encoded string representing @data. The returned string must
     234   *               be freed with g_free().
     235   *
     236   * Since: 2.12
     237   */
     238  gchar *
     239  g_base64_encode (const guchar *data,
     240                   gsize         len)
     241  {
     242    gchar *out;
     243    gint state = 0, outlen;
     244    gint save = 0;
     245  
     246    g_return_val_if_fail (data != NULL || len == 0, NULL);
     247  
     248    /* We can use a smaller limit here, since we know the saved state is 0,
     249       +1 is needed for trailing \0, also check for unlikely integer overflow */
     250    g_return_val_if_fail (len < ((G_MAXSIZE - 1) / 4 - 1) * 3, NULL);
     251  
     252    out = g_malloc ((len / 3 + 1) * 4 + 1);
     253  
     254    outlen = g_base64_encode_step (data, len, FALSE, out, &state, &save);
     255    outlen += g_base64_encode_close (FALSE, out + outlen, &state, &save);
     256    out[outlen] = '\0';
     257  
     258    return (gchar *) out;
     259  }
     260  
     261  static const unsigned char mime_base64_rank[256] = {
     262    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
     263    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
     264    255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
     265     52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,  0,255,255,
     266    255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
     267     15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
     268    255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
     269     41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
     270    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
     271    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
     272    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
     273    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
     274    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
     275    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
     276    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
     277    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
     278  };
     279  
     280  /**
     281   * g_base64_decode_step: (skip)
     282   * @in: (array length=len) (element-type guint8): binary input data
     283   * @len: max length of @in data to decode
     284   * @out: (out caller-allocates) (array) (element-type guint8): output buffer
     285   * @state: (inout): Saved state between steps, initialize to 0
     286   * @save: (inout): Saved state between steps, initialize to 0
     287   *
     288   * Incrementally decode a sequence of binary data from its Base-64 stringified
     289   * representation. By calling this function multiple times you can convert
     290   * data in chunks to avoid having to have the full encoded data in memory.
     291   *
     292   * The output buffer must be large enough to fit all the data that will
     293   * be written to it. Since base64 encodes 3 bytes in 4 chars you need
     294   * at least: (@len / 4) * 3 + 3 bytes (+ 3 may be needed in case of non-zero
     295   * state).
     296   *
     297   * Returns: The number of bytes of output that was written
     298   *
     299   * Since: 2.12
     300   **/
     301  gsize
     302  g_base64_decode_step (const gchar  *in,
     303                        gsize         len,
     304                        guchar       *out,
     305                        gint         *state,
     306                        guint        *save)
     307  {
     308    const guchar *inptr;
     309    guchar *outptr;
     310    const guchar *inend;
     311    guchar c, rank;
     312    guchar last[2];
     313    unsigned int v;
     314    int i;
     315  
     316    g_return_val_if_fail (in != NULL || len == 0, 0);
     317    g_return_val_if_fail (out != NULL, 0);
     318    g_return_val_if_fail (state != NULL, 0);
     319    g_return_val_if_fail (save != NULL, 0);
     320  
     321    if (len == 0)
     322      return 0;
     323  
     324    inend = (const guchar *)in+len;
     325    outptr = out;
     326  
     327    /* convert 4 base64 bytes to 3 normal bytes */
     328    v=*save;
     329    i=*state;
     330  
     331    last[0] = last[1] = 0;
     332  
     333    /* we use the sign in the state to determine if we got a padding character
     334       in the previous sequence */
     335    if (i < 0)
     336      {
     337        i = -i;
     338        last[0] = '=';
     339      }
     340  
     341    inptr = (const guchar *)in;
     342    while (inptr < inend)
     343      {
     344        c = *inptr++;
     345        rank = mime_base64_rank [c];
     346        if (rank != 0xff)
     347          {
     348            last[1] = last[0];
     349            last[0] = c;
     350            v = (v<<6) | rank;
     351            i++;
     352            if (i==4)
     353              {
     354                *outptr++ = v>>16;
     355                if (last[1] != '=')
     356                  *outptr++ = v>>8;
     357                if (last[0] != '=')
     358                  *outptr++ = v;
     359                i=0;
     360              }
     361          }
     362      }
     363  
     364    *save = v;
     365    *state = last[0] == '=' ? -i : i;
     366  
     367    return outptr - out;
     368  }
     369  
     370  /**
     371   * g_base64_decode:
     372   * @text: (not nullable): zero-terminated string with base64 text to decode
     373   * @out_len: (out): The length of the decoded data is written here
     374   *
     375   * Decode a sequence of Base-64 encoded text into binary data.  Note
     376   * that the returned binary data is not necessarily zero-terminated,
     377   * so it should not be used as a character string.
     378   *
     379   * Returns: (transfer full) (array length=out_len) (element-type guint8):
     380   *               newly allocated buffer containing the binary data
     381   *               that @text represents. The returned buffer must
     382   *               be freed with g_free().
     383   *
     384   * Since: 2.12
     385   */
     386  guchar *
     387  g_base64_decode (const gchar *text,
     388                   gsize       *out_len)
     389  {
     390    guchar *ret;
     391    gsize input_length;
     392    gint state = 0;
     393    guint save = 0;
     394  
     395    g_return_val_if_fail (text != NULL, NULL);
     396    g_return_val_if_fail (out_len != NULL, NULL);
     397  
     398    input_length = strlen (text);
     399  
     400    /* We can use a smaller limit here, since we know the saved state is 0,
     401       +1 used to avoid calling g_malloc0(0), and hence returning NULL */
     402    ret = g_malloc0 ((input_length / 4) * 3 + 1);
     403  
     404    *out_len = g_base64_decode_step (text, input_length, ret, &state, &save);
     405  
     406    return ret;
     407  }
     408  
     409  /**
     410   * g_base64_decode_inplace:
     411   * @text: (inout) (array length=out_len) (element-type guint8): zero-terminated
     412   *        string with base64 text to decode
     413   * @out_len: (inout): The length of the decoded data is written here
     414   *
     415   * Decode a sequence of Base-64 encoded text into binary data
     416   * by overwriting the input data.
     417   *
     418   * Returns: (transfer none): The binary data that @text responds. This pointer
     419   *               is the same as the input @text.
     420   *
     421   * Since: 2.20
     422   */
     423  guchar *
     424  g_base64_decode_inplace (gchar *text,
     425                           gsize *out_len)
     426  {
     427    gint input_length, state = 0;
     428    guint save = 0;
     429  
     430    g_return_val_if_fail (text != NULL, NULL);
     431    g_return_val_if_fail (out_len != NULL, NULL);
     432  
     433    input_length = strlen (text);
     434  
     435    g_return_val_if_fail (input_length > 1, NULL);
     436  
     437    *out_len = g_base64_decode_step (text, input_length, (guchar *) text, &state, &save);
     438  
     439    return (guchar *) text;
     440  }