1 /* gbase64.c - Base64 encoding/decoding
2 *
3 * Copyright (C) 2006 Alexander Larsson <alexl@redhat.com>
4 * Copyright (C) 2000-2003 Ximian Inc.
5 *
6 * SPDX-License-Identifier: LGPL-2.1-or-later
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this library; if not, see <http://www.gnu.org/licenses/>.
20 *
21 * This is based on code in camel, written by:
22 * Michael Zucchi <notzed@ximian.com>
23 * Jeffrey Stedfast <fejj@ximian.com>
24 */
25
26 #include "config.h"
27
28 #include <string.h>
29
30 #include "gbase64.h"
31 #include "gtestutils.h"
32 #include "glibintl.h"
33
/* The 64 output symbols of Base64, indexed by their 6-bit value:
 * upper-case letters, lower-case letters, digits, then '+' and '/'. */
static const char base64_alphabet[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789+/";
36
37 /**
38 * g_base64_encode_step:
39 * @in: (array length=len) (element-type guint8): the binary data to encode
40 * @len: the length of @in
41 * @break_lines: whether to break long lines
42 * @out: (out) (array) (element-type guint8): pointer to destination buffer
43 * @state: (inout): Saved state between steps, initialize to 0
44 * @save: (inout): Saved state between steps, initialize to 0
45 *
46 * Incrementally encode a sequence of binary data into its Base-64 stringified
47 * representation. By calling this function multiple times you can convert
48 * data in chunks to avoid having to have the full encoded data in memory.
49 *
50 * When all of the data has been converted you must call
51 * g_base64_encode_close() to flush the saved state.
52 *
53 * The output buffer must be large enough to fit all the data that will
54 * be written to it. Due to the way base64 encodes you will need
55 * at least: (@len / 3 + 1) * 4 + 4 bytes (+ 4 may be needed in case of
56 * non-zero state). If you enable line-breaking you will need at least:
57 * ((@len / 3 + 1) * 4 + 4) / 76 + 1 bytes of extra space.
58 *
59 * @break_lines is typically used when putting base64-encoded data in emails.
60 * It breaks the lines at 76 columns instead of putting all of the text on
61 * the same line. This avoids problems with long lines in the email system.
62 * Note however that it breaks the lines with `LF` characters, not
63 * `CR LF` sequences, so the result cannot be passed directly to SMTP
64 * or certain other protocols.
65 *
66 * Returns: The number of bytes of output that was written
67 *
68 * Since: 2.12
69 */
70 gsize
71 g_base64_encode_step (const guchar *in,
72 gsize len,
73 gboolean break_lines,
74 gchar *out,
75 gint *state,
76 gint *save)
77 {
78 char *outptr;
79 const guchar *inptr;
80
81 g_return_val_if_fail (in != NULL || len == 0, 0);
82 g_return_val_if_fail (out != NULL, 0);
83 g_return_val_if_fail (state != NULL, 0);
84 g_return_val_if_fail (save != NULL, 0);
85
86 if (len == 0)
87 return 0;
88
89 inptr = in;
90 outptr = out;
91
92 if (len + ((char *) save) [0] > 2)
93 {
94 const guchar *inend = in+len-2;
95 int c1, c2, c3;
96 int already;
97
98 already = *state;
99
100 switch (((char *) save) [0])
101 {
102 case 1:
103 c1 = ((unsigned char *) save) [1];
104 goto skip1;
105 case 2:
106 c1 = ((unsigned char *) save) [1];
107 c2 = ((unsigned char *) save) [2];
108 goto skip2;
109 }
110
111 /*
112 * yes, we jump into the loop, no i'm not going to change it,
113 * it's beautiful!
114 */
115 while (inptr < inend)
116 {
117 c1 = *inptr++;
118 skip1:
119 c2 = *inptr++;
120 skip2:
121 c3 = *inptr++;
122 *outptr++ = base64_alphabet [ c1 >> 2 ];
123 *outptr++ = base64_alphabet [ c2 >> 4 |
124 ((c1&0x3) << 4) ];
125 *outptr++ = base64_alphabet [ ((c2 &0x0f) << 2) |
126 (c3 >> 6) ];
127 *outptr++ = base64_alphabet [ c3 & 0x3f ];
128 /* this is a bit ugly ... */
129 if (break_lines && (++already) >= 19)
130 {
131 *outptr++ = '\n';
132 already = 0;
133 }
134 }
135
136 ((char *)save)[0] = 0;
137 len = 2 - (inptr - inend);
138 *state = already;
139 }
140
141 g_assert (len == 0 || len == 1 || len == 2);
142
143 {
144 char *saveout;
145
146 /* points to the slot for the next char to save */
147 saveout = & (((char *)save)[1]) + ((char *)save)[0];
148
149 /* len can only be 0 1 or 2 */
150 switch(len)
151 {
152 case 2:
153 *saveout++ = *inptr++;
154 G_GNUC_FALLTHROUGH;
155 case 1:
156 *saveout++ = *inptr++;
157 }
158 ((char *)save)[0] += len;
159 }
160
161 return outptr - out;
162 }
163
164 /**
165 * g_base64_encode_close:
166 * @break_lines: whether to break long lines
167 * @out: (out) (array) (element-type guint8): pointer to destination buffer
168 * @state: (inout): Saved state from g_base64_encode_step()
169 * @save: (inout): Saved state from g_base64_encode_step()
170 *
171 * Flush the status from a sequence of calls to g_base64_encode_step().
172 *
173 * The output buffer must be large enough to fit all the data that will
174 * be written to it. It will need up to 4 bytes, or up to 5 bytes if
175 * line-breaking is enabled.
176 *
177 * The @out array will not be automatically nul-terminated.
178 *
179 * Returns: The number of bytes of output that was written
180 *
181 * Since: 2.12
182 */
183 gsize
184 g_base64_encode_close (gboolean break_lines,
185 gchar *out,
186 gint *state,
187 gint *save)
188 {
189 int c1, c2;
190 char *outptr = out;
191
192 g_return_val_if_fail (out != NULL, 0);
193 g_return_val_if_fail (state != NULL, 0);
194 g_return_val_if_fail (save != NULL, 0);
195
196 c1 = ((unsigned char *) save) [1];
197 c2 = ((unsigned char *) save) [2];
198
199 switch (((char *) save) [0])
200 {
201 case 2:
202 outptr [2] = base64_alphabet[ ( (c2 &0x0f) << 2 ) ];
203 g_assert (outptr [2] != 0);
204 goto skip;
205 case 1:
206 outptr[2] = '=';
207 c2 = 0; /* saved state here is not relevant */
208 skip:
209 outptr [0] = base64_alphabet [ c1 >> 2 ];
210 outptr [1] = base64_alphabet [ c2 >> 4 | ( (c1&0x3) << 4 )];
211 outptr [3] = '=';
212 outptr += 4;
213 break;
214 }
215 if (break_lines)
216 *outptr++ = '\n';
217
218 *save = 0;
219 *state = 0;
220
221 return outptr - out;
222 }
223
224 /**
225 * g_base64_encode:
226 * @data: (array length=len) (element-type guint8) (nullable): the binary data to encode
227 * @len: the length of @data
228 *
229 * Encode a sequence of binary data into its Base-64 stringified
230 * representation.
231 *
232 * Returns: (transfer full): a newly allocated, zero-terminated Base-64
233 * encoded string representing @data. The returned string must
234 * be freed with g_free().
235 *
236 * Since: 2.12
237 */
238 gchar *
239 g_base64_encode (const guchar *data,
240 gsize len)
241 {
242 gchar *out;
243 gint state = 0, outlen;
244 gint save = 0;
245
246 g_return_val_if_fail (data != NULL || len == 0, NULL);
247
248 /* We can use a smaller limit here, since we know the saved state is 0,
249 +1 is needed for trailing \0, also check for unlikely integer overflow */
250 g_return_val_if_fail (len < ((G_MAXSIZE - 1) / 4 - 1) * 3, NULL);
251
252 out = g_malloc ((len / 3 + 1) * 4 + 1);
253
254 outlen = g_base64_encode_step (data, len, FALSE, out, &state, &save);
255 outlen += g_base64_encode_close (FALSE, out + outlen, &state, &save);
256 out[outlen] = '\0';
257
258 return (gchar *) out;
259 }
260
/* Maps every byte value to its 6-bit Base64 value.  Bytes that are not in
 * the alphabet map to 255.  The padding character '=' maps to 0, so the
 * decoder does not skip it. */
static const unsigned char mime_base64_rank[256] = {
  /* 0x00 - 0x0f */
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  /* 0x10 - 0x1f */
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  /* 0x20 - 0x2f: '+' and '/' */
  255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
  /* 0x30 - 0x3f: '0'..'9' and '=' */
   52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,  0,255,255,
  /* 0x40 - 0x4f: 'A'..'O' */
  255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  /* 0x50 - 0x5f: 'P'..'Z' */
   15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
  /* 0x60 - 0x6f: 'a'..'o' */
  255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  /* 0x70 - 0x7f: 'p'..'z' */
   41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
  /* 0x80 - 0x8f */
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  /* 0x90 - 0x9f */
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  /* 0xa0 - 0xaf */
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  /* 0xb0 - 0xbf */
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  /* 0xc0 - 0xcf */
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  /* 0xd0 - 0xdf */
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  /* 0xe0 - 0xef */
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  /* 0xf0 - 0xff */
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
};
279
280 /**
281 * g_base64_decode_step: (skip)
282 * @in: (array length=len) (element-type guint8): binary input data
283 * @len: max length of @in data to decode
284 * @out: (out caller-allocates) (array) (element-type guint8): output buffer
285 * @state: (inout): Saved state between steps, initialize to 0
286 * @save: (inout): Saved state between steps, initialize to 0
287 *
288 * Incrementally decode a sequence of binary data from its Base-64 stringified
289 * representation. By calling this function multiple times you can convert
290 * data in chunks to avoid having to have the full encoded data in memory.
291 *
292 * The output buffer must be large enough to fit all the data that will
293 * be written to it. Since base64 encodes 3 bytes in 4 chars you need
294 * at least: (@len / 4) * 3 + 3 bytes (+ 3 may be needed in case of non-zero
295 * state).
296 *
297 * Returns: The number of bytes of output that was written
298 *
299 * Since: 2.12
300 **/
301 gsize
302 g_base64_decode_step (const gchar *in,
303 gsize len,
304 guchar *out,
305 gint *state,
306 guint *save)
307 {
308 const guchar *inptr;
309 guchar *outptr;
310 const guchar *inend;
311 guchar c, rank;
312 guchar last[2];
313 unsigned int v;
314 int i;
315
316 g_return_val_if_fail (in != NULL || len == 0, 0);
317 g_return_val_if_fail (out != NULL, 0);
318 g_return_val_if_fail (state != NULL, 0);
319 g_return_val_if_fail (save != NULL, 0);
320
321 if (len == 0)
322 return 0;
323
324 inend = (const guchar *)in+len;
325 outptr = out;
326
327 /* convert 4 base64 bytes to 3 normal bytes */
328 v=*save;
329 i=*state;
330
331 last[0] = last[1] = 0;
332
333 /* we use the sign in the state to determine if we got a padding character
334 in the previous sequence */
335 if (i < 0)
336 {
337 i = -i;
338 last[0] = '=';
339 }
340
341 inptr = (const guchar *)in;
342 while (inptr < inend)
343 {
344 c = *inptr++;
345 rank = mime_base64_rank [c];
346 if (rank != 0xff)
347 {
348 last[1] = last[0];
349 last[0] = c;
350 v = (v<<6) | rank;
351 i++;
352 if (i==4)
353 {
354 *outptr++ = v>>16;
355 if (last[1] != '=')
356 *outptr++ = v>>8;
357 if (last[0] != '=')
358 *outptr++ = v;
359 i=0;
360 }
361 }
362 }
363
364 *save = v;
365 *state = last[0] == '=' ? -i : i;
366
367 return outptr - out;
368 }
369
370 /**
371 * g_base64_decode:
372 * @text: (not nullable): zero-terminated string with base64 text to decode
373 * @out_len: (out): The length of the decoded data is written here
374 *
375 * Decode a sequence of Base-64 encoded text into binary data. Note
376 * that the returned binary data is not necessarily zero-terminated,
377 * so it should not be used as a character string.
378 *
379 * Returns: (transfer full) (array length=out_len) (element-type guint8):
380 * newly allocated buffer containing the binary data
381 * that @text represents. The returned buffer must
382 * be freed with g_free().
383 *
384 * Since: 2.12
385 */
386 guchar *
387 g_base64_decode (const gchar *text,
388 gsize *out_len)
389 {
390 guchar *ret;
391 gsize input_length;
392 gint state = 0;
393 guint save = 0;
394
395 g_return_val_if_fail (text != NULL, NULL);
396 g_return_val_if_fail (out_len != NULL, NULL);
397
398 input_length = strlen (text);
399
400 /* We can use a smaller limit here, since we know the saved state is 0,
401 +1 used to avoid calling g_malloc0(0), and hence returning NULL */
402 ret = g_malloc0 ((input_length / 4) * 3 + 1);
403
404 *out_len = g_base64_decode_step (text, input_length, ret, &state, &save);
405
406 return ret;
407 }
408
409 /**
410 * g_base64_decode_inplace:
411 * @text: (inout) (array length=out_len) (element-type guint8): zero-terminated
412 * string with base64 text to decode
413 * @out_len: (inout): The length of the decoded data is written here
414 *
415 * Decode a sequence of Base-64 encoded text into binary data
416 * by overwriting the input data.
417 *
418 * Returns: (transfer none): The binary data that @text responds. This pointer
419 * is the same as the input @text.
420 *
421 * Since: 2.20
422 */
423 guchar *
424 g_base64_decode_inplace (gchar *text,
425 gsize *out_len)
426 {
427 gint input_length, state = 0;
428 guint save = 0;
429
430 g_return_val_if_fail (text != NULL, NULL);
431 g_return_val_if_fail (out_len != NULL, NULL);
432
433 input_length = strlen (text);
434
435 g_return_val_if_fail (input_length > 1, NULL);
436
437 *out_len = g_base64_decode_step (text, input_length, (guchar *) text, &state, &save);
438
439 return (guchar *) text;
440 }