/* vi: set sw=4 ts=4: */
/*
 * Copyright 2003, Glenn McGrath
 * Copyright 2006, Rob Landley <rob@landley.net>
 * Copyright 2010, Denys Vlasenko
 *
 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 */
#include "libbb.h"

/* Conversion tables */
#if ENABLE_BASE32
/*
 * base32 alphabet: entry N is the output character for 5-bit value N
 * ('A'..'Z' for 0..25, '2'..'7' for 26..31).
 * The table has exactly 32 entries: the '=' padding character is not stored.
 */
const char bb_uuenc_tbl_base32[] ALIGN1 = {
	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
	'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
	'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
	'Y', 'Z', '2', '3', '4', '5', '6', '7',
	/* unused: '=', */
};
#endif
/*
 * base64 alphabet: entry N is the output character for 6-bit value N.
 * Entry 64 is the '=' padding character, which bb_uuencode() reads
 * as tbl[64] when it pads the last group.
 */
const char bb_uuenc_tbl_base64[] ALIGN1 = {
	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
	'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
	'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
	'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
	'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
	'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
	'w', 'x', 'y', 'z', '0', '1', '2', '3',
	'4', '5', '6', '7', '8', '9', '+', '/',
	'=' /* termination character */
};
/*
 * Classic uuencode alphabet: entry N is the output character for
 * 6-bit value N. Value 0 maps to '`', values 1..63 map to the
 * character with code 32+N. Entry 64 is the padding character ('`'),
 * which bb_uuencode() reads as tbl[64] when it pads the last group.
 */
const char bb_uuenc_tbl_std[] ALIGN1 = {
	'`', '!', '"', '#', '$', '%', '&', '\'',
	'(', ')', '*', '+', ',', '-', '.', '/',
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', ':', ';', '<', '=', '>', '?',
	'@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
	'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
	'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
	'X', 'Y', 'Z', '[', '\\',']', '^', '_',
	'`' /* termination character */
};

/*
 * Encode LENGTH bytes at SRC to uuencode or base64 format and place
 * the result at P.
 *
 * p:      output buffer. It will be 0-terminated, and must point to
 *         a writable buffer of at least 1+BASE64_LENGTH(length) bytes,
 *         where BASE64_LENGTH(len) = (4 * ((len + 2) / 3))
 * src:    input bytes
 * length: number of input bytes; a value <= 0 produces an empty string
 * tbl:    conversion table with 65 entries: entries 0..63 are
 *         the output characters for 6-bit values, entry 64 is
 *         the padding character
 */
void FAST_FUNC bb_uuencode(char *p, const void *src, int length, const char *tbl)
{
	const unsigned char *s = src;

	/* A negative length would skip the encoding loop, but the padding
	 * loop below would still run and write before the start of p.
	 */
	if (length < 0)
		length = 0;

	/* Transform the 3x8 bits to 4x6 bits */
	while (length > 0) {
		unsigned s1, s2;

		/* Are s[1], s[2] valid or should be assumed 0? */
		s1 = s2 = 0;
		length -= 3; /* can be >=0, -1, -2 */
		if (length >= -1) {
			s1 = s[1];
			if (length >= 0)
				s2 = s[2];
		}
		*p++ = tbl[s[0] >> 2];
		*p++ = tbl[((s[0] & 3) << 4) + (s1 >> 4)];
		*p++ = tbl[((s1 & 0xf) << 2) + (s2 >> 6)];
		*p++ = tbl[s2 & 0x3f];
		s += 3;
	}
	/* Zero-terminate */
	*p = '\0';
	/* If length is -2 or -1, pad last char or two:
	 * overwrite the chars which encode only assumed-zero input bytes
	 */
	while (length) {
		*--p = tbl[64];
		length++;
	}
}

/*
 * Decode base64 encoded string.
 *
 * dst:    output buffer. Every 4 accepted input chars produce at most
 *         3 output bytes, so decoding in place (dst == *pp_src) works.
 * pp_src: address of the pointer to NUL-terminated input.
 *         May be NULL: then the input is read from dst itself
 *         and nothing is reported back about the read position.
 *
 * Characters outside of the base64 alphabet are silently skipped.
 * '=' is accepted only as the 4th char of a group, or as the 3rd char
 * if it is immediately followed by another '='; any other '=' is skipped.
 * Decoding stops after a group which ends in '='.
 *
 * Returns: pointer past the last written output byte,
 * the result is not NUL-terminated.
 * (*pp_src) is advanced past the last read byte, minus the number
 * of accepted chars in a trailing incomplete group.
 * If points to '\0', then the source was fully decoded.
 */
char* FAST_FUNC decode_base64(char *dst, const char **pp_src)
{
	const char *src = pp_src ? *pp_src : dst; /* for httpd.c, support NULL 2nd param */
	unsigned ch = 0; /* accumulates 4 x 6 bits, plus '=' marker bits above them */
	unsigned t;
	int i = 0; /* number of accepted chars in the current group, 0..3 */

	while ((t = (unsigned char)*src) != '\0') {
		src++;

		/* "if" forest is faster than strchr(bb_uuenc_tbl_base64, t) */
		if (t >= '0' && t <= '9')
			t = t - '0' + 52;
		else if (t >= 'A' && t <= 'Z')
			t = t - 'A';
		else if (t >= 'a' && t <= 'z')
			t = t - 'a' + 26;
		else if (t == '+')
			t = 62;
		else if (t == '/')
			t = 63;
		else if (t == '=' && (i == 3 || (i == 2 && *src == '=')))
			/* the above disallows "==AA", "A===", "AA=A" etc */
			/* zero data bits, and a marker bit above the 24 data bits */
			t = 0x1000000;
		else
//TODO: add BASE64_FLAG_foo to die on bad char?
			continue;

		ch = (ch << 6) | t;
		i = (i + 1) & 3;
		if (i == 0) {
			*dst++ = (char) (ch >> 16);
			*dst++ = (char) (ch >> 8);
			*dst++ = (char) ch;
			if (ch & 0x1000000) { /* was last input char '='? */
				dst--;
				/* marker of a '=' in 3rd position was shifted left by 6 */
				if (ch & (0x1000000 << 6)) /* was it "=="? */
					dst--;
				break;
			}
			ch = 0;
		}
	}
	/* i is zero here if full 4-char block was decoded */
	/* NOTE(review): "src - i" points to the start of the incomplete group
	 * only if no skipped chars are interleaved with its i accepted chars.
	 */
	if (pp_src)
		*pp_src = src - i; /* -i signals truncation: e.g. "MQ" and "MQ=" (correct encoding is "MQ==" -> "1") */
	return dst;
}

#if ENABLE_BASE32
/*
 * Decode base32 encoded string.
 *
 * dst:    output buffer. Every 8 accepted input chars produce at most
 *         5 output bytes, so decoding in place (dst == *pp_src) works.
 * pp_src: address of the pointer to NUL-terminated input.
 *         Unlike in decode_base64(), it must not be NULL.
 *
 * Letters are accepted in both upper and lower case.
 * Other characters outside of the base32 alphabet are silently skipped.
 * '=' is accepted only after at least two chars of a group; an earlier '='
 * is skipped. Decoding stops after a group which ends in '='.
 *
 * Returns: pointer past the last written output byte,
 * the result is not NUL-terminated.
 * (*pp_src) is advanced past the last read byte, minus the number
 * of accepted chars in a trailing incomplete group.
 * If points to '\0', then the source was fully decoded.
 */
char* FAST_FUNC decode_base32(char *dst, const char **pp_src)
{
	const char *src = *pp_src;
	uint64_t ch = 0; /* accumulates 8 x 5 = 40 bits */
	unsigned t;
	int i = 0; /* number of accepted chars in the current group, 0..7 */

	while ((t = (unsigned char)*src) != '\0') {
		src++;

		/* "if" forest is faster than strchr(bb_uuenc_tbl_base32, t) */
		if (t >= '2' && t <= '7')
			t = t - '2' + 26;
		else if (t == '=' && i > 1)
			t = 0;
		else {
			/* fold to lowercase; non-letters end up > 25 */
			t = (t | 0x20) - 'a';
			if (t > 25)
//TODO: add BASE64_FLAG_foo to die on bad char?
				continue;
		}

		ch = (ch << 5) | t;
		i = (i + 1) & 7;
		if (i == 0) {
			*dst++ = (char) (ch >> 32);
			if (src[-1] == '=') /* was last input char '='? */
				goto tail;
			*dst++ = (char) (ch >> 24);
			*dst++ = (char) (ch >> 16);
			*dst++ = (char) (ch >> 8);
			*dst++ = (char) ch;
		}
	}
	/* i is zero here if full 8-char block was decoded */
	*pp_src = src - i;
	return dst;
 tail:
	{
		const char *s = src;
		/* i is 0 here: count trailing '=' chars into it */
		while (*--s == '=')
			i++;
		/* Why duplicate the below code? Testcase:
		 * echo ' 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18' | base32 | base32 -d
		 * IOW, decoding of
		 * EAYSAMRAGMQDIIBVEA3CANZAHAQDSIBRGAQDCMJAGEZCAMJTEAYTIIBRGUQDCNRAGE3SAMJYBI==
		 * ====
		 * must correctly stitch together the tail, must not overwrite
		 * the tail before it is analyzed! (we can be decoding in-place)
		 * Else testcase fails, prints trailing extra NUL bytes.
		 */
		*dst++ = (char) (ch >> 24);
		*dst++ = (char) (ch >> 16);
		*dst++ = (char) (ch >> 8);
		*dst++ = (char) ch;
		dst -= (i+1) * 2 / 3; /* discard last 1, 2, 3 or 4 bytes */
	}
	*pp_src = src;
	return dst;
}
#endif

Leonid Lisovskiy328f27f2011-10-28 13:59:04 +0200202/*
203 * Decode base64 encoded stream.
204 * Can stop on EOF, specified char, or on uuencode-style "====" line:
205 * flags argument controls it.
206 */
207void FAST_FUNC read_base64(FILE *src_stream, FILE *dst_stream, int flags)
208{
209/* Note that EOF _can_ be passed as exit_char too */
210#define exit_char ((int)(signed char)flags)
211#define uu_style_end (flags & BASE64_FLAG_UU_STOP)
Denys Vlasenko20900482020-11-25 22:47:00 +0100212#define base32 (flags & BASE64_32)
Leonid Lisovskiy328f27f2011-10-28 13:59:04 +0200213
Denys Vlasenko7467e902020-11-28 09:50:14 +0100214 /* uuencoded files have 61 byte lines.
215 * base32/64 have 76 byte lines by default.
216 * Use 80 byte buffer to process one line at a time.
Leonid Lisovskiy328f27f2011-10-28 13:59:04 +0200217 */
Denys Vlasenko7467e902020-11-28 09:50:14 +0100218 enum { BUFFER_SIZE = 80 };
219 /* decoded data is shorter than input, can use single buffer for both */
220 char buf[BUFFER_SIZE + 2];
Leonid Lisovskiy328f27f2011-10-28 13:59:04 +0200221 int term_seen = 0;
222 int in_count = 0;
223
224 while (1) {
Denys Vlasenko7467e902020-11-28 09:50:14 +0100225 char *out_tail;
226 const char *in_tail;
227
Leonid Lisovskiy328f27f2011-10-28 13:59:04 +0200228 while (in_count < BUFFER_SIZE) {
229 int ch = fgetc(src_stream);
230 if (ch == exit_char) {
231 if (in_count == 0)
232 return;
233 term_seen = 1;
234 break;
235 }
236 if (ch == EOF) {
237 term_seen = 1;
238 break;
239 }
240 /* Prevent "====" line to be split: stop if we see '\n'.
241 * We can also skip other whitespace and skirt the problem
242 * of files with NULs by stopping on any control char or space:
243 */
244 if (ch <= ' ')
245 break;
Denys Vlasenko7467e902020-11-28 09:50:14 +0100246 buf[in_count++] = ch;
Leonid Lisovskiy328f27f2011-10-28 13:59:04 +0200247 }
Denys Vlasenko7467e902020-11-28 09:50:14 +0100248 buf[in_count] = '\0';
Leonid Lisovskiy328f27f2011-10-28 13:59:04 +0200249
250 /* Did we encounter "====" line? */
Denys Vlasenko7467e902020-11-28 09:50:14 +0100251 if (uu_style_end && strcmp(buf, "====") == 0)
Leonid Lisovskiy328f27f2011-10-28 13:59:04 +0200252 return;
253
Denys Vlasenko885121e2020-11-28 13:26:44 +0100254 in_tail = buf;
Denys Vlasenko20900482020-11-25 22:47:00 +0100255#if ENABLE_BASE32
256 if (base32)
Denys Vlasenko885121e2020-11-28 13:26:44 +0100257 out_tail = decode_base32(buf, &in_tail);
Denys Vlasenko20900482020-11-25 22:47:00 +0100258 else
259#endif
Denys Vlasenko885121e2020-11-28 13:26:44 +0100260 out_tail = decode_base64(buf, &in_tail);
Leonid Lisovskiy328f27f2011-10-28 13:59:04 +0200261
Denys Vlasenko7467e902020-11-28 09:50:14 +0100262 fwrite(buf, (out_tail - buf), 1, dst_stream);
Leonid Lisovskiy328f27f2011-10-28 13:59:04 +0200263
264 if (term_seen) {
265 /* Did we consume ALL characters? */
266 if (*in_tail == '\0')
267 return;
268 /* No */
Denys Vlasenkoae04ce82020-11-28 13:39:05 +0100269 bb_simple_error_msg_and_die("truncated input");
Leonid Lisovskiy328f27f2011-10-28 13:59:04 +0200270 }
271
272 /* It was partial decode */
273 in_count = strlen(in_tail);
Denys Vlasenko7467e902020-11-28 09:50:14 +0100274 memmove(buf, in_tail, in_count);
Leonid Lisovskiy328f27f2011-10-28 13:59:04 +0200275 }
Denys Vlasenkoc8f9a8d2010-09-16 18:10:04 +0200276}