/* vi: set sw=4 ts=4: */
/*
 * reformime: parse MIME-encoded message
 *
 * Copyright (C) 2008 by Vladimir Dronnikov <dronnikov@gmail.com>
 *
 * Licensed under GPLv2, see file LICENSE in this source tree.
 */
//config:config REFORMIME
//config: bool "reformime (7.5 kb)"
//config: default y
//config: help
//config: Parse MIME-formatted messages.
//config:
//config:config FEATURE_REFORMIME_COMPAT
//config: bool "Accept and ignore options other than -x and -X"
//config: default y
//config: depends on REFORMIME
//config: help
//config: Accept (for compatibility only) and ignore options
//config: other than -x and -X.

//applet:IF_REFORMIME(APPLET(reformime, BB_DIR_BIN, BB_SUID_DROP))

//kbuild:lib-$(CONFIG_REFORMIME) += reformime.o mail.o
26
27#include "libbb.h"
28#include "mail.h"
29
/*
 * Debug tracing. Change "#if 0" to "#if 1" to route dbg_error_msg()
 * through bb_error_msg(). In the default configuration every call expands
 * to a no-op expression, so its arguments are never evaluated.
 */
#if 0
# define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
#else
# define dbg_error_msg(...) ((void)0)
#endif
35
/*
 * Look up KEY (case-insensitively) in a NULL-terminated array of strings
 * and return the element that immediately follows the first match.
 *
 * Returns DEFVALUE when KEY is not present, and also when the match is the
 * last element (the "following element" is then the terminating NULL).
 * The result aliases string_array[] or is DEFVALUE itself; nothing is
 * allocated.
 */
static const char *find_token(const char *const string_array[], const char *key, const char *defvalue)
{
	int i;

	for (i = 0; string_array[i] != NULL; i++) {
		if (strcasecmp(string_array[i], key) == 0) {
			const char *value = string_array[i + 1];
			return value ? value : defvalue;
		}
	}
	return defvalue;
}
48
/*
 * Like find_token(), but the value is mandatory: when no value for KEY
 * is found, report "not found: 'KEY'" via bb_error_msg_and_die()
 * instead of returning.
 */
static const char *xfind_token(const char *const string_array[], const char *key)
{
	const char *value = find_token(string_array, key, NULL);

	if (!value)
		bb_error_msg_and_die("not found: '%s'", key);
	return value;
}
56
/*
 * Option bit flags tested against the getopt32() result.
 * The constants are listed in the same order as the option letters in the
 * option string used by reformime_main() ("x:X" followed by the
 * compatibility options), one bit per letter.
 * Everything besides -x and -X exists only so that those options are
 * accepted and ignored when FEATURE_REFORMIME_COMPAT is enabled.
 */
enum {
	OPT_x = 1 << 0,
	OPT_X = 1 << 1,
#if ENABLE_FEATURE_REFORMIME_COMPAT
	OPT_d = 1 << 2,
	OPT_e = 1 << 3,
	OPT_i = 1 << 4,
	OPT_s = 1 << 5,
	OPT_r = 1 << 6,
	OPT_c = 1 << 7,
	OPT_m = 1 << 8,
	OPT_h = 1 << 9,
	OPT_o = 1 << 10,
	OPT_O = 1 << 11,
#endif
};
73
74static int parse(const char *boundary, char **argv)
75{
76 int boundary_len = strlen(boundary);
77 char uniq[sizeof("%%llu.%u") + sizeof(int)*3];
78
79 dbg_error_msg("BOUNDARY[%s]", boundary);
80
81 // prepare unique string pattern
82 sprintf(uniq, "%%llu.%u", (unsigned)getpid());
83 dbg_error_msg("UNIQ[%s]", uniq);
84
85 while (1) {
86 char *header;
87 const char *tokens[32]; /* 32 is enough */
88 const char *type;
89
90 /* Read the header (everything up to two \n) */
91 {
92 unsigned header_idx = 0;
93 int last_ch = 0;
94 header = NULL;
95 while (1) {
96 int ch = fgetc(stdin);
97 if (ch == '\r') /* Support both line endings */
98 continue;
99 if (ch == EOF)
100 break;
101 if (ch == '\n' && last_ch == ch)
102 break;
103 if (!(header_idx & 0xff))
104 header = xrealloc(header, header_idx + 0x101);
105 header[header_idx++] = last_ch = ch;
106 }
107 if (!header) {
108 dbg_error_msg("EOF");
109 break;
110 }
111 header[header_idx] = '\0';
112 dbg_error_msg("H:'%s'", p);
113 }
114
115 /* Split to tokens */
116 {
117 char *s, *p;
Denys Vlasenko24966162020-10-06 02:36:47 +0200118 char *tokstate;
Denys Vlasenkod616ab62011-05-22 03:46:33 +0200119 unsigned ntokens;
120 const char *delims = ";=\" \t\n";
121
122 /* Skip to last Content-Type: */
123 s = p = header;
124 while ((p = strchr(p, '\n')) != NULL) {
125 p++;
126 if (strncasecmp(p, "Content-Type:", sizeof("Content-Type:")-1) == 0)
127 s = p;
128 }
129 dbg_error_msg("L:'%s'", p);
130 ntokens = 0;
Denys Vlasenko24966162020-10-06 02:36:47 +0200131 s = strtok_r(s, delims, &tokstate);
Denys Vlasenkod616ab62011-05-22 03:46:33 +0200132 while (s) {
133 tokens[ntokens] = s;
134 if (ntokens < ARRAY_SIZE(tokens) - 1)
135 ntokens++;
136 dbg_error_msg("L[%d]='%s'", ntokens, s);
Denys Vlasenko24966162020-10-06 02:36:47 +0200137 s = strtok_r(NULL, delims, &tokstate);
Denys Vlasenkod616ab62011-05-22 03:46:33 +0200138 }
139 tokens[ntokens] = NULL;
140 dbg_error_msg("EMPTYLINE, ntokens:%d", ntokens);
141 if (ntokens == 0)
142 break;
143 }
144
145 /* Is it multipart? */
146 type = find_token(tokens, "Content-Type:", "text/plain");
147 dbg_error_msg("TYPE:'%s'", type);
148 if (0 == strncasecmp(type, "multipart/", 10)) {
149 /* Yes, recurse */
150 if (strcasecmp(type + 10, "mixed") != 0)
151 bb_error_msg_and_die("no support of content type '%s'", type);
152 parse(xfind_token(tokens, "boundary"), argv);
Denys Vlasenkod616ab62011-05-22 03:46:33 +0200153 } else {
154 /* No, process one non-multipart section */
155 char *end;
156 pid_t pid = pid;
157 FILE *fp;
158
159 const char *charset = find_token(tokens, "charset", CONFIG_FEATURE_MIME_CHARSET);
160 const char *encoding = find_token(tokens, "Content-Transfer-Encoding:", "7bit");
161
162 /* Compose target filename */
163 char *filename = (char *)find_token(tokens, "filename", NULL);
164 if (!filename)
165 filename = xasprintf(uniq, monotonic_us());
166 else
167 filename = bb_get_last_path_component_strip(xstrdup(filename));
168
Denys Vlasenko25b26802020-12-17 12:24:50 +0100169 if (option_mask32 & OPT_X) {
Denys Vlasenkod616ab62011-05-22 03:46:33 +0200170 int fd[2];
171
172 /* start external helper */
173 xpipe(fd);
174 pid = vfork();
175 if (0 == pid) {
176 /* child reads from fd[0] */
177 close(fd[1]);
178 xmove_fd(fd[0], STDIN_FILENO);
179 xsetenv("CONTENT_TYPE", type);
180 xsetenv("CHARSET", charset);
181 xsetenv("ENCODING", encoding);
182 xsetenv("FILENAME", filename);
183 BB_EXECVP_or_die(argv);
184 }
185 /* parent will write to fd[1] */
186 close(fd[0]);
187 fp = xfdopen_for_write(fd[1]);
188 signal(SIGPIPE, SIG_IGN);
189 } else {
190 /* write to file */
191 char *fname = xasprintf("%s%s", *argv, filename);
192 fp = xfopen_for_write(fname);
193 free(fname);
194 }
195 free(filename);
196
197 /* write to fp */
198 end = NULL;
199 if (0 == strcasecmp(encoding, "base64")) {
200 read_base64(stdin, fp, '-');
201 } else
202 if (0 != strcasecmp(encoding, "7bit")
203 && 0 != strcasecmp(encoding, "8bit")
204 ) {
205 /* quoted-printable, binary, user-defined are unsupported so far */
206 bb_error_msg_and_die("encoding '%s' not supported", encoding);
207 } else {
208 /* plain 7bit or 8bit */
209 while ((end = xmalloc_fgets(stdin)) != NULL) {
210 if ('-' == end[0]
211 && '-' == end[1]
212 && strncmp(end + 2, boundary, boundary_len) == 0
213 ) {
214 break;
215 }
216 fputs(end, fp);
217 }
218 }
219 fclose(fp);
220
221 /* Wait for child */
Denys Vlasenko25b26802020-12-17 12:24:50 +0100222 if (option_mask32 & OPT_X) {
Denys Vlasenkod616ab62011-05-22 03:46:33 +0200223 int rc;
224 signal(SIGPIPE, SIG_DFL);
225 rc = (wait4pid(pid) & 0xff);
226 if (rc != 0)
227 return rc + 20;
228 }
229
230 /* Multipart ended? */
231 if (end && '-' == end[2 + boundary_len] && '-' == end[2 + boundary_len + 1]) {
232 dbg_error_msg("FINISHED MPART:'%s'", end);
233 break;
234 }
235 dbg_error_msg("FINISHED:'%s'", end);
236 free(end);
237 } /* end of "handle one non-multipart block" */
238
239 free(header);
240 } /* while (1) */
241
242 dbg_error_msg("ENDPARSE[%s]", boundary);
243
244 return EXIT_SUCCESS;
245}
246
//usage:#define reformime_trivial_usage
//usage: "[OPTIONS]"
//usage:#define reformime_full_usage "\n\n"
//usage: "Parse MIME-encoded message on stdin\n"
//usage: "\n -x PREFIX Extract content of MIME sections to files"
//usage: "\n -X PROG ARGS Filter content of MIME sections through PROG"
//usage: "\n Must be the last option"
//usage: "\n"
//usage: "\nOther options are silently ignored"
256
/*
Usage: reformime [options]
 -d - parse a delivery status notification.
 -e - extract contents of MIME section.
 -x - extract MIME section to a file.
 -X - pipe MIME section to a program.
 -i - show MIME info.
 -s n.n.n.n - specify MIME section.
 -r - rewrite message, filling in missing MIME headers.
 -r7 - also convert 8bit/raw encoding to quoted-printable, if possible.
 -r8 - also convert quoted-printable encoding to 8bit, if possible.
 -c charset - default charset for rewriting, -o, and -O.
 -m [file] [file]... - create a MIME message digest.
 -h "header" - decode RFC 2047-encoded header.
 -o "header" - encode unstructured header using RFC 2047.
 -O "header" - encode address list header using RFC 2047.
*/
274
275int reformime_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
276int reformime_main(int argc UNUSED_PARAM, char **argv)
277{
Denys Vlasenko25b26802020-12-17 12:24:50 +0100278 unsigned opts;
Denys Vlasenkod616ab62011-05-22 03:46:33 +0200279 const char *opt_prefix = "";
280
281 INIT_G();
282
283 // parse options
284 // N.B. only -x and -X are supported so far
Denys Vlasenko22542ec2017-08-08 21:55:02 +0200285 opts = getopt32(argv, "^"
286 "x:X" IF_FEATURE_REFORMIME_COMPAT("deis:r:c:m:*h:o:O:")
287 "\0" "x--X:X--x",
Denys Vlasenkod616ab62011-05-22 03:46:33 +0200288 &opt_prefix
289 IF_FEATURE_REFORMIME_COMPAT(, NULL, NULL, &G.opt_charset, NULL, NULL, NULL, NULL)
290 );
291 argv += optind;
292
293 return parse("", (opts & OPT_X) ? argv : (char **)&opt_prefix);
294}