/* vi: set sw=4 ts=4: */
/*
 * reformime: parse MIME-encoded message
 *
 * Copyright (C) 2008 by Vladimir Dronnikov <dronnikov@gmail.com>
 *
 * Licensed under GPLv2, see file LICENSE in this source tree.
 */

//kbuild:lib-$(CONFIG_REFORMIME) += reformime.o mail.o
11
#include "libbb.h"
#include "mail.h"
14
/*
 * Debug tracing. Change "#if 0" to "#if 1" to route dbg_error_msg()
 * through bb_error_msg(). In the default configuration every call
 * expands to a no-op, so its arguments are neither evaluated nor compiled.
 */
#if 0
# define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
#else
# define dbg_error_msg(...) ((void)0)
#endif
20
/*
 * Look up KEY in a NULL-terminated token list and return the token that
 * immediately follows its first occurrence.
 *
 * string_array: NULL-terminated array of tokens, scanned front to back
 * key:          token to look for (compared case-insensitively)
 * defvalue:     returned when KEY is absent or is the very last token
 *
 * Every position is examined, not every other one, so a "value" token
 * that happens to equal KEY matches as well.
 */
static const char *find_token(const char *const string_array[], const char *key, const char *defvalue)
{
	int i;

	for (i = 0; string_array[i] != NULL; i++) {
		if (strcasecmp(string_array[i], key) == 0) {
			/* Next slot is either the value or the terminating NULL */
			const char *r = string_array[i + 1];
			return r ? r : defvalue;
		}
	}
	return defvalue;
}
33
/*
 * Same lookup as find_token(), but a missing KEY is fatal: instead of
 * returning a default, the error "not found: 'KEY'" is reported through
 * bb_error_msg_and_die().
 *
 * There is no return statement after that call, so this function relies
 * on bb_error_msg_and_die() never returning.
 */
static const char *xfind_token(const char *const string_array[], const char *key)
{
	const char *r = find_token(string_array, key, NULL);
	if (r)
		return r;
	bb_error_msg_and_die("not found: '%s'", key);
}
41
/*
 * Option bits stored in "opts" by getopt32().
 * The bit positions follow the order of the letters in the option string
 * "x:X" "deis:r:c:m:h:o:O:" given to getopt32() in reformime_main(), so the
 * two must be kept in sync (presumably getopt32 numbers bits by position).
 */
enum {
	OPT_x = 1 << 0, /* -x PREFIX: extract sections to files */
	OPT_X = 1 << 1, /* -X PROG ARGS: pipe sections through a program */
#if ENABLE_FEATURE_REFORMIME_COMPAT
	/* Accepted for compatibility only; parse() never looks at them */
	OPT_d = 1 << 2,
	OPT_e = 1 << 3,
	OPT_i = 1 << 4,
	OPT_s = 1 << 5,
	OPT_r = 1 << 6,
	OPT_c = 1 << 7,
	OPT_m = 1 << 8,
	OPT_h = 1 << 9,
	OPT_o = 1 << 10,
	OPT_O = 1 << 11,
#endif
};
58
59static int parse(const char *boundary, char **argv)
60{
61 int boundary_len = strlen(boundary);
62 char uniq[sizeof("%%llu.%u") + sizeof(int)*3];
63
64 dbg_error_msg("BOUNDARY[%s]", boundary);
65
66 // prepare unique string pattern
67 sprintf(uniq, "%%llu.%u", (unsigned)getpid());
68 dbg_error_msg("UNIQ[%s]", uniq);
69
70 while (1) {
71 char *header;
72 const char *tokens[32]; /* 32 is enough */
73 const char *type;
74
75 /* Read the header (everything up to two \n) */
76 {
77 unsigned header_idx = 0;
78 int last_ch = 0;
79 header = NULL;
80 while (1) {
81 int ch = fgetc(stdin);
82 if (ch == '\r') /* Support both line endings */
83 continue;
84 if (ch == EOF)
85 break;
86 if (ch == '\n' && last_ch == ch)
87 break;
88 if (!(header_idx & 0xff))
89 header = xrealloc(header, header_idx + 0x101);
90 header[header_idx++] = last_ch = ch;
91 }
92 if (!header) {
93 dbg_error_msg("EOF");
94 break;
95 }
96 header[header_idx] = '\0';
97 dbg_error_msg("H:'%s'", p);
98 }
99
100 /* Split to tokens */
101 {
102 char *s, *p;
103 unsigned ntokens;
104 const char *delims = ";=\" \t\n";
105
106 /* Skip to last Content-Type: */
107 s = p = header;
108 while ((p = strchr(p, '\n')) != NULL) {
109 p++;
110 if (strncasecmp(p, "Content-Type:", sizeof("Content-Type:")-1) == 0)
111 s = p;
112 }
113 dbg_error_msg("L:'%s'", p);
114 ntokens = 0;
115 s = strtok(s, delims);
116 while (s) {
117 tokens[ntokens] = s;
118 if (ntokens < ARRAY_SIZE(tokens) - 1)
119 ntokens++;
120 dbg_error_msg("L[%d]='%s'", ntokens, s);
121 s = strtok(NULL, delims);
122 }
123 tokens[ntokens] = NULL;
124 dbg_error_msg("EMPTYLINE, ntokens:%d", ntokens);
125 if (ntokens == 0)
126 break;
127 }
128
129 /* Is it multipart? */
130 type = find_token(tokens, "Content-Type:", "text/plain");
131 dbg_error_msg("TYPE:'%s'", type);
132 if (0 == strncasecmp(type, "multipart/", 10)) {
133 /* Yes, recurse */
134 if (strcasecmp(type + 10, "mixed") != 0)
135 bb_error_msg_and_die("no support of content type '%s'", type);
136 parse(xfind_token(tokens, "boundary"), argv);
Denys Vlasenkod616ab62011-05-22 03:46:33 +0200137 } else {
138 /* No, process one non-multipart section */
139 char *end;
140 pid_t pid = pid;
141 FILE *fp;
142
143 const char *charset = find_token(tokens, "charset", CONFIG_FEATURE_MIME_CHARSET);
144 const char *encoding = find_token(tokens, "Content-Transfer-Encoding:", "7bit");
145
146 /* Compose target filename */
147 char *filename = (char *)find_token(tokens, "filename", NULL);
148 if (!filename)
149 filename = xasprintf(uniq, monotonic_us());
150 else
151 filename = bb_get_last_path_component_strip(xstrdup(filename));
152
153 if (opts & OPT_X) {
154 int fd[2];
155
156 /* start external helper */
157 xpipe(fd);
158 pid = vfork();
159 if (0 == pid) {
160 /* child reads from fd[0] */
161 close(fd[1]);
162 xmove_fd(fd[0], STDIN_FILENO);
163 xsetenv("CONTENT_TYPE", type);
164 xsetenv("CHARSET", charset);
165 xsetenv("ENCODING", encoding);
166 xsetenv("FILENAME", filename);
167 BB_EXECVP_or_die(argv);
168 }
169 /* parent will write to fd[1] */
170 close(fd[0]);
171 fp = xfdopen_for_write(fd[1]);
172 signal(SIGPIPE, SIG_IGN);
173 } else {
174 /* write to file */
175 char *fname = xasprintf("%s%s", *argv, filename);
176 fp = xfopen_for_write(fname);
177 free(fname);
178 }
179 free(filename);
180
181 /* write to fp */
182 end = NULL;
183 if (0 == strcasecmp(encoding, "base64")) {
184 read_base64(stdin, fp, '-');
185 } else
186 if (0 != strcasecmp(encoding, "7bit")
187 && 0 != strcasecmp(encoding, "8bit")
188 ) {
189 /* quoted-printable, binary, user-defined are unsupported so far */
190 bb_error_msg_and_die("encoding '%s' not supported", encoding);
191 } else {
192 /* plain 7bit or 8bit */
193 while ((end = xmalloc_fgets(stdin)) != NULL) {
194 if ('-' == end[0]
195 && '-' == end[1]
196 && strncmp(end + 2, boundary, boundary_len) == 0
197 ) {
198 break;
199 }
200 fputs(end, fp);
201 }
202 }
203 fclose(fp);
204
205 /* Wait for child */
206 if (opts & OPT_X) {
207 int rc;
208 signal(SIGPIPE, SIG_DFL);
209 rc = (wait4pid(pid) & 0xff);
210 if (rc != 0)
211 return rc + 20;
212 }
213
214 /* Multipart ended? */
215 if (end && '-' == end[2 + boundary_len] && '-' == end[2 + boundary_len + 1]) {
216 dbg_error_msg("FINISHED MPART:'%s'", end);
217 break;
218 }
219 dbg_error_msg("FINISHED:'%s'", end);
220 free(end);
221 } /* end of "handle one non-multipart block" */
222
223 free(header);
224 } /* while (1) */
225
226 dbg_error_msg("ENDPARSE[%s]", boundary);
227
228 return EXIT_SUCCESS;
229}
230
//usage:#define reformime_trivial_usage
//usage: "[OPTIONS]"
//usage:#define reformime_full_usage "\n\n"
//usage: "Parse MIME-encoded message on stdin\n"
//usage: "\n -x PREFIX Extract content of MIME sections to files"
//usage: "\n -X PROG ARGS Filter content of MIME sections through PROG"
//usage: "\n Must be the last option"
//usage: "\n"
//usage: "\nOther options are silently ignored"
240
/*
Usage: reformime [options]
 -d - parse a delivery status notification.
 -e - extract contents of MIME section.
 -x - extract MIME section to a file.
 -X - pipe MIME section to a program.
 -i - show MIME info.
 -s n.n.n.n - specify MIME section.
 -r - rewrite message, filling in missing MIME headers.
 -r7 - also convert 8bit/raw encoding to quoted-printable, if possible.
 -r8 - also convert quoted-printable encoding to 8bit, if possible.
 -c charset - default charset for rewriting, -o, and -O.
 -m [file] [file]... - create a MIME message digest.
 -h "header" - decode RFC 2047-encoded header.
 -o "header" - encode unstructured header using RFC 2047.
 -O "header" - encode address list header using RFC 2047.
*/
258
259int reformime_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
260int reformime_main(int argc UNUSED_PARAM, char **argv)
261{
262 const char *opt_prefix = "";
263
264 INIT_G();
265
266 // parse options
267 // N.B. only -x and -X are supported so far
268 opt_complementary = "x--X:X--x" IF_FEATURE_REFORMIME_COMPAT(":m::");
269 opts = getopt32(argv,
270 "x:X" IF_FEATURE_REFORMIME_COMPAT("deis:r:c:m:h:o:O:"),
271 &opt_prefix
272 IF_FEATURE_REFORMIME_COMPAT(, NULL, NULL, &G.opt_charset, NULL, NULL, NULL, NULL)
273 );
274 argv += optind;
275
276 return parse("", (opts & OPT_X) ? argv : (char **)&opt_prefix);
277}