/* vi: set sw=4 ts=4: */
/*
 * reformime: parse MIME-encoded message
 *
 * Copyright (C) 2008 by Vladimir Dronnikov <dronnikov@gmail.com>
 *
 * Licensed under GPLv2, see file LICENSE in this source tree.
 */
Denys Vlasenko | c19f758 | 2016-11-23 09:58:03 +0100 | [diff] [blame] | 9 | //config:config REFORMIME |
Denys Vlasenko | 4eed2c6 | 2017-07-18 22:01:24 +0200 | [diff] [blame] | 10 | //config: bool "reformime (7.5 kb)" |
Denys Vlasenko | c19f758 | 2016-11-23 09:58:03 +0100 | [diff] [blame] | 11 | //config: default y |
| 12 | //config: help |
Denys Vlasenko | 72089cf | 2017-07-21 09:50:55 +0200 | [diff] [blame] | 13 | //config: Parse MIME-formatted messages. |
Denys Vlasenko | c19f758 | 2016-11-23 09:58:03 +0100 | [diff] [blame] | 14 | //config: |
| 15 | //config:config FEATURE_REFORMIME_COMPAT |
| 16 | //config: bool "Accept and ignore options other than -x and -X" |
| 17 | //config: default y |
| 18 | //config: depends on REFORMIME |
| 19 | //config: help |
Denys Vlasenko | 72089cf | 2017-07-21 09:50:55 +0200 | [diff] [blame] | 20 | //config: Accept (for compatibility only) and ignore options |
| 21 | //config: other than -x and -X. |
Denys Vlasenko | c19f758 | 2016-11-23 09:58:03 +0100 | [diff] [blame] | 22 | |
| 23 | //applet:IF_REFORMIME(APPLET(reformime, BB_DIR_BIN, BB_SUID_DROP)) |
Denys Vlasenko | d616ab6 | 2011-05-22 03:46:33 +0200 | [diff] [blame] | 24 | |
| 25 | //kbuild:lib-$(CONFIG_REFORMIME) += reformime.o mail.o |
| 26 | |
| 27 | #include "libbb.h" |
| 28 | #include "mail.h" |
| 29 | |
/* Debug tracing switch: set to 1 to route dbg_error_msg() to bb_error_msg().
 * When 0, dbg_error_msg() expands to a no-op and its arguments are not evaluated.
 */
#define REFORMIME_DEBUG 0

#if REFORMIME_DEBUG
# define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
#else
# define dbg_error_msg(...) ((void)0)
#endif
| 35 | |
/*
 * Look up key (case-insensitively) in a NULL-terminated token array and
 * return the token that immediately follows the first match.
 *
 * Returns defvalue when key is not present, or when the first match is the
 * last token in the array (i.e. nothing follows it).
 */
static const char *find_token(const char *const string_array[], const char *key, const char *defvalue)
{
	const char *const *cur;

	for (cur = string_array; *cur; cur++) {
		if (strcasecmp(*cur, key) != 0)
			continue;
		/* Only the first match counts, even if no value follows it */
		if (cur[1])
			return cur[1];
		return defvalue;
	}
	return defvalue;
}
| 48 | |
/*
 * Like find_token() without a default: the token following key is returned,
 * and the program dies with an error message if there is none.
 */
static const char *xfind_token(const char *const string_array[], const char *key)
{
	const char *value = find_token(string_array, key, NULL);

	if (!value)
		bb_error_msg_and_die("not found: '%s'", key);
	return value;
}
| 56 | |
/*
 * Option bit masks, tested against the getopt32() result.
 * The bits are assigned in the same order as the option letters appear in
 * the option string used by reformime_main(): "x:X" first, then the
 * compatibility options "deis:r:c:m:*h:o:O:".
 */
enum {
	OPT_x = 0x001,
	OPT_X = 0x002,
#if ENABLE_FEATURE_REFORMIME_COMPAT
	/* accepted for compatibility only */
	OPT_d = 0x004,
	OPT_e = 0x008,
	OPT_i = 0x010,
	OPT_s = 0x020,
	OPT_r = 0x040,
	OPT_c = 0x080,
	OPT_m = 0x100,
	OPT_h = 0x200,
	OPT_o = 0x400,
	OPT_O = 0x800,
#endif
};
| 73 | |
| 74 | static int parse(const char *boundary, char **argv) |
| 75 | { |
| 76 | int boundary_len = strlen(boundary); |
| 77 | char uniq[sizeof("%%llu.%u") + sizeof(int)*3]; |
| 78 | |
| 79 | dbg_error_msg("BOUNDARY[%s]", boundary); |
| 80 | |
| 81 | // prepare unique string pattern |
| 82 | sprintf(uniq, "%%llu.%u", (unsigned)getpid()); |
| 83 | dbg_error_msg("UNIQ[%s]", uniq); |
| 84 | |
| 85 | while (1) { |
| 86 | char *header; |
| 87 | const char *tokens[32]; /* 32 is enough */ |
| 88 | const char *type; |
| 89 | |
| 90 | /* Read the header (everything up to two \n) */ |
| 91 | { |
| 92 | unsigned header_idx = 0; |
| 93 | int last_ch = 0; |
| 94 | header = NULL; |
| 95 | while (1) { |
| 96 | int ch = fgetc(stdin); |
| 97 | if (ch == '\r') /* Support both line endings */ |
| 98 | continue; |
| 99 | if (ch == EOF) |
| 100 | break; |
| 101 | if (ch == '\n' && last_ch == ch) |
| 102 | break; |
| 103 | if (!(header_idx & 0xff)) |
| 104 | header = xrealloc(header, header_idx + 0x101); |
| 105 | header[header_idx++] = last_ch = ch; |
| 106 | } |
| 107 | if (!header) { |
| 108 | dbg_error_msg("EOF"); |
| 109 | break; |
| 110 | } |
| 111 | header[header_idx] = '\0'; |
| 112 | dbg_error_msg("H:'%s'", p); |
| 113 | } |
| 114 | |
| 115 | /* Split to tokens */ |
| 116 | { |
| 117 | char *s, *p; |
Denys Vlasenko | 2496616 | 2020-10-06 02:36:47 +0200 | [diff] [blame] | 118 | char *tokstate; |
Denys Vlasenko | d616ab6 | 2011-05-22 03:46:33 +0200 | [diff] [blame] | 119 | unsigned ntokens; |
| 120 | const char *delims = ";=\" \t\n"; |
| 121 | |
| 122 | /* Skip to last Content-Type: */ |
| 123 | s = p = header; |
| 124 | while ((p = strchr(p, '\n')) != NULL) { |
| 125 | p++; |
| 126 | if (strncasecmp(p, "Content-Type:", sizeof("Content-Type:")-1) == 0) |
| 127 | s = p; |
| 128 | } |
| 129 | dbg_error_msg("L:'%s'", p); |
| 130 | ntokens = 0; |
Denys Vlasenko | 2496616 | 2020-10-06 02:36:47 +0200 | [diff] [blame] | 131 | s = strtok_r(s, delims, &tokstate); |
Denys Vlasenko | d616ab6 | 2011-05-22 03:46:33 +0200 | [diff] [blame] | 132 | while (s) { |
| 133 | tokens[ntokens] = s; |
| 134 | if (ntokens < ARRAY_SIZE(tokens) - 1) |
| 135 | ntokens++; |
| 136 | dbg_error_msg("L[%d]='%s'", ntokens, s); |
Denys Vlasenko | 2496616 | 2020-10-06 02:36:47 +0200 | [diff] [blame] | 137 | s = strtok_r(NULL, delims, &tokstate); |
Denys Vlasenko | d616ab6 | 2011-05-22 03:46:33 +0200 | [diff] [blame] | 138 | } |
| 139 | tokens[ntokens] = NULL; |
| 140 | dbg_error_msg("EMPTYLINE, ntokens:%d", ntokens); |
| 141 | if (ntokens == 0) |
| 142 | break; |
| 143 | } |
| 144 | |
| 145 | /* Is it multipart? */ |
| 146 | type = find_token(tokens, "Content-Type:", "text/plain"); |
| 147 | dbg_error_msg("TYPE:'%s'", type); |
| 148 | if (0 == strncasecmp(type, "multipart/", 10)) { |
| 149 | /* Yes, recurse */ |
| 150 | if (strcasecmp(type + 10, "mixed") != 0) |
| 151 | bb_error_msg_and_die("no support of content type '%s'", type); |
| 152 | parse(xfind_token(tokens, "boundary"), argv); |
Denys Vlasenko | d616ab6 | 2011-05-22 03:46:33 +0200 | [diff] [blame] | 153 | } else { |
| 154 | /* No, process one non-multipart section */ |
| 155 | char *end; |
| 156 | pid_t pid = pid; |
| 157 | FILE *fp; |
| 158 | |
| 159 | const char *charset = find_token(tokens, "charset", CONFIG_FEATURE_MIME_CHARSET); |
| 160 | const char *encoding = find_token(tokens, "Content-Transfer-Encoding:", "7bit"); |
| 161 | |
| 162 | /* Compose target filename */ |
| 163 | char *filename = (char *)find_token(tokens, "filename", NULL); |
| 164 | if (!filename) |
| 165 | filename = xasprintf(uniq, monotonic_us()); |
| 166 | else |
| 167 | filename = bb_get_last_path_component_strip(xstrdup(filename)); |
| 168 | |
Denys Vlasenko | 25b2680 | 2020-12-17 12:24:50 +0100 | [diff] [blame] | 169 | if (option_mask32 & OPT_X) { |
Denys Vlasenko | d616ab6 | 2011-05-22 03:46:33 +0200 | [diff] [blame] | 170 | int fd[2]; |
| 171 | |
| 172 | /* start external helper */ |
| 173 | xpipe(fd); |
| 174 | pid = vfork(); |
| 175 | if (0 == pid) { |
| 176 | /* child reads from fd[0] */ |
| 177 | close(fd[1]); |
| 178 | xmove_fd(fd[0], STDIN_FILENO); |
| 179 | xsetenv("CONTENT_TYPE", type); |
| 180 | xsetenv("CHARSET", charset); |
| 181 | xsetenv("ENCODING", encoding); |
| 182 | xsetenv("FILENAME", filename); |
| 183 | BB_EXECVP_or_die(argv); |
| 184 | } |
| 185 | /* parent will write to fd[1] */ |
| 186 | close(fd[0]); |
| 187 | fp = xfdopen_for_write(fd[1]); |
| 188 | signal(SIGPIPE, SIG_IGN); |
| 189 | } else { |
| 190 | /* write to file */ |
| 191 | char *fname = xasprintf("%s%s", *argv, filename); |
| 192 | fp = xfopen_for_write(fname); |
| 193 | free(fname); |
| 194 | } |
| 195 | free(filename); |
| 196 | |
| 197 | /* write to fp */ |
| 198 | end = NULL; |
| 199 | if (0 == strcasecmp(encoding, "base64")) { |
| 200 | read_base64(stdin, fp, '-'); |
| 201 | } else |
| 202 | if (0 != strcasecmp(encoding, "7bit") |
| 203 | && 0 != strcasecmp(encoding, "8bit") |
| 204 | ) { |
| 205 | /* quoted-printable, binary, user-defined are unsupported so far */ |
| 206 | bb_error_msg_and_die("encoding '%s' not supported", encoding); |
| 207 | } else { |
| 208 | /* plain 7bit or 8bit */ |
| 209 | while ((end = xmalloc_fgets(stdin)) != NULL) { |
| 210 | if ('-' == end[0] |
| 211 | && '-' == end[1] |
| 212 | && strncmp(end + 2, boundary, boundary_len) == 0 |
| 213 | ) { |
| 214 | break; |
| 215 | } |
| 216 | fputs(end, fp); |
| 217 | } |
| 218 | } |
| 219 | fclose(fp); |
| 220 | |
| 221 | /* Wait for child */ |
Denys Vlasenko | 25b2680 | 2020-12-17 12:24:50 +0100 | [diff] [blame] | 222 | if (option_mask32 & OPT_X) { |
Denys Vlasenko | d616ab6 | 2011-05-22 03:46:33 +0200 | [diff] [blame] | 223 | int rc; |
| 224 | signal(SIGPIPE, SIG_DFL); |
| 225 | rc = (wait4pid(pid) & 0xff); |
| 226 | if (rc != 0) |
| 227 | return rc + 20; |
| 228 | } |
| 229 | |
| 230 | /* Multipart ended? */ |
| 231 | if (end && '-' == end[2 + boundary_len] && '-' == end[2 + boundary_len + 1]) { |
| 232 | dbg_error_msg("FINISHED MPART:'%s'", end); |
| 233 | break; |
| 234 | } |
| 235 | dbg_error_msg("FINISHED:'%s'", end); |
| 236 | free(end); |
| 237 | } /* end of "handle one non-multipart block" */ |
| 238 | |
| 239 | free(header); |
| 240 | } /* while (1) */ |
| 241 | |
| 242 | dbg_error_msg("ENDPARSE[%s]", boundary); |
| 243 | |
| 244 | return EXIT_SUCCESS; |
| 245 | } |
| 246 | |
| 247 | //usage:#define reformime_trivial_usage |
| 248 | //usage: "[OPTIONS]" |
| 249 | //usage:#define reformime_full_usage "\n\n" |
| 250 | //usage: "Parse MIME-encoded message on stdin\n" |
Denys Vlasenko | d616ab6 | 2011-05-22 03:46:33 +0200 | [diff] [blame] | 251 | //usage: "\n -x PREFIX Extract content of MIME sections to files" |
| 252 | //usage: "\n -X PROG ARGS Filter content of MIME sections through PROG" |
| 253 | //usage: "\n Must be the last option" |
| 254 | //usage: "\n" |
| 255 | //usage: "\nOther options are silently ignored" |
| 256 | |
/*
Usage: reformime [options]
    -d - parse a delivery status notification.
    -e - extract contents of MIME section.
    -x - extract MIME section to a file.
    -X - pipe MIME section to a program.
    -i - show MIME info.
    -s n.n.n.n - specify MIME section.
    -r - rewrite message, filling in missing MIME headers.
    -r7 - also convert 8bit/raw encoding to quoted-printable, if possible.
    -r8 - also convert quoted-printable encoding to 8bit, if possible.
    -c charset - default charset for rewriting, -o, and -O.
    -m [file] [file]... - create a MIME message digest.
    -h "header" - decode RFC 2047-encoded header.
    -o "header" - encode unstructured header using RFC 2047.
    -O "header" - encode address list header using RFC 2047.
*/
| 274 | |
| 275 | int reformime_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
| 276 | int reformime_main(int argc UNUSED_PARAM, char **argv) |
| 277 | { |
Denys Vlasenko | 25b2680 | 2020-12-17 12:24:50 +0100 | [diff] [blame] | 278 | unsigned opts; |
Denys Vlasenko | d616ab6 | 2011-05-22 03:46:33 +0200 | [diff] [blame] | 279 | const char *opt_prefix = ""; |
| 280 | |
| 281 | INIT_G(); |
| 282 | |
| 283 | // parse options |
| 284 | // N.B. only -x and -X are supported so far |
Denys Vlasenko | 22542ec | 2017-08-08 21:55:02 +0200 | [diff] [blame] | 285 | opts = getopt32(argv, "^" |
| 286 | "x:X" IF_FEATURE_REFORMIME_COMPAT("deis:r:c:m:*h:o:O:") |
| 287 | "\0" "x--X:X--x", |
Denys Vlasenko | d616ab6 | 2011-05-22 03:46:33 +0200 | [diff] [blame] | 288 | &opt_prefix |
| 289 | IF_FEATURE_REFORMIME_COMPAT(, NULL, NULL, &G.opt_charset, NULL, NULL, NULL, NULL) |
| 290 | ); |
| 291 | argv += optind; |
| 292 | |
| 293 | return parse("", (opts & OPT_X) ? argv : (char **)&opt_prefix); |
| 294 | } |