blob: 11d39cb66f2797d2c8dfddaafec610c618002a2b [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Eric Andersenb520e082000-10-03 00:21:45 +00006 *
Denis Vlasenkodb12d1d2008-12-07 00:52:58 +00007 * Licensed under GPLv2, see file LICENSE in this tarball for details.
Eric Andersen96700832000-09-04 15:15:55 +00008 */
Denis Vlasenkob6adbf12007-05-26 19:00:18 +00009#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000010
Eric Andersen79757c92001-04-05 21:45:54 +000011struct host_info {
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +000012 // May be used if we ever will want to free() all xstrdup()s...
13 /* char *allocated; */
Denis Vlasenko818322b2007-09-24 18:27:04 +000014 const char *path;
15 const char *user;
16 char *host;
17 int port;
18 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000019};
20
Denis Vlasenko77105632007-09-24 15:04:00 +000021
22/* Globals (can be accessed from signal handlers) */
23struct globals {
24 off_t content_len; /* Content-length of the file */
25 off_t beg_range; /* Range at which continue begins */
26#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000027 off_t transferred; /* Number of bytes transferred so far */
28 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010029 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000030#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +020031 smallint chunked; /* chunked transfer encoding */
32 smallint got_clen; /* got content-length: from server */
Denis Vlasenko77105632007-09-24 15:04:00 +000033};
34#define G (*(struct globals*)&bb_common_bufsiz1)
35struct BUG_G_too_big {
Denis Vlasenko6b404432008-01-07 16:13:14 +000036 char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
Denis Vlasenko77105632007-09-24 15:04:00 +000037};
38#define content_len (G.content_len )
39#define beg_range (G.beg_range )
Denis Vlasenko77105632007-09-24 15:04:00 +000040#define transferred (G.transferred )
41#define curfile (G.curfile )
Denis Vlasenko77105632007-09-24 15:04:00 +000042#define INIT_G() do { } while (0)
43
44
Denis Vlasenko9cade082006-11-21 10:43:02 +000045#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko47ddd012007-09-24 18:24:17 +000046
Denis Vlasenko00d84172008-11-24 07:34:42 +000047static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +000048{
Denis Vlasenkoa7ce2072007-09-24 18:25:08 +000049 /* We can be called from signal handler */
50 int save_errno = errno;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000051
Denis Vlasenko00d84172008-11-24 07:34:42 +000052 if (flag == -1) { /* first call to progress_meter */
Magnus Dammf5914992009-11-08 16:34:43 +010053 bb_progress_init(&G.pmt);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000054 }
55
Magnus Dammf5914992009-11-08 16:34:43 +010056 bb_progress_update(&G.pmt, curfile, beg_range, transferred,
57 G.chunked ? 0 : content_len + beg_range);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000058
Denis Vlasenkoa7ce2072007-09-24 18:25:08 +000059 if (flag == 0) {
Denis Vlasenko00d84172008-11-24 07:34:42 +000060 /* last call to progress_meter */
Denis Vlasenkoa7ce2072007-09-24 18:25:08 +000061 alarm(0);
Denis Vlasenko4daad902007-09-27 10:20:47 +000062 fputc('\n', stderr);
Magnus Dammf5914992009-11-08 16:34:43 +010063 transferred = 0;
Denis Vlasenkoa7ce2072007-09-24 18:25:08 +000064 } else {
Denis Vlasenko00d84172008-11-24 07:34:42 +000065 if (flag == -1) { /* first call to progress_meter */
66 signal_SA_RESTART_empty_mask(SIGALRM, progress_meter);
Denis Vlasenkoa7ce2072007-09-24 18:25:08 +000067 }
68 alarm(1);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000069 }
Denis Vlasenkoa7ce2072007-09-24 18:25:08 +000070
71 errno = save_errno;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000072}
Magnus Dammf5914992009-11-08 16:34:43 +010073
Denis Vlasenko47ddd012007-09-24 18:24:17 +000074#else /* FEATURE_WGET_STATUSBAR */
75
Denis Vlasenko00d84172008-11-24 07:34:42 +000076static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Denis Vlasenko47ddd012007-09-24 18:24:17 +000077
Eric Andersenb520e082000-10-03 00:21:45 +000078#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +000079
Denis Vlasenko47ddd012007-09-24 18:24:17 +000080
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Removes "%zone" from a bracketed IPv6 host in place:
 * "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * The string is left untouched if it does not start with '[', has no '%',
 * is not of the form "[xx]" / "[xx]:nn", or has its '%' after the ']'.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* A '%' located after the ']' (e.g. "[::1]:8%0") is not a zone id.
	 * Copying "]..." forward onto it would never reach the terminating
	 * NUL and would run off the end of the buffer. */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]..." */
	memmove(scope, cp, strlen(cp) + 1);
}
116
/* Read up to NMEMB bytes from STREAM into PTR, restarting the read
 * whenever it was cut short by a signal (EINTR).
 * Returns the number of bytes stored; the count is short when EOF or
 * a non-interrupt error is hit. */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *const start = (char*)ptr;
	size_t done = 0;

	for (;;) {
		size_t got;

		/* reset state so that ferror()/errno describe this attempt only */
		clearerr(stream);
		errno = 0;
		got = fread(start + done, 1, nmemb - done, stream);
		done += got;

		if (done == nmemb)
			break;          /* everything requested has arrived */
		if (!ferror(stream) || errno != EINTR)
			break;          /* EOF or a real error: give up */
	}

	return done;
}
134
/* fgets() wrapper which retries when the call was interrupted by a signal.
 * Reads a line, or at most SIZE-1 bytes, from STREAM into S.
 * Returns S, or NULL on EOF or on an error other than EINTR. */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		/* reset state so that ferror()/errno describe this attempt only */
		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);

		if (line != NULL)
			return line;
		if (!ferror(stream) || errno != EINTR)
			return NULL;
		/* interrupted by a signal: try again */
	}
}
149
Denis Vlasenko9cade082006-11-21 10:43:02 +0000150#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denis Vlasenko12d21292007-06-27 21:40:07 +0000151/* Base64-encode character string. buf is assumed to be char buf[512]. */
152static char *base64enc_512(char buf[512], const char *str)
Denis Vlasenko3526a132006-09-09 12:20:57 +0000153{
Denis Vlasenko12d21292007-06-27 21:40:07 +0000154 unsigned len = strlen(str);
155 if (len > 512/4*3 - 10) /* paranoia */
156 len = 512/4*3 - 10;
157 bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
Rob Landley76ef08c2006-06-13 16:44:26 +0000158 return buf;
Eric Andersen79757c92001-04-05 21:45:54 +0000159}
160#endif
161
/* Cut S at the first byte below ' ' (any control character, or the
 * terminating NUL itself). Bytes with the high bit set are kept.
 * Returns S, so the call can be used directly as a printf argument. */
static char* sanitize_string(char *s)
{
	char *cut;

	for (cut = s; (unsigned char)*cut >= ' '; cut++)
		continue;
	*cut = '\0';
	return s;
}
170
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000171static FILE *open_socket(len_and_sockaddr *lsa)
172{
173 FILE *fp;
174
175 /* glibc 2.4 seems to try seeking on it - ??! */
176 /* hopefully it understands what ESPIPE means... */
177 fp = fdopen(xconnect_stream(lsa), "r+");
178 if (fp == NULL)
179 bb_perror_msg_and_die("fdopen");
180
181 return fp;
182}
183
/*
 * Send an FTP command and wait for the final reply line.
 *
 * If s1 is not NULL, "<s1><s2>\r\n" is written to fp (a NULL s2 is treated
 * as ""). With s1 == NULL nothing is sent and only a reply is awaited.
 *
 * Reply lines are read into buf (at most 509 characters each) until one
 * is of the form "NNN text": it starts with a digit and has a space in
 * the fourth position. Everything else (e.g. "NNN-..." continuation
 * lines) is skipped.
 *
 * Returns the numeric reply code. On return buf holds the final line with
 * a trailing "\r\n" stripped, so buf+4 is the reply text.
 * Does not return if the reply cannot be read.
 */
static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
{
	int result;

	if (s1) {
		if (!s2)
			s2 = "";
		fprintf(fp, "%s%s\r\n", s1, s2);
		fflush(fp);
	}

	do {
		char *buf_ptr;

		if (fgets(buf, 510, fp) == NULL) {
			bb_perror_msg_and_die("error getting response");
		}
		buf_ptr = strstr(buf, "\r\n");
		if (buf_ptr) {
			*buf_ptr = '\0';
		}
		/* The length check comes first: for a line shorter than 4 chars
		 * buf[3] may lie beyond what fgets() just stored and still hold
		 * bytes of a previous reply (or never-written memory). */
	} while (strlen(buf) < 4 || !isdigit((unsigned char)buf[0]) || buf[3] != ' ');

	/* Temporarily terminate after the three-digit code to convert it */
	buf[3] = '\0';
	result = xatoi_u(buf);
	buf[3] = ' ';
	return result;
}
210
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000211static void parse_url(char *src_url, struct host_info *h)
212{
213 char *url, *p, *sp;
214
215 /* h->allocated = */ url = xstrdup(src_url);
216
217 if (strncmp(url, "http://", 7) == 0) {
218 h->port = bb_lookup_port("http", "tcp", 80);
219 h->host = url + 7;
220 h->is_ftp = 0;
221 } else if (strncmp(url, "ftp://", 6) == 0) {
222 h->port = bb_lookup_port("ftp", "tcp", 21);
223 h->host = url + 6;
224 h->is_ftp = 1;
225 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200226 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000227
228 // FYI:
229 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
230 // 'GET /?var=a/b HTTP 1.0'
231 // and saves 'index.html?var=a%2Fb' (we save 'b')
232 // wget 'http://busybox.net?login=john@doe':
233 // request: 'GET /?login=john@doe HTTP/1.0'
234 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
235 // wget 'http://busybox.net#test/test':
236 // request: 'GET / HTTP/1.0'
237 // saves: 'index.html' (we save 'test')
238 //
239 // We also don't add unique .N suffix if file exists...
240 sp = strchr(h->host, '/');
241 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
242 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
243 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000244 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000245 } else if (*sp == '/') {
246 *sp = '\0';
247 h->path = sp + 1;
248 } else { // '#' or '?'
249 // http://busybox.net?login=john@doe is a valid URL
250 // memmove converts to:
251 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000252 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000253 h->host--;
254 sp[-1] = '\0';
255 h->path = sp;
256 }
257
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200258 // We used to set h->user to NULL here, but this interferes
259 // with handling of code 302 ("object was moved")
260
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000261 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000262 if (sp != NULL) {
263 h->user = h->host;
264 *sp = '\0';
265 h->host = sp + 1;
266 }
267
268 sp = h->host;
269}
270
/*
 * Read one header line from fp into buf (capacity bufsiz).
 *
 * Returns NULL at end of input or when the line is the blank line which
 * ends the headers. Otherwise buf holds the lower-cased, NUL-terminated
 * header name, and the return value points at the header value inside
 * buf (leading whitespace skipped, line ending removed).
 * If the line did not fit into buf, the value is truncated and the rest
 * of the line is read and thrown away.
 * A line which is not of the form "name:..." is a fatal error.
 */
static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
{
	char *cur, *value;
	int ch;

	/* *istrunc = 0; */

	/* fetch the raw line */
	if (!fgets(buf, bufsiz, fp))
		return NULL;

	/* a line made only of CRs followed by LF terminates the headers */
	cur = buf;
	while (*cur == '\r')
		cur++;
	if (*cur == '\n')
		return NULL;

	/* lower-case the header name in place */
	cur = buf;
	while (isalnum(*cur) || *cur == '-' || *cur == '.') {
		*cur = tolower(*cur);
		cur++;
	}

	/* the name must be immediately followed by a colon */
	if (*cur != ':')
		bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));

	/* split name from value */
	*cur = '\0';
	cur++;
	value = skip_whitespace(cur);

	/* cut the value at the line ending, if it is inside the buffer */
	for (; *cur != '\0'; cur++) {
		if (*cur == '\r' || *cur == '\n') {
			*cur = '\0';
			return value;
		}
	}

	/* Rats! The buffer isn't big enough to hold the entire header value:
	 * discard the remainder of the line */
	do {
		ch = getc(fp);
	} while (ch != EOF && ch != '\n');
	/* *istrunc = 1; */
	return value;
}
316
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000317#if ENABLE_FEATURE_WGET_LONG_OPTIONS
318static char *URL_escape(const char *str)
319{
320 /* URL encode, see RFC 2396 */
321 char *dst;
322 char *res = dst = xmalloc(strlen(str) * 3 + 1);
323 unsigned char c;
324
325 while (1) {
326 c = *str++;
327 if (c == '\0'
328 /* || strchr("!&'()*-.=_~", c) - more code */
329 || c == '!'
330 || c == '&'
331 || c == '\''
332 || c == '('
333 || c == ')'
334 || c == '*'
335 || c == '-'
336 || c == '.'
337 || c == '='
338 || c == '_'
339 || c == '~'
340 || (c >= '0' && c <= '9')
341 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
342 ) {
343 *dst++ = c;
344 if (c == '\0')
345 return res;
346 } else {
347 *dst++ = '%';
348 *dst++ = bb_hexdigits_upcase[c >> 4];
349 *dst++ = bb_hexdigits_upcase[c & 0xf];
350 }
351 }
352}
353#endif
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000354
Denys Vlasenko7f432802009-06-28 01:02:24 +0200355static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
356{
357 char buf[512];
358 FILE *sfp;
359 char *str;
360 int port;
361
362 if (!target->user)
363 target->user = xstrdup("anonymous:busybox@");
364
365 sfp = open_socket(lsa);
366 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200367 bb_error_msg_and_die("%s", sanitize_string(buf+4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200368
369 /*
370 * Splitting username:password pair,
371 * trying to log in
372 */
373 str = strchr(target->user, ':');
374 if (str)
375 *str++ = '\0';
376 switch (ftpcmd("USER ", target->user, sfp, buf)) {
377 case 230:
378 break;
379 case 331:
380 if (ftpcmd("PASS ", str, sfp, buf) == 230)
381 break;
382 /* fall through (failed login) */
383 default:
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200384 bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200385 }
386
387 ftpcmd("TYPE I", NULL, sfp, buf);
388
389 /*
390 * Querying file size
391 */
392 if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
393 content_len = BB_STRTOOFF(buf+4, NULL, 10);
394 if (errno || content_len < 0) {
395 bb_error_msg_and_die("SIZE value is garbage");
396 }
397 G.got_clen = 1;
398 }
399
400 /*
401 * Entering passive mode
402 */
403 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
404 pasv_error:
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200405 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200406 }
407 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
408 // Server's IP is N1.N2.N3.N4 (we ignore it)
409 // Server's port for data connection is P1*256+P2
410 str = strrchr(buf, ')');
411 if (str) str[0] = '\0';
412 str = strrchr(buf, ',');
413 if (!str) goto pasv_error;
414 port = xatou_range(str+1, 0, 255);
415 *str = '\0';
416 str = strrchr(buf, ',');
417 if (!str) goto pasv_error;
418 port += xatou_range(str+1, 0, 255) * 256;
419 set_nport(lsa, htons(port));
420
421 *dfpp = open_socket(lsa);
422
423 if (beg_range) {
424 sprintf(buf, "REST %"OFF_FMT"d", beg_range);
425 if (ftpcmd(buf, NULL, sfp, buf) == 350)
426 content_len -= beg_range;
427 }
428
429 if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200430 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200431
432 return sfp;
433}
434
435/* Must match option string! */
436enum {
437 WGET_OPT_CONTINUE = (1 << 0),
438 WGET_OPT_SPIDER = (1 << 1),
439 WGET_OPT_QUIET = (1 << 2),
440 WGET_OPT_OUTNAME = (1 << 3),
441 WGET_OPT_PREFIX = (1 << 4),
442 WGET_OPT_PROXY = (1 << 5),
443 WGET_OPT_USER_AGENT = (1 << 6),
444 WGET_OPT_RETRIES = (1 << 7),
445 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8),
446 WGET_OPT_PASSIVE = (1 << 9),
447 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
448 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
449};
450
451static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
452{
453 char buf[512];
454
455 if (!(option_mask32 & WGET_OPT_QUIET))
456 progress_meter(-1);
457
458 if (G.chunked)
459 goto get_clen;
460
461 /* Loops only if chunked */
462 while (1) {
463 while (content_len > 0 || !G.got_clen) {
464 int n;
465 unsigned rdsz = sizeof(buf);
466
467 if (content_len < sizeof(buf) && (G.chunked || G.got_clen))
468 rdsz = (unsigned)content_len;
469 n = safe_fread(buf, rdsz, dfp);
470 if (n <= 0) {
471 if (ferror(dfp)) {
472 /* perror will not work: ferror doesn't set errno */
473 bb_error_msg_and_die(bb_msg_read_error);
474 }
475 break;
476 }
477 xwrite(output_fd, buf, n);
478#if ENABLE_FEATURE_WGET_STATUSBAR
479 transferred += n;
480#endif
481 if (G.got_clen)
482 content_len -= n;
483 }
484
485 if (!G.chunked)
486 break;
487
488 safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
489 get_clen:
490 safe_fgets(buf, sizeof(buf), dfp);
491 content_len = STRTOOFF(buf, NULL, 16);
492 /* FIXME: error check? */
493 if (content_len == 0)
494 break; /* all done! */
495 }
496
497 if (!(option_mask32 & WGET_OPT_QUIET))
498 progress_meter(0);
499}
500
Denis Vlasenko9b49a5e2007-10-11 10:05:36 +0000501int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
Denis Vlasenkoa60f84e2008-07-05 09:18:54 +0000502int wget_main(int argc UNUSED_PARAM, char **argv)
Eric Andersen96700832000-09-04 15:15:55 +0000503{
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000504 char buf[512];
Eric Andersen79757c92001-04-05 21:45:54 +0000505 struct host_info server, target;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000506 len_and_sockaddr *lsa;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000507 unsigned opt;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200508 int redir_limit;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200509 char *proxy = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000510 char *dir_prefix = NULL;
511#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000512 char *post_data;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000513 char *extra_headers = NULL;
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000514 llist_t *headers_llist = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000515#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200516 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000517 FILE *dfp; /* socket to ftp server (data) */
518 char *fname_out; /* where to direct output (-O) */
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000519 int output_fd = -1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200520 bool use_proxy; /* Use proxies if env vars are set */
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000521 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000522 const char *user_agent = "Wget";/* "User-Agent" header field */
Denis Vlasenko77105632007-09-24 15:04:00 +0000523
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000524 static const char keywords[] ALIGN1 =
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000525 "content-length\0""transfer-encoding\0""chunked\0""location\0";
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000526 enum {
527 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
528 };
Bernhard Reutner-Fischer289e86a2006-08-20 20:01:24 +0000529#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000530 static const char wget_longopts[] ALIGN1 =
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000531 /* name, has_arg, val */
532 "continue\0" No_argument "c"
533 "spider\0" No_argument "s"
534 "quiet\0" No_argument "q"
535 "output-document\0" Required_argument "O"
536 "directory-prefix\0" Required_argument "P"
537 "proxy\0" Required_argument "Y"
538 "user-agent\0" Required_argument "U"
Denis Vlasenko50af9262009-03-02 15:08:06 +0000539 /* Ignored: */
540 // "tries\0" Required_argument "t"
541 // "timeout\0" Required_argument "T"
542 /* Ignored (we always use PASV): */
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000543 "passive-ftp\0" No_argument "\xff"
544 "header\0" Required_argument "\xfe"
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000545 "post-data\0" Required_argument "\xfd"
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000546 ;
Denis Vlasenko77105632007-09-24 15:04:00 +0000547#endif
548
549 INIT_G();
550
551#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000552 applet_long_options = wget_longopts;
Bernhard Reutner-Fischer8d3a6f72006-05-31 14:11:38 +0000553#endif
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000554 /* server.allocated = target.allocated = NULL; */
Denis Vlasenko5e34ff22009-04-21 11:09:40 +0000555 opt_complementary = "-1" IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
Denis Vlasenko540ab702008-06-29 00:32:35 +0000556 opt = getopt32(argv, "csqO:P:Y:U:" /*ignored:*/ "t:T:",
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000557 &fname_out, &dir_prefix,
Denis Vlasenko540ab702008-06-29 00:32:35 +0000558 &proxy_flag, &user_agent,
559 NULL, /* -t RETRIES */
560 NULL /* -T NETWORK_READ_TIMEOUT */
Denis Vlasenko5e34ff22009-04-21 11:09:40 +0000561 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
562 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000563 );
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000564#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko7534e082006-10-23 23:21:58 +0000565 if (headers_llist) {
566 int size = 1;
567 char *cp;
Denis Vlasenko8d9f4952007-04-08 15:08:42 +0000568 llist_t *ll = headers_llist;
Denis Vlasenko7534e082006-10-23 23:21:58 +0000569 while (ll) {
570 size += strlen(ll->data) + 2;
571 ll = ll->link;
572 }
573 extra_headers = cp = xmalloc(size);
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000574 while (headers_llist) {
Denis Vlasenkod50dda82008-06-15 05:40:56 +0000575 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
Eric Andersen96700832000-09-04 15:15:55 +0000576 }
577 }
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000578#endif
Tim Rikerc1ef7bd2006-01-25 00:08:53 +0000579
Denys Vlasenko7f432802009-06-28 01:02:24 +0200580 /* TODO: compat issue: should handle "wget URL1 URL2..." */
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200581
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200582 target.user = NULL;
Eric Andersen79757c92001-04-05 21:45:54 +0000583 parse_url(argv[optind], &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000584
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000585 /* Use the proxy if necessary */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200586 use_proxy = (strcmp(proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000587 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000588 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200589 if (proxy && proxy[0]) {
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000590 parse_url(proxy, &server);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000591 } else {
592 use_proxy = 0;
593 }
Robert Griebld7760112002-05-14 23:36:45 +0000594 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200595 if (!use_proxy) {
596 server.port = target.port;
597 if (ENABLE_FEATURE_IPV6) {
598 server.host = xstrdup(target.host);
599 } else {
600 server.host = target.host;
601 }
602 }
603
604 if (ENABLE_FEATURE_IPV6)
605 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000606
Denis Vlasenko818322b2007-09-24 18:27:04 +0000607 /* Guess an output filename, if there was no -O FILE */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000608 if (!(opt & WGET_OPT_OUTNAME)) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000609 fname_out = bb_get_last_path_component_nostrip(target.path);
610 /* handle "wget http://kernel.org//" */
611 if (fname_out[0] == '/' || !fname_out[0])
Denis Vlasenkob6aae0f2007-01-29 22:51:25 +0000612 fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000613 /* -P DIR is considered only if there was no -O FILE */
614 if (dir_prefix)
Matt Kraai0382eb82001-07-19 19:13:55 +0000615 fname_out = concat_path_file(dir_prefix, fname_out);
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000616 } else {
617 if (LONE_DASH(fname_out)) {
618 /* -O - */
619 output_fd = 1;
620 opt &= ~WGET_OPT_CONTINUE;
621 }
Eric Andersen29edd002000-12-09 16:55:35 +0000622 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000623#if ENABLE_FEATURE_WGET_STATUSBAR
624 curfile = bb_get_last_path_component_nostrip(fname_out);
625#endif
626
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000627 /* Impossible?
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000628 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200629 bb_error_msg_and_die("cannot specify continue (-c) without a filename (-O)");
630 */
Eric Andersen29edd002000-12-09 16:55:35 +0000631
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000632 /* Determine where to start transfer */
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000633 if (opt & WGET_OPT_CONTINUE) {
Denis Vlasenko7039a662006-10-08 17:54:47 +0000634 output_fd = open(fname_out, O_WRONLY);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000635 if (output_fd >= 0) {
Denis Vlasenkoea620772006-10-14 02:23:43 +0000636 beg_range = xlseek(output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000637 }
638 /* File doesn't exist. We do not create file here yet.
Denys Vlasenko7f432802009-06-28 01:02:24 +0200639 * We are not sure it exists on remove side */
Eric Andersen96700832000-09-04 15:15:55 +0000640 }
641
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200642 redir_limit = 5;
643 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000644 lsa = xhost2sockaddr(server.host, server.port);
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000645 if (!(opt & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200646 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
647 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
648 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000649 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200650 establish_session:
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000651 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000652 /*
653 * HTTP session
654 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200655 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200656 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200657
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200658 /* Open socket to http server */
659 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200660
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200661 /* Send HTTP request */
662 if (use_proxy) {
663 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
664 target.is_ftp ? "f" : "ht", target.host,
665 target.path);
666 } else {
667 if (opt & WGET_OPT_POST_DATA)
668 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
669 else
670 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
671 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000672
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200673 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
674 target.host, user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000675
Denis Vlasenko9cade082006-11-21 10:43:02 +0000676#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200677 if (target.user) {
678 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
679 base64enc_512(buf, target.user));
680 }
681 if (use_proxy && server.user) {
682 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
683 base64enc_512(buf, server.user));
684 }
Eric Andersen79757c92001-04-05 21:45:54 +0000685#endif
686
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200687 if (beg_range)
688 fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000689#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200690 if (extra_headers)
691 fputs(extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000692
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200693 if (opt & WGET_OPT_POST_DATA) {
694 char *estr = URL_escape(post_data);
695 fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
696 fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
697 (int) strlen(estr), estr);
698 /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
699 /*fprintf(sfp, "%s\r\n", estr);*/
700 free(estr);
701 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000702#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200703 { /* If "Connection:" is needed, document why */
704 fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
705 }
Eric Andersen79757c92001-04-05 21:45:54 +0000706
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200707 /*
708 * Retrieve HTTP response line and check for "200" status code.
709 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000710 read_response:
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200711 if (fgets(buf, sizeof(buf), sfp) == NULL)
712 bb_error_msg_and_die("no response from server");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000713
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200714 str = buf;
715 str = skip_non_whitespace(str);
716 str = skip_whitespace(str);
717 // FIXME: no error check
718 // xatou wouldn't work: "200 OK"
719 status = atoi(str);
720 switch (status) {
721 case 0:
722 case 100:
723 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
724 /* eat all remaining headers */;
725 goto read_response;
726 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000727/*
728Response 204 doesn't say "null file", it says "metadata
729has changed but data didn't":
730
731"10.2.5 204 No Content
732The server has fulfilled the request but does not need to return
733an entity-body, and might want to return updated metainformation.
734The response MAY include new or updated metainformation in the form
735of entity-headers, which if present SHOULD be associated with
736the requested variant.
737
738If the client is a user agent, it SHOULD NOT change its document
739view from that which caused the request to be sent. This response
740is primarily intended to allow input for actions to take place
741without causing a change to the user agent's active document view,
742although any new or updated metainformation SHOULD be applied
743to the document currently in the user agent's active view.
744
745The 204 response MUST NOT include a message-body, and thus
746is always terminated by the first empty line after the header fields."
747
748However, in real world it was observed that some web servers
749(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
750*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200751 case 204:
752 break;
753 case 300: /* redirection */
754 case 301:
755 case 302:
756 case 303:
757 break;
758 case 206:
759 if (beg_range)
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000760 break;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200761 /* fall through */
762 default:
763 bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
764 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000765
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200766 /*
767 * Retrieve HTTP headers.
768 */
769 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
770 /* gethdr converted "FOO:" string to lowercase */
Matthijs van de Water0d586662009-08-22 20:19:48 +0200771 smalluint key;
772 /* strip trailing whitespace */
773 char *s = strchrnul(str, '\0') - 1;
774 while (s >= str && (*s == ' ' || *s == '\t')) {
775 *s = '\0';
776 s--;
777 }
778 key = index_in_strings(keywords, buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200779 if (key == KEY_content_length) {
780 content_len = BB_STRTOOFF(str, NULL, 10);
781 if (errno || content_len < 0) {
782 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000783 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200784 G.got_clen = 1;
785 continue;
786 }
787 if (key == KEY_transfer_encoding) {
788 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
789 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
790 G.chunked = G.got_clen = 1;
791 }
792 if (key == KEY_location && status >= 300) {
793 if (--redir_limit == 0)
794 bb_error_msg_and_die("too many redirections");
795 fclose(sfp);
796 G.got_clen = 0;
797 G.chunked = 0;
798 if (str[0] == '/')
799 /* free(target.allocated); */
800 target.path = /* target.allocated = */ xstrdup(str+1);
801 /* lsa stays the same: it's on the same server */
802 else {
803 parse_url(str, &target);
804 if (!use_proxy) {
805 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200806 /* strip_ipv6_scope_id(target.host); - no! */
807 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200808 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000809 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200810 goto resolve_lsa;
811 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000812 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200813 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000814 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200815 }
816// if (status >= 300)
817// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000818
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200819 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000820 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000821
822 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000823 /*
824 * FTP session
825 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200826 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000827 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000828
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000829 if (opt & WGET_OPT_SPIDER) {
830 if (ENABLE_FEATURE_CLEAN_UP)
831 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000832 return EXIT_SUCCESS;
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000833 }
Eric Andersen79757c92001-04-05 21:45:54 +0000834
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000835 if (output_fd < 0) {
836 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
837 /* compat with wget: -O FILE can overwrite */
838 if (opt & WGET_OPT_OUTNAME)
839 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
840 output_fd = xopen(fname_out, o_flags);
841 }
Denis Vlasenkof8aa1092006-10-01 10:58:54 +0000842
Denys Vlasenko7f432802009-06-28 01:02:24 +0200843 retrieve_file_data(dfp, output_fd);
Rob Landley19a39402006-06-13 17:10:26 +0000844
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200845 if (dfp != sfp) {
846 /* It's ftp. Close it properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000847 fclose(dfp);
848 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200849 bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
Eric Andersen79757c92001-04-05 21:45:54 +0000850 ftpcmd("QUIT", NULL, sfp, buf);
851 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000852
853 return EXIT_SUCCESS;
Eric Andersen96700832000-09-04 15:15:55 +0000854}