/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
 */

//usage:#define wget_trivial_usage
//usage: IF_FEATURE_WGET_LONG_OPTIONS(
//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
//usage: " [--no-check-certificate] [-U|--user-agent AGENT]"
//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
//usage: )
//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
//usage: )
//usage:#define wget_full_usage "\n\n"
//usage: "Retrieve files via HTTP or FTP\n"
//usage: "\nOptions:"
//usage: "\n -s Spider mode - only check file existence"
//usage: "\n -c Continue retrieval of aborted transfer"
//usage: "\n -q Quiet"
//usage: "\n -P DIR Save to DIR (default .)"
//usage: IF_FEATURE_WGET_TIMEOUT(
//usage: "\n -T SEC Network read timeout is SEC seconds"
//usage: )
//usage: "\n -O FILE Save to FILE ('-' for stdout)"
//usage: "\n -U STR Use STR for User-Agent header"
//usage: "\n -Y Use proxy ('on' or 'off')"

#include "libbb.h"

/* Protocol trace hook. It is compiled out by default; enable the first
 * definition (and disable the second) to route the trace to bb_error_msg().
 */
//#define log_io(...) bb_error_msg(__VA_ARGS__)
#define log_io(...) ((void)0)


Eric Andersen79757c92001-04-05 21:45:54 +000043struct host_info {
Denys Vlasenkoa3661092011-02-13 02:33:11 +010044 char *allocated;
Denis Vlasenko818322b2007-09-24 18:27:04 +000045 const char *path;
46 const char *user;
47 char *host;
48 int port;
49 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000050};
51
Denis Vlasenko77105632007-09-24 15:04:00 +000052
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020053/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000054struct globals {
55 off_t content_len; /* Content-length of the file */
56 off_t beg_range; /* Range at which continue begins */
57#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000058 off_t transferred; /* Number of bytes transferred so far */
59 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010060 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000061#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +010062 char *dir_prefix;
63#if ENABLE_FEATURE_WGET_LONG_OPTIONS
64 char *post_data;
65 char *extra_headers;
66#endif
67 char *fname_out; /* where to direct output (-O) */
68 const char *proxy_flag; /* Use proxies if env vars are set */
69 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020070#if ENABLE_FEATURE_WGET_TIMEOUT
71 unsigned timeout_seconds;
72#endif
Denys Vlasenko2384a352011-02-15 00:58:36 +010073 int output_fd;
74 int o_flags;
Denys Vlasenko7f432802009-06-28 01:02:24 +020075 smallint chunked; /* chunked transfer encoding */
76 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010077 /* Local downloads do benefit from big buffer.
78 * With 512 byte buffer, it was measured to be
79 * an order of magnitude slower than with big one.
80 */
81 uint64_t just_to_align_next_member;
82 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010083} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010084#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020085#define INIT_G() do { \
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010086 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020087 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
88} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000089
90
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020091/* Must match option string! */
92enum {
93 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +020094 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020095 WGET_OPT_QUIET = (1 << 2),
96 WGET_OPT_OUTNAME = (1 << 3),
97 WGET_OPT_PREFIX = (1 << 4),
98 WGET_OPT_PROXY = (1 << 5),
99 WGET_OPT_USER_AGENT = (1 << 6),
100 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
101 WGET_OPT_RETRIES = (1 << 8),
102 WGET_OPT_PASSIVE = (1 << 9),
103 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
104 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
105};
106
/* Values for the 'flag' argument of progress_meter() */
enum {
	PROGRESS_START = -1,
	PROGRESS_END   = 0,
	PROGRESS_BUMP  = 1,
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000112#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000113static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000114{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200115 if (option_mask32 & WGET_OPT_QUIET)
116 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000117
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200118 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100119 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000120
Denys Vlasenko2384a352011-02-15 00:58:36 +0100121 bb_progress_update(&G.pmt,
122 G.beg_range,
123 G.transferred,
124 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
125 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000126
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200127 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100128 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +0200129 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100130 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000131 }
132}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200133#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000134static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000135#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000136
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000137
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits 'host' in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Strings that do not start with '[', contain no '%', or are not of the
 * form "[xx]" / "[xx]:nn" are left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]..." */
	overlapping_strcpy(scope, cp);
}

#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode character string.
 * The result lives in G.wget_buf (which is also what is returned), so it is
 * overwritten by the next user of that buffer. Input longer than the cap
 * computed below is silently truncated before encoding.
 */
static char *base64enc(const char *str)
{
	unsigned len = strlen(str);
	if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
		len = sizeof(G.wget_buf)/4*3 - 10;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif

/* Cut 's' at the first byte below ' ' (any control character, e.g. '\r',
 * '\n' or '\t') so that the string is safe to show in a message.
 * Bytes are compared as unsigned char, therefore bytes >= 0x80 are kept.
 * Modifies 's' in place and returns it.
 */
static char* sanitize_string(char *s)
{
	unsigned char *p = (void *) s;
	while (*p >= ' ')
		p++;
	*p = '\0';
	return s;
}

Denis Vlasenko47ddd012007-09-24 18:24:17 +0000195static FILE *open_socket(len_and_sockaddr *lsa)
196{
197 FILE *fp;
198
199 /* glibc 2.4 seems to try seeking on it - ??! */
200 /* hopefully it understands what ESPIPE means... */
201 fp = fdopen(xconnect_stream(lsa), "r+");
202 if (fp == NULL)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100203 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000204
205 return fp;
206}
207
Denys Vlasenkof836f012011-02-10 23:02:28 +0100208/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
209static char fgets_and_trim(FILE *fp)
210{
211 char c;
212 char *buf_ptr;
213
214 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
215 bb_perror_msg_and_die("error getting response");
216
217 buf_ptr = strchrnul(G.wget_buf, '\n');
218 c = *buf_ptr;
219 *buf_ptr = '\0';
220 buf_ptr = strchrnul(G.wget_buf, '\r');
221 *buf_ptr = '\0';
222
223 log_io("< %s", G.wget_buf);
224
225 return c;
226}
227
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100228static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000229{
230 int result;
231 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100232 if (!s2)
233 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000234 fprintf(fp, "%s%s\r\n", s1, s2);
235 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100236 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000237 }
238
239 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100240 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100241 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000242
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100243 G.wget_buf[3] = '\0';
244 result = xatoi_positive(G.wget_buf);
245 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000246 return result;
247}
248
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100249static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000250{
251 char *url, *p, *sp;
252
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100253 free(h->allocated);
254 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000255
256 if (strncmp(url, "http://", 7) == 0) {
257 h->port = bb_lookup_port("http", "tcp", 80);
258 h->host = url + 7;
259 h->is_ftp = 0;
260 } else if (strncmp(url, "ftp://", 6) == 0) {
261 h->port = bb_lookup_port("ftp", "tcp", 21);
262 h->host = url + 6;
263 h->is_ftp = 1;
264 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200265 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000266
267 // FYI:
268 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
269 // 'GET /?var=a/b HTTP 1.0'
270 // and saves 'index.html?var=a%2Fb' (we save 'b')
271 // wget 'http://busybox.net?login=john@doe':
272 // request: 'GET /?login=john@doe HTTP/1.0'
273 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
274 // wget 'http://busybox.net#test/test':
275 // request: 'GET / HTTP/1.0'
276 // saves: 'index.html' (we save 'test')
277 //
278 // We also don't add unique .N suffix if file exists...
279 sp = strchr(h->host, '/');
280 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
281 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
282 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000283 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000284 } else if (*sp == '/') {
285 *sp = '\0';
286 h->path = sp + 1;
287 } else { // '#' or '?'
288 // http://busybox.net?login=john@doe is a valid URL
289 // memmove converts to:
290 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000291 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000292 h->host--;
293 sp[-1] = '\0';
294 h->path = sp;
295 }
296
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200297 // We used to set h->user to NULL here, but this interferes
298 // with handling of code 302 ("object was moved")
299
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000300 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000301 if (sp != NULL) {
302 h->user = h->host;
303 *sp = '\0';
304 h->host = sp + 1;
305 }
306
307 sp = h->host;
308}
309
Denys Vlasenkof836f012011-02-10 23:02:28 +0100310static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000311{
312 char *s, *hdrval;
313 int c;
314
315 /* *istrunc = 0; */
316
317 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100318 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000319
Denys Vlasenkof836f012011-02-10 23:02:28 +0100320 /* end of the headers? */
321 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000322 return NULL;
323
324 /* convert the header name to lower case */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100325 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
Denys Vlasenko48363312010-04-04 15:29:32 +0200326 /* tolower for "A-Z", no-op for "0-9a-z-." */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100327 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200328 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000329
330 /* verify we are at the end of the header name */
331 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100332 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000333
334 /* locate the start of the header value */
335 *s++ = '\0';
336 hdrval = skip_whitespace(s);
337
Denys Vlasenkof836f012011-02-10 23:02:28 +0100338 if (c != '\n') {
339 /* Rats! The buffer isn't big enough to hold the entire header value */
340 while (c = getc(fp), c != EOF && c != '\n')
341 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000342 }
343
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000344 return hdrval;
345}
346
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* Return a newly allocated URL-encoded copy of 'str' (caller frees).
 * ASCII letters, digits and !&'()*-.=_~ are copied as is, every other
 * byte becomes %XX with uppercase hex digits.
 */
static char *URL_escape(const char *str)
{
	/* URL encode, see RFC 2396 */
	char *dst;
	/* worst case: every byte expands to 3 chars, plus the NUL */
	char *res = dst = xmalloc(strlen(str) * 3 + 1);
	unsigned char c;

	while (1) {
		c = *str++;
		if (c == '\0'
		/* || strchr("!&'()*-.=_~", c) - more code */
		 || c == '!'
		 || c == '&'
		 || c == '\''
		 || c == '('
		 || c == ')'
		 || c == '*'
		 || c == '-'
		 || c == '.'
		 || c == '='
		 || c == '_'
		 || c == '~'
		 || (c >= '0' && c <= '9')
		 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
		) {
			/* NUL takes this branch too: it gets copied, then we are done */
			*dst++ = c;
			if (c == '\0')
				return res;
		} else {
			*dst++ = '%';
			*dst++ = bb_hexdigits_upcase[c >> 4];
			*dst++ = bb_hexdigits_upcase[c & 0xf];
		}
	}
}
#endif

Denys Vlasenko7f432802009-06-28 01:02:24 +0200385static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
386{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200387 FILE *sfp;
388 char *str;
389 int port;
390
391 if (!target->user)
392 target->user = xstrdup("anonymous:busybox@");
393
394 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100395 if (ftpcmd(NULL, NULL, sfp) != 220)
396 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200397
398 /*
399 * Splitting username:password pair,
400 * trying to log in
401 */
402 str = strchr(target->user, ':');
403 if (str)
404 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100405 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200406 case 230:
407 break;
408 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100409 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200410 break;
411 /* fall through (failed login) */
412 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100413 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200414 }
415
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100416 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200417
418 /*
419 * Querying file size
420 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100421 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
422 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100423 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200424 bb_error_msg_and_die("SIZE value is garbage");
425 }
426 G.got_clen = 1;
427 }
428
429 /*
430 * Entering passive mode
431 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100432 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200433 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100434 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200435 }
436 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
437 // Server's IP is N1.N2.N3.N4 (we ignore it)
438 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100439 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200440 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100441 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200442 if (!str) goto pasv_error;
443 port = xatou_range(str+1, 0, 255);
444 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100445 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200446 if (!str) goto pasv_error;
447 port += xatou_range(str+1, 0, 255) * 256;
448 set_nport(lsa, htons(port));
449
450 *dfpp = open_socket(lsa);
451
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100452 if (G.beg_range) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100453 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
454 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100455 G.content_len -= G.beg_range;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200456 }
457
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100458 if (ftpcmd("RETR ", target->path, sfp) > 150)
459 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200460
461 return sfp;
462}
463
Denys Vlasenko2384a352011-02-15 00:58:36 +0100464static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200465{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200466#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
467# if ENABLE_FEATURE_WGET_TIMEOUT
468 unsigned second_cnt;
469# endif
470 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200471
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200472 polldata.fd = fileno(dfp);
473 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200474#endif
475 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200476
477 if (G.chunked)
478 goto get_clen;
479
480 /* Loops only if chunked */
481 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100482
483#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
484 /* Must use nonblocking I/O, otherwise fread will loop
485 * and *block* until it reads full buffer,
486 * which messes up progress bar and/or timeout logic.
487 * Because of nonblocking I/O, we need to dance
488 * very carefully around EAGAIN. See explanation at
489 * clearerr() call.
490 */
491 ndelay_on(polldata.fd);
492#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100493 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200494 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100495 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200496
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100497 rdsz = sizeof(G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100498 if (G.got_clen) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100499 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100500 if ((int)G.content_len <= 0)
501 break;
502 rdsz = (unsigned)G.content_len;
503 }
504 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100505
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200506#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
507# if ENABLE_FEATURE_WGET_TIMEOUT
508 second_cnt = G.timeout_seconds;
509# endif
510 while (1) {
511 if (safe_poll(&polldata, 1, 1000) != 0)
512 break; /* error, EOF, or data is available */
513# if ENABLE_FEATURE_WGET_TIMEOUT
514 if (second_cnt != 0 && --second_cnt == 0) {
515 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100516 bb_error_msg_and_die("download timed out");
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200517 }
518# endif
519 /* Needed for "stalled" indicator */
520 progress_meter(PROGRESS_BUMP);
521 }
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100522
Denys Vlasenko8766a792011-02-11 21:42:00 +0100523 /* fread internally uses read loop, which in our case
524 * is usually exited when we get EAGAIN.
525 * In this case, libc sets error marker on the stream.
526 * Need to clear it before next fread to avoid possible
527 * rare false positive ferror below. Rare because usually
528 * fread gets more than zero bytes, and we don't fall
529 * into if (n <= 0) ...
530 */
531 clearerr(dfp);
532 errno = 0;
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100533#endif
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100534 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100535 /* man fread:
536 * If error occurs, or EOF is reached, the return value
537 * is a short item count (or zero).
538 * fread does not distinguish between EOF and error.
539 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200540 if (n <= 0) {
Denys Vlasenko8766a792011-02-11 21:42:00 +0100541#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
542 if (errno == EAGAIN) /* poll lied, there is no data? */
543 continue; /* yes */
544#endif
545 if (ferror(dfp))
546 bb_perror_msg_and_die(bb_msg_read_error);
547 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200548 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100549
Denys Vlasenko2384a352011-02-15 00:58:36 +0100550 xwrite(G.output_fd, G.wget_buf, n);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100551
Denys Vlasenko7f432802009-06-28 01:02:24 +0200552#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100553 G.transferred += n;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200554 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200555#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100556 if (G.got_clen) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100557 G.content_len -= n;
Denys Vlasenko9213a552011-02-10 13:23:45 +0100558 if (G.content_len == 0)
559 break;
560 }
Denys Vlasenko7f432802009-06-28 01:02:24 +0200561 }
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100562#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
563 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100564 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100565#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200566 if (!G.chunked)
567 break;
568
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100569 fgets_and_trim(dfp); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200570 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100571 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100572 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200573 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100574 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200575 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100576 G.got_clen = 1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200577 }
578
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100579 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100580 G.chunked = 0; /* makes it show 100% even for chunked download */
581 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200582 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200583}
584
Pere Orga53695632011-02-16 20:09:36 +0100585static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +0000586{
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100587 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200588 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100589 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200590 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000591 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100592 char *proxy = NULL;
593 char *fname_out_alloc;
594 struct host_info server;
595 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +0000596
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100597 server.allocated = NULL;
598 target.allocated = NULL;
599 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200600 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100601
602 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000603
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000604 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100605 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000606 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000607 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko2384a352011-02-15 00:58:36 +0100608 use_proxy = (proxy && proxy[0]);
609 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000610 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +0000611 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200612 if (!use_proxy) {
613 server.port = target.port;
614 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100615 //free(server.allocated); - can't be non-NULL
616 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200617 } else {
618 server.host = target.host;
619 }
620 }
621
622 if (ENABLE_FEATURE_IPV6)
623 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000624
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100625 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100626 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100627 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100628 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000629 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100630 if (G.fname_out[0] == '/' || !G.fname_out[0])
631 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000632 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenko625f2182011-03-21 00:29:37 +0100633 else {
634 if (G.dir_prefix)
635 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
636 else {
637 /* redirects may free target.path later, need to make a copy */
638 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
639 }
640 }
Eric Andersen29edd002000-12-09 16:55:35 +0000641 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000642#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100643 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000644#endif
645
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000646 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100647 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100648 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100649 G.output_fd = open(G.fname_out, O_WRONLY);
650 if (G.output_fd >= 0) {
651 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000652 }
653 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +0100654 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000655 }
656
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200657 redir_limit = 5;
658 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000659 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100660 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200661 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
662 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
663 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000664 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200665 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +0100666 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
667 G.got_clen = 0;
668 G.chunked = 0;
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000669 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000670 /*
671 * HTTP session
672 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200673 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200674 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200675
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100676
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200677 /* Open socket to http server */
678 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200679
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200680 /* Send HTTP request */
681 if (use_proxy) {
682 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
683 target.is_ftp ? "f" : "ht", target.host,
684 target.path);
685 } else {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100686 if (option_mask32 & WGET_OPT_POST_DATA)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200687 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
688 else
689 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
690 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000691
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200692 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100693 target.host, G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000694
Denys Vlasenko9213a552011-02-10 13:23:45 +0100695 /* Ask server to close the connection as soon as we are done
696 * (IOW: we do not intend to send more requests)
697 */
698 fprintf(sfp, "Connection: close\r\n");
699
Denis Vlasenko9cade082006-11-21 10:43:02 +0000700#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200701 if (target.user) {
702 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100703 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200704 }
705 if (use_proxy && server.user) {
706 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100707 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200708 }
Eric Andersen79757c92001-04-05 21:45:54 +0000709#endif
710
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100711 if (G.beg_range)
712 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100713
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000714#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100715 if (G.extra_headers)
716 fputs(G.extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000717
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100718 if (option_mask32 & WGET_OPT_POST_DATA) {
719 char *estr = URL_escape(G.post_data);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100720 fprintf(sfp,
721 "Content-Type: application/x-www-form-urlencoded\r\n"
722 "Content-Length: %u\r\n"
723 "\r\n"
724 "%s",
725 (int) strlen(estr), estr
726 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200727 free(estr);
728 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000729#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100730 {
731 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200732 }
Eric Andersen79757c92001-04-05 21:45:54 +0000733
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200734 fflush(sfp);
735
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200736 /*
737 * Retrieve HTTP response line and check for "200" status code.
738 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000739 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100740 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000741
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100742 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200743 str = skip_non_whitespace(str);
744 str = skip_whitespace(str);
745 // FIXME: no error check
746 // xatou wouldn't work: "200 OK"
747 status = atoi(str);
748 switch (status) {
749 case 0:
750 case 100:
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100751 while (gethdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200752 /* eat all remaining headers */;
753 goto read_response;
754 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000755/*
756Response 204 doesn't say "null file", it says "metadata
757has changed but data didn't":
758
759"10.2.5 204 No Content
760The server has fulfilled the request but does not need to return
761an entity-body, and might want to return updated metainformation.
762The response MAY include new or updated metainformation in the form
763of entity-headers, which if present SHOULD be associated with
764the requested variant.
765
766If the client is a user agent, it SHOULD NOT change its document
767view from that which caused the request to be sent. This response
768is primarily intended to allow input for actions to take place
769without causing a change to the user agent's active document view,
770although any new or updated metainformation SHOULD be applied
771to the document currently in the user agent's active view.
772
773The 204 response MUST NOT include a message-body, and thus
774is always terminated by the first empty line after the header fields."
775
776However, in real world it was observed that some web servers
777(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
778*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200779 case 204:
780 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200781 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200782 case 301:
783 case 302:
784 case 303:
785 break;
786 case 206:
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100787 if (G.beg_range)
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000788 break;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200789 /* fall through */
790 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100791 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200792 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000793
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200794 /*
795 * Retrieve HTTP headers.
796 */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100797 while ((str = gethdr(sfp)) != NULL) {
798 static const char keywords[] ALIGN1 =
799 "content-length\0""transfer-encoding\0""location\0";
800 enum {
801 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
802 };
Matthijs van de Water0d586662009-08-22 20:19:48 +0200803 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100804
805 /* gethdr converted "FOO:" string to lowercase */
806
Matthijs van de Water0d586662009-08-22 20:19:48 +0200807 /* strip trailing whitespace */
808 char *s = strchrnul(str, '\0') - 1;
809 while (s >= str && (*s == ' ' || *s == '\t')) {
810 *s = '\0';
811 s--;
812 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100813 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200814 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100815 G.content_len = BB_STRTOOFF(str, NULL, 10);
816 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200817 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000818 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200819 G.got_clen = 1;
820 continue;
821 }
822 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100823 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200824 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100825 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200826 }
827 if (key == KEY_location && status >= 300) {
828 if (--redir_limit == 0)
829 bb_error_msg_and_die("too many redirections");
830 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100831 if (str[0] == '/') {
832 free(target.allocated);
833 target.path = target.allocated = xstrdup(str+1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200834 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100835 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200836 parse_url(str, &target);
837 if (!use_proxy) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100838 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +0100839 server.allocated = NULL;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200840 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200841 /* strip_ipv6_scope_id(target.host); - no! */
842 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200843 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000844 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200845 goto resolve_lsa;
846 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000847 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200848 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000849 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200850 }
851// if (status >= 300)
852// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000853
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200854 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000855 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000856
857 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000858 /*
859 * FTP session
860 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200861 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000862 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000863
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100864 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100865
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100866 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100867 if (G.output_fd < 0)
868 G.output_fd = xopen(G.fname_out, G.o_flags);
869 retrieve_file_data(dfp);
870 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
871 xclose(G.output_fd);
872 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100873 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000874 }
Eric Andersen79757c92001-04-05 21:45:54 +0000875
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200876 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100877 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000878 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100879 if (ftpcmd(NULL, NULL, sfp) != 226)
880 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
881 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000882 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100883 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000884
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100885 free(server.allocated);
886 free(target.allocated);
887 free(fname_out_alloc);
Eric Andersen96700832000-09-04 15:15:55 +0000888}
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100889
890int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
891int wget_main(int argc UNUSED_PARAM, char **argv)
892{
893#if ENABLE_FEATURE_WGET_LONG_OPTIONS
894 static const char wget_longopts[] ALIGN1 =
895 /* name, has_arg, val */
896 "continue\0" No_argument "c"
897//FIXME: -s isn't --spider, it's --save-headers!
898 "spider\0" No_argument "s"
899 "quiet\0" No_argument "q"
900 "output-document\0" Required_argument "O"
901 "directory-prefix\0" Required_argument "P"
902 "proxy\0" Required_argument "Y"
903 "user-agent\0" Required_argument "U"
904#if ENABLE_FEATURE_WGET_TIMEOUT
905 "timeout\0" Required_argument "T"
906#endif
907 /* Ignored: */
908 // "tries\0" Required_argument "t"
909 /* Ignored (we always use PASV): */
910 "passive-ftp\0" No_argument "\xff"
911 "header\0" Required_argument "\xfe"
912 "post-data\0" Required_argument "\xfd"
913 /* Ignored (we don't do ssl) */
914 "no-check-certificate\0" No_argument "\xfc"
915 ;
916#endif
917
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100918#if ENABLE_FEATURE_WGET_LONG_OPTIONS
919 llist_t *headers_llist = NULL;
920#endif
921
922 INIT_G();
923
924 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
925 G.proxy_flag = "on"; /* use proxies if env vars are set */
926 G.user_agent = "Wget"; /* "User-Agent" header field */
927
928#if ENABLE_FEATURE_WGET_LONG_OPTIONS
929 applet_long_options = wget_longopts;
930#endif
931 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
932 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
933 &G.fname_out, &G.dir_prefix,
934 &G.proxy_flag, &G.user_agent,
935 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
936 NULL /* -t RETRIES */
937 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
938 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
939 );
940 argv += optind;
941
942#if ENABLE_FEATURE_WGET_LONG_OPTIONS
943 if (headers_llist) {
944 int size = 1;
945 char *cp;
946 llist_t *ll = headers_llist;
947 while (ll) {
948 size += strlen(ll->data) + 2;
949 ll = ll->link;
950 }
951 G.extra_headers = cp = xmalloc(size);
952 while (headers_llist) {
953 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
954 }
955 }
956#endif
957
Denys Vlasenko2384a352011-02-15 00:58:36 +0100958 G.output_fd = -1;
959 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
960 if (G.fname_out) { /* -O FILE ? */
961 if (LONE_DASH(G.fname_out)) { /* -O - ? */
962 G.output_fd = 1;
963 option_mask32 &= ~WGET_OPT_CONTINUE;
964 }
965 /* compat with wget: -O FILE can overwrite */
966 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
967 }
968
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100969 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +0100970 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100971
Denys Vlasenko28556b92011-02-15 11:03:53 +0100972 if (G.output_fd >= 0)
973 xclose(G.output_fd);
974
Pere Orga53695632011-02-16 20:09:36 +0100975 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100976}