blob: 5dac2b5001fcb657939139689259aa73b122be5b [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010011
12//usage:#define wget_trivial_usage
13//usage: IF_FEATURE_WGET_LONG_OPTIONS(
14//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +020016/* Since we ignore these opts, we don't show them in --help */
17/* //usage: " [--no-check-certificate] [--no-cache]" */
18//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010019//usage: )
20//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23//usage: )
24//usage:#define wget_full_usage "\n\n"
25//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010026//usage: "\n -s Spider mode - only check file existence"
27//usage: "\n -c Continue retrieval of aborted transfer"
28//usage: "\n -q Quiet"
29//usage: "\n -P DIR Save to DIR (default .)"
30//usage: IF_FEATURE_WGET_TIMEOUT(
31//usage: "\n -T SEC Network read timeout is SEC seconds"
32//usage: )
33//usage: "\n -O FILE Save to FILE ('-' for stdout)"
34//usage: "\n -U STR Use STR for User-Agent header"
35//usage: "\n -Y Use proxy ('on' or 'off')"
36
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000037#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000038
/* Protocol tracing hook. Change the condition to 1 to route every
 * log_io() call to bb_error_msg(); as shipped it expands to a no-op.
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
#else
# define log_io(...) ((void)0)
#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +010044
45
Eric Andersen79757c92001-04-05 21:45:54 +000046struct host_info {
Denys Vlasenkoa3661092011-02-13 02:33:11 +010047 char *allocated;
Denis Vlasenko818322b2007-09-24 18:27:04 +000048 const char *path;
49 const char *user;
50 char *host;
51 int port;
52 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000053};
54
Denis Vlasenko77105632007-09-24 15:04:00 +000055
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020056/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000057struct globals {
58 off_t content_len; /* Content-length of the file */
59 off_t beg_range; /* Range at which continue begins */
60#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000061 off_t transferred; /* Number of bytes transferred so far */
62 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010063 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000064#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +020065 char *dir_prefix;
Denys Vlasenkoa3661092011-02-13 02:33:11 +010066#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenko982e87f2013-07-30 11:52:58 +020067 char *post_data;
68 char *extra_headers;
Denys Vlasenkoa3661092011-02-13 02:33:11 +010069#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +020070 char *fname_out; /* where to direct output (-O) */
71 const char *proxy_flag; /* Use proxies if env vars are set */
72 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020073#if ENABLE_FEATURE_WGET_TIMEOUT
74 unsigned timeout_seconds;
75#endif
Denys Vlasenko2384a352011-02-15 00:58:36 +010076 int output_fd;
77 int o_flags;
Denys Vlasenko7f432802009-06-28 01:02:24 +020078 smallint chunked; /* chunked transfer encoding */
79 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010080 /* Local downloads do benefit from big buffer.
81 * With 512 byte buffer, it was measured to be
82 * an order of magnitude slower than with big one.
83 */
84 uint64_t just_to_align_next_member;
85 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010086} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010087#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020088#define INIT_G() do { \
Denys Vlasenko982e87f2013-07-30 11:52:58 +020089 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020090 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
91} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000092
93
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020094/* Must match option string! */
95enum {
96 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +020097 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020098 WGET_OPT_QUIET = (1 << 2),
99 WGET_OPT_OUTNAME = (1 << 3),
100 WGET_OPT_PREFIX = (1 << 4),
101 WGET_OPT_PROXY = (1 << 5),
102 WGET_OPT_USER_AGENT = (1 << 6),
103 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
104 WGET_OPT_RETRIES = (1 << 8),
105 WGET_OPT_PASSIVE = (1 << 9),
106 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
107 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
108};
109
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1,  /* initialize the meter, then draw it */
	PROGRESS_END   = 0,   /* draw one last time and release the meter */
	PROGRESS_BUMP  = 1,   /* plain redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000115#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000116static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000117{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200118 if (option_mask32 & WGET_OPT_QUIET)
119 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000120
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200121 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100122 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000123
Denys Vlasenko2384a352011-02-15 00:58:36 +0100124 bb_progress_update(&G.pmt,
125 G.beg_range,
126 G.transferred,
127 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
128 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000129
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200130 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100131 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +0200132 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100133 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000134 }
135}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200136#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000137static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000138#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000139
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000140
/* IPv6 has scoped address types, i.e. link and site local addresses. A link
 * local address may carry a scope identifier naming the interface/link
 * it is valid on (e.g. fe80::1%eth0). Such an identifier is meaningful
 * on a single node only.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits host in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not "[xx]" or "[xx]:nn" is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *zone, *bracket;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */
	if (host[0] != '[')
		return; /* not IPv6 */

	zone = strchr(host, '%');
	if (zone == NULL)
		return; /* no zone identifier present */

	bracket = strchr(host, ']');
	if (bracket == NULL)
		return; /* malformed: unterminated "[" */

	/* Only "]" at the end or "]:port" is well formed.
	 * zone points to "%eth0]...", bracket points to "]...":
	 * slide the tail down over the zone identifier. */
	if (bracket[1] == ':' || bracket[1] == '\0')
		overlapping_strcpy(zone, bracket);
}
176
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into G.wget_buf and return G.wget_buf.
 * The result is overwritten by the next user of that buffer.
 * Input too long for the buffer is silently truncated.
 */
static char *base64enc(const char *str)
{
	/* paranoia: longest input whose encoding still fits the buffer */
	const unsigned max_len = sizeof(G.wget_buf)/4*3 - 10;
	unsigned len = strlen(str);

	if (len > max_len)
		len = max_len;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
188
/*
 * Cut s at the first byte below ' ' (control characters, including the
 * terminating NUL itself) so that it is safe to print. Bytes >= 0x80 are
 * kept. Modifies s in place and returns it.
 */
static char* sanitize_string(char *s)
{
	unsigned char *end;

	for (end = (unsigned char *) s; *end >= ' '; end++)
		continue;
	*end = '\0';
	return s;
}
197
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000198static FILE *open_socket(len_and_sockaddr *lsa)
199{
200 FILE *fp;
201
202 /* glibc 2.4 seems to try seeking on it - ??! */
203 /* hopefully it understands what ESPIPE means... */
204 fp = fdopen(xconnect_stream(lsa), "r+");
205 if (fp == NULL)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100206 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000207
208 return fp;
209}
210
Denys Vlasenkof836f012011-02-10 23:02:28 +0100211/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
212static char fgets_and_trim(FILE *fp)
213{
214 char c;
215 char *buf_ptr;
216
217 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
218 bb_perror_msg_and_die("error getting response");
219
220 buf_ptr = strchrnul(G.wget_buf, '\n');
221 c = *buf_ptr;
222 *buf_ptr = '\0';
223 buf_ptr = strchrnul(G.wget_buf, '\r');
224 *buf_ptr = '\0';
225
226 log_io("< %s", G.wget_buf);
227
228 return c;
229}
230
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100231static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000232{
233 int result;
234 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100235 if (!s2)
236 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000237 fprintf(fp, "%s%s\r\n", s1, s2);
238 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100239 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000240 }
241
242 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100243 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100244 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000245
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100246 G.wget_buf[3] = '\0';
247 result = xatoi_positive(G.wget_buf);
248 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000249 return result;
250}
251
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100252static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000253{
254 char *url, *p, *sp;
255
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100256 free(h->allocated);
257 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000258
259 if (strncmp(url, "http://", 7) == 0) {
260 h->port = bb_lookup_port("http", "tcp", 80);
261 h->host = url + 7;
262 h->is_ftp = 0;
263 } else if (strncmp(url, "ftp://", 6) == 0) {
264 h->port = bb_lookup_port("ftp", "tcp", 21);
265 h->host = url + 6;
266 h->is_ftp = 1;
267 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200268 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000269
270 // FYI:
271 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
272 // 'GET /?var=a/b HTTP 1.0'
273 // and saves 'index.html?var=a%2Fb' (we save 'b')
274 // wget 'http://busybox.net?login=john@doe':
275 // request: 'GET /?login=john@doe HTTP/1.0'
276 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
277 // wget 'http://busybox.net#test/test':
278 // request: 'GET / HTTP/1.0'
279 // saves: 'index.html' (we save 'test')
280 //
281 // We also don't add unique .N suffix if file exists...
282 sp = strchr(h->host, '/');
283 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
284 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
285 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000286 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000287 } else if (*sp == '/') {
288 *sp = '\0';
289 h->path = sp + 1;
290 } else { // '#' or '?'
291 // http://busybox.net?login=john@doe is a valid URL
292 // memmove converts to:
293 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000294 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000295 h->host--;
296 sp[-1] = '\0';
297 h->path = sp;
298 }
299
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200300 // We used to set h->user to NULL here, but this interferes
301 // with handling of code 302 ("object was moved")
302
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000303 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000304 if (sp != NULL) {
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200305 // URL-decode "user:password" string before base64-encoding:
306 // wget http://test:my%20pass@example.com should send
307 // Authorization: Basic dGVzdDpteSBwYXNz
308 // which decodes to "test:my pass".
309 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000310 *sp = '\0';
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200311 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000312 h->host = sp + 1;
313 }
314
315 sp = h->host;
316}
317
Denys Vlasenkof836f012011-02-10 23:02:28 +0100318static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000319{
320 char *s, *hdrval;
321 int c;
322
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000323 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100324 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000325
Denys Vlasenkof836f012011-02-10 23:02:28 +0100326 /* end of the headers? */
327 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000328 return NULL;
329
330 /* convert the header name to lower case */
Denys Vlasenkoea267d52013-07-01 15:01:50 +0200331 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
332 /*
333 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
334 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
335 * "A-Z" maps to "a-z".
336 * "@[\]" can't occur in header names.
337 * "^_" maps to "~,DEL" (which is wrong).
338 * "^" was never seen yet, "_" was seen from web.archive.org
339 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
340 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100341 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200342 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000343
344 /* verify we are at the end of the header name */
345 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100346 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000347
348 /* locate the start of the header value */
349 *s++ = '\0';
350 hdrval = skip_whitespace(s);
351
Denys Vlasenkof836f012011-02-10 23:02:28 +0100352 if (c != '\n') {
353 /* Rats! The buffer isn't big enough to hold the entire header value */
354 while (c = getc(fp), c != EOF && c != '\n')
355 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000356 }
357
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000358 return hdrval;
359}
360
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200361static void reset_beg_range_to_zero(void)
362{
Denys Vlasenko61441242012-06-17 19:52:25 +0200363 bb_error_msg("restart failed");
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200364 G.beg_range = 0;
365 xlseek(G.output_fd, 0, SEEK_SET);
Denys Vlasenko61441242012-06-17 19:52:25 +0200366 /* Done at the end instead: */
367 /* ftruncate(G.output_fd, 0); */
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200368}
369
Denys Vlasenko7f432802009-06-28 01:02:24 +0200370static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
371{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200372 FILE *sfp;
373 char *str;
374 int port;
375
376 if (!target->user)
377 target->user = xstrdup("anonymous:busybox@");
378
379 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100380 if (ftpcmd(NULL, NULL, sfp) != 220)
381 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200382
383 /*
384 * Splitting username:password pair,
385 * trying to log in
386 */
387 str = strchr(target->user, ':');
388 if (str)
389 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100390 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200391 case 230:
392 break;
393 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100394 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200395 break;
396 /* fall through (failed login) */
397 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100398 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200399 }
400
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100401 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200402
403 /*
404 * Querying file size
405 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100406 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
407 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100408 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200409 bb_error_msg_and_die("SIZE value is garbage");
410 }
411 G.got_clen = 1;
412 }
413
414 /*
415 * Entering passive mode
416 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100417 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200418 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100419 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200420 }
421 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
422 // Server's IP is N1.N2.N3.N4 (we ignore it)
423 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100424 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200425 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100426 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200427 if (!str) goto pasv_error;
428 port = xatou_range(str+1, 0, 255);
429 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100430 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200431 if (!str) goto pasv_error;
432 port += xatou_range(str+1, 0, 255) * 256;
Denys Vlasenkoca183112011-04-07 17:52:20 +0200433 set_nport(&lsa->u.sa, htons(port));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200434
435 *dfpp = open_socket(lsa);
436
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200437 if (G.beg_range != 0) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100438 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
439 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100440 G.content_len -= G.beg_range;
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200441 else
442 reset_beg_range_to_zero();
Denys Vlasenko7f432802009-06-28 01:02:24 +0200443 }
444
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100445 if (ftpcmd("RETR ", target->path, sfp) > 150)
446 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200447
448 return sfp;
449}
450
Denys Vlasenko2384a352011-02-15 00:58:36 +0100451static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200452{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200453#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
454# if ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200455 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200456# endif
457 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200458
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200459 polldata.fd = fileno(dfp);
460 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200461#endif
462 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200463
464 if (G.chunked)
465 goto get_clen;
466
467 /* Loops only if chunked */
468 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100469
470#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
471 /* Must use nonblocking I/O, otherwise fread will loop
472 * and *block* until it reads full buffer,
473 * which messes up progress bar and/or timeout logic.
474 * Because of nonblocking I/O, we need to dance
475 * very carefully around EAGAIN. See explanation at
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200476 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100477 */
478 ndelay_on(polldata.fd);
479#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100480 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200481 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100482 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200483
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200484#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100485 /* fread internally uses read loop, which in our case
486 * is usually exited when we get EAGAIN.
487 * In this case, libc sets error marker on the stream.
488 * Need to clear it before next fread to avoid possible
489 * rare false positive ferror below. Rare because usually
490 * fread gets more than zero bytes, and we don't fall
491 * into if (n <= 0) ...
492 */
493 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100494#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200495 errno = 0;
496 rdsz = sizeof(G.wget_buf);
497 if (G.got_clen) {
498 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
499 if ((int)G.content_len <= 0)
500 break;
501 rdsz = (unsigned)G.content_len;
502 }
503 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100504 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200505
506 if (n > 0) {
507 xwrite(G.output_fd, G.wget_buf, n);
508#if ENABLE_FEATURE_WGET_STATUSBAR
509 G.transferred += n;
510#endif
511 if (G.got_clen) {
512 G.content_len -= n;
513 if (G.content_len == 0)
514 break;
515 }
516#if ENABLE_FEATURE_WGET_TIMEOUT
517 second_cnt = G.timeout_seconds;
518#endif
519 continue;
520 }
521
522 /* n <= 0.
523 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100524 * If error occurs, or EOF is reached, the return value
525 * is a short item count (or zero).
526 * fread does not distinguish between EOF and error.
527 */
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200528 if (errno != EAGAIN) {
529 if (ferror(dfp)) {
530 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100531 bb_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200532 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100533 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200534 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100535
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200536#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
537 /* It was EAGAIN. There is no data. Wait up to one second
538 * then abort if timed out, or update the bar and try reading again.
539 */
540 if (safe_poll(&polldata, 1, 1000) == 0) {
541# if ENABLE_FEATURE_WGET_TIMEOUT
542 if (second_cnt != 0 && --second_cnt == 0) {
543 progress_meter(PROGRESS_END);
544 bb_error_msg_and_die("download timed out");
545 }
546# endif
547 /* We used to loop back to poll here,
548 * but there is no great harm in letting fread
549 * to try reading anyway.
550 */
551 }
552 /* Need to do it _every_ second for "stalled" indicator
553 * to be shown properly.
554 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200555 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200556#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200557 } /* while (reading data) */
558
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100559#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
560 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100561 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100562#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200563 if (!G.chunked)
564 break;
565
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100566 fgets_and_trim(dfp); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200567 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100568 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100569 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200570 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100571 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200572 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100573 G.got_clen = 1;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200574 /*
575 * Note that fgets may result in some data being buffered in dfp.
576 * We loop back to fread, which will retrieve this data.
577 * Also note that code has to be arranged so that fread
578 * is done _before_ one-second poll wait - poll doesn't know
579 * about stdio buffering and can result in spurious one second waits!
580 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200581 }
582
Denys Vlasenko61441242012-06-17 19:52:25 +0200583 /* If -c failed, we restart from the beginning,
584 * but we do not truncate file then, we do it only now, at the end.
585 * This lets user to ^C if his 99% complete 10 GB file download
586 * failed to restart *without* losing the almost complete file.
587 */
588 {
589 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
590 if (pos != (off_t)-1)
591 ftruncate(G.output_fd, pos);
592 }
593
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100594 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100595 G.chunked = 0; /* makes it show 100% even for chunked download */
596 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200597 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200598}
599
Pere Orga53695632011-02-16 20:09:36 +0100600static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +0000601{
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100602 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200603 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100604 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200605 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000606 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100607 char *proxy = NULL;
608 char *fname_out_alloc;
Denys Vlasenko93b4a602011-12-18 05:11:56 +0100609 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100610 struct host_info server;
611 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +0000612
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100613 server.allocated = NULL;
614 target.allocated = NULL;
615 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200616 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100617
618 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000619
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000620 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100621 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000622 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000623 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko2384a352011-02-15 00:58:36 +0100624 use_proxy = (proxy && proxy[0]);
625 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000626 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +0000627 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200628 if (!use_proxy) {
629 server.port = target.port;
630 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100631 //free(server.allocated); - can't be non-NULL
632 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200633 } else {
634 server.host = target.host;
635 }
636 }
637
638 if (ENABLE_FEATURE_IPV6)
639 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000640
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100641 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100642 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100643 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100644 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000645 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100646 if (G.fname_out[0] == '/' || !G.fname_out[0])
647 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000648 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoaacd4482012-06-17 20:21:30 +0200649 if (G.dir_prefix)
650 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100651 else {
Denys Vlasenkoaacd4482012-06-17 20:21:30 +0200652 /* redirects may free target.path later, need to make a copy */
653 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100654 }
Eric Andersen29edd002000-12-09 16:55:35 +0000655 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000656#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100657 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000658#endif
659
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000660 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100661 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100662 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100663 G.output_fd = open(G.fname_out, O_WRONLY);
664 if (G.output_fd >= 0) {
665 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000666 }
667 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +0100668 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000669 }
670
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200671 redir_limit = 5;
672 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000673 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100674 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200675 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
676 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
677 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000678 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200679 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +0100680 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
681 G.got_clen = 0;
682 G.chunked = 0;
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000683 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000684 /*
685 * HTTP session
686 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200687 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200688 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200689
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100690
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200691 /* Open socket to http server */
692 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200693
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200694 /* Send HTTP request */
695 if (use_proxy) {
696 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
697 target.is_ftp ? "f" : "ht", target.host,
698 target.path);
699 } else {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100700 if (option_mask32 & WGET_OPT_POST_DATA)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200701 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
702 else
703 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
704 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000705
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200706 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100707 target.host, G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000708
Denys Vlasenko9213a552011-02-10 13:23:45 +0100709 /* Ask server to close the connection as soon as we are done
710 * (IOW: we do not intend to send more requests)
711 */
712 fprintf(sfp, "Connection: close\r\n");
713
Denis Vlasenko9cade082006-11-21 10:43:02 +0000714#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200715 if (target.user) {
716 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100717 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200718 }
719 if (use_proxy && server.user) {
720 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100721 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200722 }
Eric Andersen79757c92001-04-05 21:45:54 +0000723#endif
724
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200725 if (G.beg_range != 0)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100726 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100727
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000728#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100729 if (G.extra_headers)
730 fputs(G.extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000731
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100732 if (option_mask32 & WGET_OPT_POST_DATA) {
Denys Vlasenko9213a552011-02-10 13:23:45 +0100733 fprintf(sfp,
734 "Content-Type: application/x-www-form-urlencoded\r\n"
735 "Content-Length: %u\r\n"
736 "\r\n"
737 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +0200738 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +0100739 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200740 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000741#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100742 {
743 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200744 }
Eric Andersen79757c92001-04-05 21:45:54 +0000745
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200746 fflush(sfp);
747
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200748 /*
749 * Retrieve HTTP response line and check for "200" status code.
750 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000751 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100752 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000753
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100754 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200755 str = skip_non_whitespace(str);
756 str = skip_whitespace(str);
757 // FIXME: no error check
758 // xatou wouldn't work: "200 OK"
759 status = atoi(str);
760 switch (status) {
761 case 0:
762 case 100:
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100763 while (gethdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200764 /* eat all remaining headers */;
765 goto read_response;
766 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000767/*
768Response 204 doesn't say "null file", it says "metadata
769has changed but data didn't":
770
771"10.2.5 204 No Content
772The server has fulfilled the request but does not need to return
773an entity-body, and might want to return updated metainformation.
774The response MAY include new or updated metainformation in the form
775of entity-headers, which if present SHOULD be associated with
776the requested variant.
777
778If the client is a user agent, it SHOULD NOT change its document
779view from that which caused the request to be sent. This response
780is primarily intended to allow input for actions to take place
781without causing a change to the user agent's active document view,
782although any new or updated metainformation SHOULD be applied
783to the document currently in the user agent's active view.
784
785The 204 response MUST NOT include a message-body, and thus
786is always terminated by the first empty line after the header fields."
787
788However, in real world it was observed that some web servers
789(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
790*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200791 case 204:
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200792 if (G.beg_range != 0) {
793 /* "Range:..." was not honored by the server.
794 * Restart download from the beginning.
795 */
796 reset_beg_range_to_zero();
797 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200798 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200799 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200800 case 301:
801 case 302:
802 case 303:
803 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200804 case 206: /* Partial Content */
805 if (G.beg_range != 0)
806 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000807 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200808 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200809 /* fall through */
810 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100811 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200812 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000813
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200814 /*
815 * Retrieve HTTP headers.
816 */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100817 while ((str = gethdr(sfp)) != NULL) {
818 static const char keywords[] ALIGN1 =
819 "content-length\0""transfer-encoding\0""location\0";
820 enum {
821 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
822 };
Matthijs van de Water0d586662009-08-22 20:19:48 +0200823 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100824
825 /* gethdr converted "FOO:" string to lowercase */
826
Matthijs van de Water0d586662009-08-22 20:19:48 +0200827 /* strip trailing whitespace */
828 char *s = strchrnul(str, '\0') - 1;
829 while (s >= str && (*s == ' ' || *s == '\t')) {
830 *s = '\0';
831 s--;
832 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100833 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200834 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100835 G.content_len = BB_STRTOOFF(str, NULL, 10);
836 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200837 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000838 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200839 G.got_clen = 1;
840 continue;
841 }
842 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100843 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200844 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100845 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200846 }
847 if (key == KEY_location && status >= 300) {
848 if (--redir_limit == 0)
849 bb_error_msg_and_die("too many redirections");
850 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100851 if (str[0] == '/') {
Denys Vlasenko93b4a602011-12-18 05:11:56 +0100852 free(redirected_path);
853 target.path = redirected_path = xstrdup(str+1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200854 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100855 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200856 parse_url(str, &target);
857 if (!use_proxy) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100858 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +0100859 server.allocated = NULL;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200860 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200861 /* strip_ipv6_scope_id(target.host); - no! */
862 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200863 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000864 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200865 goto resolve_lsa;
866 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000867 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200868 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000869 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200870 }
871// if (status >= 300)
872// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000873
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200874 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000875 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000876
877 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000878 /*
879 * FTP session
880 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200881 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000882 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000883
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100884 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100885
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100886 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100887 if (G.output_fd < 0)
888 G.output_fd = xopen(G.fname_out, G.o_flags);
889 retrieve_file_data(dfp);
890 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
891 xclose(G.output_fd);
892 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100893 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000894 }
Eric Andersen79757c92001-04-05 21:45:54 +0000895
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200896 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100897 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000898 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100899 if (ftpcmd(NULL, NULL, sfp) != 226)
900 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
901 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000902 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100903 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000904
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100905 free(server.allocated);
906 free(target.allocated);
907 free(fname_out_alloc);
Denys Vlasenko93b4a602011-12-18 05:11:56 +0100908 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +0000909}
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100910
911int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
912int wget_main(int argc UNUSED_PARAM, char **argv)
913{
914#if ENABLE_FEATURE_WGET_LONG_OPTIONS
915 static const char wget_longopts[] ALIGN1 =
916 /* name, has_arg, val */
917 "continue\0" No_argument "c"
918//FIXME: -s isn't --spider, it's --save-headers!
919 "spider\0" No_argument "s"
920 "quiet\0" No_argument "q"
921 "output-document\0" Required_argument "O"
922 "directory-prefix\0" Required_argument "P"
923 "proxy\0" Required_argument "Y"
924 "user-agent\0" Required_argument "U"
925#if ENABLE_FEATURE_WGET_TIMEOUT
926 "timeout\0" Required_argument "T"
927#endif
928 /* Ignored: */
929 // "tries\0" Required_argument "t"
930 /* Ignored (we always use PASV): */
931 "passive-ftp\0" No_argument "\xff"
932 "header\0" Required_argument "\xfe"
933 "post-data\0" Required_argument "\xfd"
934 /* Ignored (we don't do ssl) */
935 "no-check-certificate\0" No_argument "\xfc"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200936 /* Ignored (we don't support caching) */
937 "no-cache\0" No_argument "\xfb"
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100938 ;
939#endif
940
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100941#if ENABLE_FEATURE_WGET_LONG_OPTIONS
942 llist_t *headers_llist = NULL;
943#endif
944
945 INIT_G();
946
947 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
948 G.proxy_flag = "on"; /* use proxies if env vars are set */
949 G.user_agent = "Wget"; /* "User-Agent" header field */
950
951#if ENABLE_FEATURE_WGET_LONG_OPTIONS
952 applet_long_options = wget_longopts;
953#endif
954 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
955 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
956 &G.fname_out, &G.dir_prefix,
957 &G.proxy_flag, &G.user_agent,
958 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
959 NULL /* -t RETRIES */
960 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
961 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
962 );
963 argv += optind;
964
965#if ENABLE_FEATURE_WGET_LONG_OPTIONS
966 if (headers_llist) {
967 int size = 1;
968 char *cp;
969 llist_t *ll = headers_llist;
970 while (ll) {
971 size += strlen(ll->data) + 2;
972 ll = ll->link;
973 }
974 G.extra_headers = cp = xmalloc(size);
975 while (headers_llist) {
976 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
977 }
978 }
979#endif
980
Denys Vlasenko2384a352011-02-15 00:58:36 +0100981 G.output_fd = -1;
982 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
983 if (G.fname_out) { /* -O FILE ? */
984 if (LONE_DASH(G.fname_out)) { /* -O - ? */
985 G.output_fd = 1;
986 option_mask32 &= ~WGET_OPT_CONTINUE;
987 }
988 /* compat with wget: -O FILE can overwrite */
989 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
990 }
991
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100992 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +0100993 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100994
Denys Vlasenko28556b92011-02-15 11:03:53 +0100995 if (G.output_fd >= 0)
996 xclose(G.output_fd);
997
Pere Orga53695632011-02-16 20:09:36 +0100998 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100999}