blob: cfbacecede849884335dbbf94125bdfa1cabdde7 [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010011
12//usage:#define wget_trivial_usage
13//usage: IF_FEATURE_WGET_LONG_OPTIONS(
14//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +020016/* Since we ignore these opts, we don't show them in --help */
17/* //usage: " [--no-check-certificate] [--no-cache]" */
18//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010019//usage: )
20//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23//usage: )
24//usage:#define wget_full_usage "\n\n"
25//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010026//usage: "\n -s Spider mode - only check file existence"
27//usage: "\n -c Continue retrieval of aborted transfer"
28//usage: "\n -q Quiet"
29//usage: "\n -P DIR Save to DIR (default .)"
30//usage: IF_FEATURE_WGET_TIMEOUT(
31//usage: "\n -T SEC Network read timeout is SEC seconds"
32//usage: )
33//usage: "\n -O FILE Save to FILE ('-' for stdout)"
34//usage: "\n -U STR Use STR for User-Agent header"
35//usage: "\n -Y Use proxy ('on' or 'off')"
36
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000037#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000038
/* Protocol tracing hook. Change "#if 0" to "#if 1" to route every log_io()
 * call to bb_error_msg(); as shipped, log_io() expands to a no-op.
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
#else
# define log_io(...) ((void)0)
#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +010044
45
Eric Andersen79757c92001-04-05 21:45:54 +000046struct host_info {
Denys Vlasenkoa3661092011-02-13 02:33:11 +010047 char *allocated;
Denis Vlasenko818322b2007-09-24 18:27:04 +000048 const char *path;
49 const char *user;
50 char *host;
51 int port;
52 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000053};
54
Denis Vlasenko77105632007-09-24 15:04:00 +000055
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020056/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000057struct globals {
58 off_t content_len; /* Content-length of the file */
59 off_t beg_range; /* Range at which continue begins */
60#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000061 off_t transferred; /* Number of bytes transferred so far */
62 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010063 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000064#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +020065 char *dir_prefix;
Denys Vlasenkoa3661092011-02-13 02:33:11 +010066#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenko982e87f2013-07-30 11:52:58 +020067 char *post_data;
68 char *extra_headers;
Denys Vlasenkoa3661092011-02-13 02:33:11 +010069#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +020070 char *fname_out; /* where to direct output (-O) */
71 const char *proxy_flag; /* Use proxies if env vars are set */
72 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020073#if ENABLE_FEATURE_WGET_TIMEOUT
74 unsigned timeout_seconds;
Lauri Kasanend074b412013-10-12 21:47:07 +020075 bool connecting;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020076#endif
Denys Vlasenko2384a352011-02-15 00:58:36 +010077 int output_fd;
78 int o_flags;
Denys Vlasenko7f432802009-06-28 01:02:24 +020079 smallint chunked; /* chunked transfer encoding */
80 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010081 /* Local downloads do benefit from big buffer.
82 * With 512 byte buffer, it was measured to be
83 * an order of magnitude slower than with big one.
84 */
85 uint64_t just_to_align_next_member;
86 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010087} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010088#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020089#define INIT_G() do { \
Denys Vlasenko982e87f2013-07-30 11:52:58 +020090 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020091} while (0)
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +020092#define FINI_G() do { \
93 FREE_PTR_TO_GLOBALS(); \
94} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000095
96
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020097/* Must match option string! */
98enum {
99 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200100 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200101 WGET_OPT_QUIET = (1 << 2),
102 WGET_OPT_OUTNAME = (1 << 3),
103 WGET_OPT_PREFIX = (1 << 4),
104 WGET_OPT_PROXY = (1 << 5),
105 WGET_OPT_USER_AGENT = (1 << 6),
106 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
107 WGET_OPT_RETRIES = (1 << 8),
108 WGET_OPT_PASSIVE = (1 << 9),
109 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
110 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
111};
112
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1,  /* initialize the bar, then draw it */
	PROGRESS_END   = 0,   /* draw one last time and release the bar */
	PROGRESS_BUMP  = 1,   /* periodic redraw while the transfer runs */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000118#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000119static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000120{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200121 if (option_mask32 & WGET_OPT_QUIET)
122 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000123
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200124 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100125 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000126
Denys Vlasenko2384a352011-02-15 00:58:36 +0100127 bb_progress_update(&G.pmt,
128 G.beg_range,
129 G.transferred,
130 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
131 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000132
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200133 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100134 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +0200135 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100136 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000137 }
138}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200139#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000140static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000141#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000142
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000143
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits host in place: "[addr%zone]" and "[addr%zone]:port" lose the
 * "%zone" part. Anything else is left exactly as it was.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* A '%' located after the closing bracket (e.g. "[::1]:8%0") is not
	 * a zone id. Copying "]..." forward onto it would overwrite the
	 * string's own terminator and run off the end of the buffer.
	 */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]...":
	 * slide the tail (including its NUL) down over the zone id. */
	memmove(scope, cp, strlen(cp) + 1);
}
179
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into the shared G.wget_buf and return that buffer.
 * The result is only valid until G.wget_buf is reused.
 * Over-long input is silently cut so the encoded text fits the buffer.
 */
static char *base64enc(const char *str)
{
	unsigned src_len = strlen(str);

	/* paranoia: 3 input bytes become 4 output chars, keep some slack */
	if (src_len > sizeof(G.wget_buf)/4*3 - 10) {
		src_len = sizeof(G.wget_buf)/4*3 - 10;
	}
	bb_uuencode(G.wget_buf, str, src_len, bb_uuenc_tbl_base64);

	return G.wget_buf;
}
#endif
191
/*
 * Cut s at the first byte below ' ' (any control character, or the
 * terminating NUL itself). Bytes >= 0x80 are kept.
 * Modifies s in place and returns it, so it can be used inline in
 * message arguments.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cur;

	for (cur = (unsigned char *) s; *cur >= ' '; cur++)
		continue;
	*cur = '\0';

	return s;
}
200
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * Signal handler for the alarm armed in open_socket().
 * Aborts the program only if the alarm fires while G.connecting is set;
 * otherwise it does nothing.
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
	if (!G.connecting)
		return;

	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	bb_error_msg_and_die("download timed out");
}
#endif
209
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000210static FILE *open_socket(len_and_sockaddr *lsa)
211{
Lauri Kasanend074b412013-10-12 21:47:07 +0200212 int fd;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000213 FILE *fp;
214
Lauri Kasanend074b412013-10-12 21:47:07 +0200215 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
216 fd = xconnect_stream(lsa);
217 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
218
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000219 /* glibc 2.4 seems to try seeking on it - ??! */
220 /* hopefully it understands what ESPIPE means... */
Lauri Kasanend074b412013-10-12 21:47:07 +0200221 fp = fdopen(fd, "r+");
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000222 if (fp == NULL)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100223 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000224
225 return fp;
226}
227
Denys Vlasenkof836f012011-02-10 23:02:28 +0100228/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
Lauri Kasanend074b412013-10-12 21:47:07 +0200229/* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100230static char fgets_and_trim(FILE *fp)
231{
232 char c;
233 char *buf_ptr;
234
235 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
236 bb_perror_msg_and_die("error getting response");
237
238 buf_ptr = strchrnul(G.wget_buf, '\n');
239 c = *buf_ptr;
240 *buf_ptr = '\0';
241 buf_ptr = strchrnul(G.wget_buf, '\r');
242 *buf_ptr = '\0';
243
244 log_io("< %s", G.wget_buf);
245
246 return c;
247}
248
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100249static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000250{
251 int result;
252 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100253 if (!s2)
254 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000255 fprintf(fp, "%s%s\r\n", s1, s2);
256 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100257 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000258 }
259
260 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100261 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100262 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000263
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100264 G.wget_buf[3] = '\0';
265 result = xatoi_positive(G.wget_buf);
266 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000267 return result;
268}
269
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100270static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000271{
272 char *url, *p, *sp;
273
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100274 free(h->allocated);
275 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000276
277 if (strncmp(url, "http://", 7) == 0) {
278 h->port = bb_lookup_port("http", "tcp", 80);
279 h->host = url + 7;
280 h->is_ftp = 0;
281 } else if (strncmp(url, "ftp://", 6) == 0) {
282 h->port = bb_lookup_port("ftp", "tcp", 21);
283 h->host = url + 6;
284 h->is_ftp = 1;
285 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200286 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000287
288 // FYI:
289 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
290 // 'GET /?var=a/b HTTP 1.0'
291 // and saves 'index.html?var=a%2Fb' (we save 'b')
292 // wget 'http://busybox.net?login=john@doe':
293 // request: 'GET /?login=john@doe HTTP/1.0'
294 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
295 // wget 'http://busybox.net#test/test':
296 // request: 'GET / HTTP/1.0'
297 // saves: 'index.html' (we save 'test')
298 //
299 // We also don't add unique .N suffix if file exists...
300 sp = strchr(h->host, '/');
301 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
302 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
303 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000304 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000305 } else if (*sp == '/') {
306 *sp = '\0';
307 h->path = sp + 1;
308 } else { // '#' or '?'
309 // http://busybox.net?login=john@doe is a valid URL
310 // memmove converts to:
311 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000312 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000313 h->host--;
314 sp[-1] = '\0';
315 h->path = sp;
316 }
317
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200318 // We used to set h->user to NULL here, but this interferes
319 // with handling of code 302 ("object was moved")
320
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000321 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000322 if (sp != NULL) {
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200323 // URL-decode "user:password" string before base64-encoding:
324 // wget http://test:my%20pass@example.com should send
325 // Authorization: Basic dGVzdDpteSBwYXNz
326 // which decodes to "test:my pass".
327 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000328 *sp = '\0';
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200329 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000330 h->host = sp + 1;
331 }
332
333 sp = h->host;
334}
335
Denys Vlasenkof836f012011-02-10 23:02:28 +0100336static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000337{
338 char *s, *hdrval;
339 int c;
340
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000341 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100342 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000343
Denys Vlasenkof836f012011-02-10 23:02:28 +0100344 /* end of the headers? */
345 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000346 return NULL;
347
348 /* convert the header name to lower case */
Denys Vlasenkoea267d52013-07-01 15:01:50 +0200349 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
350 /*
351 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
352 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
353 * "A-Z" maps to "a-z".
354 * "@[\]" can't occur in header names.
355 * "^_" maps to "~,DEL" (which is wrong).
356 * "^" was never seen yet, "_" was seen from web.archive.org
357 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
358 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100359 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200360 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000361
362 /* verify we are at the end of the header name */
363 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100364 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000365
366 /* locate the start of the header value */
367 *s++ = '\0';
368 hdrval = skip_whitespace(s);
369
Denys Vlasenkof836f012011-02-10 23:02:28 +0100370 if (c != '\n') {
371 /* Rats! The buffer isn't big enough to hold the entire header value */
372 while (c = getc(fp), c != EOF && c != '\n')
373 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000374 }
375
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000376 return hdrval;
377}
378
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200379static void reset_beg_range_to_zero(void)
380{
Denys Vlasenko61441242012-06-17 19:52:25 +0200381 bb_error_msg("restart failed");
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200382 G.beg_range = 0;
383 xlseek(G.output_fd, 0, SEEK_SET);
Denys Vlasenko61441242012-06-17 19:52:25 +0200384 /* Done at the end instead: */
385 /* ftruncate(G.output_fd, 0); */
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200386}
387
Denys Vlasenko7f432802009-06-28 01:02:24 +0200388static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
389{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200390 FILE *sfp;
391 char *str;
392 int port;
393
394 if (!target->user)
395 target->user = xstrdup("anonymous:busybox@");
396
397 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100398 if (ftpcmd(NULL, NULL, sfp) != 220)
399 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200400
401 /*
402 * Splitting username:password pair,
403 * trying to log in
404 */
405 str = strchr(target->user, ':');
406 if (str)
407 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100408 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200409 case 230:
410 break;
411 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100412 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200413 break;
414 /* fall through (failed login) */
415 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100416 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200417 }
418
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100419 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200420
421 /*
422 * Querying file size
423 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100424 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
425 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100426 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200427 bb_error_msg_and_die("SIZE value is garbage");
428 }
429 G.got_clen = 1;
430 }
431
432 /*
433 * Entering passive mode
434 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100435 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200436 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100437 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200438 }
439 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
440 // Server's IP is N1.N2.N3.N4 (we ignore it)
441 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100442 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200443 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100444 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200445 if (!str) goto pasv_error;
446 port = xatou_range(str+1, 0, 255);
447 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100448 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200449 if (!str) goto pasv_error;
450 port += xatou_range(str+1, 0, 255) * 256;
Denys Vlasenkoca183112011-04-07 17:52:20 +0200451 set_nport(&lsa->u.sa, htons(port));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200452
453 *dfpp = open_socket(lsa);
454
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200455 if (G.beg_range != 0) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100456 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
457 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100458 G.content_len -= G.beg_range;
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200459 else
460 reset_beg_range_to_zero();
Denys Vlasenko7f432802009-06-28 01:02:24 +0200461 }
462
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100463 if (ftpcmd("RETR ", target->path, sfp) > 150)
464 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200465
466 return sfp;
467}
468
Denys Vlasenko2384a352011-02-15 00:58:36 +0100469static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200470{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200471#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
472# if ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200473 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200474# endif
475 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200476
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200477 polldata.fd = fileno(dfp);
478 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200479#endif
480 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200481
482 if (G.chunked)
483 goto get_clen;
484
485 /* Loops only if chunked */
486 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100487
488#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
489 /* Must use nonblocking I/O, otherwise fread will loop
490 * and *block* until it reads full buffer,
491 * which messes up progress bar and/or timeout logic.
492 * Because of nonblocking I/O, we need to dance
493 * very carefully around EAGAIN. See explanation at
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200494 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100495 */
496 ndelay_on(polldata.fd);
497#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100498 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200499 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100500 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200501
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200502#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100503 /* fread internally uses read loop, which in our case
504 * is usually exited when we get EAGAIN.
505 * In this case, libc sets error marker on the stream.
506 * Need to clear it before next fread to avoid possible
507 * rare false positive ferror below. Rare because usually
508 * fread gets more than zero bytes, and we don't fall
509 * into if (n <= 0) ...
510 */
511 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100512#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200513 errno = 0;
514 rdsz = sizeof(G.wget_buf);
515 if (G.got_clen) {
516 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
517 if ((int)G.content_len <= 0)
518 break;
519 rdsz = (unsigned)G.content_len;
520 }
521 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100522 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200523
524 if (n > 0) {
525 xwrite(G.output_fd, G.wget_buf, n);
526#if ENABLE_FEATURE_WGET_STATUSBAR
527 G.transferred += n;
528#endif
529 if (G.got_clen) {
530 G.content_len -= n;
531 if (G.content_len == 0)
532 break;
533 }
534#if ENABLE_FEATURE_WGET_TIMEOUT
535 second_cnt = G.timeout_seconds;
536#endif
537 continue;
538 }
539
540 /* n <= 0.
541 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100542 * If error occurs, or EOF is reached, the return value
543 * is a short item count (or zero).
544 * fread does not distinguish between EOF and error.
545 */
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200546 if (errno != EAGAIN) {
547 if (ferror(dfp)) {
548 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100549 bb_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200550 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100551 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200552 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100553
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200554#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
555 /* It was EAGAIN. There is no data. Wait up to one second
556 * then abort if timed out, or update the bar and try reading again.
557 */
558 if (safe_poll(&polldata, 1, 1000) == 0) {
559# if ENABLE_FEATURE_WGET_TIMEOUT
560 if (second_cnt != 0 && --second_cnt == 0) {
561 progress_meter(PROGRESS_END);
562 bb_error_msg_and_die("download timed out");
563 }
564# endif
565 /* We used to loop back to poll here,
566 * but there is no great harm in letting fread
567 * to try reading anyway.
568 */
569 }
570 /* Need to do it _every_ second for "stalled" indicator
571 * to be shown properly.
572 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200573 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200574#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200575 } /* while (reading data) */
576
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100577#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
578 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100579 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100580#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200581 if (!G.chunked)
582 break;
583
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100584 fgets_and_trim(dfp); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200585 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100586 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100587 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200588 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100589 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200590 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100591 G.got_clen = 1;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200592 /*
593 * Note that fgets may result in some data being buffered in dfp.
594 * We loop back to fread, which will retrieve this data.
595 * Also note that code has to be arranged so that fread
596 * is done _before_ one-second poll wait - poll doesn't know
597 * about stdio buffering and can result in spurious one second waits!
598 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200599 }
600
Denys Vlasenko61441242012-06-17 19:52:25 +0200601 /* If -c failed, we restart from the beginning,
602 * but we do not truncate file then, we do it only now, at the end.
603 * This lets user to ^C if his 99% complete 10 GB file download
604 * failed to restart *without* losing the almost complete file.
605 */
606 {
607 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
608 if (pos != (off_t)-1)
609 ftruncate(G.output_fd, pos);
610 }
611
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100612 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100613 G.chunked = 0; /* makes it show 100% even for chunked download */
614 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200615 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200616}
617
Pere Orga53695632011-02-16 20:09:36 +0100618static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +0000619{
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100620 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200621 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100622 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200623 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000624 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100625 char *proxy = NULL;
626 char *fname_out_alloc;
Denys Vlasenko93b4a602011-12-18 05:11:56 +0100627 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100628 struct host_info server;
629 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +0000630
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100631 server.allocated = NULL;
632 target.allocated = NULL;
633 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200634 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100635
636 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000637
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000638 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100639 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000640 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000641 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko2384a352011-02-15 00:58:36 +0100642 use_proxy = (proxy && proxy[0]);
643 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000644 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +0000645 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200646 if (!use_proxy) {
647 server.port = target.port;
648 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100649 //free(server.allocated); - can't be non-NULL
650 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200651 } else {
652 server.host = target.host;
653 }
654 }
655
656 if (ENABLE_FEATURE_IPV6)
657 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000658
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100659 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100660 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100661 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100662 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000663 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100664 if (G.fname_out[0] == '/' || !G.fname_out[0])
665 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000666 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoaacd4482012-06-17 20:21:30 +0200667 if (G.dir_prefix)
668 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100669 else {
Denys Vlasenkoaacd4482012-06-17 20:21:30 +0200670 /* redirects may free target.path later, need to make a copy */
671 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100672 }
Eric Andersen29edd002000-12-09 16:55:35 +0000673 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000674#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100675 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000676#endif
677
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000678 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100679 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100680 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100681 G.output_fd = open(G.fname_out, O_WRONLY);
682 if (G.output_fd >= 0) {
683 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000684 }
685 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +0100686 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000687 }
688
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200689 redir_limit = 5;
690 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000691 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100692 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200693 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
694 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
695 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000696 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200697 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +0100698 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
699 G.got_clen = 0;
700 G.chunked = 0;
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000701 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000702 /*
703 * HTTP session
704 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200705 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200706 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200707
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100708
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200709 /* Open socket to http server */
710 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200711
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200712 /* Send HTTP request */
713 if (use_proxy) {
714 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
715 target.is_ftp ? "f" : "ht", target.host,
716 target.path);
717 } else {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100718 if (option_mask32 & WGET_OPT_POST_DATA)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200719 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
720 else
721 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
722 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000723
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200724 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100725 target.host, G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000726
Denys Vlasenko9213a552011-02-10 13:23:45 +0100727 /* Ask server to close the connection as soon as we are done
728 * (IOW: we do not intend to send more requests)
729 */
730 fprintf(sfp, "Connection: close\r\n");
731
Denis Vlasenko9cade082006-11-21 10:43:02 +0000732#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200733 if (target.user) {
734 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100735 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200736 }
737 if (use_proxy && server.user) {
738 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100739 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200740 }
Eric Andersen79757c92001-04-05 21:45:54 +0000741#endif
742
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200743 if (G.beg_range != 0)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100744 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100745
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000746#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100747 if (G.extra_headers)
748 fputs(G.extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000749
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100750 if (option_mask32 & WGET_OPT_POST_DATA) {
Denys Vlasenko9213a552011-02-10 13:23:45 +0100751 fprintf(sfp,
752 "Content-Type: application/x-www-form-urlencoded\r\n"
753 "Content-Length: %u\r\n"
754 "\r\n"
755 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +0200756 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +0100757 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200758 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000759#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100760 {
761 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200762 }
Eric Andersen79757c92001-04-05 21:45:54 +0000763
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200764 fflush(sfp);
765
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200766 /*
767 * Retrieve HTTP response line and check for "200" status code.
768 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000769 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100770 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000771
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100772 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200773 str = skip_non_whitespace(str);
774 str = skip_whitespace(str);
775 // FIXME: no error check
776 // xatou wouldn't work: "200 OK"
777 status = atoi(str);
778 switch (status) {
779 case 0:
780 case 100:
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100781 while (gethdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200782 /* eat all remaining headers */;
783 goto read_response;
784 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000785/*
786Response 204 doesn't say "null file", it says "metadata
787has changed but data didn't":
788
789"10.2.5 204 No Content
790The server has fulfilled the request but does not need to return
791an entity-body, and might want to return updated metainformation.
792The response MAY include new or updated metainformation in the form
793of entity-headers, which if present SHOULD be associated with
794the requested variant.
795
796If the client is a user agent, it SHOULD NOT change its document
797view from that which caused the request to be sent. This response
798is primarily intended to allow input for actions to take place
799without causing a change to the user agent's active document view,
800although any new or updated metainformation SHOULD be applied
801to the document currently in the user agent's active view.
802
803The 204 response MUST NOT include a message-body, and thus
804is always terminated by the first empty line after the header fields."
805
806However, in real world it was observed that some web servers
807(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
808*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200809 case 204:
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200810 if (G.beg_range != 0) {
811 /* "Range:..." was not honored by the server.
812 * Restart download from the beginning.
813 */
814 reset_beg_range_to_zero();
815 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200816 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200817 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200818 case 301:
819 case 302:
820 case 303:
821 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200822 case 206: /* Partial Content */
823 if (G.beg_range != 0)
824 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000825 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200826 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200827 /* fall through */
828 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100829 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200830 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000831
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200832 /*
833 * Retrieve HTTP headers.
834 */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100835 while ((str = gethdr(sfp)) != NULL) {
836 static const char keywords[] ALIGN1 =
837 "content-length\0""transfer-encoding\0""location\0";
838 enum {
839 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
840 };
Matthijs van de Water0d586662009-08-22 20:19:48 +0200841 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100842
843 /* gethdr converted "FOO:" string to lowercase */
844
Matthijs van de Water0d586662009-08-22 20:19:48 +0200845 /* strip trailing whitespace */
846 char *s = strchrnul(str, '\0') - 1;
847 while (s >= str && (*s == ' ' || *s == '\t')) {
848 *s = '\0';
849 s--;
850 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100851 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200852 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100853 G.content_len = BB_STRTOOFF(str, NULL, 10);
854 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200855 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000856 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200857 G.got_clen = 1;
858 continue;
859 }
860 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100861 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200862 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100863 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200864 }
865 if (key == KEY_location && status >= 300) {
866 if (--redir_limit == 0)
867 bb_error_msg_and_die("too many redirections");
868 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100869 if (str[0] == '/') {
Denys Vlasenko93b4a602011-12-18 05:11:56 +0100870 free(redirected_path);
871 target.path = redirected_path = xstrdup(str+1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200872 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100873 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200874 parse_url(str, &target);
875 if (!use_proxy) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100876 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +0100877 server.allocated = NULL;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200878 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200879 /* strip_ipv6_scope_id(target.host); - no! */
880 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200881 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000882 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200883 goto resolve_lsa;
884 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000885 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200886 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000887 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200888 }
889// if (status >= 300)
890// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000891
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200892 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000893 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000894
895 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000896 /*
897 * FTP session
898 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200899 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000900 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000901
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100902 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100903
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100904 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100905 if (G.output_fd < 0)
906 G.output_fd = xopen(G.fname_out, G.o_flags);
907 retrieve_file_data(dfp);
908 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
909 xclose(G.output_fd);
910 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100911 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000912 }
Eric Andersen79757c92001-04-05 21:45:54 +0000913
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200914 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100915 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000916 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100917 if (ftpcmd(NULL, NULL, sfp) != 226)
918 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
919 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000920 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100921 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000922
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100923 free(server.allocated);
924 free(target.allocated);
925 free(fname_out_alloc);
Denys Vlasenko93b4a602011-12-18 05:11:56 +0100926 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +0000927}
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100928
929int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
930int wget_main(int argc UNUSED_PARAM, char **argv)
931{
932#if ENABLE_FEATURE_WGET_LONG_OPTIONS
933 static const char wget_longopts[] ALIGN1 =
934 /* name, has_arg, val */
935 "continue\0" No_argument "c"
936//FIXME: -s isn't --spider, it's --save-headers!
937 "spider\0" No_argument "s"
938 "quiet\0" No_argument "q"
939 "output-document\0" Required_argument "O"
940 "directory-prefix\0" Required_argument "P"
941 "proxy\0" Required_argument "Y"
942 "user-agent\0" Required_argument "U"
943#if ENABLE_FEATURE_WGET_TIMEOUT
944 "timeout\0" Required_argument "T"
945#endif
946 /* Ignored: */
947 // "tries\0" Required_argument "t"
948 /* Ignored (we always use PASV): */
949 "passive-ftp\0" No_argument "\xff"
950 "header\0" Required_argument "\xfe"
951 "post-data\0" Required_argument "\xfd"
952 /* Ignored (we don't do ssl) */
953 "no-check-certificate\0" No_argument "\xfc"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200954 /* Ignored (we don't support caching) */
955 "no-cache\0" No_argument "\xfb"
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100956 ;
957#endif
958
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100959#if ENABLE_FEATURE_WGET_LONG_OPTIONS
960 llist_t *headers_llist = NULL;
961#endif
962
963 INIT_G();
964
Lauri Kasanend074b412013-10-12 21:47:07 +0200965#if ENABLE_FEATURE_WGET_TIMEOUT
966 G.timeout_seconds = 900;
967 signal(SIGALRM, alarm_handler);
968#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100969 G.proxy_flag = "on"; /* use proxies if env vars are set */
970 G.user_agent = "Wget"; /* "User-Agent" header field */
971
972#if ENABLE_FEATURE_WGET_LONG_OPTIONS
973 applet_long_options = wget_longopts;
974#endif
975 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
976 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
977 &G.fname_out, &G.dir_prefix,
978 &G.proxy_flag, &G.user_agent,
979 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
980 NULL /* -t RETRIES */
981 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
982 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
983 );
984 argv += optind;
985
986#if ENABLE_FEATURE_WGET_LONG_OPTIONS
987 if (headers_llist) {
988 int size = 1;
989 char *cp;
990 llist_t *ll = headers_llist;
991 while (ll) {
992 size += strlen(ll->data) + 2;
993 ll = ll->link;
994 }
995 G.extra_headers = cp = xmalloc(size);
996 while (headers_llist) {
997 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
998 }
999 }
1000#endif
1001
Denys Vlasenko2384a352011-02-15 00:58:36 +01001002 G.output_fd = -1;
1003 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1004 if (G.fname_out) { /* -O FILE ? */
1005 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1006 G.output_fd = 1;
1007 option_mask32 &= ~WGET_OPT_CONTINUE;
1008 }
1009 /* compat with wget: -O FILE can overwrite */
1010 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1011 }
1012
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001013 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +01001014 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001015
Denys Vlasenko28556b92011-02-15 11:03:53 +01001016 if (G.output_fd >= 0)
1017 xclose(G.output_fd);
1018
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +02001019#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1020 free(G.extra_headers);
1021#endif
1022 FINI_G();
1023
Pere Orga53695632011-02-16 20:09:36 +01001024 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001025}