blob: 1013f66cb46cbbbfb6e64d386efd3c2766eeef33 [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010011
12//usage:#define wget_trivial_usage
13//usage: IF_FEATURE_WGET_LONG_OPTIONS(
14//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +020016/* Since we ignore these opts, we don't show them in --help */
17/* //usage: " [--no-check-certificate] [--no-cache]" */
18//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010019//usage: )
20//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23//usage: )
24//usage:#define wget_full_usage "\n\n"
25//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010026//usage: "\n -s Spider mode - only check file existence"
27//usage: "\n -c Continue retrieval of aborted transfer"
28//usage: "\n -q Quiet"
29//usage: "\n -P DIR Save to DIR (default .)"
30//usage: IF_FEATURE_WGET_TIMEOUT(
31//usage: "\n -T SEC Network read timeout is SEC seconds"
32//usage: )
33//usage: "\n -O FILE Save to FILE ('-' for stdout)"
34//usage: "\n -U STR Use STR for User-Agent header"
35//usage: "\n -Y Use proxy ('on' or 'off')"
36
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000037#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000038
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +020039#if 0
40# define log_io(...) bb_error_msg(__VA_ARGS__)
41#else
42# define log_io(...) ((void)0)
43#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +010044
45
/*
 * One parsed URL, as filled in by parse_url().
 * host and path point into the buffer owned by 'allocated'
 * (path may instead be a string literal when the URL has no path).
 */
struct host_info {
	char *allocated;        /* heap copy of the URL; freed on the next parse_url() */
	const char *path;       /* what follows the host; a leading '/' is not included */
	char *user;             /* malloced "user[:password]", or NULL if none was given */
	const char *protocol;   /* one of P_FTP, P_HTTP, P_HTTPS */
	char *host;             /* host part of the URL */
	int port;               /* port looked up for the protocol */
};
/* Protocol names. host_info.protocol is set to one of these arrays. */
static const char P_FTP[] = "ftp";
static const char P_HTTP[] = "http";
static const char P_HTTPS[] = "https";
Eric Andersen79757c92001-04-05 21:45:54 +000057
Denis Vlasenko77105632007-09-24 15:04:00 +000058
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020059/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000060struct globals {
61 off_t content_len; /* Content-length of the file */
62 off_t beg_range; /* Range at which continue begins */
63#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000064 off_t transferred; /* Number of bytes transferred so far */
65 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010066 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000067#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +020068 char *dir_prefix;
Denys Vlasenkoa3661092011-02-13 02:33:11 +010069#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenko982e87f2013-07-30 11:52:58 +020070 char *post_data;
71 char *extra_headers;
Denys Vlasenkoa3661092011-02-13 02:33:11 +010072#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +020073 char *fname_out; /* where to direct output (-O) */
74 const char *proxy_flag; /* Use proxies if env vars are set */
75 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020076#if ENABLE_FEATURE_WGET_TIMEOUT
77 unsigned timeout_seconds;
Lauri Kasanend074b412013-10-12 21:47:07 +020078 bool connecting;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020079#endif
Denys Vlasenko2384a352011-02-15 00:58:36 +010080 int output_fd;
81 int o_flags;
Denys Vlasenko7f432802009-06-28 01:02:24 +020082 smallint chunked; /* chunked transfer encoding */
83 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010084 /* Local downloads do benefit from big buffer.
85 * With 512 byte buffer, it was measured to be
86 * an order of magnitude slower than with big one.
87 */
88 uint64_t just_to_align_next_member;
89 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010090} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010091#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020092#define INIT_G() do { \
Denys Vlasenko982e87f2013-07-30 11:52:58 +020093 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020094} while (0)
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +020095#define FINI_G() do { \
96 FREE_PTR_TO_GLOBALS(); \
97} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000098
99
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200100/* Must match option string! */
101enum {
102 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200103 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200104 WGET_OPT_QUIET = (1 << 2),
105 WGET_OPT_OUTNAME = (1 << 3),
106 WGET_OPT_PREFIX = (1 << 4),
107 WGET_OPT_PROXY = (1 << 5),
108 WGET_OPT_USER_AGENT = (1 << 6),
109 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
110 WGET_OPT_RETRIES = (1 << 8),
111 WGET_OPT_PASSIVE = (1 << 9),
112 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
113 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
114};
115
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1,    /* initialize the bar, then draw it */
	PROGRESS_END   = 0,     /* draw a final time and release the bar */
	PROGRESS_BUMP  = 1,     /* plain periodic redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000121#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000122static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000123{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200124 if (option_mask32 & WGET_OPT_QUIET)
125 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000126
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200127 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100128 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000129
Denys Vlasenko2384a352011-02-15 00:58:36 +0100130 bb_progress_update(&G.pmt,
131 G.beg_range,
132 G.transferred,
133 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
134 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000135
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200136 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100137 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +0200138 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100139 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000140 }
141}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200142#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000143static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000144#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000145
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000146
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits host in place: "[addr%zone]" or "[addr%zone]:port" loses the
 * "%zone" part. Anything else is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}
	if (scope > cp) {
		/* '%' comes after ']' (e.g. "[::1]:80%25"): it is not a zone id.
		 * Copying "]..." forward onto it would write past the end
		 * of the string.
		 */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]...".
	 * The regions overlap, hence memmove; +1 carries the NUL along.
	 */
	memmove(scope, cp, strlen(cp) + 1);
}
182
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode character string.
 * The result is written to, and returned as, G.wget_buf, so it is
 * only valid until that buffer is used again.
 */
static char *base64enc(const char *str)
{
	/* paranoia: longest input that is encoded; the rest is dropped */
	const unsigned max_len = sizeof(G.wget_buf)/4*3 - 10;
	unsigned len;

	len = strlen(str);
	if (len > max_len)
		len = max_len;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
194
/*
 * Cut s at its first byte below ' ' (control characters, including
 * '\r', '\n' and '\t'). Bytes are compared as unsigned, so bytes
 * >= 0x80 are kept. Works in place and returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *end;

	for (end = (unsigned char *) s; *end >= ' '; end++)
		continue;
	*end = '\0';
	return s;
}
203
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * Signal handler (presumably installed for SIGALRM, which open_socket()
 * arms with alarm() - the installation is not in this part of the file).
 * Aborts the program only if a connect is in progress; otherwise the
 * signal is ignored.
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
	if (!G.connecting)
		return;
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	bb_error_msg_and_die("download timed out");
}
#endif
212
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000213static FILE *open_socket(len_and_sockaddr *lsa)
214{
Lauri Kasanend074b412013-10-12 21:47:07 +0200215 int fd;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000216 FILE *fp;
217
Lauri Kasanend074b412013-10-12 21:47:07 +0200218 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
219 fd = xconnect_stream(lsa);
220 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
221
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000222 /* glibc 2.4 seems to try seeking on it - ??! */
223 /* hopefully it understands what ESPIPE means... */
Lauri Kasanend074b412013-10-12 21:47:07 +0200224 fp = fdopen(fd, "r+");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100225 if (!fp)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100226 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000227
228 return fp;
229}
230
Denys Vlasenkof836f012011-02-10 23:02:28 +0100231/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
Lauri Kasanend074b412013-10-12 21:47:07 +0200232/* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100233static char fgets_and_trim(FILE *fp)
234{
235 char c;
236 char *buf_ptr;
237
238 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
239 bb_perror_msg_and_die("error getting response");
240
241 buf_ptr = strchrnul(G.wget_buf, '\n');
242 c = *buf_ptr;
243 *buf_ptr = '\0';
244 buf_ptr = strchrnul(G.wget_buf, '\r');
245 *buf_ptr = '\0';
246
247 log_io("< %s", G.wget_buf);
248
249 return c;
250}
251
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100252static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000253{
254 int result;
255 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100256 if (!s2)
257 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000258 fprintf(fp, "%s%s\r\n", s1, s2);
259 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100260 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000261 }
262
263 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100264 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100265 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000266
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100267 G.wget_buf[3] = '\0';
268 result = xatoi_positive(G.wget_buf);
269 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000270 return result;
271}
272
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100273static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000274{
275 char *url, *p, *sp;
276
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100277 free(h->allocated);
278 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000279
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100280 h->protocol = P_FTP;
281 p = strstr(url, "://");
282 if (p) {
283 *p = '\0';
284 h->host = p + 3;
285 if (strcmp(url, P_FTP) == 0) {
286 h->port = bb_lookup_port(P_FTP, "tcp", 21);
287 } else
288 if (strcmp(url, P_HTTPS) == 0) {
289 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
290 h->protocol = P_HTTPS;
291 } else
292 if (strcmp(url, P_HTTP) == 0) {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100293 http:
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100294 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
295 h->protocol = P_HTTP;
296 } else {
297 *p = ':';
298 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
299 }
300 } else {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100301 // GNU wget is user-friendly and falls back to http://
302 h->host = url;
303 goto http;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100304 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000305
306 // FYI:
307 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
308 // 'GET /?var=a/b HTTP 1.0'
309 // and saves 'index.html?var=a%2Fb' (we save 'b')
310 // wget 'http://busybox.net?login=john@doe':
311 // request: 'GET /?login=john@doe HTTP/1.0'
312 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
313 // wget 'http://busybox.net#test/test':
314 // request: 'GET / HTTP/1.0'
315 // saves: 'index.html' (we save 'test')
316 //
317 // We also don't add unique .N suffix if file exists...
318 sp = strchr(h->host, '/');
319 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
320 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
321 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000322 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000323 } else if (*sp == '/') {
324 *sp = '\0';
325 h->path = sp + 1;
326 } else { // '#' or '?'
327 // http://busybox.net?login=john@doe is a valid URL
328 // memmove converts to:
329 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000330 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000331 h->host--;
332 sp[-1] = '\0';
333 h->path = sp;
334 }
335
336 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000337 if (sp != NULL) {
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200338 // URL-decode "user:password" string before base64-encoding:
339 // wget http://test:my%20pass@example.com should send
340 // Authorization: Basic dGVzdDpteSBwYXNz
341 // which decodes to "test:my pass".
342 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000343 *sp = '\0';
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100344 free(h->user);
345 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000346 h->host = sp + 1;
347 }
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100348 /* else: h->user remains NULL, or as set by original request
349 * before redirect (if we are here after a redirect).
350 */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000351}
352
Denys Vlasenkof836f012011-02-10 23:02:28 +0100353static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000354{
355 char *s, *hdrval;
356 int c;
357
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000358 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100359 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000360
Denys Vlasenkof836f012011-02-10 23:02:28 +0100361 /* end of the headers? */
362 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000363 return NULL;
364
365 /* convert the header name to lower case */
Denys Vlasenkoea267d52013-07-01 15:01:50 +0200366 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
367 /*
368 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
369 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
370 * "A-Z" maps to "a-z".
371 * "@[\]" can't occur in header names.
372 * "^_" maps to "~,DEL" (which is wrong).
373 * "^" was never seen yet, "_" was seen from web.archive.org
374 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
375 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100376 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200377 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000378
379 /* verify we are at the end of the header name */
380 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100381 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000382
383 /* locate the start of the header value */
384 *s++ = '\0';
385 hdrval = skip_whitespace(s);
386
Denys Vlasenkof836f012011-02-10 23:02:28 +0100387 if (c != '\n') {
388 /* Rats! The buffer isn't big enough to hold the entire header value */
389 while (c = getc(fp), c != EOF && c != '\n')
390 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000391 }
392
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000393 return hdrval;
394}
395
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200396static void reset_beg_range_to_zero(void)
397{
Denys Vlasenko61441242012-06-17 19:52:25 +0200398 bb_error_msg("restart failed");
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200399 G.beg_range = 0;
400 xlseek(G.output_fd, 0, SEEK_SET);
Denys Vlasenko61441242012-06-17 19:52:25 +0200401 /* Done at the end instead: */
402 /* ftruncate(G.output_fd, 0); */
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200403}
404
Denys Vlasenko7f432802009-06-28 01:02:24 +0200405static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
406{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200407 FILE *sfp;
408 char *str;
409 int port;
410
411 if (!target->user)
412 target->user = xstrdup("anonymous:busybox@");
413
414 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100415 if (ftpcmd(NULL, NULL, sfp) != 220)
416 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200417
418 /*
419 * Splitting username:password pair,
420 * trying to log in
421 */
422 str = strchr(target->user, ':');
423 if (str)
424 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100425 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200426 case 230:
427 break;
428 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100429 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200430 break;
431 /* fall through (failed login) */
432 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100433 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200434 }
435
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100436 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200437
438 /*
439 * Querying file size
440 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100441 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
442 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100443 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200444 bb_error_msg_and_die("SIZE value is garbage");
445 }
446 G.got_clen = 1;
447 }
448
449 /*
450 * Entering passive mode
451 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100452 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200453 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100454 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200455 }
456 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
457 // Server's IP is N1.N2.N3.N4 (we ignore it)
458 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100459 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200460 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100461 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200462 if (!str) goto pasv_error;
463 port = xatou_range(str+1, 0, 255);
464 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100465 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200466 if (!str) goto pasv_error;
467 port += xatou_range(str+1, 0, 255) * 256;
Denys Vlasenkoca183112011-04-07 17:52:20 +0200468 set_nport(&lsa->u.sa, htons(port));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200469
470 *dfpp = open_socket(lsa);
471
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200472 if (G.beg_range != 0) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100473 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
474 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100475 G.content_len -= G.beg_range;
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200476 else
477 reset_beg_range_to_zero();
Denys Vlasenko7f432802009-06-28 01:02:24 +0200478 }
479
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100480 if (ftpcmd("RETR ", target->path, sfp) > 150)
481 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200482
483 return sfp;
484}
485
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100486static int spawn_https_helper(const char *host, unsigned port)
487{
488 char *allocated = NULL;
489 int sp[2];
490 int pid;
491
492 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
493 /* Kernel can have AF_UNIX support disabled */
494 bb_perror_msg_and_die("socketpair");
495
496 if (!strchr(host, ':'))
497 host = allocated = xasprintf("%s:%u", host, port);
498
499 pid = BB_MMU ? xfork() : xvfork();
500 if (pid == 0) {
501 /* Child */
502 char *argv[6];
503
504 close(sp[0]);
505 xmove_fd(sp[1], 0);
506 xdup2(0, 1);
507 /*
508 * TODO: develop a tiny ssl/tls helper (using matrixssl?),
509 * try to exec it here before falling back to big fat openssl.
510 */
511 /*
512 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
513 * It prints some debug stuff on stderr, don't know how to suppress it.
514 * Work around by dev-nulling stderr. We lose all error messages :(
515 */
516 xmove_fd(2, 3);
517 xopen("/dev/null", O_RDWR);
518 argv[0] = (char*)"openssl";
519 argv[1] = (char*)"s_client";
520 argv[2] = (char*)"-quiet";
521 argv[3] = (char*)"-connect";
522 argv[4] = (char*)host;
523 argv[5] = NULL;
524 BB_EXECVP(argv[0], argv);
525 xmove_fd(3, 2);
526 bb_perror_msg_and_die("can't execute '%s'", argv[0]);
527 /* notreached */
528 }
529
Denys Vlasenko53315572014-02-23 23:39:47 +0100530 /* Parent */
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100531 free(allocated);
532 close(sp[1]);
533 return sp[0];
534}
535
/* See networking/ssl_helper/README */
#define SSL_HELPER 0

#if SSL_HELPER
/*
 * Start "ssl_helper -d3" as a child process: its stdin and stdout are
 * one end of a socket pair and network_fd is handed to it as fd 3.
 * In the parent, network_fd is replaced by the other end of the pair.
 */
static void spawn_https_helper1(int network_fd)
{
	int fds[2];
	int child;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	child = BB_MMU ? xfork() : xvfork();
	if (child == 0) {
		/* Child */
		char *argv[3];

		close(fds[0]);
		xmove_fd(fds[1], 0);
		xdup2(0, 1);
		xmove_fd(network_fd, 3);
		/*
		 * A simple ssl/tls helper
		 */
		argv[0] = (char*)"ssl_helper";
		argv[1] = (char*)"-d3";
		argv[2] = NULL;
		BB_EXECVP(argv[0], argv);
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
		/* notreached */
	}

	/* Parent */
	close(fds[1]);
	xmove_fd(fds[0], network_fd);
}
#endif
574
Denys Vlasenko2384a352011-02-15 00:58:36 +0100575static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200576{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200577#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
578# if ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200579 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200580# endif
581 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200582
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200583 polldata.fd = fileno(dfp);
584 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200585#endif
586 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200587
588 if (G.chunked)
589 goto get_clen;
590
591 /* Loops only if chunked */
592 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100593
594#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
595 /* Must use nonblocking I/O, otherwise fread will loop
596 * and *block* until it reads full buffer,
597 * which messes up progress bar and/or timeout logic.
598 * Because of nonblocking I/O, we need to dance
599 * very carefully around EAGAIN. See explanation at
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200600 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100601 */
602 ndelay_on(polldata.fd);
603#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100604 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200605 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100606 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200607
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200608#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100609 /* fread internally uses read loop, which in our case
610 * is usually exited when we get EAGAIN.
611 * In this case, libc sets error marker on the stream.
612 * Need to clear it before next fread to avoid possible
613 * rare false positive ferror below. Rare because usually
614 * fread gets more than zero bytes, and we don't fall
615 * into if (n <= 0) ...
616 */
617 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100618#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200619 errno = 0;
620 rdsz = sizeof(G.wget_buf);
621 if (G.got_clen) {
622 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
623 if ((int)G.content_len <= 0)
624 break;
625 rdsz = (unsigned)G.content_len;
626 }
627 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100628 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200629
630 if (n > 0) {
631 xwrite(G.output_fd, G.wget_buf, n);
632#if ENABLE_FEATURE_WGET_STATUSBAR
633 G.transferred += n;
634#endif
635 if (G.got_clen) {
636 G.content_len -= n;
637 if (G.content_len == 0)
638 break;
639 }
640#if ENABLE_FEATURE_WGET_TIMEOUT
641 second_cnt = G.timeout_seconds;
642#endif
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100643 goto bump;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200644 }
645
646 /* n <= 0.
647 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100648 * If error occurs, or EOF is reached, the return value
649 * is a short item count (or zero).
650 * fread does not distinguish between EOF and error.
651 */
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200652 if (errno != EAGAIN) {
653 if (ferror(dfp)) {
654 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100655 bb_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200656 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100657 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200658 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100659
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200660#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
661 /* It was EAGAIN. There is no data. Wait up to one second
662 * then abort if timed out, or update the bar and try reading again.
663 */
664 if (safe_poll(&polldata, 1, 1000) == 0) {
665# if ENABLE_FEATURE_WGET_TIMEOUT
666 if (second_cnt != 0 && --second_cnt == 0) {
667 progress_meter(PROGRESS_END);
668 bb_error_msg_and_die("download timed out");
669 }
670# endif
671 /* We used to loop back to poll here,
672 * but there is no great harm in letting fread
673 * to try reading anyway.
674 */
675 }
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100676#endif
677 bump:
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200678 /* Need to do it _every_ second for "stalled" indicator
679 * to be shown properly.
680 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200681 progress_meter(PROGRESS_BUMP);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200682 } /* while (reading data) */
683
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100684#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
685 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100686 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100687#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200688 if (!G.chunked)
689 break;
690
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100691 fgets_and_trim(dfp); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200692 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100693 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100694 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200695 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100696 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200697 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100698 G.got_clen = 1;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200699 /*
700 * Note that fgets may result in some data being buffered in dfp.
701 * We loop back to fread, which will retrieve this data.
702 * Also note that code has to be arranged so that fread
703 * is done _before_ one-second poll wait - poll doesn't know
704 * about stdio buffering and can result in spurious one second waits!
705 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200706 }
707
Denys Vlasenko61441242012-06-17 19:52:25 +0200708 /* If -c failed, we restart from the beginning,
709 * but we do not truncate file then, we do it only now, at the end.
710 * This lets user to ^C if his 99% complete 10 GB file download
711 * failed to restart *without* losing the almost complete file.
712 */
713 {
714 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
715 if (pos != (off_t)-1)
716 ftruncate(G.output_fd, pos);
717 }
718
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100719 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100720 G.chunked = 0; /* makes it show 100% even for chunked download */
721 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200722 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200723}
724
Pere Orga53695632011-02-16 20:09:36 +0100725static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +0000726{
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100727 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200728 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100729 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200730 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000731 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100732 char *proxy = NULL;
733 char *fname_out_alloc;
Denys Vlasenko93b4a602011-12-18 05:11:56 +0100734 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100735 struct host_info server;
736 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +0000737
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100738 server.allocated = NULL;
739 target.allocated = NULL;
740 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200741 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100742
743 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000744
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000745 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100746 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000747 if (use_proxy) {
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100748 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
749//FIXME: what if protocol is https? Ok to use http_proxy?
Denys Vlasenko2384a352011-02-15 00:58:36 +0100750 use_proxy = (proxy && proxy[0]);
751 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000752 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +0000753 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200754 if (!use_proxy) {
755 server.port = target.port;
756 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100757 //free(server.allocated); - can't be non-NULL
758 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200759 } else {
760 server.host = target.host;
761 }
762 }
763
764 if (ENABLE_FEATURE_IPV6)
765 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000766
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100767 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100768 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100769 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100770 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000771 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100772 if (G.fname_out[0] == '/' || !G.fname_out[0])
773 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000774 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoaacd4482012-06-17 20:21:30 +0200775 if (G.dir_prefix)
776 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100777 else {
Denys Vlasenkoaacd4482012-06-17 20:21:30 +0200778 /* redirects may free target.path later, need to make a copy */
779 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100780 }
Eric Andersen29edd002000-12-09 16:55:35 +0000781 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000782#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100783 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000784#endif
785
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000786 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100787 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100788 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100789 G.output_fd = open(G.fname_out, O_WRONLY);
790 if (G.output_fd >= 0) {
791 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000792 }
793 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +0100794 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000795 }
796
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200797 redir_limit = 5;
798 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000799 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100800 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200801 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
802 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
803 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000804 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200805 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +0100806 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
807 G.got_clen = 0;
808 G.chunked = 0;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100809 if (use_proxy || target.protocol != P_FTP) {
Eric Andersen79757c92001-04-05 21:45:54 +0000810 /*
811 * HTTP session
812 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200813 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200814 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200815
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100816 /* Open socket to http(s) server */
817 if (target.protocol == P_HTTPS) {
Denys Vlasenko53315572014-02-23 23:39:47 +0100818/* openssl-based helper
Denys Vlasenkoa2796222014-02-24 17:20:40 +0100819 * Inconvenient API since we can't give it an open fd
Denys Vlasenko53315572014-02-23 23:39:47 +0100820 */
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100821 int fd = spawn_https_helper(server.host, server.port);
822 sfp = fdopen(fd, "r+");
823 if (!sfp)
824 bb_perror_msg_and_die(bb_msg_memory_exhausted);
825 } else
826 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +0100827#if SSL_HELPER
828 if (target.protocol == P_HTTPS)
829 spawn_https_helper1(fileno(sfp));
830#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200831 /* Send HTTP request */
832 if (use_proxy) {
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100833 fprintf(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
834 target.protocol, target.host,
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200835 target.path);
836 } else {
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100837 fprintf(sfp, "%s /%s HTTP/1.1\r\n",
838 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
839 target.path);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200840 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000841
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200842 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100843 target.host, G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000844
Denys Vlasenko9213a552011-02-10 13:23:45 +0100845 /* Ask server to close the connection as soon as we are done
846 * (IOW: we do not intend to send more requests)
847 */
848 fprintf(sfp, "Connection: close\r\n");
849
Denis Vlasenko9cade082006-11-21 10:43:02 +0000850#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200851 if (target.user) {
852 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100853 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200854 }
855 if (use_proxy && server.user) {
856 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100857 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200858 }
Eric Andersen79757c92001-04-05 21:45:54 +0000859#endif
860
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200861 if (G.beg_range != 0)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100862 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100863
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000864#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100865 if (G.extra_headers)
866 fputs(G.extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000867
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100868 if (option_mask32 & WGET_OPT_POST_DATA) {
Denys Vlasenko9213a552011-02-10 13:23:45 +0100869 fprintf(sfp,
870 "Content-Type: application/x-www-form-urlencoded\r\n"
871 "Content-Length: %u\r\n"
872 "\r\n"
873 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +0200874 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +0100875 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200876 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000877#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100878 {
879 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200880 }
Eric Andersen79757c92001-04-05 21:45:54 +0000881
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200882 fflush(sfp);
883
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200884 /*
885 * Retrieve HTTP response line and check for "200" status code.
886 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000887 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100888 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000889
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100890 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200891 str = skip_non_whitespace(str);
892 str = skip_whitespace(str);
893 // FIXME: no error check
894 // xatou wouldn't work: "200 OK"
895 status = atoi(str);
896 switch (status) {
897 case 0:
898 case 100:
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100899 while (gethdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200900 /* eat all remaining headers */;
901 goto read_response;
902 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000903/*
904Response 204 doesn't say "null file", it says "metadata
905has changed but data didn't":
906
907"10.2.5 204 No Content
908The server has fulfilled the request but does not need to return
909an entity-body, and might want to return updated metainformation.
910The response MAY include new or updated metainformation in the form
911of entity-headers, which if present SHOULD be associated with
912the requested variant.
913
914If the client is a user agent, it SHOULD NOT change its document
915view from that which caused the request to be sent. This response
916is primarily intended to allow input for actions to take place
917without causing a change to the user agent's active document view,
918although any new or updated metainformation SHOULD be applied
919to the document currently in the user agent's active view.
920
921The 204 response MUST NOT include a message-body, and thus
922is always terminated by the first empty line after the header fields."
923
924However, in real world it was observed that some web servers
925(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
926*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200927 case 204:
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200928 if (G.beg_range != 0) {
929 /* "Range:..." was not honored by the server.
930 * Restart download from the beginning.
931 */
932 reset_beg_range_to_zero();
933 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200934 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200935 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200936 case 301:
937 case 302:
938 case 303:
939 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200940 case 206: /* Partial Content */
941 if (G.beg_range != 0)
942 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000943 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200944 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200945 /* fall through */
946 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100947 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200948 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000949
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200950 /*
951 * Retrieve HTTP headers.
952 */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100953 while ((str = gethdr(sfp)) != NULL) {
954 static const char keywords[] ALIGN1 =
955 "content-length\0""transfer-encoding\0""location\0";
956 enum {
957 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
958 };
Matthijs van de Water0d586662009-08-22 20:19:48 +0200959 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100960
961 /* gethdr converted "FOO:" string to lowercase */
962
Matthijs van de Water0d586662009-08-22 20:19:48 +0200963 /* strip trailing whitespace */
964 char *s = strchrnul(str, '\0') - 1;
965 while (s >= str && (*s == ' ' || *s == '\t')) {
966 *s = '\0';
967 s--;
968 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100969 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200970 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100971 G.content_len = BB_STRTOOFF(str, NULL, 10);
972 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200973 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000974 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200975 G.got_clen = 1;
976 continue;
977 }
978 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100979 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200980 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100981 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200982 }
983 if (key == KEY_location && status >= 300) {
984 if (--redir_limit == 0)
985 bb_error_msg_and_die("too many redirections");
986 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100987 if (str[0] == '/') {
Denys Vlasenko93b4a602011-12-18 05:11:56 +0100988 free(redirected_path);
989 target.path = redirected_path = xstrdup(str+1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200990 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100991 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200992 parse_url(str, &target);
993 if (!use_proxy) {
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100994 /* server.user remains untouched */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100995 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +0100996 server.allocated = NULL;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200997 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200998 /* strip_ipv6_scope_id(target.host); - no! */
999 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001000 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +00001001 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001002 goto resolve_lsa;
1003 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +00001004 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001005 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +00001006 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001007 }
1008// if (status >= 300)
1009// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001010
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001011 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +00001012 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001013
1014 } else {
Eric Andersen79757c92001-04-05 21:45:54 +00001015 /*
1016 * FTP session
1017 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001018 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +00001019 }
Denis Vlasenko77105632007-09-24 15:04:00 +00001020
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001021 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001022
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001023 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001024 if (G.output_fd < 0)
1025 G.output_fd = xopen(G.fname_out, G.o_flags);
1026 retrieve_file_data(dfp);
1027 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1028 xclose(G.output_fd);
1029 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001030 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +00001031 }
Eric Andersen79757c92001-04-05 21:45:54 +00001032
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001033 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001034 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +00001035 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001036 if (ftpcmd(NULL, NULL, sfp) != 226)
1037 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1038 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +00001039 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001040 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +00001041
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001042 free(server.allocated);
1043 free(target.allocated);
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001044 free(server.user);
1045 free(target.user);
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001046 free(fname_out_alloc);
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001047 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +00001048}
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001049
1050int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1051int wget_main(int argc UNUSED_PARAM, char **argv)
1052{
1053#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1054 static const char wget_longopts[] ALIGN1 =
1055 /* name, has_arg, val */
1056 "continue\0" No_argument "c"
1057//FIXME: -s isn't --spider, it's --save-headers!
1058 "spider\0" No_argument "s"
1059 "quiet\0" No_argument "q"
1060 "output-document\0" Required_argument "O"
1061 "directory-prefix\0" Required_argument "P"
1062 "proxy\0" Required_argument "Y"
1063 "user-agent\0" Required_argument "U"
1064#if ENABLE_FEATURE_WGET_TIMEOUT
1065 "timeout\0" Required_argument "T"
1066#endif
1067 /* Ignored: */
1068 // "tries\0" Required_argument "t"
1069 /* Ignored (we always use PASV): */
1070 "passive-ftp\0" No_argument "\xff"
1071 "header\0" Required_argument "\xfe"
1072 "post-data\0" Required_argument "\xfd"
1073 /* Ignored (we don't do ssl) */
1074 "no-check-certificate\0" No_argument "\xfc"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001075 /* Ignored (we don't support caching) */
1076 "no-cache\0" No_argument "\xfb"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001077 ;
1078#endif
1079
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001080#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1081 llist_t *headers_llist = NULL;
1082#endif
1083
1084 INIT_G();
1085
Lauri Kasanend074b412013-10-12 21:47:07 +02001086#if ENABLE_FEATURE_WGET_TIMEOUT
1087 G.timeout_seconds = 900;
1088 signal(SIGALRM, alarm_handler);
1089#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001090 G.proxy_flag = "on"; /* use proxies if env vars are set */
1091 G.user_agent = "Wget"; /* "User-Agent" header field */
1092
1093#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1094 applet_long_options = wget_longopts;
1095#endif
1096 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
1097 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
1098 &G.fname_out, &G.dir_prefix,
1099 &G.proxy_flag, &G.user_agent,
1100 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1101 NULL /* -t RETRIES */
1102 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1103 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1104 );
1105 argv += optind;
1106
1107#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1108 if (headers_llist) {
1109 int size = 1;
1110 char *cp;
1111 llist_t *ll = headers_llist;
1112 while (ll) {
1113 size += strlen(ll->data) + 2;
1114 ll = ll->link;
1115 }
1116 G.extra_headers = cp = xmalloc(size);
1117 while (headers_llist) {
1118 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
1119 }
1120 }
1121#endif
1122
Denys Vlasenko2384a352011-02-15 00:58:36 +01001123 G.output_fd = -1;
1124 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1125 if (G.fname_out) { /* -O FILE ? */
1126 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1127 G.output_fd = 1;
1128 option_mask32 &= ~WGET_OPT_CONTINUE;
1129 }
1130 /* compat with wget: -O FILE can overwrite */
1131 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1132 }
1133
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001134 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +01001135 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001136
Denys Vlasenko28556b92011-02-15 11:03:53 +01001137 if (G.output_fd >= 0)
1138 xclose(G.output_fd);
1139
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +02001140#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1141 free(G.extra_headers);
1142#endif
1143 FINI_G();
1144
Pere Orga53695632011-02-16 20:09:36 +01001145 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001146}