blob: 7f27e4e7b8aa0e8bc56ebbd5e30cbc5caf07c64a [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010011
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020012//config:config WGET
13//config: bool "wget"
14//config: default y
15//config: help
16//config: wget is a utility for non-interactive download of files from HTTP
17//config: and FTP servers.
18//config:
19//config:config FEATURE_WGET_STATUSBAR
20//config: bool "Enable a nifty process meter (+2k)"
21//config: default y
22//config: depends on WGET
23//config: help
24//config: Enable the transfer progress bar for wget transfers.
25//config:
26//config:config FEATURE_WGET_AUTHENTICATION
27//config: bool "Enable HTTP authentication"
28//config: default y
29//config: depends on WGET
30//config: help
31//config: Support authenticated HTTP transfers.
32//config:
33//config:config FEATURE_WGET_LONG_OPTIONS
34//config: bool "Enable long options"
35//config: default y
36//config: depends on WGET && LONG_OPTS
37//config: help
38//config: Support long options for the wget applet.
39//config:
40//config:config FEATURE_WGET_TIMEOUT
41//config: bool "Enable timeout option -T SEC"
42//config: default y
43//config: depends on WGET
44//config: help
45//config: Supports network read and connect timeouts for wget,
46//config: so that wget will give up and timeout, through the -T
47//config: command line option.
48//config:
49//config: Currently only connect and network data read timeout are
50//config: supported (i.e., timeout is not applied to the DNS query). When
51//config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
52//config: will work in addition to -T.
53//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +020054//config:config FEATURE_WGET_OPENSSL
55//config: bool "Try to connect to HTTPS using openssl"
56//config: default y
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020057//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020058//config: help
59//config: Choose how wget establishes SSL connection for https:// URLs.
60//config:
61//config: Busybox itself contains no SSL code. wget will spawn
62//config: a helper program to talk over HTTPS.
63//config:
64//config: OpenSSL has a simple SSL client for debug purposes.
65//config: If you select "openssl" helper, wget will effectively call
66//config: "openssl s_client -quiet -connect IP:443 2>/dev/null"
67//config: and pipe its data through it.
68//config: Note inconvenient API: host resolution is done twice,
69//config: and there is no guarantee openssl's idea of IPv6 address
70//config: format is the same as ours.
71//config: Another problem is that s_client prints debug information
72//config: to stderr, and it needs to be suppressed. This means
73//config: all error messages get suppressed too.
74//config: openssl is also a big binary, often dynamically linked
75//config: against ~15 libraries.
76//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +020077//config:config FEATURE_WGET_SSL_HELPER
78//config: bool "Try to connect to HTTPS using ssl_helper"
79//config: default y
80//config: depends on WGET
81//config: help
82//config: Choose how wget establishes SSL connection for https:// URLs.
83//config:
84//config: Busybox itself contains no SSL code. wget will spawn
85//config: a helper program to talk over HTTPS.
86//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020087//config: ssl_helper is a tool which can be built statically
88//config: from busybox sources against a small embedded SSL library.
89//config: Please see networking/ssl_helper/README.
90//config: It does not require double host resolution and emits
91//config: error messages to stderr.
92//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +020093//config: Precompiled static binary may be available at
94//config: http://busybox.net/downloads/binaries/
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020095
96//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
97
98//kbuild:lib-$(CONFIG_WGET) += wget.o
99
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100100//usage:#define wget_trivial_usage
101//usage: IF_FEATURE_WGET_LONG_OPTIONS(
102//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
103//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200104/* Since we ignore these opts, we don't show them in --help */
Denys Vlasenko92e1b082015-10-20 21:51:52 +0200105/* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
106/* //usage: " [-nv] [-nc] [-nH] [-np]" */
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200107//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100108//usage: )
109//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
110//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
111//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
112//usage: )
113//usage:#define wget_full_usage "\n\n"
114//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100115//usage: "\n -s Spider mode - only check file existence"
116//usage: "\n -c Continue retrieval of aborted transfer"
117//usage: "\n -q Quiet"
118//usage: "\n -P DIR Save to DIR (default .)"
119//usage: IF_FEATURE_WGET_TIMEOUT(
120//usage: "\n -T SEC Network read timeout is SEC seconds"
121//usage: )
122//usage: "\n -O FILE Save to FILE ('-' for stdout)"
123//usage: "\n -U STR Use STR for User-Agent header"
124//usage: "\n -Y Use proxy ('on' or 'off')"
125
Denis Vlasenkob6adbf12007-05-26 19:00:18 +0000126#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000127
/*
 * I/O tracing helpers.
 * Change "#if 0" to "#if 1" to have every line sent to the server
 * (and every line logged via log_io) reported through bb_error_msg().
 *
 * log_io(fmt, ...)       - trace message, compiled out by default.
 * SENDFMT(fp, fmt, ...)  - fprintf() to fp, additionally traced when enabled.
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
/* No ';' after "while (0)": the caller supplies it. With a trailing ';'
 * in the macro, "if (x) SENDFMT(...); else ..." would not parse. */
# define SENDFMT(fp, fmt, ...) \
	do { \
		log_io("> " fmt, ##__VA_ARGS__); \
		fprintf(fp, fmt, ##__VA_ARGS__); \
	} while (0)
#else
# define log_io(...) ((void)0)
# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100139
140
/* One parsed URL, as produced by parse_url(). */
struct host_info {
	char *allocated;       /* heap copy of the URL text; released by the next parse_url() */
	const char *path;      /* part after the host ("" when the URL has none) */
	char *user;            /* malloced, percent-decoded "user[:password]"; NULL if never given */
	const char *protocol;  /* points at P_FTP, P_HTTP or P_HTTPS */
	char *host;            /* host part, points into 'allocated' */
	int port;              /* port looked up for the protocol */
};
/*
 * Protocol names. parse_url() compares the URL scheme against these
 * strings and stores a pointer to one of them in host_info.protocol.
 */
static const char P_FTP[] = "ftp";
static const char P_HTTP[] = "http";
#if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
/* Only needed when some HTTPS helper is configured in */
static const char P_HTTPS[] = "https";
#endif
Eric Andersen79757c92001-04-05 21:45:54 +0000154
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * User-specified headers prevent using our corresponding built-in headers.
 * One bit per header we would otherwise generate ourselves; the bits are
 * collected in G.user_headers. The two authentication bits evaluate to 0
 * when FEATURE_WGET_AUTHENTICATION is disabled.
 */
enum {
	HDR_HOST       = (1<<0),
	HDR_USER_AGENT = (1<<1),
	HDR_RANGE      = (1<<2),
	HDR_AUTH       = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
	HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
};
/*
 * NUL-separated list of the header names above, in the same order
 * as the HDR_* bits.
 * NOTE(review): presumably string index N maps to bit (1 << N);
 * confirm at the place where --header is parsed.
 */
static const char wget_user_headers[] ALIGN1 =
	"Host:\0"
	"User-Agent:\0"
	"Range:\0"
# if ENABLE_FEATURE_WGET_AUTHENTICATION
	"Authorization:\0"
	"Proxy-Authorization:\0"
# endif
	;
/* Nonzero if the user supplied the corresponding header */
# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
#else /* No long options, no user-headers :( */
# define USR_HEADER_HOST       0
# define USR_HEADER_USER_AGENT 0
# define USR_HEADER_RANGE      0
# define USR_HEADER_AUTH       0
# define USR_HEADER_PROXY_AUTH 0
#endif
Denis Vlasenko77105632007-09-24 15:04:00 +0000185
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200186/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +0000187struct globals {
188 off_t content_len; /* Content-length of the file */
189 off_t beg_range; /* Range at which continue begins */
190#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +0000191 off_t transferred; /* Number of bytes transferred so far */
192 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +0100193 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +0000194#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200195 char *dir_prefix;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100196#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200197 char *post_data;
198 char *extra_headers;
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +0100199 unsigned char user_headers; /* Headers mentioned by the user */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100200#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200201 char *fname_out; /* where to direct output (-O) */
202 const char *proxy_flag; /* Use proxies if env vars are set */
203 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200204#if ENABLE_FEATURE_WGET_TIMEOUT
205 unsigned timeout_seconds;
Lauri Kasanend074b412013-10-12 21:47:07 +0200206 bool connecting;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200207#endif
Denys Vlasenko2384a352011-02-15 00:58:36 +0100208 int output_fd;
209 int o_flags;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200210 smallint chunked; /* chunked transfer encoding */
211 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100212 /* Local downloads do benefit from big buffer.
213 * With 512 byte buffer, it was measured to be
214 * an order of magnitude slower than with big one.
215 */
216 uint64_t just_to_align_next_member;
217 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +0100218} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100219#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200220#define INIT_G() do { \
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200221 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200222} while (0)
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +0200223#define FINI_G() do { \
224 FREE_PTR_TO_GLOBALS(); \
225} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +0000226
227
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200228/* Must match option string! */
229enum {
230 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200231 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200232 WGET_OPT_QUIET = (1 << 2),
233 WGET_OPT_OUTNAME = (1 << 3),
234 WGET_OPT_PREFIX = (1 << 4),
235 WGET_OPT_PROXY = (1 << 5),
236 WGET_OPT_USER_AGENT = (1 << 6),
237 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
238 WGET_OPT_RETRIES = (1 << 8),
239 WGET_OPT_PASSIVE = (1 << 9),
240 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
241 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
242};
243
/* Values for the 'flag' argument of progress_meter() */
enum {
	PROGRESS_START = -1, /* initialize the meter, then draw it */
	PROGRESS_END   = 0,  /* draw it one last time, then tear it down */
	PROGRESS_BUMP  = 1,  /* just redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000249#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000250static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000251{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200252 if (option_mask32 & WGET_OPT_QUIET)
253 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000254
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200255 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100256 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000257
Denys Vlasenko2384a352011-02-15 00:58:36 +0100258 bb_progress_update(&G.pmt,
259 G.beg_range,
260 G.transferred,
261 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
262 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000263
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200264 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100265 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +0200266 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100267 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000268 }
269}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200270#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000271static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000272#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000273
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000274
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantics. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits 'host' in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not of the form "[addr%zone]" or "[addr%zone]:port"
 * is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}
	if (cp < scope) {
		/* '%' is outside the brackets ("[xx]:%yy"): not a zone id.
		 * Copying "]..." forward onto the '%' would also write
		 * past the end of the string. */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]...":
	 * move the tail (including its NUL) down over the zone id */
	memmove(scope, cp, strlen(cp) + 1);
}
310
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode character string.
 * The result is built in (and returned as a pointer to) G.wget_buf,
 * so it is overwritten by the next user of that buffer.
 * Over-long input is silently truncated so the output fits the buffer.
 */
static char *base64enc(const char *str)
{
	/* paranoia: largest input we are willing to encode */
	const unsigned max_len = sizeof(G.wget_buf)/4*3 - 10;
	unsigned len;

	len = strlen(str);
	if (len > max_len)
		len = max_len;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
322
/*
 * Cut the string at its first control character (any byte below ' ').
 * Bytes are examined as unsigned, so bytes >= 0x80 are kept.
 * Modifies s in place and returns it.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cur;

	for (cur = (unsigned char *) s; *cur >= ' '; cur++)
		continue;
	*cur = '\0';
	return s;
}
331
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * SIGALRM handler: abort the program if the alarm fires while
 * open_socket() is still connecting; otherwise do nothing.
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
	if (!G.connecting)
		return;
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	bb_error_msg_and_die("download timed out");
}
#endif
340
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000341static FILE *open_socket(len_and_sockaddr *lsa)
342{
Lauri Kasanend074b412013-10-12 21:47:07 +0200343 int fd;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000344 FILE *fp;
345
Lauri Kasanend074b412013-10-12 21:47:07 +0200346 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
347 fd = xconnect_stream(lsa);
348 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
349
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000350 /* glibc 2.4 seems to try seeking on it - ??! */
351 /* hopefully it understands what ESPIPE means... */
Lauri Kasanend074b412013-10-12 21:47:07 +0200352 fp = fdopen(fd, "r+");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100353 if (!fp)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100354 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000355
356 return fp;
357}
358
Denys Vlasenkof836f012011-02-10 23:02:28 +0100359/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
Lauri Kasanend074b412013-10-12 21:47:07 +0200360/* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100361static char fgets_and_trim(FILE *fp)
362{
363 char c;
364 char *buf_ptr;
365
366 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
367 bb_perror_msg_and_die("error getting response");
368
369 buf_ptr = strchrnul(G.wget_buf, '\n');
370 c = *buf_ptr;
371 *buf_ptr = '\0';
372 buf_ptr = strchrnul(G.wget_buf, '\r');
373 *buf_ptr = '\0';
374
375 log_io("< %s", G.wget_buf);
376
377 return c;
378}
379
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100380static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000381{
382 int result;
383 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100384 if (!s2)
385 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000386 fprintf(fp, "%s%s\r\n", s1, s2);
387 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100388 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000389 }
390
391 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100392 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100393 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000394
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100395 G.wget_buf[3] = '\0';
396 result = xatoi_positive(G.wget_buf);
397 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000398 return result;
399}
400
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100401static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000402{
403 char *url, *p, *sp;
404
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100405 free(h->allocated);
406 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000407
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100408 h->protocol = P_FTP;
409 p = strstr(url, "://");
410 if (p) {
411 *p = '\0';
412 h->host = p + 3;
413 if (strcmp(url, P_FTP) == 0) {
414 h->port = bb_lookup_port(P_FTP, "tcp", 21);
415 } else
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100416#if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100417 if (strcmp(url, P_HTTPS) == 0) {
418 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
419 h->protocol = P_HTTPS;
420 } else
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100421#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100422 if (strcmp(url, P_HTTP) == 0) {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100423 http:
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100424 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
425 h->protocol = P_HTTP;
426 } else {
427 *p = ':';
428 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
429 }
430 } else {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100431 // GNU wget is user-friendly and falls back to http://
432 h->host = url;
433 goto http;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100434 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000435
436 // FYI:
437 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
438 // 'GET /?var=a/b HTTP 1.0'
439 // and saves 'index.html?var=a%2Fb' (we save 'b')
440 // wget 'http://busybox.net?login=john@doe':
441 // request: 'GET /?login=john@doe HTTP/1.0'
442 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
443 // wget 'http://busybox.net#test/test':
444 // request: 'GET / HTTP/1.0'
445 // saves: 'index.html' (we save 'test')
446 //
447 // We also don't add unique .N suffix if file exists...
448 sp = strchr(h->host, '/');
449 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
450 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
451 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000452 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000453 } else if (*sp == '/') {
454 *sp = '\0';
455 h->path = sp + 1;
456 } else { // '#' or '?'
457 // http://busybox.net?login=john@doe is a valid URL
458 // memmove converts to:
459 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000460 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000461 h->host--;
462 sp[-1] = '\0';
463 h->path = sp;
464 }
465
466 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000467 if (sp != NULL) {
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200468 // URL-decode "user:password" string before base64-encoding:
469 // wget http://test:my%20pass@example.com should send
470 // Authorization: Basic dGVzdDpteSBwYXNz
471 // which decodes to "test:my pass".
472 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000473 *sp = '\0';
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100474 free(h->user);
475 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000476 h->host = sp + 1;
477 }
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100478 /* else: h->user remains NULL, or as set by original request
479 * before redirect (if we are here after a redirect).
480 */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000481}
482
Denys Vlasenkof836f012011-02-10 23:02:28 +0100483static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000484{
485 char *s, *hdrval;
486 int c;
487
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000488 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100489 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000490
Denys Vlasenkof836f012011-02-10 23:02:28 +0100491 /* end of the headers? */
492 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000493 return NULL;
494
495 /* convert the header name to lower case */
Denys Vlasenkoea267d52013-07-01 15:01:50 +0200496 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
497 /*
498 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
499 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
500 * "A-Z" maps to "a-z".
501 * "@[\]" can't occur in header names.
502 * "^_" maps to "~,DEL" (which is wrong).
503 * "^" was never seen yet, "_" was seen from web.archive.org
504 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
505 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100506 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200507 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000508
509 /* verify we are at the end of the header name */
510 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100511 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000512
513 /* locate the start of the header value */
514 *s++ = '\0';
515 hdrval = skip_whitespace(s);
516
Denys Vlasenkof836f012011-02-10 23:02:28 +0100517 if (c != '\n') {
518 /* Rats! The buffer isn't big enough to hold the entire header value */
519 while (c = getc(fp), c != EOF && c != '\n')
520 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000521 }
522
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000523 return hdrval;
524}
525
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200526static void reset_beg_range_to_zero(void)
527{
Denys Vlasenko61441242012-06-17 19:52:25 +0200528 bb_error_msg("restart failed");
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200529 G.beg_range = 0;
530 xlseek(G.output_fd, 0, SEEK_SET);
Denys Vlasenko61441242012-06-17 19:52:25 +0200531 /* Done at the end instead: */
532 /* ftruncate(G.output_fd, 0); */
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200533}
534
Denys Vlasenko7f432802009-06-28 01:02:24 +0200535static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
536{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200537 FILE *sfp;
538 char *str;
539 int port;
540
541 if (!target->user)
542 target->user = xstrdup("anonymous:busybox@");
543
544 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100545 if (ftpcmd(NULL, NULL, sfp) != 220)
546 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200547
548 /*
549 * Splitting username:password pair,
550 * trying to log in
551 */
552 str = strchr(target->user, ':');
553 if (str)
554 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100555 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200556 case 230:
557 break;
558 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100559 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200560 break;
561 /* fall through (failed login) */
562 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100563 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200564 }
565
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100566 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200567
568 /*
569 * Querying file size
570 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100571 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
572 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100573 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200574 bb_error_msg_and_die("SIZE value is garbage");
575 }
576 G.got_clen = 1;
577 }
578
579 /*
580 * Entering passive mode
581 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100582 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200583 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100584 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200585 }
586 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
587 // Server's IP is N1.N2.N3.N4 (we ignore it)
588 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100589 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200590 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100591 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200592 if (!str) goto pasv_error;
593 port = xatou_range(str+1, 0, 255);
594 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100595 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200596 if (!str) goto pasv_error;
597 port += xatou_range(str+1, 0, 255) * 256;
Denys Vlasenkoca183112011-04-07 17:52:20 +0200598 set_nport(&lsa->u.sa, htons(port));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200599
600 *dfpp = open_socket(lsa);
601
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200602 if (G.beg_range != 0) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100603 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
604 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100605 G.content_len -= G.beg_range;
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200606 else
607 reset_beg_range_to_zero();
Denys Vlasenko7f432802009-06-28 01:02:24 +0200608 }
609
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100610 if (ftpcmd("RETR ", target->path, sfp) > 150)
611 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200612
613 return sfp;
614}
615
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Run "openssl s_client -quiet -connect HOST:PORT" with its stdin and
 * stdout attached to one end of a socketpair.
 * ":port" is appended to host only if host contains no ':' already.
 *
 * Returns the parent's end of the socketpair. When ssl_helper support
 * is also configured in, returns -1 if openssl could not be executed;
 * without it, that failure is reported by the child, which then dies.
 */
static int spawn_https_helper_openssl(const char *host, unsigned port)
{
	char *host_port = NULL;
	int pair[2];
	int child;
# if ENABLE_FEATURE_WGET_SSL_HELPER
	/* Written by the vforked child, read by the parent afterwards */
	volatile int child_failed = 0;
# endif

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	if (strchr(host, ':') == NULL) {
		host_port = xasprintf("%s:%u", host, port);
		host = host_port;
	}

	fflush_all();
	child = xvfork();
	if (child == 0) {
		/* Child */
		char *argv[] = {
			(char*)"openssl",
			(char*)"s_client",
			(char*)"-quiet",
			(char*)"-connect",
			(char*)host,
			NULL
		};

		close(pair[0]);
		xmove_fd(pair[1], 0);
		xdup2(0, 1);
		/*
		 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
		 * It prints some debug stuff on stderr, don't know how to suppress it.
		 * Work around by dev-nulling stderr. We lose all error messages :(
		 */
		/* Park the real stderr on fd 3; the open is then expected
		 * to get the now-free fd 2. */
		xmove_fd(2, 3);
		xopen("/dev/null", O_RDWR);
		BB_EXECVP(argv[0], argv);
		/* exec failed: bring the real stderr back */
		xmove_fd(3, 2);
# if ENABLE_FEATURE_WGET_SSL_HELPER
		child_failed = 1;
		xfunc_die();
# else
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
# endif
		/* notreached */
	}

	/* Parent */
	free(host_port);
	close(pair[1]);
# if ENABLE_FEATURE_WGET_SSL_HELPER
	if (child_failed) {
		close(pair[0]);
		return -1;
	}
# endif
	return pair[0];
}
#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100676
/* See networking/ssl_helper/README how to build one */
#if ENABLE_FEATURE_WGET_SSL_HELPER
/*
 * Run "ssl_helper -d3" with the already connected network_fd handed
 * over as its fd 3, and its stdin/stdout attached to one end of
 * a socketpair. In the parent, the other end of the socketpair then
 * takes the place of network_fd.
 */
static void spawn_https_helper_small(int network_fd)
{
	int pair[2];
	int child;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	if (BB_MMU)
		child = xfork();
	else
		child = xvfork();
	if (child == 0) {
		/* Child */
		/*
		 * A simple ssl/tls helper
		 */
		char *argv[] = {
			(char*)"ssl_helper",
			(char*)"-d3",
			NULL
		};

		close(pair[0]);
		xmove_fd(pair[1], 0);
		xdup2(0, 1);
		xmove_fd(network_fd, 3);
		BB_EXECVP(argv[0], argv);
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
		/* notreached */
	}

	/* Parent */
	close(pair[1]);
	xmove_fd(pair[0], network_fd);
}
#endif
713
Denys Vlasenko2384a352011-02-15 00:58:36 +0100714static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200715{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200716#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
717# if ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200718 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200719# endif
720 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200721
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200722 polldata.fd = fileno(dfp);
723 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200724#endif
725 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200726
727 if (G.chunked)
728 goto get_clen;
729
730 /* Loops only if chunked */
731 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100732
733#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
734 /* Must use nonblocking I/O, otherwise fread will loop
735 * and *block* until it reads full buffer,
736 * which messes up progress bar and/or timeout logic.
737 * Because of nonblocking I/O, we need to dance
738 * very carefully around EAGAIN. See explanation at
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200739 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100740 */
741 ndelay_on(polldata.fd);
742#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100743 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200744 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100745 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200746
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200747#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100748 /* fread internally uses read loop, which in our case
749 * is usually exited when we get EAGAIN.
750 * In this case, libc sets error marker on the stream.
751 * Need to clear it before next fread to avoid possible
752 * rare false positive ferror below. Rare because usually
753 * fread gets more than zero bytes, and we don't fall
754 * into if (n <= 0) ...
755 */
756 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100757#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200758 errno = 0;
759 rdsz = sizeof(G.wget_buf);
760 if (G.got_clen) {
761 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
762 if ((int)G.content_len <= 0)
763 break;
764 rdsz = (unsigned)G.content_len;
765 }
766 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100767 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200768
769 if (n > 0) {
770 xwrite(G.output_fd, G.wget_buf, n);
771#if ENABLE_FEATURE_WGET_STATUSBAR
772 G.transferred += n;
773#endif
774 if (G.got_clen) {
775 G.content_len -= n;
776 if (G.content_len == 0)
777 break;
778 }
779#if ENABLE_FEATURE_WGET_TIMEOUT
780 second_cnt = G.timeout_seconds;
781#endif
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100782 goto bump;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200783 }
784
785 /* n <= 0.
786 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100787 * If error occurs, or EOF is reached, the return value
788 * is a short item count (or zero).
789 * fread does not distinguish between EOF and error.
790 */
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200791 if (errno != EAGAIN) {
792 if (ferror(dfp)) {
793 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100794 bb_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200795 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100796 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200797 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100798
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200799#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
800 /* It was EAGAIN. There is no data. Wait up to one second
801 * then abort if timed out, or update the bar and try reading again.
802 */
803 if (safe_poll(&polldata, 1, 1000) == 0) {
804# if ENABLE_FEATURE_WGET_TIMEOUT
805 if (second_cnt != 0 && --second_cnt == 0) {
806 progress_meter(PROGRESS_END);
807 bb_error_msg_and_die("download timed out");
808 }
809# endif
810 /* We used to loop back to poll here,
811 * but there is no great harm in letting fread
812 * to try reading anyway.
813 */
814 }
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100815#endif
816 bump:
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200817 /* Need to do it _every_ second for "stalled" indicator
818 * to be shown properly.
819 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200820 progress_meter(PROGRESS_BUMP);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200821 } /* while (reading data) */
822
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100823#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
824 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100825 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100826#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200827 if (!G.chunked)
828 break;
829
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100830 fgets_and_trim(dfp); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200831 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100832 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100833 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200834 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100835 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200836 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100837 G.got_clen = 1;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200838 /*
839 * Note that fgets may result in some data being buffered in dfp.
840 * We loop back to fread, which will retrieve this data.
841 * Also note that code has to be arranged so that fread
842 * is done _before_ one-second poll wait - poll doesn't know
843 * about stdio buffering and can result in spurious one second waits!
844 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200845 }
846
Denys Vlasenko61441242012-06-17 19:52:25 +0200847 /* If -c failed, we restart from the beginning,
848 * but we do not truncate file then, we do it only now, at the end.
849 * This lets user to ^C if his 99% complete 10 GB file download
850 * failed to restart *without* losing the almost complete file.
851 */
852 {
853 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
854 if (pos != (off_t)-1)
855 ftruncate(G.output_fd, pos);
856 }
857
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100858 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100859 G.chunked = 0; /* makes it show 100% even for chunked download */
860 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200861 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200862}
863
Pere Orga53695632011-02-16 20:09:36 +0100864static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +0000865{
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100866 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200867 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100868 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200869 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000870 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100871 char *proxy = NULL;
872 char *fname_out_alloc;
Denys Vlasenko93b4a602011-12-18 05:11:56 +0100873 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100874 struct host_info server;
875 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +0000876
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100877 server.allocated = NULL;
878 target.allocated = NULL;
879 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200880 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100881
882 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000883
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000884 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100885 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000886 if (use_proxy) {
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100887 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
888//FIXME: what if protocol is https? Ok to use http_proxy?
Denys Vlasenko2384a352011-02-15 00:58:36 +0100889 use_proxy = (proxy && proxy[0]);
890 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000891 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +0000892 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200893 if (!use_proxy) {
894 server.port = target.port;
895 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100896 //free(server.allocated); - can't be non-NULL
897 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200898 } else {
899 server.host = target.host;
900 }
901 }
902
903 if (ENABLE_FEATURE_IPV6)
904 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000905
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100906 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100907 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100908 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100909 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000910 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100911 if (G.fname_out[0] == '/' || !G.fname_out[0])
912 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000913 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoaacd4482012-06-17 20:21:30 +0200914 if (G.dir_prefix)
915 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100916 else {
Denys Vlasenkoaacd4482012-06-17 20:21:30 +0200917 /* redirects may free target.path later, need to make a copy */
918 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100919 }
Eric Andersen29edd002000-12-09 16:55:35 +0000920 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000921#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100922 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000923#endif
924
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000925 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100926 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100927 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100928 G.output_fd = open(G.fname_out, O_WRONLY);
929 if (G.output_fd >= 0) {
930 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000931 }
932 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +0100933 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000934 }
935
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200936 redir_limit = 5;
937 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000938 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100939 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200940 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
941 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
942 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000943 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200944 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +0100945 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
946 G.got_clen = 0;
947 G.chunked = 0;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100948 if (use_proxy || target.protocol != P_FTP) {
Eric Andersen79757c92001-04-05 21:45:54 +0000949 /*
950 * HTTP session
951 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200952 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200953 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200954
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100955 /* Open socket to http(s) server */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200956#if ENABLE_FEATURE_WGET_OPENSSL
Denys Vlasenko2007ef52015-10-07 02:40:53 +0200957 /* openssl (and maybe ssl_helper) support is configured */
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100958 if (target.protocol == P_HTTPS) {
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200959 /* openssl-based helper
960 * Inconvenient API since we can't give it an open fd
961 */
Denys Vlasenko2007ef52015-10-07 02:40:53 +0200962 int fd = spawn_https_helper_openssl(server.host, server.port);
963# if ENABLE_FEATURE_WGET_SSL_HELPER
964 if (fd < 0) { /* no openssl? try ssl_helper */
965 sfp = open_socket(lsa);
966 spawn_https_helper_small(fileno(sfp));
967 goto socket_opened;
968 }
969# else
970 /* We don't check for exec("openssl") failure in this case */
971# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100972 sfp = fdopen(fd, "r+");
973 if (!sfp)
974 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denys Vlasenko2007ef52015-10-07 02:40:53 +0200975 goto socket_opened;
976 }
977 sfp = open_socket(lsa);
978 socket_opened:
979#elif ENABLE_FEATURE_WGET_SSL_HELPER
980 /* Only ssl_helper support is configured */
981 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +0100982 if (target.protocol == P_HTTPS)
Denys Vlasenko2007ef52015-10-07 02:40:53 +0200983 spawn_https_helper_small(fileno(sfp));
984#else
985 /* ssl (https) support is not configured */
986 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +0100987#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200988 /* Send HTTP request */
989 if (use_proxy) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +0100990 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100991 target.protocol, target.host,
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200992 target.path);
993 } else {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +0100994 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100995 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
996 target.path);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200997 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +0100998 if (!USR_HEADER_HOST)
999 SENDFMT(sfp, "Host: %s\r\n", target.host);
1000 if (!USR_HEADER_USER_AGENT)
1001 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +00001002
Denys Vlasenko9213a552011-02-10 13:23:45 +01001003 /* Ask server to close the connection as soon as we are done
1004 * (IOW: we do not intend to send more requests)
1005 */
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001006 SENDFMT(sfp, "Connection: close\r\n");
Denys Vlasenko9213a552011-02-10 13:23:45 +01001007
Denis Vlasenko9cade082006-11-21 10:43:02 +00001008#if ENABLE_FEATURE_WGET_AUTHENTICATION
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001009 if (target.user && !USR_HEADER_AUTH) {
1010 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001011 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001012 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001013 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1014 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001015 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001016 }
Eric Andersen79757c92001-04-05 21:45:54 +00001017#endif
1018
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001019 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1020 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +01001021
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001022#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001023 if (G.extra_headers) {
1024 log_io(G.extra_headers);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001025 fputs(G.extra_headers, sfp);
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001026 }
Denis Vlasenko5a2ad692009-03-04 14:13:37 +00001027
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001028 if (option_mask32 & WGET_OPT_POST_DATA) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001029 SENDFMT(sfp,
Denys Vlasenko9213a552011-02-10 13:23:45 +01001030 "Content-Type: application/x-www-form-urlencoded\r\n"
1031 "Content-Length: %u\r\n"
1032 "\r\n"
1033 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +02001034 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +01001035 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001036 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001037#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +01001038 {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001039 SENDFMT(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001040 }
Eric Andersen79757c92001-04-05 21:45:54 +00001041
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001042 fflush(sfp);
1043
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001044 /*
1045 * Retrieve HTTP response line and check for "200" status code.
1046 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001047 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +01001048 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001049
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001050 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001051 str = skip_non_whitespace(str);
1052 str = skip_whitespace(str);
1053 // FIXME: no error check
1054 // xatou wouldn't work: "200 OK"
1055 status = atoi(str);
1056 switch (status) {
1057 case 0:
1058 case 100:
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001059 while (gethdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001060 /* eat all remaining headers */;
1061 goto read_response;
1062 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +00001063/*
1064Response 204 doesn't say "null file", it says "metadata
1065has changed but data didn't":
1066
1067"10.2.5 204 No Content
1068The server has fulfilled the request but does not need to return
1069an entity-body, and might want to return updated metainformation.
1070The response MAY include new or updated metainformation in the form
1071of entity-headers, which if present SHOULD be associated with
1072the requested variant.
1073
1074If the client is a user agent, it SHOULD NOT change its document
1075view from that which caused the request to be sent. This response
1076is primarily intended to allow input for actions to take place
1077without causing a change to the user agent's active document view,
1078although any new or updated metainformation SHOULD be applied
1079to the document currently in the user agent's active view.
1080
1081The 204 response MUST NOT include a message-body, and thus
1082is always terminated by the first empty line after the header fields."
1083
1084However, in real world it was observed that some web servers
1085(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1086*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001087 case 204:
Denys Vlasenkobf146b82012-06-13 17:31:07 +02001088 if (G.beg_range != 0) {
1089 /* "Range:..." was not honored by the server.
1090 * Restart download from the beginning.
1091 */
1092 reset_beg_range_to_zero();
1093 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001094 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +02001095 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001096 case 301:
1097 case 302:
1098 case 303:
1099 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001100 case 206: /* Partial Content */
1101 if (G.beg_range != 0)
1102 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001103 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001104 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001105 /* fall through */
1106 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001107 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001108 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001109
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001110 /*
1111 * Retrieve HTTP headers.
1112 */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001113 while ((str = gethdr(sfp)) != NULL) {
1114 static const char keywords[] ALIGN1 =
1115 "content-length\0""transfer-encoding\0""location\0";
1116 enum {
1117 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1118 };
Matthijs van de Water0d586662009-08-22 20:19:48 +02001119 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001120
1121 /* gethdr converted "FOO:" string to lowercase */
1122
Matthijs van de Water0d586662009-08-22 20:19:48 +02001123 /* strip trailing whitespace */
1124 char *s = strchrnul(str, '\0') - 1;
1125 while (s >= str && (*s == ' ' || *s == '\t')) {
1126 *s = '\0';
1127 s--;
1128 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001129 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001130 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001131 G.content_len = BB_STRTOOFF(str, NULL, 10);
1132 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001133 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +00001134 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001135 G.got_clen = 1;
1136 continue;
1137 }
1138 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001139 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001140 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001141 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001142 }
1143 if (key == KEY_location && status >= 300) {
1144 if (--redir_limit == 0)
1145 bb_error_msg_and_die("too many redirections");
1146 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001147 if (str[0] == '/') {
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001148 free(redirected_path);
1149 target.path = redirected_path = xstrdup(str+1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001150 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001151 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001152 parse_url(str, &target);
1153 if (!use_proxy) {
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001154 /* server.user remains untouched */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001155 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +01001156 server.allocated = NULL;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001157 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001158 /* strip_ipv6_scope_id(target.host); - no! */
1159 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001160 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +00001161 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001162 goto resolve_lsa;
1163 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +00001164 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001165 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +00001166 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001167 }
1168// if (status >= 300)
1169// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001170
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001171 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +00001172 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001173 } else {
Eric Andersen79757c92001-04-05 21:45:54 +00001174 /*
1175 * FTP session
1176 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001177 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +00001178 }
Denis Vlasenko77105632007-09-24 15:04:00 +00001179
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001180 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001181
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001182 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001183 if (G.output_fd < 0)
1184 G.output_fd = xopen(G.fname_out, G.o_flags);
1185 retrieve_file_data(dfp);
1186 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1187 xclose(G.output_fd);
1188 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001189 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +00001190 }
Eric Andersen79757c92001-04-05 21:45:54 +00001191
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001192 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001193 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +00001194 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001195 if (ftpcmd(NULL, NULL, sfp) != 226)
1196 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1197 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +00001198 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001199 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +00001200
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001201 free(server.allocated);
1202 free(target.allocated);
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001203 free(server.user);
1204 free(target.user);
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001205 free(fname_out_alloc);
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001206 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +00001207}
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001208
1209int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1210int wget_main(int argc UNUSED_PARAM, char **argv)
1211{
1212#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1213 static const char wget_longopts[] ALIGN1 =
1214 /* name, has_arg, val */
1215 "continue\0" No_argument "c"
1216//FIXME: -s isn't --spider, it's --save-headers!
1217 "spider\0" No_argument "s"
1218 "quiet\0" No_argument "q"
1219 "output-document\0" Required_argument "O"
1220 "directory-prefix\0" Required_argument "P"
1221 "proxy\0" Required_argument "Y"
1222 "user-agent\0" Required_argument "U"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001223IF_FEATURE_WGET_TIMEOUT(
1224 "timeout\0" Required_argument "T")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001225 /* Ignored: */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001226IF_DESKTOP( "tries\0" Required_argument "t")
1227 "header\0" Required_argument "\xff"
1228 "post-data\0" Required_argument "\xfe"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001229 /* Ignored (we always use PASV): */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001230IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001231 /* Ignored (we don't do ssl) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001232IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0")
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001233 /* Ignored (we don't support caching) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001234IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1235IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1236IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1237IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1238IF_DESKTOP( "no-parent\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001239 ;
1240#endif
1241
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001242#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1243 llist_t *headers_llist = NULL;
1244#endif
1245
1246 INIT_G();
1247
Lauri Kasanend074b412013-10-12 21:47:07 +02001248#if ENABLE_FEATURE_WGET_TIMEOUT
1249 G.timeout_seconds = 900;
1250 signal(SIGALRM, alarm_handler);
1251#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001252 G.proxy_flag = "on"; /* use proxies if env vars are set */
1253 G.user_agent = "Wget"; /* "User-Agent" header field */
1254
1255#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1256 applet_long_options = wget_longopts;
1257#endif
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001258 opt_complementary = "-1" /* at least one URL */
1259 IF_FEATURE_WGET_TIMEOUT(":T+") /* -T NUM */
1260 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */
1261 getopt32(argv, "csqO:P:Y:U:T:"
1262 /*ignored:*/ "t:"
1263 /*ignored:*/ "n::"
1264 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1265 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1266 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1267 * -nH --no-host-directories: wget -r http://host/ won't create host/
1268 * -np --no-parent
1269 * "n::" above says that we accept -n[ARG].
1270 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1271 */
1272 , &G.fname_out, &G.dir_prefix,
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001273 &G.proxy_flag, &G.user_agent,
1274 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001275 NULL, /* -t RETRIES */
1276 NULL /* -n[ARG] */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001277 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1278 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1279 );
1280 argv += optind;
1281
1282#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1283 if (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001284 int size = 0;
1285 char *hdr;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001286 llist_t *ll = headers_llist;
1287 while (ll) {
1288 size += strlen(ll->data) + 2;
1289 ll = ll->link;
1290 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001291 G.extra_headers = hdr = xmalloc(size + 1);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001292 while (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001293 int bit;
1294 const char *words;
1295
1296 size = sprintf(hdr, "%s\r\n",
1297 (char*)llist_pop(&headers_llist));
1298 /* a bit like index_in_substrings but don't match full key */
1299 bit = 1;
1300 words = wget_user_headers;
1301 while (*words) {
1302 if (strstr(hdr, words) == hdr) {
1303 G.user_headers |= bit;
1304 break;
1305 }
1306 bit <<= 1;
1307 words += strlen(words) + 1;
1308 }
1309 hdr += size;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001310 }
1311 }
1312#endif
1313
Denys Vlasenko2384a352011-02-15 00:58:36 +01001314 G.output_fd = -1;
1315 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1316 if (G.fname_out) { /* -O FILE ? */
1317 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1318 G.output_fd = 1;
1319 option_mask32 &= ~WGET_OPT_CONTINUE;
1320 }
1321 /* compat with wget: -O FILE can overwrite */
1322 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1323 }
1324
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001325 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +01001326 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001327
Denys Vlasenko28556b92011-02-15 11:03:53 +01001328 if (G.output_fd >= 0)
1329 xclose(G.output_fd);
1330
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +02001331#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1332 free(G.extra_headers);
1333#endif
1334 FINI_G();
1335
Pere Orga53695632011-02-16 20:09:36 +01001336 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001337}