blob: 252f94dc6b1be801dfdd9a4704d9a4af62f78a05 [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
 */
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010011
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020012//config:config WGET
13//config: bool "wget"
14//config: default y
15//config: help
16//config: wget is a utility for non-interactive download of files from HTTP
17//config: and FTP servers.
18//config:
Denys Vlasenkof5604222017-01-10 14:58:54 +010019//config:config FEATURE_WGET_LONG_OPTIONS
20//config: bool "Enable long options"
21//config: default y
22//config: depends on WGET && LONG_OPTS
23//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020024//config:config FEATURE_WGET_STATUSBAR
Denys Vlasenkof5604222017-01-10 14:58:54 +010025//config: bool "Enable progress bar (+2k)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020026//config: default y
27//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020028//config:
29//config:config FEATURE_WGET_AUTHENTICATION
30//config: bool "Enable HTTP authentication"
31//config: default y
32//config: depends on WGET
33//config: help
34//config: Support authenticated HTTP transfers.
35//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020036//config:config FEATURE_WGET_TIMEOUT
37//config: bool "Enable timeout option -T SEC"
38//config: default y
39//config: depends on WGET
40//config: help
41//config: Supports network read and connect timeouts for wget,
42//config: so that wget will give up and timeout, through the -T
43//config: command line option.
44//config:
45//config: Currently only connect and network data read timeout are
46//config: supported (i.e., timeout is not applied to the DNS query). When
47//config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
48//config: will work in addition to -T.
49//config:
Denys Vlasenko9a647c32017-01-23 01:08:16 +010050//config:config FEATURE_WGET_HTTPS
51//config: bool "Support HTTPS using internal TLS code"
52//config: default y
53//config: depends on WGET
54//config: select TLS
55//config: help
56//config: wget will use internal TLS code to connect to https:// URLs.
57//config: Note:
58//config: On NOMMU machines, ssl_helper applet should be available
59//config: in the $PATH for this to work. Make sure to select that applet.
60//config:
Denys Vlasenko67f6db62017-01-30 16:27:37 +010061//config: Note: currently, TLS code only makes TLS I/O work, it
62//config: does *not* check that the peer is who it claims to be, etc.
63//config: IOW: it uses peer-supplied public keys to establish encryption
64//config: and signing keys, then encrypts and signs outgoing data and
65//config: decrypts incoming data.
66//config: It does not check signature hashes on the incoming data:
67//config: this means that attackers manipulating TCP packets can
68//config: send altered data and we unknowingly receive garbage.
69//config: (This check might be relatively easy to add).
70//config: It does not check public key's certificate:
71//config: this means that the peer may be an attacker impersonating
72//config: the server we think we are talking to.
73//config:
74//config: If you think this is unacceptable, consider this. As more and more
75//config: servers switch to HTTPS-only operation, without such "crippled"
76//config: TLS code it is *impossible* to simply download a kernel source
77//config: from kernel.org. Which can in real world translate into
78//config: "my small automatic tooling to build cross-compilers from sources
79//config: no longer works, I need to additionally keep a local copy
80//config: of ~4 megabyte source tarball of a SSL library and ~2 megabyte
81//config: source of wget, need to compile and build both before I can
82//config: download anything. All this despite the fact that the build
83//config: is done in a QEMU sandbox on a machine with absolutely nothing
84//config: worth stealing, so I don't care if someone would go to a lot
85//config: of trouble to intercept my HTTPS download to send me an altered
86//config: kernel tarball".
87//config:
88//config: If you still think this is unacceptable, send patches.
89//config:
90//config: If you still think this is unacceptable, do not want to send
91//config: patches, but do want to waste bandwidth explaining how wrong
92//config: it is, you will be ignored.
93//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +020094//config:config FEATURE_WGET_OPENSSL
95//config: bool "Try to connect to HTTPS using openssl"
96//config: default y
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020097//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020098//config: help
Denys Vlasenko9a647c32017-01-23 01:08:16 +010099//config: Try to use openssl to handle HTTPS.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200100//config:
101//config: OpenSSL has a simple SSL client for debug purposes.
Denys Vlasenko9a647c32017-01-23 01:08:16 +0100102//config: If you select this option, wget will effectively run:
Denys Vlasenkoed727612016-07-25 21:34:57 +0200103//config: "openssl s_client -quiet -connect hostname:443
104//config: -servername hostname 2>/dev/null" and pipe its data
105//config: through it. -servername is not used if hostname is numeric.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200106//config: Note inconvenient API: host resolution is done twice,
107//config: and there is no guarantee openssl's idea of IPv6 address
108//config: format is the same as ours.
109//config: Another problem is that s_client prints debug information
110//config: to stderr, and it needs to be suppressed. This means
111//config: all error messages get suppressed too.
112//config: openssl is also a big binary, often dynamically linked
113//config: against ~15 libraries.
114//config:
Denys Vlasenko9a647c32017-01-23 01:08:16 +0100115//config: If openssl can't be executed, internal TLS code will be used
116//config: (if you enabled it); if openssl can be executed but fails later,
117//config: wget can't detect this, and download will fail.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200118
119//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
120
121//kbuild:lib-$(CONFIG_WGET) += wget.o
122
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100123//usage:#define wget_trivial_usage
124//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200125//usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100126//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200127/* Since we ignore these opts, we don't show them in --help */
Denys Vlasenko92e1b082015-10-20 21:51:52 +0200128/* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
129/* //usage: " [-nv] [-nc] [-nH] [-np]" */
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100130//usage: " [-S|--server-response] [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100131//usage: )
132//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100133//usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-S] [-U AGENT]"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100134//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
135//usage: )
136//usage:#define wget_full_usage "\n\n"
137//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200138//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100139//usage: "\n --spider Only check URL existence: $? is 0 if exists"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100140//usage: )
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200141//usage: "\n -c Continue retrieval of aborted transfer"
142//usage: "\n -q Quiet"
143//usage: "\n -P DIR Save to DIR (default .)"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100144//usage: "\n -S Show server response"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200145//usage: IF_FEATURE_WGET_TIMEOUT(
146//usage: "\n -T SEC Network read timeout is SEC seconds"
147//usage: )
148//usage: "\n -O FILE Save to FILE ('-' for stdout)"
149//usage: "\n -U STR Use STR for User-Agent header"
150//usage: "\n -Y on/off Use proxy"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100151
Denis Vlasenkob6adbf12007-05-26 19:00:18 +0000152#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000153
/*
 * I/O tracing helpers.
 * Flip "#if 0" to "#if 1" to have every line sent/received echoed
 * through bb_error_msg(). In the default build log_io() expands to
 * nothing and SENDFMT() is a plain fprintf().
 *
 * Both SENDFMT() variants are usable as a single statement:
 * the do/while form deliberately has no trailing ';' so that
 * "if (x) SENDFMT(...); else ..." still parses.
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
# define SENDFMT(fp, fmt, ...) \
	do { \
		log_io("> " fmt, ##__VA_ARGS__); \
		fprintf(fp, fmt, ##__VA_ARGS__); \
	} while (0)
#else
# define log_io(...) ((void)0)
# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100165
166
Denys Vlasenko9a647c32017-01-23 01:08:16 +0100167#define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100168
Eric Andersen79757c92001-04-05 21:45:54 +0000169struct host_info {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100170 char *allocated;
Denis Vlasenko818322b2007-09-24 18:27:04 +0000171 const char *path;
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100172 char *user;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100173 const char *protocol;
Denis Vlasenko818322b2007-09-24 18:27:04 +0000174 char *host;
175 int port;
Eric Andersen79757c92001-04-05 21:45:54 +0000176};
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200177static const char P_FTP[] ALIGN1 = "ftp";
178static const char P_HTTP[] ALIGN1 = "http";
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100179#if SSL_SUPPORTED
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200180static const char P_HTTPS[] ALIGN1 = "https";
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100181#endif
Eric Andersen79757c92001-04-05 21:45:54 +0000182
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* User-specified headers prevent using our corresponding built-in headers. */
/* Bit N below corresponds to the N-th string in wget_user_headers[]. */
enum {
	HDR_HOST       = (1<<0),
	HDR_USER_AGENT = (1<<1),
	HDR_RANGE      = (1<<2),
	HDR_AUTH       = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
	HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
};
/* NUL-separated list of header names, same order as the HDR_xxx bits */
static const char wget_user_headers[] ALIGN1 =
	"Host:\0"
	"User-Agent:\0"
	"Range:\0"
# if ENABLE_FEATURE_WGET_AUTHENTICATION
	"Authorization:\0"
	"Proxy-Authorization:\0"
# endif
	;
/* Non-zero if the corresponding bit is set in G.user_headers */
# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
#else /* No long options, no user-headers :( */
# define USR_HEADER_HOST       0
# define USR_HEADER_USER_AGENT 0
# define USR_HEADER_RANGE      0
# define USR_HEADER_AUTH       0
# define USR_HEADER_PROXY_AUTH 0
#endif
Denis Vlasenko77105632007-09-24 15:04:00 +0000213
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200214/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +0000215struct globals {
216 off_t content_len; /* Content-length of the file */
217 off_t beg_range; /* Range at which continue begins */
218#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +0000219 off_t transferred; /* Number of bytes transferred so far */
220 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +0100221 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +0000222#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200223 char *dir_prefix;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100224#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200225 char *post_data;
226 char *extra_headers;
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +0100227 unsigned char user_headers; /* Headers mentioned by the user */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100228#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200229 char *fname_out; /* where to direct output (-O) */
230 const char *proxy_flag; /* Use proxies if env vars are set */
231 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200232#if ENABLE_FEATURE_WGET_TIMEOUT
233 unsigned timeout_seconds;
Denys Vlasenko6701e912016-03-17 15:58:16 +0100234 bool die_if_timed_out;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200235#endif
Denys Vlasenko2384a352011-02-15 00:58:36 +0100236 int output_fd;
237 int o_flags;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200238 smallint chunked; /* chunked transfer encoding */
239 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100240 /* Local downloads do benefit from big buffer.
241 * With 512 byte buffer, it was measured to be
242 * an order of magnitude slower than with big one.
243 */
244 uint64_t just_to_align_next_member;
245 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +0100246} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100247#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200248#define INIT_G() do { \
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200249 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200250} while (0)
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +0200251#define FINI_G() do { \
252 FREE_PTR_TO_GLOBALS(); \
253} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +0000254
255
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200256/* Must match option string! */
257enum {
258 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200259 WGET_OPT_QUIET = (1 << 1),
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100260 WGET_OPT_SERVER_RESPONSE = (1 << 2),
261 WGET_OPT_OUTNAME = (1 << 3),
262 WGET_OPT_PREFIX = (1 << 4),
263 WGET_OPT_PROXY = (1 << 5),
264 WGET_OPT_USER_AGENT = (1 << 6),
265 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
266 WGET_OPT_RETRIES = (1 << 8),
267 WGET_OPT_nsomething = (1 << 9),
268 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
269 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
270 WGET_OPT_SPIDER = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200271};
272
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1, /* initialize the meter, then draw it */
	PROGRESS_END   = 0,  /* draw the meter one last time, then tear it down */
	PROGRESS_BUMP  = 1,  /* just redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000278#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000279static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000280{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200281 if (option_mask32 & WGET_OPT_QUIET)
282 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000283
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200284 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100285 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000286
Denys Vlasenko2384a352011-02-15 00:58:36 +0100287 bb_progress_update(&G.pmt,
288 G.beg_range,
289 G.transferred,
290 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
291 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000292
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200293 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100294 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +0200295 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100296 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000297 }
298}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200299#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000300static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000301#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000302
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000303
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * The string is edited in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not a bracketed address with a '%' inside the
 * brackets is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}
	if (scope > cp) {
		/* '%' sits after the closing bracket (e.g. "[::1]:8%0"):
		 * it is not a zone id, and copying "]..." forward onto it
		 * would write past the end of the string */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]..." */
	overlapping_strcpy(scope, cp);
}
339
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode character string. */
/*
 * The result is written to, and returned in, the shared G.wget_buf:
 * it is only valid until the next use of that buffer.
 * Input longer than the buffer can hold is silently truncated.
 */
static char *base64enc(const char *str)
{
	unsigned len = strlen(str);
	if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
		len = sizeof(G.wget_buf)/4*3 - 10;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
351
/*
 * Cut s (in place) at its first byte below ' ', i.e. at the first
 * ASCII control character, and return s. Used before echoing
 * server-supplied or user-supplied text in error messages.
 * Bytes >= 0x80 are kept: the scan works on unsigned chars.
 */
static char* sanitize_string(char *s)
{
	unsigned char *p = (void *) s;
	/* the terminating NUL is also < ' ', so this always stops */
	while (*p >= ' ')
		p++;
	*p = '\0';
	return s;
}
360
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * Signal handler: aborts the program if the alarm fired
 * while a guarded operation (set_alarm ... clear_alarm) was in progress.
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	if (G.die_if_timed_out)
		bb_error_msg_and_die("download timed out");
}
/* Start the -T countdown (if a timeout is configured) and arm the handler */
static void set_alarm(void)
{
	if (G.timeout_seconds) {
		alarm(G.timeout_seconds);
		G.die_if_timed_out = 1;
	}
}
/* Disarm the handler; the pending alarm itself is not cancelled here */
# define clear_alarm() ((void)(G.die_if_timed_out = 0))
#else
# define set_alarm() ((void)0)
# define clear_alarm() ((void)0)
#endif
380
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * is_ip_address() attempts to verify whether or not a string
 * contains an IPv4 or IPv6 address (vs. an FQDN). The result
 * of inet_pton() can be used to determine this.
 *
 * Returns 1 if string parses as a numeric address, 0 otherwise.
 * The IPv6 form is only tried when FEATURE_IPV6 is enabled.
 *
 * TODO add proper error checking when inet_pton() returns -1
 * (some form of system error has occurred, and errno is set)
 */
static int is_ip_address(const char *string)
{
	/* only the yes/no answer matters, the parsed address is discarded */
	struct in_addr addr4;
	int result = inet_pton(AF_INET, string, &addr4);
# if ENABLE_FEATURE_IPV6
	if (result == 0) {
		struct in6_addr addr6;
		result = inet_pton(AF_INET6, string, &addr6);
	}
# endif
	return (result == 1);
}
#endif
404
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000405static FILE *open_socket(len_and_sockaddr *lsa)
406{
Lauri Kasanend074b412013-10-12 21:47:07 +0200407 int fd;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000408 FILE *fp;
409
Denys Vlasenko6701e912016-03-17 15:58:16 +0100410 set_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200411 fd = xconnect_stream(lsa);
Denys Vlasenko6701e912016-03-17 15:58:16 +0100412 clear_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200413
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000414 /* glibc 2.4 seems to try seeking on it - ??! */
415 /* hopefully it understands what ESPIPE means... */
Lauri Kasanend074b412013-10-12 21:47:07 +0200416 fp = fdopen(fd, "r+");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100417 if (!fp)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100418 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000419
420 return fp;
421}
422
Denys Vlasenkof836f012011-02-10 23:02:28 +0100423/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100424static char fgets_and_trim(FILE *fp, const char *fmt)
Denys Vlasenkof836f012011-02-10 23:02:28 +0100425{
426 char c;
427 char *buf_ptr;
428
Denys Vlasenko6701e912016-03-17 15:58:16 +0100429 set_alarm();
Denys Vlasenkof836f012011-02-10 23:02:28 +0100430 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
431 bb_perror_msg_and_die("error getting response");
Denys Vlasenko6701e912016-03-17 15:58:16 +0100432 clear_alarm();
Denys Vlasenkof836f012011-02-10 23:02:28 +0100433
434 buf_ptr = strchrnul(G.wget_buf, '\n');
435 c = *buf_ptr;
436 *buf_ptr = '\0';
437 buf_ptr = strchrnul(G.wget_buf, '\r');
438 *buf_ptr = '\0';
439
440 log_io("< %s", G.wget_buf);
441
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100442 if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE))
443 fprintf(stderr, fmt, G.wget_buf);
444
Denys Vlasenkof836f012011-02-10 23:02:28 +0100445 return c;
446}
447
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100448static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000449{
450 int result;
451 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100452 if (!s2)
453 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000454 fprintf(fp, "%s%s\r\n", s1, s2);
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100455 /* With --server-response, wget also shows its ftp commands */
456 if (option_mask32 & WGET_OPT_SERVER_RESPONSE)
457 fprintf(stderr, "--> %s%s\n\n", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000458 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100459 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000460 }
461
462 do {
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100463 fgets_and_trim(fp, "%s\n");
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100464 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000465
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100466 G.wget_buf[3] = '\0';
467 result = xatoi_positive(G.wget_buf);
468 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000469 return result;
470}
471
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100472static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000473{
474 char *url, *p, *sp;
475
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100476 free(h->allocated);
477 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000478
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100479 h->protocol = P_FTP;
480 p = strstr(url, "://");
481 if (p) {
482 *p = '\0';
483 h->host = p + 3;
484 if (strcmp(url, P_FTP) == 0) {
485 h->port = bb_lookup_port(P_FTP, "tcp", 21);
486 } else
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100487#if SSL_SUPPORTED
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100488 if (strcmp(url, P_HTTPS) == 0) {
489 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
490 h->protocol = P_HTTPS;
491 } else
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100492#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100493 if (strcmp(url, P_HTTP) == 0) {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100494 http:
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100495 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
496 h->protocol = P_HTTP;
497 } else {
498 *p = ':';
499 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
500 }
501 } else {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100502 // GNU wget is user-friendly and falls back to http://
503 h->host = url;
504 goto http;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100505 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000506
507 // FYI:
508 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
Denys Vlasenkoa0aae9f2017-01-20 14:12:10 +0100509 // 'GET /?var=a/b HTTP/1.0'
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000510 // and saves 'index.html?var=a%2Fb' (we save 'b')
511 // wget 'http://busybox.net?login=john@doe':
512 // request: 'GET /?login=john@doe HTTP/1.0'
513 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
514 // wget 'http://busybox.net#test/test':
515 // request: 'GET / HTTP/1.0'
516 // saves: 'index.html' (we save 'test')
517 //
518 // We also don't add unique .N suffix if file exists...
519 sp = strchr(h->host, '/');
520 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
521 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
522 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000523 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000524 } else if (*sp == '/') {
525 *sp = '\0';
526 h->path = sp + 1;
527 } else { // '#' or '?'
528 // http://busybox.net?login=john@doe is a valid URL
529 // memmove converts to:
530 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000531 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000532 h->host--;
533 sp[-1] = '\0';
534 h->path = sp;
535 }
536
537 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000538 if (sp != NULL) {
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200539 // URL-decode "user:password" string before base64-encoding:
540 // wget http://test:my%20pass@example.com should send
541 // Authorization: Basic dGVzdDpteSBwYXNz
542 // which decodes to "test:my pass".
543 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000544 *sp = '\0';
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100545 free(h->user);
546 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000547 h->host = sp + 1;
548 }
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100549 /* else: h->user remains NULL, or as set by original request
550 * before redirect (if we are here after a redirect).
551 */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000552}
553
Denys Vlasenkof836f012011-02-10 23:02:28 +0100554static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000555{
556 char *s, *hdrval;
557 int c;
558
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000559 /* retrieve header line */
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100560 c = fgets_and_trim(fp, " %s\n");
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000561
Denys Vlasenkof836f012011-02-10 23:02:28 +0100562 /* end of the headers? */
563 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000564 return NULL;
565
566 /* convert the header name to lower case */
Denys Vlasenkoea267d52013-07-01 15:01:50 +0200567 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
568 /*
569 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
570 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
571 * "A-Z" maps to "a-z".
572 * "@[\]" can't occur in header names.
573 * "^_" maps to "~,DEL" (which is wrong).
574 * "^" was never seen yet, "_" was seen from web.archive.org
575 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
576 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100577 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200578 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000579
580 /* verify we are at the end of the header name */
581 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100582 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000583
584 /* locate the start of the header value */
585 *s++ = '\0';
586 hdrval = skip_whitespace(s);
587
Denys Vlasenkof836f012011-02-10 23:02:28 +0100588 if (c != '\n') {
589 /* Rats! The buffer isn't big enough to hold the entire header value */
590 while (c = getc(fp), c != EOF && c != '\n')
591 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000592 }
593
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000594 return hdrval;
595}
596
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200597static void reset_beg_range_to_zero(void)
598{
Denys Vlasenko61441242012-06-17 19:52:25 +0200599 bb_error_msg("restart failed");
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200600 G.beg_range = 0;
601 xlseek(G.output_fd, 0, SEEK_SET);
Denys Vlasenko61441242012-06-17 19:52:25 +0200602 /* Done at the end instead: */
603 /* ftruncate(G.output_fd, 0); */
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200604}
605
Denys Vlasenko7f432802009-06-28 01:02:24 +0200606static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
607{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200608 FILE *sfp;
609 char *str;
610 int port;
611
612 if (!target->user)
613 target->user = xstrdup("anonymous:busybox@");
614
615 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100616 if (ftpcmd(NULL, NULL, sfp) != 220)
617 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200618
619 /*
620 * Splitting username:password pair,
621 * trying to log in
622 */
623 str = strchr(target->user, ':');
624 if (str)
625 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100626 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200627 case 230:
628 break;
629 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100630 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200631 break;
632 /* fall through (failed login) */
633 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100634 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200635 }
636
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100637 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200638
639 /*
640 * Querying file size
641 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100642 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
643 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100644 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200645 bb_error_msg_and_die("SIZE value is garbage");
646 }
647 G.got_clen = 1;
648 }
649
650 /*
651 * Entering passive mode
652 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100653 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200654 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100655 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200656 }
657 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
658 // Server's IP is N1.N2.N3.N4 (we ignore it)
659 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100660 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200661 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100662 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200663 if (!str) goto pasv_error;
664 port = xatou_range(str+1, 0, 255);
665 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100666 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200667 if (!str) goto pasv_error;
668 port += xatou_range(str+1, 0, 255) * 256;
Denys Vlasenkoca183112011-04-07 17:52:20 +0200669 set_nport(&lsa->u.sa, htons(port));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200670
671 *dfpp = open_socket(lsa);
672
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200673 if (G.beg_range != 0) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100674 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
675 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100676 G.content_len -= G.beg_range;
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200677 else
678 reset_beg_range_to_zero();
Denys Vlasenko7f432802009-06-28 01:02:24 +0200679 }
680
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100681 if (ftpcmd("RETR ", target->path, sfp) > 150)
682 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200683
684 return sfp;
685}
686
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Start "openssl s_client" as a TLS helper connected to host:port.
 *
 * The helper's stdin/stdout are one end of a socketpair; the other end
 * is returned to the caller. When internal TLS support is also
 * configured, returns -1 if the helper could not be executed.
 */
static int spawn_https_helper_openssl(const char *host, unsigned port)
{
	char *host_with_port = NULL;
	char *sni_name;
	int pair[2];
	int child_pid;
	IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	/* "-connect" wants HOST:PORT; append the port if there is no ':' */
	if (!strchr(host, ':')) {
		host_with_port = xasprintf("%s:%u", host, port);
		host = host_with_port;
	}
	/* Name for SNI is the same string with the last ":..." cut off */
	sni_name = xstrdup(host);
	*strrchr(sni_name, ':') = '\0';

	fflush_all();
	child_pid = xvfork();
	if (child_pid == 0) {
		/* Child */
		/* Unlisted trailing slots are NULL and terminate the list */
		char *argv[8] = {
			(char*)"openssl",
			(char*)"s_client",
			(char*)"-quiet",
			(char*)"-connect",
			(char*)host,
		};

		close(pair[0]);
		xmove_fd(pair[1], 0);
		xdup2(0, 1);
		/*
		 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
		 * It prints some debug stuff on stderr, don't know how to suppress it.
		 * Work around by dev-nulling stderr. We lose all error messages :(
		 * Real stderr is parked on fd 3 so it can be restored if exec fails.
		 */
		xmove_fd(2, 3);
		xopen("/dev/null", O_RDWR);
		/*
		 * Per RFC 6066 Section 3, the only permitted values in the
		 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
		 * IPv4 and IPv6 addresses, port numbers are not allowed.
		 */
		if (!is_ip_address(sni_name)) {
			argv[5] = (char*)"-servername";
			argv[6] = (char*)sni_name;
		}

		BB_EXECVP(argv[0], argv);
		/* exec failed: get real stderr back */
		xmove_fd(3, 2);
# if ENABLE_FEATURE_WGET_HTTPS
		/* Flag is read by the parent after this child exits */
		child_failed = 1;
		xfunc_die();
# else
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
# endif
		/* notreached */
	}

	/* Parent */
	free(sni_name);
	free(host_with_port);
	close(pair[1]);
# if ENABLE_FEATURE_WGET_HTTPS
	if (child_failed) {
		close(pair[0]);
		return -1;
	}
# endif
	return pair[0];
}
#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100761
#if ENABLE_FEATURE_WGET_HTTPS
/*
 * Put a TLS-speaking child process between us and the server.
 *
 * host:       server name, optionally followed by ":port"
 * network_fd: connected socket to the server
 *
 * On return, network_fd refers to our end of a socketpair whose other
 * end is the child's stdin/stdout; the child talks TLS to the server.
 */
static void spawn_ssl_client(const char *host, int network_fd)
{
	int pair[2];
	int child_pid;
	char *sni_name;
	char *colon;

	/* Server name is host with the last ":..." part, if any, removed */
	sni_name = xstrdup(host);
	colon = strrchr(sni_name, ':');
	if (colon)
		*colon = '\0';

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	fflush_all();
	child_pid = BB_MMU ? xfork() : xvfork();
	if (child_pid == 0) {
		/* Child */
		close(pair[0]);
		xmove_fd(pair[1], 0);
		xdup2(0, 1);
		if (!BB_MMU) {
			/* Run the external helper; it gets the socket as fd 3 */
			//TODO: if (!is_ip_address(servername))...
			char *argv[5] = {
				(char*)"ssl_client",
				(char*)"-s3",
				(char*)"-n",
				sni_name,
				NULL
			};
			xmove_fd(network_fd, 3);
			BB_EXECVP(argv[0], argv);
			bb_perror_msg_and_die("can't execute '%s'", argv[0]);
		} else {
			/* Run the TLS code right here in the forked child */
			tls_state_t *tls = new_tls_state();
			tls->ofd = network_fd;
			tls->ifd = network_fd;
			tls_handshake(tls, sni_name);
			tls_run_copy_loop(tls);
			exit(0);
		}
		/* notreached */
	}

	/* Parent */
	free(sni_name);
	close(pair[1]);
	xmove_fd(pair[0], network_fd);
}
#endif
811
Denys Vlasenko2384a352011-02-15 00:58:36 +0100812static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200813{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200814#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
815# if ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200816 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200817# endif
818 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200819
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200820 polldata.fd = fileno(dfp);
821 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200822#endif
823 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200824
825 if (G.chunked)
826 goto get_clen;
827
828 /* Loops only if chunked */
829 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100830
831#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
832 /* Must use nonblocking I/O, otherwise fread will loop
833 * and *block* until it reads full buffer,
834 * which messes up progress bar and/or timeout logic.
835 * Because of nonblocking I/O, we need to dance
836 * very carefully around EAGAIN. See explanation at
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200837 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100838 */
839 ndelay_on(polldata.fd);
840#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100841 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200842 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100843 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200844
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200845#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100846 /* fread internally uses read loop, which in our case
847 * is usually exited when we get EAGAIN.
848 * In this case, libc sets error marker on the stream.
849 * Need to clear it before next fread to avoid possible
850 * rare false positive ferror below. Rare because usually
851 * fread gets more than zero bytes, and we don't fall
852 * into if (n <= 0) ...
853 */
854 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100855#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200856 errno = 0;
857 rdsz = sizeof(G.wget_buf);
858 if (G.got_clen) {
859 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
860 if ((int)G.content_len <= 0)
861 break;
862 rdsz = (unsigned)G.content_len;
863 }
864 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100865 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200866
867 if (n > 0) {
868 xwrite(G.output_fd, G.wget_buf, n);
869#if ENABLE_FEATURE_WGET_STATUSBAR
870 G.transferred += n;
871#endif
872 if (G.got_clen) {
873 G.content_len -= n;
874 if (G.content_len == 0)
875 break;
876 }
877#if ENABLE_FEATURE_WGET_TIMEOUT
878 second_cnt = G.timeout_seconds;
879#endif
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100880 goto bump;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200881 }
882
883 /* n <= 0.
884 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100885 * If error occurs, or EOF is reached, the return value
886 * is a short item count (or zero).
887 * fread does not distinguish between EOF and error.
888 */
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200889 if (errno != EAGAIN) {
890 if (ferror(dfp)) {
891 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100892 bb_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200893 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100894 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200895 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100896
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200897#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
898 /* It was EAGAIN. There is no data. Wait up to one second
899 * then abort if timed out, or update the bar and try reading again.
900 */
901 if (safe_poll(&polldata, 1, 1000) == 0) {
902# if ENABLE_FEATURE_WGET_TIMEOUT
903 if (second_cnt != 0 && --second_cnt == 0) {
904 progress_meter(PROGRESS_END);
905 bb_error_msg_and_die("download timed out");
906 }
907# endif
908 /* We used to loop back to poll here,
909 * but there is no great harm in letting fread
910 * to try reading anyway.
911 */
912 }
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100913#endif
914 bump:
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200915 /* Need to do it _every_ second for "stalled" indicator
916 * to be shown properly.
917 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200918 progress_meter(PROGRESS_BUMP);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200919 } /* while (reading data) */
920
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100921#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
922 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100923 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100924#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200925 if (!G.chunked)
926 break;
927
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100928 fgets_and_trim(dfp, NULL); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200929 get_clen:
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100930 fgets_and_trim(dfp, NULL);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100931 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200932 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100933 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200934 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100935 G.got_clen = 1;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200936 /*
937 * Note that fgets may result in some data being buffered in dfp.
938 * We loop back to fread, which will retrieve this data.
939 * Also note that code has to be arranged so that fread
940 * is done _before_ one-second poll wait - poll doesn't know
941 * about stdio buffering and can result in spurious one second waits!
942 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200943 }
944
Denys Vlasenko61441242012-06-17 19:52:25 +0200945 /* If -c failed, we restart from the beginning,
946 * but we do not truncate file then, we do it only now, at the end.
947 * This lets user to ^C if his 99% complete 10 GB file download
948 * failed to restart *without* losing the almost complete file.
949 */
950 {
951 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
952 if (pos != (off_t)-1)
953 ftruncate(G.output_fd, pos);
954 }
955
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100956 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100957 G.chunked = 0; /* makes it show 100% even for chunked download */
958 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200959 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200960}
961
Pere Orga53695632011-02-16 20:09:36 +0100962static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +0000963{
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100964 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200965 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100966 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200967 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000968 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100969 char *proxy = NULL;
970 char *fname_out_alloc;
Denys Vlasenko93b4a602011-12-18 05:11:56 +0100971 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100972 struct host_info server;
973 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +0000974
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100975 server.allocated = NULL;
976 target.allocated = NULL;
977 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200978 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100979
980 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000981
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000982 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100983 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000984 if (use_proxy) {
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100985 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
986//FIXME: what if protocol is https? Ok to use http_proxy?
Denys Vlasenko2384a352011-02-15 00:58:36 +0100987 use_proxy = (proxy && proxy[0]);
988 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000989 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +0000990 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200991 if (!use_proxy) {
992 server.port = target.port;
993 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100994 //free(server.allocated); - can't be non-NULL
995 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200996 } else {
997 server.host = target.host;
998 }
999 }
1000
1001 if (ENABLE_FEATURE_IPV6)
1002 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001003
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001004 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001005 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001006 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001007 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001008 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001009 if (G.fname_out[0] == '/' || !G.fname_out[0])
1010 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +00001011 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001012 if (G.dir_prefix)
1013 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001014 else {
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001015 /* redirects may free target.path later, need to make a copy */
1016 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001017 }
Eric Andersen29edd002000-12-09 16:55:35 +00001018 }
Denis Vlasenko818322b2007-09-24 18:27:04 +00001019#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001020 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001021#endif
1022
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +00001023 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +01001024 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001025 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001026 G.output_fd = open(G.fname_out, O_WRONLY);
1027 if (G.output_fd >= 0) {
1028 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +00001029 }
1030 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +01001031 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +00001032 }
1033
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001034 redir_limit = 5;
1035 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +00001036 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001037 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001038 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
1039 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
1040 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +00001041 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001042 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +01001043 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1044 G.got_clen = 0;
1045 G.chunked = 0;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001046 if (use_proxy || target.protocol != P_FTP) {
Eric Andersen79757c92001-04-05 21:45:54 +00001047 /*
1048 * HTTP session
1049 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001050 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001051 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001052
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001053 /* Open socket to http(s) server */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001054#if ENABLE_FEATURE_WGET_OPENSSL
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001055 /* openssl (and maybe internal TLS) support is configured */
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001056 if (target.protocol == P_HTTPS) {
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001057 /* openssl-based helper
1058 * Inconvenient API since we can't give it an open fd
1059 */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001060 int fd = spawn_https_helper_openssl(server.host, server.port);
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001061# if ENABLE_FEATURE_WGET_HTTPS
1062 if (fd < 0) { /* no openssl? try internal */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001063 sfp = open_socket(lsa);
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001064 spawn_ssl_client(server.host, fileno(sfp));
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001065 goto socket_opened;
1066 }
1067# else
1068 /* We don't check for exec("openssl") failure in this case */
1069# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001070 sfp = fdopen(fd, "r+");
1071 if (!sfp)
1072 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001073 goto socket_opened;
1074 }
1075 sfp = open_socket(lsa);
1076 socket_opened:
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001077#elif ENABLE_FEATURE_WGET_HTTPS
1078 /* Only internal TLS support is configured */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001079 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +01001080 if (target.protocol == P_HTTPS)
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001081 spawn_ssl_client(server.host, fileno(sfp));
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001082#else
1083 /* ssl (https) support is not configured */
1084 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +01001085#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001086 /* Send HTTP request */
1087 if (use_proxy) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001088 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001089 target.protocol, target.host,
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001090 target.path);
1091 } else {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001092 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001093 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1094 target.path);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001095 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001096 if (!USR_HEADER_HOST)
1097 SENDFMT(sfp, "Host: %s\r\n", target.host);
1098 if (!USR_HEADER_USER_AGENT)
1099 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +00001100
Denys Vlasenko9213a552011-02-10 13:23:45 +01001101 /* Ask server to close the connection as soon as we are done
1102 * (IOW: we do not intend to send more requests)
1103 */
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001104 SENDFMT(sfp, "Connection: close\r\n");
Denys Vlasenko9213a552011-02-10 13:23:45 +01001105
Denis Vlasenko9cade082006-11-21 10:43:02 +00001106#if ENABLE_FEATURE_WGET_AUTHENTICATION
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001107 if (target.user && !USR_HEADER_AUTH) {
1108 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001109 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001110 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001111 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1112 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001113 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001114 }
Eric Andersen79757c92001-04-05 21:45:54 +00001115#endif
1116
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001117 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1118 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +01001119
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001120#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001121 if (G.extra_headers) {
1122 log_io(G.extra_headers);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001123 fputs(G.extra_headers, sfp);
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001124 }
Denis Vlasenko5a2ad692009-03-04 14:13:37 +00001125
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001126 if (option_mask32 & WGET_OPT_POST_DATA) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001127 SENDFMT(sfp,
Denys Vlasenko9213a552011-02-10 13:23:45 +01001128 "Content-Type: application/x-www-form-urlencoded\r\n"
1129 "Content-Length: %u\r\n"
1130 "\r\n"
1131 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +02001132 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +01001133 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001134 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001135#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +01001136 {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001137 SENDFMT(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001138 }
Eric Andersen79757c92001-04-05 21:45:54 +00001139
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001140 fflush(sfp);
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001141
Denys Vlasenko4e08a122017-01-16 17:31:05 +01001142/* Tried doing this unconditionally.
1143 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1144 */
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001145#if SSL_SUPPORTED
1146 if (target.protocol == P_HTTPS) {
1147 /* If we use SSL helper, keeping our end of the socket open for writing
1148 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1149 * even after child closes its copy of the fd.
1150 * This helps:
1151 */
1152 shutdown(fileno(sfp), SHUT_WR);
1153 }
1154#endif
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001155
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001156 /*
1157 * Retrieve HTTP response line and check for "200" status code.
1158 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001159 read_response:
Denys Vlasenkodff9fef2017-01-24 21:41:43 +01001160 fgets_and_trim(sfp, " %s\n");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001161
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001162 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001163 str = skip_non_whitespace(str);
1164 str = skip_whitespace(str);
1165 // FIXME: no error check
1166 // xatou wouldn't work: "200 OK"
1167 status = atoi(str);
1168 switch (status) {
1169 case 0:
1170 case 100:
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001171 while (gethdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001172 /* eat all remaining headers */;
1173 goto read_response;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001174
1175 /* Success responses */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001176 case 200:
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001177 /* fall through */
1178 case 201: /* 201 Created */
1179/* "The request has been fulfilled and resulted in a new resource being created" */
Denys Vlasenkoef159702016-09-01 11:16:22 +02001180 /* Standard wget is reported to treat this as success */
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001181 /* fall through */
1182 case 202: /* 202 Accepted */
1183/* "The request has been accepted for processing, but the processing has not been completed" */
1184 /* Treat as success: fall through */
1185 case 203: /* 203 Non-Authoritative Information */
1186/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1187 /* fall through */
1188 case 204: /* 204 No Content */
Denis Vlasenko50b5cac2008-06-22 16:28:02 +00001189/*
1190Response 204 doesn't say "null file", it says "metadata
1191has changed but data didn't":
1192
1193"10.2.5 204 No Content
1194The server has fulfilled the request but does not need to return
1195an entity-body, and might want to return updated metainformation.
1196The response MAY include new or updated metainformation in the form
1197of entity-headers, which if present SHOULD be associated with
1198the requested variant.
1199
1200If the client is a user agent, it SHOULD NOT change its document
1201view from that which caused the request to be sent. This response
1202is primarily intended to allow input for actions to take place
1203without causing a change to the user agent's active document view,
1204although any new or updated metainformation SHOULD be applied
1205to the document currently in the user agent's active view.
1206
1207The 204 response MUST NOT include a message-body, and thus
1208is always terminated by the first empty line after the header fields."
1209
1210However, in real world it was observed that some web servers
1211(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1212*/
Denys Vlasenkobf146b82012-06-13 17:31:07 +02001213 if (G.beg_range != 0) {
1214 /* "Range:..." was not honored by the server.
1215 * Restart download from the beginning.
1216 */
1217 reset_beg_range_to_zero();
1218 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001219 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001220 /* 205 Reset Content ?? what to do on this ?? */
1221
Denys Vlasenkofb132e42010-10-29 11:46:52 +02001222 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001223 case 301:
1224 case 302:
1225 case 303:
1226 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001227
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001228 case 206: /* Partial Content */
1229 if (G.beg_range != 0)
1230 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001231 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001232 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001233 /* fall through */
1234 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001235 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001236 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001237
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001238 /*
1239 * Retrieve HTTP headers.
1240 */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001241 while ((str = gethdr(sfp)) != NULL) {
1242 static const char keywords[] ALIGN1 =
1243 "content-length\0""transfer-encoding\0""location\0";
1244 enum {
1245 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1246 };
Matthijs van de Water0d586662009-08-22 20:19:48 +02001247 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001248
1249 /* gethdr converted "FOO:" string to lowercase */
1250
Matthijs van de Water0d586662009-08-22 20:19:48 +02001251 /* strip trailing whitespace */
1252 char *s = strchrnul(str, '\0') - 1;
1253 while (s >= str && (*s == ' ' || *s == '\t')) {
1254 *s = '\0';
1255 s--;
1256 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001257 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001258 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001259 G.content_len = BB_STRTOOFF(str, NULL, 10);
1260 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001261 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +00001262 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001263 G.got_clen = 1;
1264 continue;
1265 }
1266 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001267 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001268 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001269 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001270 }
1271 if (key == KEY_location && status >= 300) {
1272 if (--redir_limit == 0)
1273 bb_error_msg_and_die("too many redirections");
1274 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001275 if (str[0] == '/') {
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001276 free(redirected_path);
1277 target.path = redirected_path = xstrdup(str+1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001278 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001279 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001280 parse_url(str, &target);
1281 if (!use_proxy) {
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001282 /* server.user remains untouched */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001283 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +01001284 server.allocated = NULL;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001285 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001286 /* strip_ipv6_scope_id(target.host); - no! */
1287 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001288 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +00001289 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001290 goto resolve_lsa;
1291 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +00001292 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001293 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +00001294 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001295 }
1296// if (status >= 300)
1297// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001298
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001299 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +00001300 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001301 } else {
Eric Andersen79757c92001-04-05 21:45:54 +00001302 /*
1303 * FTP session
1304 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001305 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +00001306 }
Denis Vlasenko77105632007-09-24 15:04:00 +00001307
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001308 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001309
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001310 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001311 if (G.output_fd < 0)
1312 G.output_fd = xopen(G.fname_out, G.o_flags);
1313 retrieve_file_data(dfp);
1314 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1315 xclose(G.output_fd);
1316 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001317 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +00001318 }
Eric Andersen79757c92001-04-05 21:45:54 +00001319
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001320 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001321 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +00001322 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001323 if (ftpcmd(NULL, NULL, sfp) != 226)
1324 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1325 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +00001326 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001327 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +00001328
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001329 free(server.allocated);
1330 free(target.allocated);
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001331 free(server.user);
1332 free(target.user);
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001333 free(fname_out_alloc);
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001334 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +00001335}
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001336
1337int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1338int wget_main(int argc UNUSED_PARAM, char **argv)
1339{
1340#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1341 static const char wget_longopts[] ALIGN1 =
1342 /* name, has_arg, val */
1343 "continue\0" No_argument "c"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001344 "quiet\0" No_argument "q"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +01001345 "server-response\0" No_argument "S"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001346 "output-document\0" Required_argument "O"
1347 "directory-prefix\0" Required_argument "P"
1348 "proxy\0" Required_argument "Y"
1349 "user-agent\0" Required_argument "U"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001350IF_FEATURE_WGET_TIMEOUT(
1351 "timeout\0" Required_argument "T")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001352 /* Ignored: */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001353IF_DESKTOP( "tries\0" Required_argument "t")
1354 "header\0" Required_argument "\xff"
1355 "post-data\0" Required_argument "\xfe"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001356 "spider\0" No_argument "\xfd"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001357 /* Ignored (we always use PASV): */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001358IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001359 /* Ignored (we don't do ssl) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001360IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0")
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001361 /* Ignored (we don't support caching) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001362IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1363IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1364IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1365IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1366IF_DESKTOP( "no-parent\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001367 ;
1368#endif
1369
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001370#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1371 llist_t *headers_llist = NULL;
1372#endif
1373
1374 INIT_G();
1375
Lauri Kasanend074b412013-10-12 21:47:07 +02001376#if ENABLE_FEATURE_WGET_TIMEOUT
1377 G.timeout_seconds = 900;
1378 signal(SIGALRM, alarm_handler);
1379#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001380 G.proxy_flag = "on"; /* use proxies if env vars are set */
1381 G.user_agent = "Wget"; /* "User-Agent" header field */
1382
1383#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1384 applet_long_options = wget_longopts;
1385#endif
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001386 opt_complementary = "-1" /* at least one URL */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001387 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */
Denys Vlasenkodff9fef2017-01-24 21:41:43 +01001388 getopt32(argv, "cqSO:P:Y:U:T:+"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001389 /*ignored:*/ "t:"
1390 /*ignored:*/ "n::"
1391 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1392 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1393 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1394 * -nH --no-host-directories: wget -r http://host/ won't create host/
1395 * -np --no-parent
1396 * "n::" above says that we accept -n[ARG].
1397 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1398 */
1399 , &G.fname_out, &G.dir_prefix,
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001400 &G.proxy_flag, &G.user_agent,
1401 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001402 NULL, /* -t RETRIES */
1403 NULL /* -n[ARG] */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001404 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1405 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1406 );
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001407#if 0 /* option bits debug */
1408 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1409 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1410 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1411 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1412 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
1413 exit(0);
1414#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001415 argv += optind;
1416
1417#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1418 if (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001419 int size = 0;
1420 char *hdr;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001421 llist_t *ll = headers_llist;
1422 while (ll) {
1423 size += strlen(ll->data) + 2;
1424 ll = ll->link;
1425 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001426 G.extra_headers = hdr = xmalloc(size + 1);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001427 while (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001428 int bit;
1429 const char *words;
1430
1431 size = sprintf(hdr, "%s\r\n",
1432 (char*)llist_pop(&headers_llist));
1433 /* a bit like index_in_substrings but don't match full key */
1434 bit = 1;
1435 words = wget_user_headers;
1436 while (*words) {
1437 if (strstr(hdr, words) == hdr) {
1438 G.user_headers |= bit;
1439 break;
1440 }
1441 bit <<= 1;
1442 words += strlen(words) + 1;
1443 }
1444 hdr += size;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001445 }
1446 }
1447#endif
1448
Denys Vlasenko2384a352011-02-15 00:58:36 +01001449 G.output_fd = -1;
1450 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1451 if (G.fname_out) { /* -O FILE ? */
1452 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1453 G.output_fd = 1;
1454 option_mask32 &= ~WGET_OPT_CONTINUE;
1455 }
1456 /* compat with wget: -O FILE can overwrite */
1457 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1458 }
1459
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001460 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +01001461 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001462
Denys Vlasenko28556b92011-02-15 11:03:53 +01001463 if (G.output_fd >= 0)
1464 xclose(G.output_fd);
1465
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +02001466#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1467 free(G.extra_headers);
1468#endif
1469 FINI_G();
1470
Pere Orga53695632011-02-16 20:09:36 +01001471 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001472}