blob: ff0df4ca0bc6db17dcdbcc1fb9f03c09d174705a [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020011//config:config WGET
Denys Vlasenkob097a842018-12-28 03:20:17 +010012//config: bool "wget (38 kb)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020013//config: default y
14//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020015//config: wget is a utility for non-interactive download of files from HTTP
16//config: and FTP servers.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020017//config:
Denys Vlasenkof5604222017-01-10 14:58:54 +010018//config:config FEATURE_WGET_LONG_OPTIONS
19//config: bool "Enable long options"
20//config: default y
21//config: depends on WGET && LONG_OPTS
22//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020023//config:config FEATURE_WGET_STATUSBAR
Denys Vlasenkof5604222017-01-10 14:58:54 +010024//config: bool "Enable progress bar (+2k)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020025//config: default y
26//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020027//config:
28//config:config FEATURE_WGET_AUTHENTICATION
29//config: bool "Enable HTTP authentication"
30//config: default y
31//config: depends on WGET
32//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020033//config: Support authenticated HTTP transfers.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020034//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020035//config:config FEATURE_WGET_TIMEOUT
36//config: bool "Enable timeout option -T SEC"
37//config: default y
38//config: depends on WGET
39//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020040//config: Supports network read and connect timeouts for wget,
41//config: so that wget will give up and timeout, through the -T
42//config: command line option.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020043//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020044//config: Currently only connect and network data read timeout are
45//config: supported (i.e., timeout is not applied to the DNS query). When
46//config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
47//config: will work in addition to -T.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020048//config:
Denys Vlasenko9a647c32017-01-23 01:08:16 +010049//config:config FEATURE_WGET_HTTPS
50//config: bool "Support HTTPS using internal TLS code"
Denys Vlasenko403f2992018-02-06 15:15:08 +010051//it also enables FTPS support, but it's not well tested yet
Denys Vlasenko9a647c32017-01-23 01:08:16 +010052//config: default y
53//config: depends on WGET
54//config: select TLS
55//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020056//config: wget will use internal TLS code to connect to https:// URLs.
57//config: Note:
58//config: On NOMMU machines, ssl_helper applet should be available
59//config: in the $PATH for this to work. Make sure to select that applet.
Denys Vlasenko9a647c32017-01-23 01:08:16 +010060//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020061//config: Note: currently, TLS code only makes TLS I/O work, it
62//config: does *not* check that the peer is who it claims to be, etc.
63//config: IOW: it uses peer-supplied public keys to establish encryption
64//config: and signing keys, then encrypts and signs outgoing data and
65//config: decrypts incoming data.
66//config: It does not check signature hashes on the incoming data:
67//config: this means that attackers manipulating TCP packets can
68//config: send altered data and we unknowingly receive garbage.
69//config: (This check might be relatively easy to add).
70//config: It does not check public key's certificate:
71//config: this means that the peer may be an attacker impersonating
72//config: the server we think we are talking to.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010073//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020074//config: If you think this is unacceptable, consider this. As more and more
75//config: servers switch to HTTPS-only operation, without such "crippled"
76//config: TLS code it is *impossible* to simply download a kernel source
77//config: from kernel.org. Which can in real world translate into
78//config: "my small automatic tooling to build cross-compilers from sources
79//config: no longer works, I need to additionally keep a local copy
80//config: of ~4 megabyte source tarball of a SSL library and ~2 megabyte
81//config: source of wget, need to compile and build both before I can
82//config: download anything. All this despite the fact that the build
83//config: is done in a QEMU sandbox on a machine with absolutely nothing
84//config: worth stealing, so I don't care if someone would go to a lot
85//config: of trouble to intercept my HTTPS download to send me an altered
86//config: kernel tarball".
Denys Vlasenko67f6db62017-01-30 16:27:37 +010087//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020088//config: If you still think this is unacceptable, send patches.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010089//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020090//config: If you still think this is unacceptable, do not want to send
91//config: patches, but do want to waste bandwidth explaining how wrong
92//config: it is, you will be ignored.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010093//config:
Dimitri John Ledkov45fa3f12020-05-19 18:20:39 +010094//config: FEATURE_WGET_OPENSSL does implement TLS verification
95//config: using the certificates available to OpenSSL.
96//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +020097//config:config FEATURE_WGET_OPENSSL
98//config: bool "Try to connect to HTTPS using openssl"
99//config: default y
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200100//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200101//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200102//config: Try to use openssl to handle HTTPS.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200103//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200104//config: OpenSSL has a simple SSL client for debug purposes.
105//config: If you select this option, wget will effectively run:
106//config: "openssl s_client -quiet -connect hostname:443
107//config: -servername hostname 2>/dev/null" and pipe its data
108//config: through it. -servername is not used if hostname is numeric.
109//config: Note inconvenient API: host resolution is done twice,
110//config: and there is no guarantee openssl's idea of IPv6 address
111//config: format is the same as ours.
112//config: Another problem is that s_client prints debug information
113//config: to stderr, and it needs to be suppressed. This means
114//config: all error messages get suppressed too.
115//config: openssl is also a big binary, often dynamically linked
116//config: against ~15 libraries.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200117//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200118//config: If openssl can't be executed, internal TLS code will be used
119//config: (if you enabled it); if openssl can be executed but fails later,
120//config: wget can't detect this, and download will fail.
Dimitri John Ledkov45fa3f12020-05-19 18:20:39 +0100121//config:
122//config: By default TLS verification is performed, unless
123//config: --no-check-certificate option is passed.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200124
125//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
126
127//kbuild:lib-$(CONFIG_WGET) += wget.o
128
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100129//usage:#define wget_trivial_usage
130//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200131//usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
Martin Lewis64f35362018-12-26 16:28:45 +0100132//usage: " [-o|--output-file FILE] [--header 'header: value'] [-Y|--proxy on/off]\n"
Dimitri John Ledkov45fa3f12020-05-19 18:20:39 +0100133//usage: IF_FEATURE_WGET_OPENSSL(
134//usage: " [--no-check-certificate]\n"
135//usage: )
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200136/* Since we ignore these opts, we don't show them in --help */
Dimitri John Ledkov45fa3f12020-05-19 18:20:39 +0100137/* //usage: " [--no-cache] [--passive-ftp] [-t TRIES]" */
Denys Vlasenko92e1b082015-10-20 21:51:52 +0200138/* //usage: " [-nv] [-nc] [-nH] [-np]" */
Martin Lewis64f35362018-12-26 16:28:45 +0100139//usage: " [-P DIR] [-S|--server-response] [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100140//usage: )
141//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
Martin Lewis64f35362018-12-26 16:28:45 +0100142//usage: "[-cq] [-O FILE] [-o FILE] [-Y on/off] [-P DIR] [-S] [-U AGENT]"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100143//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
144//usage: )
145//usage:#define wget_full_usage "\n\n"
146//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200147//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100148//usage: "\n --spider Only check URL existence: $? is 0 if exists"
Dimitri John Ledkov45fa3f12020-05-19 18:20:39 +0100149//usage: IF_FEATURE_WGET_OPENSSL(
150//usage: "\n --no-check-certificate Don't validate the server's certificate"
151//usage: )
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100152//usage: )
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200153//usage: "\n -c Continue retrieval of aborted transfer"
154//usage: "\n -q Quiet"
155//usage: "\n -P DIR Save to DIR (default .)"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100156//usage: "\n -S Show server response"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200157//usage: IF_FEATURE_WGET_TIMEOUT(
158//usage: "\n -T SEC Network read timeout is SEC seconds"
159//usage: )
160//usage: "\n -O FILE Save to FILE ('-' for stdout)"
Martin Lewis64f35362018-12-26 16:28:45 +0100161//usage: "\n -o FILE Log messages to FILE"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200162//usage: "\n -U STR Use STR for User-Agent header"
163//usage: "\n -Y on/off Use proxy"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100164
Denis Vlasenkob6adbf12007-05-26 19:00:18 +0000165#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000166
/* Protocol tracing: change "#if 0" to "#if 1" to have every line we send
 * ("> ...") and receive ("< ...") echoed through bb_error_msg().
 * In the normal build log_io() expands to nothing and SENDFMT() is
 * a plain fprintf().
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
/* No ';' after "while (0)": the caller's own semicolon terminates the
 * statement, which keeps SENDFMT() usable inside an unbraced if/else.
 */
# define SENDFMT(fp, fmt, ...) \
	do { \
		log_io("> " fmt, ##__VA_ARGS__); \
		fprintf(fp, fmt, ##__VA_ARGS__); \
	} while (0)
#else
# define log_io(...) ((void)0)
# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100178
179
Denys Vlasenko9a647c32017-01-23 01:08:16 +0100180#define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100181
/* One parsed URL, as filled in by parse_url() */
struct host_info {
	char *allocated;        /* private copy of the URL text; path and host point into it */
	const char *path;       /* text after the first '/', '?' or '#' ("" if none) */
	char *user;             /* percent-decoded "user:password" part, separately allocated */
	const char *protocol;   /* one of the P_xxx scheme name constants */
	char *host;             /* host[:port] part, inside 'allocated' */
	int port;               /* default port of the scheme */
};
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200190static const char P_FTP[] ALIGN1 = "ftp";
191static const char P_HTTP[] ALIGN1 = "http";
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100192#if SSL_SUPPORTED
Denys Vlasenko403f2992018-02-06 15:15:08 +0100193# if ENABLE_FEATURE_WGET_HTTPS
194static const char P_FTPS[] ALIGN1 = "ftps";
195# endif
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200196static const char P_HTTPS[] ALIGN1 = "https";
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100197#endif
Eric Andersen79757c92001-04-05 21:45:54 +0000198
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* User-specified headers prevent using our corresponding built-in headers.
 * One bit per overridable header; the bits are kept in G.user_headers.
 * Both AUTH bits evaluate to 0 when authentication support is compiled out.
 */
enum {
	HDR_HOST       = (1<<0),
	HDR_USER_AGENT = (1<<1),
	HDR_RANGE      = (1<<2),
	HDR_AUTH       = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
	HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
};
/* NUL-separated header names, listed in the same order as the HDR_xxx bits */
static const char wget_user_headers[] ALIGN1 =
	"Host:\0"
	"User-Agent:\0"
	"Range:\0"
# if ENABLE_FEATURE_WGET_AUTHENTICATION
	"Authorization:\0"
	"Proxy-Authorization:\0"
# endif
	;
/* Nonzero if the user supplied the named header */
# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
#else /* No long options, no user-headers :( */
# define USR_HEADER_HOST       0
# define USR_HEADER_USER_AGENT 0
# define USR_HEADER_RANGE      0
# define USR_HEADER_AUTH       0
# define USR_HEADER_PROXY_AUTH 0
#endif
Denis Vlasenko77105632007-09-24 15:04:00 +0000229
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200230/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +0000231struct globals {
232 off_t content_len; /* Content-length of the file */
233 off_t beg_range; /* Range at which continue begins */
234#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +0000235 off_t transferred; /* Number of bytes transferred so far */
236 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +0100237 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +0000238#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200239 char *dir_prefix;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100240#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200241 char *post_data;
242 char *extra_headers;
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +0100243 unsigned char user_headers; /* Headers mentioned by the user */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100244#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200245 char *fname_out; /* where to direct output (-O) */
Martin Lewis64f35362018-12-26 16:28:45 +0100246 char *fname_log; /* where to direct log (-o) */
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200247 const char *proxy_flag; /* Use proxies if env vars are set */
248 const char *user_agent; /* "User-Agent" header field */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100249 int output_fd;
Martin Lewis64f35362018-12-26 16:28:45 +0100250 int log_fd;
Denys Vlasenko2384a352011-02-15 00:58:36 +0100251 int o_flags;
Denys Vlasenko5084bae2018-11-24 21:56:21 +0100252#if ENABLE_FEATURE_WGET_TIMEOUT
253 unsigned timeout_seconds;
254 smallint die_if_timed_out;
255#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200256 smallint chunked; /* chunked transfer encoding */
257 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100258 /* Local downloads do benefit from big buffer.
259 * With 512 byte buffer, it was measured to be
260 * an order of magnitude slower than with big one.
261 */
Denys Vlasenko9b313dd2019-01-21 13:53:26 +0100262 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024] ALIGNED(16);
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +0100263} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100264#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200265#define INIT_G() do { \
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200266 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200267} while (0)
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +0200268#define FINI_G() do { \
269 FREE_PTR_TO_GLOBALS(); \
270} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +0000271
272
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200273/* Must match option string! */
274enum {
275 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200276 WGET_OPT_QUIET = (1 << 1),
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100277 WGET_OPT_SERVER_RESPONSE = (1 << 2),
278 WGET_OPT_OUTNAME = (1 << 3),
Martin Lewis64f35362018-12-26 16:28:45 +0100279 WGET_OPT_LOGNAME = (1 << 4),
280 WGET_OPT_PREFIX = (1 << 5),
281 WGET_OPT_PROXY = (1 << 6),
282 WGET_OPT_USER_AGENT = (1 << 7),
283 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8),
284 WGET_OPT_RETRIES = (1 << 9),
285 WGET_OPT_nsomething = (1 << 10),
286 WGET_OPT_HEADER = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
287 WGET_OPT_POST_DATA = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
288 WGET_OPT_SPIDER = (1 << 13) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
289 WGET_OPT_NO_CHECK_CERT = (1 << 14) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
Denys Vlasenkoe7d853b2020-12-08 19:06:28 +0100290 /* hijack this bit for other than opts purposes: */
291 WGET_NO_FTRUNCATE = (1 << 31)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200292};
293
294enum {
295 PROGRESS_START = -1,
296 PROGRESS_END = 0,
297 PROGRESS_BUMP = 1,
298};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000299#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000300static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000301{
Denys Vlasenko26602b82018-11-23 19:14:52 +0100302 int notty;
303
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200304 if (option_mask32 & WGET_OPT_QUIET)
305 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000306
Martin Lewis64f35362018-12-26 16:28:45 +0100307 /* Don't save progress to log file */
308 if (G.log_fd >= 0)
309 return;
310
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200311 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100312 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000313
Denys Vlasenko26602b82018-11-23 19:14:52 +0100314 notty = bb_progress_update(&G.pmt,
Denys Vlasenko2384a352011-02-15 00:58:36 +0100315 G.beg_range,
316 G.transferred,
317 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
318 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000319
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200320 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100321 bb_progress_free(&G.pmt);
Denys Vlasenko26602b82018-11-23 19:14:52 +0100322 if (notty == 0)
323 bb_putchar_stderr('\n'); /* it's tty */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100324 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000325 }
326}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200327#else
Denys Vlasenko8c317f02019-05-14 17:26:47 +0200328static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) {}
Eric Andersenb520e082000-10-03 00:21:45 +0000329#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000330
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000331
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200332/* IPv6 knows scoped address types i.e. link and site local addresses. Link
333 * local addresses can have a scope identifier to specify the
334 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
335 * identifier is only valid on a single node.
336 *
337 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
338 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
339 * in the Host header as invalid requests, see
340 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
341 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return; /* no zone identifier present */

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	if (scope > cp) {
		/* '%' is outside the brackets (e.g. "[::1]:80%x"), so it is
		 * not a zone identifier. Copying "]..." forward over "%..."
		 * would lengthen the string and write past its end.
		 */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]...":
	 * move the tail down over the zone identifier, in place.
	 */
	overlapping_strcpy(scope, cp);
}
367
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode the string STR into G.wget_buf and return G.wget_buf.
 * The result lives in the shared buffer, so it is only valid until
 * G.wget_buf is written again.
 */
static char *base64enc(const char *str)
{
	/* paranoia: look at no more input than this many bytes */
	const size_t max_input = sizeof(G.wget_buf)/4*3 - 10;
	unsigned input_len = strnlen(str, max_input);

	bb_uuencode(G.wget_buf, str, input_len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
378
#if ENABLE_FEATURE_WGET_TIMEOUT
/* Signal handler (presumably installed for SIGALRM elsewhere in this file):
 * aborts the program, but only while a guarded operation is in progress.
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	if (!G.die_if_timed_out)
		return;
	bb_simple_error_msg_and_die("download timed out");
}
/* Arm the timeout before a blocking network operation.
 * Does nothing when no timeout is configured (timeout_seconds == 0).
 */
static void set_alarm(void)
{
	if (G.timeout_seconds == 0)
		return;
	alarm(G.timeout_seconds);
	G.die_if_timed_out = 1;
}
/* The pending alarm is not cancelled, it is merely made harmless */
# define clear_alarm() ((void)(G.die_if_timed_out = 0))
#else
# define set_alarm()   ((void)0)
# define clear_alarm() ((void)0)
#endif
398
Denys Vlasenkoed727612016-07-25 21:34:57 +0200399#if ENABLE_FEATURE_WGET_OPENSSL
400/*
401 * is_ip_address() attempts to verify whether or not a string
402 * contains an IPv4 or IPv6 address (vs. an FQDN). The result
403 * of inet_pton() can be used to determine this.
Denys Vlasenkoed727612016-07-25 21:34:57 +0200404 */
static int is_ip_address(const char *string)
{
	struct sockaddr_in v4;
	int rc;

	rc = inet_pton(AF_INET, string, &v4.sin_addr);
	/* 1: valid IPv4 literal; negative: error. Either way we are done */
	if (rc != 0)
		return (rc == 1);

	/* Not an IPv4 literal: the only remaining chance is IPv6 */
# if ENABLE_FEATURE_IPV6
	{
		struct sockaddr_in6 v6;
		return (inet_pton(AF_INET6, string, &v6.sin6_addr) == 1);
	}
# else
	return 0;
# endif
}
418#endif
419
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000420static FILE *open_socket(len_and_sockaddr *lsa)
421{
Lauri Kasanend074b412013-10-12 21:47:07 +0200422 int fd;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000423 FILE *fp;
424
Denys Vlasenko6701e912016-03-17 15:58:16 +0100425 set_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200426 fd = xconnect_stream(lsa);
Denys Vlasenko6701e912016-03-17 15:58:16 +0100427 clear_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200428
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000429 /* glibc 2.4 seems to try seeking on it - ??! */
430 /* hopefully it understands what ESPIPE means... */
Lauri Kasanend074b412013-10-12 21:47:07 +0200431 fp = fdopen(fd, "r+");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100432 if (!fp)
Denys Vlasenko899ae532018-04-01 19:59:37 +0200433 bb_die_memory_exhausted();
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000434
435 return fp;
436}
437
Denys Vlasenko34590242018-02-12 16:46:13 +0100438/* We balk at any control chars in other side's messages.
439 * This prevents nasty surprises (e.g. ESC sequences) in "Location:" URLs
440 * and error messages.
441 *
442 * The only exception is tabs, which are converted to (one) space:
443 * HTTP's "headers: <whitespace> values" may have those.
444 */
static char* sanitize_string(char *s)
{
	unsigned char *cur;

	/* Bytes are examined as unsigned, so values >= 0x80 pass through */
	for (cur = (unsigned char *) s; *cur != '\0'; cur++) {
		if (*cur >= ' ')
			continue;
		if (*cur == '\t') {
			/* the one tolerated control char: becomes a space */
			*cur = ' ';
			continue;
		}
		/* any other control char: the string ends right here */
		break;
	}
	*cur = '\0';
	return s;
}
459
Denys Vlasenkof836f012011-02-10 23:02:28 +0100460/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
Denys Vlasenko34590242018-02-12 16:46:13 +0100461static char fgets_trim_sanitize(FILE *fp, const char *fmt)
Denys Vlasenkof836f012011-02-10 23:02:28 +0100462{
463 char c;
464 char *buf_ptr;
465
Denys Vlasenko6701e912016-03-17 15:58:16 +0100466 set_alarm();
Denys Vlasenko34590242018-02-12 16:46:13 +0100467 if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
James Byrne69374872019-07-02 11:35:03 +0200468 bb_simple_perror_msg_and_die("error getting response");
Denys Vlasenko6701e912016-03-17 15:58:16 +0100469 clear_alarm();
Denys Vlasenkof836f012011-02-10 23:02:28 +0100470
471 buf_ptr = strchrnul(G.wget_buf, '\n');
472 c = *buf_ptr;
Denys Vlasenko34590242018-02-12 16:46:13 +0100473#if 1
474 /* Disallow any control chars: trim at first char < 0x20 */
475 sanitize_string(G.wget_buf);
476#else
Denys Vlasenkof836f012011-02-10 23:02:28 +0100477 *buf_ptr = '\0';
478 buf_ptr = strchrnul(G.wget_buf, '\r');
479 *buf_ptr = '\0';
Denys Vlasenko34590242018-02-12 16:46:13 +0100480#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100481
482 log_io("< %s", G.wget_buf);
483
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100484 if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE))
485 fprintf(stderr, fmt, G.wget_buf);
486
Denys Vlasenkof836f012011-02-10 23:02:28 +0100487 return c;
488}
489
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100490static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000491{
492 int result;
493 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100494 if (!s2)
495 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000496 fprintf(fp, "%s%s\r\n", s1, s2);
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100497 /* With --server-response, wget also shows its ftp commands */
498 if (option_mask32 & WGET_OPT_SERVER_RESPONSE)
499 fprintf(stderr, "--> %s%s\n\n", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000500 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100501 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000502 }
503
Denys Vlasenko34590242018-02-12 16:46:13 +0100504 /* Read until "Nxx something" is received */
505 G.wget_buf[3] = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000506 do {
Denys Vlasenko34590242018-02-12 16:46:13 +0100507 fgets_trim_sanitize(fp, "%s\n");
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100508 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000509
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100510 G.wget_buf[3] = '\0';
511 result = xatoi_positive(G.wget_buf);
512 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000513 return result;
514}
515
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100516static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000517{
518 char *url, *p, *sp;
519
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100520 free(h->allocated);
521 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000522
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100523 h->protocol = P_FTP;
524 p = strstr(url, "://");
525 if (p) {
526 *p = '\0';
527 h->host = p + 3;
528 if (strcmp(url, P_FTP) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200529 h->port = bb_lookup_std_port(P_FTP, "tcp", 21);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100530 } else
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100531#if SSL_SUPPORTED
Denys Vlasenko403f2992018-02-06 15:15:08 +0100532# if ENABLE_FEATURE_WGET_HTTPS
533 if (strcmp(url, P_FTPS) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200534 h->port = bb_lookup_std_port(P_FTPS, "tcp", 990);
Denys Vlasenko403f2992018-02-06 15:15:08 +0100535 h->protocol = P_FTPS;
536 } else
537# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100538 if (strcmp(url, P_HTTPS) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200539 h->port = bb_lookup_std_port(P_HTTPS, "tcp", 443);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100540 h->protocol = P_HTTPS;
541 } else
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100542#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100543 if (strcmp(url, P_HTTP) == 0) {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100544 http:
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200545 h->port = bb_lookup_std_port(P_HTTP, "tcp", 80);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100546 h->protocol = P_HTTP;
547 } else {
548 *p = ':';
Denys Vlasenko34590242018-02-12 16:46:13 +0100549 bb_error_msg_and_die("not an http or ftp url: %s", url);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100550 }
551 } else {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100552 // GNU wget is user-friendly and falls back to http://
553 h->host = url;
554 goto http;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100555 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000556
557 // FYI:
558 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
Denys Vlasenkoa0aae9f2017-01-20 14:12:10 +0100559 // 'GET /?var=a/b HTTP/1.0'
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000560 // and saves 'index.html?var=a%2Fb' (we save 'b')
561 // wget 'http://busybox.net?login=john@doe':
562 // request: 'GET /?login=john@doe HTTP/1.0'
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200563 // saves: 'index.html?login=john@doe' (we save 'login=john@doe')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000564 // wget 'http://busybox.net#test/test':
565 // request: 'GET / HTTP/1.0'
566 // saves: 'index.html' (we save 'test')
567 //
568 // We also don't add unique .N suffix if file exists...
569 sp = strchr(h->host, '/');
570 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
571 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
572 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000573 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000574 } else if (*sp == '/') {
575 *sp = '\0';
576 h->path = sp + 1;
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200577 } else {
578 // sp points to '#' or '?'
579 // Note:
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000580 // http://busybox.net?login=john@doe is a valid URL
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200581 // (without '/' between ".net" and "?"),
582 // can't store NUL at sp[-1] - this destroys hostname.
583 *sp++ = '\0';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000584 h->path = sp;
585 }
586
587 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000588 if (sp != NULL) {
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200589 // URL-decode "user:password" string before base64-encoding:
590 // wget http://test:my%20pass@example.com should send
591 // Authorization: Basic dGVzdDpteSBwYXNz
592 // which decodes to "test:my pass".
593 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000594 *sp = '\0';
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100595 free(h->user);
596 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000597 h->host = sp + 1;
598 }
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100599 /* else: h->user remains NULL, or as set by original request
600 * before redirect (if we are here after a redirect).
601 */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000602}
603
Denys Vlasenko34590242018-02-12 16:46:13 +0100604static char *get_sanitized_hdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000605{
606 char *s, *hdrval;
607 int c;
608
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000609 /* retrieve header line */
Denys Vlasenko34590242018-02-12 16:46:13 +0100610 c = fgets_trim_sanitize(fp, " %s\n");
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000611
Denys Vlasenkof836f012011-02-10 23:02:28 +0100612 /* end of the headers? */
613 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000614 return NULL;
615
616 /* convert the header name to lower case */
Denys Vlasenkoea267d52013-07-01 15:01:50 +0200617 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
618 /*
619 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
620 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
621 * "A-Z" maps to "a-z".
622 * "@[\]" can't occur in header names.
623 * "^_" maps to "~,DEL" (which is wrong).
624 * "^" was never seen yet, "_" was seen from web.archive.org
625 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
626 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100627 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200628 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000629
630 /* verify we are at the end of the header name */
631 if (*s != ':')
Denys Vlasenko34590242018-02-12 16:46:13 +0100632 bb_error_msg_and_die("bad header line: %s", G.wget_buf);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000633
634 /* locate the start of the header value */
635 *s++ = '\0';
636 hdrval = skip_whitespace(s);
637
Denys Vlasenkof836f012011-02-10 23:02:28 +0100638 if (c != '\n') {
639 /* Rats! The buffer isn't big enough to hold the entire header value */
640 while (c = getc(fp), c != EOF && c != '\n')
641 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000642 }
643
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000644 return hdrval;
645}
646
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200647static void reset_beg_range_to_zero(void)
648{
James Byrne69374872019-07-02 11:35:03 +0200649 bb_simple_error_msg("restart failed");
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200650 G.beg_range = 0;
651 xlseek(G.output_fd, 0, SEEK_SET);
Denys Vlasenko61441242012-06-17 19:52:25 +0200652 /* Done at the end instead: */
653 /* ftruncate(G.output_fd, 0); */
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200654}
655
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Run "openssl s_client -quiet -connect HOST:PORT ..." as a child whose
 * stdin and stdout are one end of an AF_UNIX socketpair.
 *
 * host: server name or address; ":port" is appended if host has no ':'.
 * port: port number used when host carries none.
 *
 * Returns the parent's end of the socketpair.  When
 * ENABLE_FEATURE_WGET_HTTPS is set and the child reported that exec
 * failed, returns -1 instead.  Dies if socketpair() fails.
 */
static int spawn_https_helper_openssl(const char *host, unsigned port)
{
	char *allocated = NULL;
	char *servername;
	int sp[2];
	int pid;
	/* volatile: written by the child after xvfork(), read by the parent
	 * (presumably relies on vfork sharing the address space) */
	IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_simple_perror_msg_and_die("socketpair");

	if (!strchr(host, ':'))
		host = allocated = xasprintf("%s:%u", host, port);
	/* host now always contains ':'; servername is host without the last ":..." */
	servername = xstrdup(host);
	strrchr(servername, ':')[0] = '\0';

	fflush_all();
	pid = xvfork();
	if (pid == 0) {
		/* Child */
		char *argv[13];
		char **argp;
		int name_is_ip;

		close(sp[0]);
		xmove_fd(sp[1], 0);
		xdup2(0, 1);
		/*
		 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
		 * It prints some debug stuff on stderr, don't know how to suppress it.
		 * Work around by dev-nulling stderr. We lose all error messages :(
		 */
		xmove_fd(2, 3);
		xopen("/dev/null", O_RDWR);
		memset(&argv, 0, sizeof(argv));
		argv[0] = (char*)"openssl";
		argv[1] = (char*)"s_client";
		argv[2] = (char*)"-quiet";
		argv[3] = (char*)"-connect";
		argv[4] = (char*)host;
		/*
		 * Per RFC 6066 Section 3, the only permitted values in the
		 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
		 * IPv4 and IPv6 addresses, port numbers are not allowed.
		 */
		name_is_ip = is_ip_address(servername);
		argp = &argv[5];
		if (!name_is_ip) {
			*argp++ = (char*)"-servername";          //[5]
			*argp++ = (char*)servername;             //[6]
		}
		if (!(option_mask32 & WGET_OPT_NO_CHECK_CERT)) {
			/* Abort on bad server certificate */
			*argp++ = (char*)"-verify";              //[7]
			*argp++ = (char*)"100";                  //[8]
			*argp++ = (char*)"-verify_return_error"; //[9]
			if (!name_is_ip) {
				*argp++ = (char*)"-verify_hostname"; //[10]
				*argp++ = (char*)servername;         //[11]
			} else {
				/* Pass the bare address: "host" is "ADDR:PORT" here,
				 * which is not an IP address string.
				 */
				*argp++ = (char*)"-verify_ip";       //[10]
				*argp++ = (char*)servername;         //[11]
			}
		}
		//[12] (or earlier) is NULL terminator

		BB_EXECVP(argv[0], argv);
		/* exec failed: put the real stderr back */
		xmove_fd(3, 2);
# if ENABLE_FEATURE_WGET_HTTPS
		child_failed = 1;
		xfunc_die();
# else
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
# endif
		/* notreached */
	}

	/* Parent */
	free(servername);
	free(allocated);
	close(sp[1]);
# if ENABLE_FEATURE_WGET_HTTPS
	if (child_failed) {
		close(sp[0]);
		return -1;
	}
# endif
	return sp[0];
}
#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100746
#if ENABLE_FEATURE_WGET_HTTPS
/*
 * Put a TLS-speaking child between us and the already connected socket
 * network_fd.
 *
 * host:       server name, optionally with a trailing ":port" (stripped
 *             before use as the TLS server name).
 * network_fd: connected socket.  In the parent it is replaced by our end
 *             of a socketpair whose other end is the child's stdin/stdout.
 * flags:      passed to tls_run_copy_loop(); on the exec path only
 *             TLSLOOP_EXIT_ON_LOCAL_EOF is looked at (adds "-e").
 *
 * Sets WGET_OPT_NO_CHECK_CERT in option_mask32 (printing a note the first
 * time).  Dies if socketpair() fails.
 */
static void spawn_ssl_client(const char *host, int network_fd, int flags)
{
	int pair[2];
	int pid;
	char *sni_name, *colon;

	if (!(option_mask32 & WGET_OPT_NO_CHECK_CERT)) {
		option_mask32 |= WGET_OPT_NO_CHECK_CERT;
		bb_simple_error_msg("note: TLS certificate validation not implemented");
	}

	/* Server name is host with the last ":..." cut off, if any */
	sni_name = xstrdup(host);
	colon = strrchr(sni_name, ':');
	if (colon)
		*colon = '\0';

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_simple_perror_msg_and_die("socketpair");

	fflush_all();
	pid = BB_MMU ? xfork() : xvfork();
	if (pid != 0) {
		/* Parent: from now on network_fd is our end of the socketpair */
		free(sni_name);
		close(pair[1]);
		xmove_fd(pair[0], network_fd);
		return;
	}

	/* Child: the other end of the socketpair becomes stdin and stdout */
	close(pair[0]);
	xmove_fd(pair[1], 0);
	xdup2(0, 1);
	if (BB_MMU) {
		/* Forked: do TLS right here, in the child */
		tls_state_t *tls = new_tls_state();
		tls->ifd = tls->ofd = network_fd;
		tls_handshake(tls, sni_name);
		tls_run_copy_loop(tls, flags);
		exit(0);
	} else {
		/* vforked: exec the ssl_client helper, network socket on fd 3 */
		char *argv[6];

		xmove_fd(network_fd, 3);
		argv[0] = (char*)"ssl_client";
		argv[1] = (char*)"-s3";
		//TODO: if (!is_ip_address(servername))...
		argv[2] = (char*)"-n";
		argv[3] = sni_name;
		argv[4] = NULL;
		if (flags & TLSLOOP_EXIT_ON_LOCAL_EOF)
			argv[4] = (char*)"-e";
		argv[5] = NULL;
		BB_EXECVP(argv[0], argv);
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
	}
	/* notreached */
}
#endif
803
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100804static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
805{
806 FILE *sfp;
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200807 char *pass;
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100808 int port;
809
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100810 sfp = open_socket(lsa);
Denys Vlasenko403f2992018-02-06 15:15:08 +0100811#if ENABLE_FEATURE_WGET_HTTPS
812 if (target->protocol == P_FTPS)
813 spawn_ssl_client(target->host, fileno(sfp), TLSLOOP_EXIT_ON_LOCAL_EOF);
814#endif
815
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100816 if (ftpcmd(NULL, NULL, sfp) != 220)
James Byrne69374872019-07-02 11:35:03 +0200817 bb_simple_error_msg_and_die(G.wget_buf);
Denys Vlasenko34590242018-02-12 16:46:13 +0100818 /* note: ftpcmd() sanitizes G.wget_buf, ok to print */
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100819
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200820 /* Split username:password pair */
821 pass = (char*)"busybox"; /* password for "anonymous" */
822 if (target->user) {
823 pass = strchr(target->user, ':');
824 if (pass)
825 *pass++ = '\0';
826 }
827
828 /* Log in */
829 switch (ftpcmd("USER ", target->user ?: "anonymous", sfp)) {
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100830 case 230:
831 break;
832 case 331:
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200833 if (ftpcmd("PASS ", pass, sfp) == 230)
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100834 break;
835 /* fall through (failed login) */
836 default:
Denys Vlasenko34590242018-02-12 16:46:13 +0100837 bb_error_msg_and_die("ftp login: %s", G.wget_buf);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100838 }
839
840 ftpcmd("TYPE I", NULL, sfp);
841
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200842 /* Query file size */
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100843 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
844 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
845 if (G.content_len < 0 || errno) {
Denys Vlasenko8e2174e2018-04-08 18:06:24 +0200846 bb_error_msg_and_die("bad SIZE value '%s'", G.wget_buf + 4);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100847 }
848 G.got_clen = 1;
849 }
850
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200851 /* Enter passive mode */
Denys Vlasenko1783ffa2018-02-06 15:48:12 +0100852 if (ENABLE_FEATURE_IPV6 && ftpcmd("EPSV", NULL, sfp) == 229) {
853 /* good */
854 } else
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100855 if (ftpcmd("PASV", NULL, sfp) != 227) {
856 pasv_error:
Denys Vlasenko34590242018-02-12 16:46:13 +0100857 bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100858 }
Denys Vlasenko1783ffa2018-02-06 15:48:12 +0100859 port = parse_pasv_epsv(G.wget_buf);
860 if (port < 0)
861 goto pasv_error;
862
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100863 set_nport(&lsa->u.sa, htons(port));
864
865 *dfpp = open_socket(lsa);
866
Denys Vlasenko2b751572018-02-06 20:49:27 +0100867#if ENABLE_FEATURE_WGET_HTTPS
Denys Vlasenko237a9002018-02-08 00:28:30 +0100868 if (target->protocol == P_FTPS) {
869 /* "PROT P" enables encryption of data stream.
870 * Without it (or with "PROT C"), data is sent unencrypted.
871 */
872 if (ftpcmd("PROT P", NULL, sfp) == 200)
873 spawn_ssl_client(target->host, fileno(*dfpp), /*flags*/ 0);
874 }
Denys Vlasenko2b751572018-02-06 20:49:27 +0100875#endif
Denys Vlasenko403f2992018-02-06 15:15:08 +0100876
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100877 if (G.beg_range != 0) {
878 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
879 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
880 G.content_len -= G.beg_range;
881 else
882 reset_beg_range_to_zero();
883 }
884
Denys Vlasenko34590242018-02-12 16:46:13 +0100885//TODO: needs ftp-escaping 0xff and '\n' bytes here.
886//Or disallow '\n' altogether via sanitize_string() in parse_url().
887//But 0xff's are possible in valid utf8 filenames.
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100888 if (ftpcmd("RETR ", target->path, sfp) > 150)
Denys Vlasenko34590242018-02-12 16:46:13 +0100889 bb_error_msg_and_die("bad response to %s: %s", "RETR", G.wget_buf);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100890
891 return sfp;
892}
893
Denys Vlasenko2384a352011-02-15 00:58:36 +0100894static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200895{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200896#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
897# if ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200898 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200899# endif
900 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200901
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200902 polldata.fd = fileno(dfp);
903 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200904#endif
Martin Lewis94e748d2019-01-10 13:59:30 +0100905 if (!(option_mask32 & WGET_OPT_QUIET)) {
906 if (G.output_fd == 1)
907 fprintf(stderr, "writing to stdout\n");
908 else
909 fprintf(stderr, "saving to '%s'\n", G.fname_out);
910 }
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200911 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200912
913 if (G.chunked)
914 goto get_clen;
915
916 /* Loops only if chunked */
917 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100918
919#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
920 /* Must use nonblocking I/O, otherwise fread will loop
921 * and *block* until it reads full buffer,
922 * which messes up progress bar and/or timeout logic.
923 * Because of nonblocking I/O, we need to dance
924 * very carefully around EAGAIN. See explanation at
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200925 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100926 */
927 ndelay_on(polldata.fd);
928#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100929 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200930 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100931 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200932
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200933#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100934 /* fread internally uses read loop, which in our case
935 * is usually exited when we get EAGAIN.
936 * In this case, libc sets error marker on the stream.
937 * Need to clear it before next fread to avoid possible
938 * rare false positive ferror below. Rare because usually
939 * fread gets more than zero bytes, and we don't fall
940 * into if (n <= 0) ...
941 */
942 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100943#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200944 errno = 0;
945 rdsz = sizeof(G.wget_buf);
946 if (G.got_clen) {
947 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
948 if ((int)G.content_len <= 0)
949 break;
950 rdsz = (unsigned)G.content_len;
951 }
952 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100953 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200954
955 if (n > 0) {
956 xwrite(G.output_fd, G.wget_buf, n);
957#if ENABLE_FEATURE_WGET_STATUSBAR
958 G.transferred += n;
959#endif
960 if (G.got_clen) {
961 G.content_len -= n;
962 if (G.content_len == 0)
963 break;
964 }
965#if ENABLE_FEATURE_WGET_TIMEOUT
966 second_cnt = G.timeout_seconds;
967#endif
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100968 goto bump;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200969 }
970
971 /* n <= 0.
972 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100973 * If error occurs, or EOF is reached, the return value
974 * is a short item count (or zero).
975 * fread does not distinguish between EOF and error.
976 */
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200977 if (errno != EAGAIN) {
978 if (ferror(dfp)) {
979 progress_meter(PROGRESS_END);
James Byrne69374872019-07-02 11:35:03 +0200980 bb_simple_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200981 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100982 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200983 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100984
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200985#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
986 /* It was EAGAIN. There is no data. Wait up to one second
987 * then abort if timed out, or update the bar and try reading again.
988 */
989 if (safe_poll(&polldata, 1, 1000) == 0) {
990# if ENABLE_FEATURE_WGET_TIMEOUT
991 if (second_cnt != 0 && --second_cnt == 0) {
992 progress_meter(PROGRESS_END);
James Byrne69374872019-07-02 11:35:03 +0200993 bb_simple_error_msg_and_die("download timed out");
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200994 }
995# endif
996 /* We used to loop back to poll here,
997 * but there is no great harm in letting fread
998 * to try reading anyway.
999 */
1000 }
Denys Vlasenkofaa9e942014-03-27 16:50:29 +01001001#endif
1002 bump:
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001003 /* Need to do it _every_ second for "stalled" indicator
1004 * to be shown properly.
1005 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02001006 progress_meter(PROGRESS_BUMP);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001007 } /* while (reading data) */
1008
Denys Vlasenkoc60f4462011-02-11 22:23:23 +01001009#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
1010 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +01001011 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +01001012#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +02001013 if (!G.chunked)
1014 break;
1015
Denys Vlasenko8e2174e2018-04-08 18:06:24 +02001016 /* Each chunk ends with "\r\n" - eat it */
Denys Vlasenko34590242018-02-12 16:46:13 +01001017 fgets_trim_sanitize(dfp, NULL);
Denys Vlasenko8e2174e2018-04-08 18:06:24 +02001018 get_clen:
1019 /* chunk size format is "HEXNUM[;name[=val]]\r\n" */
1020 fgets_trim_sanitize(dfp, NULL);
1021 errno = 0;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001022 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko8e2174e2018-04-08 18:06:24 +02001023 /*
1024 * Had a bug with inputs like "ffffffff0001f400"
1025 * smashing the heap later. Ensure >= 0.
1026 */
1027 if (G.content_len < 0 || errno)
1028 bb_error_msg_and_die("bad chunk length '%s'", G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001029 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +02001030 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001031 G.got_clen = 1;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001032 /*
1033 * Note that fgets may result in some data being buffered in dfp.
1034 * We loop back to fread, which will retrieve this data.
1035 * Also note that code has to be arranged so that fread
1036 * is done _before_ one-second poll wait - poll doesn't know
1037 * about stdio buffering and can result in spurious one second waits!
1038 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001039 }
1040
Denys Vlasenko9b313dd2019-01-21 13:53:26 +01001041 /* Draw full bar and free its resources */
1042 G.chunked = 0; /* makes it show 100% even for chunked download */
1043 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
1044 progress_meter(PROGRESS_END);
1045 if (G.content_len != 0) {
James Byrne69374872019-07-02 11:35:03 +02001046 bb_simple_perror_msg_and_die("connection closed prematurely");
Denys Vlasenko9b313dd2019-01-21 13:53:26 +01001047 /* GNU wget says "DATE TIME (NN MB/s) - Connection closed at byte NNN. Retrying." */
1048 }
1049
Denys Vlasenko61441242012-06-17 19:52:25 +02001050 /* If -c failed, we restart from the beginning,
1051 * but we do not truncate file then, we do it only now, at the end.
1052 * This lets user to ^C if his 99% complete 10 GB file download
1053 * failed to restart *without* losing the almost complete file.
1054 */
1055 {
1056 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
Denys Vlasenkoe7d853b2020-12-08 19:06:28 +01001057 if (pos != (off_t)-1) {
1058 /* do not truncate if -O- is in use, a user complained about
1059 * "wget -qO- 'http://example.com/empty' >>FILE" truncating FILE.
1060 */
1061 if (!(option_mask32 & WGET_NO_FTRUNCATE))
1062 ftruncate(G.output_fd, pos);
1063 }
Denys Vlasenko61441242012-06-17 19:52:25 +02001064 }
1065
Martin Lewis94e748d2019-01-10 13:59:30 +01001066 if (!(option_mask32 & WGET_OPT_QUIET)) {
1067 if (G.output_fd == 1)
1068 fprintf(stderr, "written to stdout\n");
1069 else
1070 fprintf(stderr, "'%s' saved\n", G.fname_out);
1071 }
Denys Vlasenko7f432802009-06-28 01:02:24 +02001072}
1073
Pere Orga53695632011-02-16 20:09:36 +01001074static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +00001075{
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001076 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001077 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001078 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001079 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +00001080 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001081 char *fname_out_alloc;
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001082 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001083 struct host_info server;
1084 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +00001085
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001086 server.allocated = NULL;
1087 target.allocated = NULL;
1088 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +02001089 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001090
1091 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +00001092
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +00001093 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001094 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +00001095 if (use_proxy) {
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001096 char *proxy = getenv(target.protocol[0] == 'f' ? "ftp_proxy" : "http_proxy");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001097//FIXME: what if protocol is https? Ok to use http_proxy?
Denys Vlasenko2384a352011-02-15 00:58:36 +01001098 use_proxy = (proxy && proxy[0]);
1099 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001100 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +00001101 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001102 if (!use_proxy) {
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001103 server.protocol = target.protocol;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001104 server.port = target.port;
1105 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001106 //free(server.allocated); - can't be non-NULL
1107 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001108 } else {
1109 server.host = target.host;
1110 }
1111 }
1112
1113 if (ENABLE_FEATURE_IPV6)
1114 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001115
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001116 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001117 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001118 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001119 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001120 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001121 if (G.fname_out[0] == '/' || !G.fname_out[0])
1122 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +00001123 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001124 if (G.dir_prefix)
1125 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001126 else {
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001127 /* redirects may free target.path later, need to make a copy */
1128 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001129 }
Eric Andersen29edd002000-12-09 16:55:35 +00001130 }
Denis Vlasenko818322b2007-09-24 18:27:04 +00001131#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001132 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001133#endif
1134
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +00001135 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +01001136 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001137 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001138 G.output_fd = open(G.fname_out, O_WRONLY);
1139 if (G.output_fd >= 0) {
1140 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +00001141 }
1142 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +01001143 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +00001144 }
1145
David Demelier4a9daf22019-08-29 14:05:27 +02001146 redir_limit = 16;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001147 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +00001148 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001149 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001150 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
1151 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
1152 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +00001153 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001154 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +01001155 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1156 G.got_clen = 0;
1157 G.chunked = 0;
Denys Vlasenko403f2992018-02-06 15:15:08 +01001158 if (use_proxy || target.protocol[0] != 'f' /*not ftp[s]*/) {
Eric Andersen79757c92001-04-05 21:45:54 +00001159 /*
1160 * HTTP session
1161 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001162 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001163 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001164
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001165 /* Open socket to http(s) server */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001166#if ENABLE_FEATURE_WGET_OPENSSL
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001167 /* openssl (and maybe internal TLS) support is configured */
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001168 if (server.protocol == P_HTTPS) {
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001169 /* openssl-based helper
1170 * Inconvenient API since we can't give it an open fd
1171 */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001172 int fd = spawn_https_helper_openssl(server.host, server.port);
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001173# if ENABLE_FEATURE_WGET_HTTPS
1174 if (fd < 0) { /* no openssl? try internal */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001175 sfp = open_socket(lsa);
Denys Vlasenko403f2992018-02-06 15:15:08 +01001176 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001177 goto socket_opened;
1178 }
1179# else
1180 /* We don't check for exec("openssl") failure in this case */
1181# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001182 sfp = fdopen(fd, "r+");
1183 if (!sfp)
Denys Vlasenko899ae532018-04-01 19:59:37 +02001184 bb_die_memory_exhausted();
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001185 goto socket_opened;
1186 }
1187 sfp = open_socket(lsa);
1188 socket_opened:
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001189#elif ENABLE_FEATURE_WGET_HTTPS
1190 /* Only internal TLS support is configured */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001191 sfp = open_socket(lsa);
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001192 if (server.protocol == P_HTTPS)
Denys Vlasenko403f2992018-02-06 15:15:08 +01001193 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001194#else
1195 /* ssl (https) support is not configured */
1196 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +01001197#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001198 /* Send HTTP request */
1199 if (use_proxy) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001200 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001201 target.protocol, target.host,
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001202 target.path);
1203 } else {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001204 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001205 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1206 target.path);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001207 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001208 if (!USR_HEADER_HOST)
1209 SENDFMT(sfp, "Host: %s\r\n", target.host);
1210 if (!USR_HEADER_USER_AGENT)
1211 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +00001212
Denys Vlasenko9213a552011-02-10 13:23:45 +01001213 /* Ask server to close the connection as soon as we are done
1214 * (IOW: we do not intend to send more requests)
1215 */
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001216 SENDFMT(sfp, "Connection: close\r\n");
Denys Vlasenko9213a552011-02-10 13:23:45 +01001217
Denis Vlasenko9cade082006-11-21 10:43:02 +00001218#if ENABLE_FEATURE_WGET_AUTHENTICATION
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001219 if (target.user && !USR_HEADER_AUTH) {
1220 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001221 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001222 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001223 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1224 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001225 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001226 }
Eric Andersen79757c92001-04-05 21:45:54 +00001227#endif
1228
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001229 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1230 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +01001231
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001232#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001233 if (G.extra_headers) {
1234 log_io(G.extra_headers);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001235 fputs(G.extra_headers, sfp);
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001236 }
Denis Vlasenko5a2ad692009-03-04 14:13:37 +00001237
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001238 if (option_mask32 & WGET_OPT_POST_DATA) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001239 SENDFMT(sfp,
Denys Vlasenko9213a552011-02-10 13:23:45 +01001240 "Content-Type: application/x-www-form-urlencoded\r\n"
1241 "Content-Length: %u\r\n"
1242 "\r\n"
1243 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +02001244 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +01001245 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001246 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001247#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +01001248 {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001249 SENDFMT(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001250 }
Eric Andersen79757c92001-04-05 21:45:54 +00001251
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001252 fflush(sfp);
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001253
Denys Vlasenko4e08a122017-01-16 17:31:05 +01001254/* Tried doing this unconditionally.
1255 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1256 */
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001257#if SSL_SUPPORTED
1258 if (target.protocol == P_HTTPS) {
1259 /* If we use SSL helper, keeping our end of the socket open for writing
1260 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1261 * even after child closes its copy of the fd.
1262 * This helps:
1263 */
1264 shutdown(fileno(sfp), SHUT_WR);
1265 }
1266#endif
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001267
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001268 /*
1269 * Retrieve HTTP response line and check for "200" status code.
1270 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001271 read_response:
Denys Vlasenko34590242018-02-12 16:46:13 +01001272 fgets_trim_sanitize(sfp, " %s\n");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001273
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001274 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001275 str = skip_non_whitespace(str);
1276 str = skip_whitespace(str);
1277 // FIXME: no error check
1278 // xatou wouldn't work: "200 OK"
1279 status = atoi(str);
1280 switch (status) {
1281 case 0:
1282 case 100:
Denys Vlasenko34590242018-02-12 16:46:13 +01001283 while (get_sanitized_hdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001284 /* eat all remaining headers */;
1285 goto read_response;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001286
1287 /* Success responses */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001288 case 200:
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001289 /* fall through */
1290 case 201: /* 201 Created */
1291/* "The request has been fulfilled and resulted in a new resource being created" */
Denys Vlasenkoef159702016-09-01 11:16:22 +02001292 /* Standard wget is reported to treat this as success */
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001293 /* fall through */
1294 case 202: /* 202 Accepted */
1295/* "The request has been accepted for processing, but the processing has not been completed" */
1296 /* Treat as success: fall through */
1297 case 203: /* 203 Non-Authoritative Information */
1298/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1299 /* fall through */
1300 case 204: /* 204 No Content */
Denis Vlasenko50b5cac2008-06-22 16:28:02 +00001301/*
1302Response 204 doesn't say "null file", it says "metadata
1303has changed but data didn't":
1304
1305"10.2.5 204 No Content
1306The server has fulfilled the request but does not need to return
1307an entity-body, and might want to return updated metainformation.
1308The response MAY include new or updated metainformation in the form
1309of entity-headers, which if present SHOULD be associated with
1310the requested variant.
1311
1312If the client is a user agent, it SHOULD NOT change its document
1313view from that which caused the request to be sent. This response
1314is primarily intended to allow input for actions to take place
1315without causing a change to the user agent's active document view,
1316although any new or updated metainformation SHOULD be applied
1317to the document currently in the user agent's active view.
1318
1319The 204 response MUST NOT include a message-body, and thus
1320is always terminated by the first empty line after the header fields."
1321
1322However, in real world it was observed that some web servers
1323(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1324*/
Denys Vlasenkobf146b82012-06-13 17:31:07 +02001325 if (G.beg_range != 0) {
1326 /* "Range:..." was not honored by the server.
1327 * Restart download from the beginning.
1328 */
1329 reset_beg_range_to_zero();
1330 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001331 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001332 /* 205 Reset Content ?? what to do on this ?? */
1333
Denys Vlasenkofb132e42010-10-29 11:46:52 +02001334 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001335 case 301:
1336 case 302:
1337 case 303:
1338 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001339
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001340 case 206: /* Partial Content */
1341 if (G.beg_range != 0)
1342 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001343 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001344 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001345 /* fall through */
1346 default:
Denys Vlasenko34590242018-02-12 16:46:13 +01001347 bb_error_msg_and_die("server returned error: %s", G.wget_buf);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001348 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001349
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001350 /*
1351 * Retrieve HTTP headers.
1352 */
Denys Vlasenko34590242018-02-12 16:46:13 +01001353 while ((str = get_sanitized_hdr(sfp)) != NULL) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001354 static const char keywords[] ALIGN1 =
1355 "content-length\0""transfer-encoding\0""location\0";
1356 enum {
1357 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1358 };
Matthijs van de Water0d586662009-08-22 20:19:48 +02001359 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001360
Denys Vlasenko34590242018-02-12 16:46:13 +01001361 /* get_sanitized_hdr converted "FOO:" string to lowercase */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001362
Matthijs van de Water0d586662009-08-22 20:19:48 +02001363 /* strip trailing whitespace */
1364 char *s = strchrnul(str, '\0') - 1;
1365 while (s >= str && (*s == ' ' || *s == '\t')) {
1366 *s = '\0';
1367 s--;
1368 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001369 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001370 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001371 G.content_len = BB_STRTOOFF(str, NULL, 10);
1372 if (G.content_len < 0 || errno) {
Denys Vlasenko34590242018-02-12 16:46:13 +01001373 bb_error_msg_and_die("content-length %s is garbage", str);
Eric Andersen79757c92001-04-05 21:45:54 +00001374 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001375 G.got_clen = 1;
1376 continue;
1377 }
1378 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001379 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenko34590242018-02-12 16:46:13 +01001380 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001381 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001382 }
1383 if (key == KEY_location && status >= 300) {
1384 if (--redir_limit == 0)
James Byrne69374872019-07-02 11:35:03 +02001385 bb_simple_error_msg_and_die("too many redirections");
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001386 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001387 if (str[0] == '/') {
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001388 free(redirected_path);
Denys Vlasenko34590242018-02-12 16:46:13 +01001389 target.path = redirected_path = xstrdup(str + 1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001390 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001391 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001392 parse_url(str, &target);
1393 if (!use_proxy) {
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001394 /* server.user remains untouched */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001395 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +01001396 server.allocated = NULL;
Denys Vlasenko9634e8a2018-07-02 18:31:02 +02001397 server.protocol = target.protocol;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001398 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001399 /* strip_ipv6_scope_id(target.host); - no! */
1400 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001401 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +00001402 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001403 goto resolve_lsa;
1404 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +00001405 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001406 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +00001407 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001408 }
1409// if (status >= 300)
1410// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001411
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001412 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +00001413 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001414 } else {
Eric Andersen79757c92001-04-05 21:45:54 +00001415 /*
1416 * FTP session
1417 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001418 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +00001419 }
Denis Vlasenko77105632007-09-24 15:04:00 +00001420
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001421 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001422
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001423 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001424 if (G.output_fd < 0)
1425 G.output_fd = xopen(G.fname_out, G.o_flags);
1426 retrieve_file_data(dfp);
1427 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1428 xclose(G.output_fd);
1429 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001430 }
Martin Lewis46fc3292019-01-04 18:26:04 +01001431 } else {
Martin Lewis94e748d2019-01-10 13:59:30 +01001432 if (!(option_mask32 & WGET_OPT_QUIET))
1433 fprintf(stderr, "remote file exists\n");
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +00001434 }
Eric Andersen79757c92001-04-05 21:45:54 +00001435
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001436 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001437 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +00001438 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001439 if (ftpcmd(NULL, NULL, sfp) != 226)
Denys Vlasenko34590242018-02-12 16:46:13 +01001440 bb_error_msg_and_die("ftp error: %s", G.wget_buf);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001441 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +00001442 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001443 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +00001444
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001445 free(server.allocated);
1446 free(target.allocated);
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001447 free(server.user);
1448 free(target.user);
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001449 free(fname_out_alloc);
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001450 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +00001451}
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001452
1453int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1454int wget_main(int argc UNUSED_PARAM, char **argv)
1455{
1456#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1457 static const char wget_longopts[] ALIGN1 =
1458 /* name, has_arg, val */
1459 "continue\0" No_argument "c"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001460 "quiet\0" No_argument "q"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +01001461 "server-response\0" No_argument "S"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001462 "output-document\0" Required_argument "O"
Martin Lewis64f35362018-12-26 16:28:45 +01001463 "output-file\0" Required_argument "o"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001464 "directory-prefix\0" Required_argument "P"
1465 "proxy\0" Required_argument "Y"
1466 "user-agent\0" Required_argument "U"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001467IF_FEATURE_WGET_TIMEOUT(
1468 "timeout\0" Required_argument "T")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001469 /* Ignored: */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001470IF_DESKTOP( "tries\0" Required_argument "t")
1471 "header\0" Required_argument "\xff"
1472 "post-data\0" Required_argument "\xfe"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001473 "spider\0" No_argument "\xfd"
Denys Vlasenko0972c7f2018-05-28 14:36:26 +02001474 "no-check-certificate\0" No_argument "\xfc"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001475 /* Ignored (we always use PASV): */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001476IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001477 /* Ignored (we don't support caching) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001478IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1479IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1480IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1481IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1482IF_DESKTOP( "no-parent\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001483 ;
Denys Vlasenko036585a2017-08-08 16:38:18 +02001484# define GETOPT32 getopt32long
1485# define LONGOPTS ,wget_longopts
1486#else
1487# define GETOPT32 getopt32
1488# define LONGOPTS
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001489#endif
1490
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001491#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1492 llist_t *headers_llist = NULL;
1493#endif
1494
1495 INIT_G();
1496
Lauri Kasanend074b412013-10-12 21:47:07 +02001497#if ENABLE_FEATURE_WGET_TIMEOUT
1498 G.timeout_seconds = 900;
1499 signal(SIGALRM, alarm_handler);
1500#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001501 G.proxy_flag = "on"; /* use proxies if env vars are set */
1502 G.user_agent = "Wget"; /* "User-Agent" header field */
1503
Denys Vlasenko22542ec2017-08-08 21:55:02 +02001504 GETOPT32(argv, "^"
Martin Lewis64f35362018-12-26 16:28:45 +01001505 "cqSO:o:P:Y:U:T:+"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001506 /*ignored:*/ "t:"
1507 /*ignored:*/ "n::"
1508 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1509 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1510 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1511 * -nH --no-host-directories: wget -r http://host/ won't create host/
1512 * -np --no-parent
1513 * "n::" above says that we accept -n[ARG].
1514 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1515 */
Denys Vlasenko22542ec2017-08-08 21:55:02 +02001516 "\0"
1517 "-1" /* at least one URL */
1518 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::") /* --header is a list */
Denys Vlasenko036585a2017-08-08 16:38:18 +02001519 LONGOPTS
Martin Lewis64f35362018-12-26 16:28:45 +01001520 , &G.fname_out, &G.fname_log, &G.dir_prefix,
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001521 &G.proxy_flag, &G.user_agent,
1522 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001523 NULL, /* -t RETRIES */
1524 NULL /* -n[ARG] */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001525 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1526 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1527 );
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001528#if 0 /* option bits debug */
1529 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1530 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1531 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1532 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1533 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
Denys Vlasenko0972c7f2018-05-28 14:36:26 +02001534 if (option_mask32 & WGET_OPT_NO_CHECK_CERT) bb_error_msg("--no-check-certificate");
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001535 exit(0);
1536#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001537 argv += optind;
1538
1539#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1540 if (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001541 int size = 0;
1542 char *hdr;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001543 llist_t *ll = headers_llist;
1544 while (ll) {
1545 size += strlen(ll->data) + 2;
1546 ll = ll->link;
1547 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001548 G.extra_headers = hdr = xmalloc(size + 1);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001549 while (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001550 int bit;
1551 const char *words;
1552
1553 size = sprintf(hdr, "%s\r\n",
1554 (char*)llist_pop(&headers_llist));
1555 /* a bit like index_in_substrings but don't match full key */
1556 bit = 1;
1557 words = wget_user_headers;
1558 while (*words) {
1559 if (strstr(hdr, words) == hdr) {
1560 G.user_headers |= bit;
1561 break;
1562 }
1563 bit <<= 1;
1564 words += strlen(words) + 1;
1565 }
1566 hdr += size;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001567 }
1568 }
1569#endif
1570
Denys Vlasenko2384a352011-02-15 00:58:36 +01001571 G.output_fd = -1;
1572 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1573 if (G.fname_out) { /* -O FILE ? */
1574 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1575 G.output_fd = 1;
Denys Vlasenkoe7d853b2020-12-08 19:06:28 +01001576 option_mask32 = (option_mask32 & (~WGET_OPT_CONTINUE)) | WGET_NO_FTRUNCATE;
Denys Vlasenko2384a352011-02-15 00:58:36 +01001577 }
1578 /* compat with wget: -O FILE can overwrite */
1579 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1580 }
1581
Martin Lewis64f35362018-12-26 16:28:45 +01001582 G.log_fd = -1;
1583 if (G.fname_log) { /* -o FILE ? */
1584 if (!LONE_DASH(G.fname_log)) { /* not -o - ? */
1585 /* compat with wget: -o FILE can overwrite */
1586 G.log_fd = xopen(G.fname_log, O_WRONLY | O_CREAT | O_TRUNC);
1587 /* Redirect only stderr to log file, so -O - will work */
1588 xdup2(G.log_fd, STDERR_FILENO);
1589 }
1590 }
1591
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001592 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +01001593 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001594
Denys Vlasenko28556b92011-02-15 11:03:53 +01001595 if (G.output_fd >= 0)
1596 xclose(G.output_fd);
1597
Martin Lewis64f35362018-12-26 16:28:45 +01001598 if (G.log_fd >= 0)
1599 xclose(G.log_fd);
1600
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +02001601#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1602 free(G.extra_headers);
1603#endif
1604 FINI_G();
1605
Pere Orga53695632011-02-16 20:09:36 +01001606 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001607}