blob: 9ec0e67b989f0f9956dc95b739a1d398402224fa [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020011//config:config WGET
Denys Vlasenkob097a842018-12-28 03:20:17 +010012//config: bool "wget (38 kb)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020013//config: default y
14//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020015//config: wget is a utility for non-interactive download of files from HTTP
16//config: and FTP servers.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020017//config:
Denys Vlasenkof5604222017-01-10 14:58:54 +010018//config:config FEATURE_WGET_LONG_OPTIONS
19//config: bool "Enable long options"
20//config: default y
21//config: depends on WGET && LONG_OPTS
22//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020023//config:config FEATURE_WGET_STATUSBAR
Denys Vlasenkof5604222017-01-10 14:58:54 +010024//config: bool "Enable progress bar (+2k)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020025//config: default y
26//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020027//config:
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +020028//config:config FEATURE_WGET_FTP
29//config: bool "Enable FTP protocol (+1k)"
30//config: default y
31//config: depends on WGET
32//config: help
33//config: To support FTPS, enable FEATURE_WGET_HTTPS as well.
34//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020035//config:config FEATURE_WGET_AUTHENTICATION
36//config: bool "Enable HTTP authentication"
37//config: default y
38//config: depends on WGET
39//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020040//config: Support authenticated HTTP transfers.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020041//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020042//config:config FEATURE_WGET_TIMEOUT
43//config: bool "Enable timeout option -T SEC"
44//config: default y
45//config: depends on WGET
46//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020047//config: Supports network read and connect timeouts for wget,
48//config: so that wget will give up and timeout, through the -T
49//config: command line option.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020050//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020051//config: Currently only connect and network data read timeout are
52//config: supported (i.e., timeout is not applied to the DNS query). When
53//config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
54//config: will work in addition to -T.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020055//config:
Denys Vlasenko9a647c32017-01-23 01:08:16 +010056//config:config FEATURE_WGET_HTTPS
57//config: bool "Support HTTPS using internal TLS code"
58//config: default y
59//config: depends on WGET
60//config: select TLS
61//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020062//config: wget will use internal TLS code to connect to https:// URLs.
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +020063//config: It also enables FTPS support, but it's not well tested yet.
Denys Vlasenko72089cf2017-07-21 09:50:55 +020064//config: Note:
65//config: On NOMMU machines, ssl_helper applet should be available
66//config: in the $PATH for this to work. Make sure to select that applet.
Denys Vlasenko9a647c32017-01-23 01:08:16 +010067//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020068//config: Note: currently, TLS code only makes TLS I/O work, it
69//config: does *not* check that the peer is who it claims to be, etc.
70//config: IOW: it uses peer-supplied public keys to establish encryption
71//config: and signing keys, then encrypts and signs outgoing data and
72//config: decrypts incoming data.
73//config: It does not check signature hashes on the incoming data:
74//config: this means that attackers manipulating TCP packets can
75//config: send altered data and we unknowingly receive garbage.
76//config: (This check might be relatively easy to add).
77//config: It does not check public key's certificate:
78//config: this means that the peer may be an attacker impersonating
79//config: the server we think we are talking to.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010080//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020081//config: If you think this is unacceptable, consider this. As more and more
82//config: servers switch to HTTPS-only operation, without such "crippled"
83//config: TLS code it is *impossible* to simply download a kernel source
84//config: from kernel.org. Which can in real world translate into
85//config: "my small automatic tooling to build cross-compilers from sources
86//config: no longer works, I need to additionally keep a local copy
87//config: of ~4 megabyte source tarball of a SSL library and ~2 megabyte
//config:	  source of wget, need to compile and build both before I can
89//config: download anything. All this despite the fact that the build
90//config: is done in a QEMU sandbox on a machine with absolutely nothing
91//config: worth stealing, so I don't care if someone would go to a lot
92//config: of trouble to intercept my HTTPS download to send me an altered
93//config: kernel tarball".
Denys Vlasenko67f6db62017-01-30 16:27:37 +010094//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020095//config: If you still think this is unacceptable, send patches.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010096//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020097//config: If you still think this is unacceptable, do not want to send
//config:	  patches, but do want to waste bandwidth explaining how wrong
99//config: it is, you will be ignored.
Denys Vlasenko67f6db62017-01-30 16:27:37 +0100100//config:
Dimitri John Ledkov45fa3f12020-05-19 18:20:39 +0100101//config: FEATURE_WGET_OPENSSL does implement TLS verification
102//config: using the certificates available to OpenSSL.
103//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +0200104//config:config FEATURE_WGET_OPENSSL
105//config: bool "Try to connect to HTTPS using openssl"
106//config: default y
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200107//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200108//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200109//config: Try to use openssl to handle HTTPS.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200110//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200111//config: OpenSSL has a simple SSL client for debug purposes.
112//config: If you select this option, wget will effectively run:
113//config: "openssl s_client -quiet -connect hostname:443
114//config: -servername hostname 2>/dev/null" and pipe its data
115//config: through it. -servername is not used if hostname is numeric.
116//config: Note inconvenient API: host resolution is done twice,
117//config: and there is no guarantee openssl's idea of IPv6 address
118//config: format is the same as ours.
119//config: Another problem is that s_client prints debug information
120//config: to stderr, and it needs to be suppressed. This means
121//config: all error messages get suppressed too.
122//config: openssl is also a big binary, often dynamically linked
123//config: against ~15 libraries.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200124//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200125//config: If openssl can't be executed, internal TLS code will be used
126//config: (if you enabled it); if openssl can be executed but fails later,
127//config: wget can't detect this, and download will fail.
Dimitri John Ledkov45fa3f12020-05-19 18:20:39 +0100128//config:
129//config: By default TLS verification is performed, unless
130//config: --no-check-certificate option is passed.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200131
132//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
133
134//kbuild:lib-$(CONFIG_WGET) += wget.o
135
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100136//usage:#define wget_trivial_usage
137//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Ron Yorstoned9aa892021-09-28 09:27:40 +0100138//usage: "[-cqS] [--spider] [-O FILE] [-o LOGFILE] [--header STR]\n"
139//usage: " [--post-data STR | --post-file FILE] [-Y on/off]\n"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200140/* Since we ignore these opts, we don't show them in --help */
Dimitri John Ledkov45fa3f12020-05-19 18:20:39 +0100141/* //usage: " [--no-cache] [--passive-ftp] [-t TRIES]" */
Denys Vlasenko92e1b082015-10-20 21:51:52 +0200142/* //usage: " [-nv] [-nc] [-nH] [-np]" */
Denys Vlasenkoa2f18d92020-12-18 04:12:51 +0100143//usage: " "IF_FEATURE_WGET_OPENSSL("[--no-check-certificate] ")"[-P DIR] [-U AGENT]"IF_FEATURE_WGET_TIMEOUT(" [-T SEC]")" URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100144//usage: )
145//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkoa2f18d92020-12-18 04:12:51 +0100146//usage: "[-cqS] [-O FILE] [-o LOGFILE] [-Y on/off] [-P DIR] [-U AGENT]"IF_FEATURE_WGET_TIMEOUT(" [-T SEC]")" URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100147//usage: )
148//usage:#define wget_full_usage "\n\n"
149//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200150//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100151//usage: "\n --spider Only check URL existence: $? is 0 if exists"
Ron Yorstoned9aa892021-09-28 09:27:40 +0100152//usage: "\n --header STR Add STR (of form 'header: value') to headers"
153//usage: "\n --post-data STR Send STR using POST method"
154//usage: "\n --post-file FILE Send FILE using POST method"
Dimitri John Ledkov45fa3f12020-05-19 18:20:39 +0100155//usage: IF_FEATURE_WGET_OPENSSL(
156//usage: "\n --no-check-certificate Don't validate the server's certificate"
157//usage: )
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100158//usage: )
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200159//usage: "\n -c Continue retrieval of aborted transfer"
160//usage: "\n -q Quiet"
161//usage: "\n -P DIR Save to DIR (default .)"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100162//usage: "\n -S Show server response"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200163//usage: IF_FEATURE_WGET_TIMEOUT(
164//usage: "\n -T SEC Network read timeout is SEC seconds"
165//usage: )
166//usage: "\n -O FILE Save to FILE ('-' for stdout)"
Denys Vlasenkoa2f18d92020-12-18 04:12:51 +0100167//usage: "\n -o LOGFILE Log messages to FILE"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200168//usage: "\n -U STR Use STR for User-Agent header"
169//usage: "\n -Y on/off Use proxy"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100170
Denis Vlasenkob6adbf12007-05-26 19:00:18 +0000171#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000172
/*
 * Protocol I/O tracing helpers.
 *
 * log_io()  - printf-style trace of one protocol line; compiled out unless
 *             the "#if 0" below is changed to "#if 1".
 * SENDFMT() - fprintf() to the server stream; in the tracing build it also
 *             logs the outgoing line first.
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
/* No ';' after "while (0)": the caller supplies it. A trailing ';' here
 * would turn "if (x) SENDFMT(...); else ..." into a syntax error. */
# define SENDFMT(fp, fmt, ...) \
	do { \
		log_io("> " fmt, ##__VA_ARGS__); \
		fprintf(fp, fmt, ##__VA_ARGS__); \
	} while (0)
#else
# define log_io(...) ((void)0)
# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100184
185
/* TLS is available through either the openssl helper or the internal TLS code */
#define SSL_SUPPORTED  (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)
/* ftps:// needs both FTP support and the internal TLS code */
#define FTPS_SUPPORTED (ENABLE_FEATURE_WGET_FTP && ENABLE_FEATURE_WGET_HTTPS)
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100188
/* Pieces of one URL, as filled in by parse_url() */
struct host_info {
	char *allocated;      /* xstrdup'ed copy of the URL; parse_url() frees the previous one */
	const char *path;     /* text after the first '/', '?' or '#' ("" if there is none) */
	char *user;           /* percent-decoded, xstrdup'ed text before the last '@' in the host part, if any */
	const char *protocol; /* one of the P_xxx scheme strings */
	char *host;           /* host part of the URL */
	int port;             /* as set by parse_url(): bb_lookup_std_port() result for the scheme */
};
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200197static const char P_HTTP[] ALIGN1 = "http";
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100198#if SSL_SUPPORTED
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200199static const char P_HTTPS[] ALIGN1 = "https";
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100200#endif
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200201#if ENABLE_FEATURE_WGET_FTP
202static const char P_FTP[] ALIGN1 = "ftp";
203#endif
204#if FTPS_SUPPORTED
205static const char P_FTPS[] ALIGN1 = "ftps";
206#endif
Eric Andersen79757c92001-04-05 21:45:54 +0000207
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* User-specified headers prevent using our corresponding built-in headers. */
/* One bit per header name; the two auth bits collapse to 0 when
 * authentication support is compiled out. */
enum {
	HDR_HOST         = (1<<0),
	HDR_USER_AGENT   = (1<<1),
	HDR_RANGE        = (1<<2),
	HDR_CONTENT_TYPE = (1<<3),
	HDR_AUTH         = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
	HDR_PROXY_AUTH   = (1<<5) * ENABLE_FEATURE_WGET_AUTHENTICATION,
};
/* NUL-separated header names, listed in the same order as the HDR_xxx bits */
static const char wget_user_headers[] ALIGN1 =
	"Host:\0"
	"User-Agent:\0"
	"Range:\0"
	"Content-Type:\0"
# if ENABLE_FEATURE_WGET_AUTHENTICATION
	"Authorization:\0"
	"Proxy-Authorization:\0"
# endif
	;
/* Nonzero if the corresponding bit is set in G.user_headers */
# define USR_HEADER_HOST         (G.user_headers & HDR_HOST)
# define USR_HEADER_USER_AGENT   (G.user_headers & HDR_USER_AGENT)
# define USR_HEADER_RANGE        (G.user_headers & HDR_RANGE)
# define USR_HEADER_CONTENT_TYPE (G.user_headers & HDR_CONTENT_TYPE)
# define USR_HEADER_AUTH         (G.user_headers & HDR_AUTH)
# define USR_HEADER_PROXY_AUTH   (G.user_headers & HDR_PROXY_AUTH)
#else /* No long options, no user-headers :( */
# define USR_HEADER_HOST         0
# define USR_HEADER_USER_AGENT   0
# define USR_HEADER_RANGE        0
# define USR_HEADER_CONTENT_TYPE 0
# define USR_HEADER_AUTH         0
# define USR_HEADER_PROXY_AUTH   0
#endif
Denis Vlasenko77105632007-09-24 15:04:00 +0000242
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200243/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +0000244struct globals {
245 off_t content_len; /* Content-length of the file */
246 off_t beg_range; /* Range at which continue begins */
247#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +0000248 off_t transferred; /* Number of bytes transferred so far */
249 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +0100250 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +0000251#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200252 char *dir_prefix;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100253#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200254 char *post_data;
Ron Yorstoned9aa892021-09-28 09:27:40 +0100255 char *post_file;
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200256 char *extra_headers;
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +0100257 unsigned char user_headers; /* Headers mentioned by the user */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100258#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200259 char *fname_out; /* where to direct output (-O) */
Martin Lewis64f35362018-12-26 16:28:45 +0100260 char *fname_log; /* where to direct log (-o) */
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200261 const char *proxy_flag; /* Use proxies if env vars are set */
262 const char *user_agent; /* "User-Agent" header field */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100263 int output_fd;
Martin Lewis64f35362018-12-26 16:28:45 +0100264 int log_fd;
Denys Vlasenko2384a352011-02-15 00:58:36 +0100265 int o_flags;
Denys Vlasenko5084bae2018-11-24 21:56:21 +0100266#if ENABLE_FEATURE_WGET_TIMEOUT
267 unsigned timeout_seconds;
268 smallint die_if_timed_out;
269#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200270 smallint chunked; /* chunked transfer encoding */
271 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100272 /* Local downloads do benefit from big buffer.
273 * With 512 byte buffer, it was measured to be
274 * an order of magnitude slower than with big one.
275 */
Denys Vlasenko9b313dd2019-01-21 13:53:26 +0100276 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024] ALIGNED(16);
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +0100277} FIX_ALIASING;
/* Accessor for the applet state */
#define G (*ptr_to_globals)
/* Allocate the zero-filled state block */
#define INIT_G() do { \
	SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
} while (0)
/* Release the state block */
#define FINI_G() do { \
	FREE_PTR_TO_GLOBALS(); \
} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +0000285
286
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200287/* Must match option string! */
288enum {
289 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200290 WGET_OPT_QUIET = (1 << 1),
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100291 WGET_OPT_SERVER_RESPONSE = (1 << 2),
292 WGET_OPT_OUTNAME = (1 << 3),
Martin Lewis64f35362018-12-26 16:28:45 +0100293 WGET_OPT_LOGNAME = (1 << 4),
294 WGET_OPT_PREFIX = (1 << 5),
295 WGET_OPT_PROXY = (1 << 6),
296 WGET_OPT_USER_AGENT = (1 << 7),
297 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8),
298 WGET_OPT_RETRIES = (1 << 9),
299 WGET_OPT_nsomething = (1 << 10),
300 WGET_OPT_HEADER = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
301 WGET_OPT_POST_DATA = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
302 WGET_OPT_SPIDER = (1 << 13) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
303 WGET_OPT_NO_CHECK_CERT = (1 << 14) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
Ron Yorstoned9aa892021-09-28 09:27:40 +0100304 WGET_OPT_POST_FILE = (1 << 15) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
Denys Vlasenkoe7d853b2020-12-08 19:06:28 +0100305 /* hijack this bit for other than opts purposes: */
306 WGET_NO_FTRUNCATE = (1 << 31)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200307};
308
Ron Yorstoned9aa892021-09-28 09:27:40 +0100309#define WGET_OPT_POST (WGET_OPT_POST_DATA | WGET_OPT_POST_FILE)
310
/* Values accepted by progress_meter() */
enum {
	PROGRESS_END   = 0,
	PROGRESS_BUMP  = 1,
	PROGRESS_START = -1,
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000316#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000317static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000318{
Denys Vlasenko26602b82018-11-23 19:14:52 +0100319 int notty;
320
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200321 if (option_mask32 & WGET_OPT_QUIET)
322 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000323
Martin Lewis64f35362018-12-26 16:28:45 +0100324 /* Don't save progress to log file */
325 if (G.log_fd >= 0)
326 return;
327
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200328 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100329 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000330
Denys Vlasenko26602b82018-11-23 19:14:52 +0100331 notty = bb_progress_update(&G.pmt,
Denys Vlasenko2384a352011-02-15 00:58:36 +0100332 G.beg_range,
333 G.transferred,
334 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
335 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000336
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200337 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100338 bb_progress_free(&G.pmt);
Denys Vlasenko26602b82018-11-23 19:14:52 +0100339 if (notty == 0)
340 bb_putchar_stderr('\n'); /* it's tty */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100341 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000342 }
343}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200344#else
Denys Vlasenko8c317f02019-05-14 17:26:47 +0200345static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) {}
Eric Andersenb520e082000-10-03 00:21:45 +0000346#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000347
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000348
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits 'host' in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Strings that are not of the form "[...%zone]" or "[...%zone]:port"
 * are left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* A '%' located after the closing bracket (e.g. "[::1]:80%x") is not
	 * a zone id. Copying "]..." forward onto it would make the string
	 * longer and write past the end of the buffer. */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]..." */
	/* Regions overlap, hence memmove; +1 copies the terminating NUL */
	memmove(scope, cp, strlen(cp) + 1);
}
384
Denis Vlasenko9cade082006-11-21 10:43:02 +0000385#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100386/* Base64-encode character string. */
387static char *base64enc(const char *str)
Denis Vlasenko3526a132006-09-09 12:20:57 +0000388{
Denys Vlasenko5084bae2018-11-24 21:56:21 +0100389 /* paranoia */
390 unsigned len = strnlen(str, sizeof(G.wget_buf)/4*3 - 10);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100391 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
392 return G.wget_buf;
Eric Andersen79757c92001-04-05 21:45:54 +0000393}
394#endif
395
#if ENABLE_FEATURE_WGET_TIMEOUT
/* SIGALRM handler: abort the download, but only while a guarded
 * operation (between set_alarm() and clear_alarm()) is in progress */
static void alarm_handler(int sig UNUSED_PARAM)
{
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	if (!G.die_if_timed_out)
		return;
	bb_simple_error_msg_and_die("download timed out");
}
/* Arm the timeout; a no-op when no timeout is configured */
static void set_alarm(void)
{
	if (G.timeout_seconds == 0)
		return;
	alarm(G.timeout_seconds);
	G.die_if_timed_out = 1;
}
/* The pending alarm is not cancelled, it is merely made harmless */
# define clear_alarm() ((void)(G.die_if_timed_out = 0))
#else
# define set_alarm() ((void)0)
# define clear_alarm() ((void)0)
#endif
415
Denys Vlasenkoed727612016-07-25 21:34:57 +0200416#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * is_ip_address() reports whether a string is a literal IPv4 address,
 * or (when IPv6 support is compiled in) a literal IPv6 address,
 * as opposed to a host name. inet_pton() does the actual parsing.
 * Returns 1 for an address, 0 otherwise.
 */
static int is_ip_address(const char *string)
{
	struct in_addr addr4;
	int rc;

	rc = inet_pton(AF_INET, string, &addr4);
# if ENABLE_FEATURE_IPV6
	/* 0 means "not a valid IPv4 literal": give IPv6 a try */
	if (rc == 0) {
		struct in6_addr addr6;
		rc = inet_pton(AF_INET6, string, &addr6);
	}
# endif
	return (rc == 1);
}
435#endif
436
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000437static FILE *open_socket(len_and_sockaddr *lsa)
438{
Lauri Kasanend074b412013-10-12 21:47:07 +0200439 int fd;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000440 FILE *fp;
441
Denys Vlasenko6701e912016-03-17 15:58:16 +0100442 set_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200443 fd = xconnect_stream(lsa);
Denys Vlasenko6701e912016-03-17 15:58:16 +0100444 clear_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200445
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000446 /* glibc 2.4 seems to try seeking on it - ??! */
447 /* hopefully it understands what ESPIPE means... */
Lauri Kasanend074b412013-10-12 21:47:07 +0200448 fp = fdopen(fd, "r+");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100449 if (!fp)
Denys Vlasenko899ae532018-04-01 19:59:37 +0200450 bb_die_memory_exhausted();
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000451
452 return fp;
453}
454
/* We balk at any control chars in other side's messages.
 * This prevents nasty surprises (e.g. ESC sequences) in "Location:" URLs
 * and error messages.
 *
 * The only exception is tabs, which are converted to (one) space:
 * HTTP's "headers: <whitespace> values" may have those.
 *
 * The string is cut, in place, at the first byte below 0x20 that is
 * not a tab. Returns 's'.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cur;

	for (cur = (unsigned char *) s; *cur != '\0'; cur++) {
		if (*cur >= ' ')
			continue;       /* ordinary byte, keep it */
		if (*cur != '\t')
			break;          /* control char: cut here */
		*cur = ' ';
	}
	*cur = '\0';
	return s;
}
476
Denys Vlasenkof836f012011-02-10 23:02:28 +0100477/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
Denys Vlasenko34590242018-02-12 16:46:13 +0100478static char fgets_trim_sanitize(FILE *fp, const char *fmt)
Denys Vlasenkof836f012011-02-10 23:02:28 +0100479{
480 char c;
481 char *buf_ptr;
482
Denys Vlasenko6701e912016-03-17 15:58:16 +0100483 set_alarm();
Denys Vlasenko34590242018-02-12 16:46:13 +0100484 if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
James Byrne69374872019-07-02 11:35:03 +0200485 bb_simple_perror_msg_and_die("error getting response");
Denys Vlasenko6701e912016-03-17 15:58:16 +0100486 clear_alarm();
Denys Vlasenkof836f012011-02-10 23:02:28 +0100487
488 buf_ptr = strchrnul(G.wget_buf, '\n');
489 c = *buf_ptr;
Denys Vlasenko34590242018-02-12 16:46:13 +0100490#if 1
491 /* Disallow any control chars: trim at first char < 0x20 */
492 sanitize_string(G.wget_buf);
493#else
Denys Vlasenkof836f012011-02-10 23:02:28 +0100494 *buf_ptr = '\0';
495 buf_ptr = strchrnul(G.wget_buf, '\r');
496 *buf_ptr = '\0';
Denys Vlasenko34590242018-02-12 16:46:13 +0100497#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100498
499 log_io("< %s", G.wget_buf);
500
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100501 if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE))
502 fprintf(stderr, fmt, G.wget_buf);
503
Denys Vlasenkof836f012011-02-10 23:02:28 +0100504 return c;
505}
506
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200507#if ENABLE_FEATURE_WGET_FTP
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100508static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000509{
510 int result;
511 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100512 if (!s2)
513 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000514 fprintf(fp, "%s%s\r\n", s1, s2);
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100515 /* With --server-response, wget also shows its ftp commands */
516 if (option_mask32 & WGET_OPT_SERVER_RESPONSE)
517 fprintf(stderr, "--> %s%s\n\n", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000518 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100519 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000520 }
521
Denys Vlasenko34590242018-02-12 16:46:13 +0100522 /* Read until "Nxx something" is received */
523 G.wget_buf[3] = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000524 do {
Denys Vlasenko34590242018-02-12 16:46:13 +0100525 fgets_trim_sanitize(fp, "%s\n");
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100526 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000527
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100528 G.wget_buf[3] = '\0';
529 result = xatoi_positive(G.wget_buf);
530 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000531 return result;
532}
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200533#endif
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000534
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100535static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000536{
537 char *url, *p, *sp;
538
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100539 free(h->allocated);
540 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000541
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200542 h->protocol = P_HTTP;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100543 p = strstr(url, "://");
544 if (p) {
545 *p = '\0';
546 h->host = p + 3;
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200547#if ENABLE_FEATURE_WGET_FTP
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100548 if (strcmp(url, P_FTP) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200549 h->port = bb_lookup_std_port(P_FTP, "tcp", 21);
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200550 h->protocol = P_FTP;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100551 } else
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200552#endif
553#if FTPS_SUPPORTED
Denys Vlasenko403f2992018-02-06 15:15:08 +0100554 if (strcmp(url, P_FTPS) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200555 h->port = bb_lookup_std_port(P_FTPS, "tcp", 990);
Denys Vlasenko403f2992018-02-06 15:15:08 +0100556 h->protocol = P_FTPS;
557 } else
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200558#endif
559#if SSL_SUPPORTED
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100560 if (strcmp(url, P_HTTPS) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200561 h->port = bb_lookup_std_port(P_HTTPS, "tcp", 443);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100562 h->protocol = P_HTTPS;
563 } else
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100564#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100565 if (strcmp(url, P_HTTP) == 0) {
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200566 goto http;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100567 } else {
568 *p = ':';
Denys Vlasenko34590242018-02-12 16:46:13 +0100569 bb_error_msg_and_die("not an http or ftp url: %s", url);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100570 }
571 } else {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100572 // GNU wget is user-friendly and falls back to http://
573 h->host = url;
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200574 http:
575 h->port = bb_lookup_std_port(P_HTTP, "tcp", 80);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100576 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000577
578 // FYI:
579 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
Denys Vlasenkoa0aae9f2017-01-20 14:12:10 +0100580 // 'GET /?var=a/b HTTP/1.0'
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000581 // and saves 'index.html?var=a%2Fb' (we save 'b')
582 // wget 'http://busybox.net?login=john@doe':
583 // request: 'GET /?login=john@doe HTTP/1.0'
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200584 // saves: 'index.html?login=john@doe' (we save 'login=john@doe')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000585 // wget 'http://busybox.net#test/test':
586 // request: 'GET / HTTP/1.0'
587 // saves: 'index.html' (we save 'test')
588 //
589 // We also don't add unique .N suffix if file exists...
590 sp = strchr(h->host, '/');
591 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
592 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
593 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000594 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000595 } else if (*sp == '/') {
596 *sp = '\0';
597 h->path = sp + 1;
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200598 } else {
599 // sp points to '#' or '?'
600 // Note:
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000601 // http://busybox.net?login=john@doe is a valid URL
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200602 // (without '/' between ".net" and "?"),
603 // can't store NUL at sp[-1] - this destroys hostname.
604 *sp++ = '\0';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000605 h->path = sp;
606 }
607
608 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000609 if (sp != NULL) {
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200610 // URL-decode "user:password" string before base64-encoding:
611 // wget http://test:my%20pass@example.com should send
612 // Authorization: Basic dGVzdDpteSBwYXNz
613 // which decodes to "test:my pass".
614 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000615 *sp = '\0';
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100616 free(h->user);
617 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000618 h->host = sp + 1;
619 }
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100620 /* else: h->user remains NULL, or as set by original request
621 * before redirect (if we are here after a redirect).
622 */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000623}
624
Denys Vlasenko34590242018-02-12 16:46:13 +0100625static char *get_sanitized_hdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000626{
627 char *s, *hdrval;
628 int c;
629
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000630 /* retrieve header line */
Denys Vlasenko34590242018-02-12 16:46:13 +0100631 c = fgets_trim_sanitize(fp, " %s\n");
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000632
Denys Vlasenkof836f012011-02-10 23:02:28 +0100633 /* end of the headers? */
634 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000635 return NULL;
636
637 /* convert the header name to lower case */
Denys Vlasenkoea267d52013-07-01 15:01:50 +0200638 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
639 /*
640 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
641 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
642 * "A-Z" maps to "a-z".
643 * "@[\]" can't occur in header names.
644 * "^_" maps to "~,DEL" (which is wrong).
645 * "^" was never seen yet, "_" was seen from web.archive.org
646 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
647 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100648 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200649 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000650
651 /* verify we are at the end of the header name */
652 if (*s != ':')
Denys Vlasenko34590242018-02-12 16:46:13 +0100653 bb_error_msg_and_die("bad header line: %s", G.wget_buf);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000654
655 /* locate the start of the header value */
656 *s++ = '\0';
657 hdrval = skip_whitespace(s);
658
Denys Vlasenkof836f012011-02-10 23:02:28 +0100659 if (c != '\n') {
660 /* Rats! The buffer isn't big enough to hold the entire header value */
661 while (c = getc(fp), c != EOF && c != '\n')
662 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000663 }
664
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000665 return hdrval;
666}
667
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200668static void reset_beg_range_to_zero(void)
669{
James Byrne69374872019-07-02 11:35:03 +0200670 bb_simple_error_msg("restart failed");
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200671 G.beg_range = 0;
672 xlseek(G.output_fd, 0, SEEK_SET);
Denys Vlasenko61441242012-06-17 19:52:25 +0200673 /* Done at the end instead: */
674 /* ftruncate(G.output_fd, 0); */
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200675}
676
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Start "openssl s_client" as an external TLS helper for host:port.
 *
 * host: server name or address, optionally already carrying ":port".
 *       If it contains no ':' at all, ":<port>" is appended.
 * port: used only when host has no ':' in it.
 *
 * The child gets one end of a socketpair as its stdin and stdout;
 * the other end is returned to the caller.
 *
 * Returns the parent's end of the socketpair. If FEATURE_WGET_HTTPS is
 * enabled and the child flagged that exec failed, returns -1 instead
 * (the caller then tries the internal TLS code).
 * Dies if socketpair() fails.
 */
static int spawn_https_helper_openssl(const char *host, unsigned port)
{
	char *allocated = NULL;
	char *servername;
	int sp[2];
	int pid;
	/* Set by the child on exec failure and read by the parent below;
	 * presumably relies on the xvfork() child sharing memory with
	 * the parent - TODO confirm.
	 */
	IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_simple_perror_msg_and_die("socketpair");

	/* From here on, host always contains at least one ':' */
	if (!strchr(host, ':'))
		host = allocated = xasprintf("%s:%u", host, port);
	/* servername = host with everything from the last ':' removed */
	/* NOTE(review): a bare IPv6 literal without port would lose its
	 * last group here - verify callers always pass "[addr]:port".
	 */
	servername = xstrdup(host);
	strrchr(servername, ':')[0] = '\0';

	fflush_all();
	pid = xvfork();
	if (pid == 0) {
		/* Child */
		char *argv[13];
		char **argp;

		close(sp[0]);
		xmove_fd(sp[1], 0);
		xdup2(0, 1);
		/*
		 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
		 * It prints some debug stuff on stderr, don't know how to suppress it.
		 * Work around by dev-nulling stderr. We lose all error messages :(
		 */
		/* Park the real stderr on fd 3 so it can be restored if exec fails */
		xmove_fd(2, 3);
		xopen("/dev/null", O_RDWR);
		memset(&argv, 0, sizeof(argv));
		argv[0] = (char*)"openssl";
		argv[1] = (char*)"s_client";
		argv[2] = (char*)"-quiet";
		argv[3] = (char*)"-connect";
		argv[4] = (char*)host;
		/*
		 * Per RFC 6066 Section 3, the only permitted values in the
		 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
		 * IPv4 and IPv6 addresses, port numbers are not allowed.
		 */
		argp = &argv[5];
		if (!is_ip_address(servername)) {
			*argp++ = (char*)"-servername"; //[5]
			*argp++ = (char*)servername; //[6]
		}
		if (!(option_mask32 & WGET_OPT_NO_CHECK_CERT)) {
			/* Abort on bad server certificate */
			*argp++ = (char*)"-verify"; //[7]
			*argp++ = (char*)"100"; //[8]
			*argp++ = (char*)"-verify_return_error"; //[9]
			if (!is_ip_address(servername)) {
				*argp++ = (char*)"-verify_hostname"; //[10]
				*argp++ = (char*)servername; //[11]
			} else {
				/*
				 * Pass the address WITHOUT the ":port" suffix:
				 * host is "ADDR:PORT" at this point, which is
				 * not an IP address literal.
				 */
				*argp++ = (char*)"-verify_ip"; //[10]
				*argp++ = (char*)servername; //[11]
			}
		}
		//[12] (or earlier) is NULL terminator (argv was zeroed above)

		BB_EXECVP(argv[0], argv);
		/* exec failed: bring the real stderr back */
		xmove_fd(3, 2);
# if ENABLE_FEATURE_WGET_HTTPS
		/* Tell the parent quietly, it has a fallback */
		child_failed = 1;
		xfunc_die();
# else
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
# endif
		/* notreached */
	}

	/* Parent */
	free(servername);
	free(allocated);
	close(sp[1]);
# if ENABLE_FEATURE_WGET_HTTPS
	if (child_failed) {
		close(sp[0]);
		return -1;
	}
# endif
	return sp[0];
}
#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100767
#if ENABLE_FEATURE_WGET_HTTPS
/*
 * Put a TLS layer on top of the already connected network_fd.
 *
 * host:       server name, optionally with ":port" (stripped before use).
 * network_fd: connected socket; on return this fd number refers to
 *             the local end of a socketpair served by the child.
 * flags:      handed to tls_run_copy_loop() on MMU builds; on NOMMU
 *             builds TLSLOOP_EXIT_ON_LOCAL_EOF becomes "-e" for the
 *             external ssl_client.
 *
 * Dies if socketpair() fails.
 */
static void spawn_ssl_client(const char *host, int network_fd, int flags)
{
	char *servername;
	char *colon;
	int pair[2];
	int pid;

	/* Warn once: setting the flag makes later calls skip the message */
	if ((option_mask32 & WGET_OPT_NO_CHECK_CERT) == 0) {
		option_mask32 |= WGET_OPT_NO_CHECK_CERT;
		bb_simple_error_msg("note: TLS certificate validation not implemented");
	}

	/* Server name is the host with the part after the last ':' cut off */
	servername = xstrdup(host);
	colon = strrchr(servername, ':');
	if (colon != NULL)
		*colon = '\0';

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_simple_perror_msg_and_die("socketpair");

	fflush_all();
	pid = BB_MMU ? xfork() : xvfork();
	if (pid == 0) {
		/* Child: its end of the pair becomes stdin and stdout */
		close(pair[0]);
		xmove_fd(pair[1], 0);
		xdup2(0, 1);
		if (BB_MMU) {
			/* Run the TLS code right here, in the forked child */
			tls_state_t *tls = new_tls_state();
			tls->ofd = network_fd;
			tls->ifd = network_fd;
			tls_handshake(tls, servername);
			tls_run_copy_loop(tls, flags);
			exit(0);
		} else {
			/* Exec the external helper, network socket goes on fd 3 */
			char *argv[6];

			xmove_fd(network_fd, 3);
			argv[0] = (char*)"ssl_client";
			argv[1] = (char*)"-s3";
			//TODO: if (!is_ip_address(servername))...
			argv[2] = (char*)"-n";
			argv[3] = servername;
			argv[4] = NULL;
			if (flags & TLSLOOP_EXIT_ON_LOCAL_EOF)
				argv[4] = (char*)"-e";
			argv[5] = NULL;
			BB_EXECVP(argv[0], argv);
			bb_perror_msg_and_die("can't execute '%s'", argv[0]);
		}
		/* notreached */
	}

	/* Parent: our end of the pair takes over the network_fd number */
	free(servername);
	close(pair[1]);
	xmove_fd(pair[0], network_fd);
}
#endif
824
#if ENABLE_FEATURE_WGET_FTP
/*
 * Open the FTP control connection, log in, set up a passive-mode data
 * connection and issue RETR for target->path.
 *
 * dfpp:   receives the stream of the data connection.
 * target: supplies protocol, host, path and optional "user[:password]".
 *         NB: target->user is split in place at the first ':'.
 * lsa:    server address; its port is overwritten with the data port.
 *
 * Side effects on G: content_len/got_clen are set when SIZE succeeds,
 * content_len is reduced by beg_range when REST is accepted
 * (otherwise beg_range is reset), wget_buf is used as scratch.
 *
 * Returns the control connection stream. Dies on any unexpected reply.
 */
static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
{
	FILE *sfp;
	char *pass;
	int port;
	int reply;

	sfp = open_socket(lsa);
#if FTPS_SUPPORTED
	if (target->protocol == P_FTPS)
		spawn_ssl_client(target->host, fileno(sfp), TLSLOOP_EXIT_ON_LOCAL_EOF);
#endif

	/* Wait for the 220 greeting */
	reply = ftpcmd(NULL, NULL, sfp);
	if (reply != 220)
		bb_simple_error_msg_and_die(G.wget_buf);
		/* note: ftpcmd() sanitizes G.wget_buf, ok to print */

	/* Separate password from "user:password";
	 * "busybox" is the password for "anonymous".
	 * With a user but no ':', pass ends up NULL.
	 */
	pass = (char*)"busybox";
	if (target->user) {
		pass = strchr(target->user, ':');
		if (pass)
			*pass++ = '\0';
	}

	/* Log in: 230 right away is fine, 331 asks for the password,
	 * anything else (or a non-230 reply to PASS) is a failed login.
	 */
	reply = ftpcmd("USER ", target->user ?: "anonymous", sfp);
	if (reply == 331)
		reply = ftpcmd("PASS ", pass, sfp);
	if (reply != 230)
		bb_error_msg_and_die("ftp login: %s", G.wget_buf);

	ftpcmd("TYPE I", NULL, sfp);

	/* Ask for the file size; failure to answer is not an error */
	reply = ftpcmd("SIZE ", target->path, sfp);
	if (reply == 213) {
		/* +4: presumably skips the "213 " reply code prefix - TODO confirm */
		char *size_str = G.wget_buf + 4;

		G.content_len = BB_STRTOOFF(size_str, NULL, 10);
		if (G.content_len < 0 || errno) {
			bb_error_msg_and_die("bad SIZE value '%s'", size_str);
		}
		G.got_clen = 1;
	}

	/* Passive mode: try EPSV first (IPv6 builds only), then PASV */
	if (!(ENABLE_FEATURE_IPV6 && ftpcmd("EPSV", NULL, sfp) == 229)
	 && ftpcmd("PASV", NULL, sfp) != 227
	) {
		bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf);
	}
	port = parse_pasv_epsv(G.wget_buf);
	if (port < 0) {
		/* Unparsable reply is reported the same way */
		bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf);
	}

	/* Connect the data channel to the port the server announced */
	set_nport(&lsa->u.sa, htons(port));
	*dfpp = open_socket(lsa);

#if FTPS_SUPPORTED
	if (target->protocol == P_FTPS) {
		/* "PROT P" enables encryption of data stream.
		 * Without it (or with "PROT C"), data is sent unencrypted.
		 */
		reply = ftpcmd("PROT P", NULL, sfp);
		if (reply == 200)
			spawn_ssl_client(target->host, fileno(*dfpp), /*flags*/ 0);
	}
#endif

	/* Resuming: ask the server to start at beg_range */
	if (G.beg_range != 0) {
		sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
		if (ftpcmd(G.wget_buf, NULL, sfp) != 350)
			reset_beg_range_to_zero();
		else
			G.content_len -= G.beg_range;
	}

//TODO: needs ftp-escaping 0xff and '\n' bytes here.
//Or disallow '\n' altogether via sanitize_string() in parse_url().
//But 0xff's are possible in valid utf8 filenames.
	reply = ftpcmd("RETR ", target->path, sfp);
	if (reply > 150)
		bb_error_msg_and_die("bad response to %s: %s", "RETR", G.wget_buf);

	return sfp;
}
#endif
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100916
Denys Vlasenko2384a352011-02-15 00:58:36 +0100917static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200918{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200919#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
920# if ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200921 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200922# endif
923 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200924
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200925 polldata.fd = fileno(dfp);
926 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200927#endif
Martin Lewis94e748d2019-01-10 13:59:30 +0100928 if (!(option_mask32 & WGET_OPT_QUIET)) {
929 if (G.output_fd == 1)
930 fprintf(stderr, "writing to stdout\n");
931 else
932 fprintf(stderr, "saving to '%s'\n", G.fname_out);
933 }
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200934 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200935
936 if (G.chunked)
937 goto get_clen;
938
939 /* Loops only if chunked */
940 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100941
942#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
943 /* Must use nonblocking I/O, otherwise fread will loop
944 * and *block* until it reads full buffer,
945 * which messes up progress bar and/or timeout logic.
946 * Because of nonblocking I/O, we need to dance
947 * very carefully around EAGAIN. See explanation at
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200948 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100949 */
950 ndelay_on(polldata.fd);
951#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100952 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200953 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100954 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200955
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200956#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100957 /* fread internally uses read loop, which in our case
958 * is usually exited when we get EAGAIN.
959 * In this case, libc sets error marker on the stream.
960 * Need to clear it before next fread to avoid possible
961 * rare false positive ferror below. Rare because usually
962 * fread gets more than zero bytes, and we don't fall
963 * into if (n <= 0) ...
964 */
965 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100966#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200967 errno = 0;
968 rdsz = sizeof(G.wget_buf);
969 if (G.got_clen) {
970 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
971 if ((int)G.content_len <= 0)
972 break;
973 rdsz = (unsigned)G.content_len;
974 }
975 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100976 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200977
978 if (n > 0) {
979 xwrite(G.output_fd, G.wget_buf, n);
980#if ENABLE_FEATURE_WGET_STATUSBAR
981 G.transferred += n;
982#endif
983 if (G.got_clen) {
984 G.content_len -= n;
985 if (G.content_len == 0)
986 break;
987 }
988#if ENABLE_FEATURE_WGET_TIMEOUT
989 second_cnt = G.timeout_seconds;
990#endif
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100991 goto bump;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200992 }
993
994 /* n <= 0.
995 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100996 * If error occurs, or EOF is reached, the return value
997 * is a short item count (or zero).
998 * fread does not distinguish between EOF and error.
999 */
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001000 if (errno != EAGAIN) {
1001 if (ferror(dfp)) {
1002 progress_meter(PROGRESS_END);
James Byrne69374872019-07-02 11:35:03 +02001003 bb_simple_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001004 }
Denys Vlasenko8766a792011-02-11 21:42:00 +01001005 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001006 }
Denys Vlasenko8766a792011-02-11 21:42:00 +01001007
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001008#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
1009 /* It was EAGAIN. There is no data. Wait up to one second
1010 * then abort if timed out, or update the bar and try reading again.
1011 */
1012 if (safe_poll(&polldata, 1, 1000) == 0) {
1013# if ENABLE_FEATURE_WGET_TIMEOUT
1014 if (second_cnt != 0 && --second_cnt == 0) {
1015 progress_meter(PROGRESS_END);
James Byrne69374872019-07-02 11:35:03 +02001016 bb_simple_error_msg_and_die("download timed out");
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001017 }
1018# endif
1019 /* We used to loop back to poll here,
1020 * but there is no great harm in letting fread
1021 * to try reading anyway.
1022 */
1023 }
Denys Vlasenkofaa9e942014-03-27 16:50:29 +01001024#endif
1025 bump:
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001026 /* Need to do it _every_ second for "stalled" indicator
1027 * to be shown properly.
1028 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02001029 progress_meter(PROGRESS_BUMP);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001030 } /* while (reading data) */
1031
Denys Vlasenkoc60f4462011-02-11 22:23:23 +01001032#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
1033 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +01001034 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +01001035#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +02001036 if (!G.chunked)
1037 break;
1038
Denys Vlasenko8e2174e2018-04-08 18:06:24 +02001039 /* Each chunk ends with "\r\n" - eat it */
Denys Vlasenko34590242018-02-12 16:46:13 +01001040 fgets_trim_sanitize(dfp, NULL);
Denys Vlasenko8e2174e2018-04-08 18:06:24 +02001041 get_clen:
1042 /* chunk size format is "HEXNUM[;name[=val]]\r\n" */
1043 fgets_trim_sanitize(dfp, NULL);
1044 errno = 0;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001045 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko8e2174e2018-04-08 18:06:24 +02001046 /*
1047 * Had a bug with inputs like "ffffffff0001f400"
1048 * smashing the heap later. Ensure >= 0.
1049 */
1050 if (G.content_len < 0 || errno)
1051 bb_error_msg_and_die("bad chunk length '%s'", G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001052 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +02001053 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001054 G.got_clen = 1;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001055 /*
1056 * Note that fgets may result in some data being buffered in dfp.
1057 * We loop back to fread, which will retrieve this data.
1058 * Also note that code has to be arranged so that fread
1059 * is done _before_ one-second poll wait - poll doesn't know
1060 * about stdio buffering and can result in spurious one second waits!
1061 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001062 }
1063
Denys Vlasenko9b313dd2019-01-21 13:53:26 +01001064 /* Draw full bar and free its resources */
1065 G.chunked = 0; /* makes it show 100% even for chunked download */
1066 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
1067 progress_meter(PROGRESS_END);
1068 if (G.content_len != 0) {
James Byrne69374872019-07-02 11:35:03 +02001069 bb_simple_perror_msg_and_die("connection closed prematurely");
Denys Vlasenko9b313dd2019-01-21 13:53:26 +01001070 /* GNU wget says "DATE TIME (NN MB/s) - Connection closed at byte NNN. Retrying." */
1071 }
1072
Denys Vlasenko61441242012-06-17 19:52:25 +02001073 /* If -c failed, we restart from the beginning,
1074 * but we do not truncate file then, we do it only now, at the end.
1075 * This lets user to ^C if his 99% complete 10 GB file download
1076 * failed to restart *without* losing the almost complete file.
1077 */
1078 {
1079 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
Denys Vlasenkoe7d853b2020-12-08 19:06:28 +01001080 if (pos != (off_t)-1) {
1081 /* do not truncate if -O- is in use, a user complained about
1082 * "wget -qO- 'http://example.com/empty' >>FILE" truncating FILE.
1083 */
1084 if (!(option_mask32 & WGET_NO_FTRUNCATE))
1085 ftruncate(G.output_fd, pos);
1086 }
Denys Vlasenko61441242012-06-17 19:52:25 +02001087 }
1088
Martin Lewis94e748d2019-01-10 13:59:30 +01001089 if (!(option_mask32 & WGET_OPT_QUIET)) {
1090 if (G.output_fd == 1)
1091 fprintf(stderr, "written to stdout\n");
1092 else
1093 fprintf(stderr, "'%s' saved\n", G.fname_out);
1094 }
Denys Vlasenko7f432802009-06-28 01:02:24 +02001095}
1096
Pere Orga53695632011-02-16 20:09:36 +01001097static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +00001098{
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001099 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001100 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001101 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001102 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +00001103 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001104 char *fname_out_alloc;
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001105 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001106 struct host_info server;
1107 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +00001108
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001109 server.allocated = NULL;
1110 target.allocated = NULL;
1111 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +02001112 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001113
1114 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +00001115
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +00001116 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001117 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +00001118 if (use_proxy) {
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001119 char *proxy = getenv(target.protocol[0] == 'f' ? "ftp_proxy" : "http_proxy");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001120//FIXME: what if protocol is https? Ok to use http_proxy?
Denys Vlasenko2384a352011-02-15 00:58:36 +01001121 use_proxy = (proxy && proxy[0]);
1122 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001123 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +00001124 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001125 if (!use_proxy) {
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001126 server.protocol = target.protocol;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001127 server.port = target.port;
1128 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001129 //free(server.allocated); - can't be non-NULL
1130 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001131 } else {
1132 server.host = target.host;
1133 }
1134 }
1135
1136 if (ENABLE_FEATURE_IPV6)
1137 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001138
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001139 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001140 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001141 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001142 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001143 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001144 if (G.fname_out[0] == '/' || !G.fname_out[0])
1145 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +00001146 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001147 if (G.dir_prefix)
1148 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001149 else {
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001150 /* redirects may free target.path later, need to make a copy */
1151 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001152 }
Eric Andersen29edd002000-12-09 16:55:35 +00001153 }
Denis Vlasenko818322b2007-09-24 18:27:04 +00001154#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001155 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001156#endif
1157
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +00001158 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +01001159 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001160 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001161 G.output_fd = open(G.fname_out, O_WRONLY);
1162 if (G.output_fd >= 0) {
1163 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +00001164 }
1165 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +01001166 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +00001167 }
1168
David Demelier4a9daf22019-08-29 14:05:27 +02001169 redir_limit = 16;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001170 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +00001171 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001172 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001173 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
1174 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
1175 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +00001176 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001177 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +01001178 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1179 G.got_clen = 0;
1180 G.chunked = 0;
Denys Vlasenko403f2992018-02-06 15:15:08 +01001181 if (use_proxy || target.protocol[0] != 'f' /*not ftp[s]*/) {
Eric Andersen79757c92001-04-05 21:45:54 +00001182 /*
1183 * HTTP session
1184 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001185 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001186 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001187
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001188 /* Open socket to http(s) server */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001189#if ENABLE_FEATURE_WGET_OPENSSL
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001190 /* openssl (and maybe internal TLS) support is configured */
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001191 if (server.protocol == P_HTTPS) {
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001192 /* openssl-based helper
1193 * Inconvenient API since we can't give it an open fd
1194 */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001195 int fd = spawn_https_helper_openssl(server.host, server.port);
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001196# if ENABLE_FEATURE_WGET_HTTPS
1197 if (fd < 0) { /* no openssl? try internal */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001198 sfp = open_socket(lsa);
Denys Vlasenko403f2992018-02-06 15:15:08 +01001199 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001200 goto socket_opened;
1201 }
1202# else
1203 /* We don't check for exec("openssl") failure in this case */
1204# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001205 sfp = fdopen(fd, "r+");
1206 if (!sfp)
Denys Vlasenko899ae532018-04-01 19:59:37 +02001207 bb_die_memory_exhausted();
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001208 goto socket_opened;
1209 }
1210 sfp = open_socket(lsa);
1211 socket_opened:
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001212#elif ENABLE_FEATURE_WGET_HTTPS
1213 /* Only internal TLS support is configured */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001214 sfp = open_socket(lsa);
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001215 if (server.protocol == P_HTTPS)
Denys Vlasenko403f2992018-02-06 15:15:08 +01001216 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001217#else
1218 /* ssl (https) support is not configured */
1219 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +01001220#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001221 /* Send HTTP request */
1222 if (use_proxy) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001223 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001224 target.protocol, target.host,
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001225 target.path);
1226 } else {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001227 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
Ron Yorstoned9aa892021-09-28 09:27:40 +01001228 (option_mask32 & WGET_OPT_POST) ? "POST" : "GET",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001229 target.path);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001230 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001231 if (!USR_HEADER_HOST)
1232 SENDFMT(sfp, "Host: %s\r\n", target.host);
1233 if (!USR_HEADER_USER_AGENT)
1234 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +00001235
Denys Vlasenko9213a552011-02-10 13:23:45 +01001236 /* Ask server to close the connection as soon as we are done
1237 * (IOW: we do not intend to send more requests)
1238 */
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001239 SENDFMT(sfp, "Connection: close\r\n");
Denys Vlasenko9213a552011-02-10 13:23:45 +01001240
Denis Vlasenko9cade082006-11-21 10:43:02 +00001241#if ENABLE_FEATURE_WGET_AUTHENTICATION
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001242 if (target.user && !USR_HEADER_AUTH) {
1243 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001244 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001245 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001246 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1247 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001248 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001249 }
Eric Andersen79757c92001-04-05 21:45:54 +00001250#endif
1251
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001252 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1253 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +01001254
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001255#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001256 if (G.extra_headers) {
1257 log_io(G.extra_headers);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001258 fputs(G.extra_headers, sfp);
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001259 }
Denis Vlasenko5a2ad692009-03-04 14:13:37 +00001260
Ron Yorstoned9aa892021-09-28 09:27:40 +01001261 if (option_mask32 & WGET_OPT_POST_FILE) {
1262 int fd = xopen_stdin(G.post_file);
1263 G.post_data = xmalloc_read(fd, NULL);
1264 close(fd);
1265 }
1266
1267 if (G.post_data) {
Ildar Shaimordanovb9fba182021-12-12 03:19:13 +01001268 /* If user did not override it... */
1269 if (!USR_HEADER_CONTENT_TYPE) {
1270 SENDFMT(sfp,
1271 "Content-Type: application/x-www-form-urlencoded\r\n"
1272 );
1273 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001274 SENDFMT(sfp,
Denys Vlasenko9213a552011-02-10 13:23:45 +01001275 "Content-Length: %u\r\n"
1276 "\r\n"
1277 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +02001278 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +01001279 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001280 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001281#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +01001282 {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001283 SENDFMT(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001284 }
Eric Andersen79757c92001-04-05 21:45:54 +00001285
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001286 fflush(sfp);
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001287
Denys Vlasenko4e08a122017-01-16 17:31:05 +01001288/* Tried doing this unconditionally.
1289 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1290 */
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001291#if SSL_SUPPORTED
1292 if (target.protocol == P_HTTPS) {
1293 /* If we use SSL helper, keeping our end of the socket open for writing
1294 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1295 * even after child closes its copy of the fd.
1296 * This helps:
1297 */
1298 shutdown(fileno(sfp), SHUT_WR);
1299 }
1300#endif
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001301
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001302 /*
1303 * Retrieve HTTP response line and check for "200" status code.
1304 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001305 read_response:
Denys Vlasenko34590242018-02-12 16:46:13 +01001306 fgets_trim_sanitize(sfp, " %s\n");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001307
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001308 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001309 str = skip_non_whitespace(str);
1310 str = skip_whitespace(str);
1311 // FIXME: no error check
1312 // xatou wouldn't work: "200 OK"
1313 status = atoi(str);
1314 switch (status) {
1315 case 0:
1316 case 100:
Denys Vlasenko34590242018-02-12 16:46:13 +01001317 while (get_sanitized_hdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001318 /* eat all remaining headers */;
1319 goto read_response;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001320
1321 /* Success responses */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001322 case 200:
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001323 /* fall through */
1324 case 201: /* 201 Created */
1325/* "The request has been fulfilled and resulted in a new resource being created" */
Denys Vlasenkoef159702016-09-01 11:16:22 +02001326 /* Standard wget is reported to treat this as success */
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001327 /* fall through */
1328 case 202: /* 202 Accepted */
1329/* "The request has been accepted for processing, but the processing has not been completed" */
1330 /* Treat as success: fall through */
1331 case 203: /* 203 Non-Authoritative Information */
1332/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1333 /* fall through */
1334 case 204: /* 204 No Content */
Denis Vlasenko50b5cac2008-06-22 16:28:02 +00001335/*
1336Response 204 doesn't say "null file", it says "metadata
1337has changed but data didn't":
1338
1339"10.2.5 204 No Content
1340The server has fulfilled the request but does not need to return
1341an entity-body, and might want to return updated metainformation.
1342The response MAY include new or updated metainformation in the form
1343of entity-headers, which if present SHOULD be associated with
1344the requested variant.
1345
1346If the client is a user agent, it SHOULD NOT change its document
1347view from that which caused the request to be sent. This response
1348is primarily intended to allow input for actions to take place
1349without causing a change to the user agent's active document view,
1350although any new or updated metainformation SHOULD be applied
1351to the document currently in the user agent's active view.
1352
1353The 204 response MUST NOT include a message-body, and thus
1354is always terminated by the first empty line after the header fields."
1355
1356However, in real world it was observed that some web servers
1357(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1358*/
Denys Vlasenkobf146b82012-06-13 17:31:07 +02001359 if (G.beg_range != 0) {
1360 /* "Range:..." was not honored by the server.
1361 * Restart download from the beginning.
1362 */
1363 reset_beg_range_to_zero();
1364 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001365 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001366 /* 205 Reset Content ?? what to do on this ?? */
1367
Denys Vlasenkofb132e42010-10-29 11:46:52 +02001368 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001369 case 301:
1370 case 302:
1371 case 303:
Jeremy Line71ea6c2021-04-28 20:34:24 -07001372 case 307:
1373 case 308:
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001374 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001375
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001376 case 206: /* Partial Content */
1377 if (G.beg_range != 0)
1378 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001379 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001380 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001381 /* fall through */
1382 default:
Denys Vlasenko34590242018-02-12 16:46:13 +01001383 bb_error_msg_and_die("server returned error: %s", G.wget_buf);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001384 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001385
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001386 /*
1387 * Retrieve HTTP headers.
1388 */
Denys Vlasenko34590242018-02-12 16:46:13 +01001389 while ((str = get_sanitized_hdr(sfp)) != NULL) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001390 static const char keywords[] ALIGN1 =
1391 "content-length\0""transfer-encoding\0""location\0";
1392 enum {
1393 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1394 };
Matthijs van de Water0d586662009-08-22 20:19:48 +02001395 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001396
Denys Vlasenko34590242018-02-12 16:46:13 +01001397 /* get_sanitized_hdr converted "FOO:" string to lowercase */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001398
Matthijs van de Water0d586662009-08-22 20:19:48 +02001399 /* strip trailing whitespace */
1400 char *s = strchrnul(str, '\0') - 1;
1401 while (s >= str && (*s == ' ' || *s == '\t')) {
1402 *s = '\0';
1403 s--;
1404 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001405 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001406 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001407 G.content_len = BB_STRTOOFF(str, NULL, 10);
1408 if (G.content_len < 0 || errno) {
Denys Vlasenko34590242018-02-12 16:46:13 +01001409 bb_error_msg_and_die("content-length %s is garbage", str);
Eric Andersen79757c92001-04-05 21:45:54 +00001410 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001411 G.got_clen = 1;
1412 continue;
1413 }
1414 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001415 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenko34590242018-02-12 16:46:13 +01001416 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001417 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001418 }
1419 if (key == KEY_location && status >= 300) {
1420 if (--redir_limit == 0)
James Byrne69374872019-07-02 11:35:03 +02001421 bb_simple_error_msg_and_die("too many redirections");
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001422 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001423 if (str[0] == '/') {
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001424 free(redirected_path);
Denys Vlasenko34590242018-02-12 16:46:13 +01001425 target.path = redirected_path = xstrdup(str + 1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001426 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001427 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001428 parse_url(str, &target);
1429 if (!use_proxy) {
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001430 /* server.user remains untouched */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001431 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +01001432 server.allocated = NULL;
Denys Vlasenko9634e8a2018-07-02 18:31:02 +02001433 server.protocol = target.protocol;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001434 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001435 /* strip_ipv6_scope_id(target.host); - no! */
1436 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001437 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +00001438 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001439 goto resolve_lsa;
1440 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +00001441 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001442 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +00001443 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001444 }
1445// if (status >= 300)
1446// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001447
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001448 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +00001449 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001450 } else {
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +02001451#if ENABLE_FEATURE_WGET_FTP
Eric Andersen79757c92001-04-05 21:45:54 +00001452 /*
1453 * FTP session
1454 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001455 sfp = prepare_ftp_session(&dfp, &target, lsa);
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +02001456#endif
Eric Andersen96700832000-09-04 15:15:55 +00001457 }
Denis Vlasenko77105632007-09-24 15:04:00 +00001458
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001459 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001460
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001461 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001462 if (G.output_fd < 0)
1463 G.output_fd = xopen(G.fname_out, G.o_flags);
1464 retrieve_file_data(dfp);
1465 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1466 xclose(G.output_fd);
1467 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001468 }
Martin Lewis46fc3292019-01-04 18:26:04 +01001469 } else {
Martin Lewis94e748d2019-01-10 13:59:30 +01001470 if (!(option_mask32 & WGET_OPT_QUIET))
1471 fprintf(stderr, "remote file exists\n");
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +00001472 }
Eric Andersen79757c92001-04-05 21:45:54 +00001473
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +02001474#if ENABLE_FEATURE_WGET_FTP
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001475 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001476 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +00001477 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001478 if (ftpcmd(NULL, NULL, sfp) != 226)
Denys Vlasenko34590242018-02-12 16:46:13 +01001479 bb_error_msg_and_die("ftp error: %s", G.wget_buf);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001480 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +00001481 }
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +02001482#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001483 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +00001484
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001485 free(server.allocated);
1486 free(target.allocated);
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001487 free(server.user);
1488 free(target.user);
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001489 free(fname_out_alloc);
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001490 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +00001491}
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001492
1493int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1494int wget_main(int argc UNUSED_PARAM, char **argv)
1495{
1496#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1497 static const char wget_longopts[] ALIGN1 =
1498 /* name, has_arg, val */
1499 "continue\0" No_argument "c"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001500 "quiet\0" No_argument "q"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +01001501 "server-response\0" No_argument "S"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001502 "output-document\0" Required_argument "O"
Martin Lewis64f35362018-12-26 16:28:45 +01001503 "output-file\0" Required_argument "o"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001504 "directory-prefix\0" Required_argument "P"
1505 "proxy\0" Required_argument "Y"
1506 "user-agent\0" Required_argument "U"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001507IF_FEATURE_WGET_TIMEOUT(
1508 "timeout\0" Required_argument "T")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001509 /* Ignored: */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001510IF_DESKTOP( "tries\0" Required_argument "t")
1511 "header\0" Required_argument "\xff"
1512 "post-data\0" Required_argument "\xfe"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001513 "spider\0" No_argument "\xfd"
Denys Vlasenko0972c7f2018-05-28 14:36:26 +02001514 "no-check-certificate\0" No_argument "\xfc"
Ron Yorstoned9aa892021-09-28 09:27:40 +01001515 "post-file\0" Required_argument "\xfb"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001516 /* Ignored (we always use PASV): */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001517IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001518 /* Ignored (we don't support caching) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001519IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1520IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1521IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1522IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1523IF_DESKTOP( "no-parent\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001524 ;
Denys Vlasenko036585a2017-08-08 16:38:18 +02001525# define GETOPT32 getopt32long
1526# define LONGOPTS ,wget_longopts
1527#else
1528# define GETOPT32 getopt32
1529# define LONGOPTS
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001530#endif
1531
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001532#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1533 llist_t *headers_llist = NULL;
1534#endif
1535
1536 INIT_G();
1537
Lauri Kasanend074b412013-10-12 21:47:07 +02001538#if ENABLE_FEATURE_WGET_TIMEOUT
1539 G.timeout_seconds = 900;
1540 signal(SIGALRM, alarm_handler);
1541#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001542 G.proxy_flag = "on"; /* use proxies if env vars are set */
1543 G.user_agent = "Wget"; /* "User-Agent" header field */
1544
Denys Vlasenko22542ec2017-08-08 21:55:02 +02001545 GETOPT32(argv, "^"
Martin Lewis64f35362018-12-26 16:28:45 +01001546 "cqSO:o:P:Y:U:T:+"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001547 /*ignored:*/ "t:"
1548 /*ignored:*/ "n::"
1549 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1550 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1551 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1552 * -nH --no-host-directories: wget -r http://host/ won't create host/
1553 * -np --no-parent
1554 * "n::" above says that we accept -n[ARG].
1555 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1556 */
Denys Vlasenko22542ec2017-08-08 21:55:02 +02001557 "\0"
1558 "-1" /* at least one URL */
Ron Yorstoned9aa892021-09-28 09:27:40 +01001559 IF_FEATURE_WGET_LONG_OPTIONS(":\xfe--\xfb")
1560 IF_FEATURE_WGET_LONG_OPTIONS(":\xfe--\xfe")
1561 IF_FEATURE_WGET_LONG_OPTIONS(":\xfb--\xfb")
Denys Vlasenko22542ec2017-08-08 21:55:02 +02001562 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::") /* --header is a list */
Denys Vlasenko036585a2017-08-08 16:38:18 +02001563 LONGOPTS
Martin Lewis64f35362018-12-26 16:28:45 +01001564 , &G.fname_out, &G.fname_log, &G.dir_prefix,
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001565 &G.proxy_flag, &G.user_agent,
1566 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001567 NULL, /* -t RETRIES */
1568 NULL /* -n[ARG] */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001569 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1570 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
Ron Yorstoned9aa892021-09-28 09:27:40 +01001571 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_file)
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001572 );
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001573#if 0 /* option bits debug */
1574 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1575 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1576 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1577 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1578 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
Denys Vlasenko0972c7f2018-05-28 14:36:26 +02001579 if (option_mask32 & WGET_OPT_NO_CHECK_CERT) bb_error_msg("--no-check-certificate");
Ron Yorstoned9aa892021-09-28 09:27:40 +01001580 if (option_mask32 & WGET_OPT_POST_FILE) bb_error_msg("--post-file");
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001581 exit(0);
1582#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001583 argv += optind;
1584
1585#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1586 if (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001587 int size = 0;
1588 char *hdr;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001589 llist_t *ll = headers_llist;
1590 while (ll) {
1591 size += strlen(ll->data) + 2;
1592 ll = ll->link;
1593 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001594 G.extra_headers = hdr = xmalloc(size + 1);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001595 while (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001596 int bit;
1597 const char *words;
1598
1599 size = sprintf(hdr, "%s\r\n",
1600 (char*)llist_pop(&headers_llist));
1601 /* a bit like index_in_substrings but don't match full key */
1602 bit = 1;
1603 words = wget_user_headers;
1604 while (*words) {
1605 if (strstr(hdr, words) == hdr) {
1606 G.user_headers |= bit;
1607 break;
1608 }
1609 bit <<= 1;
1610 words += strlen(words) + 1;
1611 }
1612 hdr += size;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001613 }
1614 }
1615#endif
1616
Denys Vlasenko2384a352011-02-15 00:58:36 +01001617 G.output_fd = -1;
1618 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1619 if (G.fname_out) { /* -O FILE ? */
1620 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1621 G.output_fd = 1;
Denys Vlasenkoe7d853b2020-12-08 19:06:28 +01001622 option_mask32 = (option_mask32 & (~WGET_OPT_CONTINUE)) | WGET_NO_FTRUNCATE;
Denys Vlasenko2384a352011-02-15 00:58:36 +01001623 }
1624 /* compat with wget: -O FILE can overwrite */
1625 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1626 }
1627
Martin Lewis64f35362018-12-26 16:28:45 +01001628 G.log_fd = -1;
1629 if (G.fname_log) { /* -o FILE ? */
1630 if (!LONE_DASH(G.fname_log)) { /* not -o - ? */
1631 /* compat with wget: -o FILE can overwrite */
1632 G.log_fd = xopen(G.fname_log, O_WRONLY | O_CREAT | O_TRUNC);
1633 /* Redirect only stderr to log file, so -O - will work */
1634 xdup2(G.log_fd, STDERR_FILENO);
1635 }
1636 }
1637
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001638 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +01001639 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001640
Denys Vlasenko28556b92011-02-15 11:03:53 +01001641 if (G.output_fd >= 0)
1642 xclose(G.output_fd);
1643
Martin Lewis64f35362018-12-26 16:28:45 +01001644 if (G.log_fd >= 0)
1645 xclose(G.log_fd);
1646
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +02001647#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1648 free(G.extra_headers);
1649#endif
1650 FINI_G();
1651
Pere Orga53695632011-02-16 20:09:36 +01001652 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001653}