blob: 3edc5f870df85cc68ceec37ec8c7c1fc12aec28f [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020011//config:config WGET
Denys Vlasenkob097a842018-12-28 03:20:17 +010012//config: bool "wget (38 kb)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020013//config: default y
14//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020015//config: wget is a utility for non-interactive download of files from HTTP
16//config: and FTP servers.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020017//config:
Denys Vlasenkof5604222017-01-10 14:58:54 +010018//config:config FEATURE_WGET_LONG_OPTIONS
19//config: bool "Enable long options"
20//config: default y
21//config: depends on WGET && LONG_OPTS
22//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020023//config:config FEATURE_WGET_STATUSBAR
Denys Vlasenkof5604222017-01-10 14:58:54 +010024//config: bool "Enable progress bar (+2k)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020025//config: default y
26//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020027//config:
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +020028//config:config FEATURE_WGET_FTP
29//config: bool "Enable FTP protocol (+1k)"
30//config: default y
31//config: depends on WGET
32//config: help
33//config: To support FTPS, enable FEATURE_WGET_HTTPS as well.
34//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020035//config:config FEATURE_WGET_AUTHENTICATION
36//config: bool "Enable HTTP authentication"
37//config: default y
38//config: depends on WGET
39//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020040//config: Support authenticated HTTP transfers.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020041//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020042//config:config FEATURE_WGET_TIMEOUT
43//config: bool "Enable timeout option -T SEC"
44//config: default y
45//config: depends on WGET
46//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020047//config: Supports network read and connect timeouts for wget,
48//config: so that wget will give up and timeout, through the -T
49//config: command line option.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020050//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020051//config: Currently only connect and network data read timeout are
52//config: supported (i.e., timeout is not applied to the DNS query). When
53//config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
54//config: will work in addition to -T.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020055//config:
Denys Vlasenko9a647c32017-01-23 01:08:16 +010056//config:config FEATURE_WGET_HTTPS
57//config: bool "Support HTTPS using internal TLS code"
58//config: default y
59//config: depends on WGET
60//config: select TLS
61//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020062//config: wget will use internal TLS code to connect to https:// URLs.
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +020063//config: It also enables FTPS support, but it's not well tested yet.
Denys Vlasenko72089cf2017-07-21 09:50:55 +020064//config: Note:
65//config: On NOMMU machines, ssl_helper applet should be available
66//config: in the $PATH for this to work. Make sure to select that applet.
Denys Vlasenko9a647c32017-01-23 01:08:16 +010067//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020068//config: Note: currently, TLS code only makes TLS I/O work, it
69//config: does *not* check that the peer is who it claims to be, etc.
70//config: IOW: it uses peer-supplied public keys to establish encryption
71//config: and signing keys, then encrypts and signs outgoing data and
72//config: decrypts incoming data.
73//config: It does not check signature hashes on the incoming data:
74//config: this means that attackers manipulating TCP packets can
75//config: send altered data and we unknowingly receive garbage.
76//config: (This check might be relatively easy to add).
77//config: It does not check public key's certificate:
78//config: this means that the peer may be an attacker impersonating
79//config: the server we think we are talking to.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010080//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020081//config: If you think this is unacceptable, consider this. As more and more
82//config: servers switch to HTTPS-only operation, without such "crippled"
83//config: TLS code it is *impossible* to simply download a kernel source
84//config: from kernel.org. Which can in real world translate into
85//config: "my small automatic tooling to build cross-compilers from sources
86//config: no longer works, I need to additionally keep a local copy
87//config: of ~4 megabyte source tarball of a SSL library and ~2 megabyte
88//config: source of wget, need to compile and build both before I can
89//config: download anything. All this despite the fact that the build
90//config: is done in a QEMU sandbox on a machine with absolutely nothing
91//config: worth stealing, so I don't care if someone would go to a lot
92//config: of trouble to intercept my HTTPS download to send me an altered
93//config: kernel tarball".
Denys Vlasenko67f6db62017-01-30 16:27:37 +010094//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020095//config: If you still think this is unacceptable, send patches.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010096//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020097//config: If you still think this is unacceptable, do not want to send
98//config: patches, but do want to waste bandwidth explaining how wrong
99//config: it is, you will be ignored.
Denys Vlasenko67f6db62017-01-30 16:27:37 +0100100//config:
Dimitri John Ledkov45fa3f12020-05-19 18:20:39 +0100101//config: FEATURE_WGET_OPENSSL does implement TLS verification
102//config: using the certificates available to OpenSSL.
103//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +0200104//config:config FEATURE_WGET_OPENSSL
105//config: bool "Try to connect to HTTPS using openssl"
106//config: default y
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200107//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200108//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200109//config: Try to use openssl to handle HTTPS.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200110//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200111//config: OpenSSL has a simple SSL client for debug purposes.
112//config: If you select this option, wget will effectively run:
113//config: "openssl s_client -quiet -connect hostname:443
114//config: -servername hostname 2>/dev/null" and pipe its data
115//config: through it. -servername is not used if hostname is numeric.
116//config: Note inconvenient API: host resolution is done twice,
117//config: and there is no guarantee openssl's idea of IPv6 address
118//config: format is the same as ours.
119//config: Another problem is that s_client prints debug information
120//config: to stderr, and it needs to be suppressed. This means
121//config: all error messages get suppressed too.
122//config: openssl is also a big binary, often dynamically linked
123//config: against ~15 libraries.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200124//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200125//config: If openssl can't be executed, internal TLS code will be used
126//config: (if you enabled it); if openssl can be executed but fails later,
127//config: wget can't detect this, and download will fail.
Dimitri John Ledkov45fa3f12020-05-19 18:20:39 +0100128//config:
129//config: By default TLS verification is performed, unless
130//config: --no-check-certificate option is passed.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200131
132//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
133
134//kbuild:lib-$(CONFIG_WGET) += wget.o
135
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100136//usage:#define wget_trivial_usage
137//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkoa2f18d92020-12-18 04:12:51 +0100138//usage: "[-cqS] [--spider] [-O FILE] [-o LOGFILE] [--header 'HEADER: VALUE'] [-Y on/off]\n"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200139/* Since we ignore these opts, we don't show them in --help */
Dimitri John Ledkov45fa3f12020-05-19 18:20:39 +0100140/* //usage: " [--no-cache] [--passive-ftp] [-t TRIES]" */
Denys Vlasenko92e1b082015-10-20 21:51:52 +0200141/* //usage: " [-nv] [-nc] [-nH] [-np]" */
Denys Vlasenkoa2f18d92020-12-18 04:12:51 +0100142//usage: " "IF_FEATURE_WGET_OPENSSL("[--no-check-certificate] ")"[-P DIR] [-U AGENT]"IF_FEATURE_WGET_TIMEOUT(" [-T SEC]")" URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100143//usage: )
144//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkoa2f18d92020-12-18 04:12:51 +0100145//usage: "[-cqS] [-O FILE] [-o LOGFILE] [-Y on/off] [-P DIR] [-U AGENT]"IF_FEATURE_WGET_TIMEOUT(" [-T SEC]")" URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100146//usage: )
147//usage:#define wget_full_usage "\n\n"
148//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200149//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100150//usage: "\n --spider Only check URL existence: $? is 0 if exists"
Dimitri John Ledkov45fa3f12020-05-19 18:20:39 +0100151//usage: IF_FEATURE_WGET_OPENSSL(
152//usage: "\n --no-check-certificate Don't validate the server's certificate"
153//usage: )
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100154//usage: )
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200155//usage: "\n -c Continue retrieval of aborted transfer"
156//usage: "\n -q Quiet"
157//usage: "\n -P DIR Save to DIR (default .)"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100158//usage: "\n -S Show server response"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200159//usage: IF_FEATURE_WGET_TIMEOUT(
160//usage: "\n -T SEC Network read timeout is SEC seconds"
161//usage: )
162//usage: "\n -O FILE Save to FILE ('-' for stdout)"
Denys Vlasenkoa2f18d92020-12-18 04:12:51 +0100163//usage: "\n -o LOGFILE Log messages to FILE"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200164//usage: "\n -U STR Use STR for User-Agent header"
165//usage: "\n -Y on/off Use proxy"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100166
Denis Vlasenkob6adbf12007-05-26 19:00:18 +0000167#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000168
/*
 * I/O tracing helpers.
 * Change "#if 0" to "#if 1" to have every line sent with SENDFMT() and
 * every line passed to log_io() reported through bb_error_msg().
 * In the default build log_io() expands to nothing and SENDFMT() is
 * a plain fprintf().
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
/* No ';' after "while (0)": the caller supplies it. With the semicolon
 * baked into the macro, "if (x) SENDFMT(...); else ..." would not compile
 * in the tracing build.
 */
# define SENDFMT(fp, fmt, ...) \
	do { \
		log_io("> " fmt, ##__VA_ARGS__); \
		fprintf(fp, fmt, ##__VA_ARGS__); \
	} while (0)
#else
# define log_io(...) ((void)0)
# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100180
181
Denys Vlasenko9a647c32017-01-23 01:08:16 +0100182#define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200183#define FTPS_SUPPORTED (ENABLE_FEATURE_WGET_FTP && ENABLE_FEATURE_WGET_HTTPS)
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100184
Eric Andersen79757c92001-04-05 21:45:54 +0000185struct host_info {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100186 char *allocated;
Denis Vlasenko818322b2007-09-24 18:27:04 +0000187 const char *path;
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100188 char *user;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100189 const char *protocol;
Denis Vlasenko818322b2007-09-24 18:27:04 +0000190 char *host;
191 int port;
Eric Andersen79757c92001-04-05 21:45:54 +0000192};
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200193static const char P_HTTP[] ALIGN1 = "http";
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100194#if SSL_SUPPORTED
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200195static const char P_HTTPS[] ALIGN1 = "https";
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100196#endif
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200197#if ENABLE_FEATURE_WGET_FTP
198static const char P_FTP[] ALIGN1 = "ftp";
199#endif
200#if FTPS_SUPPORTED
201static const char P_FTPS[] ALIGN1 = "ftps";
202#endif
Eric Andersen79757c92001-04-05 21:45:54 +0000203
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * A header given by the user replaces the built-in header of the same name.
 * G.user_headers collects one HDR_xxx bit per such header.
 * The two authentication bits evaluate to 0 when authentication
 * support is not compiled in.
 */
enum {
	HDR_HOST       = 1 << 0,
	HDR_USER_AGENT = 1 << 1,
	HDR_RANGE      = 1 << 2,
	HDR_AUTH       = (1 << 3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
	HDR_PROXY_AUTH = (1 << 4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
};
/* NUL-separated list of the header names, in the same order as the bits above */
static const char wget_user_headers[] ALIGN1 =
	"Host:\0"
	"User-Agent:\0"
	"Range:\0"
# if ENABLE_FEATURE_WGET_AUTHENTICATION
	"Authorization:\0"
	"Proxy-Authorization:\0"
# endif
	;
/* Nonzero if the user supplied the respective header */
# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
#else /* No long options, no user-headers :( */
# define USR_HEADER_HOST       0
# define USR_HEADER_USER_AGENT 0
# define USR_HEADER_RANGE      0
# define USR_HEADER_AUTH       0
# define USR_HEADER_PROXY_AUTH 0
#endif
Denis Vlasenko77105632007-09-24 15:04:00 +0000234
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200235/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +0000236struct globals {
237 off_t content_len; /* Content-length of the file */
238 off_t beg_range; /* Range at which continue begins */
239#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +0000240 off_t transferred; /* Number of bytes transferred so far */
241 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +0100242 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +0000243#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200244 char *dir_prefix;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100245#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200246 char *post_data;
247 char *extra_headers;
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +0100248 unsigned char user_headers; /* Headers mentioned by the user */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100249#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200250 char *fname_out; /* where to direct output (-O) */
Martin Lewis64f35362018-12-26 16:28:45 +0100251 char *fname_log; /* where to direct log (-o) */
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200252 const char *proxy_flag; /* Use proxies if env vars are set */
253 const char *user_agent; /* "User-Agent" header field */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100254 int output_fd;
Martin Lewis64f35362018-12-26 16:28:45 +0100255 int log_fd;
Denys Vlasenko2384a352011-02-15 00:58:36 +0100256 int o_flags;
Denys Vlasenko5084bae2018-11-24 21:56:21 +0100257#if ENABLE_FEATURE_WGET_TIMEOUT
258 unsigned timeout_seconds;
259 smallint die_if_timed_out;
260#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200261 smallint chunked; /* chunked transfer encoding */
262 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100263 /* Local downloads do benefit from big buffer.
264 * With 512 byte buffer, it was measured to be
265 * an order of magnitude slower than with big one.
266 */
Denys Vlasenko9b313dd2019-01-21 13:53:26 +0100267 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024] ALIGNED(16);
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +0100268} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100269#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200270#define INIT_G() do { \
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200271 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200272} while (0)
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +0200273#define FINI_G() do { \
274 FREE_PTR_TO_GLOBALS(); \
275} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +0000276
277
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200278/* Must match option string! */
279enum {
280 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200281 WGET_OPT_QUIET = (1 << 1),
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100282 WGET_OPT_SERVER_RESPONSE = (1 << 2),
283 WGET_OPT_OUTNAME = (1 << 3),
Martin Lewis64f35362018-12-26 16:28:45 +0100284 WGET_OPT_LOGNAME = (1 << 4),
285 WGET_OPT_PREFIX = (1 << 5),
286 WGET_OPT_PROXY = (1 << 6),
287 WGET_OPT_USER_AGENT = (1 << 7),
288 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8),
289 WGET_OPT_RETRIES = (1 << 9),
290 WGET_OPT_nsomething = (1 << 10),
291 WGET_OPT_HEADER = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
292 WGET_OPT_POST_DATA = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
293 WGET_OPT_SPIDER = (1 << 13) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
294 WGET_OPT_NO_CHECK_CERT = (1 << 14) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
Denys Vlasenkoe7d853b2020-12-08 19:06:28 +0100295 /* hijack this bit for other than opts purposes: */
296 WGET_NO_FTRUNCATE = (1 << 31)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200297};
298
/* Values of the 'flag' argument of progress_meter() */
enum {
	PROGRESS_START = -1, /* initialize the meter, then update it */
	PROGRESS_END   = 0,  /* update one last time, then release the meter */
	PROGRESS_BUMP  = 1,  /* plain update */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000304#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000305static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000306{
Denys Vlasenko26602b82018-11-23 19:14:52 +0100307 int notty;
308
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200309 if (option_mask32 & WGET_OPT_QUIET)
310 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000311
Martin Lewis64f35362018-12-26 16:28:45 +0100312 /* Don't save progress to log file */
313 if (G.log_fd >= 0)
314 return;
315
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200316 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100317 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000318
Denys Vlasenko26602b82018-11-23 19:14:52 +0100319 notty = bb_progress_update(&G.pmt,
Denys Vlasenko2384a352011-02-15 00:58:36 +0100320 G.beg_range,
321 G.transferred,
322 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
323 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000324
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200325 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100326 bb_progress_free(&G.pmt);
Denys Vlasenko26602b82018-11-23 19:14:52 +0100327 if (notty == 0)
328 bb_putchar_stderr('\n'); /* it's tty */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100329 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000330 }
331}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200332#else
Denys Vlasenko8c317f02019-05-14 17:26:47 +0200333static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) {}
Eric Andersenb520e082000-10-03 00:21:45 +0000334#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000335
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000336
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * The string is edited in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not a bracketed address containing '%' before the
 * closing ']' is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}
	if (cp < scope) {
		/* '%' is located after the ']' (e.g. "[::1]:80%25"): it is not
		 * a zone identifier. Moving the tail "down" onto it would copy
		 * the string over itself and write past its end. */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]..." */
	memmove(scope, cp, strlen(cp) + 1);
}
372
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into G.wget_buf.
 * Returns G.wget_buf, so the result is only valid until that buffer
 * is written to again. Overlong input is cut at the limit computed below.
 */
static char *base64enc(const char *str)
{
	/* paranoia: cap the input length relative to the size of wget_buf */
	const size_t input_limit = sizeof(G.wget_buf)/4*3 - 10;
	unsigned src_len = strnlen(str, input_limit);

	bb_uuencode(G.wget_buf, str, src_len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
383
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * Signal handler (presumably installed for SIGALRM elsewhere in this file).
 * Terminates the program if a guarded operation is still in progress,
 * that is, if set_alarm() was called and clear_alarm() was not yet reached.
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
	if (!G.die_if_timed_out)
		return;
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	bb_simple_error_msg_and_die("download timed out");
}
/* Start the timeout for the operation that follows; no-op if no timeout is set */
static void set_alarm(void)
{
	if (G.timeout_seconds == 0)
		return;
	alarm(G.timeout_seconds);
	G.die_if_timed_out = 1;
}
/* The guarded operation finished: a later alarm must not kill us */
# define clear_alarm() ((void)(G.die_if_timed_out = 0))
#else
# define set_alarm() ((void)0)
# define clear_alarm() ((void)0)
#endif
403
Denys Vlasenkoed727612016-07-25 21:34:57 +0200404#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Tell a numeric address from a host name.
 * Returns 1 if inet_pton() accepts 'string' as an IPv4 address or,
 * when IPv6 support is compiled in, as an IPv6 address; 0 otherwise.
 */
static int is_ip_address(const char *string)
{
	struct in_addr v4;
	int rc;

	rc = inet_pton(AF_INET, string, &v4);
# if ENABLE_FEATURE_IPV6
	/* Only a "not an IPv4 address" answer (0) leads to the IPv6 attempt */
	if (rc == 0) {
		struct in6_addr v6;
		rc = inet_pton(AF_INET6, string, &v6);
	}
# endif
	return rc == 1 ? 1 : 0;
}
423#endif
424
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000425static FILE *open_socket(len_and_sockaddr *lsa)
426{
Lauri Kasanend074b412013-10-12 21:47:07 +0200427 int fd;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000428 FILE *fp;
429
Denys Vlasenko6701e912016-03-17 15:58:16 +0100430 set_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200431 fd = xconnect_stream(lsa);
Denys Vlasenko6701e912016-03-17 15:58:16 +0100432 clear_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200433
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000434 /* glibc 2.4 seems to try seeking on it - ??! */
435 /* hopefully it understands what ESPIPE means... */
Lauri Kasanend074b412013-10-12 21:47:07 +0200436 fp = fdopen(fd, "r+");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100437 if (!fp)
Denys Vlasenko899ae532018-04-01 19:59:37 +0200438 bb_die_memory_exhausted();
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000439
440 return fp;
441}
442
/* We balk at any control chars in other side's messages.
 * This prevents nasty surprises (e.g. ESC sequences) in "Location:" URLs
 * and error messages.
 *
 * The only exception is tabs, which are converted to (one) space:
 * HTTP's "headers: <whitespace> values" may have those.
 *
 * The string is modified in place: it is cut at the first byte below 0x20
 * that is not a tab. Returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cur;

	for (cur = (unsigned char *) s; *cur != '\0'; cur++) {
		if (*cur >= ' ')
			continue;       /* ordinary byte */
		if (*cur != '\t')
			break;          /* control char: cut here */
		*cur = ' ';
	}
	*cur = '\0';
	return s;
}
464
Denys Vlasenkof836f012011-02-10 23:02:28 +0100465/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
Denys Vlasenko34590242018-02-12 16:46:13 +0100466static char fgets_trim_sanitize(FILE *fp, const char *fmt)
Denys Vlasenkof836f012011-02-10 23:02:28 +0100467{
468 char c;
469 char *buf_ptr;
470
Denys Vlasenko6701e912016-03-17 15:58:16 +0100471 set_alarm();
Denys Vlasenko34590242018-02-12 16:46:13 +0100472 if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
James Byrne69374872019-07-02 11:35:03 +0200473 bb_simple_perror_msg_and_die("error getting response");
Denys Vlasenko6701e912016-03-17 15:58:16 +0100474 clear_alarm();
Denys Vlasenkof836f012011-02-10 23:02:28 +0100475
476 buf_ptr = strchrnul(G.wget_buf, '\n');
477 c = *buf_ptr;
Denys Vlasenko34590242018-02-12 16:46:13 +0100478#if 1
479 /* Disallow any control chars: trim at first char < 0x20 */
480 sanitize_string(G.wget_buf);
481#else
Denys Vlasenkof836f012011-02-10 23:02:28 +0100482 *buf_ptr = '\0';
483 buf_ptr = strchrnul(G.wget_buf, '\r');
484 *buf_ptr = '\0';
Denys Vlasenko34590242018-02-12 16:46:13 +0100485#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100486
487 log_io("< %s", G.wget_buf);
488
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100489 if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE))
490 fprintf(stderr, fmt, G.wget_buf);
491
Denys Vlasenkof836f012011-02-10 23:02:28 +0100492 return c;
493}
494
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200495#if ENABLE_FEATURE_WGET_FTP
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100496static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000497{
498 int result;
499 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100500 if (!s2)
501 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000502 fprintf(fp, "%s%s\r\n", s1, s2);
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100503 /* With --server-response, wget also shows its ftp commands */
504 if (option_mask32 & WGET_OPT_SERVER_RESPONSE)
505 fprintf(stderr, "--> %s%s\n\n", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000506 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100507 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000508 }
509
Denys Vlasenko34590242018-02-12 16:46:13 +0100510 /* Read until "Nxx something" is received */
511 G.wget_buf[3] = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000512 do {
Denys Vlasenko34590242018-02-12 16:46:13 +0100513 fgets_trim_sanitize(fp, "%s\n");
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100514 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000515
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100516 G.wget_buf[3] = '\0';
517 result = xatoi_positive(G.wget_buf);
518 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000519 return result;
520}
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200521#endif
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000522
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100523static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000524{
525 char *url, *p, *sp;
526
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100527 free(h->allocated);
528 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000529
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200530 h->protocol = P_HTTP;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100531 p = strstr(url, "://");
532 if (p) {
533 *p = '\0';
534 h->host = p + 3;
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200535#if ENABLE_FEATURE_WGET_FTP
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100536 if (strcmp(url, P_FTP) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200537 h->port = bb_lookup_std_port(P_FTP, "tcp", 21);
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200538 h->protocol = P_FTP;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100539 } else
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200540#endif
541#if FTPS_SUPPORTED
Denys Vlasenko403f2992018-02-06 15:15:08 +0100542 if (strcmp(url, P_FTPS) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200543 h->port = bb_lookup_std_port(P_FTPS, "tcp", 990);
Denys Vlasenko403f2992018-02-06 15:15:08 +0100544 h->protocol = P_FTPS;
545 } else
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200546#endif
547#if SSL_SUPPORTED
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100548 if (strcmp(url, P_HTTPS) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200549 h->port = bb_lookup_std_port(P_HTTPS, "tcp", 443);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100550 h->protocol = P_HTTPS;
551 } else
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100552#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100553 if (strcmp(url, P_HTTP) == 0) {
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200554 goto http;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100555 } else {
556 *p = ':';
Denys Vlasenko34590242018-02-12 16:46:13 +0100557 bb_error_msg_and_die("not an http or ftp url: %s", url);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100558 }
559 } else {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100560 // GNU wget is user-friendly and falls back to http://
561 h->host = url;
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +0200562 http:
563 h->port = bb_lookup_std_port(P_HTTP, "tcp", 80);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100564 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000565
566 // FYI:
567 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
Denys Vlasenkoa0aae9f2017-01-20 14:12:10 +0100568 // 'GET /?var=a/b HTTP/1.0'
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000569 // and saves 'index.html?var=a%2Fb' (we save 'b')
570 // wget 'http://busybox.net?login=john@doe':
571 // request: 'GET /?login=john@doe HTTP/1.0'
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200572 // saves: 'index.html?login=john@doe' (we save 'login=john@doe')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000573 // wget 'http://busybox.net#test/test':
574 // request: 'GET / HTTP/1.0'
575 // saves: 'index.html' (we save 'test')
576 //
577 // We also don't add unique .N suffix if file exists...
578 sp = strchr(h->host, '/');
579 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
580 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
581 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000582 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000583 } else if (*sp == '/') {
584 *sp = '\0';
585 h->path = sp + 1;
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200586 } else {
587 // sp points to '#' or '?'
588 // Note:
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000589 // http://busybox.net?login=john@doe is a valid URL
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200590 // (without '/' between ".net" and "?"),
591 // can't store NUL at sp[-1] - this destroys hostname.
592 *sp++ = '\0';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000593 h->path = sp;
594 }
595
596 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000597 if (sp != NULL) {
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200598 // URL-decode "user:password" string before base64-encoding:
599 // wget http://test:my%20pass@example.com should send
600 // Authorization: Basic dGVzdDpteSBwYXNz
601 // which decodes to "test:my pass".
602 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000603 *sp = '\0';
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100604 free(h->user);
605 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000606 h->host = sp + 1;
607 }
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100608 /* else: h->user remains NULL, or as set by original request
609 * before redirect (if we are here after a redirect).
610 */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000611}
612
Denys Vlasenko34590242018-02-12 16:46:13 +0100613static char *get_sanitized_hdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000614{
615 char *s, *hdrval;
616 int c;
617
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000618 /* retrieve header line */
Denys Vlasenko34590242018-02-12 16:46:13 +0100619 c = fgets_trim_sanitize(fp, " %s\n");
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000620
Denys Vlasenkof836f012011-02-10 23:02:28 +0100621 /* end of the headers? */
622 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000623 return NULL;
624
625 /* convert the header name to lower case */
Denys Vlasenkoea267d52013-07-01 15:01:50 +0200626 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
627 /*
628 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
629 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
630 * "A-Z" maps to "a-z".
631 * "@[\]" can't occur in header names.
632 * "^_" maps to "~,DEL" (which is wrong).
633 * "^" was never seen yet, "_" was seen from web.archive.org
634 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
635 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100636 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200637 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000638
639 /* verify we are at the end of the header name */
640 if (*s != ':')
Denys Vlasenko34590242018-02-12 16:46:13 +0100641 bb_error_msg_and_die("bad header line: %s", G.wget_buf);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000642
643 /* locate the start of the header value */
644 *s++ = '\0';
645 hdrval = skip_whitespace(s);
646
Denys Vlasenkof836f012011-02-10 23:02:28 +0100647 if (c != '\n') {
648 /* Rats! The buffer isn't big enough to hold the entire header value */
649 while (c = getc(fp), c != EOF && c != '\n')
650 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000651 }
652
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000653 return hdrval;
654}
655
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200656static void reset_beg_range_to_zero(void)
657{
James Byrne69374872019-07-02 11:35:03 +0200658 bb_simple_error_msg("restart failed");
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200659 G.beg_range = 0;
660 xlseek(G.output_fd, 0, SEEK_SET);
Denys Vlasenko61441242012-06-17 19:52:25 +0200661 /* Done at the end instead: */
662 /* ftruncate(G.output_fd, 0); */
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200663}
664
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Start "openssl s_client" as a helper process that performs TLS for us.
 *
 * host: server name or address, optionally already in "host:port" form;
 *       if it contains no ':', ":port" is appended.
 * port: port number used when host carries none.
 *
 * The helper's stdin and stdout are connected to one end of a socketpair;
 * the other end is returned to the caller.
 *
 * Returns the parent's socket fd. When ENABLE_FEATURE_WGET_HTTPS is set,
 * returns -1 instead if the child reported that exec failed, so the
 * caller can fall back to another TLS implementation. Otherwise an exec
 * failure makes the child die with an error message.
 */
static int spawn_https_helper_openssl(const char *host, unsigned port)
{
	char *allocated = NULL;
	char *servername;
	int sp[2];
	int pid;
	/* Set by the child if exec fails, read by the parent after xvfork() returns.
	 * NOTE(review): presumably relies on the vforked child sharing
	 * the parent's memory - confirm.
	 */
	IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_simple_perror_msg_and_die("socketpair");

	/* From here on, host is always "name-or-address:port"... */
	if (!strchr(host, ':'))
		host = allocated = xasprintf("%s:%u", host, port);
	/* ...and servername is the same string with the trailing ":port" cut off */
	servername = xstrdup(host);
	strrchr(servername, ':')[0] = '\0';

	fflush_all();
	pid = xvfork();
	if (pid == 0) {
		/* Child */
		char *argv[13];
		char **argp;

		close(sp[0]);
		xmove_fd(sp[1], 0);
		xdup2(0, 1);
		/*
		 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
		 * It prints some debug stuff on stderr, don't know how to suppress it.
		 * Work around by dev-nulling stderr. We lose all error messages :(
		 */
		/* Keep the real stderr on fd 3 so it can be restored if exec fails */
		xmove_fd(2, 3);
		xopen("/dev/null", O_RDWR);
		memset(&argv, 0, sizeof(argv));
		argv[0] = (char*)"openssl";
		argv[1] = (char*)"s_client";
		argv[2] = (char*)"-quiet";
		argv[3] = (char*)"-connect";
		argv[4] = (char*)host;
		/*
		 * Per RFC 6066 Section 3, the only permitted values in the
		 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
		 * IPv4 and IPv6 addresses, port numbers are not allowed.
		 */
		argp = &argv[5];
		if (!is_ip_address(servername)) {
			*argp++ = (char*)"-servername"; //[5]
			*argp++ = (char*)servername;    //[6]
		}
		if (!(option_mask32 & WGET_OPT_NO_CHECK_CERT)) {
			/* Abort on bad server certificate */
			*argp++ = (char*)"-verify";              //[7]
			*argp++ = (char*)"100";                  //[8]
			*argp++ = (char*)"-verify_return_error"; //[9]
			if (!is_ip_address(servername)) {
				*argp++ = (char*)"-verify_hostname"; //[10]
				*argp++ = (char*)servername;     //[11]
			} else {
				/*
				 * Pass the bare address: "host" is "address:port"
				 * here, which is not an IP address literal.
				 */
				*argp++ = (char*)"-verify_ip";       //[10]
				*argp++ = (char*)servername;     //[11]
			}
		}
		//[12] (or earlier) is NULL terminator

		BB_EXECVP(argv[0], argv);
		/* exec failed: put the real stderr back before reporting */
		xmove_fd(3, 2);
# if ENABLE_FEATURE_WGET_HTTPS
		child_failed = 1;
		xfunc_die();
# else
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
# endif
		/* notreached */
	}

	/* Parent */
	free(servername);
	free(allocated);
	close(sp[1]);
# if ENABLE_FEATURE_WGET_HTTPS
	if (child_failed) {
		close(sp[0]);
		return -1;
	}
# endif
	return sp[0];
}
#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100755
#if ENABLE_FEATURE_WGET_HTTPS
/*
 * Insert a TLS-handling child process between us and the server.
 *
 * host:       server name, optionally followed by ":port"
 *             (everything from the last ':' on is dropped before
 *             the name is handed to the TLS code).
 * network_fd: already connected socket to the server.
 * flags:      passed to tls_run_copy_loop(); on the exec path,
 *             TLSLOOP_EXIT_ON_LOCAL_EOF adds "-e" to ssl_client's arguments.
 *
 * The child gets one end of a socketpair as its stdin/stdout and does
 * the TLS work over network_fd, either in-process (BB_MMU) or by
 * exec'ing "ssl_client" with network_fd moved to fd 3.
 * In the parent, the other socketpair end is moved onto network_fd, so
 * the caller keeps using the same descriptor number.
 */
static void spawn_ssl_client(const char *host, int network_fd, int flags)
{
	int pair[2];
	int child;
	char *sni_name, *colon;

	/* Print the note only once: set the flag so later calls skip this */
	if ((option_mask32 & WGET_OPT_NO_CHECK_CERT) == 0) {
		option_mask32 |= WGET_OPT_NO_CHECK_CERT;
		bb_simple_error_msg("note: TLS certificate validation not implemented");
	}

	/* Strip ":port", if any */
	sni_name = xstrdup(host);
	colon = strrchr(sni_name, ':');
	if (colon != NULL)
		*colon = '\0';

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_simple_perror_msg_and_die("socketpair");

	fflush_all();
	child = BB_MMU ? xfork() : xvfork();
	if (child != 0) {
		/* Parent */
		free(sni_name);
		close(pair[1]);
		xmove_fd(pair[0], network_fd);
		return;
	}

	/* Child */
	close(pair[0]);
	xmove_fd(pair[1], 0);
	xdup2(0, 1);
	if (BB_MMU) {
		tls_state_t *tls = new_tls_state();

		tls->ifd = tls->ofd = network_fd;
		tls_handshake(tls, sni_name);
		tls_run_copy_loop(tls, flags);
		exit(0);
	} else {
		char *argv[6];

		xmove_fd(network_fd, 3);
		argv[0] = (char*)"ssl_client";
		argv[1] = (char*)"-s3";
		//TODO: if (!is_ip_address(servername))...
		argv[2] = (char*)"-n";
		argv[3] = sni_name;
		argv[4] = NULL;
		if (flags & TLSLOOP_EXIT_ON_LOCAL_EOF)
			argv[4] = (char*)"-e";
		argv[5] = NULL;
		BB_EXECVP(argv[0], argv);
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
	}
	/* notreached */
}
#endif
812
#if ENABLE_FEATURE_WGET_FTP
/*
 * Set up an FTP (or FTPS) download of target->path.
 *
 * dfpp:   receives the stream of the passive-mode data connection.
 * target: host, path, protocol and optional "user[:password]".
 *         Note: target->user is split in place at the first ':'.
 * lsa:    server address; its port is overwritten with the data port
 *         announced by the server.
 *
 * Steps: connect, expect the 220 greeting, log in, switch to binary
 * mode, query SIZE (sets G.content_len / G.got_clen on success),
 * enter passive mode, open the data connection, optionally request
 * a restart at G.beg_range, and issue RETR.
 *
 * Returns the control connection stream. Dies on any fatal reply.
 */
static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
{
	FILE *sfp;
	char *pass;
	int port;

	sfp = open_socket(lsa);
#if FTPS_SUPPORTED
	if (target->protocol == P_FTPS)
		spawn_ssl_client(target->host, fileno(sfp), TLSLOOP_EXIT_ON_LOCAL_EOF);
#endif

	if (ftpcmd(NULL, NULL, sfp) != 220)
		bb_simple_error_msg_and_die(G.wget_buf);
		/* note: ftpcmd() sanitizes G.wget_buf, ok to print */

	/* Split username:password pair */
	pass = (char*)"busybox"; /* password for "anonymous" */
	if (target->user) {
		/* NOTE(review): pass becomes NULL if user has no ':' -
		 * relies on ftpcmd() accepting a NULL argument; confirm.
		 */
		pass = strchr(target->user, ':');
		if (pass)
			*pass++ = '\0';
	}

	/* Log in */
	switch (ftpcmd("USER ", target->user ?: "anonymous", sfp)) {
	case 230:
		break;
	case 331:
		if (ftpcmd("PASS ", pass, sfp) == 230)
			break;
		/* fall through (failed login) */
	default:
		bb_error_msg_and_die("ftp login: %s", G.wget_buf);
	}

	ftpcmd("TYPE I", NULL, sfp);

	/* Query file size */
	if (ftpcmd("SIZE ", target->path, sfp) == 213) {
		/* The value is parsed starting 4 bytes into the reply */
		G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
		if (G.content_len < 0 || errno) {
			bb_error_msg_and_die("bad SIZE value '%s'", G.wget_buf + 4);
		}
		G.got_clen = 1;
	}

	/* Enter passive mode */
	if (ENABLE_FEATURE_IPV6 && ftpcmd("EPSV", NULL, sfp) == 229) {
		/* good */
	} else
	if (ftpcmd("PASV", NULL, sfp) != 227) {
 pasv_error:
		bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf);
	}
	port = parse_pasv_epsv(G.wget_buf);
	if (port < 0)
		goto pasv_error;

	set_nport(&lsa->u.sa, htons(port));

	*dfpp = open_socket(lsa);

#if FTPS_SUPPORTED
	if (target->protocol == P_FTPS) {
		/* "PROT P" enables encryption of data stream.
		 * Without it (or with "PROT C"), data is sent unencrypted.
		 */
		if (ftpcmd("PROT P", NULL, sfp) == 200)
			spawn_ssl_client(target->host, fileno(*dfpp), /*flags*/ 0);
	}
#endif

	if (G.beg_range != 0) {
		sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
		if (ftpcmd(G.wget_buf, NULL, sfp) == 350) {
			/*
			 * Adjust the remaining length only if SIZE told us
			 * the total. Otherwise G.content_len is not valid,
			 * and subtracting would leave a bogus non-zero value.
			 */
			if (G.got_clen)
				G.content_len -= G.beg_range;
		} else {
			reset_beg_range_to_zero();
		}
	}

//TODO: needs ftp-escaping 0xff and '\n' bytes here.
//Or disallow '\n' altogether via sanitize_string() in parse_url().
//But 0xff's are possible in valid utf8 filenames.
	if (ftpcmd("RETR ", target->path, sfp) > 150)
		bb_error_msg_and_die("bad response to %s: %s", "RETR", G.wget_buf);

	return sfp;
}
#endif
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100904
Denys Vlasenko2384a352011-02-15 00:58:36 +0100905static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200906{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200907#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
908# if ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200909 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200910# endif
911 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200912
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200913 polldata.fd = fileno(dfp);
914 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200915#endif
Martin Lewis94e748d2019-01-10 13:59:30 +0100916 if (!(option_mask32 & WGET_OPT_QUIET)) {
917 if (G.output_fd == 1)
918 fprintf(stderr, "writing to stdout\n");
919 else
920 fprintf(stderr, "saving to '%s'\n", G.fname_out);
921 }
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200922 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200923
924 if (G.chunked)
925 goto get_clen;
926
927 /* Loops only if chunked */
928 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100929
930#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
931 /* Must use nonblocking I/O, otherwise fread will loop
932 * and *block* until it reads full buffer,
933 * which messes up progress bar and/or timeout logic.
934 * Because of nonblocking I/O, we need to dance
935 * very carefully around EAGAIN. See explanation at
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200936 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100937 */
938 ndelay_on(polldata.fd);
939#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100940 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200941 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100942 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200943
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200944#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100945 /* fread internally uses read loop, which in our case
946 * is usually exited when we get EAGAIN.
947 * In this case, libc sets error marker on the stream.
948 * Need to clear it before next fread to avoid possible
949 * rare false positive ferror below. Rare because usually
950 * fread gets more than zero bytes, and we don't fall
951 * into if (n <= 0) ...
952 */
953 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100954#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200955 errno = 0;
956 rdsz = sizeof(G.wget_buf);
957 if (G.got_clen) {
958 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
959 if ((int)G.content_len <= 0)
960 break;
961 rdsz = (unsigned)G.content_len;
962 }
963 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100964 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200965
966 if (n > 0) {
967 xwrite(G.output_fd, G.wget_buf, n);
968#if ENABLE_FEATURE_WGET_STATUSBAR
969 G.transferred += n;
970#endif
971 if (G.got_clen) {
972 G.content_len -= n;
973 if (G.content_len == 0)
974 break;
975 }
976#if ENABLE_FEATURE_WGET_TIMEOUT
977 second_cnt = G.timeout_seconds;
978#endif
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100979 goto bump;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200980 }
981
982 /* n <= 0.
983 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100984 * If error occurs, or EOF is reached, the return value
985 * is a short item count (or zero).
986 * fread does not distinguish between EOF and error.
987 */
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200988 if (errno != EAGAIN) {
989 if (ferror(dfp)) {
990 progress_meter(PROGRESS_END);
James Byrne69374872019-07-02 11:35:03 +0200991 bb_simple_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200992 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100993 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200994 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100995
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200996#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
997 /* It was EAGAIN. There is no data. Wait up to one second
998 * then abort if timed out, or update the bar and try reading again.
999 */
1000 if (safe_poll(&polldata, 1, 1000) == 0) {
1001# if ENABLE_FEATURE_WGET_TIMEOUT
1002 if (second_cnt != 0 && --second_cnt == 0) {
1003 progress_meter(PROGRESS_END);
James Byrne69374872019-07-02 11:35:03 +02001004 bb_simple_error_msg_and_die("download timed out");
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001005 }
1006# endif
1007 /* We used to loop back to poll here,
1008 * but there is no great harm in letting fread
1009 * to try reading anyway.
1010 */
1011 }
Denys Vlasenkofaa9e942014-03-27 16:50:29 +01001012#endif
1013 bump:
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001014 /* Need to do it _every_ second for "stalled" indicator
1015 * to be shown properly.
1016 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02001017 progress_meter(PROGRESS_BUMP);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001018 } /* while (reading data) */
1019
Denys Vlasenkoc60f4462011-02-11 22:23:23 +01001020#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
1021 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +01001022 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +01001023#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +02001024 if (!G.chunked)
1025 break;
1026
Denys Vlasenko8e2174e2018-04-08 18:06:24 +02001027 /* Each chunk ends with "\r\n" - eat it */
Denys Vlasenko34590242018-02-12 16:46:13 +01001028 fgets_trim_sanitize(dfp, NULL);
Denys Vlasenko8e2174e2018-04-08 18:06:24 +02001029 get_clen:
1030 /* chunk size format is "HEXNUM[;name[=val]]\r\n" */
1031 fgets_trim_sanitize(dfp, NULL);
1032 errno = 0;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001033 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko8e2174e2018-04-08 18:06:24 +02001034 /*
1035 * Had a bug with inputs like "ffffffff0001f400"
1036 * smashing the heap later. Ensure >= 0.
1037 */
1038 if (G.content_len < 0 || errno)
1039 bb_error_msg_and_die("bad chunk length '%s'", G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001040 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +02001041 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001042 G.got_clen = 1;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +02001043 /*
1044 * Note that fgets may result in some data being buffered in dfp.
1045 * We loop back to fread, which will retrieve this data.
1046 * Also note that code has to be arranged so that fread
1047 * is done _before_ one-second poll wait - poll doesn't know
1048 * about stdio buffering and can result in spurious one second waits!
1049 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001050 }
1051
Denys Vlasenko9b313dd2019-01-21 13:53:26 +01001052 /* Draw full bar and free its resources */
1053 G.chunked = 0; /* makes it show 100% even for chunked download */
1054 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
1055 progress_meter(PROGRESS_END);
1056 if (G.content_len != 0) {
James Byrne69374872019-07-02 11:35:03 +02001057 bb_simple_perror_msg_and_die("connection closed prematurely");
Denys Vlasenko9b313dd2019-01-21 13:53:26 +01001058 /* GNU wget says "DATE TIME (NN MB/s) - Connection closed at byte NNN. Retrying." */
1059 }
1060
Denys Vlasenko61441242012-06-17 19:52:25 +02001061 /* If -c failed, we restart from the beginning,
1062 * but we do not truncate file then, we do it only now, at the end.
1063 * This lets user to ^C if his 99% complete 10 GB file download
1064 * failed to restart *without* losing the almost complete file.
1065 */
1066 {
1067 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
Denys Vlasenkoe7d853b2020-12-08 19:06:28 +01001068 if (pos != (off_t)-1) {
1069 /* do not truncate if -O- is in use, a user complained about
1070 * "wget -qO- 'http://example.com/empty' >>FILE" truncating FILE.
1071 */
1072 if (!(option_mask32 & WGET_NO_FTRUNCATE))
1073 ftruncate(G.output_fd, pos);
1074 }
Denys Vlasenko61441242012-06-17 19:52:25 +02001075 }
1076
Martin Lewis94e748d2019-01-10 13:59:30 +01001077 if (!(option_mask32 & WGET_OPT_QUIET)) {
1078 if (G.output_fd == 1)
1079 fprintf(stderr, "written to stdout\n");
1080 else
1081 fprintf(stderr, "'%s' saved\n", G.fname_out);
1082 }
Denys Vlasenko7f432802009-06-28 01:02:24 +02001083}
1084
Pere Orga53695632011-02-16 20:09:36 +01001085static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +00001086{
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001087 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001088 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001089 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001090 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +00001091 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001092 char *fname_out_alloc;
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001093 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001094 struct host_info server;
1095 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +00001096
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001097 server.allocated = NULL;
1098 target.allocated = NULL;
1099 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +02001100 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001101
1102 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +00001103
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +00001104 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001105 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +00001106 if (use_proxy) {
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001107 char *proxy = getenv(target.protocol[0] == 'f' ? "ftp_proxy" : "http_proxy");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001108//FIXME: what if protocol is https? Ok to use http_proxy?
Denys Vlasenko2384a352011-02-15 00:58:36 +01001109 use_proxy = (proxy && proxy[0]);
1110 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001111 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +00001112 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001113 if (!use_proxy) {
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001114 server.protocol = target.protocol;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001115 server.port = target.port;
1116 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001117 //free(server.allocated); - can't be non-NULL
1118 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001119 } else {
1120 server.host = target.host;
1121 }
1122 }
1123
1124 if (ENABLE_FEATURE_IPV6)
1125 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001126
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001127 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001128 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001129 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001130 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001131 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001132 if (G.fname_out[0] == '/' || !G.fname_out[0])
1133 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +00001134 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001135 if (G.dir_prefix)
1136 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001137 else {
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001138 /* redirects may free target.path later, need to make a copy */
1139 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001140 }
Eric Andersen29edd002000-12-09 16:55:35 +00001141 }
Denis Vlasenko818322b2007-09-24 18:27:04 +00001142#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001143 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001144#endif
1145
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +00001146 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +01001147 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001148 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001149 G.output_fd = open(G.fname_out, O_WRONLY);
1150 if (G.output_fd >= 0) {
1151 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +00001152 }
1153 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +01001154 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +00001155 }
1156
David Demelier4a9daf22019-08-29 14:05:27 +02001157 redir_limit = 16;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001158 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +00001159 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001160 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001161 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
1162 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
1163 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +00001164 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001165 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +01001166 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1167 G.got_clen = 0;
1168 G.chunked = 0;
Denys Vlasenko403f2992018-02-06 15:15:08 +01001169 if (use_proxy || target.protocol[0] != 'f' /*not ftp[s]*/) {
Eric Andersen79757c92001-04-05 21:45:54 +00001170 /*
1171 * HTTP session
1172 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001173 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001174 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001175
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001176 /* Open socket to http(s) server */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001177#if ENABLE_FEATURE_WGET_OPENSSL
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001178 /* openssl (and maybe internal TLS) support is configured */
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001179 if (server.protocol == P_HTTPS) {
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001180 /* openssl-based helper
1181 * Inconvenient API since we can't give it an open fd
1182 */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001183 int fd = spawn_https_helper_openssl(server.host, server.port);
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001184# if ENABLE_FEATURE_WGET_HTTPS
1185 if (fd < 0) { /* no openssl? try internal */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001186 sfp = open_socket(lsa);
Denys Vlasenko403f2992018-02-06 15:15:08 +01001187 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001188 goto socket_opened;
1189 }
1190# else
1191 /* We don't check for exec("openssl") failure in this case */
1192# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001193 sfp = fdopen(fd, "r+");
1194 if (!sfp)
Denys Vlasenko899ae532018-04-01 19:59:37 +02001195 bb_die_memory_exhausted();
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001196 goto socket_opened;
1197 }
1198 sfp = open_socket(lsa);
1199 socket_opened:
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001200#elif ENABLE_FEATURE_WGET_HTTPS
1201 /* Only internal TLS support is configured */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001202 sfp = open_socket(lsa);
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001203 if (server.protocol == P_HTTPS)
Denys Vlasenko403f2992018-02-06 15:15:08 +01001204 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001205#else
1206 /* ssl (https) support is not configured */
1207 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +01001208#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001209 /* Send HTTP request */
1210 if (use_proxy) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001211 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001212 target.protocol, target.host,
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001213 target.path);
1214 } else {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001215 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001216 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1217 target.path);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001218 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001219 if (!USR_HEADER_HOST)
1220 SENDFMT(sfp, "Host: %s\r\n", target.host);
1221 if (!USR_HEADER_USER_AGENT)
1222 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +00001223
Denys Vlasenko9213a552011-02-10 13:23:45 +01001224 /* Ask server to close the connection as soon as we are done
1225 * (IOW: we do not intend to send more requests)
1226 */
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001227 SENDFMT(sfp, "Connection: close\r\n");
Denys Vlasenko9213a552011-02-10 13:23:45 +01001228
Denis Vlasenko9cade082006-11-21 10:43:02 +00001229#if ENABLE_FEATURE_WGET_AUTHENTICATION
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001230 if (target.user && !USR_HEADER_AUTH) {
1231 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001232 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001233 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001234 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1235 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001236 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001237 }
Eric Andersen79757c92001-04-05 21:45:54 +00001238#endif
1239
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001240 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1241 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +01001242
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001243#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001244 if (G.extra_headers) {
1245 log_io(G.extra_headers);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001246 fputs(G.extra_headers, sfp);
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001247 }
Denis Vlasenko5a2ad692009-03-04 14:13:37 +00001248
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001249 if (option_mask32 & WGET_OPT_POST_DATA) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001250 SENDFMT(sfp,
Denys Vlasenko9213a552011-02-10 13:23:45 +01001251 "Content-Type: application/x-www-form-urlencoded\r\n"
1252 "Content-Length: %u\r\n"
1253 "\r\n"
1254 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +02001255 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +01001256 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001257 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001258#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +01001259 {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001260 SENDFMT(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001261 }
Eric Andersen79757c92001-04-05 21:45:54 +00001262
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001263 fflush(sfp);
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001264
Denys Vlasenko4e08a122017-01-16 17:31:05 +01001265/* Tried doing this unconditionally.
1266 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1267 */
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001268#if SSL_SUPPORTED
1269 if (target.protocol == P_HTTPS) {
1270 /* If we use SSL helper, keeping our end of the socket open for writing
1271 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1272 * even after child closes its copy of the fd.
1273 * This helps:
1274 */
1275 shutdown(fileno(sfp), SHUT_WR);
1276 }
1277#endif
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001278
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001279 /*
1280 * Retrieve HTTP response line and check for "200" status code.
1281 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001282 read_response:
Denys Vlasenko34590242018-02-12 16:46:13 +01001283 fgets_trim_sanitize(sfp, " %s\n");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001284
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001285 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001286 str = skip_non_whitespace(str);
1287 str = skip_whitespace(str);
1288 // FIXME: no error check
1289 // xatou wouldn't work: "200 OK"
1290 status = atoi(str);
1291 switch (status) {
1292 case 0:
1293 case 100:
Denys Vlasenko34590242018-02-12 16:46:13 +01001294 while (get_sanitized_hdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001295 /* eat all remaining headers */;
1296 goto read_response;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001297
1298 /* Success responses */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001299 case 200:
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001300 /* fall through */
1301 case 201: /* 201 Created */
1302/* "The request has been fulfilled and resulted in a new resource being created" */
Denys Vlasenkoef159702016-09-01 11:16:22 +02001303 /* Standard wget is reported to treat this as success */
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001304 /* fall through */
1305 case 202: /* 202 Accepted */
1306/* "The request has been accepted for processing, but the processing has not been completed" */
1307 /* Treat as success: fall through */
1308 case 203: /* 203 Non-Authoritative Information */
1309/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1310 /* fall through */
1311 case 204: /* 204 No Content */
Denis Vlasenko50b5cac2008-06-22 16:28:02 +00001312/*
1313Response 204 doesn't say "null file", it says "metadata
1314has changed but data didn't":
1315
1316"10.2.5 204 No Content
1317The server has fulfilled the request but does not need to return
1318an entity-body, and might want to return updated metainformation.
1319The response MAY include new or updated metainformation in the form
1320of entity-headers, which if present SHOULD be associated with
1321the requested variant.
1322
1323If the client is a user agent, it SHOULD NOT change its document
1324view from that which caused the request to be sent. This response
1325is primarily intended to allow input for actions to take place
1326without causing a change to the user agent's active document view,
1327although any new or updated metainformation SHOULD be applied
1328to the document currently in the user agent's active view.
1329
1330The 204 response MUST NOT include a message-body, and thus
1331is always terminated by the first empty line after the header fields."
1332
1333However, in real world it was observed that some web servers
1334(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1335*/
Denys Vlasenkobf146b82012-06-13 17:31:07 +02001336 if (G.beg_range != 0) {
1337 /* "Range:..." was not honored by the server.
1338 * Restart download from the beginning.
1339 */
1340 reset_beg_range_to_zero();
1341 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001342 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001343 /* 205 Reset Content ?? what to do on this ?? */
1344
Denys Vlasenkofb132e42010-10-29 11:46:52 +02001345 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001346 case 301:
1347 case 302:
1348 case 303:
1349 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001350
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001351 case 206: /* Partial Content */
1352 if (G.beg_range != 0)
1353 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001354 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001355 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001356 /* fall through */
1357 default:
Denys Vlasenko34590242018-02-12 16:46:13 +01001358 bb_error_msg_and_die("server returned error: %s", G.wget_buf);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001359 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001360
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001361 /*
1362 * Retrieve HTTP headers.
1363 */
Denys Vlasenko34590242018-02-12 16:46:13 +01001364 while ((str = get_sanitized_hdr(sfp)) != NULL) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001365 static const char keywords[] ALIGN1 =
1366 "content-length\0""transfer-encoding\0""location\0";
1367 enum {
1368 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1369 };
Matthijs van de Water0d586662009-08-22 20:19:48 +02001370 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001371
Denys Vlasenko34590242018-02-12 16:46:13 +01001372 /* get_sanitized_hdr converted "FOO:" string to lowercase */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001373
Matthijs van de Water0d586662009-08-22 20:19:48 +02001374 /* strip trailing whitespace */
1375 char *s = strchrnul(str, '\0') - 1;
1376 while (s >= str && (*s == ' ' || *s == '\t')) {
1377 *s = '\0';
1378 s--;
1379 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001380 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001381 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001382 G.content_len = BB_STRTOOFF(str, NULL, 10);
1383 if (G.content_len < 0 || errno) {
Denys Vlasenko34590242018-02-12 16:46:13 +01001384 bb_error_msg_and_die("content-length %s is garbage", str);
Eric Andersen79757c92001-04-05 21:45:54 +00001385 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001386 G.got_clen = 1;
1387 continue;
1388 }
1389 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001390 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenko34590242018-02-12 16:46:13 +01001391 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001392 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001393 }
1394 if (key == KEY_location && status >= 300) {
1395 if (--redir_limit == 0)
James Byrne69374872019-07-02 11:35:03 +02001396 bb_simple_error_msg_and_die("too many redirections");
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001397 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001398 if (str[0] == '/') {
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001399 free(redirected_path);
Denys Vlasenko34590242018-02-12 16:46:13 +01001400 target.path = redirected_path = xstrdup(str + 1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001401 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001402 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001403 parse_url(str, &target);
1404 if (!use_proxy) {
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001405 /* server.user remains untouched */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001406 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +01001407 server.allocated = NULL;
Denys Vlasenko9634e8a2018-07-02 18:31:02 +02001408 server.protocol = target.protocol;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001409 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001410 /* strip_ipv6_scope_id(target.host); - no! */
1411 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001412 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +00001413 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001414 goto resolve_lsa;
1415 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +00001416 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001417 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +00001418 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001419 }
1420// if (status >= 300)
1421// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001422
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001423 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +00001424 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001425 } else {
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +02001426#if ENABLE_FEATURE_WGET_FTP
Eric Andersen79757c92001-04-05 21:45:54 +00001427 /*
1428 * FTP session
1429 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001430 sfp = prepare_ftp_session(&dfp, &target, lsa);
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +02001431#endif
Eric Andersen96700832000-09-04 15:15:55 +00001432 }
Denis Vlasenko77105632007-09-24 15:04:00 +00001433
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001434 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001435
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001436 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001437 if (G.output_fd < 0)
1438 G.output_fd = xopen(G.fname_out, G.o_flags);
1439 retrieve_file_data(dfp);
1440 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1441 xclose(G.output_fd);
1442 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001443 }
Martin Lewis46fc3292019-01-04 18:26:04 +01001444 } else {
Martin Lewis94e748d2019-01-10 13:59:30 +01001445 if (!(option_mask32 & WGET_OPT_QUIET))
1446 fprintf(stderr, "remote file exists\n");
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +00001447 }
Eric Andersen79757c92001-04-05 21:45:54 +00001448
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +02001449#if ENABLE_FEATURE_WGET_FTP
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001450 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001451 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +00001452 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001453 if (ftpcmd(NULL, NULL, sfp) != 226)
Denys Vlasenko34590242018-02-12 16:46:13 +01001454 bb_error_msg_and_die("ftp error: %s", G.wget_buf);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001455 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +00001456 }
Sergey Ponomarevb6e6c832021-01-17 20:35:08 +02001457#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001458 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +00001459
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001460 free(server.allocated);
1461 free(target.allocated);
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001462 free(server.user);
1463 free(target.user);
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001464 free(fname_out_alloc);
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001465 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +00001466}
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001467
1468int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1469int wget_main(int argc UNUSED_PARAM, char **argv)
1470{
1471#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1472 static const char wget_longopts[] ALIGN1 =
1473 /* name, has_arg, val */
1474 "continue\0" No_argument "c"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001475 "quiet\0" No_argument "q"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +01001476 "server-response\0" No_argument "S"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001477 "output-document\0" Required_argument "O"
Martin Lewis64f35362018-12-26 16:28:45 +01001478 "output-file\0" Required_argument "o"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001479 "directory-prefix\0" Required_argument "P"
1480 "proxy\0" Required_argument "Y"
1481 "user-agent\0" Required_argument "U"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001482IF_FEATURE_WGET_TIMEOUT(
1483 "timeout\0" Required_argument "T")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001484 /* Ignored: */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001485IF_DESKTOP( "tries\0" Required_argument "t")
1486 "header\0" Required_argument "\xff"
1487 "post-data\0" Required_argument "\xfe"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001488 "spider\0" No_argument "\xfd"
Denys Vlasenko0972c7f2018-05-28 14:36:26 +02001489 "no-check-certificate\0" No_argument "\xfc"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001490 /* Ignored (we always use PASV): */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001491IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001492 /* Ignored (we don't support caching) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001493IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1494IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1495IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1496IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1497IF_DESKTOP( "no-parent\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001498 ;
Denys Vlasenko036585a2017-08-08 16:38:18 +02001499# define GETOPT32 getopt32long
1500# define LONGOPTS ,wget_longopts
1501#else
1502# define GETOPT32 getopt32
1503# define LONGOPTS
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001504#endif
1505
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001506#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1507 llist_t *headers_llist = NULL;
1508#endif
1509
1510 INIT_G();
1511
Lauri Kasanend074b412013-10-12 21:47:07 +02001512#if ENABLE_FEATURE_WGET_TIMEOUT
1513 G.timeout_seconds = 900;
1514 signal(SIGALRM, alarm_handler);
1515#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001516 G.proxy_flag = "on"; /* use proxies if env vars are set */
1517 G.user_agent = "Wget"; /* "User-Agent" header field */
1518
Denys Vlasenko22542ec2017-08-08 21:55:02 +02001519 GETOPT32(argv, "^"
Martin Lewis64f35362018-12-26 16:28:45 +01001520 "cqSO:o:P:Y:U:T:+"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001521 /*ignored:*/ "t:"
1522 /*ignored:*/ "n::"
1523 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1524 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1525 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1526 * -nH --no-host-directories: wget -r http://host/ won't create host/
1527 * -np --no-parent
1528 * "n::" above says that we accept -n[ARG].
1529 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1530 */
Denys Vlasenko22542ec2017-08-08 21:55:02 +02001531 "\0"
1532 "-1" /* at least one URL */
1533 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::") /* --header is a list */
Denys Vlasenko036585a2017-08-08 16:38:18 +02001534 LONGOPTS
Martin Lewis64f35362018-12-26 16:28:45 +01001535 , &G.fname_out, &G.fname_log, &G.dir_prefix,
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001536 &G.proxy_flag, &G.user_agent,
1537 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001538 NULL, /* -t RETRIES */
1539 NULL /* -n[ARG] */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001540 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1541 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1542 );
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001543#if 0 /* option bits debug */
1544 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1545 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1546 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1547 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1548 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
Denys Vlasenko0972c7f2018-05-28 14:36:26 +02001549 if (option_mask32 & WGET_OPT_NO_CHECK_CERT) bb_error_msg("--no-check-certificate");
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001550 exit(0);
1551#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001552 argv += optind;
1553
1554#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1555 if (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001556 int size = 0;
1557 char *hdr;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001558 llist_t *ll = headers_llist;
1559 while (ll) {
1560 size += strlen(ll->data) + 2;
1561 ll = ll->link;
1562 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001563 G.extra_headers = hdr = xmalloc(size + 1);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001564 while (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001565 int bit;
1566 const char *words;
1567
1568 size = sprintf(hdr, "%s\r\n",
1569 (char*)llist_pop(&headers_llist));
1570 /* a bit like index_in_substrings but don't match full key */
1571 bit = 1;
1572 words = wget_user_headers;
1573 while (*words) {
1574 if (strstr(hdr, words) == hdr) {
1575 G.user_headers |= bit;
1576 break;
1577 }
1578 bit <<= 1;
1579 words += strlen(words) + 1;
1580 }
1581 hdr += size;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001582 }
1583 }
1584#endif
1585
Denys Vlasenko2384a352011-02-15 00:58:36 +01001586 G.output_fd = -1;
1587 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1588 if (G.fname_out) { /* -O FILE ? */
1589 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1590 G.output_fd = 1;
Denys Vlasenkoe7d853b2020-12-08 19:06:28 +01001591 option_mask32 = (option_mask32 & (~WGET_OPT_CONTINUE)) | WGET_NO_FTRUNCATE;
Denys Vlasenko2384a352011-02-15 00:58:36 +01001592 }
1593 /* compat with wget: -O FILE can overwrite */
1594 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1595 }
1596
Martin Lewis64f35362018-12-26 16:28:45 +01001597 G.log_fd = -1;
1598 if (G.fname_log) { /* -o FILE ? */
1599 if (!LONE_DASH(G.fname_log)) { /* not -o - ? */
1600 /* compat with wget: -o FILE can overwrite */
1601 G.log_fd = xopen(G.fname_log, O_WRONLY | O_CREAT | O_TRUNC);
1602 /* Redirect only stderr to log file, so -O - will work */
1603 xdup2(G.log_fd, STDERR_FILENO);
1604 }
1605 }
1606
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001607 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +01001608 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001609
Denys Vlasenko28556b92011-02-15 11:03:53 +01001610 if (G.output_fd >= 0)
1611 xclose(G.output_fd);
1612
Martin Lewis64f35362018-12-26 16:28:45 +01001613 if (G.log_fd >= 0)
1614 xclose(G.log_fd);
1615
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +02001616#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1617 free(G.extra_headers);
1618#endif
1619 FINI_G();
1620
Pere Orga53695632011-02-16 20:09:36 +01001621 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001622}