blob: 58a51d9fff154db83c36794fe100ba0f15df2f40 [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020011//config:config WGET
Denys Vlasenko4eed2c62017-07-18 22:01:24 +020012//config: bool "wget (35 kb)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020013//config: default y
14//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020015//config: wget is a utility for non-interactive download of files from HTTP
16//config: and FTP servers.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020017//config:
Denys Vlasenkof5604222017-01-10 14:58:54 +010018//config:config FEATURE_WGET_LONG_OPTIONS
19//config: bool "Enable long options"
20//config: default y
21//config: depends on WGET && LONG_OPTS
22//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020023//config:config FEATURE_WGET_STATUSBAR
Denys Vlasenkof5604222017-01-10 14:58:54 +010024//config: bool "Enable progress bar (+2k)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020025//config: default y
26//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020027//config:
28//config:config FEATURE_WGET_AUTHENTICATION
29//config: bool "Enable HTTP authentication"
30//config: default y
31//config: depends on WGET
32//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020033//config: Support authenticated HTTP transfers.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020034//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020035//config:config FEATURE_WGET_TIMEOUT
36//config: bool "Enable timeout option -T SEC"
37//config: default y
38//config: depends on WGET
39//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020040//config: Supports network read and connect timeouts for wget,
41//config: so that wget will give up and timeout, through the -T
42//config: command line option.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020043//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020044//config: Currently only connect and network data read timeout are
45//config: supported (i.e., timeout is not applied to the DNS query). When
46//config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
47//config: will work in addition to -T.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020048//config:
Denys Vlasenko9a647c32017-01-23 01:08:16 +010049//config:config FEATURE_WGET_HTTPS
50//config: bool "Support HTTPS using internal TLS code"
Denys Vlasenko403f2992018-02-06 15:15:08 +010051//it also enables FTPS support, but it's not well tested yet
Denys Vlasenko9a647c32017-01-23 01:08:16 +010052//config: default y
53//config: depends on WGET
54//config: select TLS
55//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020056//config: wget will use internal TLS code to connect to https:// URLs.
57//config: Note:
58//config: On NOMMU machines, ssl_helper applet should be available
59//config: in the $PATH for this to work. Make sure to select that applet.
Denys Vlasenko9a647c32017-01-23 01:08:16 +010060//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020061//config: Note: currently, TLS code only makes TLS I/O work, it
62//config: does *not* check that the peer is who it claims to be, etc.
63//config: IOW: it uses peer-supplied public keys to establish encryption
64//config: and signing keys, then encrypts and signs outgoing data and
65//config: decrypts incoming data.
66//config: It does not check signature hashes on the incoming data:
67//config: this means that attackers manipulating TCP packets can
68//config: send altered data and we unknowingly receive garbage.
69//config: (This check might be relatively easy to add).
70//config: It does not check public key's certificate:
71//config: this means that the peer may be an attacker impersonating
72//config: the server we think we are talking to.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010073//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020074//config: If you think this is unacceptable, consider this. As more and more
75//config: servers switch to HTTPS-only operation, without such "crippled"
76//config: TLS code it is *impossible* to simply download a kernel source
77//config: from kernel.org. Which can in real world translate into
78//config: "my small automatic tooling to build cross-compilers from sources
79//config: no longer works, I need to additionally keep a local copy
80//config: of ~4 megabyte source tarball of a SSL library and ~2 megabyte
81//config: source of wget, need to compile and build both before I can
82//config: download anything. All this despite the fact that the build
83//config: is done in a QEMU sandbox on a machine with absolutely nothing
84//config: worth stealing, so I don't care if someone would go to a lot
85//config: of trouble to intercept my HTTPS download to send me an altered
86//config: kernel tarball".
Denys Vlasenko67f6db62017-01-30 16:27:37 +010087//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020088//config: If you still think this is unacceptable, send patches.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010089//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020090//config: If you still think this is unacceptable, do not want to send
91//config: patches, but do want to waste bandwidth explaining how wrong
92//config: it is, you will be ignored.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010093//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +020094//config:config FEATURE_WGET_OPENSSL
95//config: bool "Try to connect to HTTPS using openssl"
96//config: default y
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020097//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020098//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020099//config: Try to use openssl to handle HTTPS.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200100//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200101//config: OpenSSL has a simple SSL client for debug purposes.
102//config: If you select this option, wget will effectively run:
103//config: "openssl s_client -quiet -connect hostname:443
104//config: -servername hostname 2>/dev/null" and pipe its data
105//config: through it. -servername is not used if hostname is numeric.
106//config: Note inconvenient API: host resolution is done twice,
107//config: and there is no guarantee openssl's idea of IPv6 address
108//config: format is the same as ours.
109//config: Another problem is that s_client prints debug information
110//config: to stderr, and it needs to be suppressed. This means
111//config: all error messages get suppressed too.
112//config: openssl is also a big binary, often dynamically linked
113//config: against ~15 libraries.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200114//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200115//config: If openssl can't be executed, internal TLS code will be used
116//config: (if you enabled it); if openssl can be executed but fails later,
117//config: wget can't detect this, and download will fail.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200118
119//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
120
121//kbuild:lib-$(CONFIG_WGET) += wget.o
122
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100123//usage:#define wget_trivial_usage
124//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200125//usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100126//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200127/* Since we ignore these opts, we don't show them in --help */
Denys Vlasenko92e1b082015-10-20 21:51:52 +0200128/* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
129/* //usage: " [-nv] [-nc] [-nH] [-np]" */
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100130//usage: " [-S|--server-response] [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100131//usage: )
132//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100133//usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-S] [-U AGENT]"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100134//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
135//usage: )
136//usage:#define wget_full_usage "\n\n"
137//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200138//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100139//usage: "\n --spider Only check URL existence: $? is 0 if exists"
Denys Vlasenko0972c7f2018-05-28 14:36:26 +0200140///////: "\n --no-check-certificate Don't validate the server's certificate"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100141//usage: )
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200142//usage: "\n -c Continue retrieval of aborted transfer"
143//usage: "\n -q Quiet"
144//usage: "\n -P DIR Save to DIR (default .)"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100145//usage: "\n -S Show server response"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200146//usage: IF_FEATURE_WGET_TIMEOUT(
147//usage: "\n -T SEC Network read timeout is SEC seconds"
148//usage: )
149//usage: "\n -O FILE Save to FILE ('-' for stdout)"
150//usage: "\n -U STR Use STR for User-Agent header"
151//usage: "\n -Y on/off Use proxy"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100152
Denis Vlasenkob6adbf12007-05-26 19:00:18 +0000153#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000154
/*
 * Protocol tracing helpers.
 * Change "#if 0" to "#if 1" to have log_io() report through bb_error_msg()
 * and to have SENDFMT() log every formatted line before sending it.
 * In the debug variant SENDFMT() is a statement, not an expression:
 * it must not end in ';' itself, or "if (x) SENDFMT(...); else ..." breaks.
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
# define SENDFMT(fp, fmt, ...) \
	do { \
		log_io("> " fmt, ##__VA_ARGS__); \
		fprintf(fp, fmt, ##__VA_ARGS__); \
	} while (0)
#else
# define log_io(...) ((void)0)
# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100166
167
Denys Vlasenko9a647c32017-01-23 01:08:16 +0100168#define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100169
Eric Andersen79757c92001-04-05 21:45:54 +0000170struct host_info {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100171 char *allocated;
Denis Vlasenko818322b2007-09-24 18:27:04 +0000172 const char *path;
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100173 char *user;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100174 const char *protocol;
Denis Vlasenko818322b2007-09-24 18:27:04 +0000175 char *host;
176 int port;
Eric Andersen79757c92001-04-05 21:45:54 +0000177};
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200178static const char P_FTP[] ALIGN1 = "ftp";
179static const char P_HTTP[] ALIGN1 = "http";
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100180#if SSL_SUPPORTED
Denys Vlasenko403f2992018-02-06 15:15:08 +0100181# if ENABLE_FEATURE_WGET_HTTPS
182static const char P_FTPS[] ALIGN1 = "ftps";
183# endif
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200184static const char P_HTTPS[] ALIGN1 = "https";
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100185#endif
Eric Andersen79757c92001-04-05 21:45:54 +0000186
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * A header supplied by the user suppresses our built-in header of the
 * same name. One bit per such header is kept in G.user_headers.
 * The auth bits evaluate to 0 when authentication support is disabled.
 */
enum {
	HDR_HOST       = (1<<0),
	HDR_USER_AGENT = (1<<1),
	HDR_RANGE      = (1<<2),
	HDR_AUTH       = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
	HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
};
/* NUL-separated header names, listed in the same order as the HDR_xxx bits */
static const char wget_user_headers[] ALIGN1 =
	"Host:\0"
	"User-Agent:\0"
	"Range:\0"
# if ENABLE_FEATURE_WGET_AUTHENTICATION
	"Authorization:\0"
	"Proxy-Authorization:\0"
# endif
	;
# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
#else /* No long options, no user-headers :( */
# define USR_HEADER_HOST       0
# define USR_HEADER_USER_AGENT 0
# define USR_HEADER_RANGE      0
# define USR_HEADER_AUTH       0
# define USR_HEADER_PROXY_AUTH 0
#endif
Denis Vlasenko77105632007-09-24 15:04:00 +0000217
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200218/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +0000219struct globals {
220 off_t content_len; /* Content-length of the file */
221 off_t beg_range; /* Range at which continue begins */
222#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +0000223 off_t transferred; /* Number of bytes transferred so far */
224 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +0100225 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +0000226#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200227 char *dir_prefix;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100228#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200229 char *post_data;
230 char *extra_headers;
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +0100231 unsigned char user_headers; /* Headers mentioned by the user */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100232#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200233 char *fname_out; /* where to direct output (-O) */
234 const char *proxy_flag; /* Use proxies if env vars are set */
235 const char *user_agent; /* "User-Agent" header field */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100236 int output_fd;
237 int o_flags;
Denys Vlasenko5084bae2018-11-24 21:56:21 +0100238#if ENABLE_FEATURE_WGET_TIMEOUT
239 unsigned timeout_seconds;
240 smallint die_if_timed_out;
241#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200242 smallint chunked; /* chunked transfer encoding */
243 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100244 /* Local downloads do benefit from big buffer.
245 * With 512 byte buffer, it was measured to be
246 * an order of magnitude slower than with big one.
247 */
Denys Vlasenko5084bae2018-11-24 21:56:21 +0100248 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024] ALIGNED(sizeof(long));
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +0100249} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100250#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200251#define INIT_G() do { \
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200252 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200253} while (0)
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +0200254#define FINI_G() do { \
255 FREE_PTR_TO_GLOBALS(); \
256} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +0000257
258
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200259/* Must match option string! */
260enum {
261 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200262 WGET_OPT_QUIET = (1 << 1),
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100263 WGET_OPT_SERVER_RESPONSE = (1 << 2),
264 WGET_OPT_OUTNAME = (1 << 3),
265 WGET_OPT_PREFIX = (1 << 4),
266 WGET_OPT_PROXY = (1 << 5),
267 WGET_OPT_USER_AGENT = (1 << 6),
268 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
269 WGET_OPT_RETRIES = (1 << 8),
270 WGET_OPT_nsomething = (1 << 9),
271 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
272 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
273 WGET_OPT_SPIDER = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
Denys Vlasenko0972c7f2018-05-28 14:36:26 +0200274 WGET_OPT_NO_CHECK_CERT = (1 << 13) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200275};
276
/* Values of the "flag" argument of progress_meter() */
enum {
	PROGRESS_START = -1, /* initialize the bar, then draw it */
	PROGRESS_END   = 0,  /* draw it one last time, then tear it down */
	PROGRESS_BUMP  = 1,  /* just redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000282#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000283static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000284{
Denys Vlasenko26602b82018-11-23 19:14:52 +0100285 int notty;
286
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200287 if (option_mask32 & WGET_OPT_QUIET)
288 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000289
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200290 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100291 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000292
Denys Vlasenko26602b82018-11-23 19:14:52 +0100293 notty = bb_progress_update(&G.pmt,
Denys Vlasenko2384a352011-02-15 00:58:36 +0100294 G.beg_range,
295 G.transferred,
296 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
297 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000298
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200299 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100300 bb_progress_free(&G.pmt);
Denys Vlasenko26602b82018-11-23 19:14:52 +0100301 if (notty == 0)
302 bb_putchar_stderr('\n'); /* it's tty */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100303 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000304 }
305}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200306#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000307static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000308#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000309
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000310
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits host in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not of the form "[addr%zone]" or "[addr%zone]:port"
 * is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	if (scope > cp) {
		/* '%' is outside the brackets (e.g. "[::1]:%41"): not a zone id.
		 * Copying "]..." forward to a later position would make
		 * the string longer and write past its end.
		 */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]..." */
	overlapping_strcpy(scope, cp);
}
346
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode character string.
 * The result is built in, and returned as, G.wget_buf: it is only valid
 * until that buffer is used for something else.
 */
static char *base64enc(const char *str)
{
	/* paranoia: never encode more input than this */
	size_t limit = sizeof(G.wget_buf)/4*3 - 10;
	unsigned len = strnlen(str, limit);
	char *out = G.wget_buf;

	bb_uuencode(out, str, len, bb_uuenc_tbl_base64);
	return out;
}
#endif
357
#if ENABLE_FEATURE_WGET_TIMEOUT
/* Aborts the program, but only while a guarded operation is in progress */
static void alarm_handler(int sig UNUSED_PARAM)
{
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	if (!G.die_if_timed_out)
		return;
	bb_error_msg_and_die("download timed out");
}
/* Arm the timeout before a network operation (no-op if no timeout is set) */
static void set_alarm(void)
{
	if (G.timeout_seconds == 0)
		return;
	alarm(G.timeout_seconds);
	G.die_if_timed_out = 1;
}
/* The operation finished: an alarm arriving from now on is ignored */
# define clear_alarm() ((void)(G.die_if_timed_out = 0))
#else
# define set_alarm()   ((void)0)
# define clear_alarm() ((void)0)
#endif
377
Denys Vlasenkoed727612016-07-25 21:34:57 +0200378#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * is_ip_address() reports whether a string is a numeric IPv4 address
 * (or, with FEATURE_IPV6, a numeric IPv6 address) rather than a host
 * name. It returns 1 if inet_pton() accepts the string, else 0.
 *
 * TODO add proper error checking when inet_pton() returns -1
 * (some form of system error has occurred, and errno is set)
 */
static int is_ip_address(const char *string)
{
	struct sockaddr_in v4;
	int rc;

	rc = inet_pton(AF_INET, string, &v4.sin_addr);
# if ENABLE_FEATURE_IPV6
	if (rc == 0) {
		/* not IPv4 notation: give IPv6 a try */
		struct sockaddr_in6 v6;
		rc = inet_pton(AF_INET6, string, &v6.sin6_addr);
	}
# endif
	return rc == 1;
}
400#endif
401
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000402static FILE *open_socket(len_and_sockaddr *lsa)
403{
Lauri Kasanend074b412013-10-12 21:47:07 +0200404 int fd;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000405 FILE *fp;
406
Denys Vlasenko6701e912016-03-17 15:58:16 +0100407 set_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200408 fd = xconnect_stream(lsa);
Denys Vlasenko6701e912016-03-17 15:58:16 +0100409 clear_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200410
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000411 /* glibc 2.4 seems to try seeking on it - ??! */
412 /* hopefully it understands what ESPIPE means... */
Lauri Kasanend074b412013-10-12 21:47:07 +0200413 fp = fdopen(fd, "r+");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100414 if (!fp)
Denys Vlasenko899ae532018-04-01 19:59:37 +0200415 bb_die_memory_exhausted();
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000416
417 return fp;
418}
419
/* We balk at any control chars in other side's messages.
 * This prevents nasty surprises (e.g. ESC sequences) in "Location:" URLs
 * and error messages.
 *
 * The only exception is tabs, which are converted to (one) space:
 * HTTP's "headers: <whitespace> values" may have those.
 *
 * The string is cut at the first byte below 0x20 that is not a tab.
 * Works in place and returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cur;

	for (cur = (unsigned char *) s; *cur != '\0'; cur++) {
		if (*cur >= ' ')
			continue;
		if (*cur != '\t')
			break; /* control char: cut here */
		*cur = ' ';
	}
	*cur = '\0';
	return s;
}
441
Denys Vlasenkof836f012011-02-10 23:02:28 +0100442/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
Denys Vlasenko34590242018-02-12 16:46:13 +0100443static char fgets_trim_sanitize(FILE *fp, const char *fmt)
Denys Vlasenkof836f012011-02-10 23:02:28 +0100444{
445 char c;
446 char *buf_ptr;
447
Denys Vlasenko6701e912016-03-17 15:58:16 +0100448 set_alarm();
Denys Vlasenko34590242018-02-12 16:46:13 +0100449 if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
Denys Vlasenkof836f012011-02-10 23:02:28 +0100450 bb_perror_msg_and_die("error getting response");
Denys Vlasenko6701e912016-03-17 15:58:16 +0100451 clear_alarm();
Denys Vlasenkof836f012011-02-10 23:02:28 +0100452
453 buf_ptr = strchrnul(G.wget_buf, '\n');
454 c = *buf_ptr;
Denys Vlasenko34590242018-02-12 16:46:13 +0100455#if 1
456 /* Disallow any control chars: trim at first char < 0x20 */
457 sanitize_string(G.wget_buf);
458#else
Denys Vlasenkof836f012011-02-10 23:02:28 +0100459 *buf_ptr = '\0';
460 buf_ptr = strchrnul(G.wget_buf, '\r');
461 *buf_ptr = '\0';
Denys Vlasenko34590242018-02-12 16:46:13 +0100462#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100463
464 log_io("< %s", G.wget_buf);
465
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100466 if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE))
467 fprintf(stderr, fmt, G.wget_buf);
468
Denys Vlasenkof836f012011-02-10 23:02:28 +0100469 return c;
470}
471
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100472static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000473{
474 int result;
475 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100476 if (!s2)
477 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000478 fprintf(fp, "%s%s\r\n", s1, s2);
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100479 /* With --server-response, wget also shows its ftp commands */
480 if (option_mask32 & WGET_OPT_SERVER_RESPONSE)
481 fprintf(stderr, "--> %s%s\n\n", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000482 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100483 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000484 }
485
Denys Vlasenko34590242018-02-12 16:46:13 +0100486 /* Read until "Nxx something" is received */
487 G.wget_buf[3] = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000488 do {
Denys Vlasenko34590242018-02-12 16:46:13 +0100489 fgets_trim_sanitize(fp, "%s\n");
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100490 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000491
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100492 G.wget_buf[3] = '\0';
493 result = xatoi_positive(G.wget_buf);
494 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000495 return result;
496}
497
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100498static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000499{
500 char *url, *p, *sp;
501
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100502 free(h->allocated);
503 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000504
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100505 h->protocol = P_FTP;
506 p = strstr(url, "://");
507 if (p) {
508 *p = '\0';
509 h->host = p + 3;
510 if (strcmp(url, P_FTP) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200511 h->port = bb_lookup_std_port(P_FTP, "tcp", 21);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100512 } else
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100513#if SSL_SUPPORTED
Denys Vlasenko403f2992018-02-06 15:15:08 +0100514# if ENABLE_FEATURE_WGET_HTTPS
515 if (strcmp(url, P_FTPS) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200516 h->port = bb_lookup_std_port(P_FTPS, "tcp", 990);
Denys Vlasenko403f2992018-02-06 15:15:08 +0100517 h->protocol = P_FTPS;
518 } else
519# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100520 if (strcmp(url, P_HTTPS) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200521 h->port = bb_lookup_std_port(P_HTTPS, "tcp", 443);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100522 h->protocol = P_HTTPS;
523 } else
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100524#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100525 if (strcmp(url, P_HTTP) == 0) {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100526 http:
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200527 h->port = bb_lookup_std_port(P_HTTP, "tcp", 80);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100528 h->protocol = P_HTTP;
529 } else {
530 *p = ':';
Denys Vlasenko34590242018-02-12 16:46:13 +0100531 bb_error_msg_and_die("not an http or ftp url: %s", url);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100532 }
533 } else {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100534 // GNU wget is user-friendly and falls back to http://
535 h->host = url;
536 goto http;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100537 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000538
539 // FYI:
540 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
Denys Vlasenkoa0aae9f2017-01-20 14:12:10 +0100541 // 'GET /?var=a/b HTTP/1.0'
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000542 // and saves 'index.html?var=a%2Fb' (we save 'b')
543 // wget 'http://busybox.net?login=john@doe':
544 // request: 'GET /?login=john@doe HTTP/1.0'
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200545 // saves: 'index.html?login=john@doe' (we save 'login=john@doe')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000546 // wget 'http://busybox.net#test/test':
547 // request: 'GET / HTTP/1.0'
548 // saves: 'index.html' (we save 'test')
549 //
550 // We also don't add unique .N suffix if file exists...
551 sp = strchr(h->host, '/');
552 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
553 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
554 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000555 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000556 } else if (*sp == '/') {
557 *sp = '\0';
558 h->path = sp + 1;
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200559 } else {
560 // sp points to '#' or '?'
561 // Note:
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000562 // http://busybox.net?login=john@doe is a valid URL
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200563 // (without '/' between ".net" and "?"),
564 // can't store NUL at sp[-1] - this destroys hostname.
565 *sp++ = '\0';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000566 h->path = sp;
567 }
568
569 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000570 if (sp != NULL) {
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200571 // URL-decode "user:password" string before base64-encoding:
572 // wget http://test:my%20pass@example.com should send
573 // Authorization: Basic dGVzdDpteSBwYXNz
574 // which decodes to "test:my pass".
575 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000576 *sp = '\0';
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100577 free(h->user);
578 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000579 h->host = sp + 1;
580 }
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100581 /* else: h->user remains NULL, or as set by original request
582 * before redirect (if we are here after a redirect).
583 */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000584}
585
Denys Vlasenko34590242018-02-12 16:46:13 +0100586static char *get_sanitized_hdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000587{
588 char *s, *hdrval;
589 int c;
590
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000591 /* retrieve header line */
Denys Vlasenko34590242018-02-12 16:46:13 +0100592 c = fgets_trim_sanitize(fp, " %s\n");
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000593
Denys Vlasenkof836f012011-02-10 23:02:28 +0100594 /* end of the headers? */
595 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000596 return NULL;
597
598 /* convert the header name to lower case */
Denys Vlasenkoea267d52013-07-01 15:01:50 +0200599 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
600 /*
601 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
602 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
603 * "A-Z" maps to "a-z".
604 * "@[\]" can't occur in header names.
605 * "^_" maps to "~,DEL" (which is wrong).
606 * "^" was never seen yet, "_" was seen from web.archive.org
607 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
608 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100609 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200610 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000611
612 /* verify we are at the end of the header name */
613 if (*s != ':')
Denys Vlasenko34590242018-02-12 16:46:13 +0100614 bb_error_msg_and_die("bad header line: %s", G.wget_buf);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000615
616 /* locate the start of the header value */
617 *s++ = '\0';
618 hdrval = skip_whitespace(s);
619
Denys Vlasenkof836f012011-02-10 23:02:28 +0100620 if (c != '\n') {
621 /* Rats! The buffer isn't big enough to hold the entire header value */
622 while (c = getc(fp), c != EOF && c != '\n')
623 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000624 }
625
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000626 return hdrval;
627}
628
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200629static void reset_beg_range_to_zero(void)
630{
Denys Vlasenko61441242012-06-17 19:52:25 +0200631 bb_error_msg("restart failed");
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200632 G.beg_range = 0;
633 xlseek(G.output_fd, 0, SEEK_SET);
Denys Vlasenko61441242012-06-17 19:52:25 +0200634 /* Done at the end instead: */
635 /* ftruncate(G.output_fd, 0); */
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200636}
637
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Run "openssl s_client -quiet -connect HOST:PORT [-servername HOST]"
 * as a child whose stdin and stdout are one end of a socketpair.
 *
 * host may already contain ":PORT"; if it has no ':', port is appended.
 *
 * Returns the parent's end of the socketpair. If
 * ENABLE_FEATURE_WGET_HTTPS is set and the exec of openssl failed,
 * returns -1 instead; without that feature an exec failure only makes
 * the child die with an error message.
 */
static int spawn_https_helper_openssl(const char *host, unsigned port)
{
	char *host_port = NULL; /* non-NULL only if we had to build "HOST:PORT" */
	char *sni_name;
	int fds[2];
	int child;
	IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	if (strchr(host, ':') == NULL) {
		host_port = xasprintf("%s:%u", host, port);
		host = host_port;
	}
	/* host now surely has a ':' - cut the copy at the last one */
	sni_name = xstrdup(host);
	*strrchr(sni_name, ':') = '\0';

	fflush_all();
	child = xvfork();
	if (child == 0) {
		/* Child */
		char *argv[8];
		int n;

		close(fds[0]);
		xmove_fd(fds[1], 0);
		xdup2(0, 1);
		/*
		 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
		 * It prints some debug stuff on stderr, don't know how to suppress it.
		 * Work around by dev-nulling stderr (original stderr is kept
		 * on fd 3 so that it can be restored if exec fails).
		 * We lose all error messages :(
		 */
		xmove_fd(2, 3);
		xopen("/dev/null", O_RDWR);

		n = 0;
		argv[n++] = (char*)"openssl";
		argv[n++] = (char*)"s_client";
		argv[n++] = (char*)"-quiet";
		argv[n++] = (char*)"-connect";
		argv[n++] = (char*)host;
		/*
		 * Per RFC 6066 Section 3, the only permitted values in the
		 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
		 * IPv4 and IPv6 addresses, port numbers are not allowed.
		 */
		if (!is_ip_address(sni_name)) {
			argv[n++] = (char*)"-servername";
			argv[n++] = (char*)sni_name;
		}
		argv[n] = NULL;

		BB_EXECVP(argv[0], argv);
		/* exec failed: get the real stderr back */
		xmove_fd(3, 2);
# if ENABLE_FEATURE_WGET_HTTPS
		/* Seen by the parent: the flag is volatile and the child was vforked */
		child_failed = 1;
		xfunc_die();
# else
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
# endif
		/* notreached */
	}

	/* Parent */
	free(sni_name);
	free(host_port);
	close(fds[1]);
# if ENABLE_FEATURE_WGET_HTTPS
	if (child_failed) {
		close(fds[0]);
		return -1;
	}
# endif
	return fds[0];
}
#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100712
#if ENABLE_FEATURE_WGET_HTTPS
/*
 * Insert a TLS-speaking child process between the caller and network_fd.
 *
 * A socketpair is created; the child gets one end as stdin/stdout and
 * the original network socket. With BB_MMU the child runs the internal
 * TLS code (handshake + copy loop), otherwise it execs "ssl_client".
 * In the parent, network_fd is replaced by the other socketpair end.
 *
 * host may carry a ":PORT" suffix; the part before the last ':' is
 * used as the server name. flags is passed to tls_run_copy_loop()
 * (or translated to "-e" for the external helper).
 */
static void spawn_ssl_client(const char *host, int network_fd, int flags)
{
	int fds[2];
	int child;
	char *sni_name, *colon;

	if ((option_mask32 & WGET_OPT_NO_CHECK_CERT) == 0) {
		/* Setting the bit keeps this note from being repeated on later calls */
		option_mask32 |= WGET_OPT_NO_CHECK_CERT;
		bb_error_msg("note: TLS certificate validation not implemented");
	}

	sni_name = xstrdup(host);
	colon = strrchr(sni_name, ':');
	if (colon != NULL)
		*colon = '\0';

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	fflush_all();
	child = BB_MMU ? xfork() : xvfork();
	if (child == 0) {
		/* Child */
		close(fds[0]);
		xmove_fd(fds[1], 0);
		xdup2(0, 1);
		if (!BB_MMU) {
			/* No fork(): hand the job to an external ssl_client,
			 * which gets the network socket as fd 3.
			 */
			char *argv[6];
			int n;

			xmove_fd(network_fd, 3);
			n = 0;
			argv[n++] = (char*)"ssl_client";
			argv[n++] = (char*)"-s3";
			//TODO: if (!is_ip_address(servername))...
			argv[n++] = (char*)"-n";
			argv[n++] = sni_name;
			if (flags & TLSLOOP_EXIT_ON_LOCAL_EOF)
				argv[n++] = (char*)"-e";
			argv[n] = NULL;
			BB_EXECVP(argv[0], argv);
			bb_perror_msg_and_die("can't execute '%s'", argv[0]);
		} else {
			/* Real fork(): run TLS in this process */
			tls_state_t *tls = new_tls_state();

			tls->ifd = tls->ofd = network_fd;
			tls_handshake(tls, sni_name);
			tls_run_copy_loop(tls, flags);
			exit(0);
		}
		/* notreached */
	}

	/* Parent */
	free(sni_name);
	close(fds[1]);
	xmove_fd(fds[0], network_fd);
}
#endif
769
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100770static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
771{
772 FILE *sfp;
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200773 char *pass;
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100774 int port;
775
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100776 sfp = open_socket(lsa);
Denys Vlasenko403f2992018-02-06 15:15:08 +0100777#if ENABLE_FEATURE_WGET_HTTPS
778 if (target->protocol == P_FTPS)
779 spawn_ssl_client(target->host, fileno(sfp), TLSLOOP_EXIT_ON_LOCAL_EOF);
780#endif
781
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100782 if (ftpcmd(NULL, NULL, sfp) != 220)
Denys Vlasenko34590242018-02-12 16:46:13 +0100783 bb_error_msg_and_die("%s", G.wget_buf);
784 /* note: ftpcmd() sanitizes G.wget_buf, ok to print */
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100785
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200786 /* Split username:password pair */
787 pass = (char*)"busybox"; /* password for "anonymous" */
788 if (target->user) {
789 pass = strchr(target->user, ':');
790 if (pass)
791 *pass++ = '\0';
792 }
793
794 /* Log in */
795 switch (ftpcmd("USER ", target->user ?: "anonymous", sfp)) {
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100796 case 230:
797 break;
798 case 331:
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200799 if (ftpcmd("PASS ", pass, sfp) == 230)
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100800 break;
801 /* fall through (failed login) */
802 default:
Denys Vlasenko34590242018-02-12 16:46:13 +0100803 bb_error_msg_and_die("ftp login: %s", G.wget_buf);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100804 }
805
806 ftpcmd("TYPE I", NULL, sfp);
807
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200808 /* Query file size */
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100809 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
810 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
811 if (G.content_len < 0 || errno) {
Denys Vlasenko8e2174e2018-04-08 18:06:24 +0200812 bb_error_msg_and_die("bad SIZE value '%s'", G.wget_buf + 4);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100813 }
814 G.got_clen = 1;
815 }
816
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200817 /* Enter passive mode */
Denys Vlasenko1783ffa2018-02-06 15:48:12 +0100818 if (ENABLE_FEATURE_IPV6 && ftpcmd("EPSV", NULL, sfp) == 229) {
819 /* good */
820 } else
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100821 if (ftpcmd("PASV", NULL, sfp) != 227) {
822 pasv_error:
Denys Vlasenko34590242018-02-12 16:46:13 +0100823 bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100824 }
Denys Vlasenko1783ffa2018-02-06 15:48:12 +0100825 port = parse_pasv_epsv(G.wget_buf);
826 if (port < 0)
827 goto pasv_error;
828
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100829 set_nport(&lsa->u.sa, htons(port));
830
831 *dfpp = open_socket(lsa);
832
Denys Vlasenko2b751572018-02-06 20:49:27 +0100833#if ENABLE_FEATURE_WGET_HTTPS
Denys Vlasenko237a9002018-02-08 00:28:30 +0100834 if (target->protocol == P_FTPS) {
835 /* "PROT P" enables encryption of data stream.
836 * Without it (or with "PROT C"), data is sent unencrypted.
837 */
838 if (ftpcmd("PROT P", NULL, sfp) == 200)
839 spawn_ssl_client(target->host, fileno(*dfpp), /*flags*/ 0);
840 }
Denys Vlasenko2b751572018-02-06 20:49:27 +0100841#endif
Denys Vlasenko403f2992018-02-06 15:15:08 +0100842
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100843 if (G.beg_range != 0) {
844 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
845 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
846 G.content_len -= G.beg_range;
847 else
848 reset_beg_range_to_zero();
849 }
850
Denys Vlasenko34590242018-02-12 16:46:13 +0100851//TODO: needs ftp-escaping 0xff and '\n' bytes here.
852//Or disallow '\n' altogether via sanitize_string() in parse_url().
853//But 0xff's are possible in valid utf8 filenames.
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100854 if (ftpcmd("RETR ", target->path, sfp) > 150)
Denys Vlasenko34590242018-02-12 16:46:13 +0100855 bb_error_msg_and_die("bad response to %s: %s", "RETR", G.wget_buf);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100856
857 return sfp;
858}
859
Denys Vlasenko2384a352011-02-15 00:58:36 +0100860static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200861{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200862#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
863# if ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200864 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200865# endif
866 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200867
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200868 polldata.fd = fileno(dfp);
869 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200870#endif
871 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200872
873 if (G.chunked)
874 goto get_clen;
875
876 /* Loops only if chunked */
877 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100878
879#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
880 /* Must use nonblocking I/O, otherwise fread will loop
881 * and *block* until it reads full buffer,
882 * which messes up progress bar and/or timeout logic.
883 * Because of nonblocking I/O, we need to dance
884 * very carefully around EAGAIN. See explanation at
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200885 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100886 */
887 ndelay_on(polldata.fd);
888#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100889 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200890 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100891 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200892
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200893#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100894 /* fread internally uses read loop, which in our case
895 * is usually exited when we get EAGAIN.
896 * In this case, libc sets error marker on the stream.
897 * Need to clear it before next fread to avoid possible
898 * rare false positive ferror below. Rare because usually
899 * fread gets more than zero bytes, and we don't fall
900 * into if (n <= 0) ...
901 */
902 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100903#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200904 errno = 0;
905 rdsz = sizeof(G.wget_buf);
906 if (G.got_clen) {
907 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
908 if ((int)G.content_len <= 0)
909 break;
910 rdsz = (unsigned)G.content_len;
911 }
912 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100913 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200914
915 if (n > 0) {
916 xwrite(G.output_fd, G.wget_buf, n);
917#if ENABLE_FEATURE_WGET_STATUSBAR
918 G.transferred += n;
919#endif
920 if (G.got_clen) {
921 G.content_len -= n;
922 if (G.content_len == 0)
923 break;
924 }
925#if ENABLE_FEATURE_WGET_TIMEOUT
926 second_cnt = G.timeout_seconds;
927#endif
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100928 goto bump;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200929 }
930
931 /* n <= 0.
932 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100933 * If error occurs, or EOF is reached, the return value
934 * is a short item count (or zero).
935 * fread does not distinguish between EOF and error.
936 */
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200937 if (errno != EAGAIN) {
938 if (ferror(dfp)) {
939 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100940 bb_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200941 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100942 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200943 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100944
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200945#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
946 /* It was EAGAIN. There is no data. Wait up to one second
947 * then abort if timed out, or update the bar and try reading again.
948 */
949 if (safe_poll(&polldata, 1, 1000) == 0) {
950# if ENABLE_FEATURE_WGET_TIMEOUT
951 if (second_cnt != 0 && --second_cnt == 0) {
952 progress_meter(PROGRESS_END);
953 bb_error_msg_and_die("download timed out");
954 }
955# endif
956 /* We used to loop back to poll here,
957 * but there is no great harm in letting fread
958 * to try reading anyway.
959 */
960 }
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100961#endif
962 bump:
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200963 /* Need to do it _every_ second for "stalled" indicator
964 * to be shown properly.
965 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200966 progress_meter(PROGRESS_BUMP);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200967 } /* while (reading data) */
968
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100969#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
970 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100971 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100972#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200973 if (!G.chunked)
974 break;
975
Denys Vlasenko8e2174e2018-04-08 18:06:24 +0200976 /* Each chunk ends with "\r\n" - eat it */
Denys Vlasenko34590242018-02-12 16:46:13 +0100977 fgets_trim_sanitize(dfp, NULL);
Denys Vlasenko8e2174e2018-04-08 18:06:24 +0200978 get_clen:
979 /* chunk size format is "HEXNUM[;name[=val]]\r\n" */
980 fgets_trim_sanitize(dfp, NULL);
981 errno = 0;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100982 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko8e2174e2018-04-08 18:06:24 +0200983 /*
984 * Had a bug with inputs like "ffffffff0001f400"
985 * smashing the heap later. Ensure >= 0.
986 */
987 if (G.content_len < 0 || errno)
988 bb_error_msg_and_die("bad chunk length '%s'", G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100989 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200990 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100991 G.got_clen = 1;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200992 /*
993 * Note that fgets may result in some data being buffered in dfp.
994 * We loop back to fread, which will retrieve this data.
995 * Also note that code has to be arranged so that fread
996 * is done _before_ one-second poll wait - poll doesn't know
997 * about stdio buffering and can result in spurious one second waits!
998 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200999 }
1000
Denys Vlasenko61441242012-06-17 19:52:25 +02001001 /* If -c failed, we restart from the beginning,
1002 * but we do not truncate file then, we do it only now, at the end.
1003 * This lets user to ^C if his 99% complete 10 GB file download
1004 * failed to restart *without* losing the almost complete file.
1005 */
1006 {
1007 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
1008 if (pos != (off_t)-1)
1009 ftruncate(G.output_fd, pos);
1010 }
1011
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001012 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +01001013 G.chunked = 0; /* makes it show 100% even for chunked download */
1014 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02001015 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +02001016}
1017
Pere Orga53695632011-02-16 20:09:36 +01001018static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +00001019{
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001020 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001021 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001022 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001023 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +00001024 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001025 char *fname_out_alloc;
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001026 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001027 struct host_info server;
1028 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +00001029
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001030 server.allocated = NULL;
1031 target.allocated = NULL;
1032 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +02001033 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001034
1035 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +00001036
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +00001037 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001038 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +00001039 if (use_proxy) {
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001040 char *proxy = getenv(target.protocol[0] == 'f' ? "ftp_proxy" : "http_proxy");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001041//FIXME: what if protocol is https? Ok to use http_proxy?
Denys Vlasenko2384a352011-02-15 00:58:36 +01001042 use_proxy = (proxy && proxy[0]);
1043 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001044 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +00001045 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001046 if (!use_proxy) {
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001047 server.protocol = target.protocol;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001048 server.port = target.port;
1049 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001050 //free(server.allocated); - can't be non-NULL
1051 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001052 } else {
1053 server.host = target.host;
1054 }
1055 }
1056
1057 if (ENABLE_FEATURE_IPV6)
1058 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001059
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001060 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001061 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001062 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001063 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001064 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001065 if (G.fname_out[0] == '/' || !G.fname_out[0])
1066 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +00001067 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001068 if (G.dir_prefix)
1069 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001070 else {
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001071 /* redirects may free target.path later, need to make a copy */
1072 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001073 }
Eric Andersen29edd002000-12-09 16:55:35 +00001074 }
Denis Vlasenko818322b2007-09-24 18:27:04 +00001075#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001076 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001077#endif
1078
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +00001079 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +01001080 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001081 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001082 G.output_fd = open(G.fname_out, O_WRONLY);
1083 if (G.output_fd >= 0) {
1084 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +00001085 }
1086 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +01001087 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +00001088 }
1089
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001090 redir_limit = 5;
1091 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +00001092 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001093 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001094 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
1095 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
1096 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +00001097 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001098 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +01001099 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1100 G.got_clen = 0;
1101 G.chunked = 0;
Denys Vlasenko403f2992018-02-06 15:15:08 +01001102 if (use_proxy || target.protocol[0] != 'f' /*not ftp[s]*/) {
Eric Andersen79757c92001-04-05 21:45:54 +00001103 /*
1104 * HTTP session
1105 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001106 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001107 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001108
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001109 /* Open socket to http(s) server */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001110#if ENABLE_FEATURE_WGET_OPENSSL
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001111 /* openssl (and maybe internal TLS) support is configured */
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001112 if (server.protocol == P_HTTPS) {
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001113 /* openssl-based helper
1114 * Inconvenient API since we can't give it an open fd
1115 */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001116 int fd = spawn_https_helper_openssl(server.host, server.port);
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001117# if ENABLE_FEATURE_WGET_HTTPS
1118 if (fd < 0) { /* no openssl? try internal */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001119 sfp = open_socket(lsa);
Denys Vlasenko403f2992018-02-06 15:15:08 +01001120 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001121 goto socket_opened;
1122 }
1123# else
1124 /* We don't check for exec("openssl") failure in this case */
1125# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001126 sfp = fdopen(fd, "r+");
1127 if (!sfp)
Denys Vlasenko899ae532018-04-01 19:59:37 +02001128 bb_die_memory_exhausted();
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001129 goto socket_opened;
1130 }
1131 sfp = open_socket(lsa);
1132 socket_opened:
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001133#elif ENABLE_FEATURE_WGET_HTTPS
1134 /* Only internal TLS support is configured */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001135 sfp = open_socket(lsa);
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001136 if (server.protocol == P_HTTPS)
Denys Vlasenko403f2992018-02-06 15:15:08 +01001137 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001138#else
1139 /* ssl (https) support is not configured */
1140 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +01001141#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001142 /* Send HTTP request */
1143 if (use_proxy) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001144 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001145 target.protocol, target.host,
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001146 target.path);
1147 } else {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001148 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001149 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1150 target.path);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001151 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001152 if (!USR_HEADER_HOST)
1153 SENDFMT(sfp, "Host: %s\r\n", target.host);
1154 if (!USR_HEADER_USER_AGENT)
1155 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +00001156
Denys Vlasenko9213a552011-02-10 13:23:45 +01001157 /* Ask server to close the connection as soon as we are done
1158 * (IOW: we do not intend to send more requests)
1159 */
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001160 SENDFMT(sfp, "Connection: close\r\n");
Denys Vlasenko9213a552011-02-10 13:23:45 +01001161
Denis Vlasenko9cade082006-11-21 10:43:02 +00001162#if ENABLE_FEATURE_WGET_AUTHENTICATION
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001163 if (target.user && !USR_HEADER_AUTH) {
1164 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001165 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001166 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001167 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1168 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001169 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001170 }
Eric Andersen79757c92001-04-05 21:45:54 +00001171#endif
1172
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001173 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1174 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +01001175
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001176#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001177 if (G.extra_headers) {
1178 log_io(G.extra_headers);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001179 fputs(G.extra_headers, sfp);
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001180 }
Denis Vlasenko5a2ad692009-03-04 14:13:37 +00001181
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001182 if (option_mask32 & WGET_OPT_POST_DATA) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001183 SENDFMT(sfp,
Denys Vlasenko9213a552011-02-10 13:23:45 +01001184 "Content-Type: application/x-www-form-urlencoded\r\n"
1185 "Content-Length: %u\r\n"
1186 "\r\n"
1187 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +02001188 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +01001189 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001190 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001191#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +01001192 {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001193 SENDFMT(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001194 }
Eric Andersen79757c92001-04-05 21:45:54 +00001195
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001196 fflush(sfp);
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001197
Denys Vlasenko4e08a122017-01-16 17:31:05 +01001198/* Tried doing this unconditionally.
1199 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1200 */
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001201#if SSL_SUPPORTED
1202 if (target.protocol == P_HTTPS) {
1203 /* If we use SSL helper, keeping our end of the socket open for writing
1204 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1205 * even after child closes its copy of the fd.
1206 * This helps:
1207 */
1208 shutdown(fileno(sfp), SHUT_WR);
1209 }
1210#endif
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001211
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001212 /*
1213 * Retrieve HTTP response line and check for "200" status code.
1214 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001215 read_response:
Denys Vlasenko34590242018-02-12 16:46:13 +01001216 fgets_trim_sanitize(sfp, " %s\n");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001217
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001218 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001219 str = skip_non_whitespace(str);
1220 str = skip_whitespace(str);
1221 // FIXME: no error check
1222 // xatou wouldn't work: "200 OK"
1223 status = atoi(str);
1224 switch (status) {
1225 case 0:
1226 case 100:
Denys Vlasenko34590242018-02-12 16:46:13 +01001227 while (get_sanitized_hdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001228 /* eat all remaining headers */;
1229 goto read_response;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001230
1231 /* Success responses */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001232 case 200:
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001233 /* fall through */
1234 case 201: /* 201 Created */
1235/* "The request has been fulfilled and resulted in a new resource being created" */
Denys Vlasenkoef159702016-09-01 11:16:22 +02001236 /* Standard wget is reported to treat this as success */
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001237 /* fall through */
1238 case 202: /* 202 Accepted */
1239/* "The request has been accepted for processing, but the processing has not been completed" */
1240 /* Treat as success: fall through */
1241 case 203: /* 203 Non-Authoritative Information */
1242/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1243 /* fall through */
1244 case 204: /* 204 No Content */
Denis Vlasenko50b5cac2008-06-22 16:28:02 +00001245/*
1246Response 204 doesn't say "null file", it says "metadata
1247has changed but data didn't":
1248
1249"10.2.5 204 No Content
1250The server has fulfilled the request but does not need to return
1251an entity-body, and might want to return updated metainformation.
1252The response MAY include new or updated metainformation in the form
1253of entity-headers, which if present SHOULD be associated with
1254the requested variant.
1255
1256If the client is a user agent, it SHOULD NOT change its document
1257view from that which caused the request to be sent. This response
1258is primarily intended to allow input for actions to take place
1259without causing a change to the user agent's active document view,
1260although any new or updated metainformation SHOULD be applied
1261to the document currently in the user agent's active view.
1262
1263The 204 response MUST NOT include a message-body, and thus
1264is always terminated by the first empty line after the header fields."
1265
1266However, in real world it was observed that some web servers
1267(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1268*/
Denys Vlasenkobf146b82012-06-13 17:31:07 +02001269 if (G.beg_range != 0) {
1270 /* "Range:..." was not honored by the server.
1271 * Restart download from the beginning.
1272 */
1273 reset_beg_range_to_zero();
1274 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001275 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001276 /* 205 Reset Content ?? what to do on this ?? */
1277
Denys Vlasenkofb132e42010-10-29 11:46:52 +02001278 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001279 case 301:
1280 case 302:
1281 case 303:
1282 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001283
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001284 case 206: /* Partial Content */
1285 if (G.beg_range != 0)
1286 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001287 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001288 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001289 /* fall through */
1290 default:
Denys Vlasenko34590242018-02-12 16:46:13 +01001291 bb_error_msg_and_die("server returned error: %s", G.wget_buf);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001292 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001293
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001294 /*
1295 * Retrieve HTTP headers.
1296 */
Denys Vlasenko34590242018-02-12 16:46:13 +01001297 while ((str = get_sanitized_hdr(sfp)) != NULL) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001298 static const char keywords[] ALIGN1 =
1299 "content-length\0""transfer-encoding\0""location\0";
1300 enum {
1301 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1302 };
Matthijs van de Water0d586662009-08-22 20:19:48 +02001303 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001304
Denys Vlasenko34590242018-02-12 16:46:13 +01001305 /* get_sanitized_hdr converted "FOO:" string to lowercase */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001306
Matthijs van de Water0d586662009-08-22 20:19:48 +02001307 /* strip trailing whitespace */
1308 char *s = strchrnul(str, '\0') - 1;
1309 while (s >= str && (*s == ' ' || *s == '\t')) {
1310 *s = '\0';
1311 s--;
1312 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001313 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001314 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001315 G.content_len = BB_STRTOOFF(str, NULL, 10);
1316 if (G.content_len < 0 || errno) {
Denys Vlasenko34590242018-02-12 16:46:13 +01001317 bb_error_msg_and_die("content-length %s is garbage", str);
Eric Andersen79757c92001-04-05 21:45:54 +00001318 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001319 G.got_clen = 1;
1320 continue;
1321 }
1322 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001323 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenko34590242018-02-12 16:46:13 +01001324 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001325 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001326 }
1327 if (key == KEY_location && status >= 300) {
1328 if (--redir_limit == 0)
1329 bb_error_msg_and_die("too many redirections");
1330 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001331 if (str[0] == '/') {
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001332 free(redirected_path);
Denys Vlasenko34590242018-02-12 16:46:13 +01001333 target.path = redirected_path = xstrdup(str + 1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001334 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001335 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001336 parse_url(str, &target);
1337 if (!use_proxy) {
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001338 /* server.user remains untouched */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001339 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +01001340 server.allocated = NULL;
Denys Vlasenko9634e8a2018-07-02 18:31:02 +02001341 server.protocol = target.protocol;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001342 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001343 /* strip_ipv6_scope_id(target.host); - no! */
1344 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001345 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +00001346 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001347 goto resolve_lsa;
1348 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +00001349 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001350 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +00001351 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001352 }
1353// if (status >= 300)
1354// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001355
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001356 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +00001357 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001358 } else {
Eric Andersen79757c92001-04-05 21:45:54 +00001359 /*
1360 * FTP session
1361 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001362 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +00001363 }
Denis Vlasenko77105632007-09-24 15:04:00 +00001364
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001365 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001366
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001367 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001368 if (G.output_fd < 0)
1369 G.output_fd = xopen(G.fname_out, G.o_flags);
1370 retrieve_file_data(dfp);
1371 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1372 xclose(G.output_fd);
1373 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001374 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +00001375 }
Eric Andersen79757c92001-04-05 21:45:54 +00001376
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001377 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001378 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +00001379 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001380 if (ftpcmd(NULL, NULL, sfp) != 226)
Denys Vlasenko34590242018-02-12 16:46:13 +01001381 bb_error_msg_and_die("ftp error: %s", G.wget_buf);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001382 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +00001383 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001384 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +00001385
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001386 free(server.allocated);
1387 free(target.allocated);
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001388 free(server.user);
1389 free(target.user);
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001390 free(fname_out_alloc);
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001391 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +00001392}
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001393
1394int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1395int wget_main(int argc UNUSED_PARAM, char **argv)
1396{
1397#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1398 static const char wget_longopts[] ALIGN1 =
1399 /* name, has_arg, val */
1400 "continue\0" No_argument "c"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001401 "quiet\0" No_argument "q"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +01001402 "server-response\0" No_argument "S"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001403 "output-document\0" Required_argument "O"
1404 "directory-prefix\0" Required_argument "P"
1405 "proxy\0" Required_argument "Y"
1406 "user-agent\0" Required_argument "U"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001407IF_FEATURE_WGET_TIMEOUT(
1408 "timeout\0" Required_argument "T")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001409 /* Ignored: */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001410IF_DESKTOP( "tries\0" Required_argument "t")
1411 "header\0" Required_argument "\xff"
1412 "post-data\0" Required_argument "\xfe"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001413 "spider\0" No_argument "\xfd"
Denys Vlasenko0972c7f2018-05-28 14:36:26 +02001414 "no-check-certificate\0" No_argument "\xfc"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001415 /* Ignored (we always use PASV): */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001416IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001417 /* Ignored (we don't support caching) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001418IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1419IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1420IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1421IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1422IF_DESKTOP( "no-parent\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001423 ;
Denys Vlasenko036585a2017-08-08 16:38:18 +02001424# define GETOPT32 getopt32long
1425# define LONGOPTS ,wget_longopts
1426#else
1427# define GETOPT32 getopt32
1428# define LONGOPTS
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001429#endif
1430
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001431#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1432 llist_t *headers_llist = NULL;
1433#endif
1434
1435 INIT_G();
1436
Lauri Kasanend074b412013-10-12 21:47:07 +02001437#if ENABLE_FEATURE_WGET_TIMEOUT
1438 G.timeout_seconds = 900;
1439 signal(SIGALRM, alarm_handler);
1440#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001441 G.proxy_flag = "on"; /* use proxies if env vars are set */
1442 G.user_agent = "Wget"; /* "User-Agent" header field */
1443
1444#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001445#endif
Denys Vlasenko22542ec2017-08-08 21:55:02 +02001446 GETOPT32(argv, "^"
1447 "cqSO:P:Y:U:T:+"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001448 /*ignored:*/ "t:"
1449 /*ignored:*/ "n::"
1450 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1451 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1452 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1453 * -nH --no-host-directories: wget -r http://host/ won't create host/
1454 * -np --no-parent
1455 * "n::" above says that we accept -n[ARG].
1456 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1457 */
Denys Vlasenko22542ec2017-08-08 21:55:02 +02001458 "\0"
1459 "-1" /* at least one URL */
1460 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::") /* --header is a list */
Denys Vlasenko036585a2017-08-08 16:38:18 +02001461 LONGOPTS
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001462 , &G.fname_out, &G.dir_prefix,
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001463 &G.proxy_flag, &G.user_agent,
1464 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001465 NULL, /* -t RETRIES */
1466 NULL /* -n[ARG] */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001467 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1468 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1469 );
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001470#if 0 /* option bits debug */
1471 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1472 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1473 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1474 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1475 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
Denys Vlasenko0972c7f2018-05-28 14:36:26 +02001476 if (option_mask32 & WGET_OPT_NO_CHECK_CERT) bb_error_msg("--no-check-certificate");
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001477 exit(0);
1478#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001479 argv += optind;
1480
1481#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1482 if (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001483 int size = 0;
1484 char *hdr;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001485 llist_t *ll = headers_llist;
1486 while (ll) {
1487 size += strlen(ll->data) + 2;
1488 ll = ll->link;
1489 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001490 G.extra_headers = hdr = xmalloc(size + 1);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001491 while (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001492 int bit;
1493 const char *words;
1494
1495 size = sprintf(hdr, "%s\r\n",
1496 (char*)llist_pop(&headers_llist));
1497 /* a bit like index_in_substrings but don't match full key */
1498 bit = 1;
1499 words = wget_user_headers;
1500 while (*words) {
1501 if (strstr(hdr, words) == hdr) {
1502 G.user_headers |= bit;
1503 break;
1504 }
1505 bit <<= 1;
1506 words += strlen(words) + 1;
1507 }
1508 hdr += size;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001509 }
1510 }
1511#endif
1512
Denys Vlasenko2384a352011-02-15 00:58:36 +01001513 G.output_fd = -1;
1514 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1515 if (G.fname_out) { /* -O FILE ? */
1516 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1517 G.output_fd = 1;
1518 option_mask32 &= ~WGET_OPT_CONTINUE;
1519 }
1520 /* compat with wget: -O FILE can overwrite */
1521 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1522 }
1523
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001524 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +01001525 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001526
Denys Vlasenko28556b92011-02-15 11:03:53 +01001527 if (G.output_fd >= 0)
1528 xclose(G.output_fd);
1529
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +02001530#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1531 free(G.extra_headers);
1532#endif
1533 FINI_G();
1534
Pere Orga53695632011-02-16 20:09:36 +01001535 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001536}