blob: 33c93bad3136f61894ae3ae271b20a9b62935e92 [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020011//config:config WGET
Denys Vlasenko4eed2c62017-07-18 22:01:24 +020012//config: bool "wget (35 kb)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020013//config: default y
14//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020015//config: wget is a utility for non-interactive download of files from HTTP
16//config: and FTP servers.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020017//config:
Denys Vlasenkof5604222017-01-10 14:58:54 +010018//config:config FEATURE_WGET_LONG_OPTIONS
19//config: bool "Enable long options"
20//config: default y
21//config: depends on WGET && LONG_OPTS
22//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020023//config:config FEATURE_WGET_STATUSBAR
Denys Vlasenkof5604222017-01-10 14:58:54 +010024//config: bool "Enable progress bar (+2k)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020025//config: default y
26//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020027//config:
28//config:config FEATURE_WGET_AUTHENTICATION
29//config: bool "Enable HTTP authentication"
30//config: default y
31//config: depends on WGET
32//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020033//config: Support authenticated HTTP transfers.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020034//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020035//config:config FEATURE_WGET_TIMEOUT
36//config: bool "Enable timeout option -T SEC"
37//config: default y
38//config: depends on WGET
39//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020040//config: Supports network read and connect timeouts for wget,
41//config: so that wget will give up and timeout, through the -T
42//config: command line option.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020043//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020044//config: Currently only connect and network data read timeout are
45//config: supported (i.e., timeout is not applied to the DNS query). When
46//config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
47//config: will work in addition to -T.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020048//config:
Denys Vlasenko9a647c32017-01-23 01:08:16 +010049//config:config FEATURE_WGET_HTTPS
50//config: bool "Support HTTPS using internal TLS code"
Denys Vlasenko403f2992018-02-06 15:15:08 +010051//it also enables FTPS support, but it's not well tested yet
Denys Vlasenko9a647c32017-01-23 01:08:16 +010052//config: default y
53//config: depends on WGET
54//config: select TLS
55//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020056//config: wget will use internal TLS code to connect to https:// URLs.
57//config: Note:
58//config: On NOMMU machines, ssl_helper applet should be available
59//config: in the $PATH for this to work. Make sure to select that applet.
Denys Vlasenko9a647c32017-01-23 01:08:16 +010060//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020061//config: Note: currently, TLS code only makes TLS I/O work, it
62//config: does *not* check that the peer is who it claims to be, etc.
63//config: IOW: it uses peer-supplied public keys to establish encryption
64//config: and signing keys, then encrypts and signs outgoing data and
65//config: decrypts incoming data.
66//config: It does not check signature hashes on the incoming data:
67//config: this means that attackers manipulating TCP packets can
68//config: send altered data and we unknowingly receive garbage.
69//config: (This check might be relatively easy to add).
70//config: It does not check public key's certificate:
71//config: this means that the peer may be an attacker impersonating
72//config: the server we think we are talking to.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010073//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020074//config: If you think this is unacceptable, consider this. As more and more
75//config: servers switch to HTTPS-only operation, without such "crippled"
76//config: TLS code it is *impossible* to simply download a kernel source
77//config: from kernel.org. Which can in real world translate into
78//config: "my small automatic tooling to build cross-compilers from sources
79//config: no longer works, I need to additionally keep a local copy
80//config: of ~4 megabyte source tarball of an SSL library and ~2 megabyte
81//config: source of wget, need to compile and build both before I can
82//config: download anything. All this despite the fact that the build
83//config: is done in a QEMU sandbox on a machine with absolutely nothing
84//config: worth stealing, so I don't care if someone would go to a lot
85//config: of trouble to intercept my HTTPS download to send me an altered
86//config: kernel tarball".
Denys Vlasenko67f6db62017-01-30 16:27:37 +010087//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020088//config: If you still think this is unacceptable, send patches.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010089//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020090//config: If you still think this is unacceptable, do not want to send
91//config: patches, but do want to waste bandwidth explaining how wrong
92//config: it is, you will be ignored.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010093//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +020094//config:config FEATURE_WGET_OPENSSL
95//config: bool "Try to connect to HTTPS using openssl"
96//config: default y
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020097//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020098//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020099//config: Try to use openssl to handle HTTPS.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200100//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200101//config: OpenSSL has a simple SSL client for debug purposes.
102//config: If you select this option, wget will effectively run:
103//config: "openssl s_client -quiet -connect hostname:443
104//config: -servername hostname 2>/dev/null" and pipe its data
105//config: through it. -servername is not used if hostname is numeric.
106//config: Note inconvenient API: host resolution is done twice,
107//config: and there is no guarantee openssl's idea of IPv6 address
108//config: format is the same as ours.
109//config: Another problem is that s_client prints debug information
110//config: to stderr, and it needs to be suppressed. This means
111//config: all error messages get suppressed too.
112//config: openssl is also a big binary, often dynamically linked
113//config: against ~15 libraries.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200114//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200115//config: If openssl can't be executed, internal TLS code will be used
116//config: (if you enabled it); if openssl can be executed but fails later,
117//config: wget can't detect this, and download will fail.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200118
119//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
120
121//kbuild:lib-$(CONFIG_WGET) += wget.o
122
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100123//usage:#define wget_trivial_usage
124//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200125//usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100126//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200127/* Since we ignore these opts, we don't show them in --help */
Denys Vlasenko92e1b082015-10-20 21:51:52 +0200128/* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
129/* //usage: " [-nv] [-nc] [-nH] [-np]" */
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100130//usage: " [-S|--server-response] [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100131//usage: )
132//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100133//usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-S] [-U AGENT]"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100134//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
135//usage: )
136//usage:#define wget_full_usage "\n\n"
137//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200138//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100139//usage: "\n --spider Only check URL existence: $? is 0 if exists"
Denys Vlasenko0972c7f2018-05-28 14:36:26 +0200140///////: "\n --no-check-certificate Don't validate the server's certificate"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100141//usage: )
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200142//usage: "\n -c Continue retrieval of aborted transfer"
143//usage: "\n -q Quiet"
144//usage: "\n -P DIR Save to DIR (default .)"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100145//usage: "\n -S Show server response"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200146//usage: IF_FEATURE_WGET_TIMEOUT(
147//usage: "\n -T SEC Network read timeout is SEC seconds"
148//usage: )
149//usage: "\n -O FILE Save to FILE ('-' for stdout)"
150//usage: "\n -U STR Use STR for User-Agent header"
151//usage: "\n -Y on/off Use proxy"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100152
Denis Vlasenkob6adbf12007-05-26 19:00:18 +0000153#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000154
/*
 * Protocol I/O tracing.
 * Change "#if 0" to "#if 1" to have every line sent/received logged
 * through bb_error_msg(). In the normal build log_io() expands to nothing
 * and SENDFMT() is a plain fprintf().
 *
 * The tracing SENDFMT() is a multi-statement macro, so it is wrapped in
 * do { } while (0) WITHOUT a trailing semicolon: the caller supplies it.
 * (A semicolon here would break "if (x) SENDFMT(...); else ...".)
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
# define SENDFMT(fp, fmt, ...) \
	do { \
		log_io("> " fmt, ##__VA_ARGS__); \
		fprintf(fp, fmt, ##__VA_ARGS__); \
	} while (0)
#else
# define log_io(...) ((void)0)
# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100166
167
/* Non-zero when at least one of the two TLS back ends (openssl helper or internal TLS code) is configured in */
#define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100169
/*
 * Components of one parsed URL (filled in by parse_url()).
 * host and path point into the buffer owned by "allocated".
 */
struct host_info {
	char *allocated;       /* private copy of the URL; freed on re-parse */
	const char *path;      /* part after the first '/', '?' or '#' ("" if none) */
	char *user;            /* percent-decoded "user:password", malloced */
	const char *protocol;  /* one of the P_xxx scheme name constants */
	char *host;            /* host[:port] part */
	int port;              /* default port of the scheme */
};
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200178static const char P_FTP[] ALIGN1 = "ftp";
179static const char P_HTTP[] ALIGN1 = "http";
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100180#if SSL_SUPPORTED
Denys Vlasenko403f2992018-02-06 15:15:08 +0100181# if ENABLE_FEATURE_WGET_HTTPS
182static const char P_FTPS[] ALIGN1 = "ftps";
183# endif
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200184static const char P_HTTPS[] ALIGN1 = "https";
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100185#endif
Eric Andersen79757c92001-04-05 21:45:54 +0000186
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * User-specified headers prevent using our corresponding built-in headers.
 * Each HDR_xxx bit is recorded in G.user_headers; bit N corresponds to
 * the N-th entry of wget_user_headers[] below, so keep both in the same
 * order. The authentication bits collapse to 0 when that feature is off.
 */
enum {
	HDR_HOST       = (1 << 0),
	HDR_USER_AGENT = (1 << 1),
	HDR_RANGE      = (1 << 2),
	HDR_AUTH       = (1 << 3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
	HDR_PROXY_AUTH = (1 << 4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
};
/* NUL-separated list of the header names tracked above */
static const char wget_user_headers[] ALIGN1 =
	"Host:\0"
	"User-Agent:\0"
	"Range:\0"
# if ENABLE_FEATURE_WGET_AUTHENTICATION
	"Authorization:\0"
	"Proxy-Authorization:\0"
# endif
	;
/* Non-zero if the user supplied the given header themselves */
# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
#else /* No long options, no user-headers :( */
# define USR_HEADER_HOST       0
# define USR_HEADER_USER_AGENT 0
# define USR_HEADER_RANGE      0
# define USR_HEADER_AUTH       0
# define USR_HEADER_PROXY_AUTH 0
#endif
Denis Vlasenko77105632007-09-24 15:04:00 +0000217
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200218/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +0000219struct globals {
220 off_t content_len; /* Content-length of the file */
221 off_t beg_range; /* Range at which continue begins */
222#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +0000223 off_t transferred; /* Number of bytes transferred so far */
224 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +0100225 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +0000226#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200227 char *dir_prefix;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100228#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200229 char *post_data;
230 char *extra_headers;
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +0100231 unsigned char user_headers; /* Headers mentioned by the user */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100232#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200233 char *fname_out; /* where to direct output (-O) */
234 const char *proxy_flag; /* Use proxies if env vars are set */
235 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200236#if ENABLE_FEATURE_WGET_TIMEOUT
237 unsigned timeout_seconds;
Denys Vlasenko6701e912016-03-17 15:58:16 +0100238 bool die_if_timed_out;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200239#endif
Denys Vlasenko2384a352011-02-15 00:58:36 +0100240 int output_fd;
241 int o_flags;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200242 smallint chunked; /* chunked transfer encoding */
243 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100244 /* Local downloads do benefit from big buffer.
245 * With 512 byte buffer, it was measured to be
246 * an order of magnitude slower than with big one.
247 */
248 uint64_t just_to_align_next_member;
249 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +0100250} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100251#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200252#define INIT_G() do { \
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200253 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200254} while (0)
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +0200255#define FINI_G() do { \
256 FREE_PTR_TO_GLOBALS(); \
257} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +0000258
259
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200260/* Must match option string! */
261enum {
262 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200263 WGET_OPT_QUIET = (1 << 1),
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100264 WGET_OPT_SERVER_RESPONSE = (1 << 2),
265 WGET_OPT_OUTNAME = (1 << 3),
266 WGET_OPT_PREFIX = (1 << 4),
267 WGET_OPT_PROXY = (1 << 5),
268 WGET_OPT_USER_AGENT = (1 << 6),
269 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
270 WGET_OPT_RETRIES = (1 << 8),
271 WGET_OPT_nsomething = (1 << 9),
272 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
273 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
274 WGET_OPT_SPIDER = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
Denys Vlasenko0972c7f2018-05-28 14:36:26 +0200275 WGET_OPT_NO_CHECK_CERT = (1 << 13) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200276};
277
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1, /* initialize the bar, then draw it */
	PROGRESS_END   = 0,  /* draw one last time, then tear the bar down */
	PROGRESS_BUMP  = 1,  /* just redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000283#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000284static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000285{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200286 if (option_mask32 & WGET_OPT_QUIET)
287 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000288
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200289 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100290 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000291
Denys Vlasenko2384a352011-02-15 00:58:36 +0100292 bb_progress_update(&G.pmt,
293 G.beg_range,
294 G.transferred,
295 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
296 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000297
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200298 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100299 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +0200300 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100301 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000302 }
303}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200304#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000305static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000306#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000307
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000308
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits host in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not a bracketed address with a '%' inside the
 * brackets is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* A '%' located after the ']' (e.g. "[::1]:%x") is not a zone id.
	 * Copying the "]..." tail forward onto it would also write past
	 * the string's terminating NUL, so leave such input alone. */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]...":
	 * slide the tail (including its NUL) down over the zone id.
	 * The regions overlap, hence memmove. */
	memmove(scope, cp, strlen(cp) + 1);
}
344
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode character string.
 * The result is built in (and returned as a pointer to) the shared
 * G.wget_buf, so it is only valid until that buffer is reused.
 * Over-long input is silently cut so the output stays inside the buffer.
 */
static char *base64enc(const char *str)
{
	/* paranoia: largest input accepted, derived from the buffer size */
	const size_t limit = sizeof(G.wget_buf)/4*3 - 10;
	unsigned len;

	len = strlen(str);
	if (len > limit)
		len = limit;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
356
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * SIGALRM handler: abort the program, but only while a guarded
 * operation is in progress (see set_alarm/clear_alarm).
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	if (!G.die_if_timed_out)
		return;
	bb_error_msg_and_die("download timed out");
}
/* Start the timeout for a blocking network operation (no-op without -T) */
static void set_alarm(void)
{
	if (G.timeout_seconds == 0)
		return;
	alarm(G.timeout_seconds);
	G.die_if_timed_out = 1;
}
/* The guarded operation finished: a pending alarm must no longer kill us */
# define clear_alarm() ((void)(G.die_if_timed_out = 0))
#else
# define set_alarm()   ((void)0)
# define clear_alarm() ((void)0)
#endif
376
Denys Vlasenkoed727612016-07-25 21:34:57 +0200377#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * is_ip_address() tells whether a string is a numeric IPv4 address
 * (or, when IPv6 support is enabled, a numeric IPv6 address) rather
 * than a host name. It relies on inet_pton(), which returns 1 only
 * for a valid textual address.
 *
 * Returns 1 for a numeric address, 0 otherwise.
 *
 * TODO add proper error checking when inet_pton() returns -1
 * (some form of system error has occurred, and errno is set)
 */
static int is_ip_address(const char *string)
{
	struct in_addr v4;
# if ENABLE_FEATURE_IPV6
	struct in6_addr v6;
# endif
	int rc;

	rc = inet_pton(AF_INET, string, &v4);
# if ENABLE_FEATURE_IPV6
	/* Not IPv4 (but no error either): try IPv6 */
	if (rc == 0)
		rc = inet_pton(AF_INET6, string, &v6);
# endif
	return rc == 1;
}
399#endif
400
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000401static FILE *open_socket(len_and_sockaddr *lsa)
402{
Lauri Kasanend074b412013-10-12 21:47:07 +0200403 int fd;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000404 FILE *fp;
405
Denys Vlasenko6701e912016-03-17 15:58:16 +0100406 set_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200407 fd = xconnect_stream(lsa);
Denys Vlasenko6701e912016-03-17 15:58:16 +0100408 clear_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200409
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000410 /* glibc 2.4 seems to try seeking on it - ??! */
411 /* hopefully it understands what ESPIPE means... */
Lauri Kasanend074b412013-10-12 21:47:07 +0200412 fp = fdopen(fd, "r+");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100413 if (!fp)
Denys Vlasenko899ae532018-04-01 19:59:37 +0200414 bb_die_memory_exhausted();
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000415
416 return fp;
417}
418
/* We balk at any control chars in other side's messages.
 * This prevents nasty surprises (e.g. ESC sequences) in "Location:" URLs
 * and error messages.
 *
 * The only exception is tabs, which are converted to (one) space:
 * HTTP's "headers: <whitespace> values" may have those.
 *
 * The string is modified in place: it is cut at the first byte below
 * 0x20 that is not a tab. Bytes >= 0x80 are kept. Returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cur;

	for (cur = (unsigned char *) s; *cur != '\0'; cur++) {
		if (*cur >= ' ')
			continue;        /* ordinary byte */
		if (*cur != '\t')
			break;           /* control char: truncate here */
		*cur = ' ';
	}
	*cur = '\0';
	return s;
}
440
Denys Vlasenkof836f012011-02-10 23:02:28 +0100441/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
Denys Vlasenko34590242018-02-12 16:46:13 +0100442static char fgets_trim_sanitize(FILE *fp, const char *fmt)
Denys Vlasenkof836f012011-02-10 23:02:28 +0100443{
444 char c;
445 char *buf_ptr;
446
Denys Vlasenko6701e912016-03-17 15:58:16 +0100447 set_alarm();
Denys Vlasenko34590242018-02-12 16:46:13 +0100448 if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
Denys Vlasenkof836f012011-02-10 23:02:28 +0100449 bb_perror_msg_and_die("error getting response");
Denys Vlasenko6701e912016-03-17 15:58:16 +0100450 clear_alarm();
Denys Vlasenkof836f012011-02-10 23:02:28 +0100451
452 buf_ptr = strchrnul(G.wget_buf, '\n');
453 c = *buf_ptr;
Denys Vlasenko34590242018-02-12 16:46:13 +0100454#if 1
455 /* Disallow any control chars: trim at first char < 0x20 */
456 sanitize_string(G.wget_buf);
457#else
Denys Vlasenkof836f012011-02-10 23:02:28 +0100458 *buf_ptr = '\0';
459 buf_ptr = strchrnul(G.wget_buf, '\r');
460 *buf_ptr = '\0';
Denys Vlasenko34590242018-02-12 16:46:13 +0100461#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100462
463 log_io("< %s", G.wget_buf);
464
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100465 if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE))
466 fprintf(stderr, fmt, G.wget_buf);
467
Denys Vlasenkof836f012011-02-10 23:02:28 +0100468 return c;
469}
470
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100471static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000472{
473 int result;
474 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100475 if (!s2)
476 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000477 fprintf(fp, "%s%s\r\n", s1, s2);
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100478 /* With --server-response, wget also shows its ftp commands */
479 if (option_mask32 & WGET_OPT_SERVER_RESPONSE)
480 fprintf(stderr, "--> %s%s\n\n", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000481 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100482 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000483 }
484
Denys Vlasenko34590242018-02-12 16:46:13 +0100485 /* Read until "Nxx something" is received */
486 G.wget_buf[3] = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000487 do {
Denys Vlasenko34590242018-02-12 16:46:13 +0100488 fgets_trim_sanitize(fp, "%s\n");
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100489 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000490
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100491 G.wget_buf[3] = '\0';
492 result = xatoi_positive(G.wget_buf);
493 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000494 return result;
495}
496
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100497static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000498{
499 char *url, *p, *sp;
500
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100501 free(h->allocated);
502 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000503
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100504 h->protocol = P_FTP;
505 p = strstr(url, "://");
506 if (p) {
507 *p = '\0';
508 h->host = p + 3;
509 if (strcmp(url, P_FTP) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200510 h->port = bb_lookup_std_port(P_FTP, "tcp", 21);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100511 } else
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100512#if SSL_SUPPORTED
Denys Vlasenko403f2992018-02-06 15:15:08 +0100513# if ENABLE_FEATURE_WGET_HTTPS
514 if (strcmp(url, P_FTPS) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200515 h->port = bb_lookup_std_port(P_FTPS, "tcp", 990);
Denys Vlasenko403f2992018-02-06 15:15:08 +0100516 h->protocol = P_FTPS;
517 } else
518# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100519 if (strcmp(url, P_HTTPS) == 0) {
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200520 h->port = bb_lookup_std_port(P_HTTPS, "tcp", 443);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100521 h->protocol = P_HTTPS;
522 } else
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100523#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100524 if (strcmp(url, P_HTTP) == 0) {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100525 http:
Denys Vlasenko2aeb2012018-04-17 12:43:54 +0200526 h->port = bb_lookup_std_port(P_HTTP, "tcp", 80);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100527 h->protocol = P_HTTP;
528 } else {
529 *p = ':';
Denys Vlasenko34590242018-02-12 16:46:13 +0100530 bb_error_msg_and_die("not an http or ftp url: %s", url);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100531 }
532 } else {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100533 // GNU wget is user-friendly and falls back to http://
534 h->host = url;
535 goto http;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100536 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000537
538 // FYI:
539 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
Denys Vlasenkoa0aae9f2017-01-20 14:12:10 +0100540 // 'GET /?var=a/b HTTP/1.0'
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000541 // and saves 'index.html?var=a%2Fb' (we save 'b')
542 // wget 'http://busybox.net?login=john@doe':
543 // request: 'GET /?login=john@doe HTTP/1.0'
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200544 // saves: 'index.html?login=john@doe' (we save 'login=john@doe')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000545 // wget 'http://busybox.net#test/test':
546 // request: 'GET / HTTP/1.0'
547 // saves: 'index.html' (we save 'test')
548 //
549 // We also don't add unique .N suffix if file exists...
550 sp = strchr(h->host, '/');
551 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
552 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
553 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000554 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000555 } else if (*sp == '/') {
556 *sp = '\0';
557 h->path = sp + 1;
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200558 } else {
559 // sp points to '#' or '?'
560 // Note:
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000561 // http://busybox.net?login=john@doe is a valid URL
Denys Vlasenkodf45eb42018-04-24 13:35:32 +0200562 // (without '/' between ".net" and "?"),
563 // can't store NUL at sp[-1] - this destroys hostname.
564 *sp++ = '\0';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000565 h->path = sp;
566 }
567
568 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000569 if (sp != NULL) {
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200570 // URL-decode "user:password" string before base64-encoding:
571 // wget http://test:my%20pass@example.com should send
572 // Authorization: Basic dGVzdDpteSBwYXNz
573 // which decodes to "test:my pass".
574 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000575 *sp = '\0';
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100576 free(h->user);
577 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000578 h->host = sp + 1;
579 }
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100580 /* else: h->user remains NULL, or as set by original request
581 * before redirect (if we are here after a redirect).
582 */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000583}
584
Denys Vlasenko34590242018-02-12 16:46:13 +0100585static char *get_sanitized_hdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000586{
587 char *s, *hdrval;
588 int c;
589
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000590 /* retrieve header line */
Denys Vlasenko34590242018-02-12 16:46:13 +0100591 c = fgets_trim_sanitize(fp, " %s\n");
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000592
Denys Vlasenkof836f012011-02-10 23:02:28 +0100593 /* end of the headers? */
594 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000595 return NULL;
596
597 /* convert the header name to lower case */
Denys Vlasenkoea267d52013-07-01 15:01:50 +0200598 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
599 /*
600 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
601 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
602 * "A-Z" maps to "a-z".
603 * "@[\]" can't occur in header names.
604 * "^_" maps to "~,DEL" (which is wrong).
605 * "^" was never seen yet, "_" was seen from web.archive.org
606 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
607 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100608 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200609 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000610
611 /* verify we are at the end of the header name */
612 if (*s != ':')
Denys Vlasenko34590242018-02-12 16:46:13 +0100613 bb_error_msg_and_die("bad header line: %s", G.wget_buf);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000614
615 /* locate the start of the header value */
616 *s++ = '\0';
617 hdrval = skip_whitespace(s);
618
Denys Vlasenkof836f012011-02-10 23:02:28 +0100619 if (c != '\n') {
620 /* Rats! The buffer isn't big enough to hold the entire header value */
621 while (c = getc(fp), c != EOF && c != '\n')
622 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000623 }
624
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000625 return hdrval;
626}
627
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200628static void reset_beg_range_to_zero(void)
629{
Denys Vlasenko61441242012-06-17 19:52:25 +0200630 bb_error_msg("restart failed");
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200631 G.beg_range = 0;
632 xlseek(G.output_fd, 0, SEEK_SET);
Denys Vlasenko61441242012-06-17 19:52:25 +0200633 /* Done at the end instead: */
634 /* ftruncate(G.output_fd, 0); */
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200635}
636
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Run "openssl s_client -quiet -connect HOST:PORT [-servername HOST]"
 * as a child whose stdin and stdout are one end of a socketpair.
 *
 * Returns the parent's end of the socketpair.
 * If FEATURE_WGET_HTTPS is also enabled and the exec fails, the child
 * sets child_failed and the parent returns -1 instead.
 * NOTE(review): the parent seeing child_failed relies on the vfork'ed
 * child sharing memory with the parent - confirm against xvfork().
 */
static int spawn_https_helper_openssl(const char *host, unsigned port)
{
	char *host_port = NULL;
	char *sni_name;
	int fds[2];
	int pid;
	IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	/* -connect needs "HOST:PORT": append the port if host has no ':' */
	if (strchr(host, ':') == NULL) {
		host_port = xasprintf("%s:%u", host, port);
		host = host_port;
	}
	/* SNI name is host with everything from the last ':' cut off */
	sni_name = xstrdup(host);
	*strrchr(sni_name, ':') = '\0';

	fflush_all();
	pid = xvfork();
	if (pid == 0) {
		/* Child */
		char *argv[8];
		int n;

		close(fds[0]);
		xmove_fd(fds[1], 0);
		xdup2(0, 1);
		/*
		 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
		 * It prints some debug stuff on stderr, don't know how to suppress it.
		 * Work around by dev-nulling stderr. We lose all error messages :(
		 */
		xmove_fd(2, 3);
		xopen("/dev/null", O_RDWR);

		/* Unused trailing slots stay NULL and terminate the vector */
		memset(&argv, 0, sizeof(argv));
		n = 0;
		argv[n++] = (char*)"openssl";
		argv[n++] = (char*)"s_client";
		argv[n++] = (char*)"-quiet";
		argv[n++] = (char*)"-connect";
		argv[n++] = (char*)host;
		/*
		 * Per RFC 6066 Section 3, the only permitted values in the
		 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
		 * IPv4 and IPv6 addresses, port numbers are not allowed.
		 */
		if (!is_ip_address(sni_name)) {
			argv[n++] = (char*)"-servername";
			argv[n++] = (char*)sni_name;
		}

		BB_EXECVP(argv[0], argv);
		/* exec failed: put the saved stderr back before reporting */
		xmove_fd(3, 2);
# if ENABLE_FEATURE_WGET_HTTPS
		child_failed = 1;
		xfunc_die();
# else
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
# endif
		/* notreached */
	}

	/* Parent */
	free(sni_name);
	free(host_port);
	close(fds[1]);
# if ENABLE_FEATURE_WGET_HTTPS
	if (child_failed) {
		close(fds[0]);
		return -1;
	}
# endif
	return fds[0];
}
#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100711
#if ENABLE_FEATURE_WGET_HTTPS
/*
 * Put a TLS layer in front of network_fd.
 *
 * A child process is started with one end of a socketpair as its stdin
 * and stdout; it speaks TLS on the original network_fd, either through
 * the internal TLS code (BB_MMU) or by exec'ing "ssl_client".
 * In the parent, the other end of the socketpair is moved onto network_fd.
 *
 * host may carry a ":port" suffix; it is cut off to form the server name.
 * flags are handed to tls_run_copy_loop(); with "ssl_client", only
 * TLSLOOP_EXIT_ON_LOCAL_EOF is translated (to "-e").
 */
static void spawn_ssl_client(const char *host, int network_fd, int flags)
{
	int fds[2];
	int pid;
	char *servername, *colon;

	if (!(option_mask32 & WGET_OPT_NO_CHECK_CERT))
		bb_error_msg("note: TLS certificate validation not implemented");

	servername = xstrdup(host);
	colon = strrchr(servername, ':');
	if (colon != NULL)
		*colon = '\0';

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	fflush_all();
	pid = BB_MMU ? xfork() : xvfork();
	if (pid == 0) {
		/* Child */
		close(fds[0]);
		xmove_fd(fds[1], 0);
		xdup2(0, 1);
		if (!BB_MMU) {
			/* No fork(): run the TLS code as a separate program */
			char *argv[6];

			/* NOTE(review): "-s3" presumably names fd 3 as the network socket - confirm */
			xmove_fd(network_fd, 3);
			argv[0] = (char*)"ssl_client";
			argv[1] = (char*)"-s3";
			//TODO: if (!is_ip_address(servername))...
			argv[2] = (char*)"-n";
			argv[3] = servername;
			argv[4] = (flags & TLSLOOP_EXIT_ON_LOCAL_EOF ? (char*)"-e" : NULL);
			argv[5] = NULL;
			BB_EXECVP(argv[0], argv);
			bb_perror_msg_and_die("can't execute '%s'", argv[0]);
		} else {
			/* Forked child: do handshake and data copying in-process */
			tls_state_t *tls = new_tls_state();
			tls->ifd = tls->ofd = network_fd;
			tls_handshake(tls, servername);
			tls_run_copy_loop(tls, flags);
			exit(0);
		}
		/* notreached */
	}

	/* Parent */
	free(servername);
	close(fds[1]);
	xmove_fd(fds[0], network_fd);
}
#endif
766
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100767static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
768{
769 FILE *sfp;
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200770 char *pass;
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100771 int port;
772
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100773 sfp = open_socket(lsa);
Denys Vlasenko403f2992018-02-06 15:15:08 +0100774#if ENABLE_FEATURE_WGET_HTTPS
775 if (target->protocol == P_FTPS)
776 spawn_ssl_client(target->host, fileno(sfp), TLSLOOP_EXIT_ON_LOCAL_EOF);
777#endif
778
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100779 if (ftpcmd(NULL, NULL, sfp) != 220)
Denys Vlasenko34590242018-02-12 16:46:13 +0100780 bb_error_msg_and_die("%s", G.wget_buf);
781 /* note: ftpcmd() sanitizes G.wget_buf, ok to print */
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100782
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200783 /* Split username:password pair */
784 pass = (char*)"busybox"; /* password for "anonymous" */
785 if (target->user) {
786 pass = strchr(target->user, ':');
787 if (pass)
788 *pass++ = '\0';
789 }
790
791 /* Log in */
792 switch (ftpcmd("USER ", target->user ?: "anonymous", sfp)) {
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100793 case 230:
794 break;
795 case 331:
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200796 if (ftpcmd("PASS ", pass, sfp) == 230)
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100797 break;
798 /* fall through (failed login) */
799 default:
Denys Vlasenko34590242018-02-12 16:46:13 +0100800 bb_error_msg_and_die("ftp login: %s", G.wget_buf);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100801 }
802
803 ftpcmd("TYPE I", NULL, sfp);
804
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200805 /* Query file size */
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100806 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
807 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
808 if (G.content_len < 0 || errno) {
Denys Vlasenko8e2174e2018-04-08 18:06:24 +0200809 bb_error_msg_and_die("bad SIZE value '%s'", G.wget_buf + 4);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100810 }
811 G.got_clen = 1;
812 }
813
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200814 /* Enter passive mode */
Denys Vlasenko1783ffa2018-02-06 15:48:12 +0100815 if (ENABLE_FEATURE_IPV6 && ftpcmd("EPSV", NULL, sfp) == 229) {
816 /* good */
817 } else
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100818 if (ftpcmd("PASV", NULL, sfp) != 227) {
819 pasv_error:
Denys Vlasenko34590242018-02-12 16:46:13 +0100820 bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100821 }
Denys Vlasenko1783ffa2018-02-06 15:48:12 +0100822 port = parse_pasv_epsv(G.wget_buf);
823 if (port < 0)
824 goto pasv_error;
825
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100826 set_nport(&lsa->u.sa, htons(port));
827
828 *dfpp = open_socket(lsa);
829
Denys Vlasenko2b751572018-02-06 20:49:27 +0100830#if ENABLE_FEATURE_WGET_HTTPS
Denys Vlasenko237a9002018-02-08 00:28:30 +0100831 if (target->protocol == P_FTPS) {
832 /* "PROT P" enables encryption of data stream.
833 * Without it (or with "PROT C"), data is sent unencrypted.
834 */
835 if (ftpcmd("PROT P", NULL, sfp) == 200)
836 spawn_ssl_client(target->host, fileno(*dfpp), /*flags*/ 0);
837 }
Denys Vlasenko2b751572018-02-06 20:49:27 +0100838#endif
Denys Vlasenko403f2992018-02-06 15:15:08 +0100839
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100840 if (G.beg_range != 0) {
841 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
842 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
843 G.content_len -= G.beg_range;
844 else
845 reset_beg_range_to_zero();
846 }
847
Denys Vlasenko34590242018-02-12 16:46:13 +0100848//TODO: needs ftp-escaping 0xff and '\n' bytes here.
849//Or disallow '\n' altogether via sanitize_string() in parse_url().
850//But 0xff's are possible in valid utf8 filenames.
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100851 if (ftpcmd("RETR ", target->path, sfp) > 150)
Denys Vlasenko34590242018-02-12 16:46:13 +0100852 bb_error_msg_and_die("bad response to %s: %s", "RETR", G.wget_buf);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100853
854 return sfp;
855}
856
Denys Vlasenko2384a352011-02-15 00:58:36 +0100857static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200858{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200859#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
860# if ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200861 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200862# endif
863 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200864
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200865 polldata.fd = fileno(dfp);
866 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200867#endif
868 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200869
870 if (G.chunked)
871 goto get_clen;
872
873 /* Loops only if chunked */
874 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100875
876#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
877 /* Must use nonblocking I/O, otherwise fread will loop
878 * and *block* until it reads full buffer,
879 * which messes up progress bar and/or timeout logic.
880 * Because of nonblocking I/O, we need to dance
881 * very carefully around EAGAIN. See explanation at
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200882 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100883 */
884 ndelay_on(polldata.fd);
885#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100886 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200887 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100888 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200889
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200890#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100891 /* fread internally uses read loop, which in our case
892 * is usually exited when we get EAGAIN.
893 * In this case, libc sets error marker on the stream.
894 * Need to clear it before next fread to avoid possible
895 * rare false positive ferror below. Rare because usually
896 * fread gets more than zero bytes, and we don't fall
897 * into if (n <= 0) ...
898 */
899 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100900#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200901 errno = 0;
902 rdsz = sizeof(G.wget_buf);
903 if (G.got_clen) {
904 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
905 if ((int)G.content_len <= 0)
906 break;
907 rdsz = (unsigned)G.content_len;
908 }
909 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100910 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200911
912 if (n > 0) {
913 xwrite(G.output_fd, G.wget_buf, n);
914#if ENABLE_FEATURE_WGET_STATUSBAR
915 G.transferred += n;
916#endif
917 if (G.got_clen) {
918 G.content_len -= n;
919 if (G.content_len == 0)
920 break;
921 }
922#if ENABLE_FEATURE_WGET_TIMEOUT
923 second_cnt = G.timeout_seconds;
924#endif
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100925 goto bump;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200926 }
927
928 /* n <= 0.
929 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100930 * If error occurs, or EOF is reached, the return value
931 * is a short item count (or zero).
932 * fread does not distinguish between EOF and error.
933 */
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200934 if (errno != EAGAIN) {
935 if (ferror(dfp)) {
936 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100937 bb_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200938 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100939 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200940 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100941
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200942#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
943 /* It was EAGAIN. There is no data. Wait up to one second
944 * then abort if timed out, or update the bar and try reading again.
945 */
946 if (safe_poll(&polldata, 1, 1000) == 0) {
947# if ENABLE_FEATURE_WGET_TIMEOUT
948 if (second_cnt != 0 && --second_cnt == 0) {
949 progress_meter(PROGRESS_END);
950 bb_error_msg_and_die("download timed out");
951 }
952# endif
953 /* We used to loop back to poll here,
954 * but there is no great harm in letting fread
955 * to try reading anyway.
956 */
957 }
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100958#endif
959 bump:
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200960 /* Need to do it _every_ second for "stalled" indicator
961 * to be shown properly.
962 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200963 progress_meter(PROGRESS_BUMP);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200964 } /* while (reading data) */
965
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100966#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
967 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100968 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100969#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200970 if (!G.chunked)
971 break;
972
Denys Vlasenko8e2174e2018-04-08 18:06:24 +0200973 /* Each chunk ends with "\r\n" - eat it */
Denys Vlasenko34590242018-02-12 16:46:13 +0100974 fgets_trim_sanitize(dfp, NULL);
Denys Vlasenko8e2174e2018-04-08 18:06:24 +0200975 get_clen:
976 /* chunk size format is "HEXNUM[;name[=val]]\r\n" */
977 fgets_trim_sanitize(dfp, NULL);
978 errno = 0;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100979 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko8e2174e2018-04-08 18:06:24 +0200980 /*
981 * Had a bug with inputs like "ffffffff0001f400"
982 * smashing the heap later. Ensure >= 0.
983 */
984 if (G.content_len < 0 || errno)
985 bb_error_msg_and_die("bad chunk length '%s'", G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100986 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200987 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100988 G.got_clen = 1;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200989 /*
990 * Note that fgets may result in some data being buffered in dfp.
991 * We loop back to fread, which will retrieve this data.
992 * Also note that code has to be arranged so that fread
993 * is done _before_ one-second poll wait - poll doesn't know
994 * about stdio buffering and can result in spurious one second waits!
995 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200996 }
997
Denys Vlasenko61441242012-06-17 19:52:25 +0200998 /* If -c failed, we restart from the beginning,
999 * but we do not truncate file then, we do it only now, at the end.
1000 * This lets user to ^C if his 99% complete 10 GB file download
1001 * failed to restart *without* losing the almost complete file.
1002 */
1003 {
1004 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
1005 if (pos != (off_t)-1)
1006 ftruncate(G.output_fd, pos);
1007 }
1008
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001009 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +01001010 G.chunked = 0; /* makes it show 100% even for chunked download */
1011 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02001012 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +02001013}
1014
Pere Orga53695632011-02-16 20:09:36 +01001015static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +00001016{
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001017 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001018 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001019 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001020 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +00001021 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001022 char *fname_out_alloc;
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001023 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001024 struct host_info server;
1025 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +00001026
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001027 server.allocated = NULL;
1028 target.allocated = NULL;
1029 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +02001030 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001031
1032 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +00001033
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +00001034 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001035 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +00001036 if (use_proxy) {
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001037 char *proxy = getenv(target.protocol[0] == 'f' ? "ftp_proxy" : "http_proxy");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001038//FIXME: what if protocol is https? Ok to use http_proxy?
Denys Vlasenko2384a352011-02-15 00:58:36 +01001039 use_proxy = (proxy && proxy[0]);
1040 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001041 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +00001042 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001043 if (!use_proxy) {
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001044 server.protocol = target.protocol;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001045 server.port = target.port;
1046 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001047 //free(server.allocated); - can't be non-NULL
1048 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001049 } else {
1050 server.host = target.host;
1051 }
1052 }
1053
1054 if (ENABLE_FEATURE_IPV6)
1055 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001056
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001057 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001058 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001059 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001060 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001061 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001062 if (G.fname_out[0] == '/' || !G.fname_out[0])
1063 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +00001064 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001065 if (G.dir_prefix)
1066 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001067 else {
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001068 /* redirects may free target.path later, need to make a copy */
1069 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001070 }
Eric Andersen29edd002000-12-09 16:55:35 +00001071 }
Denis Vlasenko818322b2007-09-24 18:27:04 +00001072#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001073 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001074#endif
1075
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +00001076 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +01001077 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001078 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001079 G.output_fd = open(G.fname_out, O_WRONLY);
1080 if (G.output_fd >= 0) {
1081 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +00001082 }
1083 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +01001084 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +00001085 }
1086
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001087 redir_limit = 5;
1088 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +00001089 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001090 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001091 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
1092 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
1093 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +00001094 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001095 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +01001096 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1097 G.got_clen = 0;
1098 G.chunked = 0;
Denys Vlasenko403f2992018-02-06 15:15:08 +01001099 if (use_proxy || target.protocol[0] != 'f' /*not ftp[s]*/) {
Eric Andersen79757c92001-04-05 21:45:54 +00001100 /*
1101 * HTTP session
1102 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001103 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001104 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001105
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001106 /* Open socket to http(s) server */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001107#if ENABLE_FEATURE_WGET_OPENSSL
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001108 /* openssl (and maybe internal TLS) support is configured */
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001109 if (server.protocol == P_HTTPS) {
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001110 /* openssl-based helper
1111 * Inconvenient API since we can't give it an open fd
1112 */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001113 int fd = spawn_https_helper_openssl(server.host, server.port);
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001114# if ENABLE_FEATURE_WGET_HTTPS
1115 if (fd < 0) { /* no openssl? try internal */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001116 sfp = open_socket(lsa);
Denys Vlasenko403f2992018-02-06 15:15:08 +01001117 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001118 goto socket_opened;
1119 }
1120# else
1121 /* We don't check for exec("openssl") failure in this case */
1122# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001123 sfp = fdopen(fd, "r+");
1124 if (!sfp)
Denys Vlasenko899ae532018-04-01 19:59:37 +02001125 bb_die_memory_exhausted();
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001126 goto socket_opened;
1127 }
1128 sfp = open_socket(lsa);
1129 socket_opened:
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001130#elif ENABLE_FEATURE_WGET_HTTPS
1131 /* Only internal TLS support is configured */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001132 sfp = open_socket(lsa);
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001133 if (server.protocol == P_HTTPS)
Denys Vlasenko403f2992018-02-06 15:15:08 +01001134 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001135#else
1136 /* ssl (https) support is not configured */
1137 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +01001138#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001139 /* Send HTTP request */
1140 if (use_proxy) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001141 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001142 target.protocol, target.host,
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001143 target.path);
1144 } else {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001145 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001146 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1147 target.path);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001148 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001149 if (!USR_HEADER_HOST)
1150 SENDFMT(sfp, "Host: %s\r\n", target.host);
1151 if (!USR_HEADER_USER_AGENT)
1152 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +00001153
Denys Vlasenko9213a552011-02-10 13:23:45 +01001154 /* Ask server to close the connection as soon as we are done
1155 * (IOW: we do not intend to send more requests)
1156 */
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001157 SENDFMT(sfp, "Connection: close\r\n");
Denys Vlasenko9213a552011-02-10 13:23:45 +01001158
Denis Vlasenko9cade082006-11-21 10:43:02 +00001159#if ENABLE_FEATURE_WGET_AUTHENTICATION
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001160 if (target.user && !USR_HEADER_AUTH) {
1161 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001162 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001163 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001164 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1165 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001166 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001167 }
Eric Andersen79757c92001-04-05 21:45:54 +00001168#endif
1169
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001170 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1171 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +01001172
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001173#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001174 if (G.extra_headers) {
1175 log_io(G.extra_headers);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001176 fputs(G.extra_headers, sfp);
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001177 }
Denis Vlasenko5a2ad692009-03-04 14:13:37 +00001178
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001179 if (option_mask32 & WGET_OPT_POST_DATA) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001180 SENDFMT(sfp,
Denys Vlasenko9213a552011-02-10 13:23:45 +01001181 "Content-Type: application/x-www-form-urlencoded\r\n"
1182 "Content-Length: %u\r\n"
1183 "\r\n"
1184 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +02001185 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +01001186 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001187 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001188#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +01001189 {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001190 SENDFMT(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001191 }
Eric Andersen79757c92001-04-05 21:45:54 +00001192
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001193 fflush(sfp);
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001194
Denys Vlasenko4e08a122017-01-16 17:31:05 +01001195/* Tried doing this unconditionally.
1196 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1197 */
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001198#if SSL_SUPPORTED
1199 if (target.protocol == P_HTTPS) {
1200 /* If we use SSL helper, keeping our end of the socket open for writing
1201 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1202 * even after child closes its copy of the fd.
1203 * This helps:
1204 */
1205 shutdown(fileno(sfp), SHUT_WR);
1206 }
1207#endif
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001208
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001209 /*
1210 * Retrieve HTTP response line and check for "200" status code.
1211 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001212 read_response:
Denys Vlasenko34590242018-02-12 16:46:13 +01001213 fgets_trim_sanitize(sfp, " %s\n");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001214
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001215 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001216 str = skip_non_whitespace(str);
1217 str = skip_whitespace(str);
1218 // FIXME: no error check
1219 // xatou wouldn't work: "200 OK"
1220 status = atoi(str);
1221 switch (status) {
1222 case 0:
1223 case 100:
Denys Vlasenko34590242018-02-12 16:46:13 +01001224 while (get_sanitized_hdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001225 /* eat all remaining headers */;
1226 goto read_response;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001227
1228 /* Success responses */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001229 case 200:
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001230 /* fall through */
1231 case 201: /* 201 Created */
1232/* "The request has been fulfilled and resulted in a new resource being created" */
Denys Vlasenkoef159702016-09-01 11:16:22 +02001233 /* Standard wget is reported to treat this as success */
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001234 /* fall through */
1235 case 202: /* 202 Accepted */
1236/* "The request has been accepted for processing, but the processing has not been completed" */
1237 /* Treat as success: fall through */
1238 case 203: /* 203 Non-Authoritative Information */
1239/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1240 /* fall through */
1241 case 204: /* 204 No Content */
Denis Vlasenko50b5cac2008-06-22 16:28:02 +00001242/*
1243Response 204 doesn't say "null file", it says "metadata
1244has changed but data didn't":
1245
1246"10.2.5 204 No Content
1247The server has fulfilled the request but does not need to return
1248an entity-body, and might want to return updated metainformation.
1249The response MAY include new or updated metainformation in the form
1250of entity-headers, which if present SHOULD be associated with
1251the requested variant.
1252
1253If the client is a user agent, it SHOULD NOT change its document
1254view from that which caused the request to be sent. This response
1255is primarily intended to allow input for actions to take place
1256without causing a change to the user agent's active document view,
1257although any new or updated metainformation SHOULD be applied
1258to the document currently in the user agent's active view.
1259
1260The 204 response MUST NOT include a message-body, and thus
1261is always terminated by the first empty line after the header fields."
1262
1263However, in real world it was observed that some web servers
1264(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1265*/
Denys Vlasenkobf146b82012-06-13 17:31:07 +02001266 if (G.beg_range != 0) {
1267 /* "Range:..." was not honored by the server.
1268 * Restart download from the beginning.
1269 */
1270 reset_beg_range_to_zero();
1271 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001272 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001273 /* 205 Reset Content ?? what to do on this ?? */
1274
Denys Vlasenkofb132e42010-10-29 11:46:52 +02001275 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001276 case 301:
1277 case 302:
1278 case 303:
1279 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001280
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001281 case 206: /* Partial Content */
1282 if (G.beg_range != 0)
1283 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001284 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001285 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001286 /* fall through */
1287 default:
Denys Vlasenko34590242018-02-12 16:46:13 +01001288 bb_error_msg_and_die("server returned error: %s", G.wget_buf);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001289 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001290
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001291 /*
1292 * Retrieve HTTP headers.
1293 */
Denys Vlasenko34590242018-02-12 16:46:13 +01001294 while ((str = get_sanitized_hdr(sfp)) != NULL) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001295 static const char keywords[] ALIGN1 =
1296 "content-length\0""transfer-encoding\0""location\0";
1297 enum {
1298 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1299 };
Matthijs van de Water0d586662009-08-22 20:19:48 +02001300 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001301
Denys Vlasenko34590242018-02-12 16:46:13 +01001302 /* get_sanitized_hdr converted "FOO:" string to lowercase */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001303
Matthijs van de Water0d586662009-08-22 20:19:48 +02001304 /* strip trailing whitespace */
1305 char *s = strchrnul(str, '\0') - 1;
1306 while (s >= str && (*s == ' ' || *s == '\t')) {
1307 *s = '\0';
1308 s--;
1309 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001310 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001311 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001312 G.content_len = BB_STRTOOFF(str, NULL, 10);
1313 if (G.content_len < 0 || errno) {
Denys Vlasenko34590242018-02-12 16:46:13 +01001314 bb_error_msg_and_die("content-length %s is garbage", str);
Eric Andersen79757c92001-04-05 21:45:54 +00001315 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001316 G.got_clen = 1;
1317 continue;
1318 }
1319 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001320 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenko34590242018-02-12 16:46:13 +01001321 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001322 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001323 }
1324 if (key == KEY_location && status >= 300) {
1325 if (--redir_limit == 0)
1326 bb_error_msg_and_die("too many redirections");
1327 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001328 if (str[0] == '/') {
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001329 free(redirected_path);
Denys Vlasenko34590242018-02-12 16:46:13 +01001330 target.path = redirected_path = xstrdup(str + 1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001331 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001332 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001333 parse_url(str, &target);
1334 if (!use_proxy) {
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001335 /* server.user remains untouched */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001336 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +01001337 server.allocated = NULL;
Denys Vlasenko9634e8a2018-07-02 18:31:02 +02001338 server.protocol = target.protocol;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001339 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001340 /* strip_ipv6_scope_id(target.host); - no! */
1341 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001342 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +00001343 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001344 goto resolve_lsa;
1345 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +00001346 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001347 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +00001348 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001349 }
1350// if (status >= 300)
1351// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001352
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001353 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +00001354 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001355 } else {
Eric Andersen79757c92001-04-05 21:45:54 +00001356 /*
1357 * FTP session
1358 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001359 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +00001360 }
Denis Vlasenko77105632007-09-24 15:04:00 +00001361
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001362 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001363
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001364 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001365 if (G.output_fd < 0)
1366 G.output_fd = xopen(G.fname_out, G.o_flags);
1367 retrieve_file_data(dfp);
1368 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1369 xclose(G.output_fd);
1370 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001371 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +00001372 }
Eric Andersen79757c92001-04-05 21:45:54 +00001373
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001374 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001375 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +00001376 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001377 if (ftpcmd(NULL, NULL, sfp) != 226)
Denys Vlasenko34590242018-02-12 16:46:13 +01001378 bb_error_msg_and_die("ftp error: %s", G.wget_buf);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001379 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +00001380 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001381 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +00001382
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001383 free(server.allocated);
1384 free(target.allocated);
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001385 free(server.user);
1386 free(target.user);
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001387 free(fname_out_alloc);
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001388 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +00001389}
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001390
1391int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1392int wget_main(int argc UNUSED_PARAM, char **argv)
1393{
1394#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1395 static const char wget_longopts[] ALIGN1 =
1396 /* name, has_arg, val */
1397 "continue\0" No_argument "c"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001398 "quiet\0" No_argument "q"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +01001399 "server-response\0" No_argument "S"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001400 "output-document\0" Required_argument "O"
1401 "directory-prefix\0" Required_argument "P"
1402 "proxy\0" Required_argument "Y"
1403 "user-agent\0" Required_argument "U"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001404IF_FEATURE_WGET_TIMEOUT(
1405 "timeout\0" Required_argument "T")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001406 /* Ignored: */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001407IF_DESKTOP( "tries\0" Required_argument "t")
1408 "header\0" Required_argument "\xff"
1409 "post-data\0" Required_argument "\xfe"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001410 "spider\0" No_argument "\xfd"
Denys Vlasenko0972c7f2018-05-28 14:36:26 +02001411 "no-check-certificate\0" No_argument "\xfc"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001412 /* Ignored (we always use PASV): */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001413IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001414 /* Ignored (we don't support caching) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001415IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1416IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1417IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1418IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1419IF_DESKTOP( "no-parent\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001420 ;
Denys Vlasenko036585a2017-08-08 16:38:18 +02001421# define GETOPT32 getopt32long
1422# define LONGOPTS ,wget_longopts
1423#else
1424# define GETOPT32 getopt32
1425# define LONGOPTS
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001426#endif
1427
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001428#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1429 llist_t *headers_llist = NULL;
1430#endif
1431
1432 INIT_G();
1433
Lauri Kasanend074b412013-10-12 21:47:07 +02001434#if ENABLE_FEATURE_WGET_TIMEOUT
1435 G.timeout_seconds = 900;
1436 signal(SIGALRM, alarm_handler);
1437#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001438 G.proxy_flag = "on"; /* use proxies if env vars are set */
1439 G.user_agent = "Wget"; /* "User-Agent" header field */
1440
1441#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001442#endif
Denys Vlasenko22542ec2017-08-08 21:55:02 +02001443 GETOPT32(argv, "^"
1444 "cqSO:P:Y:U:T:+"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001445 /*ignored:*/ "t:"
1446 /*ignored:*/ "n::"
1447 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1448 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1449 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1450 * -nH --no-host-directories: wget -r http://host/ won't create host/
1451 * -np --no-parent
1452 * "n::" above says that we accept -n[ARG].
1453 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1454 */
Denys Vlasenko22542ec2017-08-08 21:55:02 +02001455 "\0"
1456 "-1" /* at least one URL */
1457 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::") /* --header is a list */
Denys Vlasenko036585a2017-08-08 16:38:18 +02001458 LONGOPTS
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001459 , &G.fname_out, &G.dir_prefix,
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001460 &G.proxy_flag, &G.user_agent,
1461 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001462 NULL, /* -t RETRIES */
1463 NULL /* -n[ARG] */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001464 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1465 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1466 );
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001467#if 0 /* option bits debug */
1468 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1469 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1470 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1471 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1472 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
Denys Vlasenko0972c7f2018-05-28 14:36:26 +02001473 if (option_mask32 & WGET_OPT_NO_CHECK_CERT) bb_error_msg("--no-check-certificate");
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001474 exit(0);
1475#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001476 argv += optind;
1477
1478#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1479 if (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001480 int size = 0;
1481 char *hdr;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001482 llist_t *ll = headers_llist;
1483 while (ll) {
1484 size += strlen(ll->data) + 2;
1485 ll = ll->link;
1486 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001487 G.extra_headers = hdr = xmalloc(size + 1);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001488 while (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001489 int bit;
1490 const char *words;
1491
1492 size = sprintf(hdr, "%s\r\n",
1493 (char*)llist_pop(&headers_llist));
1494 /* a bit like index_in_substrings but don't match full key */
1495 bit = 1;
1496 words = wget_user_headers;
1497 while (*words) {
1498 if (strstr(hdr, words) == hdr) {
1499 G.user_headers |= bit;
1500 break;
1501 }
1502 bit <<= 1;
1503 words += strlen(words) + 1;
1504 }
1505 hdr += size;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001506 }
1507 }
1508#endif
1509
Denys Vlasenko2384a352011-02-15 00:58:36 +01001510 G.output_fd = -1;
1511 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1512 if (G.fname_out) { /* -O FILE ? */
1513 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1514 G.output_fd = 1;
1515 option_mask32 &= ~WGET_OPT_CONTINUE;
1516 }
1517 /* compat with wget: -O FILE can overwrite */
1518 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1519 }
1520
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001521 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +01001522 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001523
Denys Vlasenko28556b92011-02-15 11:03:53 +01001524 if (G.output_fd >= 0)
1525 xclose(G.output_fd);
1526
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +02001527#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1528 free(G.extra_headers);
1529#endif
1530 FINI_G();
1531
Pere Orga53695632011-02-16 20:09:36 +01001532 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001533}