blob: 2650b5384a441ec138548335e6f4a5b7439a73a2 [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020011//config:config WGET
Denys Vlasenko4eed2c62017-07-18 22:01:24 +020012//config: bool "wget (35 kb)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020013//config: default y
14//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020015//config: wget is a utility for non-interactive download of files from HTTP
16//config: and FTP servers.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020017//config:
Denys Vlasenkof5604222017-01-10 14:58:54 +010018//config:config FEATURE_WGET_LONG_OPTIONS
19//config: bool "Enable long options"
20//config: default y
21//config: depends on WGET && LONG_OPTS
22//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020023//config:config FEATURE_WGET_STATUSBAR
Denys Vlasenkof5604222017-01-10 14:58:54 +010024//config: bool "Enable progress bar (+2k)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020025//config: default y
26//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020027//config:
28//config:config FEATURE_WGET_AUTHENTICATION
29//config: bool "Enable HTTP authentication"
30//config: default y
31//config: depends on WGET
32//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020033//config: Support authenticated HTTP transfers.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020034//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020035//config:config FEATURE_WGET_TIMEOUT
36//config: bool "Enable timeout option -T SEC"
37//config: default y
38//config: depends on WGET
39//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020040//config: Supports network read and connect timeouts for wget,
41//config: so that wget will give up and timeout, through the -T
42//config: command line option.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020043//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020044//config: Currently only connect and network data read timeout are
45//config: supported (i.e., timeout is not applied to the DNS query). When
46//config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
47//config: will work in addition to -T.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020048//config:
Denys Vlasenko9a647c32017-01-23 01:08:16 +010049//config:config FEATURE_WGET_HTTPS
50//config: bool "Support HTTPS using internal TLS code"
Denys Vlasenko403f2992018-02-06 15:15:08 +010051//it also enables FTPS support, but it's not well tested yet
Denys Vlasenko9a647c32017-01-23 01:08:16 +010052//config: default y
53//config: depends on WGET
54//config: select TLS
55//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020056//config: wget will use internal TLS code to connect to https:// URLs.
57//config: Note:
58//config: On NOMMU machines, ssl_helper applet should be available
59//config: in the $PATH for this to work. Make sure to select that applet.
Denys Vlasenko9a647c32017-01-23 01:08:16 +010060//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020061//config: Note: currently, TLS code only makes TLS I/O work, it
62//config: does *not* check that the peer is who it claims to be, etc.
63//config: IOW: it uses peer-supplied public keys to establish encryption
64//config: and signing keys, then encrypts and signs outgoing data and
65//config: decrypts incoming data.
66//config: It does not check signature hashes on the incoming data:
67//config: this means that attackers manipulating TCP packets can
68//config: send altered data and we unknowingly receive garbage.
69//config: (This check might be relatively easy to add).
70//config: It does not check public key's certificate:
71//config: this means that the peer may be an attacker impersonating
72//config: the server we think we are talking to.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010073//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020074//config: If you think this is unacceptable, consider this. As more and more
75//config: servers switch to HTTPS-only operation, without such "crippled"
76//config: TLS code it is *impossible* to simply download a kernel source
77//config: from kernel.org. Which can in real world translate into
78//config: "my small automatic tooling to build cross-compilers from sources
79//config: no longer works, I need to additionally keep a local copy
80//config: of ~4 megabyte source tarball of a SSL library and ~2 megabyte
//config: source of wget, need to compile and build both before I can
82//config: download anything. All this despite the fact that the build
83//config: is done in a QEMU sandbox on a machine with absolutely nothing
84//config: worth stealing, so I don't care if someone would go to a lot
85//config: of trouble to intercept my HTTPS download to send me an altered
86//config: kernel tarball".
Denys Vlasenko67f6db62017-01-30 16:27:37 +010087//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020088//config: If you still think this is unacceptable, send patches.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010089//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +020090//config: If you still think this is unacceptable, do not want to send
//config: patches, but do want to waste bandwidth explaining how wrong
92//config: it is, you will be ignored.
Denys Vlasenko67f6db62017-01-30 16:27:37 +010093//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +020094//config:config FEATURE_WGET_OPENSSL
95//config: bool "Try to connect to HTTPS using openssl"
96//config: default y
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020097//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020098//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020099//config: Try to use openssl to handle HTTPS.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200100//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200101//config: OpenSSL has a simple SSL client for debug purposes.
102//config: If you select this option, wget will effectively run:
103//config: "openssl s_client -quiet -connect hostname:443
104//config: -servername hostname 2>/dev/null" and pipe its data
105//config: through it. -servername is not used if hostname is numeric.
106//config: Note inconvenient API: host resolution is done twice,
107//config: and there is no guarantee openssl's idea of IPv6 address
108//config: format is the same as ours.
109//config: Another problem is that s_client prints debug information
110//config: to stderr, and it needs to be suppressed. This means
111//config: all error messages get suppressed too.
112//config: openssl is also a big binary, often dynamically linked
113//config: against ~15 libraries.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200114//config:
Denys Vlasenko72089cf2017-07-21 09:50:55 +0200115//config: If openssl can't be executed, internal TLS code will be used
116//config: (if you enabled it); if openssl can be executed but fails later,
117//config: wget can't detect this, and download will fail.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +0200118
119//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
120
121//kbuild:lib-$(CONFIG_WGET) += wget.o
122
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100123//usage:#define wget_trivial_usage
124//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200125//usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100126//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200127/* Since we ignore these opts, we don't show them in --help */
Denys Vlasenko92e1b082015-10-20 21:51:52 +0200128/* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
129/* //usage: " [-nv] [-nc] [-nH] [-np]" */
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100130//usage: " [-S|--server-response] [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100131//usage: )
132//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100133//usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-S] [-U AGENT]"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100134//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
135//usage: )
136//usage:#define wget_full_usage "\n\n"
137//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200138//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100139//usage: "\n --spider Only check URL existence: $? is 0 if exists"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100140//usage: )
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200141//usage: "\n -c Continue retrieval of aborted transfer"
142//usage: "\n -q Quiet"
143//usage: "\n -P DIR Save to DIR (default .)"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100144//usage: "\n -S Show server response"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200145//usage: IF_FEATURE_WGET_TIMEOUT(
146//usage: "\n -T SEC Network read timeout is SEC seconds"
147//usage: )
148//usage: "\n -O FILE Save to FILE ('-' for stdout)"
149//usage: "\n -U STR Use STR for User-Agent header"
150//usage: "\n -Y on/off Use proxy"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100151
Denis Vlasenkob6adbf12007-05-26 19:00:18 +0000152#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000153
/*
 * I/O tracing helpers.
 *
 * Change "#if 0" to "#if 1" to route log_io() to bb_error_msg() and to make
 * SENDFMT() log every formatted line before writing it to the stream.
 * In the normal build log_io() expands to nothing and SENDFMT() is a plain
 * fprintf().
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
/* No ';' after "while (0)": the caller supplies it, which keeps
 * "if (x) SENDFMT(...); else ..." well-formed. */
# define SENDFMT(fp, fmt, ...) \
	do { \
		log_io("> " fmt, ##__VA_ARGS__); \
		fprintf(fp, fmt, ##__VA_ARGS__); \
	} while (0)
#else
# define log_io(...) ((void)0)
# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100165
166
Denys Vlasenko9a647c32017-01-23 01:08:16 +0100167#define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100168
Eric Andersen79757c92001-04-05 21:45:54 +0000169struct host_info {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100170 char *allocated;
Denis Vlasenko818322b2007-09-24 18:27:04 +0000171 const char *path;
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100172 char *user;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100173 const char *protocol;
Denis Vlasenko818322b2007-09-24 18:27:04 +0000174 char *host;
175 int port;
Eric Andersen79757c92001-04-05 21:45:54 +0000176};
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200177static const char P_FTP[] ALIGN1 = "ftp";
178static const char P_HTTP[] ALIGN1 = "http";
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100179#if SSL_SUPPORTED
Denys Vlasenko403f2992018-02-06 15:15:08 +0100180# if ENABLE_FEATURE_WGET_HTTPS
181static const char P_FTPS[] ALIGN1 = "ftps";
182# endif
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200183static const char P_HTTPS[] ALIGN1 = "https";
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100184#endif
Eric Andersen79757c92001-04-05 21:45:54 +0000185
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * A header given by the user suppresses the built-in header of the same name.
 * One bit per such header; the bit order follows the wget_user_headers[]
 * string list below. The authentication bits collapse to 0 when
 * authentication support is compiled out.
 */
enum {
	HDR_HOST       = 0x01,
	HDR_USER_AGENT = 0x02,
	HDR_RANGE      = 0x04,
	HDR_AUTH       = 0x08 * ENABLE_FEATURE_WGET_AUTHENTICATION,
	HDR_PROXY_AUTH = 0x10 * ENABLE_FEATURE_WGET_AUTHENTICATION,
};
/* NUL-separated list of the header names tracked above */
static const char wget_user_headers[] ALIGN1 =
	"Host:\0"
	"User-Agent:\0"
	"Range:\0"
# if ENABLE_FEATURE_WGET_AUTHENTICATION
	"Authorization:\0"
	"Proxy-Authorization:\0"
# endif
	;
/* Nonzero if the user supplied the corresponding header */
# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
#else
/* Without long options there is no way to pass user headers: all tests are constant 0 */
# define USR_HEADER_HOST       0
# define USR_HEADER_USER_AGENT 0
# define USR_HEADER_RANGE      0
# define USR_HEADER_AUTH       0
# define USR_HEADER_PROXY_AUTH 0
#endif
Denis Vlasenko77105632007-09-24 15:04:00 +0000216
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200217/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +0000218struct globals {
219 off_t content_len; /* Content-length of the file */
220 off_t beg_range; /* Range at which continue begins */
221#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +0000222 off_t transferred; /* Number of bytes transferred so far */
223 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +0100224 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +0000225#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200226 char *dir_prefix;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100227#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200228 char *post_data;
229 char *extra_headers;
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +0100230 unsigned char user_headers; /* Headers mentioned by the user */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100231#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200232 char *fname_out; /* where to direct output (-O) */
233 const char *proxy_flag; /* Use proxies if env vars are set */
234 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200235#if ENABLE_FEATURE_WGET_TIMEOUT
236 unsigned timeout_seconds;
Denys Vlasenko6701e912016-03-17 15:58:16 +0100237 bool die_if_timed_out;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200238#endif
Denys Vlasenko2384a352011-02-15 00:58:36 +0100239 int output_fd;
240 int o_flags;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200241 smallint chunked; /* chunked transfer encoding */
242 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100243 /* Local downloads do benefit from big buffer.
244 * With 512 byte buffer, it was measured to be
245 * an order of magnitude slower than with big one.
246 */
247 uint64_t just_to_align_next_member;
248 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +0100249} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100250#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200251#define INIT_G() do { \
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200252 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200253} while (0)
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +0200254#define FINI_G() do { \
255 FREE_PTR_TO_GLOBALS(); \
256} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +0000257
258
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200259/* Must match option string! */
260enum {
261 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200262 WGET_OPT_QUIET = (1 << 1),
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100263 WGET_OPT_SERVER_RESPONSE = (1 << 2),
264 WGET_OPT_OUTNAME = (1 << 3),
265 WGET_OPT_PREFIX = (1 << 4),
266 WGET_OPT_PROXY = (1 << 5),
267 WGET_OPT_USER_AGENT = (1 << 6),
268 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
269 WGET_OPT_RETRIES = (1 << 8),
270 WGET_OPT_nsomething = (1 << 9),
271 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
272 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
273 WGET_OPT_SPIDER = (1 << 12) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200274};
275
/* Values of the "flag" argument of progress_meter() */
enum {
	PROGRESS_START = -1,    /* initialize the meter, then draw it */
	PROGRESS_END,           /* 0: draw one last time, then tear down */
	PROGRESS_BUMP,          /* 1: plain redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000281#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000282static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000283{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200284 if (option_mask32 & WGET_OPT_QUIET)
285 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000286
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200287 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100288 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000289
Denys Vlasenko2384a352011-02-15 00:58:36 +0100290 bb_progress_update(&G.pmt,
291 G.beg_range,
292 G.transferred,
293 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
294 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000295
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200296 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100297 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +0200298 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100299 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000300 }
301}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200302#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000303static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000304#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000305
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000306
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Removes, in place, the "%zone" part of a bracketed IPv6 host:
 * "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * The string is left untouched if it does not start with '[', contains
 * no '%', is not of the form "[xx]" or "[xx]:nn", or has its first '%'
 * after the closing bracket.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* The '%' must be inside the brackets. For input like "[::1]:8%0"
	 * the copy below would otherwise move the tail *forward*, making
	 * the string longer and writing past the end of the buffer. */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]...":
	 * slide the tail (including its NUL) down over the zone id */
	memmove(scope, cp, strlen(cp) + 1);
}
342
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode a character string.
 * The result is written to (and returned as) the shared G.wget_buf, so it
 * is overwritten by the next user of that buffer. Input that would not fit
 * is silently cut short.
 */
static char *base64enc(const char *str)
{
	/* largest input we are willing to encode (paranoia) */
	const size_t limit = sizeof(G.wget_buf)/4*3 - 10;
	unsigned n = strlen(str);

	if (n > limit)
		n = limit;
	bb_uuencode(G.wget_buf, str, n, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
354
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * Timeout handler: aborts the program if a guarded operation is in flight
 * (G.die_if_timed_out is set), otherwise does nothing.
 * NOTE(review): presumably installed for SIGALRM elsewhere - not visible here.
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
	if (!G.die_if_timed_out)
		return;
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	bb_error_msg_and_die("download timed out");
}
/* Arm the timeout before a blocking operation; no-op when no timeout is configured */
static void set_alarm(void)
{
	if (G.timeout_seconds == 0)
		return;
	alarm(G.timeout_seconds);
	G.die_if_timed_out = 1;
}
/* The guarded operation finished: a late alarm must no longer kill us */
# define clear_alarm() ((void)(G.die_if_timed_out = 0))
#else
# define set_alarm()   ((void)0)
# define clear_alarm() ((void)0)
#endif
374
Denys Vlasenkoed727612016-07-25 21:34:57 +0200375#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * is_ip_address() tells whether a string is a numeric IPv4 address or,
 * when IPv6 support is enabled, a numeric IPv6 address, as opposed to
 * a host name. The decision is based on the result of inet_pton().
 *
 * Returns 1 for a numeric address, 0 otherwise.
 *
 * TODO add proper error checking when inet_pton() returns -1
 * (some form of system error has occurred, and errno is set)
 */
static int is_ip_address(const char *string)
{
	struct sockaddr_in v4;
	int rc;

	rc = inet_pton(AF_INET, string, &v4.sin_addr);
# if ENABLE_FEATURE_IPV6
	/* not IPv4 notation: give IPv6 a try */
	if (rc == 0) {
		struct sockaddr_in6 v6;
		rc = inet_pton(AF_INET6, string, &v6.sin6_addr);
	}
# endif
	return rc == 1;
}
397#endif
398
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000399static FILE *open_socket(len_and_sockaddr *lsa)
400{
Lauri Kasanend074b412013-10-12 21:47:07 +0200401 int fd;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000402 FILE *fp;
403
Denys Vlasenko6701e912016-03-17 15:58:16 +0100404 set_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200405 fd = xconnect_stream(lsa);
Denys Vlasenko6701e912016-03-17 15:58:16 +0100406 clear_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200407
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000408 /* glibc 2.4 seems to try seeking on it - ??! */
409 /* hopefully it understands what ESPIPE means... */
Lauri Kasanend074b412013-10-12 21:47:07 +0200410 fp = fdopen(fd, "r+");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100411 if (!fp)
Denys Vlasenko899ae532018-04-01 19:59:37 +0200412 bb_die_memory_exhausted();
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000413
414 return fp;
415}
416
/* We balk at any control chars in other side's messages.
 * This prevents nasty surprises (e.g. ESC sequences) in "Location:" URLs
 * and error messages.
 *
 * The only exception is tabs, which are converted to (one) space:
 * HTTP's "headers: <whitespace> values" may have those.
 *
 * The string is cut, in place, at the first control character other than
 * a tab: any byte below 0x20, and also DEL (0x7f). Bytes with the high bit
 * set are passed through untouched.
 * Returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *p = (void *) s;
	while (*p) {
		if (*p == '\t') {
			*p = ' ';
		} else if (*p < ' ' || *p == 0x7f) {
			/* control char: drop it and everything after it */
			break;
		}
		p++;
	}
	*p = '\0';
	return s;
}
438
Denys Vlasenkof836f012011-02-10 23:02:28 +0100439/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
Denys Vlasenko34590242018-02-12 16:46:13 +0100440static char fgets_trim_sanitize(FILE *fp, const char *fmt)
Denys Vlasenkof836f012011-02-10 23:02:28 +0100441{
442 char c;
443 char *buf_ptr;
444
Denys Vlasenko6701e912016-03-17 15:58:16 +0100445 set_alarm();
Denys Vlasenko34590242018-02-12 16:46:13 +0100446 if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
Denys Vlasenkof836f012011-02-10 23:02:28 +0100447 bb_perror_msg_and_die("error getting response");
Denys Vlasenko6701e912016-03-17 15:58:16 +0100448 clear_alarm();
Denys Vlasenkof836f012011-02-10 23:02:28 +0100449
450 buf_ptr = strchrnul(G.wget_buf, '\n');
451 c = *buf_ptr;
Denys Vlasenko34590242018-02-12 16:46:13 +0100452#if 1
453 /* Disallow any control chars: trim at first char < 0x20 */
454 sanitize_string(G.wget_buf);
455#else
Denys Vlasenkof836f012011-02-10 23:02:28 +0100456 *buf_ptr = '\0';
457 buf_ptr = strchrnul(G.wget_buf, '\r');
458 *buf_ptr = '\0';
Denys Vlasenko34590242018-02-12 16:46:13 +0100459#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100460
461 log_io("< %s", G.wget_buf);
462
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100463 if (fmt && (option_mask32 & WGET_OPT_SERVER_RESPONSE))
464 fprintf(stderr, fmt, G.wget_buf);
465
Denys Vlasenkof836f012011-02-10 23:02:28 +0100466 return c;
467}
468
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100469static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000470{
471 int result;
472 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100473 if (!s2)
474 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000475 fprintf(fp, "%s%s\r\n", s1, s2);
Denys Vlasenkodff9fef2017-01-24 21:41:43 +0100476 /* With --server-response, wget also shows its ftp commands */
477 if (option_mask32 & WGET_OPT_SERVER_RESPONSE)
478 fprintf(stderr, "--> %s%s\n\n", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000479 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100480 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000481 }
482
Denys Vlasenko34590242018-02-12 16:46:13 +0100483 /* Read until "Nxx something" is received */
484 G.wget_buf[3] = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000485 do {
Denys Vlasenko34590242018-02-12 16:46:13 +0100486 fgets_trim_sanitize(fp, "%s\n");
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100487 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000488
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100489 G.wget_buf[3] = '\0';
490 result = xatoi_positive(G.wget_buf);
491 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000492 return result;
493}
494
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100495static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000496{
497 char *url, *p, *sp;
498
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100499 free(h->allocated);
500 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000501
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100502 h->protocol = P_FTP;
503 p = strstr(url, "://");
504 if (p) {
505 *p = '\0';
506 h->host = p + 3;
507 if (strcmp(url, P_FTP) == 0) {
508 h->port = bb_lookup_port(P_FTP, "tcp", 21);
509 } else
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100510#if SSL_SUPPORTED
Denys Vlasenko403f2992018-02-06 15:15:08 +0100511# if ENABLE_FEATURE_WGET_HTTPS
512 if (strcmp(url, P_FTPS) == 0) {
513 h->port = bb_lookup_port(P_FTPS, "tcp", 990);
514 h->protocol = P_FTPS;
515 } else
516# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100517 if (strcmp(url, P_HTTPS) == 0) {
518 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
519 h->protocol = P_HTTPS;
520 } else
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100521#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100522 if (strcmp(url, P_HTTP) == 0) {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100523 http:
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100524 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
525 h->protocol = P_HTTP;
526 } else {
527 *p = ':';
Denys Vlasenko34590242018-02-12 16:46:13 +0100528 bb_error_msg_and_die("not an http or ftp url: %s", url);
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100529 }
530 } else {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100531 // GNU wget is user-friendly and falls back to http://
532 h->host = url;
533 goto http;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100534 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000535
536 // FYI:
537 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
Denys Vlasenkoa0aae9f2017-01-20 14:12:10 +0100538 // 'GET /?var=a/b HTTP/1.0'
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000539 // and saves 'index.html?var=a%2Fb' (we save 'b')
540 // wget 'http://busybox.net?login=john@doe':
541 // request: 'GET /?login=john@doe HTTP/1.0'
542 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
543 // wget 'http://busybox.net#test/test':
544 // request: 'GET / HTTP/1.0'
545 // saves: 'index.html' (we save 'test')
546 //
547 // We also don't add unique .N suffix if file exists...
548 sp = strchr(h->host, '/');
549 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
550 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
551 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000552 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000553 } else if (*sp == '/') {
554 *sp = '\0';
555 h->path = sp + 1;
556 } else { // '#' or '?'
557 // http://busybox.net?login=john@doe is a valid URL
558 // memmove converts to:
559 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000560 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000561 h->host--;
562 sp[-1] = '\0';
563 h->path = sp;
564 }
565
566 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000567 if (sp != NULL) {
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200568 // URL-decode "user:password" string before base64-encoding:
569 // wget http://test:my%20pass@example.com should send
570 // Authorization: Basic dGVzdDpteSBwYXNz
571 // which decodes to "test:my pass".
572 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000573 *sp = '\0';
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100574 free(h->user);
575 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000576 h->host = sp + 1;
577 }
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100578 /* else: h->user remains NULL, or as set by original request
579 * before redirect (if we are here after a redirect).
580 */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000581}
582
Denys Vlasenko34590242018-02-12 16:46:13 +0100583static char *get_sanitized_hdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000584{
585 char *s, *hdrval;
586 int c;
587
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000588 /* retrieve header line */
Denys Vlasenko34590242018-02-12 16:46:13 +0100589 c = fgets_trim_sanitize(fp, " %s\n");
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000590
Denys Vlasenkof836f012011-02-10 23:02:28 +0100591 /* end of the headers? */
592 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000593 return NULL;
594
595 /* convert the header name to lower case */
Denys Vlasenkoea267d52013-07-01 15:01:50 +0200596 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
597 /*
598 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
599 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
600 * "A-Z" maps to "a-z".
601 * "@[\]" can't occur in header names.
602 * "^_" maps to "~,DEL" (which is wrong).
603 * "^" was never seen yet, "_" was seen from web.archive.org
604 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
605 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100606 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200607 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000608
609 /* verify we are at the end of the header name */
610 if (*s != ':')
Denys Vlasenko34590242018-02-12 16:46:13 +0100611 bb_error_msg_and_die("bad header line: %s", G.wget_buf);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000612
613 /* locate the start of the header value */
614 *s++ = '\0';
615 hdrval = skip_whitespace(s);
616
Denys Vlasenkof836f012011-02-10 23:02:28 +0100617 if (c != '\n') {
618 /* Rats! The buffer isn't big enough to hold the entire header value */
619 while (c = getc(fp), c != EOF && c != '\n')
620 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000621 }
622
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000623 return hdrval;
624}
625
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200626static void reset_beg_range_to_zero(void)
627{
Denys Vlasenko61441242012-06-17 19:52:25 +0200628 bb_error_msg("restart failed");
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200629 G.beg_range = 0;
630 xlseek(G.output_fd, 0, SEEK_SET);
Denys Vlasenko61441242012-06-17 19:52:25 +0200631 /* Done at the end instead: */
632 /* ftruncate(G.output_fd, 0); */
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200633}
634
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Start "openssl s_client -quiet -connect HOST:PORT [-servername HOST]"
 * as a child whose stdin and stdout are one end of a socketpair.
 *
 * host: server name; ":PORT" is appended if it contains no ':'
 * port: port number used for that suffix
 *
 * Returns the parent's end of the socketpair. If FEATURE_WGET_HTTPS is
 * also enabled, returns -1 when the child flagged that exec failed.
 */
static int spawn_https_helper_openssl(const char *host, unsigned port)
{
	int sp[2];
	int pid;
	char *host_port = NULL; /* "HOST:PORT" string, if we had to build one */
	char *sni_name;
	/* set by the child when exec fails, tested by the parent */
	IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0) {
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");
	}

	if (strchr(host, ':') == NULL) {
		host_port = xasprintf("%s:%u", host, port);
		host = host_port;
	}
	/* Name without the trailing ":..." part */
	sni_name = xstrdup(host);
	*strrchr(sni_name, ':') = '\0';

	fflush_all();
	pid = xvfork();
	if (pid == 0) {
		/* Child */
		/* Unlisted slots are zeroed: [5],[6] are optional, [7] terminates */
		char *argv[8] = {
			(char*)"openssl",
			(char*)"s_client",
			(char*)"-quiet",
			(char*)"-connect",
			(char*)host,
		};

		close(sp[0]);
		xmove_fd(sp[1], 0);
		xdup2(0, 1);
		/*
		 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
		 * It prints some debug stuff on stderr, don't know how to suppress it.
		 * Work around by dev-nulling stderr. We lose all error messages :(
		 * The real stderr is parked on fd 3 so it can be restored below.
		 */
		xmove_fd(2, 3);
		xopen("/dev/null", O_RDWR);
		/*
		 * Per RFC 6066 Section 3, the only permitted values in the
		 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
		 * IPv4 and IPv6 addresses, port numbers are not allowed.
		 */
		if (!is_ip_address(sni_name)) {
			argv[5] = (char*)"-servername";
			argv[6] = (char*)sni_name;
		}

		BB_EXECVP(argv[0], argv);
		/* exec failed: bring back the real stderr */
		xmove_fd(3, 2);
# if ENABLE_FEATURE_WGET_HTTPS
		child_failed = 1;
		xfunc_die();
# else
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
# endif
		/* notreached */
	}

	/* Parent */
	free(sni_name);
	free(host_port);
	close(sp[1]);
# if ENABLE_FEATURE_WGET_HTTPS
	if (child_failed) {
		close(sp[0]);
		return -1;
	}
# endif
	return sp[0];
}
#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100709
#if ENABLE_FEATURE_WGET_HTTPS
/*
 * Put a TLS-speaking child between us and the already connected socket.
 *
 * host:       server name; anything from the last ':' on is cut off
 *             before it is handed to the TLS code
 * network_fd: connected socket; in the parent it is replaced (via
 *             xmove_fd) by our end of a socketpair to the child
 * flags:      passed to tls_run_copy_loop(); in the exec variant only
 *             TLSLOOP_EXIT_ON_LOCAL_EOF is forwarded, as "-e"
 */
static void spawn_ssl_client(const char *host, int network_fd, int flags)
{
	int sp[2];
	int pid;
	char *servername;
	char *colon;

	servername = xstrdup(host);
	colon = strrchr(servername, ':');
	if (colon != NULL)
		*colon = '\0';

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0) {
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");
	}

	fflush_all();
	pid = BB_MMU ? xfork() : xvfork();
	if (pid == 0) {
		/* Child: its end of the socketpair becomes stdin and stdout */
		close(sp[0]);
		xmove_fd(sp[1], 0);
		xdup2(0, 1);
		if (!BB_MMU) {
			/* Cannot continue in-process: exec the ssl_client helper */
			char *argv[6] = {
				(char*)"ssl_client",
				(char*)"-s3", /* presumably "network socket is fd 3" - TODO confirm */
				//TODO: if (!is_ip_address(servername))...
				(char*)"-n",
				servername,
				(flags & TLSLOOP_EXIT_ON_LOCAL_EOF) ? (char*)"-e" : NULL,
				NULL
			};

			xmove_fd(network_fd, 3);
			BB_EXECVP(argv[0], argv);
			bb_perror_msg_and_die("can't execute '%s'", argv[0]);
		} else {
			/* Run the TLS handshake and copy loop right here */
			tls_state_t *tls = new_tls_state();

			tls->ifd = tls->ofd = network_fd;
			tls_handshake(tls, servername);
			tls_run_copy_loop(tls, flags);
			exit(0);
		}
		/* notreached */
	}

	/* Parent */
	free(servername);
	close(sp[1]);
	xmove_fd(sp[0], network_fd);
}
#endif
761
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100762static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
763{
764 FILE *sfp;
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200765 char *pass;
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100766 int port;
767
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100768 sfp = open_socket(lsa);
Denys Vlasenko403f2992018-02-06 15:15:08 +0100769#if ENABLE_FEATURE_WGET_HTTPS
770 if (target->protocol == P_FTPS)
771 spawn_ssl_client(target->host, fileno(sfp), TLSLOOP_EXIT_ON_LOCAL_EOF);
772#endif
773
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100774 if (ftpcmd(NULL, NULL, sfp) != 220)
Denys Vlasenko34590242018-02-12 16:46:13 +0100775 bb_error_msg_and_die("%s", G.wget_buf);
776 /* note: ftpcmd() sanitizes G.wget_buf, ok to print */
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100777
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200778 /* Split username:password pair */
779 pass = (char*)"busybox"; /* password for "anonymous" */
780 if (target->user) {
781 pass = strchr(target->user, ':');
782 if (pass)
783 *pass++ = '\0';
784 }
785
786 /* Log in */
787 switch (ftpcmd("USER ", target->user ?: "anonymous", sfp)) {
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100788 case 230:
789 break;
790 case 331:
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200791 if (ftpcmd("PASS ", pass, sfp) == 230)
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100792 break;
793 /* fall through (failed login) */
794 default:
Denys Vlasenko34590242018-02-12 16:46:13 +0100795 bb_error_msg_and_die("ftp login: %s", G.wget_buf);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100796 }
797
798 ftpcmd("TYPE I", NULL, sfp);
799
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200800 /* Query file size */
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100801 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
802 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
803 if (G.content_len < 0 || errno) {
Denys Vlasenko8e2174e2018-04-08 18:06:24 +0200804 bb_error_msg_and_die("bad SIZE value '%s'", G.wget_buf + 4);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100805 }
806 G.got_clen = 1;
807 }
808
Denys Vlasenko32c3e3a2018-04-07 13:22:52 +0200809 /* Enter passive mode */
Denys Vlasenko1783ffa2018-02-06 15:48:12 +0100810 if (ENABLE_FEATURE_IPV6 && ftpcmd("EPSV", NULL, sfp) == 229) {
811 /* good */
812 } else
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100813 if (ftpcmd("PASV", NULL, sfp) != 227) {
814 pasv_error:
Denys Vlasenko34590242018-02-12 16:46:13 +0100815 bb_error_msg_and_die("bad response to %s: %s", "PASV", G.wget_buf);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100816 }
Denys Vlasenko1783ffa2018-02-06 15:48:12 +0100817 port = parse_pasv_epsv(G.wget_buf);
818 if (port < 0)
819 goto pasv_error;
820
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100821 set_nport(&lsa->u.sa, htons(port));
822
823 *dfpp = open_socket(lsa);
824
Denys Vlasenko2b751572018-02-06 20:49:27 +0100825#if ENABLE_FEATURE_WGET_HTTPS
Denys Vlasenko237a9002018-02-08 00:28:30 +0100826 if (target->protocol == P_FTPS) {
827 /* "PROT P" enables encryption of data stream.
828 * Without it (or with "PROT C"), data is sent unencrypted.
829 */
830 if (ftpcmd("PROT P", NULL, sfp) == 200)
831 spawn_ssl_client(target->host, fileno(*dfpp), /*flags*/ 0);
832 }
Denys Vlasenko2b751572018-02-06 20:49:27 +0100833#endif
Denys Vlasenko403f2992018-02-06 15:15:08 +0100834
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100835 if (G.beg_range != 0) {
836 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
837 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
838 G.content_len -= G.beg_range;
839 else
840 reset_beg_range_to_zero();
841 }
842
Denys Vlasenko34590242018-02-12 16:46:13 +0100843//TODO: needs ftp-escaping 0xff and '\n' bytes here.
844//Or disallow '\n' altogether via sanitize_string() in parse_url().
845//But 0xff's are possible in valid utf8 filenames.
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100846 if (ftpcmd("RETR ", target->path, sfp) > 150)
Denys Vlasenko34590242018-02-12 16:46:13 +0100847 bb_error_msg_and_die("bad response to %s: %s", "RETR", G.wget_buf);
Denys Vlasenkoe9996572018-02-06 15:02:16 +0100848
849 return sfp;
850}
851
Denys Vlasenko2384a352011-02-15 00:58:36 +0100852static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200853{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200854#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
855# if ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200856 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200857# endif
858 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200859
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200860 polldata.fd = fileno(dfp);
861 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200862#endif
863 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200864
865 if (G.chunked)
866 goto get_clen;
867
868 /* Loops only if chunked */
869 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100870
871#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
872 /* Must use nonblocking I/O, otherwise fread will loop
873 * and *block* until it reads full buffer,
874 * which messes up progress bar and/or timeout logic.
875 * Because of nonblocking I/O, we need to dance
876 * very carefully around EAGAIN. See explanation at
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200877 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100878 */
879 ndelay_on(polldata.fd);
880#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100881 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200882 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100883 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200884
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200885#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100886 /* fread internally uses read loop, which in our case
887 * is usually exited when we get EAGAIN.
888 * In this case, libc sets error marker on the stream.
889 * Need to clear it before next fread to avoid possible
890 * rare false positive ferror below. Rare because usually
891 * fread gets more than zero bytes, and we don't fall
892 * into if (n <= 0) ...
893 */
894 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100895#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200896 errno = 0;
897 rdsz = sizeof(G.wget_buf);
898 if (G.got_clen) {
899 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
900 if ((int)G.content_len <= 0)
901 break;
902 rdsz = (unsigned)G.content_len;
903 }
904 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100905 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200906
907 if (n > 0) {
908 xwrite(G.output_fd, G.wget_buf, n);
909#if ENABLE_FEATURE_WGET_STATUSBAR
910 G.transferred += n;
911#endif
912 if (G.got_clen) {
913 G.content_len -= n;
914 if (G.content_len == 0)
915 break;
916 }
917#if ENABLE_FEATURE_WGET_TIMEOUT
918 second_cnt = G.timeout_seconds;
919#endif
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100920 goto bump;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200921 }
922
923 /* n <= 0.
924 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100925 * If error occurs, or EOF is reached, the return value
926 * is a short item count (or zero).
927 * fread does not distinguish between EOF and error.
928 */
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200929 if (errno != EAGAIN) {
930 if (ferror(dfp)) {
931 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100932 bb_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200933 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100934 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200935 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100936
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200937#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
938 /* It was EAGAIN. There is no data. Wait up to one second
939 * then abort if timed out, or update the bar and try reading again.
940 */
941 if (safe_poll(&polldata, 1, 1000) == 0) {
942# if ENABLE_FEATURE_WGET_TIMEOUT
943 if (second_cnt != 0 && --second_cnt == 0) {
944 progress_meter(PROGRESS_END);
945 bb_error_msg_and_die("download timed out");
946 }
947# endif
948 /* We used to loop back to poll here,
949 * but there is no great harm in letting fread
950 * to try reading anyway.
951 */
952 }
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100953#endif
954 bump:
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200955 /* Need to do it _every_ second for "stalled" indicator
956 * to be shown properly.
957 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200958 progress_meter(PROGRESS_BUMP);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200959 } /* while (reading data) */
960
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100961#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
962 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100963 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100964#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200965 if (!G.chunked)
966 break;
967
Denys Vlasenko8e2174e2018-04-08 18:06:24 +0200968 /* Each chunk ends with "\r\n" - eat it */
Denys Vlasenko34590242018-02-12 16:46:13 +0100969 fgets_trim_sanitize(dfp, NULL);
Denys Vlasenko8e2174e2018-04-08 18:06:24 +0200970 get_clen:
971 /* chunk size format is "HEXNUM[;name[=val]]\r\n" */
972 fgets_trim_sanitize(dfp, NULL);
973 errno = 0;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100974 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko8e2174e2018-04-08 18:06:24 +0200975 /*
976 * Had a bug with inputs like "ffffffff0001f400"
977 * smashing the heap later. Ensure >= 0.
978 */
979 if (G.content_len < 0 || errno)
980 bb_error_msg_and_die("bad chunk length '%s'", G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100981 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200982 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100983 G.got_clen = 1;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200984 /*
985 * Note that fgets may result in some data being buffered in dfp.
986 * We loop back to fread, which will retrieve this data.
987 * Also note that code has to be arranged so that fread
988 * is done _before_ one-second poll wait - poll doesn't know
989 * about stdio buffering and can result in spurious one second waits!
990 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200991 }
992
Denys Vlasenko61441242012-06-17 19:52:25 +0200993 /* If -c failed, we restart from the beginning,
994 * but we do not truncate file then, we do it only now, at the end.
995 * This lets user to ^C if his 99% complete 10 GB file download
996 * failed to restart *without* losing the almost complete file.
997 */
998 {
999 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
1000 if (pos != (off_t)-1)
1001 ftruncate(G.output_fd, pos);
1002 }
1003
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001004 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +01001005 G.chunked = 0; /* makes it show 100% even for chunked download */
1006 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02001007 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +02001008}
1009
Pere Orga53695632011-02-16 20:09:36 +01001010static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +00001011{
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001012 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001013 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001014 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001015 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +00001016 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001017 char *fname_out_alloc;
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001018 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001019 struct host_info server;
1020 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +00001021
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001022 server.allocated = NULL;
1023 target.allocated = NULL;
1024 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +02001025 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001026
1027 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +00001028
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +00001029 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001030 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +00001031 if (use_proxy) {
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001032 char *proxy = getenv(target.protocol[0] == 'f' ? "ftp_proxy" : "http_proxy");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001033//FIXME: what if protocol is https? Ok to use http_proxy?
Denys Vlasenko2384a352011-02-15 00:58:36 +01001034 use_proxy = (proxy && proxy[0]);
1035 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001036 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +00001037 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001038 if (!use_proxy) {
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001039 server.protocol = target.protocol;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001040 server.port = target.port;
1041 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001042 //free(server.allocated); - can't be non-NULL
1043 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001044 } else {
1045 server.host = target.host;
1046 }
1047 }
1048
1049 if (ENABLE_FEATURE_IPV6)
1050 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001051
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001052 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001053 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001054 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001055 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001056 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001057 if (G.fname_out[0] == '/' || !G.fname_out[0])
1058 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +00001059 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001060 if (G.dir_prefix)
1061 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001062 else {
Denys Vlasenkoaacd4482012-06-17 20:21:30 +02001063 /* redirects may free target.path later, need to make a copy */
1064 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +01001065 }
Eric Andersen29edd002000-12-09 16:55:35 +00001066 }
Denis Vlasenko818322b2007-09-24 18:27:04 +00001067#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001068 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +00001069#endif
1070
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +00001071 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +01001072 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001073 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001074 G.output_fd = open(G.fname_out, O_WRONLY);
1075 if (G.output_fd >= 0) {
1076 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +00001077 }
1078 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +01001079 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +00001080 }
1081
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001082 redir_limit = 5;
1083 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +00001084 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001085 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001086 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
1087 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
1088 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +00001089 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001090 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +01001091 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1092 G.got_clen = 0;
1093 G.chunked = 0;
Denys Vlasenko403f2992018-02-06 15:15:08 +01001094 if (use_proxy || target.protocol[0] != 'f' /*not ftp[s]*/) {
Eric Andersen79757c92001-04-05 21:45:54 +00001095 /*
1096 * HTTP session
1097 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001098 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001099 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001100
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001101 /* Open socket to http(s) server */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001102#if ENABLE_FEATURE_WGET_OPENSSL
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001103 /* openssl (and maybe internal TLS) support is configured */
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001104 if (server.protocol == P_HTTPS) {
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001105 /* openssl-based helper
1106 * Inconvenient API since we can't give it an open fd
1107 */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001108 int fd = spawn_https_helper_openssl(server.host, server.port);
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001109# if ENABLE_FEATURE_WGET_HTTPS
1110 if (fd < 0) { /* no openssl? try internal */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001111 sfp = open_socket(lsa);
Denys Vlasenko403f2992018-02-06 15:15:08 +01001112 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001113 goto socket_opened;
1114 }
1115# else
1116 /* We don't check for exec("openssl") failure in this case */
1117# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001118 sfp = fdopen(fd, "r+");
1119 if (!sfp)
Denys Vlasenko899ae532018-04-01 19:59:37 +02001120 bb_die_memory_exhausted();
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001121 goto socket_opened;
1122 }
1123 sfp = open_socket(lsa);
1124 socket_opened:
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001125#elif ENABLE_FEATURE_WGET_HTTPS
1126 /* Only internal TLS support is configured */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001127 sfp = open_socket(lsa);
Peter Lloyd804ce5a2018-03-05 00:17:02 +01001128 if (server.protocol == P_HTTPS)
Denys Vlasenko403f2992018-02-06 15:15:08 +01001129 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001130#else
1131 /* ssl (https) support is not configured */
1132 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +01001133#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001134 /* Send HTTP request */
1135 if (use_proxy) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001136 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001137 target.protocol, target.host,
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001138 target.path);
1139 } else {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001140 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001141 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1142 target.path);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001143 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001144 if (!USR_HEADER_HOST)
1145 SENDFMT(sfp, "Host: %s\r\n", target.host);
1146 if (!USR_HEADER_USER_AGENT)
1147 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +00001148
Denys Vlasenko9213a552011-02-10 13:23:45 +01001149 /* Ask server to close the connection as soon as we are done
1150 * (IOW: we do not intend to send more requests)
1151 */
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001152 SENDFMT(sfp, "Connection: close\r\n");
Denys Vlasenko9213a552011-02-10 13:23:45 +01001153
Denis Vlasenko9cade082006-11-21 10:43:02 +00001154#if ENABLE_FEATURE_WGET_AUTHENTICATION
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001155 if (target.user && !USR_HEADER_AUTH) {
1156 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001157 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001158 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001159 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1160 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001161 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001162 }
Eric Andersen79757c92001-04-05 21:45:54 +00001163#endif
1164
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001165 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1166 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +01001167
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001168#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001169 if (G.extra_headers) {
1170 log_io(G.extra_headers);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001171 fputs(G.extra_headers, sfp);
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001172 }
Denis Vlasenko5a2ad692009-03-04 14:13:37 +00001173
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001174 if (option_mask32 & WGET_OPT_POST_DATA) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001175 SENDFMT(sfp,
Denys Vlasenko9213a552011-02-10 13:23:45 +01001176 "Content-Type: application/x-www-form-urlencoded\r\n"
1177 "Content-Length: %u\r\n"
1178 "\r\n"
1179 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +02001180 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +01001181 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001182 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001183#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +01001184 {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001185 SENDFMT(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001186 }
Eric Andersen79757c92001-04-05 21:45:54 +00001187
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001188 fflush(sfp);
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001189
Denys Vlasenko4e08a122017-01-16 17:31:05 +01001190/* Tried doing this unconditionally.
1191 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1192 */
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001193#if SSL_SUPPORTED
1194 if (target.protocol == P_HTTPS) {
1195 /* If we use SSL helper, keeping our end of the socket open for writing
1196 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1197 * even after child closes its copy of the fd.
1198 * This helps:
1199 */
1200 shutdown(fileno(sfp), SHUT_WR);
1201 }
1202#endif
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001203
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001204 /*
1205 * Retrieve HTTP response line and check for "200" status code.
1206 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001207 read_response:
Denys Vlasenko34590242018-02-12 16:46:13 +01001208 fgets_trim_sanitize(sfp, " %s\n");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001209
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001210 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001211 str = skip_non_whitespace(str);
1212 str = skip_whitespace(str);
1213 // FIXME: no error check
1214 // xatou wouldn't work: "200 OK"
1215 status = atoi(str);
1216 switch (status) {
1217 case 0:
1218 case 100:
Denys Vlasenko34590242018-02-12 16:46:13 +01001219 while (get_sanitized_hdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001220 /* eat all remaining headers */;
1221 goto read_response;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001222
1223 /* Success responses */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001224 case 200:
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001225 /* fall through */
1226 case 201: /* 201 Created */
1227/* "The request has been fulfilled and resulted in a new resource being created" */
Denys Vlasenkoef159702016-09-01 11:16:22 +02001228 /* Standard wget is reported to treat this as success */
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001229 /* fall through */
1230 case 202: /* 202 Accepted */
1231/* "The request has been accepted for processing, but the processing has not been completed" */
1232 /* Treat as success: fall through */
1233 case 203: /* 203 Non-Authoritative Information */
1234/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1235 /* fall through */
1236 case 204: /* 204 No Content */
Denis Vlasenko50b5cac2008-06-22 16:28:02 +00001237/*
1238Response 204 doesn't say "null file", it says "metadata
1239has changed but data didn't":
1240
1241"10.2.5 204 No Content
1242The server has fulfilled the request but does not need to return
1243an entity-body, and might want to return updated metainformation.
1244The response MAY include new or updated metainformation in the form
1245of entity-headers, which if present SHOULD be associated with
1246the requested variant.
1247
1248If the client is a user agent, it SHOULD NOT change its document
1249view from that which caused the request to be sent. This response
1250is primarily intended to allow input for actions to take place
1251without causing a change to the user agent's active document view,
1252although any new or updated metainformation SHOULD be applied
1253to the document currently in the user agent's active view.
1254
1255The 204 response MUST NOT include a message-body, and thus
1256is always terminated by the first empty line after the header fields."
1257
1258However, in real world it was observed that some web servers
1259(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1260*/
Denys Vlasenkobf146b82012-06-13 17:31:07 +02001261 if (G.beg_range != 0) {
1262 /* "Range:..." was not honored by the server.
1263 * Restart download from the beginning.
1264 */
1265 reset_beg_range_to_zero();
1266 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001267 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001268 /* 205 Reset Content ?? what to do on this ?? */
1269
Denys Vlasenkofb132e42010-10-29 11:46:52 +02001270 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001271 case 301:
1272 case 302:
1273 case 303:
1274 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001275
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001276 case 206: /* Partial Content */
1277 if (G.beg_range != 0)
1278 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001279 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001280 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001281 /* fall through */
1282 default:
Denys Vlasenko34590242018-02-12 16:46:13 +01001283 bb_error_msg_and_die("server returned error: %s", G.wget_buf);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001284 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001285
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001286 /*
1287 * Retrieve HTTP headers.
1288 */
Denys Vlasenko34590242018-02-12 16:46:13 +01001289 while ((str = get_sanitized_hdr(sfp)) != NULL) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001290 static const char keywords[] ALIGN1 =
1291 "content-length\0""transfer-encoding\0""location\0";
1292 enum {
1293 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1294 };
Matthijs van de Water0d586662009-08-22 20:19:48 +02001295 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001296
Denys Vlasenko34590242018-02-12 16:46:13 +01001297 /* get_sanitized_hdr converted "FOO:" string to lowercase */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001298
Matthijs van de Water0d586662009-08-22 20:19:48 +02001299 /* strip trailing whitespace */
1300 char *s = strchrnul(str, '\0') - 1;
1301 while (s >= str && (*s == ' ' || *s == '\t')) {
1302 *s = '\0';
1303 s--;
1304 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001305 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001306 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001307 G.content_len = BB_STRTOOFF(str, NULL, 10);
1308 if (G.content_len < 0 || errno) {
Denys Vlasenko34590242018-02-12 16:46:13 +01001309 bb_error_msg_and_die("content-length %s is garbage", str);
Eric Andersen79757c92001-04-05 21:45:54 +00001310 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001311 G.got_clen = 1;
1312 continue;
1313 }
1314 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001315 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenko34590242018-02-12 16:46:13 +01001316 bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001317 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001318 }
1319 if (key == KEY_location && status >= 300) {
1320 if (--redir_limit == 0)
1321 bb_error_msg_and_die("too many redirections");
1322 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001323 if (str[0] == '/') {
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001324 free(redirected_path);
Denys Vlasenko34590242018-02-12 16:46:13 +01001325 target.path = redirected_path = xstrdup(str + 1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001326 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001327 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001328 parse_url(str, &target);
1329 if (!use_proxy) {
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001330 /* server.user remains untouched */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001331 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +01001332 server.allocated = NULL;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001333 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001334 /* strip_ipv6_scope_id(target.host); - no! */
1335 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001336 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +00001337 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001338 goto resolve_lsa;
1339 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +00001340 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001341 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +00001342 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001343 }
1344// if (status >= 300)
1345// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001346
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001347 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +00001348 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001349 } else {
Eric Andersen79757c92001-04-05 21:45:54 +00001350 /*
1351 * FTP session
1352 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001353 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +00001354 }
Denis Vlasenko77105632007-09-24 15:04:00 +00001355
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001356 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001357
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001358 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001359 if (G.output_fd < 0)
1360 G.output_fd = xopen(G.fname_out, G.o_flags);
1361 retrieve_file_data(dfp);
1362 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1363 xclose(G.output_fd);
1364 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001365 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +00001366 }
Eric Andersen79757c92001-04-05 21:45:54 +00001367
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001368 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001369 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +00001370 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001371 if (ftpcmd(NULL, NULL, sfp) != 226)
Denys Vlasenko34590242018-02-12 16:46:13 +01001372 bb_error_msg_and_die("ftp error: %s", G.wget_buf);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001373 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +00001374 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001375 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +00001376
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001377 free(server.allocated);
1378 free(target.allocated);
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001379 free(server.user);
1380 free(target.user);
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001381 free(fname_out_alloc);
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001382 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +00001383}
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001384
1385int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1386int wget_main(int argc UNUSED_PARAM, char **argv)
1387{
1388#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1389 static const char wget_longopts[] ALIGN1 =
1390 /* name, has_arg, val */
1391 "continue\0" No_argument "c"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001392 "quiet\0" No_argument "q"
Denys Vlasenkodff9fef2017-01-24 21:41:43 +01001393 "server-response\0" No_argument "S"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001394 "output-document\0" Required_argument "O"
1395 "directory-prefix\0" Required_argument "P"
1396 "proxy\0" Required_argument "Y"
1397 "user-agent\0" Required_argument "U"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001398IF_FEATURE_WGET_TIMEOUT(
1399 "timeout\0" Required_argument "T")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001400 /* Ignored: */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001401IF_DESKTOP( "tries\0" Required_argument "t")
1402 "header\0" Required_argument "\xff"
1403 "post-data\0" Required_argument "\xfe"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001404 "spider\0" No_argument "\xfd"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001405 /* Ignored (we always use PASV): */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001406IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001407 /* Ignored (we don't do ssl) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001408IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0")
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001409 /* Ignored (we don't support caching) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001410IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1411IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1412IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1413IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1414IF_DESKTOP( "no-parent\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001415 ;
Denys Vlasenko036585a2017-08-08 16:38:18 +02001416# define GETOPT32 getopt32long
1417# define LONGOPTS ,wget_longopts
1418#else
1419# define GETOPT32 getopt32
1420# define LONGOPTS
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001421#endif
1422
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001423#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1424 llist_t *headers_llist = NULL;
1425#endif
1426
1427 INIT_G();
1428
Lauri Kasanend074b412013-10-12 21:47:07 +02001429#if ENABLE_FEATURE_WGET_TIMEOUT
1430 G.timeout_seconds = 900;
1431 signal(SIGALRM, alarm_handler);
1432#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001433 G.proxy_flag = "on"; /* use proxies if env vars are set */
1434 G.user_agent = "Wget"; /* "User-Agent" header field */
1435
1436#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001437#endif
Denys Vlasenko22542ec2017-08-08 21:55:02 +02001438 GETOPT32(argv, "^"
1439 "cqSO:P:Y:U:T:+"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001440 /*ignored:*/ "t:"
1441 /*ignored:*/ "n::"
1442 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1443 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1444 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1445 * -nH --no-host-directories: wget -r http://host/ won't create host/
1446 * -np --no-parent
1447 * "n::" above says that we accept -n[ARG].
1448 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1449 */
Denys Vlasenko22542ec2017-08-08 21:55:02 +02001450 "\0"
1451 "-1" /* at least one URL */
1452 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::") /* --header is a list */
Denys Vlasenko036585a2017-08-08 16:38:18 +02001453 LONGOPTS
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001454 , &G.fname_out, &G.dir_prefix,
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001455 &G.proxy_flag, &G.user_agent,
1456 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001457 NULL, /* -t RETRIES */
1458 NULL /* -n[ARG] */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001459 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1460 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1461 );
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001462#if 0 /* option bits debug */
1463 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1464 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1465 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1466 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1467 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
1468 exit(0);
1469#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001470 argv += optind;
1471
1472#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1473 if (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001474 int size = 0;
1475 char *hdr;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001476 llist_t *ll = headers_llist;
1477 while (ll) {
1478 size += strlen(ll->data) + 2;
1479 ll = ll->link;
1480 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001481 G.extra_headers = hdr = xmalloc(size + 1);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001482 while (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001483 int bit;
1484 const char *words;
1485
1486 size = sprintf(hdr, "%s\r\n",
1487 (char*)llist_pop(&headers_llist));
1488 /* a bit like index_in_substrings but don't match full key */
1489 bit = 1;
1490 words = wget_user_headers;
1491 while (*words) {
1492 if (strstr(hdr, words) == hdr) {
1493 G.user_headers |= bit;
1494 break;
1495 }
1496 bit <<= 1;
1497 words += strlen(words) + 1;
1498 }
1499 hdr += size;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001500 }
1501 }
1502#endif
1503
Denys Vlasenko2384a352011-02-15 00:58:36 +01001504 G.output_fd = -1;
1505 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1506 if (G.fname_out) { /* -O FILE ? */
1507 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1508 G.output_fd = 1;
1509 option_mask32 &= ~WGET_OPT_CONTINUE;
1510 }
1511 /* compat with wget: -O FILE can overwrite */
1512 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1513 }
1514
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001515 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +01001516 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001517
Denys Vlasenko28556b92011-02-15 11:03:53 +01001518 if (G.output_fd >= 0)
1519 xclose(G.output_fd);
1520
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +02001521#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1522 free(G.extra_headers);
1523#endif
1524 FINI_G();
1525
Pere Orga53695632011-02-16 20:09:36 +01001526 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001527}