blob: a448acdaeec98001c4c5dab2f96cdcfe9f08cc3f [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
 */
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010011
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020012//config:config WGET
13//config: bool "wget"
14//config: default y
15//config: help
16//config: wget is a utility for non-interactive download of files from HTTP
17//config: and FTP servers.
18//config:
Denys Vlasenkof5604222017-01-10 14:58:54 +010019//config:config FEATURE_WGET_LONG_OPTIONS
20//config: bool "Enable long options"
21//config: default y
22//config: depends on WGET && LONG_OPTS
23//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020024//config:config FEATURE_WGET_STATUSBAR
Denys Vlasenkof5604222017-01-10 14:58:54 +010025//config: bool "Enable progress bar (+2k)"
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020026//config: default y
27//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020028//config:
29//config:config FEATURE_WGET_AUTHENTICATION
30//config: bool "Enable HTTP authentication"
31//config: default y
32//config: depends on WGET
33//config: help
34//config: Support authenticated HTTP transfers.
35//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020036//config:config FEATURE_WGET_TIMEOUT
37//config: bool "Enable timeout option -T SEC"
38//config: default y
39//config: depends on WGET
40//config: help
41//config: Supports network read and connect timeouts for wget,
42//config: so that wget will give up and timeout, through the -T
43//config: command line option.
44//config:
45//config: Currently only connect and network data read timeout are
46//config: supported (i.e., timeout is not applied to the DNS query). When
47//config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
48//config: will work in addition to -T.
49//config:
Denys Vlasenko9a647c32017-01-23 01:08:16 +010050//config:config FEATURE_WGET_HTTPS
51//config: bool "Support HTTPS using internal TLS code"
52//config: default y
53//config: depends on WGET
54//config: select TLS
55//config: help
56//config: wget will use internal TLS code to connect to https:// URLs.
57//config: Note:
58//config: On NOMMU machines, ssl_helper applet should be available
59//config: in the $PATH for this to work. Make sure to select that applet.
60//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +020061//config:config FEATURE_WGET_OPENSSL
62//config: bool "Try to connect to HTTPS using openssl"
63//config: default y
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020064//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020065//config: help
Denys Vlasenko9a647c32017-01-23 01:08:16 +010066//config: Try to use openssl to handle HTTPS.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020067//config:
68//config: OpenSSL has a simple SSL client for debug purposes.
Denys Vlasenko9a647c32017-01-23 01:08:16 +010069//config: If you select this option, wget will effectively run:
Denys Vlasenkoed727612016-07-25 21:34:57 +020070//config: "openssl s_client -quiet -connect hostname:443
71//config: -servername hostname 2>/dev/null" and pipe its data
72//config: through it. -servername is not used if hostname is numeric.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020073//config: Note inconvenient API: host resolution is done twice,
74//config: and there is no guarantee openssl's idea of IPv6 address
75//config: format is the same as ours.
76//config: Another problem is that s_client prints debug information
77//config: to stderr, and it needs to be suppressed. This means
78//config: all error messages get suppressed too.
79//config: openssl is also a big binary, often dynamically linked
80//config: against ~15 libraries.
81//config:
Denys Vlasenko9a647c32017-01-23 01:08:16 +010082//config: If openssl can't be executed, internal TLS code will be used
83//config: (if you enabled it); if openssl can be executed but fails later,
84//config: wget can't detect this, and download will fail.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020085
86//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
87
88//kbuild:lib-$(CONFIG_WGET) += wget.o
89
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010090//usage:#define wget_trivial_usage
91//usage: IF_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenko2972e2c2016-10-04 04:23:09 +020092//usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010093//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +020094/* Since we ignore these opts, we don't show them in --help */
Denys Vlasenko92e1b082015-10-20 21:51:52 +020095/* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
96/* //usage: " [-nv] [-nc] [-nH] [-np]" */
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +020097//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010098//usage: )
99//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200100//usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100101//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
102//usage: )
103//usage:#define wget_full_usage "\n\n"
104//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200105//usage: IF_FEATURE_WGET_LONG_OPTIONS(
106//usage: "\n --spider Spider mode - only check file existence"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100107//usage: )
Denys Vlasenko2972e2c2016-10-04 04:23:09 +0200108//usage: "\n -c Continue retrieval of aborted transfer"
109//usage: "\n -q Quiet"
110//usage: "\n -P DIR Save to DIR (default .)"
111//usage: IF_FEATURE_WGET_TIMEOUT(
112//usage: "\n -T SEC Network read timeout is SEC seconds"
113//usage: )
114//usage: "\n -O FILE Save to FILE ('-' for stdout)"
115//usage: "\n -U STR Use STR for User-Agent header"
116//usage: "\n -Y on/off Use proxy"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100117
Denis Vlasenkob6adbf12007-05-26 19:00:18 +0000118#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000119
/*
 * I/O tracing helpers.
 *
 * Change "#if 0" to "#if 1" to have every protocol line logged through
 * bb_error_msg(). In the default build log_io() expands to a no-op and
 * SENDFMT() is a plain fprintf() to fp.
 *
 * The tracing variant of SENDFMT() is wrapped in do { } while (0) WITHOUT
 * a trailing semicolon, so that "if (x) SENDFMT(...); else ..." parses the
 * same way in both variants.
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
# define SENDFMT(fp, fmt, ...) \
	do { \
		log_io("> " fmt, ##__VA_ARGS__); \
		fprintf(fp, fmt, ##__VA_ARGS__); \
	} while (0)
#else
# define log_io(...) ((void)0)
# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100131
132
/* Non-zero when at least one way of speaking https:// is compiled in */
#define SSL_SUPPORTED (ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_HTTPS)

/*
 * One parsed URL, as filled in by parse_url().
 */
struct host_info {
	char *allocated;      /* heap copy of the URL owned by this struct; parse_url() frees and replaces it */
	const char *path;     /* part after the host: points into the URL copy, or at "" */
	char *user;           /* separately allocated, percent-decoded text before '@'; freed and replaced by parse_url() */
	const char *protocol; /* one of the P_xxx strings below */
	char *host;           /* host[:port] text, points into the URL copy */
	int port;             /* result of bb_lookup_port() for the scheme */
};
/* Scheme names; parse_url() stores these very pointers in host_info.protocol */
static const char P_FTP[] ALIGN1 = "ftp";
static const char P_HTTP[] ALIGN1 = "http";
#if SSL_SUPPORTED
static const char P_HTTPS[] ALIGN1 = "https";
#endif
Eric Andersen79757c92001-04-05 21:45:54 +0000148
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * User-specified headers prevent using our corresponding built-in headers.
 * One bit per header name; the bits are listed in the same order as the
 * names in wget_user_headers[] (presumably bit N <-> N-th name - the code
 * that sets G.user_headers is outside this block).
 * The authentication bits collapse to 0 when authentication is disabled.
 */
enum {
	HDR_HOST       = 0x01,
	HDR_USER_AGENT = 0x02,
	HDR_RANGE      = 0x04,
	HDR_AUTH       = 0x08 * ENABLE_FEATURE_WGET_AUTHENTICATION,
	HDR_PROXY_AUTH = 0x10 * ENABLE_FEATURE_WGET_AUTHENTICATION,
};
/* NUL-separated list of the header names tracked above */
static const char wget_user_headers[] ALIGN1 =
	"Host:\0"
	"User-Agent:\0"
	"Range:\0"
# if ENABLE_FEATURE_WGET_AUTHENTICATION
	"Authorization:\0"
	"Proxy-Authorization:\0"
# endif
	;
/* Non-zero if the user supplied the given header himself */
# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
#else /* No long options, no user-headers :( */
# define USR_HEADER_HOST       0
# define USR_HEADER_USER_AGENT 0
# define USR_HEADER_RANGE      0
# define USR_HEADER_AUTH       0
# define USR_HEADER_PROXY_AUTH 0
#endif
Denis Vlasenko77105632007-09-24 15:04:00 +0000179
/* Globals */
/*
 * All applet state lives in one heap object reached through the G macro.
 * INIT_G() obtains it from xzalloc(); FINI_G() releases it.
 * Member order is significant: the dummy uint64_t member sits directly
 * before wget_buf[] so that the buffer is aligned.
 */
struct globals {
	off_t content_len;        /* Content-length of the file */
	off_t beg_range;          /* Range at which continue begins */
#if ENABLE_FEATURE_WGET_STATUSBAR
	off_t transferred;        /* Number of bytes transferred so far */
	const char *curfile;      /* Name of current file being transferred */
	bb_progress_t pmt;        /* progress meter state, see progress_meter() */
#endif
	char *dir_prefix;
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
	char *post_data;
	char *extra_headers;
	unsigned char user_headers; /* Headers mentioned by the user */
#endif
	char *fname_out;        /* where to direct output (-O) */
	const char *proxy_flag; /* Use proxies if env vars are set */
	const char *user_agent; /* "User-Agent" header field */
#if ENABLE_FEATURE_WGET_TIMEOUT
	unsigned timeout_seconds; /* 0: set_alarm() does nothing */
	bool die_if_timed_out;    /* set by set_alarm(), cleared by clear_alarm() */
#endif
	int output_fd;
	int o_flags;
	smallint chunked;         /* chunked transfer encoding */
	smallint got_clen;        /* got content-length: from server */
	/* Local downloads do benefit from big buffer.
	 * With 512 byte buffer, it was measured to be
	 * an order of magnitude slower than with big one.
	 */
	uint64_t just_to_align_next_member;
	char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
} FIX_ALIASING;
#define G (*ptr_to_globals)
#define INIT_G() do { \
	SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
} while (0)
#define FINI_G() do { \
	FREE_PTR_TO_GLOBALS(); \
} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +0000220
221
/* Must match option string! */
/*
 * Bits of option_mask32, one per command line option.
 * The last three exist only with long options enabled and are 0 otherwise,
 * which makes tests against them compile-time false.
 */
enum {
	WGET_OPT_CONTINUE             = 0x001,
	WGET_OPT_QUIET                = 0x002,
	WGET_OPT_OUTNAME              = 0x004,
	WGET_OPT_PREFIX               = 0x008,
	WGET_OPT_PROXY                = 0x010,
	WGET_OPT_USER_AGENT           = 0x020,
	WGET_OPT_NETWORK_READ_TIMEOUT = 0x040,
	WGET_OPT_RETRIES              = 0x080,
	WGET_OPT_nsomething           = 0x100,
	WGET_OPT_HEADER               = 0x200 * ENABLE_FEATURE_WGET_LONG_OPTIONS,
	WGET_OPT_POST_DATA            = 0x400 * ENABLE_FEATURE_WGET_LONG_OPTIONS,
	WGET_OPT_SPIDER               = 0x800 * ENABLE_FEATURE_WGET_LONG_OPTIONS,
};
237
238enum {
239 PROGRESS_START = -1,
240 PROGRESS_END = 0,
241 PROGRESS_BUMP = 1,
242};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000243#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000244static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000245{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200246 if (option_mask32 & WGET_OPT_QUIET)
247 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000248
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200249 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100250 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000251
Denys Vlasenko2384a352011-02-15 00:58:36 +0100252 bb_progress_update(&G.pmt,
253 G.beg_range,
254 G.transferred,
255 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
256 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000257
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200258 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100259 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +0200260 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100261 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000262 }
263}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200264#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000265static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000266#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000267
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000268
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits host in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not of the form "[addr%zone]" or "[addr%zone]:port"
 * is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}
	if (scope > cp) {
		/* '%' is outside the brackets ("[::1]:80%x"): not a zone id.
		 * Copying "]..." forward onto it would run past the end
		 * of the string. */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]...";
	 * the regions overlap, hence memmove (NUL included) */
	memmove(scope, cp, strlen(cp) + 1);
}
304
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode a character string.
 * The result is written to, and returned as, the shared G.wget_buf, so it
 * is only valid until that buffer is used again. Over-long input is
 * silently cut so that the output stays inside the buffer.
 */
static char *base64enc(const char *str)
{
	const unsigned max_len = sizeof(G.wget_buf)/4*3 - 10; /* paranoia */
	unsigned len;

	len = strlen(str);
	if (len > max_len)
		len = max_len;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
316
/*
 * Cut s at the first byte below ' ' (any control character, or the
 * terminating NUL itself). Bytes >= 0x80 are kept because the comparison
 * is done on unsigned char. Returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cut;

	for (cut = (unsigned char *) s; *cut >= ' '; cut++)
		continue;
	*cut = '\0';
	return s;
}
325
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * Signal handler for the timer armed by set_alarm() (where it is installed
 * is outside this block). Aborts the download, but only while the
 * die_if_timed_out flag is raised, i.e. between set_alarm() and
 * clear_alarm().
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
	if (!G.die_if_timed_out)
		return;
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	bb_error_msg_and_die("download timed out");
}
/* Arm the -T timeout before a blocking network operation (no-op if -T is 0) */
static void set_alarm(void)
{
	if (G.timeout_seconds == 0)
		return;
	alarm(G.timeout_seconds);
	G.die_if_timed_out = 1;
}
/* Does not cancel the pending alarm(), only makes the handler ignore it */
# define clear_alarm() ((void)(G.die_if_timed_out = 0))
#else
# define set_alarm()   ((void)0)
# define clear_alarm() ((void)0)
#endif
345
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * is_ip_address() attempts to verify whether or not a string
 * contains an IPv4 or IPv6 address (vs. an FQDN). The result
 * of inet_pton() can be used to determine this.
 *
 * Returns 1 if inet_pton() accepts the string as IPv4 or, when
 * ENABLE_FEATURE_IPV6 is set, as IPv6; 0 otherwise.
 *
 * TODO add proper error checking when inet_pton() returns -1
 * (some form of system error has occurred, and errno is set)
 */
static int is_ip_address(const char *string)
{
	struct in_addr addr4;
	int rc;

	rc = inet_pton(AF_INET, string, &addr4);
# if ENABLE_FEATURE_IPV6
	if (rc == 0) {
		/* not dotted-quad: give IPv6 notation a try */
		struct in6_addr addr6;
		rc = inet_pton(AF_INET6, string, &addr6);
	}
# endif
	return rc == 1;
}
#endif
369
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000370static FILE *open_socket(len_and_sockaddr *lsa)
371{
Lauri Kasanend074b412013-10-12 21:47:07 +0200372 int fd;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000373 FILE *fp;
374
Denys Vlasenko6701e912016-03-17 15:58:16 +0100375 set_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200376 fd = xconnect_stream(lsa);
Denys Vlasenko6701e912016-03-17 15:58:16 +0100377 clear_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200378
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000379 /* glibc 2.4 seems to try seeking on it - ??! */
380 /* hopefully it understands what ESPIPE means... */
Lauri Kasanend074b412013-10-12 21:47:07 +0200381 fp = fdopen(fd, "r+");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100382 if (!fp)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100383 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000384
385 return fp;
386}
387
Denys Vlasenkof836f012011-02-10 23:02:28 +0100388/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
389static char fgets_and_trim(FILE *fp)
390{
391 char c;
392 char *buf_ptr;
393
Denys Vlasenko6701e912016-03-17 15:58:16 +0100394 set_alarm();
Denys Vlasenkof836f012011-02-10 23:02:28 +0100395 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
396 bb_perror_msg_and_die("error getting response");
Denys Vlasenko6701e912016-03-17 15:58:16 +0100397 clear_alarm();
Denys Vlasenkof836f012011-02-10 23:02:28 +0100398
399 buf_ptr = strchrnul(G.wget_buf, '\n');
400 c = *buf_ptr;
401 *buf_ptr = '\0';
402 buf_ptr = strchrnul(G.wget_buf, '\r');
403 *buf_ptr = '\0';
404
405 log_io("< %s", G.wget_buf);
406
407 return c;
408}
409
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100410static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000411{
412 int result;
413 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100414 if (!s2)
415 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000416 fprintf(fp, "%s%s\r\n", s1, s2);
417 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100418 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000419 }
420
421 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100422 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100423 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000424
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100425 G.wget_buf[3] = '\0';
426 result = xatoi_positive(G.wget_buf);
427 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000428 return result;
429}
430
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100431static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000432{
433 char *url, *p, *sp;
434
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100435 free(h->allocated);
436 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000437
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100438 h->protocol = P_FTP;
439 p = strstr(url, "://");
440 if (p) {
441 *p = '\0';
442 h->host = p + 3;
443 if (strcmp(url, P_FTP) == 0) {
444 h->port = bb_lookup_port(P_FTP, "tcp", 21);
445 } else
Denys Vlasenkoa6f86512017-01-11 20:16:45 +0100446#if SSL_SUPPORTED
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100447 if (strcmp(url, P_HTTPS) == 0) {
448 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
449 h->protocol = P_HTTPS;
450 } else
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100451#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100452 if (strcmp(url, P_HTTP) == 0) {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100453 http:
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100454 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
455 h->protocol = P_HTTP;
456 } else {
457 *p = ':';
458 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
459 }
460 } else {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100461 // GNU wget is user-friendly and falls back to http://
462 h->host = url;
463 goto http;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100464 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000465
466 // FYI:
467 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
Denys Vlasenkoa0aae9f2017-01-20 14:12:10 +0100468 // 'GET /?var=a/b HTTP/1.0'
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000469 // and saves 'index.html?var=a%2Fb' (we save 'b')
470 // wget 'http://busybox.net?login=john@doe':
471 // request: 'GET /?login=john@doe HTTP/1.0'
472 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
473 // wget 'http://busybox.net#test/test':
474 // request: 'GET / HTTP/1.0'
475 // saves: 'index.html' (we save 'test')
476 //
477 // We also don't add unique .N suffix if file exists...
478 sp = strchr(h->host, '/');
479 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
480 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
481 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000482 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000483 } else if (*sp == '/') {
484 *sp = '\0';
485 h->path = sp + 1;
486 } else { // '#' or '?'
487 // http://busybox.net?login=john@doe is a valid URL
488 // memmove converts to:
489 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000490 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000491 h->host--;
492 sp[-1] = '\0';
493 h->path = sp;
494 }
495
496 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000497 if (sp != NULL) {
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200498 // URL-decode "user:password" string before base64-encoding:
499 // wget http://test:my%20pass@example.com should send
500 // Authorization: Basic dGVzdDpteSBwYXNz
501 // which decodes to "test:my pass".
502 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000503 *sp = '\0';
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100504 free(h->user);
505 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000506 h->host = sp + 1;
507 }
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100508 /* else: h->user remains NULL, or as set by original request
509 * before redirect (if we are here after a redirect).
510 */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000511}
512
Denys Vlasenkof836f012011-02-10 23:02:28 +0100513static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000514{
515 char *s, *hdrval;
516 int c;
517
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000518 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100519 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000520
Denys Vlasenkof836f012011-02-10 23:02:28 +0100521 /* end of the headers? */
522 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000523 return NULL;
524
525 /* convert the header name to lower case */
Denys Vlasenkoea267d52013-07-01 15:01:50 +0200526 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
527 /*
528 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
529 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
530 * "A-Z" maps to "a-z".
531 * "@[\]" can't occur in header names.
532 * "^_" maps to "~,DEL" (which is wrong).
533 * "^" was never seen yet, "_" was seen from web.archive.org
534 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
535 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100536 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200537 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000538
539 /* verify we are at the end of the header name */
540 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100541 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000542
543 /* locate the start of the header value */
544 *s++ = '\0';
545 hdrval = skip_whitespace(s);
546
Denys Vlasenkof836f012011-02-10 23:02:28 +0100547 if (c != '\n') {
548 /* Rats! The buffer isn't big enough to hold the entire header value */
549 while (c = getc(fp), c != EOF && c != '\n')
550 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000551 }
552
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000553 return hdrval;
554}
555
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200556static void reset_beg_range_to_zero(void)
557{
Denys Vlasenko61441242012-06-17 19:52:25 +0200558 bb_error_msg("restart failed");
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200559 G.beg_range = 0;
560 xlseek(G.output_fd, 0, SEEK_SET);
Denys Vlasenko61441242012-06-17 19:52:25 +0200561 /* Done at the end instead: */
562 /* ftruncate(G.output_fd, 0); */
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200563}
564
Denys Vlasenko7f432802009-06-28 01:02:24 +0200565static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
566{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200567 FILE *sfp;
568 char *str;
569 int port;
570
571 if (!target->user)
572 target->user = xstrdup("anonymous:busybox@");
573
574 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100575 if (ftpcmd(NULL, NULL, sfp) != 220)
576 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200577
578 /*
579 * Splitting username:password pair,
580 * trying to log in
581 */
582 str = strchr(target->user, ':');
583 if (str)
584 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100585 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200586 case 230:
587 break;
588 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100589 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200590 break;
591 /* fall through (failed login) */
592 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100593 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200594 }
595
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100596 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200597
598 /*
599 * Querying file size
600 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100601 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
602 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100603 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200604 bb_error_msg_and_die("SIZE value is garbage");
605 }
606 G.got_clen = 1;
607 }
608
609 /*
610 * Entering passive mode
611 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100612 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200613 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100614 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200615 }
616 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
617 // Server's IP is N1.N2.N3.N4 (we ignore it)
618 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100619 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200620 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100621 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200622 if (!str) goto pasv_error;
623 port = xatou_range(str+1, 0, 255);
624 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100625 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200626 if (!str) goto pasv_error;
627 port += xatou_range(str+1, 0, 255) * 256;
Denys Vlasenkoca183112011-04-07 17:52:20 +0200628 set_nport(&lsa->u.sa, htons(port));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200629
630 *dfpp = open_socket(lsa);
631
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200632 if (G.beg_range != 0) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100633 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
634 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100635 G.content_len -= G.beg_range;
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200636 else
637 reset_beg_range_to_zero();
Denys Vlasenko7f432802009-06-28 01:02:24 +0200638 }
639
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100640 if (ftpcmd("RETR ", target->path, sfp) > 150)
641 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200642
643 return sfp;
644}
645
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Start "openssl s_client -quiet -connect HOST:PORT [-servername HOST]"
 * as a child process whose stdin and stdout are one end of a socketpair.
 *
 * host: server name, optionally already carrying ":PORT"
 * port: appended to host as ":PORT" when host contains no ':'
 *
 * Returns the parent's end of the socketpair. When
 * ENABLE_FEATURE_WGET_HTTPS is set and the child reported that it could
 * not exec openssl, returns -1 instead so the caller can fall back.
 * Dies if socketpair() fails.
 */
static int spawn_https_helper_openssl(const char *host, unsigned port)
{
	char *allocated = NULL;
	char *servername;
	int sp[2];
	int pid;
	/* Set by the child, read by the parent after xvfork() returns.
	 * NOTE(review): this presumably relies on vfork-style sharing of
	 * the parent's memory - confirm against xvfork().
	 */
	IF_FEATURE_WGET_HTTPS(volatile int child_failed = 0;)

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	if (!strchr(host, ':'))
		host = allocated = xasprintf("%s:%u", host, port);
	/* host is guaranteed to contain ':' here, so strrchr can't return NULL.
	 * NOTE(review): for a bracketed IPv6 literal without a port the last
	 * ':' lies inside the address - verify callers never pass one.
	 */
	servername = xstrdup(host);
	strrchr(servername, ':')[0] = '\0';

	fflush_all();
	pid = xvfork();
	if (pid == 0) {
		/* Child */
		char *argv[8];

		close(sp[0]);
		xmove_fd(sp[1], 0);
		xdup2(0, 1);
		/*
		 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
		 * It prints some debug stuff on stderr, don't know how to suppress it.
		 * Work around by dev-nulling stderr. We lose all error messages :(
		 */
		/* Park the real stderr on fd 3 so it can be restored if exec fails */
		xmove_fd(2, 3);
		xopen("/dev/null", O_RDWR);
		/* Zero-fill: unused trailing slots double as the NULL terminator */
		memset(&argv, 0, sizeof(argv));
		argv[0] = (char*)"openssl";
		argv[1] = (char*)"s_client";
		argv[2] = (char*)"-quiet";
		argv[3] = (char*)"-connect";
		argv[4] = (char*)host;
		/*
		 * Per RFC 6066 Section 3, the only permitted values in the
		 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
		 * IPv4 and IPv6 addresses, port numbers are not allowed.
		 */
		if (!is_ip_address(servername)) {
			argv[5] = (char*)"-servername";
			argv[6] = (char*)servername;
		}

		BB_EXECVP(argv[0], argv);
		/* exec failed: bring the real stderr back before reporting */
		xmove_fd(3, 2);
# if ENABLE_FEATURE_WGET_HTTPS
		child_failed = 1;
		xfunc_die();
# else
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
# endif
		/* notreached */
	}

	/* Parent */
	free(servername);
	free(allocated);
	close(sp[1]);
# if ENABLE_FEATURE_WGET_HTTPS
	if (child_failed) {
		close(sp[0]);
		return -1;
	}
# endif
	return sp[0];
}
#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100720
#if ENABLE_FEATURE_WGET_HTTPS
/*
 * Interpose a TLS-speaking child process between the caller and an
 * already connected socket.
 *
 * host:       server name, optionally with a trailing ":PORT" which is
 *             cut off before the name is handed to the TLS code
 * network_fd: connected socket to the server
 *
 * The child gets one end of a socketpair as its stdin/stdout and either
 * runs the TLS handshake and copy loop itself (BB_MMU) or execs the
 * "ssl_client" helper with the network socket moved to fd 3.
 * In the parent, the other end of the socketpair is moved onto
 * network_fd, so the caller keeps using the same descriptor number.
 * Dies if socketpair() fails.
 */
static void spawn_ssl_client(const char *host, int network_fd)
{
	int sp[2];
	int pid;
	char *servername, *p;

	/* Strip ":PORT", if any */
	servername = xstrdup(host);
	p = strrchr(servername, ':');
	if (p) *p = '\0';

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	fflush_all();
	pid = BB_MMU ? xfork() : xvfork();
	if (pid == 0) {
		/* Child */
		close(sp[0]);
		xmove_fd(sp[1], 0);
		xdup2(0, 1);
		if (BB_MMU) {
			/* Speak TLS in-process */
			tls_state_t *tls = new_tls_state();
			tls->ifd = tls->ofd = network_fd;
			tls_handshake(tls, servername);
			tls_run_copy_loop(tls);
			exit(0);
		} else {
			/* Exec the external helper instead */
			char *argv[5];
			xmove_fd(network_fd, 3);
			argv[0] = (char*)"ssl_client";
			argv[1] = (char*)"-s3";
			//TODO: if (!is_ip_address(servername))...
			argv[2] = (char*)"-n";
			argv[3] = servername;
			argv[4] = NULL;
			BB_EXECVP(argv[0], argv);
			bb_perror_msg_and_die("can't execute '%s'", argv[0]);
		}
		/* notreached */
	}

	/* Parent */
	free(servername);
	close(sp[1]);
	xmove_fd(sp[0], network_fd);
}
#endif
770
Denys Vlasenko2384a352011-02-15 00:58:36 +0100771static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200772{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200773#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
774# if ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200775 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200776# endif
777 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200778
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200779 polldata.fd = fileno(dfp);
780 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200781#endif
782 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200783
784 if (G.chunked)
785 goto get_clen;
786
787 /* Loops only if chunked */
788 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100789
790#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
791 /* Must use nonblocking I/O, otherwise fread will loop
792 * and *block* until it reads full buffer,
793 * which messes up progress bar and/or timeout logic.
794 * Because of nonblocking I/O, we need to dance
795 * very carefully around EAGAIN. See explanation at
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200796 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100797 */
798 ndelay_on(polldata.fd);
799#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100800 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200801 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100802 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200803
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200804#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100805 /* fread internally uses read loop, which in our case
806 * is usually exited when we get EAGAIN.
807 * In this case, libc sets error marker on the stream.
808 * Need to clear it before next fread to avoid possible
809 * rare false positive ferror below. Rare because usually
810 * fread gets more than zero bytes, and we don't fall
811 * into if (n <= 0) ...
812 */
813 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100814#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200815 errno = 0;
816 rdsz = sizeof(G.wget_buf);
817 if (G.got_clen) {
818 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
819 if ((int)G.content_len <= 0)
820 break;
821 rdsz = (unsigned)G.content_len;
822 }
823 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100824 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200825
826 if (n > 0) {
827 xwrite(G.output_fd, G.wget_buf, n);
828#if ENABLE_FEATURE_WGET_STATUSBAR
829 G.transferred += n;
830#endif
831 if (G.got_clen) {
832 G.content_len -= n;
833 if (G.content_len == 0)
834 break;
835 }
836#if ENABLE_FEATURE_WGET_TIMEOUT
837 second_cnt = G.timeout_seconds;
838#endif
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100839 goto bump;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200840 }
841
842 /* n <= 0.
843 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100844 * If error occurs, or EOF is reached, the return value
845 * is a short item count (or zero).
846 * fread does not distinguish between EOF and error.
847 */
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200848 if (errno != EAGAIN) {
849 if (ferror(dfp)) {
850 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100851 bb_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200852 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100853 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200854 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100855
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200856#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
857 /* It was EAGAIN. There is no data. Wait up to one second
858 * then abort if timed out, or update the bar and try reading again.
859 */
860 if (safe_poll(&polldata, 1, 1000) == 0) {
861# if ENABLE_FEATURE_WGET_TIMEOUT
862 if (second_cnt != 0 && --second_cnt == 0) {
863 progress_meter(PROGRESS_END);
864 bb_error_msg_and_die("download timed out");
865 }
866# endif
867 /* We used to loop back to poll here,
868 * but there is no great harm in letting fread
869 * to try reading anyway.
870 */
871 }
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100872#endif
873 bump:
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200874 /* Need to do it _every_ second for "stalled" indicator
875 * to be shown properly.
876 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200877 progress_meter(PROGRESS_BUMP);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200878 } /* while (reading data) */
879
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100880#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
881 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100882 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100883#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200884 if (!G.chunked)
885 break;
886
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100887 fgets_and_trim(dfp); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200888 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100889 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100890 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200891 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100892 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200893 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100894 G.got_clen = 1;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200895 /*
896 * Note that fgets may result in some data being buffered in dfp.
897 * We loop back to fread, which will retrieve this data.
898 * Also note that code has to be arranged so that fread
899 * is done _before_ one-second poll wait - poll doesn't know
900 * about stdio buffering and can result in spurious one second waits!
901 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200902 }
903
Denys Vlasenko61441242012-06-17 19:52:25 +0200904 /* If -c failed, we restart from the beginning,
905 * but we do not truncate file then, we do it only now, at the end.
906 * This lets user to ^C if his 99% complete 10 GB file download
907 * failed to restart *without* losing the almost complete file.
908 */
909 {
910 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
911 if (pos != (off_t)-1)
912 ftruncate(G.output_fd, pos);
913 }
914
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100915 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100916 G.chunked = 0; /* makes it show 100% even for chunked download */
917 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200918 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200919}
920
Pere Orga53695632011-02-16 20:09:36 +0100921static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +0000922{
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100923 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200924 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100925 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200926 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000927 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100928 char *proxy = NULL;
929 char *fname_out_alloc;
Denys Vlasenko93b4a602011-12-18 05:11:56 +0100930 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100931 struct host_info server;
932 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +0000933
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100934 server.allocated = NULL;
935 target.allocated = NULL;
936 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200937 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100938
939 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000940
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000941 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100942 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000943 if (use_proxy) {
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100944 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
945//FIXME: what if protocol is https? Ok to use http_proxy?
Denys Vlasenko2384a352011-02-15 00:58:36 +0100946 use_proxy = (proxy && proxy[0]);
947 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000948 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +0000949 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200950 if (!use_proxy) {
951 server.port = target.port;
952 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100953 //free(server.allocated); - can't be non-NULL
954 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200955 } else {
956 server.host = target.host;
957 }
958 }
959
960 if (ENABLE_FEATURE_IPV6)
961 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000962
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100963 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100964 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100965 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100966 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000967 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100968 if (G.fname_out[0] == '/' || !G.fname_out[0])
969 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000970 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoaacd4482012-06-17 20:21:30 +0200971 if (G.dir_prefix)
972 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100973 else {
Denys Vlasenkoaacd4482012-06-17 20:21:30 +0200974 /* redirects may free target.path later, need to make a copy */
975 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100976 }
Eric Andersen29edd002000-12-09 16:55:35 +0000977 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000978#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100979 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000980#endif
981
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000982 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100983 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100984 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100985 G.output_fd = open(G.fname_out, O_WRONLY);
986 if (G.output_fd >= 0) {
987 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000988 }
989 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +0100990 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000991 }
992
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200993 redir_limit = 5;
994 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000995 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100996 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200997 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
998 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
999 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +00001000 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001001 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +01001002 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
1003 G.got_clen = 0;
1004 G.chunked = 0;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001005 if (use_proxy || target.protocol != P_FTP) {
Eric Andersen79757c92001-04-05 21:45:54 +00001006 /*
1007 * HTTP session
1008 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001009 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001010 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001011
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001012 /* Open socket to http(s) server */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001013#if ENABLE_FEATURE_WGET_OPENSSL
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001014 /* openssl (and maybe internal TLS) support is configured */
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001015 if (target.protocol == P_HTTPS) {
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001016 /* openssl-based helper
1017 * Inconvenient API since we can't give it an open fd
1018 */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001019 int fd = spawn_https_helper_openssl(server.host, server.port);
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001020# if ENABLE_FEATURE_WGET_HTTPS
1021 if (fd < 0) { /* no openssl? try internal */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001022 sfp = open_socket(lsa);
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001023 spawn_ssl_client(server.host, fileno(sfp));
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001024 goto socket_opened;
1025 }
1026# else
1027 /* We don't check for exec("openssl") failure in this case */
1028# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001029 sfp = fdopen(fd, "r+");
1030 if (!sfp)
1031 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001032 goto socket_opened;
1033 }
1034 sfp = open_socket(lsa);
1035 socket_opened:
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001036#elif ENABLE_FEATURE_WGET_HTTPS
1037 /* Only internal TLS support is configured */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001038 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +01001039 if (target.protocol == P_HTTPS)
Denys Vlasenko9a647c32017-01-23 01:08:16 +01001040 spawn_ssl_client(server.host, fileno(sfp));
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001041#else
1042 /* ssl (https) support is not configured */
1043 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +01001044#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001045 /* Send HTTP request */
1046 if (use_proxy) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001047 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001048 target.protocol, target.host,
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001049 target.path);
1050 } else {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001051 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001052 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1053 target.path);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001054 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001055 if (!USR_HEADER_HOST)
1056 SENDFMT(sfp, "Host: %s\r\n", target.host);
1057 if (!USR_HEADER_USER_AGENT)
1058 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +00001059
Denys Vlasenko9213a552011-02-10 13:23:45 +01001060 /* Ask server to close the connection as soon as we are done
1061 * (IOW: we do not intend to send more requests)
1062 */
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001063 SENDFMT(sfp, "Connection: close\r\n");
Denys Vlasenko9213a552011-02-10 13:23:45 +01001064
Denis Vlasenko9cade082006-11-21 10:43:02 +00001065#if ENABLE_FEATURE_WGET_AUTHENTICATION
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001066 if (target.user && !USR_HEADER_AUTH) {
1067 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001068 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001069 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001070 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1071 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001072 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001073 }
Eric Andersen79757c92001-04-05 21:45:54 +00001074#endif
1075
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001076 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1077 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +01001078
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001079#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001080 if (G.extra_headers) {
1081 log_io(G.extra_headers);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001082 fputs(G.extra_headers, sfp);
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001083 }
Denis Vlasenko5a2ad692009-03-04 14:13:37 +00001084
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001085 if (option_mask32 & WGET_OPT_POST_DATA) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001086 SENDFMT(sfp,
Denys Vlasenko9213a552011-02-10 13:23:45 +01001087 "Content-Type: application/x-www-form-urlencoded\r\n"
1088 "Content-Length: %u\r\n"
1089 "\r\n"
1090 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +02001091 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +01001092 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001093 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001094#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +01001095 {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001096 SENDFMT(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001097 }
Eric Andersen79757c92001-04-05 21:45:54 +00001098
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001099 fflush(sfp);
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001100
Denys Vlasenko4e08a122017-01-16 17:31:05 +01001101/* Tried doing this unconditionally.
1102 * Cloudflare and nginx/1.11.5 are shocked to see SHUT_WR on non-HTTPS.
1103 */
Denys Vlasenkoa6f86512017-01-11 20:16:45 +01001104#if SSL_SUPPORTED
1105 if (target.protocol == P_HTTPS) {
1106 /* If we use SSL helper, keeping our end of the socket open for writing
1107 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1108 * even after child closes its copy of the fd.
1109 * This helps:
1110 */
1111 shutdown(fileno(sfp), SHUT_WR);
1112 }
1113#endif
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001114
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001115 /*
1116 * Retrieve HTTP response line and check for "200" status code.
1117 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001118 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +01001119 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001120
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001121 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001122 str = skip_non_whitespace(str);
1123 str = skip_whitespace(str);
1124 // FIXME: no error check
1125 // xatou wouldn't work: "200 OK"
1126 status = atoi(str);
1127 switch (status) {
1128 case 0:
1129 case 100:
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001130 while (gethdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001131 /* eat all remaining headers */;
1132 goto read_response;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001133
1134 /* Success responses */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001135 case 200:
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001136 /* fall through */
1137 case 201: /* 201 Created */
1138/* "The request has been fulfilled and resulted in a new resource being created" */
Denys Vlasenkoef159702016-09-01 11:16:22 +02001139 /* Standard wget is reported to treat this as success */
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001140 /* fall through */
1141 case 202: /* 202 Accepted */
1142/* "The request has been accepted for processing, but the processing has not been completed" */
1143 /* Treat as success: fall through */
1144 case 203: /* 203 Non-Authoritative Information */
1145/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1146 /* fall through */
1147 case 204: /* 204 No Content */
Denis Vlasenko50b5cac2008-06-22 16:28:02 +00001148/*
1149Response 204 doesn't say "null file", it says "metadata
1150has changed but data didn't":
1151
1152"10.2.5 204 No Content
1153The server has fulfilled the request but does not need to return
1154an entity-body, and might want to return updated metainformation.
1155The response MAY include new or updated metainformation in the form
1156of entity-headers, which if present SHOULD be associated with
1157the requested variant.
1158
1159If the client is a user agent, it SHOULD NOT change its document
1160view from that which caused the request to be sent. This response
1161is primarily intended to allow input for actions to take place
1162without causing a change to the user agent's active document view,
1163although any new or updated metainformation SHOULD be applied
1164to the document currently in the user agent's active view.
1165
1166The 204 response MUST NOT include a message-body, and thus
1167is always terminated by the first empty line after the header fields."
1168
1169However, in real world it was observed that some web servers
1170(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1171*/
Denys Vlasenkobf146b82012-06-13 17:31:07 +02001172 if (G.beg_range != 0) {
1173 /* "Range:..." was not honored by the server.
1174 * Restart download from the beginning.
1175 */
1176 reset_beg_range_to_zero();
1177 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001178 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001179 /* 205 Reset Content ?? what to do on this ?? */
1180
Denys Vlasenkofb132e42010-10-29 11:46:52 +02001181 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001182 case 301:
1183 case 302:
1184 case 303:
1185 break;
Denys Vlasenko9ff910d2016-08-31 13:28:53 +02001186
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001187 case 206: /* Partial Content */
1188 if (G.beg_range != 0)
1189 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001190 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001191 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001192 /* fall through */
1193 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001194 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001195 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001196
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001197 /*
1198 * Retrieve HTTP headers.
1199 */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001200 while ((str = gethdr(sfp)) != NULL) {
1201 static const char keywords[] ALIGN1 =
1202 "content-length\0""transfer-encoding\0""location\0";
1203 enum {
1204 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1205 };
Matthijs van de Water0d586662009-08-22 20:19:48 +02001206 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001207
1208 /* gethdr converted "FOO:" string to lowercase */
1209
Matthijs van de Water0d586662009-08-22 20:19:48 +02001210 /* strip trailing whitespace */
1211 char *s = strchrnul(str, '\0') - 1;
1212 while (s >= str && (*s == ' ' || *s == '\t')) {
1213 *s = '\0';
1214 s--;
1215 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001216 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001217 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001218 G.content_len = BB_STRTOOFF(str, NULL, 10);
1219 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001220 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +00001221 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001222 G.got_clen = 1;
1223 continue;
1224 }
1225 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001226 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001227 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001228 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001229 }
1230 if (key == KEY_location && status >= 300) {
1231 if (--redir_limit == 0)
1232 bb_error_msg_and_die("too many redirections");
1233 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001234 if (str[0] == '/') {
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001235 free(redirected_path);
1236 target.path = redirected_path = xstrdup(str+1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001237 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001238 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001239 parse_url(str, &target);
1240 if (!use_proxy) {
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001241 /* server.user remains untouched */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001242 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +01001243 server.allocated = NULL;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001244 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001245 /* strip_ipv6_scope_id(target.host); - no! */
1246 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001247 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +00001248 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001249 goto resolve_lsa;
1250 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +00001251 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001252 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +00001253 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001254 }
1255// if (status >= 300)
1256// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001257
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001258 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +00001259 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001260 } else {
Eric Andersen79757c92001-04-05 21:45:54 +00001261 /*
1262 * FTP session
1263 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001264 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +00001265 }
Denis Vlasenko77105632007-09-24 15:04:00 +00001266
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001267 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001268
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001269 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001270 if (G.output_fd < 0)
1271 G.output_fd = xopen(G.fname_out, G.o_flags);
1272 retrieve_file_data(dfp);
1273 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1274 xclose(G.output_fd);
1275 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001276 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +00001277 }
Eric Andersen79757c92001-04-05 21:45:54 +00001278
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001279 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001280 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +00001281 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001282 if (ftpcmd(NULL, NULL, sfp) != 226)
1283 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1284 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +00001285 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001286 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +00001287
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001288 free(server.allocated);
1289 free(target.allocated);
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001290 free(server.user);
1291 free(target.user);
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001292 free(fname_out_alloc);
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001293 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +00001294}
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001295
1296int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1297int wget_main(int argc UNUSED_PARAM, char **argv)
1298{
1299#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1300 static const char wget_longopts[] ALIGN1 =
1301 /* name, has_arg, val */
1302 "continue\0" No_argument "c"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001303 "quiet\0" No_argument "q"
1304 "output-document\0" Required_argument "O"
1305 "directory-prefix\0" Required_argument "P"
1306 "proxy\0" Required_argument "Y"
1307 "user-agent\0" Required_argument "U"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001308IF_FEATURE_WGET_TIMEOUT(
1309 "timeout\0" Required_argument "T")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001310 /* Ignored: */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001311IF_DESKTOP( "tries\0" Required_argument "t")
1312 "header\0" Required_argument "\xff"
1313 "post-data\0" Required_argument "\xfe"
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001314 "spider\0" No_argument "\xfd"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001315 /* Ignored (we always use PASV): */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001316IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001317 /* Ignored (we don't do ssl) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001318IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0")
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001319 /* Ignored (we don't support caching) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001320IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1321IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1322IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1323IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1324IF_DESKTOP( "no-parent\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001325 ;
1326#endif
1327
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001328#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1329 llist_t *headers_llist = NULL;
1330#endif
1331
1332 INIT_G();
1333
Lauri Kasanend074b412013-10-12 21:47:07 +02001334#if ENABLE_FEATURE_WGET_TIMEOUT
1335 G.timeout_seconds = 900;
1336 signal(SIGALRM, alarm_handler);
1337#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001338 G.proxy_flag = "on"; /* use proxies if env vars are set */
1339 G.user_agent = "Wget"; /* "User-Agent" header field */
1340
1341#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1342 applet_long_options = wget_longopts;
1343#endif
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001344 opt_complementary = "-1" /* at least one URL */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001345 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001346 getopt32(argv, "cqO:P:Y:U:T:+"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001347 /*ignored:*/ "t:"
1348 /*ignored:*/ "n::"
1349 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1350 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1351 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1352 * -nH --no-host-directories: wget -r http://host/ won't create host/
1353 * -np --no-parent
1354 * "n::" above says that we accept -n[ARG].
1355 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1356 */
1357 , &G.fname_out, &G.dir_prefix,
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001358 &G.proxy_flag, &G.user_agent,
1359 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001360 NULL, /* -t RETRIES */
1361 NULL /* -n[ARG] */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001362 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1363 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1364 );
Denys Vlasenko2972e2c2016-10-04 04:23:09 +02001365#if 0 /* option bits debug */
1366 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1367 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1368 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1369 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1370 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
1371 exit(0);
1372#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001373 argv += optind;
1374
1375#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1376 if (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001377 int size = 0;
1378 char *hdr;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001379 llist_t *ll = headers_llist;
1380 while (ll) {
1381 size += strlen(ll->data) + 2;
1382 ll = ll->link;
1383 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001384 G.extra_headers = hdr = xmalloc(size + 1);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001385 while (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001386 int bit;
1387 const char *words;
1388
1389 size = sprintf(hdr, "%s\r\n",
1390 (char*)llist_pop(&headers_llist));
1391 /* a bit like index_in_substrings but don't match full key */
1392 bit = 1;
1393 words = wget_user_headers;
1394 while (*words) {
1395 if (strstr(hdr, words) == hdr) {
1396 G.user_headers |= bit;
1397 break;
1398 }
1399 bit <<= 1;
1400 words += strlen(words) + 1;
1401 }
1402 hdr += size;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001403 }
1404 }
1405#endif
1406
Denys Vlasenko2384a352011-02-15 00:58:36 +01001407 G.output_fd = -1;
1408 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1409 if (G.fname_out) { /* -O FILE ? */
1410 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1411 G.output_fd = 1;
1412 option_mask32 &= ~WGET_OPT_CONTINUE;
1413 }
1414 /* compat with wget: -O FILE can overwrite */
1415 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1416 }
1417
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001418 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +01001419 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001420
Denys Vlasenko28556b92011-02-15 11:03:53 +01001421 if (G.output_fd >= 0)
1422 xclose(G.output_fd);
1423
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +02001424#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1425 free(G.extra_headers);
1426#endif
1427 FINI_G();
1428
Pere Orga53695632011-02-16 20:09:36 +01001429 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001430}