blob: 7a465058515ca59f63c347fac2a92cc2c9cf18ba [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
 */
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010011
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020012//config:config WGET
13//config: bool "wget"
14//config: default y
15//config: help
16//config: wget is a utility for non-interactive download of files from HTTP
17//config: and FTP servers.
18//config:
19//config:config FEATURE_WGET_STATUSBAR
20//config: bool "Enable a nifty process meter (+2k)"
21//config: default y
22//config: depends on WGET
23//config: help
24//config: Enable the transfer progress bar for wget transfers.
25//config:
26//config:config FEATURE_WGET_AUTHENTICATION
27//config: bool "Enable HTTP authentication"
28//config: default y
29//config: depends on WGET
30//config: help
31//config: Support authenticated HTTP transfers.
32//config:
33//config:config FEATURE_WGET_LONG_OPTIONS
34//config: bool "Enable long options"
35//config: default y
36//config: depends on WGET && LONG_OPTS
37//config: help
38//config: Support long options for the wget applet.
39//config:
40//config:config FEATURE_WGET_TIMEOUT
41//config: bool "Enable timeout option -T SEC"
42//config: default y
43//config: depends on WGET
44//config: help
45//config: Supports network read and connect timeouts for wget,
46//config: so that wget will give up and timeout, through the -T
47//config: command line option.
48//config:
49//config: Currently only connect and network data read timeout are
50//config: supported (i.e., timeout is not applied to the DNS query). When
51//config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
52//config: will work in addition to -T.
53//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +020054//config:config FEATURE_WGET_OPENSSL
55//config: bool "Try to connect to HTTPS using openssl"
56//config: default y
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020057//config: depends on WGET
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020058//config: help
59//config: Choose how wget establishes SSL connection for https:// URLs.
60//config:
61//config: Busybox itself contains no SSL code. wget will spawn
62//config: a helper program to talk over HTTPS.
63//config:
64//config: OpenSSL has a simple SSL client for debug purposes.
Denys Vlasenkoed727612016-07-25 21:34:57 +020065//config: If you select "openssl" helper, wget will effectively run:
66//config: "openssl s_client -quiet -connect hostname:443
67//config: -servername hostname 2>/dev/null" and pipe its data
68//config: through it. -servername is not used if hostname is numeric.
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020069//config: Note inconvenient API: host resolution is done twice,
70//config: and there is no guarantee openssl's idea of IPv6 address
71//config: format is the same as ours.
72//config: Another problem is that s_client prints debug information
73//config: to stderr, and it needs to be suppressed. This means
74//config: all error messages get suppressed too.
75//config: openssl is also a big binary, often dynamically linked
76//config: against ~15 libraries.
77//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +020078//config:config FEATURE_WGET_SSL_HELPER
79//config: bool "Try to connect to HTTPS using ssl_helper"
80//config: default y
81//config: depends on WGET
82//config: help
83//config: Choose how wget establishes SSL connection for https:// URLs.
84//config:
85//config: Busybox itself contains no SSL code. wget will spawn
86//config: a helper program to talk over HTTPS.
87//config:
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020088//config: ssl_helper is a tool which can be built statically
89//config: from busybox sources against a small embedded SSL library.
90//config: Please see networking/ssl_helper/README.
91//config: It does not require double host resolution and emits
92//config: error messages to stderr.
93//config:
Denys Vlasenko2007ef52015-10-07 02:40:53 +020094//config: Precompiled static binary may be available at
95//config: http://busybox.net/downloads/binaries/
Denys Vlasenko1c6c6702015-10-07 01:39:40 +020096
97//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
98
99//kbuild:lib-$(CONFIG_WGET) += wget.o
100
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100101//usage:#define wget_trivial_usage
102//usage: IF_FEATURE_WGET_LONG_OPTIONS(
103//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
104//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200105/* Since we ignore these opts, we don't show them in --help */
Denys Vlasenko92e1b082015-10-20 21:51:52 +0200106/* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
107/* //usage: " [-nv] [-nc] [-nH] [-np]" */
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200108//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100109//usage: )
110//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
111//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
112//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
113//usage: )
114//usage:#define wget_full_usage "\n\n"
115//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +0100116//usage: "\n -s Spider mode - only check file existence"
117//usage: "\n -c Continue retrieval of aborted transfer"
118//usage: "\n -q Quiet"
119//usage: "\n -P DIR Save to DIR (default .)"
120//usage: IF_FEATURE_WGET_TIMEOUT(
121//usage: "\n -T SEC Network read timeout is SEC seconds"
122//usage: )
123//usage: "\n -O FILE Save to FILE ('-' for stdout)"
124//usage: "\n -U STR Use STR for User-Agent header"
125//usage: "\n -Y Use proxy ('on' or 'off')"
126
Denis Vlasenkob6adbf12007-05-26 19:00:18 +0000127#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000128
/*
 * Protocol tracing helpers.
 * Change "#if 0" to "#if 1" to have every traced line reported through
 * bb_error_msg(). SENDFMT(fp, fmt, ...) writes a formatted line to fp and,
 * when tracing is on, also logs it with a "> " prefix.
 *
 * The tracing SENDFMT deliberately has no semicolon after "while (0)":
 * the caller supplies it, which keeps "if (x) SENDFMT(...); else ..." valid.
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
# define SENDFMT(fp, fmt, ...) \
	do { \
		log_io("> " fmt, ##__VA_ARGS__); \
		fprintf(fp, fmt, ##__VA_ARGS__); \
	} while (0)
#else
# define log_io(...) ((void)0)
# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +0100140
141
Eric Andersen79757c92001-04-05 21:45:54 +0000142struct host_info {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100143 char *allocated;
Denis Vlasenko818322b2007-09-24 18:27:04 +0000144 const char *path;
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100145 char *user;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100146 const char *protocol;
Denis Vlasenko818322b2007-09-24 18:27:04 +0000147 char *host;
148 int port;
Eric Andersen79757c92001-04-05 21:45:54 +0000149};
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200150static const char P_FTP[] ALIGN1 = "ftp";
151static const char P_HTTP[] ALIGN1 = "http";
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100152#if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
Denys Vlasenko3e134eb2016-04-22 18:09:21 +0200153static const char P_HTTPS[] ALIGN1 = "https";
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100154#endif
Eric Andersen79757c92001-04-05 21:45:54 +0000155
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * A header supplied by the user suppresses our built-in header of the
 * same name. Each HDR_xxx bit corresponds, in order, to one entry of
 * wget_user_headers[]; the set of seen headers lives in G.user_headers.
 * The two authentication bits collapse to 0 when authentication support
 * is compiled out.
 */
enum {
	HDR_HOST       = (1 << 0),
	HDR_USER_AGENT = (1 << 1),
	HDR_RANGE      = (1 << 2),
	HDR_AUTH       = (1 << 3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
	HDR_PROXY_AUTH = (1 << 4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
};
/* NUL-separated list of the header names tracked above */
static const char wget_user_headers[] ALIGN1 =
	"Host:\0"
	"User-Agent:\0"
	"Range:\0"
# if ENABLE_FEATURE_WGET_AUTHENTICATION
	"Authorization:\0"
	"Proxy-Authorization:\0"
# endif
	;
/* Nonzero if the user supplied the corresponding header */
# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
#else /* No long options, no user-headers :( */
# define USR_HEADER_HOST       0
# define USR_HEADER_USER_AGENT 0
# define USR_HEADER_RANGE      0
# define USR_HEADER_AUTH       0
# define USR_HEADER_PROXY_AUTH 0
#endif
Denis Vlasenko77105632007-09-24 15:04:00 +0000186
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200187/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +0000188struct globals {
189 off_t content_len; /* Content-length of the file */
190 off_t beg_range; /* Range at which continue begins */
191#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +0000192 off_t transferred; /* Number of bytes transferred so far */
193 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +0100194 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +0000195#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200196 char *dir_prefix;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100197#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200198 char *post_data;
199 char *extra_headers;
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +0100200 unsigned char user_headers; /* Headers mentioned by the user */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100201#endif
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200202 char *fname_out; /* where to direct output (-O) */
203 const char *proxy_flag; /* Use proxies if env vars are set */
204 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200205#if ENABLE_FEATURE_WGET_TIMEOUT
206 unsigned timeout_seconds;
Denys Vlasenko6701e912016-03-17 15:58:16 +0100207 bool die_if_timed_out;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200208#endif
Denys Vlasenko2384a352011-02-15 00:58:36 +0100209 int output_fd;
210 int o_flags;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200211 smallint chunked; /* chunked transfer encoding */
212 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100213 /* Local downloads do benefit from big buffer.
214 * With 512 byte buffer, it was measured to be
215 * an order of magnitude slower than with big one.
216 */
217 uint64_t just_to_align_next_member;
218 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +0100219} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100220#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200221#define INIT_G() do { \
Denys Vlasenko982e87f2013-07-30 11:52:58 +0200222 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200223} while (0)
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +0200224#define FINI_G() do { \
225 FREE_PTR_TO_GLOBALS(); \
226} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +0000227
228
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200229/* Must match option string! */
230enum {
231 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200232 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200233 WGET_OPT_QUIET = (1 << 2),
234 WGET_OPT_OUTNAME = (1 << 3),
235 WGET_OPT_PREFIX = (1 << 4),
236 WGET_OPT_PROXY = (1 << 5),
237 WGET_OPT_USER_AGENT = (1 << 6),
238 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
239 WGET_OPT_RETRIES = (1 << 8),
240 WGET_OPT_PASSIVE = (1 << 9),
241 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
242 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
243};
244
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1, /* initialize the meter, then draw it */
	PROGRESS_END   = 0,  /* draw it one last time, then tear it down */
	PROGRESS_BUMP  = 1,  /* plain redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000250#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000251static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000252{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200253 if (option_mask32 & WGET_OPT_QUIET)
254 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000255
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200256 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100257 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000258
Denys Vlasenko2384a352011-02-15 00:58:36 +0100259 bb_progress_update(&G.pmt,
260 G.beg_range,
261 G.transferred,
262 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
263 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000264
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200265 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100266 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +0200267 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100268 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000269 }
270}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200271#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000272static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000273#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000274
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000275
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits host in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not a bracketed address with a '%' inside the brackets
 * is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* A '%' after the closing bracket (e.g. "[::1]:%") is not a zone
	 * identifier. Copying "]..." up to it would lengthen the string
	 * and write past its end, so leave such input alone. */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]...":
	 * slide the tail (including its NUL) down over the zone id */
	memmove(scope, cp, strlen(cp) + 1);
}
311
Denis Vlasenko9cade082006-11-21 10:43:02 +0000312#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100313/* Base64-encode character string. */
314static char *base64enc(const char *str)
Denis Vlasenko3526a132006-09-09 12:20:57 +0000315{
Denis Vlasenko12d21292007-06-27 21:40:07 +0000316 unsigned len = strlen(str);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100317 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
318 len = sizeof(G.wget_buf)/4*3 - 10;
319 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
320 return G.wget_buf;
Eric Andersen79757c92001-04-05 21:45:54 +0000321}
322#endif
323
/*
 * Cut s (in place) at its first byte below ' ', i.e. at the first
 * control character or at the existing terminator. Returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *end;

	for (end = (void *) s; *end >= ' '; end++)
		continue;
	*end = '\0';
	return s;
}
332
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * SIGALRM handler: abort the download, but only while a guarded
 * operation is in progress (G.die_if_timed_out is set).
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
	if (!G.die_if_timed_out)
		return;
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	bb_error_msg_and_die("download timed out");
}
/* Arm the timeout before a blocking network operation (no-op if timeout is 0) */
static void set_alarm(void)
{
	if (G.timeout_seconds == 0)
		return;
	alarm(G.timeout_seconds);
	G.die_if_timed_out = 1;
}
/* Mark the guarded operation as finished: a later SIGALRM is ignored */
# define clear_alarm() ((void)(G.die_if_timed_out = 0))
#else
# define set_alarm()   ((void)0)
# define clear_alarm() ((void)0)
#endif
352
Denys Vlasenkoed727612016-07-25 21:34:57 +0200353#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Tell whether string is a literal IPv4 address (or, with
 * FEATURE_IPV6, a literal IPv6 address) rather than a host name.
 * Returns 1 if it is, 0 otherwise. The decision is based purely on
 * whether inet_pton() accepts the text.
 *
 * TODO add proper error checking when inet_pton() returns -1
 * (some form of system error has occurred, and errno is set)
 */
static int is_ip_address(const char *string)
{
	struct in_addr addr4;
	int rc;

	rc = inet_pton(AF_INET, string, &addr4);
# if ENABLE_FEATURE_IPV6
	if (rc == 0) {
		/* not IPv4: give IPv6 a try */
		struct in6_addr addr6;
		rc = inet_pton(AF_INET6, string, &addr6);
	}
# endif
	return (rc == 1);
}
375#endif
376
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000377static FILE *open_socket(len_and_sockaddr *lsa)
378{
Lauri Kasanend074b412013-10-12 21:47:07 +0200379 int fd;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000380 FILE *fp;
381
Denys Vlasenko6701e912016-03-17 15:58:16 +0100382 set_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200383 fd = xconnect_stream(lsa);
Denys Vlasenko6701e912016-03-17 15:58:16 +0100384 clear_alarm();
Lauri Kasanend074b412013-10-12 21:47:07 +0200385
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000386 /* glibc 2.4 seems to try seeking on it - ??! */
387 /* hopefully it understands what ESPIPE means... */
Lauri Kasanend074b412013-10-12 21:47:07 +0200388 fp = fdopen(fd, "r+");
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100389 if (!fp)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100390 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000391
392 return fp;
393}
394
Denys Vlasenkof836f012011-02-10 23:02:28 +0100395/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
396static char fgets_and_trim(FILE *fp)
397{
398 char c;
399 char *buf_ptr;
400
Denys Vlasenko6701e912016-03-17 15:58:16 +0100401 set_alarm();
Denys Vlasenkof836f012011-02-10 23:02:28 +0100402 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
403 bb_perror_msg_and_die("error getting response");
Denys Vlasenko6701e912016-03-17 15:58:16 +0100404 clear_alarm();
Denys Vlasenkof836f012011-02-10 23:02:28 +0100405
406 buf_ptr = strchrnul(G.wget_buf, '\n');
407 c = *buf_ptr;
408 *buf_ptr = '\0';
409 buf_ptr = strchrnul(G.wget_buf, '\r');
410 *buf_ptr = '\0';
411
412 log_io("< %s", G.wget_buf);
413
414 return c;
415}
416
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100417static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000418{
419 int result;
420 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100421 if (!s2)
422 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000423 fprintf(fp, "%s%s\r\n", s1, s2);
424 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100425 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000426 }
427
428 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100429 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100430 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000431
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100432 G.wget_buf[3] = '\0';
433 result = xatoi_positive(G.wget_buf);
434 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000435 return result;
436}
437
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100438static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000439{
440 char *url, *p, *sp;
441
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100442 free(h->allocated);
443 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000444
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100445 h->protocol = P_FTP;
446 p = strstr(url, "://");
447 if (p) {
448 *p = '\0';
449 h->host = p + 3;
450 if (strcmp(url, P_FTP) == 0) {
451 h->port = bb_lookup_port(P_FTP, "tcp", 21);
452 } else
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100453#if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100454 if (strcmp(url, P_HTTPS) == 0) {
455 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
456 h->protocol = P_HTTPS;
457 } else
Ron Yorston4d0c1ea2015-10-12 10:51:25 +0100458#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100459 if (strcmp(url, P_HTTP) == 0) {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100460 http:
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100461 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
462 h->protocol = P_HTTP;
463 } else {
464 *p = ':';
465 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
466 }
467 } else {
Lauri Kasanen4967a412013-12-17 19:03:41 +0100468 // GNU wget is user-friendly and falls back to http://
469 h->host = url;
470 goto http;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100471 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000472
473 // FYI:
474 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
475 // 'GET /?var=a/b HTTP 1.0'
476 // and saves 'index.html?var=a%2Fb' (we save 'b')
477 // wget 'http://busybox.net?login=john@doe':
478 // request: 'GET /?login=john@doe HTTP/1.0'
479 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
480 // wget 'http://busybox.net#test/test':
481 // request: 'GET / HTTP/1.0'
482 // saves: 'index.html' (we save 'test')
483 //
484 // We also don't add unique .N suffix if file exists...
485 sp = strchr(h->host, '/');
486 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
487 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
488 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000489 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000490 } else if (*sp == '/') {
491 *sp = '\0';
492 h->path = sp + 1;
493 } else { // '#' or '?'
494 // http://busybox.net?login=john@doe is a valid URL
495 // memmove converts to:
496 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000497 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000498 h->host--;
499 sp[-1] = '\0';
500 h->path = sp;
501 }
502
503 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000504 if (sp != NULL) {
Denys Vlasenkodd1061b2011-09-11 21:04:02 +0200505 // URL-decode "user:password" string before base64-encoding:
506 // wget http://test:my%20pass@example.com should send
507 // Authorization: Basic dGVzdDpteSBwYXNz
508 // which decodes to "test:my pass".
509 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000510 *sp = '\0';
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100511 free(h->user);
512 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000513 h->host = sp + 1;
514 }
Denys Vlasenkod353bff2014-02-03 14:09:42 +0100515 /* else: h->user remains NULL, or as set by original request
516 * before redirect (if we are here after a redirect).
517 */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000518}
519
Denys Vlasenkof836f012011-02-10 23:02:28 +0100520static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000521{
522 char *s, *hdrval;
523 int c;
524
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000525 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100526 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000527
Denys Vlasenkof836f012011-02-10 23:02:28 +0100528 /* end of the headers? */
529 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000530 return NULL;
531
532 /* convert the header name to lower case */
Denys Vlasenkoea267d52013-07-01 15:01:50 +0200533 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
534 /*
535 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
536 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
537 * "A-Z" maps to "a-z".
538 * "@[\]" can't occur in header names.
539 * "^_" maps to "~,DEL" (which is wrong).
540 * "^" was never seen yet, "_" was seen from web.archive.org
541 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
542 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100543 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200544 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000545
546 /* verify we are at the end of the header name */
547 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100548 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000549
550 /* locate the start of the header value */
551 *s++ = '\0';
552 hdrval = skip_whitespace(s);
553
Denys Vlasenkof836f012011-02-10 23:02:28 +0100554 if (c != '\n') {
555 /* Rats! The buffer isn't big enough to hold the entire header value */
556 while (c = getc(fp), c != EOF && c != '\n')
557 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000558 }
559
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000560 return hdrval;
561}
562
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200563static void reset_beg_range_to_zero(void)
564{
Denys Vlasenko61441242012-06-17 19:52:25 +0200565 bb_error_msg("restart failed");
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200566 G.beg_range = 0;
567 xlseek(G.output_fd, 0, SEEK_SET);
Denys Vlasenko61441242012-06-17 19:52:25 +0200568 /* Done at the end instead: */
569 /* ftruncate(G.output_fd, 0); */
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200570}
571
Denys Vlasenko7f432802009-06-28 01:02:24 +0200572static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
573{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200574 FILE *sfp;
575 char *str;
576 int port;
577
578 if (!target->user)
579 target->user = xstrdup("anonymous:busybox@");
580
581 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100582 if (ftpcmd(NULL, NULL, sfp) != 220)
583 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200584
585 /*
586 * Splitting username:password pair,
587 * trying to log in
588 */
589 str = strchr(target->user, ':');
590 if (str)
591 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100592 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200593 case 230:
594 break;
595 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100596 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200597 break;
598 /* fall through (failed login) */
599 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100600 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200601 }
602
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100603 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200604
605 /*
606 * Querying file size
607 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100608 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
609 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100610 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200611 bb_error_msg_and_die("SIZE value is garbage");
612 }
613 G.got_clen = 1;
614 }
615
616 /*
617 * Entering passive mode
618 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100619 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200620 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100621 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200622 }
623 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
624 // Server's IP is N1.N2.N3.N4 (we ignore it)
625 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100626 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200627 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100628 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200629 if (!str) goto pasv_error;
630 port = xatou_range(str+1, 0, 255);
631 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100632 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200633 if (!str) goto pasv_error;
634 port += xatou_range(str+1, 0, 255) * 256;
Denys Vlasenkoca183112011-04-07 17:52:20 +0200635 set_nport(&lsa->u.sa, htons(port));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200636
637 *dfpp = open_socket(lsa);
638
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +0200639 if (G.beg_range != 0) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100640 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
641 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100642 G.content_len -= G.beg_range;
Denys Vlasenkobf146b82012-06-13 17:31:07 +0200643 else
644 reset_beg_range_to_zero();
Denys Vlasenko7f432802009-06-28 01:02:24 +0200645 }
646
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100647 if (ftpcmd("RETR ", target->path, sfp) > 150)
648 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200649
650 return sfp;
651}
652
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Spawn "openssl s_client -quiet -connect HOST:PORT [-servername HOST]"
 * with its stdin and stdout attached to one end of a socketpair.
 * ":port" is appended to host only if host contains no ':' at all.
 *
 * Returns the parent's end of the socketpair. If FEATURE_WGET_SSL_HELPER
 * is also enabled, a failed exec is reported by returning -1 instead of
 * printing an error, presumably so that the caller can fall back to the
 * other helper (the caller is outside this block).
 */
static int spawn_https_helper_openssl(const char *host, unsigned port)
{
	char *host_with_port = NULL;
	char *sni_name;
	int fds[2];
	int child;
	/* Written by the vfork'ed child, read by the parent */
	IF_FEATURE_WGET_SSL_HELPER(volatile int child_failed = 0;)

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	if (strchr(host, ':') == NULL) {
		host_with_port = xasprintf("%s:%u", host, port);
		host = host_with_port;
	}
	/* The SNI name is the host with its last ":xxx" part removed */
	sni_name = xstrdup(host);
	*strrchr(sni_name, ':') = '\0';

	fflush_all();
	child = xvfork();
	if (child == 0) {
		/* Child */
		char *argv[8];

		close(fds[0]);
		xmove_fd(fds[1], 0);
		xdup2(0, 1);
		/*
		 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
		 * It prints some debug stuff on stderr, don't know how to suppress it.
		 * Work around by dev-nulling stderr. We lose all error messages :(
		 * (Our own stderr is parked on fd 3 meanwhile.)
		 */
		xmove_fd(2, 3);
		xopen("/dev/null", O_RDWR);
		memset(&argv, 0, sizeof(argv));
		argv[0] = (char*)"openssl";
		argv[1] = (char*)"s_client";
		argv[2] = (char*)"-quiet";
		argv[3] = (char*)"-connect";
		argv[4] = (char*)host;
		/*
		 * Per RFC 6066 Section 3, the only permitted values in the
		 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
		 * IPv4 and IPv6 addresses, port numbers are not allowed.
		 */
		if (!is_ip_address(sni_name)) {
			argv[5] = (char*)"-servername";
			argv[6] = (char*)sni_name;
		}

		BB_EXECVP(argv[0], argv);
		/* exec failed: get the real stderr back */
		xmove_fd(3, 2);
# if ENABLE_FEATURE_WGET_SSL_HELPER
		child_failed = 1;
		xfunc_die();
# else
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
# endif
		/* notreached */
	}

	/* Parent */
	free(sni_name);
	free(host_with_port);
	close(fds[1]);
# if ENABLE_FEATURE_WGET_SSL_HELPER
	if (child_failed) {
		close(fds[0]);
		return -1;
	}
# endif
	return fds[0];
}
#endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100727
/* See networking/ssl_helper/README how to build one */
#if ENABLE_FEATURE_WGET_SSL_HELPER
/*
 * Start "ssl_helper -d3" as a child process.
 *
 * In the child, stdin and stdout are both one end of an AF_UNIX socketpair
 * and network_fd is moved to fd 3 (matching the "-d3" argument).
 * In the parent, the other end of the socketpair is moved onto network_fd,
 * so the caller keeps using the same descriptor number.
 *
 * Dies if socketpair() fails; the child dies if exec fails.
 */
static void spawn_https_helper_small(int network_fd)
{
	int sp[2];
	int pid;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	pid = BB_MMU ? xfork() : xvfork();
	if (pid == 0) {
		/* Child */
		char *argv[3];

		close(sp[0]);
		xmove_fd(sp[1], 0);
		xdup2(0, 1);
		xmove_fd(network_fd, 3);
		/*
		 * A simple ssl/tls helper
		 */
		argv[0] = (char*)"ssl_helper";
		argv[1] = (char*)"-d3";
		argv[2] = NULL;
		BB_EXECVP(argv[0], argv);
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
		/* notreached */
	}

	/* Parent */
	close(sp[1]);
	xmove_fd(sp[0], network_fd);
}
#endif
764
Denys Vlasenko2384a352011-02-15 00:58:36 +0100765static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200766{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200767#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
768# if ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200769 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200770# endif
771 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200772
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200773 polldata.fd = fileno(dfp);
774 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200775#endif
776 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200777
778 if (G.chunked)
779 goto get_clen;
780
781 /* Loops only if chunked */
782 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100783
784#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
785 /* Must use nonblocking I/O, otherwise fread will loop
786 * and *block* until it reads full buffer,
787 * which messes up progress bar and/or timeout logic.
788 * Because of nonblocking I/O, we need to dance
789 * very carefully around EAGAIN. See explanation at
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200790 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100791 */
792 ndelay_on(polldata.fd);
793#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100794 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200795 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100796 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200797
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200798#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100799 /* fread internally uses read loop, which in our case
800 * is usually exited when we get EAGAIN.
801 * In this case, libc sets error marker on the stream.
802 * Need to clear it before next fread to avoid possible
803 * rare false positive ferror below. Rare because usually
804 * fread gets more than zero bytes, and we don't fall
805 * into if (n <= 0) ...
806 */
807 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100808#endif
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200809 errno = 0;
810 rdsz = sizeof(G.wget_buf);
811 if (G.got_clen) {
812 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
813 if ((int)G.content_len <= 0)
814 break;
815 rdsz = (unsigned)G.content_len;
816 }
817 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100818 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200819
820 if (n > 0) {
821 xwrite(G.output_fd, G.wget_buf, n);
822#if ENABLE_FEATURE_WGET_STATUSBAR
823 G.transferred += n;
824#endif
825 if (G.got_clen) {
826 G.content_len -= n;
827 if (G.content_len == 0)
828 break;
829 }
830#if ENABLE_FEATURE_WGET_TIMEOUT
831 second_cnt = G.timeout_seconds;
832#endif
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100833 goto bump;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200834 }
835
836 /* n <= 0.
837 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100838 * If error occurs, or EOF is reached, the return value
839 * is a short item count (or zero).
840 * fread does not distinguish between EOF and error.
841 */
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200842 if (errno != EAGAIN) {
843 if (ferror(dfp)) {
844 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100845 bb_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200846 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100847 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200848 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100849
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200850#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
851 /* It was EAGAIN. There is no data. Wait up to one second
852 * then abort if timed out, or update the bar and try reading again.
853 */
854 if (safe_poll(&polldata, 1, 1000) == 0) {
855# if ENABLE_FEATURE_WGET_TIMEOUT
856 if (second_cnt != 0 && --second_cnt == 0) {
857 progress_meter(PROGRESS_END);
858 bb_error_msg_and_die("download timed out");
859 }
860# endif
861 /* We used to loop back to poll here,
862 * but there is no great harm in letting fread
863 * to try reading anyway.
864 */
865 }
Denys Vlasenkofaa9e942014-03-27 16:50:29 +0100866#endif
867 bump:
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200868 /* Need to do it _every_ second for "stalled" indicator
869 * to be shown properly.
870 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200871 progress_meter(PROGRESS_BUMP);
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200872 } /* while (reading data) */
873
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100874#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
875 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100876 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100877#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200878 if (!G.chunked)
879 break;
880
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100881 fgets_and_trim(dfp); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200882 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100883 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100884 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200885 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100886 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200887 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100888 G.got_clen = 1;
Denys Vlasenkob7812ce2012-09-03 12:49:30 +0200889 /*
890 * Note that fgets may result in some data being buffered in dfp.
891 * We loop back to fread, which will retrieve this data.
892 * Also note that code has to be arranged so that fread
893 * is done _before_ one-second poll wait - poll doesn't know
894 * about stdio buffering and can result in spurious one second waits!
895 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200896 }
897
Denys Vlasenko61441242012-06-17 19:52:25 +0200898 /* If -c failed, we restart from the beginning,
899 * but we do not truncate file then, we do it only now, at the end.
900 * This lets user to ^C if his 99% complete 10 GB file download
901 * failed to restart *without* losing the almost complete file.
902 */
903 {
904 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
905 if (pos != (off_t)-1)
906 ftruncate(G.output_fd, pos);
907 }
908
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100909 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100910 G.chunked = 0; /* makes it show 100% even for chunked download */
911 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200912 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200913}
914
Pere Orga53695632011-02-16 20:09:36 +0100915static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +0000916{
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100917 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200918 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100919 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200920 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000921 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100922 char *proxy = NULL;
923 char *fname_out_alloc;
Denys Vlasenko93b4a602011-12-18 05:11:56 +0100924 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100925 struct host_info server;
926 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +0000927
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100928 server.allocated = NULL;
929 target.allocated = NULL;
930 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200931 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100932
933 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000934
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000935 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100936 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000937 if (use_proxy) {
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100938 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
939//FIXME: what if protocol is https? Ok to use http_proxy?
Denys Vlasenko2384a352011-02-15 00:58:36 +0100940 use_proxy = (proxy && proxy[0]);
941 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000942 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +0000943 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200944 if (!use_proxy) {
945 server.port = target.port;
946 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100947 //free(server.allocated); - can't be non-NULL
948 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200949 } else {
950 server.host = target.host;
951 }
952 }
953
954 if (ENABLE_FEATURE_IPV6)
955 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000956
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100957 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100958 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100959 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100960 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000961 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100962 if (G.fname_out[0] == '/' || !G.fname_out[0])
963 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000964 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoaacd4482012-06-17 20:21:30 +0200965 if (G.dir_prefix)
966 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100967 else {
Denys Vlasenkoaacd4482012-06-17 20:21:30 +0200968 /* redirects may free target.path later, need to make a copy */
969 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100970 }
Eric Andersen29edd002000-12-09 16:55:35 +0000971 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000972#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100973 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000974#endif
975
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000976 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100977 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100978 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100979 G.output_fd = open(G.fname_out, O_WRONLY);
980 if (G.output_fd >= 0) {
981 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000982 }
983 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +0100984 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000985 }
986
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200987 redir_limit = 5;
988 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000989 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100990 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200991 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
992 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
993 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000994 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200995 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +0100996 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
997 G.got_clen = 0;
998 G.chunked = 0;
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +0100999 if (use_proxy || target.protocol != P_FTP) {
Eric Andersen79757c92001-04-05 21:45:54 +00001000 /*
1001 * HTTP session
1002 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001003 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001004 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +02001005
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001006 /* Open socket to http(s) server */
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001007#if ENABLE_FEATURE_WGET_OPENSSL
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001008 /* openssl (and maybe ssl_helper) support is configured */
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001009 if (target.protocol == P_HTTPS) {
Denys Vlasenko1c6c6702015-10-07 01:39:40 +02001010 /* openssl-based helper
1011 * Inconvenient API since we can't give it an open fd
1012 */
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001013 int fd = spawn_https_helper_openssl(server.host, server.port);
1014# if ENABLE_FEATURE_WGET_SSL_HELPER
1015 if (fd < 0) { /* no openssl? try ssl_helper */
1016 sfp = open_socket(lsa);
1017 spawn_https_helper_small(fileno(sfp));
1018 goto socket_opened;
1019 }
1020# else
1021 /* We don't check for exec("openssl") failure in this case */
1022# endif
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001023 sfp = fdopen(fd, "r+");
1024 if (!sfp)
1025 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001026 goto socket_opened;
1027 }
1028 sfp = open_socket(lsa);
1029 socket_opened:
1030#elif ENABLE_FEATURE_WGET_SSL_HELPER
1031 /* Only ssl_helper support is configured */
1032 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +01001033 if (target.protocol == P_HTTPS)
Denys Vlasenko2007ef52015-10-07 02:40:53 +02001034 spawn_https_helper_small(fileno(sfp));
1035#else
1036 /* ssl (https) support is not configured */
1037 sfp = open_socket(lsa);
Denys Vlasenko53315572014-02-23 23:39:47 +01001038#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001039 /* Send HTTP request */
1040 if (use_proxy) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001041 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001042 target.protocol, target.host,
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001043 target.path);
1044 } else {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001045 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
Denys Vlasenko8b7e8ae2014-02-22 14:12:29 +01001046 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1047 target.path);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001048 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001049 if (!USR_HEADER_HOST)
1050 SENDFMT(sfp, "Host: %s\r\n", target.host);
1051 if (!USR_HEADER_USER_AGENT)
1052 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +00001053
Denys Vlasenko9213a552011-02-10 13:23:45 +01001054 /* Ask server to close the connection as soon as we are done
1055 * (IOW: we do not intend to send more requests)
1056 */
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001057 SENDFMT(sfp, "Connection: close\r\n");
Denys Vlasenko9213a552011-02-10 13:23:45 +01001058
Denis Vlasenko9cade082006-11-21 10:43:02 +00001059#if ENABLE_FEATURE_WGET_AUTHENTICATION
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001060 if (target.user && !USR_HEADER_AUTH) {
1061 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001062 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001063 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001064 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1065 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001066 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001067 }
Eric Andersen79757c92001-04-05 21:45:54 +00001068#endif
1069
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001070 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1071 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +01001072
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001073#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001074 if (G.extra_headers) {
1075 log_io(G.extra_headers);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001076 fputs(G.extra_headers, sfp);
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001077 }
Denis Vlasenko5a2ad692009-03-04 14:13:37 +00001078
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001079 if (option_mask32 & WGET_OPT_POST_DATA) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001080 SENDFMT(sfp,
Denys Vlasenko9213a552011-02-10 13:23:45 +01001081 "Content-Type: application/x-www-form-urlencoded\r\n"
1082 "Content-Length: %u\r\n"
1083 "\r\n"
1084 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +02001085 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +01001086 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001087 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +00001088#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +01001089 {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001090 SENDFMT(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001091 }
Eric Andersen79757c92001-04-05 21:45:54 +00001092
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001093 fflush(sfp);
Denys Vlasenkode3da6b2016-08-21 03:39:39 +02001094 /* If we use SSL helper, keeping our end of the socket open for writing
1095 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1096 * even after child closes its copy of the fd.
1097 * This helps:
1098 */
1099 shutdown(fileno(sfp), SHUT_WR);
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +02001100
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001101 /*
1102 * Retrieve HTTP response line and check for "200" status code.
1103 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001104 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +01001105 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001106
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001107 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001108 str = skip_non_whitespace(str);
1109 str = skip_whitespace(str);
1110 // FIXME: no error check
1111 // xatou wouldn't work: "200 OK"
1112 status = atoi(str);
1113 switch (status) {
1114 case 0:
1115 case 100:
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001116 while (gethdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001117 /* eat all remaining headers */;
1118 goto read_response;
1119 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +00001120/*
1121Response 204 doesn't say "null file", it says "metadata
1122has changed but data didn't":
1123
1124"10.2.5 204 No Content
1125The server has fulfilled the request but does not need to return
1126an entity-body, and might want to return updated metainformation.
1127The response MAY include new or updated metainformation in the form
1128of entity-headers, which if present SHOULD be associated with
1129the requested variant.
1130
1131If the client is a user agent, it SHOULD NOT change its document
1132view from that which caused the request to be sent. This response
1133is primarily intended to allow input for actions to take place
1134without causing a change to the user agent's active document view,
1135although any new or updated metainformation SHOULD be applied
1136to the document currently in the user agent's active view.
1137
1138The 204 response MUST NOT include a message-body, and thus
1139is always terminated by the first empty line after the header fields."
1140
1141However, in real world it was observed that some web servers
1142(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1143*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001144 case 204:
Denys Vlasenkobf146b82012-06-13 17:31:07 +02001145 if (G.beg_range != 0) {
1146 /* "Range:..." was not honored by the server.
1147 * Restart download from the beginning.
1148 */
1149 reset_beg_range_to_zero();
1150 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001151 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +02001152 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001153 case 301:
1154 case 302:
1155 case 303:
1156 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001157 case 206: /* Partial Content */
1158 if (G.beg_range != 0)
1159 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +00001160 break;
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001161 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001162 /* fall through */
1163 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001164 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001165 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001166
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001167 /*
1168 * Retrieve HTTP headers.
1169 */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001170 while ((str = gethdr(sfp)) != NULL) {
1171 static const char keywords[] ALIGN1 =
1172 "content-length\0""transfer-encoding\0""location\0";
1173 enum {
1174 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1175 };
Matthijs van de Water0d586662009-08-22 20:19:48 +02001176 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001177
1178 /* gethdr converted "FOO:" string to lowercase */
1179
Matthijs van de Water0d586662009-08-22 20:19:48 +02001180 /* strip trailing whitespace */
1181 char *s = strchrnul(str, '\0') - 1;
1182 while (s >= str && (*s == ' ' || *s == '\t')) {
1183 *s = '\0';
1184 s--;
1185 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001186 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001187 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +01001188 G.content_len = BB_STRTOOFF(str, NULL, 10);
1189 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001190 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +00001191 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001192 G.got_clen = 1;
1193 continue;
1194 }
1195 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001196 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001197 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001198 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001199 }
1200 if (key == KEY_location && status >= 300) {
1201 if (--redir_limit == 0)
1202 bb_error_msg_and_die("too many redirections");
1203 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001204 if (str[0] == '/') {
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001205 free(redirected_path);
1206 target.path = redirected_path = xstrdup(str+1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001207 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001208 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001209 parse_url(str, &target);
1210 if (!use_proxy) {
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001211 /* server.user remains untouched */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001212 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +01001213 server.allocated = NULL;
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001214 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +02001215 /* strip_ipv6_scope_id(target.host); - no! */
1216 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001217 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +00001218 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001219 goto resolve_lsa;
1220 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +00001221 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001222 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +00001223 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001224 }
1225// if (status >= 300)
1226// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001227
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001228 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +00001229 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +00001230 } else {
Eric Andersen79757c92001-04-05 21:45:54 +00001231 /*
1232 * FTP session
1233 */
Denys Vlasenko7f432802009-06-28 01:02:24 +02001234 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +00001235 }
Denis Vlasenko77105632007-09-24 15:04:00 +00001236
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001237 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001238
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001239 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +01001240 if (G.output_fd < 0)
1241 G.output_fd = xopen(G.fname_out, G.o_flags);
1242 retrieve_file_data(dfp);
1243 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1244 xclose(G.output_fd);
1245 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001246 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +00001247 }
Eric Andersen79757c92001-04-05 21:45:54 +00001248
Denys Vlasenkof1fab092009-06-28 03:33:57 +02001249 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001250 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +00001251 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +01001252 if (ftpcmd(NULL, NULL, sfp) != 226)
1253 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1254 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +00001255 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001256 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +00001257
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001258 free(server.allocated);
1259 free(target.allocated);
Denys Vlasenkod353bff2014-02-03 14:09:42 +01001260 free(server.user);
1261 free(target.user);
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +01001262 free(fname_out_alloc);
Denys Vlasenko93b4a602011-12-18 05:11:56 +01001263 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +00001264}
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001265
1266int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1267int wget_main(int argc UNUSED_PARAM, char **argv)
1268{
1269#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1270 static const char wget_longopts[] ALIGN1 =
1271 /* name, has_arg, val */
1272 "continue\0" No_argument "c"
1273//FIXME: -s isn't --spider, it's --save-headers!
1274 "spider\0" No_argument "s"
1275 "quiet\0" No_argument "q"
1276 "output-document\0" Required_argument "O"
1277 "directory-prefix\0" Required_argument "P"
1278 "proxy\0" Required_argument "Y"
1279 "user-agent\0" Required_argument "U"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001280IF_FEATURE_WGET_TIMEOUT(
1281 "timeout\0" Required_argument "T")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001282 /* Ignored: */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001283IF_DESKTOP( "tries\0" Required_argument "t")
1284 "header\0" Required_argument "\xff"
1285 "post-data\0" Required_argument "\xfe"
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001286 /* Ignored (we always use PASV): */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001287IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001288 /* Ignored (we don't do ssl) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001289IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0")
Vladimir Dronnikovf5abc782012-06-13 17:29:41 +02001290 /* Ignored (we don't support caching) */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001291IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1292IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1293IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1294IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1295IF_DESKTOP( "no-parent\0" No_argument "\xf0")
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001296 ;
1297#endif
1298
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001299#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1300 llist_t *headers_llist = NULL;
1301#endif
1302
1303 INIT_G();
1304
Lauri Kasanend074b412013-10-12 21:47:07 +02001305#if ENABLE_FEATURE_WGET_TIMEOUT
1306 G.timeout_seconds = 900;
1307 signal(SIGALRM, alarm_handler);
1308#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001309 G.proxy_flag = "on"; /* use proxies if env vars are set */
1310 G.user_agent = "Wget"; /* "User-Agent" header field */
1311
1312#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1313 applet_long_options = wget_longopts;
1314#endif
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001315 opt_complementary = "-1" /* at least one URL */
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001316 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */
Denys Vlasenko237bedd2016-07-06 21:58:02 +02001317 getopt32(argv, "csqO:P:Y:U:T:+"
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001318 /*ignored:*/ "t:"
1319 /*ignored:*/ "n::"
1320 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1321 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1322 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1323 * -nH --no-host-directories: wget -r http://host/ won't create host/
1324 * -np --no-parent
1325 * "n::" above says that we accept -n[ARG].
1326 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1327 */
1328 , &G.fname_out, &G.dir_prefix,
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001329 &G.proxy_flag, &G.user_agent,
1330 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
Denys Vlasenko92e1b082015-10-20 21:51:52 +02001331 NULL, /* -t RETRIES */
1332 NULL /* -n[ARG] */
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001333 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1334 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1335 );
1336 argv += optind;
1337
1338#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1339 if (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001340 int size = 0;
1341 char *hdr;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001342 llist_t *ll = headers_llist;
1343 while (ll) {
1344 size += strlen(ll->data) + 2;
1345 ll = ll->link;
1346 }
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001347 G.extra_headers = hdr = xmalloc(size + 1);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001348 while (headers_llist) {
Bernhard Reutner-Fischerd7bfee12015-02-18 20:41:02 +01001349 int bit;
1350 const char *words;
1351
1352 size = sprintf(hdr, "%s\r\n",
1353 (char*)llist_pop(&headers_llist));
1354 /* a bit like index_in_substrings but don't match full key */
1355 bit = 1;
1356 words = wget_user_headers;
1357 while (*words) {
1358 if (strstr(hdr, words) == hdr) {
1359 G.user_headers |= bit;
1360 break;
1361 }
1362 bit <<= 1;
1363 words += strlen(words) + 1;
1364 }
1365 hdr += size;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001366 }
1367 }
1368#endif
1369
Denys Vlasenko2384a352011-02-15 00:58:36 +01001370 G.output_fd = -1;
1371 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1372 if (G.fname_out) { /* -O FILE ? */
1373 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1374 G.output_fd = 1;
1375 option_mask32 &= ~WGET_OPT_CONTINUE;
1376 }
1377 /* compat with wget: -O FILE can overwrite */
1378 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1379 }
1380
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001381 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +01001382 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001383
Denys Vlasenko28556b92011-02-15 11:03:53 +01001384 if (G.output_fd >= 0)
1385 xclose(G.output_fd);
1386
Guilherme Maciel Ferreira840ef172013-10-16 14:43:30 +02001387#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1388 free(G.extra_headers);
1389#endif
1390 FINI_G();
1391
Pere Orga53695632011-02-16 20:09:36 +01001392 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001393}