blob: 9e150769ca3a9808d800f71a4f0c9179f23e5d8b [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 *
 * Note: According to RFC2616 section 3.6.1, "All HTTP/1.1 applications MUST be
 * able to receive and decode the "chunked" transfer-coding, and MUST ignore
 * chunk-extension extensions they do not understand."
 *
 * This wget does not decode the "chunked" transfer-coding; it gives up as
 * soon as the server announces a transfer-encoding.  That prevents this
 * particular wget app from being completely RFC compliant and, as such,
 * prevents it from being used as a general purpose web browser...  This
 * is a design decision, since it makes the code smaller.
 *
 */
16
Eric Andersen3570a342000-09-25 21:45:58 +000017#include "busybox.h"
Eric Andersen96700832000-09-04 15:15:55 +000018#include <stdio.h>
19#include <stdlib.h>
20#include <unistd.h>
21#include <ctype.h>
22#include <string.h>
Eric Andersenb520e082000-10-03 00:21:45 +000023#include <unistd.h>
24#include <signal.h>
25#include <sys/ioctl.h>
Eric Andersen96700832000-09-04 15:15:55 +000026
Eric Andersenb520e082000-10-03 00:21:45 +000027#include <sys/time.h>
Eric Andersen96700832000-09-04 15:15:55 +000028#include <sys/types.h>
29#include <sys/stat.h>
30#include <sys/socket.h>
31#include <netinet/in.h>
32#include <arpa/inet.h>
33#include <netdb.h>
34
35
/* Helpers defined further down in this file. */
void parse_url(char *url, char **uri_host, int *uri_port, char **uri_path);
FILE *open_socket(char *host, int port);
char *gethdr(char *buf, size_t bufsiz, FILE *fp, int *istrunc);
void progressmeter(int flag);

/*
 * File-scope state.  progressmeter() reads these, and it is also run from
 * the SIGALRM handler, so they have to live outside of wget_main().
 */
static off_t filesize;				/* content-length of the file */
#ifdef BB_FEATURE_STATUSBAR
/* Seconds without progress before progressmeter() reports a stalled xfer. */
#define STALLTIME	5
static char *curfile;				/* name shown for the current transfer */
static struct timeval start;		/* time the transfer started */
volatile unsigned long statbytes;	/* bytes transferred so far */
#endif
Eric Andersen96700832000-09-04 15:15:55 +000050
51int wget_main(int argc, char **argv)
52{
53 FILE *sfp; /* socket to web server */
54 char *uri_host, *uri_path; /* parsed from command line url */
Eric Andersenf3b2b522000-12-07 22:42:11 +000055 char *proxy;
Eric Andersen96700832000-09-04 15:15:55 +000056 int uri_port;
57 char *s, buf[512];
58 int n;
59
60 char *fname_out = NULL; /* where to direct output (-O) */
61 int do_continue = 0; /* continue a prev transfer (-c) */
62 long beg_range = 0L; /* range at which continue begins */
63 int got_clen = 0; /* got content-length: from server */
Eric Andersenb520e082000-10-03 00:21:45 +000064 FILE *output; /* socket to web server */
Glenn L McGrath1bca5ed2000-12-09 08:12:06 +000065 int quiet_flag = FALSE;
Eric Andersen96700832000-09-04 15:15:55 +000066 /*
67 * Crack command line.
68 */
Glenn L McGrath1bca5ed2000-12-09 08:12:06 +000069 while ((n = getopt(argc, argv, "cqO:")) != EOF) {
Eric Andersen96700832000-09-04 15:15:55 +000070 switch (n) {
71 case 'c':
72 ++do_continue;
73 break;
Glenn L McGrath1bca5ed2000-12-09 08:12:06 +000074 case 'q':
75 quiet_flag = TRUE;
76 break;
Eric Andersen96700832000-09-04 15:15:55 +000077 case 'O':
Randolph Chung02553a22000-12-07 03:53:47 +000078 /* can't set fname_out to NULL if outputting to stdout, because
79 * this gets interpreted as the auto-gen output filename
80 * case below - tausq@debian.org
81 */
82 fname_out = (strcmp(optarg, "-") == 0 ? (char *)1 : optarg);
Eric Andersen96700832000-09-04 15:15:55 +000083 break;
84 default:
85 usage(wget_usage);
86 }
87 }
Eric Andersen25b669c2000-10-02 23:19:38 +000088
Eric Andersen96700832000-09-04 15:15:55 +000089 if (argc - optind != 1)
90 usage(wget_usage);
Eric Andersen25b669c2000-10-02 23:19:38 +000091
92 /* Guess an output filename */
93 if (!fname_out) {
Eric Andersenb520e082000-10-03 00:21:45 +000094 fname_out =
95#ifdef BB_FEATURE_STATUSBAR
96 curfile =
97#endif
98 get_last_path_component(argv[optind]);
99#ifdef BB_FEATURE_STATUSBAR
100 } else {
101 curfile=argv[optind];
102#endif
Eric Andersen25b669c2000-10-02 23:19:38 +0000103 }
104
Eric Andersenb520e082000-10-03 00:21:45 +0000105
Eric Andersen25b669c2000-10-02 23:19:38 +0000106 if (do_continue && !fname_out)
Mark Whitleyf57c9442000-12-07 19:56:48 +0000107 error_msg_and_die("cannot specify continue (-c) without a filename (-O)\n");
Eric Andersenf3b2b522000-12-07 22:42:11 +0000108
Eric Andersen96700832000-09-04 15:15:55 +0000109 /*
Eric Andersenf3b2b522000-12-07 22:42:11 +0000110 * Use the proxy if necessary.
Eric Andersen96700832000-09-04 15:15:55 +0000111 */
Eric Andersenf3b2b522000-12-07 22:42:11 +0000112 if ((proxy = getenv("http_proxy")) != NULL) {
113 proxy = xstrdup(proxy);
114 parse_url(proxy, &uri_host, &uri_port, &uri_path);
115 uri_path = argv[optind];
116 } else {
117 /*
118 * Parse url into components.
119 */
120 parse_url(argv[optind], &uri_host, &uri_port, &uri_path);
121 }
Eric Andersen96700832000-09-04 15:15:55 +0000122
123 /*
124 * Open socket to server.
125 */
126 sfp = open_socket(uri_host, uri_port);
127
128 /*
129 * Open the output stream.
130 */
Randolph Chung02553a22000-12-07 03:53:47 +0000131 if (fname_out != (char *)1) {
Eric Andersenb520e082000-10-03 00:21:45 +0000132 if ( (output=fopen(fname_out, (do_continue ? "a" : "w")))
133 == NULL)
Mark Whitleyf57c9442000-12-07 19:56:48 +0000134 perror_msg_and_die("fopen(%s)", fname_out);
Randolph Chung02553a22000-12-07 03:53:47 +0000135 } else {
136 output = stdout;
Eric Andersen96700832000-09-04 15:15:55 +0000137 }
138
139 /*
140 * Determine where to start transfer.
141 */
142 if (do_continue) {
143 struct stat sbuf;
Eric Andersenb520e082000-10-03 00:21:45 +0000144 if (fstat(fileno(output), &sbuf) < 0)
Mark Whitleyf57c9442000-12-07 19:56:48 +0000145 error_msg_and_die("fstat()");
Eric Andersen96700832000-09-04 15:15:55 +0000146 if (sbuf.st_size > 0)
147 beg_range = sbuf.st_size;
148 else
149 do_continue = 0;
150 }
151
152 /*
153 * Send HTTP request.
154 */
155 fprintf(sfp, "GET %s HTTP/1.1\r\nHost: %s\r\n", uri_path, uri_host);
156 if (do_continue)
157 fprintf(sfp, "Range: bytes=%ld-\r\n", beg_range);
158 fputs("Connection: close\r\n\r\n", sfp);
159
160 /*
161 * Retrieve HTTP response line and check for "200" status code.
162 */
163 if (fgets(buf, sizeof(buf), sfp) == NULL)
Mark Whitleyf57c9442000-12-07 19:56:48 +0000164 error_msg_and_die("no response from server\n");
Eric Andersen96700832000-09-04 15:15:55 +0000165 for (s = buf ; *s != '\0' && !isspace(*s) ; ++s)
166 ;
167 for ( ; isspace(*s) ; ++s)
168 ;
169 switch (atoi(s)) {
Eric Andersenb520e082000-10-03 00:21:45 +0000170 case 200:
171 if (!do_continue)
172 break;
Mark Whitleyf57c9442000-12-07 19:56:48 +0000173 error_msg_and_die("server does not support ranges\n");
Eric Andersenb520e082000-10-03 00:21:45 +0000174 case 206:
175 if (do_continue)
176 break;
177 /*FALLTHRU*/
178 default:
Mark Whitleyf57c9442000-12-07 19:56:48 +0000179 error_msg_and_die("server returned error: %s", buf);
Eric Andersen96700832000-09-04 15:15:55 +0000180 }
181
182 /*
183 * Retrieve HTTP headers.
184 */
185 while ((s = gethdr(buf, sizeof(buf), sfp, &n)) != NULL) {
186 if (strcmp(buf, "content-length") == 0) {
Eric Andersenb520e082000-10-03 00:21:45 +0000187 filesize = atol(s);
Eric Andersen96700832000-09-04 15:15:55 +0000188 got_clen = 1;
189 continue;
190 }
191 if (strcmp(buf, "transfer-encoding") == 0) {
Mark Whitleyf57c9442000-12-07 19:56:48 +0000192 error_msg_and_die("server wants to do %s transfer encoding\n", s);
Eric Andersen96700832000-09-04 15:15:55 +0000193 continue;
194 }
195 }
196
197 /*
198 * Retrieve HTTP body.
199 */
Eric Andersenb520e082000-10-03 00:21:45 +0000200#ifdef BB_FEATURE_STATUSBAR
201 statbytes=0;
Glenn L McGrath1bca5ed2000-12-09 08:12:06 +0000202 if (quiet_flag==FALSE)
203 progressmeter(-1);
Eric Andersenb520e082000-10-03 00:21:45 +0000204#endif
205 while (filesize > 0 && (n = fread(buf, 1, sizeof(buf), sfp)) > 0) {
206 fwrite(buf, 1, n, output);
207#ifdef BB_FEATURE_STATUSBAR
208 statbytes+=n;
Glenn L McGrath1bca5ed2000-12-09 08:12:06 +0000209 if (quiet_flag==FALSE)
210 progressmeter(1);
Eric Andersenb520e082000-10-03 00:21:45 +0000211#endif
Eric Andersen96700832000-09-04 15:15:55 +0000212 if (got_clen)
Eric Andersenb520e082000-10-03 00:21:45 +0000213 filesize -= n;
Eric Andersen96700832000-09-04 15:15:55 +0000214 }
215 if (n == 0 && ferror(sfp))
Mark Whitleyf57c9442000-12-07 19:56:48 +0000216 perror_msg_and_die("network read error");
Eric Andersen96700832000-09-04 15:15:55 +0000217
218 exit(0);
219}
220
221
/*
 * parse_url - split an "http://host[:port][/path]" url into its components.
 *
 * url       url to parse.  It is modified in place: the host name is moved
 *           to the front of the buffer so that it can be NUL-terminated
 *           without overwriting the '/' that starts the path.
 * uri_host  receives the NUL-terminated host name (same address as url).
 * uri_port  receives the port: 80 unless the url carries a non-empty ":port".
 * uri_path  receives a pointer to the path inside url, starting at the first
 *           '/' after the host, or to "/" when the url has no path.
 *
 * Dies with an error message if url does not start with "http://" or if the
 * port is not a decimal number in the range 1..65535.
 */
void parse_url(char *url, char **uri_host, int *uri_port, char **uri_path)
{
	static char defaultpath[] = "/";
	char *host, *path, *portstr, *end;
	size_t hostlen;
	long port;

	*uri_port = 80;

	if (strncmp(url, "http://", 7) != 0)
		error_msg_and_die("not an http url: %s\n", url);

	host = url + 7;
	hostlen = strcspn(host, ":/");		/* host ends at ':' or '/' */
	path = host + strcspn(host, "/");	/* first '/', or the final NUL */

	if (host[hostlen] == ':') {
		portstr = host + hostlen + 1;
		/* An empty port ("host:/x") keeps the default. */
		if (portstr != path) {
			/* strtol() alone would accept blanks and a sign. */
			if (!isdigit((unsigned char) *portstr))
				error_msg_and_die("bad port in url: %s\n", url);
			port = strtol(portstr, &end, 10);
			if (end != path || port < 1 || port > 65535)
				error_msg_and_die("bad port in url: %s\n", url);
			*uri_port = (int) port;
		}
	}

	/* pull the host portion to the front of the buffer; this only touches
	 * bytes in front of the path, which therefore stays intact */
	memmove(url, host, hostlen);
	url[hostlen] = '\0';

	*uri_host = url;
	*uri_path = (*path != '\0') ? path : defaultpath;
}
247
248
/*
 * open_socket - connect to host:port over TCP/IPv4.
 *
 * host  name (or dotted address) handed to gethostbyname()
 * port  TCP port in host byte order
 *
 * Returns a stdio stream opened in "r+" mode on the connected socket.
 * Because it is an update stream, callers must fflush() it between writing
 * the request and reading the reply.
 *
 * Dies with an error message if the name cannot be resolved to an IPv4
 * address or if socket(), connect() or fdopen() fails.
 */
FILE *open_socket(char *host, int port)
{
	struct sockaddr_in sin;
	struct hostent *hp;
	int fd;
	FILE *fp;

	hp = gethostbyname(host);
	if (hp == NULL)
		error_msg_and_die("cannot resolve %s\n", host);
	/* The address is copied into a sockaddr_in below, so make sure it
	 * really is an IPv4 address of the expected size. */
	if (hp->h_addrtype != AF_INET
		|| hp->h_length != (int) sizeof(sin.sin_addr)
		|| hp->h_addr_list[0] == NULL)
		error_msg_and_die("no IPv4 address for %s\n", host);

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	memcpy(&sin.sin_addr, hp->h_addr_list[0], sizeof(sin.sin_addr));
	sin.sin_port = htons(port);

	/*
	 * Get the server onto a stdio stream.
	 */
	if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
		perror_msg_and_die("socket()");
	if (connect(fd, (struct sockaddr *) &sin, sizeof(sin)) < 0)
		perror_msg_and_die("connect(%s)", host);
	if ((fp = fdopen(fd, "r+")) == NULL)
		perror_msg_and_die("fdopen()");

	return fp;
}
275
276
/*
 * gethdr - read one HTTP header line from fp.
 *
 * buf      receives the line; on success it holds the header name, converted
 *          to lower case and NUL-terminated at the ':'
 * bufsiz   size of buf
 * fp       stream to read from
 * istrunc  set to 1 if the line did not fit into buf (the remainder of the
 *          line is read and discarded), 0 otherwise
 *
 * Returns a pointer into buf to the header value (leading blanks skipped,
 * line terminator removed), or NULL at the blank line that ends the headers
 * or when nothing more can be read.
 *
 * Dies with an error message if the line is not of the form "name: value".
 */
char *gethdr(char *buf, size_t bufsiz, FILE *fp, int *istrunc)
{
	char *s, *hdrval;
	int c;

	*istrunc = 0;

	/* retrieve header line */
	if (fgets(buf, bufsiz, fp) == NULL)
		return NULL;

	/* see if we are at the end of the headers */
	for (s = buf; *s == '\r'; ++s)
		;
	if (*s == '\n')
		return NULL;

	/* convert the header name to lower case; the <ctype.h> functions
	 * need the character as an unsigned char value */
	for (s = buf; isalnum((unsigned char) *s) || *s == '-'; ++s)
		*s = tolower((unsigned char) *s);

	/* verify we are at the end of the header name */
	if (*s != ':')
		error_msg_and_die("bad header line: %s\n", buf);

	/* terminate the name and locate the start of the header value */
	*s++ = '\0';
	while (*s == ' ' || *s == '\t')
		++s;
	hdrval = s;

	/* locate the end of header */
	s += strcspn(s, "\r\n");

	/* end of header found */
	if (*s != '\0') {
		*s = '\0';
		return hdrval;
	}

	/* Rats!  The buffer isn't big enough to hold the entire header value:
	 * discard the rest of the line so the next call starts on a new one. */
	while (c = getc(fp), c != EOF && c != '\n')
		;
	*istrunc = 1;
	return hdrval;
}
323
Eric Andersenb520e082000-10-03 00:21:45 +0000324#ifdef BB_FEATURE_STATUSBAR
/* Stuff below is from BSD rcp util.c, as added to openssh.
 * Original copyright notice is retained at the end of this file.
 *
 */
Eric Andersenb520e082000-10-03 00:21:45 +0000329
330
/*
 * getttywidth - width, in columns, of the terminal the progress meter is
 * drawn on.
 *
 * The meter is written to stderr, so that is the descriptor queried.
 * Returns 80 when stderr is not a terminal or reports a width of 0.
 */
int
getttywidth(void)
{
	struct winsize winsize;

	if (ioctl(STDERR_FILENO, TIOCGWINSZ, &winsize) != -1 && winsize.ws_col)
		return (winsize.ws_col);
	return (80);
}
341
/*
 * SIGALRM handler: redraw the progress meter.  errno is put back to its
 * previous value afterwards so that the interrupted code does not see
 * whatever the redraw left in it.
 */
void
updateprogressmeter(int ignore)
{
	const int errno_on_entry = errno;

	progressmeter(0);

	errno = errno_on_entry;
}
350
/*
 * Arm the real-time interval timer (ITIMER_REAL) so that it first expires
 * after `wait` seconds and then keeps repeating with the same period.
 * A value of 0 disarms the timer.
 */
void
alarmtimer(int wait)
{
	struct itimerval timer;

	timer.it_interval.tv_sec = wait;
	timer.it_interval.tv_usec = 0;
	timer.it_value = timer.it_interval;

	setitimer(ITIMER_REAL, &timer, NULL);
}
361
362
363void
364progressmeter(int flag)
365{
366 static const char prefixes[] = " KMGTP";
367 static struct timeval lastupdate;
368 static off_t lastsize;
369 struct timeval now, td, wait;
370 off_t cursize, abbrevsize;
371 double elapsed;
372 int ratio, barlength, i, remaining;
373 char buf[256];
374
375 if (flag == -1) {
376 (void) gettimeofday(&start, (struct timezone *) 0);
377 lastupdate = start;
378 lastsize = 0;
379 }
380
381 (void) gettimeofday(&now, (struct timezone *) 0);
382 cursize = statbytes;
383 if (filesize != 0) {
384 ratio = 100.0 * cursize / filesize;
385 ratio = MAX(ratio, 0);
386 ratio = MIN(ratio, 100);
387 } else
388 ratio = 100;
389
390 snprintf(buf, sizeof(buf), "\r%-20.20s %3d%% ", curfile, ratio);
391
392 barlength = getttywidth() - 51;
393 if (barlength > 0) {
394 i = barlength * ratio / 100;
395 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
396 "|%.*s%*s|", i,
397 "*****************************************************************************"
398 "*****************************************************************************",
399 barlength - i, "");
400 }
401 i = 0;
402 abbrevsize = cursize;
403 while (abbrevsize >= 100000 && i < sizeof(prefixes)) {
404 i++;
405 abbrevsize >>= 10;
406 }
407 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " %5d %c%c ",
408 (int) abbrevsize, prefixes[i], prefixes[i] == ' ' ? ' ' :
409 'B');
410
411 timersub(&now, &lastupdate, &wait);
412 if (cursize > lastsize) {
413 lastupdate = now;
414 lastsize = cursize;
415 if (wait.tv_sec >= STALLTIME) {
416 start.tv_sec += wait.tv_sec;
417 start.tv_usec += wait.tv_usec;
418 }
419 wait.tv_sec = 0;
420 }
421 timersub(&now, &start, &td);
422 elapsed = td.tv_sec + (td.tv_usec / 1000000.0);
423
424 if (statbytes <= 0 || elapsed <= 0.0 || cursize > filesize) {
425 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
426 " --:-- ETA");
427 } else if (wait.tv_sec >= STALLTIME) {
428 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
429 " - stalled -");
430 } else {
431 remaining = (int) (filesize / (statbytes / elapsed) - elapsed);
432 i = remaining / 3600;
433 if (i)
434 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
435 "%2d:", i);
436 else
437 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
438 " ");
439 i = remaining % 3600;
440 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
441 "%02d:%02d ETA", i / 60, i % 60);
442 }
Randolph Chungda7b8292000-12-07 03:55:35 +0000443 write(fileno(stderr), buf, strlen(buf));
Eric Andersenb520e082000-10-03 00:21:45 +0000444
445 if (flag == -1) {
446 struct sigaction sa;
447 sa.sa_handler = updateprogressmeter;
448 sigemptyset(&sa.sa_mask);
449 sa.sa_flags = SA_RESTART;
450 sigaction(SIGALRM, &sa, NULL);
451 alarmtimer(1);
452 } else if (flag == 1) {
453 alarmtimer(0);
454 statbytes = 0;
455 }
456}
457#endif
Eric Andersen4e573f42000-11-14 23:29:24 +0000458
/* Original copyright notice which applies to the BB_FEATURE_STATUSBAR stuff,
 * much of which was blatantly stolen from openssh. */
461
462/*-
463 * Copyright (c) 1992, 1993
464 * The Regents of the University of California. All rights reserved.
465 *
466 * Redistribution and use in source and binary forms, with or without
467 * modification, are permitted provided that the following conditions
468 * are met:
469 * 1. Redistributions of source code must retain the above copyright
470 * notice, this list of conditions and the following disclaimer.
471 * 2. Redistributions in binary form must reproduce the above copyright
472 * notice, this list of conditions and the following disclaimer in the
473 * documentation and/or other materials provided with the distribution.
474 *
475 * 3. <BSD Advertising Clause omitted per the July 22, 1999 licensing change
476 * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change>
477 *
478 * 4. Neither the name of the University nor the names of its contributors
479 * may be used to endorse or promote products derived from this software
480 * without specific prior written permission.
481 *
482 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
483 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
484 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
485 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
486 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
487 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
488 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
489 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
490 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
491 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
492 * SUCH DAMAGE.
493 *
Glenn L McGrath1bca5ed2000-12-09 08:12:06 +0000494 * $Id: wget.c,v 1.12 2000/12/09 08:12:06 bug1 Exp $
Eric Andersen4e573f42000-11-14 23:29:24 +0000495 */
496
497
498
Eric Andersen96700832000-09-04 15:15:55 +0000499/*
500Local Variables:
501c-file-style: "linux"
502c-basic-offset: 4
503tab-width: 4
504End:
505*/
Eric Andersenb520e082000-10-03 00:21:45 +0000506
Eric Andersen4e573f42000-11-14 23:29:24 +0000507
508