blob: 0ec4dff19ba081ac8ce66b1e79af392ee3a48c39 [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
3 * wget - retrieve a file using HTTP
4 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Eric Andersenb520e082000-10-03 00:21:45 +00006 *
Eric Andersen4e573f42000-11-14 23:29:24 +00007 * Note: According to RFC2616 section 3.6.1, "All HTTP/1.1 applications MUST be
8 * able to receive and decode the "chunked" transfer-coding, and MUST ignore
9 * chunk-extension extensions they do not understand."
Eric Andersenb520e082000-10-03 00:21:45 +000010 *
 * This wget does not implement the "chunked" transfer-coding, which prevents
 * it from being completely RFC compliant and, as such, from being used as a
 * general purpose web browser... This is a design decision, since it makes
 * the code smaller.
Eric Andersenb520e082000-10-03 00:21:45 +000014 *
Eric Andersen96700832000-09-04 15:15:55 +000015 */
16
Eric Andersen3570a342000-09-25 21:45:58 +000017#include "busybox.h"
Eric Andersen96700832000-09-04 15:15:55 +000018#include <stdio.h>
19#include <stdlib.h>
20#include <unistd.h>
21#include <ctype.h>
22#include <string.h>
Eric Andersenb520e082000-10-03 00:21:45 +000023#include <unistd.h>
24#include <signal.h>
25#include <sys/ioctl.h>
Eric Andersen96700832000-09-04 15:15:55 +000026
Eric Andersenb520e082000-10-03 00:21:45 +000027#include <sys/time.h>
Eric Andersen96700832000-09-04 15:15:55 +000028#include <sys/types.h>
29#include <sys/stat.h>
30#include <sys/socket.h>
31#include <netinet/in.h>
32#include <arpa/inet.h>
33#include <netdb.h>
34
35
36void parse_url(char *url, char **uri_host, int *uri_port, char **uri_path);
37FILE *open_socket(char *host, int port);
38char *gethdr(char *buf, size_t bufsiz, FILE *fp, int *istrunc);
Eric Andersenb520e082000-10-03 00:21:45 +000039void progressmeter(int flag);
Eric Andersen96700832000-09-04 15:15:55 +000040
/* Globals (can be accessed from signal handlers) */
42static off_t filesize = 0; /* content-length of the file */
43#ifdef BB_FEATURE_STATUSBAR
44static char *curfile; /* Name of current file being transferred. */
45static struct timeval start; /* Time a transfer started. */
46volatile unsigned long statbytes; /* Number of bytes transferred so far. */
47/* For progressmeter() -- number of seconds before xfer considered "stalled" */
48#define STALLTIME 5
49#endif
Eric Andersen96700832000-09-04 15:15:55 +000050
51int wget_main(int argc, char **argv)
52{
53 FILE *sfp; /* socket to web server */
54 char *uri_host, *uri_path; /* parsed from command line url */
55 int uri_port;
56 char *s, buf[512];
57 int n;
58
59 char *fname_out = NULL; /* where to direct output (-O) */
60 int do_continue = 0; /* continue a prev transfer (-c) */
61 long beg_range = 0L; /* range at which continue begins */
62 int got_clen = 0; /* got content-length: from server */
Eric Andersenb520e082000-10-03 00:21:45 +000063 FILE *output; /* socket to web server */
Eric Andersen96700832000-09-04 15:15:55 +000064
65 /*
66 * Crack command line.
67 */
68 while ((n = getopt(argc, argv, "cO:")) != EOF) {
69 switch (n) {
70 case 'c':
71 ++do_continue;
72 break;
73 case 'O':
Randolph Chung02553a22000-12-07 03:53:47 +000074 /* can't set fname_out to NULL if outputting to stdout, because
75 * this gets interpreted as the auto-gen output filename
76 * case below - tausq@debian.org
77 */
78 fname_out = (strcmp(optarg, "-") == 0 ? (char *)1 : optarg);
Eric Andersen96700832000-09-04 15:15:55 +000079 break;
80 default:
81 usage(wget_usage);
82 }
83 }
Eric Andersen25b669c2000-10-02 23:19:38 +000084
Eric Andersen96700832000-09-04 15:15:55 +000085 if (argc - optind != 1)
86 usage(wget_usage);
Eric Andersen25b669c2000-10-02 23:19:38 +000087
88 /* Guess an output filename */
89 if (!fname_out) {
Eric Andersenb520e082000-10-03 00:21:45 +000090 fname_out =
91#ifdef BB_FEATURE_STATUSBAR
92 curfile =
93#endif
94 get_last_path_component(argv[optind]);
95#ifdef BB_FEATURE_STATUSBAR
96 } else {
97 curfile=argv[optind];
98#endif
Eric Andersen25b669c2000-10-02 23:19:38 +000099 }
100
Eric Andersenb520e082000-10-03 00:21:45 +0000101
Eric Andersen25b669c2000-10-02 23:19:38 +0000102 if (do_continue && !fname_out)
Mark Whitleyf57c9442000-12-07 19:56:48 +0000103 error_msg_and_die("cannot specify continue (-c) without a filename (-O)\n");
Eric Andersen96700832000-09-04 15:15:55 +0000104 /*
105 * Parse url into components.
106 */
107 parse_url(argv[optind], &uri_host, &uri_port, &uri_path);
108
109 /*
110 * Open socket to server.
111 */
112 sfp = open_socket(uri_host, uri_port);
113
114 /*
115 * Open the output stream.
116 */
Randolph Chung02553a22000-12-07 03:53:47 +0000117 if (fname_out != (char *)1) {
Eric Andersenb520e082000-10-03 00:21:45 +0000118 if ( (output=fopen(fname_out, (do_continue ? "a" : "w")))
119 == NULL)
Mark Whitleyf57c9442000-12-07 19:56:48 +0000120 perror_msg_and_die("fopen(%s)", fname_out);
Randolph Chung02553a22000-12-07 03:53:47 +0000121 } else {
122 output = stdout;
Eric Andersen96700832000-09-04 15:15:55 +0000123 }
124
125 /*
126 * Determine where to start transfer.
127 */
128 if (do_continue) {
129 struct stat sbuf;
Eric Andersenb520e082000-10-03 00:21:45 +0000130 if (fstat(fileno(output), &sbuf) < 0)
Mark Whitleyf57c9442000-12-07 19:56:48 +0000131 error_msg_and_die("fstat()");
Eric Andersen96700832000-09-04 15:15:55 +0000132 if (sbuf.st_size > 0)
133 beg_range = sbuf.st_size;
134 else
135 do_continue = 0;
136 }
137
138 /*
139 * Send HTTP request.
140 */
141 fprintf(sfp, "GET %s HTTP/1.1\r\nHost: %s\r\n", uri_path, uri_host);
142 if (do_continue)
143 fprintf(sfp, "Range: bytes=%ld-\r\n", beg_range);
144 fputs("Connection: close\r\n\r\n", sfp);
145
146 /*
147 * Retrieve HTTP response line and check for "200" status code.
148 */
149 if (fgets(buf, sizeof(buf), sfp) == NULL)
Mark Whitleyf57c9442000-12-07 19:56:48 +0000150 error_msg_and_die("no response from server\n");
Eric Andersen96700832000-09-04 15:15:55 +0000151 for (s = buf ; *s != '\0' && !isspace(*s) ; ++s)
152 ;
153 for ( ; isspace(*s) ; ++s)
154 ;
155 switch (atoi(s)) {
Eric Andersenb520e082000-10-03 00:21:45 +0000156 case 200:
157 if (!do_continue)
158 break;
Mark Whitleyf57c9442000-12-07 19:56:48 +0000159 error_msg_and_die("server does not support ranges\n");
Eric Andersenb520e082000-10-03 00:21:45 +0000160 case 206:
161 if (do_continue)
162 break;
163 /*FALLTHRU*/
164 default:
Mark Whitleyf57c9442000-12-07 19:56:48 +0000165 error_msg_and_die("server returned error: %s", buf);
Eric Andersen96700832000-09-04 15:15:55 +0000166 }
167
168 /*
169 * Retrieve HTTP headers.
170 */
171 while ((s = gethdr(buf, sizeof(buf), sfp, &n)) != NULL) {
172 if (strcmp(buf, "content-length") == 0) {
Eric Andersenb520e082000-10-03 00:21:45 +0000173 filesize = atol(s);
Eric Andersen96700832000-09-04 15:15:55 +0000174 got_clen = 1;
175 continue;
176 }
177 if (strcmp(buf, "transfer-encoding") == 0) {
Mark Whitleyf57c9442000-12-07 19:56:48 +0000178 error_msg_and_die("server wants to do %s transfer encoding\n", s);
Eric Andersen96700832000-09-04 15:15:55 +0000179 continue;
180 }
181 }
182
183 /*
184 * Retrieve HTTP body.
185 */
Eric Andersenb520e082000-10-03 00:21:45 +0000186#ifdef BB_FEATURE_STATUSBAR
187 statbytes=0;
188 progressmeter(-1);
189#endif
190 while (filesize > 0 && (n = fread(buf, 1, sizeof(buf), sfp)) > 0) {
191 fwrite(buf, 1, n, output);
192#ifdef BB_FEATURE_STATUSBAR
193 statbytes+=n;
194 progressmeter(1);
195#endif
Eric Andersen96700832000-09-04 15:15:55 +0000196 if (got_clen)
Eric Andersenb520e082000-10-03 00:21:45 +0000197 filesize -= n;
Eric Andersen96700832000-09-04 15:15:55 +0000198 }
199 if (n == 0 && ferror(sfp))
Mark Whitleyf57c9442000-12-07 19:56:48 +0000200 perror_msg_and_die("network read error");
Eric Andersen96700832000-09-04 15:15:55 +0000201
202 exit(0);
203}
204
205
/*
 * parse_url - split an "http://host[:port][/path]" url into its components.
 *
 * The url buffer is modified in place: the host name is moved to the start
 * of the buffer and NUL-terminated there.
 *
 *   url       writable url string; clobbered
 *   uri_host  receives a pointer to the host name (the start of url)
 *   uri_port  receives the port number; 80 when none (or an empty one) is given
 *   uri_path  receives the path including its leading '/' (pointing into
 *             url), or "/" when the url has no path
 *
 * Calls error_msg_and_die() if the url does not start with "http://" or if
 * the port is not a decimal number in the range 1-65535.
 */
void parse_url(char *url, char **uri_host, int *uri_port, char **uri_path)
{
	/* writable array, since it is handed out through a non-const pointer */
	static char defaultpath[] = "/";
	char *host, *path, *colon, *end;
	size_t hostlen;
	long port;

	*uri_port = 80;

	if (strncmp(url, "http://", 7) != 0)
		error_msg_and_die("not an http url: %s\n", url);

	host = url + 7;
	hostlen = strcspn(host, "/");
	path = host + hostlen;		/* at the first '/' or at the final NUL */

	colon = memchr(host, ':', hostlen);
	if (colon != NULL) {
		hostlen = (size_t) (colon - host);
		if (colon + 1 != path) {
			/* require a digit first so strtol's optional sign and leading
			 * whitespace are not accepted */
			if (!isdigit((unsigned char) colon[1]))
				error_msg_and_die("bad port in url: %s\n", url);
			port = strtol(colon + 1, &end, 10);
			if (end != path || port < 1 || port > 65535)
				error_msg_and_die("bad port in url: %s\n", url);
			*uri_port = (int) port;
		}
	}

	/* pull the host portion to the front of the buffer; source and
	 * destination overlap for hosts longer than 7 characters */
	memmove(url, host, hostlen);
	url[hostlen] = '\0';

	*uri_host = url;
	*uri_path = (*path != '\0') ? path : defaultpath;
}
231
232
/*
 * open_socket - resolve host, connect to it on the given TCP port and wrap
 * the connected socket in a stdio stream opened for update ("r+").
 *
 *   host  host name or dotted-quad address to resolve with gethostbyname()
 *   port  TCP port in host byte order
 *
 * Only the first address returned by the resolver is tried, and only IPv4
 * is supported.  Every failure is reported through one of the
 * *_msg_and_die() helpers.
 */
FILE *open_socket(char *host, int port)
{
	struct sockaddr_in sin;
	struct hostent *hp;
	int fd;
	FILE *fp;

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;

	hp = gethostbyname(host);
	if (hp == NULL)
		error_msg_and_die("cannot resolve %s\n", host);
	/* sin_addr only has room for an IPv4 address; never copy more */
	if (hp->h_addrtype != AF_INET || hp->h_length != (int) sizeof(sin.sin_addr))
		error_msg_and_die("no IPv4 address for %s\n", host);
	memcpy(&sin.sin_addr, hp->h_addr_list[0], sizeof(sin.sin_addr));
	sin.sin_port = htons(port);

	/*
	 * Get the server onto a stdio stream.
	 */
	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0)
		perror_msg_and_die("socket()");
	if (connect(fd, (struct sockaddr *) &sin, sizeof(sin)) < 0)
		perror_msg_and_die("connect(%s)", host);
	fp = fdopen(fd, "r+");
	if (fp == NULL)
		perror_msg_and_die("fdopen()");

	return fp;
}
259
260
/*
 * gethdr - read one HTTP header line from fp and split it into name/value.
 *
 * On a successful return buf holds the header name, lower-cased and
 * NUL-terminated, and the returned pointer (into buf) is the header value
 * with leading blanks/tabs skipped and the trailing CR/LF removed.
 *
 *   buf, bufsiz  caller's line buffer and its size
 *   fp           stream to read from
 *   istrunc      set to 1 when the line did not fit into buf (the rest of
 *                the line is read and thrown away), 0 otherwise
 *
 * Returns NULL at end of input and on the empty line that terminates the
 * header section.  Calls error_msg_and_die() when the line does not start
 * with a "name:" prefix.
 */
char *gethdr(char *buf, size_t bufsiz, FILE *fp, int *istrunc)
{
	char *s, *hdrval;
	int c;

	*istrunc = 0;

	/* retrieve header line */
	if (fgets(buf, (int) bufsiz, fp) == NULL)
		return NULL;

	/* see if we are at the end of the headers */
	for (s = buf; *s == '\r'; ++s)
		;
	if (s[0] == '\n')
		return NULL;

	/* convert the header name to lower case; the <ctype.h> functions
	 * need an unsigned char value, plain char may be negative */
	for (s = buf; isalnum((unsigned char) *s) || *s == '-'; ++s)
		*s = (char) tolower((unsigned char) *s);

	/* verify we are at the end of the header name */
	if (*s != ':')
		error_msg_and_die("bad header line: %s\n", buf);

	/* terminate the name, then locate the start of the header value */
	for (*s++ = '\0'; *s == ' ' || *s == '\t'; ++s)
		;
	hdrval = s;

	/* locate the end of header */
	while (*s != '\0' && *s != '\r' && *s != '\n')
		++s;

	/* end of header found */
	if (*s != '\0') {
		*s = '\0';
		return hdrval;
	}

	/* Rats!  The buffer isn't big enough to hold the entire header value. */
	while (c = getc(fp), c != EOF && c != '\n')
		;
	*istrunc = 1;
	return hdrval;
}
307
Eric Andersenb520e082000-10-03 00:21:45 +0000308#ifdef BB_FEATURE_STATUSBAR
/* Stuff below is from BSD rcp util.c, as added to openssh.
310 * Original copyright notice is retained at the end of this file.
311 *
312 */
Eric Andersenb520e082000-10-03 00:21:45 +0000313
314
/*
 * getttywidth - width, in columns, of the terminal the progress meter is
 * drawn on.
 *
 * The meter is written to stderr, so that is the descriptor queried; asking
 * stdout would give the wrong answer whenever stdout is redirected.
 *
 * Returns the column count reported by the TIOCGWINSZ ioctl, or 80 when the
 * ioctl fails or reports a width of zero.
 */
int
getttywidth(void)
{
	struct winsize winsize;

	if (ioctl(STDERR_FILENO, TIOCGWINSZ, &winsize) != -1
		&& winsize.ws_col != 0)
		return winsize.ws_col;

	return 80;
}
325
/*
 * updateprogressmeter - signal handler (installed for SIGALRM by
 * progressmeter()) that redraws the meter.
 *
 * errno is captured on entry and put back on exit so that the interrupted
 * code does not see a value changed by the redraw.  The signal number
 * argument is not used.
 */
void
updateprogressmeter(int ignore)
{
	int errno_on_entry;

	errno_on_entry = errno;
	progressmeter(0);
	errno = errno_on_entry;
}
334
/*
 * alarmtimer - program the ITIMER_REAL interval timer.
 *
 *   wait  number of seconds used both for the first expiry and for the
 *         repeat interval; progressmeter() passes 0 when the transfer is
 *         finished
 *
 * The previous timer setting is not retrieved and the result of
 * setitimer() is not examined.
 */
void
alarmtimer(int wait)
{
	struct itimerval timer;

	timer.it_interval.tv_sec = wait;
	timer.it_interval.tv_usec = 0;
	timer.it_value = timer.it_interval;

	setitimer(ITIMER_REAL, &timer, NULL);
}
345
346
347void
348progressmeter(int flag)
349{
350 static const char prefixes[] = " KMGTP";
351 static struct timeval lastupdate;
352 static off_t lastsize;
353 struct timeval now, td, wait;
354 off_t cursize, abbrevsize;
355 double elapsed;
356 int ratio, barlength, i, remaining;
357 char buf[256];
358
359 if (flag == -1) {
360 (void) gettimeofday(&start, (struct timezone *) 0);
361 lastupdate = start;
362 lastsize = 0;
363 }
364
365 (void) gettimeofday(&now, (struct timezone *) 0);
366 cursize = statbytes;
367 if (filesize != 0) {
368 ratio = 100.0 * cursize / filesize;
369 ratio = MAX(ratio, 0);
370 ratio = MIN(ratio, 100);
371 } else
372 ratio = 100;
373
374 snprintf(buf, sizeof(buf), "\r%-20.20s %3d%% ", curfile, ratio);
375
376 barlength = getttywidth() - 51;
377 if (barlength > 0) {
378 i = barlength * ratio / 100;
379 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
380 "|%.*s%*s|", i,
381 "*****************************************************************************"
382 "*****************************************************************************",
383 barlength - i, "");
384 }
385 i = 0;
386 abbrevsize = cursize;
387 while (abbrevsize >= 100000 && i < sizeof(prefixes)) {
388 i++;
389 abbrevsize >>= 10;
390 }
391 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " %5d %c%c ",
392 (int) abbrevsize, prefixes[i], prefixes[i] == ' ' ? ' ' :
393 'B');
394
395 timersub(&now, &lastupdate, &wait);
396 if (cursize > lastsize) {
397 lastupdate = now;
398 lastsize = cursize;
399 if (wait.tv_sec >= STALLTIME) {
400 start.tv_sec += wait.tv_sec;
401 start.tv_usec += wait.tv_usec;
402 }
403 wait.tv_sec = 0;
404 }
405 timersub(&now, &start, &td);
406 elapsed = td.tv_sec + (td.tv_usec / 1000000.0);
407
408 if (statbytes <= 0 || elapsed <= 0.0 || cursize > filesize) {
409 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
410 " --:-- ETA");
411 } else if (wait.tv_sec >= STALLTIME) {
412 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
413 " - stalled -");
414 } else {
415 remaining = (int) (filesize / (statbytes / elapsed) - elapsed);
416 i = remaining / 3600;
417 if (i)
418 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
419 "%2d:", i);
420 else
421 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
422 " ");
423 i = remaining % 3600;
424 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
425 "%02d:%02d ETA", i / 60, i % 60);
426 }
Randolph Chungda7b8292000-12-07 03:55:35 +0000427 write(fileno(stderr), buf, strlen(buf));
Eric Andersenb520e082000-10-03 00:21:45 +0000428
429 if (flag == -1) {
430 struct sigaction sa;
431 sa.sa_handler = updateprogressmeter;
432 sigemptyset(&sa.sa_mask);
433 sa.sa_flags = SA_RESTART;
434 sigaction(SIGALRM, &sa, NULL);
435 alarmtimer(1);
436 } else if (flag == 1) {
437 alarmtimer(0);
438 statbytes = 0;
439 }
440}
441#endif
Eric Andersen4e573f42000-11-14 23:29:24 +0000442
/* Original copyright notice which applies to the BB_FEATURE_STATUSBAR stuff,
 * much of which was blatantly stolen from openssh. */
445
446/*-
447 * Copyright (c) 1992, 1993
448 * The Regents of the University of California. All rights reserved.
449 *
450 * Redistribution and use in source and binary forms, with or without
451 * modification, are permitted provided that the following conditions
452 * are met:
453 * 1. Redistributions of source code must retain the above copyright
454 * notice, this list of conditions and the following disclaimer.
455 * 2. Redistributions in binary form must reproduce the above copyright
456 * notice, this list of conditions and the following disclaimer in the
457 * documentation and/or other materials provided with the distribution.
458 *
459 * 3. <BSD Advertising Clause omitted per the July 22, 1999 licensing change
460 * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change>
461 *
462 * 4. Neither the name of the University nor the names of its contributors
463 * may be used to endorse or promote products derived from this software
464 * without specific prior written permission.
465 *
466 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
467 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
468 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
469 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
470 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
471 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
472 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
473 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
474 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
475 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
476 * SUCH DAMAGE.
477 *
Mark Whitleyf57c9442000-12-07 19:56:48 +0000478 * $Id: wget.c,v 1.10 2000/12/07 19:56:48 markw Exp $
Eric Andersen4e573f42000-11-14 23:29:24 +0000479 */
480
481
482
Eric Andersen96700832000-09-04 15:15:55 +0000483/*
484Local Variables:
485c-file-style: "linux"
486c-basic-offset: 4
487tab-width: 4
488End:
489*/
Eric Andersenb520e082000-10-03 00:21:45 +0000490
Eric Andersen4e573f42000-11-14 23:29:24 +0000491
492