blob: 78db6e32a153c13c2c9aa44377b06da9cc63c26a [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
3 * wget - retrieve a file using HTTP
4 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Eric Andersenb520e082000-10-03 00:21:45 +00006 *
Eric Andersen4e573f42000-11-14 23:29:24 +00007 * Note: According to RFC2616 section 3.6.1, "All HTTP/1.1 applications MUST be
8 * able to receive and decode the "chunked" transfer-coding, and MUST ignore
9 * chunk-extension extensions they do not understand."
Eric Andersenb520e082000-10-03 00:21:45 +000010 *
 * This applet does not decode chunked transfers, which prevents it from being
 * completely RFC compliant and, as such, from being used as a general purpose
 * web browser...  This is a design decision, since it makes the code smaller.
Eric Andersenb520e082000-10-03 00:21:45 +000014 *
Eric Andersen96700832000-09-04 15:15:55 +000015 */
16
Eric Andersen3570a342000-09-25 21:45:58 +000017#include "busybox.h"
Eric Andersen96700832000-09-04 15:15:55 +000018#include <stdio.h>
19#include <stdlib.h>
20#include <unistd.h>
21#include <ctype.h>
22#include <string.h>
Eric Andersenb520e082000-10-03 00:21:45 +000023#include <unistd.h>
24#include <signal.h>
25#include <sys/ioctl.h>
Eric Andersen96700832000-09-04 15:15:55 +000026
Eric Andersenb520e082000-10-03 00:21:45 +000027#include <sys/time.h>
Eric Andersen96700832000-09-04 15:15:55 +000028#include <sys/types.h>
29#include <sys/stat.h>
30#include <sys/socket.h>
31#include <netinet/in.h>
32#include <arpa/inet.h>
33#include <netdb.h>
34
35
36void parse_url(char *url, char **uri_host, int *uri_port, char **uri_path);
37FILE *open_socket(char *host, int port);
38char *gethdr(char *buf, size_t bufsiz, FILE *fp, int *istrunc);
Eric Andersenb520e082000-10-03 00:21:45 +000039void progressmeter(int flag);
Eric Andersen96700832000-09-04 15:15:55 +000040
/* File-scope state (may also be read from the SIGALRM handler). */
static off_t filesize;				/* content-length reported by the server; starts at zero */
#ifdef BB_FEATURE_STATUSBAR
static char *curfile;				/* name shown for the file being transferred */
static struct timeval start;		/* moment the transfer started */
volatile unsigned long statbytes;	/* bytes transferred so far */
/* Used by progressmeter(): seconds without progress before a transfer counts as "stalled". */
#define STALLTIME	5
#endif
Eric Andersen96700832000-09-04 15:15:55 +000050
51int wget_main(int argc, char **argv)
52{
53 FILE *sfp; /* socket to web server */
54 char *uri_host, *uri_path; /* parsed from command line url */
55 int uri_port;
56 char *s, buf[512];
57 int n;
58
59 char *fname_out = NULL; /* where to direct output (-O) */
60 int do_continue = 0; /* continue a prev transfer (-c) */
61 long beg_range = 0L; /* range at which continue begins */
62 int got_clen = 0; /* got content-length: from server */
Eric Andersenb520e082000-10-03 00:21:45 +000063 FILE *output; /* socket to web server */
Eric Andersen96700832000-09-04 15:15:55 +000064
65 /*
66 * Crack command line.
67 */
68 while ((n = getopt(argc, argv, "cO:")) != EOF) {
69 switch (n) {
70 case 'c':
71 ++do_continue;
72 break;
73 case 'O':
Eric Andersen5d638842000-09-14 21:46:30 +000074 fname_out = (strcmp(optarg, "-") == 0 ? NULL : optarg);
Eric Andersen96700832000-09-04 15:15:55 +000075 break;
76 default:
77 usage(wget_usage);
78 }
79 }
Eric Andersen25b669c2000-10-02 23:19:38 +000080
Eric Andersen96700832000-09-04 15:15:55 +000081 if (argc - optind != 1)
82 usage(wget_usage);
Eric Andersen25b669c2000-10-02 23:19:38 +000083
84 /* Guess an output filename */
85 if (!fname_out) {
Eric Andersenb520e082000-10-03 00:21:45 +000086 fname_out =
87#ifdef BB_FEATURE_STATUSBAR
88 curfile =
89#endif
90 get_last_path_component(argv[optind]);
91#ifdef BB_FEATURE_STATUSBAR
92 } else {
93 curfile=argv[optind];
94#endif
Eric Andersen25b669c2000-10-02 23:19:38 +000095 }
96
Eric Andersenb520e082000-10-03 00:21:45 +000097
Eric Andersen25b669c2000-10-02 23:19:38 +000098 if (do_continue && !fname_out)
Matt Kraai97d26122000-10-25 16:25:50 +000099 fatalError("cannot specify continue (-c) without a filename (-O)\n");
Eric Andersen96700832000-09-04 15:15:55 +0000100 /*
101 * Parse url into components.
102 */
103 parse_url(argv[optind], &uri_host, &uri_port, &uri_path);
104
105 /*
106 * Open socket to server.
107 */
108 sfp = open_socket(uri_host, uri_port);
109
110 /*
111 * Open the output stream.
112 */
113 if (fname_out != NULL) {
Eric Andersenb520e082000-10-03 00:21:45 +0000114 if ( (output=fopen(fname_out, (do_continue ? "a" : "w")))
115 == NULL)
Matt Kraai97d26122000-10-25 16:25:50 +0000116 fatalPerror("fopen(%s)", fname_out);
Eric Andersenb520e082000-10-03 00:21:45 +0000117 } else {
118 output=stdout;
Eric Andersen96700832000-09-04 15:15:55 +0000119 }
120
121 /*
122 * Determine where to start transfer.
123 */
124 if (do_continue) {
125 struct stat sbuf;
Eric Andersenb520e082000-10-03 00:21:45 +0000126 if (fstat(fileno(output), &sbuf) < 0)
Matt Kraai97d26122000-10-25 16:25:50 +0000127 fatalError("fstat()");
Eric Andersen96700832000-09-04 15:15:55 +0000128 if (sbuf.st_size > 0)
129 beg_range = sbuf.st_size;
130 else
131 do_continue = 0;
132 }
133
134 /*
135 * Send HTTP request.
136 */
137 fprintf(sfp, "GET %s HTTP/1.1\r\nHost: %s\r\n", uri_path, uri_host);
138 if (do_continue)
139 fprintf(sfp, "Range: bytes=%ld-\r\n", beg_range);
140 fputs("Connection: close\r\n\r\n", sfp);
141
142 /*
143 * Retrieve HTTP response line and check for "200" status code.
144 */
145 if (fgets(buf, sizeof(buf), sfp) == NULL)
Matt Kraai97d26122000-10-25 16:25:50 +0000146 fatalError("no response from server\n");
Eric Andersen96700832000-09-04 15:15:55 +0000147 for (s = buf ; *s != '\0' && !isspace(*s) ; ++s)
148 ;
149 for ( ; isspace(*s) ; ++s)
150 ;
151 switch (atoi(s)) {
Eric Andersenb520e082000-10-03 00:21:45 +0000152 case 200:
153 if (!do_continue)
154 break;
Matt Kraai97d26122000-10-25 16:25:50 +0000155 fatalError("server does not support ranges\n");
Eric Andersenb520e082000-10-03 00:21:45 +0000156 case 206:
157 if (do_continue)
158 break;
159 /*FALLTHRU*/
160 default:
Matt Kraai97d26122000-10-25 16:25:50 +0000161 fatalError("server returned error: %s", buf);
Eric Andersen96700832000-09-04 15:15:55 +0000162 }
163
164 /*
165 * Retrieve HTTP headers.
166 */
167 while ((s = gethdr(buf, sizeof(buf), sfp, &n)) != NULL) {
168 if (strcmp(buf, "content-length") == 0) {
Eric Andersenb520e082000-10-03 00:21:45 +0000169 filesize = atol(s);
Eric Andersen96700832000-09-04 15:15:55 +0000170 got_clen = 1;
171 continue;
172 }
173 if (strcmp(buf, "transfer-encoding") == 0) {
Matt Kraai97d26122000-10-25 16:25:50 +0000174 fatalError("server wants to do %s transfer encoding\n", s);
Eric Andersen96700832000-09-04 15:15:55 +0000175 continue;
176 }
177 }
178
179 /*
180 * Retrieve HTTP body.
181 */
Eric Andersenb520e082000-10-03 00:21:45 +0000182#ifdef BB_FEATURE_STATUSBAR
183 statbytes=0;
184 progressmeter(-1);
185#endif
186 while (filesize > 0 && (n = fread(buf, 1, sizeof(buf), sfp)) > 0) {
187 fwrite(buf, 1, n, output);
188#ifdef BB_FEATURE_STATUSBAR
189 statbytes+=n;
190 progressmeter(1);
191#endif
Eric Andersen96700832000-09-04 15:15:55 +0000192 if (got_clen)
Eric Andersenb520e082000-10-03 00:21:45 +0000193 filesize -= n;
Eric Andersen96700832000-09-04 15:15:55 +0000194 }
195 if (n == 0 && ferror(sfp))
Matt Kraai97d26122000-10-25 16:25:50 +0000196 fatalPerror("network read error");
Eric Andersen96700832000-09-04 15:15:55 +0000197
198 exit(0);
199}
200
201
/*
 * parse_url - split an "http://host[:port][/path]" URL into its components.
 *
 * url       the URL; modified in place (the host name is moved to the front
 *           of the buffer and NUL-terminated there)
 * uri_host  receives a pointer to the host name (inside url)
 * uri_port  receives the port number, 80 when the URL names none
 * uri_path  receives a pointer to the path (inside url), or to "/" when the
 *           URL has no path component
 *
 * Malformed URLs are reported through fatalError().  All validation happens
 * before the buffer is touched so the message shows the URL as given.
 */
void parse_url(char *url, char **uri_host, int *uri_port, char **uri_path)
{
	static char root_path[] = "/";
	char *host, *end, *colon;
	size_t hostlen;

	*uri_port = 80;

	if (strncmp(url, "http://", 7) != 0)
		fatalError("not an http url: %s\n", url);

	host = url + 7;
	end = host + strcspn(host, "/");	/* first '/' or the terminating NUL */
	colon = memchr(host, ':', (size_t) (end - host));
	hostlen = (size_t) ((colon != NULL ? colon : end) - host);

	if (hostlen == 0)
		fatalError("cannot parse url: %s\n", url);

	/* An empty port ("host:/path") keeps the default. */
	if (colon != NULL && colon + 1 != end) {
		char *digits_end;
		long port = strtol(colon + 1, &digits_end, 10);

		if (digits_end != end || port < 1 || port > 65535)
			fatalError("cannot parse url: %s\n", url);
		*uri_port = (int) port;
	}

	*uri_path = (*end == '/') ? end : root_path;

	/*
	 * Pull the host portion to the front of the buffer.  The terminator
	 * lands at least 7 bytes before the path, so the path stays intact.
	 */
	memmove(url, host, hostlen);
	url[hostlen] = '\0';
	*uri_host = url;
}
225
226
/*
 * open_socket - connect to host:port over TCP/IPv4 and wrap the socket in a
 * stdio stream opened for update ("r+").
 *
 * host  name (or dotted address) handed to gethostbyname()
 * port  TCP port in host byte order
 *
 * Returns the connected stream.  Every failure is reported through
 * fatalError()/fatalPerror().
 */
FILE *open_socket(char *host, int port)
{
	struct sockaddr_in sin;
	struct hostent *hp;
	int fd;
	FILE *fp;

	/*
	 * Only accept an answer that really is an IPv4 address of the expected
	 * size; copying hp->h_length bytes blindly could overrun sin.sin_addr.
	 */
	hp = gethostbyname(host);
	if (hp == NULL
		|| hp->h_addrtype != AF_INET
		|| hp->h_length != (int) sizeof(sin.sin_addr)
		|| hp->h_addr_list[0] == NULL)
		fatalError("cannot resolve %s\n", host);

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	memcpy(&sin.sin_addr, hp->h_addr_list[0], sizeof(sin.sin_addr));
	sin.sin_port = htons(port);

	/*
	 * Get the server onto a stdio stream.
	 */
	if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
		fatalPerror("socket()");
	if (connect(fd, (struct sockaddr *) &sin, sizeof(sin)) < 0)
		fatalPerror("connect(%s)", host);
	if ((fp = fdopen(fd, "r+")) == NULL)
		fatalPerror("fdopen()");

	return fp;
}
253
254
/*
 * gethdr - read one HTTP header line from fp.
 *
 * buf      receives the line; on return it holds the header name, converted
 *          to lower case and NUL-terminated
 * bufsiz   size of buf
 * fp       stream to read from
 * istrunc  set to 1 when the line did not fit in buf (the rest of the line
 *          is read and discarded), 0 otherwise
 *
 * Returns a pointer (inside buf) to the header value with leading blanks and
 * the line terminator removed, or NULL at the blank line ending the headers
 * or when nothing more can be read.  A line whose name is not followed by
 * ':' is reported through fatalError().
 */
char *gethdr(char *buf, size_t bufsiz, FILE *fp, int *istrunc)
{
	char *s, *hdrval;
	int c;

	*istrunc = 0;

	/* retrieve header line */
	if (fgets(buf, (int) bufsiz, fp) == NULL)
		return NULL;

	/* see if we are at the end of the headers */
	for (s = buf ; *s == '\r' ; ++s)
		;
	if (s[0] == '\n')
		return NULL;

	/*
	 * convert the header name to lower case; the <ctype.h> functions need
	 * an unsigned char value, plain char may be negative
	 */
	for (s = buf ; isalnum((unsigned char) *s) || *s == '-' ; ++s)
		*s = (char) tolower((unsigned char) *s);

	/* verify we are at the end of the header name */
	if (*s != ':')
		fatalError("bad header line: %s\n", buf);

	/* terminate the name and locate the start of the header value */
	for (*s++ = '\0' ; *s == ' ' || *s == '\t' ; ++s)
		;
	hdrval = s;

	/* locate the end of header */
	while (*s != '\0' && *s != '\r' && *s != '\n')
		++s;

	/* end of header found */
	if (*s != '\0') {
		*s = '\0';
		return hdrval;
	}

	/* Rats!  The buffer isn't big enough to hold the entire header value. */
	while (c = getc(fp), c != EOF && c != '\n')
		;
	*istrunc = 1;
	return hdrval;
}
301
Eric Andersenb520e082000-10-03 00:21:45 +0000302#ifdef BB_FEATURE_STATUSBAR
/* Stuff below is from BSD rcp util.c, as added to openssh.
 * Original copyright notice is retained at the end of this file.
305 *
306 */
Eric Andersenb520e082000-10-03 00:21:45 +0000307
308
/*
 * getttywidth - width in columns of the terminal behind stdout.
 *
 * Returns 80 when the window size cannot be queried or is reported as zero.
 */
int
getttywidth(void)
{
	struct winsize ws;

	if (ioctl(fileno(stdout), TIOCGWINSZ, &ws) == -1)
		return 80;
	if (ws.ws_col == 0)
		return 80;
	return ws.ws_col;
}
319
/*
 * updateprogressmeter - signal handler (installed for SIGALRM by
 * progressmeter) that redraws the meter.  errno is preserved across the
 * call so the interrupted code does not see it change.
 */
void
updateprogressmeter(int ignore)
{
	const int saved_errno = errno;

	(void) ignore;				/* signal number is not needed */
	progressmeter(0);
	errno = saved_errno;
}
328
/*
 * alarmtimer - program the ITIMER_REAL interval timer so that it first
 * expires after `wait' seconds and then every `wait' seconds.  A value of
 * zero therefore disarms the timer.
 */
void
alarmtimer(int wait)
{
	struct itimerval timer;

	timer.it_interval.tv_sec = wait;
	timer.it_interval.tv_usec = 0;
	timer.it_value = timer.it_interval;
	setitimer(ITIMER_REAL, &timer, NULL);
}
339
340
341void
342progressmeter(int flag)
343{
344 static const char prefixes[] = " KMGTP";
345 static struct timeval lastupdate;
346 static off_t lastsize;
347 struct timeval now, td, wait;
348 off_t cursize, abbrevsize;
349 double elapsed;
350 int ratio, barlength, i, remaining;
351 char buf[256];
352
353 if (flag == -1) {
354 (void) gettimeofday(&start, (struct timezone *) 0);
355 lastupdate = start;
356 lastsize = 0;
357 }
358
359 (void) gettimeofday(&now, (struct timezone *) 0);
360 cursize = statbytes;
361 if (filesize != 0) {
362 ratio = 100.0 * cursize / filesize;
363 ratio = MAX(ratio, 0);
364 ratio = MIN(ratio, 100);
365 } else
366 ratio = 100;
367
368 snprintf(buf, sizeof(buf), "\r%-20.20s %3d%% ", curfile, ratio);
369
370 barlength = getttywidth() - 51;
371 if (barlength > 0) {
372 i = barlength * ratio / 100;
373 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
374 "|%.*s%*s|", i,
375 "*****************************************************************************"
376 "*****************************************************************************",
377 barlength - i, "");
378 }
379 i = 0;
380 abbrevsize = cursize;
381 while (abbrevsize >= 100000 && i < sizeof(prefixes)) {
382 i++;
383 abbrevsize >>= 10;
384 }
385 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " %5d %c%c ",
386 (int) abbrevsize, prefixes[i], prefixes[i] == ' ' ? ' ' :
387 'B');
388
389 timersub(&now, &lastupdate, &wait);
390 if (cursize > lastsize) {
391 lastupdate = now;
392 lastsize = cursize;
393 if (wait.tv_sec >= STALLTIME) {
394 start.tv_sec += wait.tv_sec;
395 start.tv_usec += wait.tv_usec;
396 }
397 wait.tv_sec = 0;
398 }
399 timersub(&now, &start, &td);
400 elapsed = td.tv_sec + (td.tv_usec / 1000000.0);
401
402 if (statbytes <= 0 || elapsed <= 0.0 || cursize > filesize) {
403 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
404 " --:-- ETA");
405 } else if (wait.tv_sec >= STALLTIME) {
406 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
407 " - stalled -");
408 } else {
409 remaining = (int) (filesize / (statbytes / elapsed) - elapsed);
410 i = remaining / 3600;
411 if (i)
412 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
413 "%2d:", i);
414 else
415 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
416 " ");
417 i = remaining % 3600;
418 snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
419 "%02d:%02d ETA", i / 60, i % 60);
420 }
421 write(fileno(stdout), buf, strlen(buf));
422
423 if (flag == -1) {
424 struct sigaction sa;
425 sa.sa_handler = updateprogressmeter;
426 sigemptyset(&sa.sa_mask);
427 sa.sa_flags = SA_RESTART;
428 sigaction(SIGALRM, &sa, NULL);
429 alarmtimer(1);
430 } else if (flag == 1) {
431 alarmtimer(0);
432 statbytes = 0;
433 }
434}
435#endif
Eric Andersen4e573f42000-11-14 23:29:24 +0000436
/* Original copyright notice which applies to the BB_FEATURE_STATUSBAR stuff,
 * much of which was blatantly stolen from openssh.  */
439
440/*-
441 * Copyright (c) 1992, 1993
442 * The Regents of the University of California. All rights reserved.
443 *
444 * Redistribution and use in source and binary forms, with or without
445 * modification, are permitted provided that the following conditions
446 * are met:
447 * 1. Redistributions of source code must retain the above copyright
448 * notice, this list of conditions and the following disclaimer.
449 * 2. Redistributions in binary form must reproduce the above copyright
450 * notice, this list of conditions and the following disclaimer in the
451 * documentation and/or other materials provided with the distribution.
452 *
453 * 3. <BSD Advertising Clause omitted per the July 22, 1999 licensing change
454 * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change>
455 *
456 * 4. Neither the name of the University nor the names of its contributors
457 * may be used to endorse or promote products derived from this software
458 * without specific prior written permission.
459 *
460 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
461 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
462 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
463 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
464 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
465 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
466 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
467 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
468 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
469 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
470 * SUCH DAMAGE.
471 *
472 * $Id: wget.c,v 1.7 2000/11/14 23:29:24 andersen Exp $
473 */
474
475
476
Eric Andersen96700832000-09-04 15:15:55 +0000477/*
478Local Variables:
479c-file-style: "linux"
480c-basic-offset: 4
481tab-width: 4
482End:
483*/
Eric Andersenb520e082000-10-03 00:21:45 +0000484
Eric Andersen4e573f42000-11-14 23:29:24 +0000485
486