blob: f26bcca8613bdb44c0a60315183fbb5c20c7c348 [file] [log] [blame]
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001/* vi: set sw=4 ts=4: */
2/*
3 * Mini diff implementation for busybox, adapted from OpenBSD diff.
4 *
5 * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00006 * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
7 *
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00008 * Sponsored in part by the Defense Advanced Research Projects
9 * Agency (DARPA) and Air Force Research Laboratory, Air Force
10 * Materiel Command, USAF, under agreement number F39502-99-1-0512.
Bernhard Reutner-Fischer14aa06f2006-05-19 13:02:27 +000011 *
Rob Landleye4386342006-04-18 20:41:51 +000012 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +000013 */
14
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +000015#include "busybox.h"
16
17#define FSIZE_MAX 32768
18
19/*
20 * Output flags
21 */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +000022#define D_HEADER 1 /* Print a header/footer between files */
23#define D_EMPTY1 2 /* Treat first file as empty (/dev/null) */
24#define D_EMPTY2 4 /* Treat second file as empty (/dev/null) */
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +000025
26/*
27 * Status values for print_status() and diffreg() return values
28 * Guide:
29 * D_SAME - files are the same
30 * D_DIFFER - files differ
31 * D_BINARY - binary files differ
32 * D_COMMON - subdirectory common to both dirs
33 * D_ONLY - file only exists in one dir
34 * D_MISMATCH1 - path1 a dir, path2 a file
35 * D_MISMATCH2 - path1 a file, path2 a dir
36 * D_ERROR - error occurred
37 * D_SKIPPED1 - skipped path1 as it is a special file
38 * D_SKIPPED2 - skipped path2 as it is a special file
39 */
40
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +000041#define D_SAME 0
42#define D_DIFFER (1<<0)
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +000043#define D_BINARY (1<<1)
44#define D_COMMON (1<<2)
45#define D_ONLY (1<<3)
46#define D_MISMATCH1 (1<<4)
47#define D_MISMATCH2 (1<<5)
48#define D_ERROR (1<<6)
49#define D_SKIPPED1 (1<<7)
50#define D_SKIPPED2 (1<<8)
51
52/* Command line options */
53static unsigned long cmd_flags;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +000054
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +000055#define FLAG_a (1<<0)
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +000056#define FLAG_b (1<<1)
57#define FLAG_d (1<<2)
58#define FLAG_i (1<<3)
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +000059#define FLAG_L (1<<4)
60#define FLAG_N (1<<5)
61#define FLAG_q (1<<6)
62#define FLAG_r (1<<7)
63#define FLAG_s (1<<8)
64#define FLAG_S (1<<9)
65#define FLAG_t (1<<10)
66#define FLAG_T (1<<11)
67#define FLAG_U (1<<12)
68#define FLAG_w (1<<13)
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +000069
70int context, status;
71char *start, *label[2];
72struct stat stb1, stb2;
73char **dl;
74int dl_count = 0;
75
76struct cand {
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +000077 int x;
78 int y;
79 int pred;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +000080};
81
82struct line {
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +000083 int serial;
84 int value;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +000085} *file[2];
86
87/*
88 * The following struct is used to record change information
89 * doing a "context" or "unified" diff. (see routine "change" to
90 * understand the highly mnemonic field names)
91 */
92struct context_vec {
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +000093 int a; /* start line in old file */
94 int b; /* end line in old file */
95 int c; /* start line in new file */
96 int d; /* end line in new file */
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +000097};
98
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +000099static int *J; /* will be overlaid on class */
100static int *class; /* will be overlaid on file[0] */
101static int *klist; /* will be overlaid on file[0] after class */
102static int *member; /* will be overlaid on file[1] */
103static int clen;
104static int len[2];
105static int pref, suff; /* length of prefix and suffix */
106static int slen[2];
107static int anychange;
108static long *ixnew; /* will be overlaid on file[1] */
109static long *ixold; /* will be overlaid on klist */
110static struct cand *clist; /* merely a free storage pot for candidates */
111static int clistlen; /* the length of clist */
112static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000113static struct context_vec *context_vec_start;
114static struct context_vec *context_vec_end;
115static struct context_vec *context_vec_ptr;
116
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000117static void print_only(const char *path, size_t dirlen, const char *entry)
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000118{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000119 if (dirlen > 1)
120 dirlen--;
121 printf("Only in %.*s: %s\n", (int) dirlen, path, entry);
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000122}
123
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000124static void print_status(int val, char *path1, char *path2, char *entry)
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000125{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000126 const char *const _entry = entry ? entry : "";
Bernhard Reutner-Fischer5fb0fec2006-04-06 11:28:19 +0000127 char *_path1 = entry ? concat_path_file(path1, _entry) : path1;
128 char *_path2 = entry ? concat_path_file(path2, _entry) : path2;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000129
130 switch (val) {
131 case D_ONLY:
132 print_only(path1, strlen(path1), entry);
133 break;
134 case D_COMMON:
135 printf("Common subdirectories: %s and %s\n", _path1, _path2);
136 break;
137 case D_BINARY:
138 printf("Binary files %s and %s differ\n", _path1, _path2);
139 break;
140 case D_DIFFER:
141 if (cmd_flags & FLAG_q)
142 printf("Files %s and %s differ\n", _path1, _path2);
143 break;
144 case D_SAME:
145 if (cmd_flags & FLAG_s)
146 printf("Files %s and %s are identical\n", _path1, _path2);
147 break;
148 case D_MISMATCH1:
149 printf("File %s is a directory while file %s is a regular file\n",
150 _path1, _path2);
151 break;
152 case D_MISMATCH2:
153 printf("File %s is a regular file while file %s is a directory\n",
154 _path1, _path2);
155 break;
156 case D_SKIPPED1:
157 printf("File %s is not a regular file or directory and was skipped\n",
158 _path1);
159 break;
160 case D_SKIPPED2:
161 printf("File %s is not a regular file or directory and was skipped\n",
162 _path2);
163 break;
164 }
165 if (entry) {
166 free(_path1);
167 free(_path2);
168 }
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000169}
170
171/*
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000172 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
173 */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000174static int readhash(FILE * f)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000175{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000176 int i, t, space;
177 int sum;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000178
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000179 sum = 1;
180 space = 0;
181 if (!(cmd_flags & FLAG_b) && !(cmd_flags & FLAG_w)) {
182 if (FLAG_i)
183 for (i = 0; (t = getc(f)) != '\n'; i++) {
184 if (t == EOF) {
185 if (i == 0)
186 return (0);
187 break;
188 }
189 sum = sum * 127 + t;
190 } else
191 for (i = 0; (t = getc(f)) != '\n'; i++) {
192 if (t == EOF) {
193 if (i == 0)
194 return (0);
195 break;
196 }
197 sum = sum * 127 + t;
198 }
199 } else {
200 for (i = 0;;) {
201 switch (t = getc(f)) {
202 case '\t':
203 case '\r':
204 case '\v':
205 case '\f':
206 case ' ':
207 space++;
208 continue;
209 default:
210 if (space && !(cmd_flags & FLAG_w)) {
211 i++;
212 space = 0;
213 }
214 sum = sum * 127 + t;
215 i++;
216 continue;
217 case EOF:
218 if (i == 0)
219 return (0);
220 /* FALLTHROUGH */
221 case '\n':
222 break;
223 }
224 break;
225 }
226 }
227 /*
228 * There is a remote possibility that we end up with a zero sum.
229 * Zero is used as an EOF marker, so return 1 instead.
230 */
231 return (sum == 0 ? 1 : sum);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000232}
233
234
235
236/*
237 * Check to see if the given files differ.
238 * Returns 0 if they are the same, 1 if different, and -1 on error.
239 */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000240static int files_differ(FILE * f1, FILE * f2, int flags)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000241{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000242 char buf1[BUFSIZ], buf2[BUFSIZ];
243 size_t i, j;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000244
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000245 if ((flags & (D_EMPTY1 | D_EMPTY2)) || stb1.st_size != stb2.st_size ||
246 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
247 return (1);
248 while (1) {
249 i = fread(buf1, 1, sizeof(buf1), f1);
250 j = fread(buf2, 1, sizeof(buf2), f2);
251 if (i != j)
252 return (1);
253 if (i == 0 && j == 0) {
254 if (ferror(f1) || ferror(f2))
255 return (1);
256 return (0);
257 }
258 if (memcmp(buf1, buf2, i) != 0)
259 return (1);
260 }
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000261}
262
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000263static void prepare(int i, FILE * fd, off_t filesize)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000264{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000265 struct line *p;
266 int h;
267 size_t j, sz;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000268
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000269 rewind(fd);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000270
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000271 sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25;
272 if (sz < 100)
273 sz = 100;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000274
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000275 p = xmalloc((sz + 3) * sizeof(struct line));
276 for (j = 0; (h = readhash(fd));) {
277 if (j == sz) {
278 sz = sz * 3 / 2;
279 p = xrealloc(p, (sz + 3) * sizeof(struct line));
280 }
281 p[++j].value = h;
282 }
283 len[i] = j;
284 file[i] = p;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000285}
286
287static void prune(void)
288{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000289 int i, j;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000290
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000291 for (pref = 0; pref < len[0] && pref < len[1] &&
292 file[0][pref + 1].value == file[1][pref + 1].value; pref++);
293 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
294 file[0][len[0] - suff].value == file[1][len[1] - suff].value;
295 suff++);
296 for (j = 0; j < 2; j++) {
297 sfile[j] = file[j] + pref;
298 slen[j] = len[j] - pref - suff;
299 for (i = 0; i <= slen[j]; i++)
300 sfile[j][i].serial = i;
301 }
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000302}
303
304static void equiv(struct line *a, int n, struct line *b, int m, int *c)
305{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000306 int i, j;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000307
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000308 i = j = 1;
309 while (i <= n && j <= m) {
310 if (a[i].value < b[j].value)
311 a[i++].value = 0;
312 else if (a[i].value == b[j].value)
313 a[i++].value = j;
314 else
315 j++;
316 }
317 while (i <= n)
318 a[i++].value = 0;
319 b[m + 1].value = 0;
320 j = 0;
321 while (++j <= m) {
322 c[j] = -b[j].serial;
323 while (b[j + 1].value == b[j].value) {
324 j++;
325 c[j] = b[j].serial;
326 }
327 }
328 c[j] = -1;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000329}
330
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000331static int isqrt(int n)
332{
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000333 int y, x = 1;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000334
335 if (n == 0)
336 return (0);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000337
338 do {
339 y = x;
340 x = n / x;
341 x += y;
342 x /= 2;
343 } while ((x - y) > 1 || (x - y) < -1);
344
345 return (x);
346}
347
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000348static int newcand(int x, int y, int pred)
349{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000350 struct cand *q;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000351
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000352 if (clen == clistlen) {
353 clistlen = clistlen * 11 / 10;
354 clist = xrealloc(clist, clistlen * sizeof(struct cand));
355 }
356 q = clist + clen;
357 q->x = x;
358 q->y = y;
359 q->pred = pred;
360 return (clen++);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000361}
362
363
364static int search(int *c, int k, int y)
365{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000366 int i, j, l, t;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000367
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000368 if (clist[c[k]].y < y) /* quick look for typical case */
369 return (k + 1);
370 i = 0;
371 j = k + 1;
372 while (1) {
373 l = i + j;
374 if ((l >>= 1) <= i)
375 break;
376 t = clist[c[l]].y;
377 if (t > y)
378 j = l;
379 else if (t < y)
380 i = l;
381 else
382 return (l);
383 }
384 return (l + 1);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000385}
386
387
388static int stone(int *a, int n, int *b, int *c)
389{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000390 int i, k, y, j, l;
391 int oldc, tc, oldl;
392 unsigned int numtries;
393
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +0000394#if ENABLE_FEATURE_DIFF_MINIMAL
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000395 const unsigned int bound =
396 (cmd_flags & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n));
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000397#else
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000398 const unsigned int bound = MAX(256, isqrt(n));
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000399#endif
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000400 k = 0;
401 c[0] = newcand(0, 0, 0);
402 for (i = 1; i <= n; i++) {
403 j = a[i];
404 if (j == 0)
405 continue;
406 y = -b[j];
407 oldl = 0;
408 oldc = c[0];
409 numtries = 0;
410 do {
411 if (y <= clist[oldc].y)
412 continue;
413 l = search(c, k, y);
414 if (l != oldl + 1)
415 oldc = c[l - 1];
416 if (l <= k) {
417 if (clist[c[l]].y <= y)
418 continue;
419 tc = c[l];
420 c[l] = newcand(i, y, oldc);
421 oldc = tc;
422 oldl = l;
423 numtries++;
424 } else {
425 c[l] = newcand(i, y, oldc);
426 k++;
427 break;
428 }
429 } while ((y = b[++j]) > 0 && numtries < bound);
430 }
431 return (k);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000432}
433
434static void unravel(int p)
435{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000436 struct cand *q;
437 int i;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000438
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000439 for (i = 0; i <= len[0]; i++)
440 J[i] = i <= pref ? i : i > len[0] - suff ? i + len[1] - len[0] : 0;
441 for (q = clist + p; q->y != 0; q = clist + q->pred)
442 J[q->x + pref] = q->y + pref;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000443}
444
445
446static void unsort(struct line *f, int l, int *b)
447{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000448 int *a, i;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000449
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000450 a = xmalloc((l + 1) * sizeof(int));
451 for (i = 1; i <= l; i++)
452 a[f[i].serial] = f[i].value;
453 for (i = 1; i <= l; i++)
454 b[i] = a[i];
455 free(a);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000456}
457
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000458static int skipline(FILE * f)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000459{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000460 int i, c;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000461
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000462 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
463 continue;
464 return (i);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000465}
466
467
468/*
469 * Check does double duty:
470 * 1. ferret out any fortuitous correspondences due
471 * to confounding by hashing (which result in "jackpot")
472 * 2. collect random access indexes to the two files
473 */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000474static void check(FILE * f1, FILE * f2)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000475{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000476 int i, j, jackpot, c, d;
477 long ctold, ctnew;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000478
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000479 rewind(f1);
480 rewind(f2);
481 j = 1;
482 ixold[0] = ixnew[0] = 0;
483 jackpot = 0;
484 ctold = ctnew = 0;
485 for (i = 1; i <= len[0]; i++) {
486 if (J[i] == 0) {
487 ixold[i] = ctold += skipline(f1);
488 continue;
489 }
490 while (j < J[i]) {
491 ixnew[j] = ctnew += skipline(f2);
492 j++;
493 }
494 if ((cmd_flags & FLAG_b) || (cmd_flags & FLAG_w)
495 || (cmd_flags & FLAG_i)) {
496 while (1) {
497 c = getc(f1);
498 d = getc(f2);
499 /*
500 * GNU diff ignores a missing newline
501 * in one file if bflag || wflag.
502 */
503 if (((cmd_flags & FLAG_b) || (cmd_flags & FLAG_w)) &&
504 ((c == EOF && d == '\n') || (c == '\n' && d == EOF))) {
505 break;
506 }
507 ctold++;
508 ctnew++;
509 if ((cmd_flags & FLAG_b) && isspace(c) && isspace(d)) {
510 do {
511 if (c == '\n')
512 break;
513 ctold++;
514 } while (isspace(c = getc(f1)));
515 do {
516 if (d == '\n')
517 break;
518 ctnew++;
519 } while (isspace(d = getc(f2)));
520 } else if (cmd_flags & FLAG_w) {
521 while (isspace(c) && c != '\n') {
522 c = getc(f1);
523 ctold++;
524 }
525 while (isspace(d) && d != '\n') {
526 d = getc(f2);
527 ctnew++;
528 }
529 }
530 if (c != d) {
531 jackpot++;
532 J[i] = 0;
533 if (c != '\n' && c != EOF)
534 ctold += skipline(f1);
535 if (d != '\n' && c != EOF)
536 ctnew += skipline(f2);
537 break;
538 }
539 if (c == '\n' || c == EOF)
540 break;
541 }
542 } else {
543 while (1) {
544 ctold++;
545 ctnew++;
546 if ((c = getc(f1)) != (d = getc(f2))) {
547 J[i] = 0;
548 if (c != '\n' && c != EOF)
549 ctold += skipline(f1);
550 if (d != '\n' && c != EOF)
551 ctnew += skipline(f2);
552 break;
553 }
554 if (c == '\n' || c == EOF)
555 break;
556 }
557 }
558 ixold[i] = ctold;
559 ixnew[j] = ctnew;
560 j++;
561 }
562 for (; j <= len[1]; j++)
563 ixnew[j] = ctnew += skipline(f2);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000564}
565
566/* shellsort CACM #201 */
567static void sort(struct line *a, int n)
568{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000569 struct line *ai, *aim, w;
570 int j, m = 0, k;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000571
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000572 if (n == 0)
573 return;
574 for (j = 1; j <= n; j *= 2)
575 m = 2 * j - 1;
576 for (m /= 2; m != 0; m /= 2) {
577 k = n - m;
578 for (j = 1; j <= k; j++) {
579 for (ai = &a[j]; ai > a; ai -= m) {
580 aim = &ai[m];
581 if (aim < ai)
582 break; /* wraparound */
583 if (aim->value > ai[0].value ||
584 (aim->value == ai[0].value && aim->serial > ai[0].serial))
585 break;
586 w.value = ai[0].value;
587 ai[0].value = aim->value;
588 aim->value = w.value;
589 w.serial = ai[0].serial;
590 ai[0].serial = aim->serial;
591 aim->serial = w.serial;
592 }
593 }
594 }
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000595}
596
597
598static void uni_range(int a, int b)
599{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000600 if (a < b)
601 printf("%d,%d", a, b - a + 1);
602 else if (a == b)
603 printf("%d", b);
604 else
605 printf("%d,0", b);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000606}
607
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000608static int fetch(long *f, int a, int b, FILE * lb, int ch)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000609{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000610 int i, j, c, lastc, col, nc;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000611
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000612 if (a > b)
613 return (0);
614 for (i = a; i <= b; i++) {
615 fseek(lb, f[i - 1], SEEK_SET);
616 nc = f[i] - f[i - 1];
617 if (ch != '\0') {
618 putchar(ch);
619 if (cmd_flags & FLAG_T)
620 putchar('\t');
621 }
622 col = 0;
623 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
624 if ((c = getc(lb)) == EOF) {
625 puts("\n\\ No newline at end of file");
626 return (0);
627 }
628 if (c == '\t' && (cmd_flags & FLAG_t)) {
629 do {
630 putchar(' ');
631 } while (++col & 7);
632 } else {
633 putchar(c);
634 col++;
635 }
636 }
637 }
638 return (0);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000639}
640
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000641static int asciifile(FILE * f)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000642{
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +0000643#if ENABLE_FEATURE_DIFF_BINARY
Bernhard Reutner-Fischer5fb0fec2006-04-06 11:28:19 +0000644 unsigned char buf[BUFSIZ];
645 int i, cnt;
646#endif
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000647
648 if ((cmd_flags & FLAG_a) || f == NULL)
649 return (1);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000650
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +0000651#if ENABLE_FEATURE_DIFF_BINARY
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000652 rewind(f);
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +0000653 cnt = fread(buf, 1, sizeof(buf), f);
654 for (i = 0; i < cnt; i++) {
655 if (!isprint(buf[i]) && !isspace(buf[i])) {
656 return (0);
657 }
658 }
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000659#endif
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +0000660 return (1);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000661}
662
663/* dump accumulated "unified" diff changes */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000664static void dump_unified_vec(FILE * f1, FILE * f2)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000665{
666 struct context_vec *cvp = context_vec_start;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000667 int lowa, upb, lowc, upd;
668 int a, b, c, d;
669 char ch;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000670
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000671 if (context_vec_start > context_vec_ptr)
672 return;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000673
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000674 b = d = 0; /* gcc */
675 lowa = MAX(1, cvp->a - context);
676 upb = MIN(len[0], context_vec_ptr->b + context);
677 lowc = MAX(1, cvp->c - context);
678 upd = MIN(len[1], context_vec_ptr->d + context);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000679
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000680 fputs("@@ -", stdout);
681 uni_range(lowa, upb);
682 fputs(" +", stdout);
683 uni_range(lowc, upd);
684 fputs(" @@", stdout);
685 putchar('\n');
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000686
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000687 /*
688 * Output changes in "unified" diff format--the old and new lines
689 * are printed together.
690 */
691 for (; cvp <= context_vec_ptr; cvp++) {
692 a = cvp->a;
693 b = cvp->b;
694 c = cvp->c;
695 d = cvp->d;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000696
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000697 /*
698 * c: both new and old changes
699 * d: only changes in the old file
700 * a: only changes in the new file
701 */
702 if (a <= b && c <= d)
703 ch = 'c';
704 else
705 ch = (a <= b) ? 'd' : 'a';
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000706 if (ch == 'c' || ch == 'd') {
707 fetch(ixold, lowa, a - 1, f1, ' ');
708 fetch(ixold, a, b, f1, '-');
709 }
710 if (ch == 'a')
711 fetch(ixnew, lowc, c - 1, f2, ' ');
712 if (ch == 'c' || ch == 'a')
713 fetch(ixnew, c, d, f2, '+');
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000714 lowa = b + 1;
715 lowc = d + 1;
716 }
717 fetch(ixnew, d + 1, upd, f2, ' ');
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000718
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000719 context_vec_ptr = context_vec_start - 1;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000720}
721
722
723static void print_header(const char *file1, const char *file2)
724{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000725 if (label[0] != NULL)
726 printf("%s %s\n", "---", label[0]);
727 else
728 printf("%s %s\t%s", "---", file1, ctime(&stb1.st_mtime));
729 if (label[1] != NULL)
730 printf("%s %s\n", "+++", label[1]);
731 else
732 printf("%s %s\t%s", "+++", file2, ctime(&stb2.st_mtime));
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000733}
734
735
736
737/*
738 * Indicate that there is a difference between lines a and b of the from file
739 * to get to lines c to d of the to file. If a is greater then b then there
740 * are no lines in the from file involved and this means that there were
741 * lines appended (beginning at b). If c is greater than d then there are
742 * lines missing from the to file.
743 */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000744static void change(char *file1, FILE * f1, char *file2, FILE * f2, int a,
745 int b, int c, int d)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000746{
747 static size_t max_context = 64;
748
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000749 if (a > b && c > d)
750 return;
751 if (cmd_flags & FLAG_q)
752 return;
753
754 /*
755 * Allocate change records as needed.
756 */
757 if (context_vec_ptr == context_vec_end - 1) {
758 ptrdiff_t offset = context_vec_ptr - context_vec_start;
759
760 max_context <<= 1;
761 context_vec_start = xrealloc(context_vec_start,
762 max_context *
763 sizeof(struct context_vec));
764 context_vec_end = context_vec_start + max_context;
765 context_vec_ptr = context_vec_start + offset;
766 }
767 if (anychange == 0) {
768 /*
769 * Print the context/unidiff header first time through.
770 */
771 print_header(file1, file2);
772 anychange = 1;
773 } else if (a > context_vec_ptr->b + (2 * context) + 1 &&
774 c > context_vec_ptr->d + (2 * context) + 1) {
775 /*
776 * If this change is more than 'context' lines from the
777 * previous change, dump the record and reset it.
778 */
779 dump_unified_vec(f1, f2);
780 }
781 context_vec_ptr++;
782 context_vec_ptr->a = a;
783 context_vec_ptr->b = b;
784 context_vec_ptr->c = c;
785 context_vec_ptr->d = d;
786 return;
787
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000788}
789
790
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000791static void output(char *file1, FILE * f1, char *file2, FILE * f2)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000792{
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +0000793
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000794 /* Note that j0 and j1 can't be used as they are defined in math.h.
795 * This also allows the rather amusing variable 'j00'... */
796 int m, i0, i1, j00, j01;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000797
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000798 rewind(f1);
799 rewind(f2);
800 m = len[0];
801 J[0] = 0;
802 J[m + 1] = len[1] + 1;
803 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
804 while (i0 <= m && J[i0] == J[i0 - 1] + 1)
805 i0++;
806 j00 = J[i0 - 1] + 1;
807 i1 = i0 - 1;
808 while (i1 < m && J[i1 + 1] == 0)
809 i1++;
810 j01 = J[i1 + 1] - 1;
811 J[i1] = j01;
812 change(file1, f1, file2, f2, i0, i1, j00, j01);
813 }
814 if (m == 0) {
815 change(file1, f1, file2, f2, 1, 0, 1, len[1]);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000816 }
817 if (anychange != 0) {
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000818 dump_unified_vec(f1, f2);
819 }
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000820}
821
822/*
Denis Vlasenko9213a9e2006-09-17 16:28:10 +0000823 * The following code uses an algorithm due to Harold Stone,
824 * which finds a pair of longest identical subsequences in
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000825 * the two files.
826 *
827 * The major goal is to generate the match vector J.
828 * J[i] is the index of the line in file1 corresponding
829 * to line i file0. J[i] = 0 if there is no
830 * such line in file1.
831 *
832 * Lines are hashed so as to work in core. All potential
833 * matches are located by sorting the lines of each file
834 * on the hash (called ``value''). In particular, this
835 * collects the equivalence classes in file1 together.
836 * Subroutine equiv replaces the value of each line in
837 * file0 by the index of the first element of its
838 * matching equivalence in (the reordered) file1.
839 * To save space equiv squeezes file1 into a single
840 * array member in which the equivalence classes
841 * are simply concatenated, except that their first
842 * members are flagged by changing sign.
843 *
844 * Next the indices that point into member are unsorted into
845 * array class according to the original order of file0.
846 *
847 * The cleverness lies in routine stone. This marches
848 * through the lines of file0, developing a vector klist
849 * of "k-candidates". At step i a k-candidate is a matched
850 * pair of lines x,y (x in file0 y in file1) such that
851 * there is a common subsequence of length k
852 * between the first i lines of file0 and the first y
853 * lines of file1, but there is no such subsequence for
854 * any smaller y. x is the earliest possible mate to y
855 * that occurs in such a subsequence.
856 *
857 * Whenever any of the members of the equivalence class of
858 * lines in file1 matable to a line in file0 has serial number
859 * less than the y of some k-candidate, that k-candidate
860 * with the smallest such y is replaced. The new
861 * k-candidate is chained (via pred) to the current
862 * k-1 candidate so that the actual subsequence can
863 * be recovered. When a member has serial number greater
864 * that the y of all k-candidates, the klist is extended.
865 * At the end, the longest subsequence is pulled out
866 * and placed in the array J by unravel
867 *
868 * With J in hand, the matches there recorded are
869 * checked against reality to assure that no spurious
870 * matches have crept in due to hashing. If they have,
871 * they are broken, and "jackpot" is recorded--a harmless
872 * matter except that a true match for a spuriously
873 * mated line may now be unnecessarily reported as a change.
874 *
875 * Much of the complexity of the program comes simply
876 * from trying to minimize core utilization and
877 * maximize the range of doable problems by dynamically
878 * allocating what is needed and reusing what is not.
879 * The core requirements for problems larger than somewhat
880 * are (in words) 2*length(file0) + length(file1) +
881 * 3*(number of k-candidates installed), typically about
882 * 6n words for files of length n.
883 */
884
885static int diffreg(char *ofile1, char *ofile2, int flags)
886{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000887 char *file1 = ofile1;
888 char *file2 = ofile2;
889 FILE *f1 = NULL;
890 FILE *f2 = NULL;
891 int rval = D_SAME;
892 int i;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000893
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000894 anychange = 0;
895 context_vec_ptr = context_vec_start - 1;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000896
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000897 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
898 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
899 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
900 goto closem;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000901
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000902 if (flags & D_EMPTY1)
Rob Landleyd921b2e2006-08-03 15:41:12 +0000903 f1 = xfopen(bb_dev_null, "r");
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000904 else {
905 if (strcmp(file1, "-") == 0)
906 f1 = stdin;
907 else
Rob Landleyd921b2e2006-08-03 15:41:12 +0000908 f1 = xfopen(file1, "r");
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000909 }
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000910
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000911 if (flags & D_EMPTY2)
Rob Landleyd921b2e2006-08-03 15:41:12 +0000912 f2 = xfopen(bb_dev_null, "r");
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000913 else {
914 if (strcmp(file2, "-") == 0)
915 f2 = stdin;
916 else
Rob Landleyd921b2e2006-08-03 15:41:12 +0000917 f2 = xfopen(file2, "r");
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000918 }
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000919
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000920 if ((i = files_differ(f1, f2, flags)) == 0)
921 goto closem;
922 else if (i != 1) { /* 1 == ok */
923 /* error */
924 status |= 2;
925 goto closem;
926 }
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000927
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000928 if (!asciifile(f1) || !asciifile(f2)) {
929 rval = D_BINARY;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000930 status |= 1;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000931 goto closem;
932 }
933
934 prepare(0, f1, stb1.st_size);
935 prepare(1, f2, stb2.st_size);
936 prune();
937 sort(sfile[0], slen[0]);
938 sort(sfile[1], slen[1]);
939
940 member = (int *) file[1];
941 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
942 member = xrealloc(member, (slen[1] + 2) * sizeof(int));
943
944 class = (int *) file[0];
945 unsort(sfile[0], slen[0], class);
946 class = xrealloc(class, (slen[0] + 2) * sizeof(int));
947
948 klist = xmalloc((slen[0] + 2) * sizeof(int));
949 clen = 0;
950 clistlen = 100;
951 clist = xmalloc(clistlen * sizeof(struct cand));
952 i = stone(class, slen[0], member, klist);
953 free(member);
954 free(class);
955
956 J = xrealloc(J, (len[0] + 2) * sizeof(int));
957 unravel(klist[i]);
958 free(clist);
959 free(klist);
960
961 ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long));
962 ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long));
963 check(f1, f2);
964 output(file1, f1, file2, f2);
965
966 closem:
967 if (anychange) {
968 status |= 1;
969 if (rval == D_SAME)
970 rval = D_DIFFER;
971 }
972 if (f1 != NULL)
973 fclose(f1);
974 if (f2 != NULL)
975 fclose(f2);
976 if (file1 != ofile1)
977 free(file1);
978 if (file2 != ofile2)
979 free(file2);
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000980 return (rval);
981}
982
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000983#if ENABLE_FEATURE_DIFF_DIR
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000984static void do_diff(char *dir1, char *path1, char *dir2, char *path2)
985{
986
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000987 int flags = D_HEADER;
988 int val;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000989
Rob Landleyd921b2e2006-08-03 15:41:12 +0000990 char *fullpath1 = xasprintf("%s/%s", dir1, path1);
991 char *fullpath2 = xasprintf("%s/%s", dir2, path2);
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000992
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000993 if (stat(fullpath1, &stb1) != 0) {
994 flags |= D_EMPTY1;
995 memset(&stb1, 0, sizeof(stb1));
Rob Landleyd921b2e2006-08-03 15:41:12 +0000996 fullpath1 = xasprintf("%s/%s", dir1, path2);
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000997 }
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000998 if (stat(fullpath2, &stb2) != 0) {
999 flags |= D_EMPTY2;
1000 memset(&stb2, 0, sizeof(stb2));
1001 stb2.st_mode = stb1.st_mode;
Rob Landleyd921b2e2006-08-03 15:41:12 +00001002 fullpath2 = xasprintf("%s/%s", dir2, path1);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001003 }
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001004
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001005 if (stb1.st_mode == 0)
1006 stb1.st_mode = stb2.st_mode;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001007
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001008 if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
1009 printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2);
1010 return;
1011 }
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001012
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001013 if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode))
1014 val = D_SKIPPED1;
1015 else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode))
1016 val = D_SKIPPED2;
1017 else
1018 val = diffreg(fullpath1, fullpath2, flags);
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001019
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001020 print_status(val, fullpath1, fullpath2, NULL);
1021}
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001022#endif
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001023
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001024#if ENABLE_FEATURE_DIFF_DIR
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001025static int dir_strcmp(const void *p1, const void *p2)
1026{
1027 return strcmp(*(char *const *) p1, *(char *const *) p2);
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001028}
1029
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001030/* This function adds a filename to dl, the directory listing. */
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001031
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001032static int add_to_dirlist(const char *filename,
Denis Vlasenko8c35d652006-10-27 23:42:25 +00001033 struct stat ATTRIBUTE_UNUSED * sb, void *userdata, int depth)
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001034{
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001035 dl_count++;
1036 dl = xrealloc(dl, dl_count * sizeof(char *));
Rob Landleyd921b2e2006-08-03 15:41:12 +00001037 dl[dl_count - 1] = xstrdup(filename);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001038 if (cmd_flags & FLAG_r) {
1039 int *pp = (int *) userdata;
1040 int path_len = *pp + 1;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001041
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001042 dl[dl_count - 1] = &(dl[dl_count - 1])[path_len];
1043 }
1044 return TRUE;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001045}
1046
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001047/* This returns a sorted directory listing. */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001048static char **get_dir(char *path)
1049{
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001050
1051 int i;
Bernhard Reutner-Fischer5fb0fec2006-04-06 11:28:19 +00001052 char **retval;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001053
1054 /* If -r has been set, then the recursive_action function will be
1055 * used. Unfortunately, this outputs the root directory along with
1056 * the recursed paths, so use void *userdata to specify the string
1057 * length of the root directory. It can then be removed in
1058 * add_to_dirlist. */
1059
1060 int path_len = strlen(path);
1061 void *userdata = &path_len;
Bernhard Reutner-Fischer5fb0fec2006-04-06 11:28:19 +00001062
Rob Landleyd921b2e2006-08-03 15:41:12 +00001063 /* Reset dl_count - there's no need to free dl as xrealloc does
Bernhard Reutner-Fischer5fb0fec2006-04-06 11:28:19 +00001064 * the job nicely. */
1065 dl_count = 0;
1066
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001067 /* Now fill dl with a listing. */
1068 if (cmd_flags & FLAG_r)
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001069 recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL,
Denis Vlasenko8c35d652006-10-27 23:42:25 +00001070 userdata, 0);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001071 else {
1072 DIR *dp;
1073 struct dirent *ep;
Bernhard Reutner-Fischercb448162006-04-12 07:35:12 +00001074
Rob Landleyd921b2e2006-08-03 15:41:12 +00001075 dp = warn_opendir(path);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001076 while ((ep = readdir(dp))) {
1077 if ((!strcmp(ep->d_name, "..")) || (!strcmp(ep->d_name, ".")))
1078 continue;
Denis Vlasenko8c35d652006-10-27 23:42:25 +00001079 add_to_dirlist(ep->d_name, NULL, NULL, 0);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001080 }
1081 closedir(dp);
1082 }
1083
1084 /* Sort dl alphabetically. */
1085 qsort(dl, dl_count, sizeof(char *), dir_strcmp);
1086
1087 /* Copy dl so that we can return it. */
Bernhard Reutner-Fischer5fb0fec2006-04-06 11:28:19 +00001088 retval = xmalloc(dl_count * sizeof(char *));
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001089 for (i = 0; i < dl_count; i++)
Rob Landleyd921b2e2006-08-03 15:41:12 +00001090 retval[i] = xstrdup(dl[i]);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001091
1092 return retval;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001093}
1094
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001095static void diffdir(char *p1, char *p2)
1096{
1097
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001098 char **dirlist1, **dirlist2;
1099 char *dp1, *dp2;
1100 int dirlist1_count, dirlist2_count;
1101 int pos;
1102
1103 /* Check for trailing slashes. */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001104
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001105 if (p1[strlen(p1) - 1] == '/')
1106 p1[strlen(p1) - 1] = '\0';
1107 if (p2[strlen(p2) - 1] == '/')
1108 p2[strlen(p2) - 1] = '\0';
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001109
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001110 /* Get directory listings for p1 and p2. */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001111
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001112 dirlist1 = get_dir(p1);
1113 dirlist1_count = dl_count;
1114 dirlist1[dirlist1_count] = NULL;
1115 dirlist2 = get_dir(p2);
1116 dirlist2_count = dl_count;
1117 dirlist2[dirlist2_count] = NULL;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001118
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001119 /* If -S was set, find the starting point. */
1120 if (start) {
1121 while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0)
1122 dirlist1++;
1123 while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0)
1124 dirlist2++;
1125 if ((*dirlist1 == NULL) || (*dirlist2 == NULL))
Bernhard Reutner-Fischer19008b82006-06-07 20:17:41 +00001126 bb_error_msg(bb_msg_invalid_arg, "NULL", "-S");
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001127 }
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001128
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001129 /* Now that both dirlist1 and dirlist2 contain sorted directory
1130 * listings, we can start to go through dirlist1. If both listings
1131 * contain the same file, then do a normal diff. Otherwise, behaviour
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001132 * is determined by whether the -N flag is set. */
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001133 while (*dirlist1 != NULL || *dirlist2 != NULL) {
1134 dp1 = *dirlist1;
1135 dp2 = *dirlist2;
1136 pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2);
1137 if (pos == 0) {
1138 do_diff(p1, dp1, p2, dp2);
1139 dirlist1++;
1140 dirlist2++;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001141 } else if (pos < 0) {
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001142 if (cmd_flags & FLAG_N)
1143 do_diff(p1, dp1, p2, NULL);
1144 else
1145 print_only(p1, strlen(p1) + 1, dp1);
1146 dirlist1++;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001147 } else {
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001148 if (cmd_flags & FLAG_N)
1149 do_diff(p1, NULL, p2, dp2);
1150 else
1151 print_only(p2, strlen(p2) + 1, dp2);
1152 dirlist2++;
1153 }
1154 }
1155}
1156#endif
1157
1158
1159
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001160int diff_main(int argc, char **argv)
1161{
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001162 int gotstdin = 0;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001163
1164 char *U_opt;
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001165 llist_t *L_arg = NULL;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001166
Denis Vlasenko67b23e62006-10-03 21:00:06 +00001167 opt_complementary = "L::";
Denis Vlasenko7d219aa2006-10-05 10:17:08 +00001168 cmd_flags = getopt32(argc, argv, "abdiL:NqrsS:tTU:wu",
1169 &L_arg, &start, &U_opt);
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001170
1171 if (cmd_flags & FLAG_L) {
1172 while (L_arg) {
1173 if (label[0] == NULL)
1174 label[0] = L_arg->data;
1175 else if (label[1] == NULL)
1176 label[1] = L_arg->data;
1177 else
1178 bb_show_usage();
1179
1180 L_arg = L_arg->link;
1181 }
1182
1183 /* If both label[0] and label[1] were set, they need to be swapped. */
1184 if (label[0] && label[1]) {
1185 char *tmp;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001186
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001187 tmp = label[1];
1188 label[1] = label[0];
1189 label[0] = tmp;
1190 }
1191 }
1192
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001193 context = 3; /* This is the default number of lines of context. */
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001194 if (cmd_flags & FLAG_U) {
Denis Vlasenko13858992006-10-08 12:49:22 +00001195 context = xatoul_range(U_opt, 1, INT_MAX);
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001196 }
1197 argc -= optind;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001198 argv += optind;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001199
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001200 /*
1201 * Do sanity checks, fill in stb1 and stb2 and call the appropriate
1202 * driver routine. Both drivers use the contents of stb1 and stb2.
1203 */
1204 if (argc < 2) {
Denis Vlasenkoea620772006-10-14 02:23:43 +00001205 bb_error_msg("missing filename");
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001206 bb_show_usage();
1207 }
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001208 if (strcmp(argv[0], "-") == 0) {
1209 fstat(STDIN_FILENO, &stb1);
1210 gotstdin = 1;
1211 } else
1212 xstat(argv[0], &stb1);
1213 if (strcmp(argv[1], "-") == 0) {
1214 fstat(STDIN_FILENO, &stb2);
1215 gotstdin = 1;
1216 } else
1217 xstat(argv[1], &stb2);
1218 if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode)))
Denis Vlasenkoea620772006-10-14 02:23:43 +00001219 bb_error_msg_and_die("can't compare - to a directory");
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001220 if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001221#if ENABLE_FEATURE_DIFF_DIR
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001222 diffdir(argv[0], argv[1]);
1223#else
Denis Vlasenkoea620772006-10-14 02:23:43 +00001224 bb_error_msg_and_die("directory comparison not supported");
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001225#endif
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001226 } else {
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001227 if (S_ISDIR(stb1.st_mode)) {
1228 argv[0] = concat_path_file(argv[0], argv[1]);
Bernhard Reutner-Fischerd2c306e2006-05-29 12:10:23 +00001229 xstat(argv[0], &stb1);
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001230 }
1231 if (S_ISDIR(stb2.st_mode)) {
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001232 argv[1] = concat_path_file(argv[1], argv[0]);
Bernhard Reutner-Fischerd2c306e2006-05-29 12:10:23 +00001233 xstat(argv[1], &stb2);
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001234 }
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001235 print_status(diffreg(argv[0], argv[1], 0), argv[0], argv[1], NULL);
1236 }
1237 exit(status);
1238}