blob: 072e4bc208e19f6e7a2839aa3f6b486e0488a0b2 [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * Mini diff implementation for busybox, adapted from OpenBSD diff.
 *
 * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
 * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
 *
 * Sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F39502-99-1-0512.
 *
 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
 */
14
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +000015#include "busybox.h"
16
/* Cap applied to the file size when prepare() guesses the initial line count */
#define FSIZE_MAX	32768

/*
 * Output flags
 */
#define D_HEADER	1	/* Print a header/footer between files */
#define D_EMPTY1	2	/* Treat first file as empty (/dev/null) */
#define D_EMPTY2	4	/* Treat second file as empty (/dev/null) */

/*
 * Status values for print_status() and diffreg() return values
 */
#define D_SAME		0		/* files are the same */
#define D_DIFFER	(1 << 0)	/* files differ */
#define D_BINARY	(1 << 1)	/* binary files differ */
#define D_COMMON	(1 << 2)	/* subdirectory common to both dirs */
#define D_ONLY		(1 << 3)	/* file only exists in one dir */
#define D_MISMATCH1	(1 << 4)	/* path1 a dir, path2 a file */
#define D_MISMATCH2	(1 << 5)	/* path1 a file, path2 a dir */
#define D_ERROR		(1 << 6)	/* error occurred */
#define D_SKIPPED1	(1 << 7)	/* skipped path1 as it is a special file */
#define D_SKIPPED2	(1 << 8)	/* skipped path2 as it is a special file */

/* Command line options: one bit per option letter, tested in option_mask32 */
#define FLAG_a	(1 << 0)
#define FLAG_b	(1 << 1)
#define FLAG_d	(1 << 2)
#define FLAG_i	(1 << 3)
#define FLAG_L	(1 << 4)
#define FLAG_N	(1 << 5)
#define FLAG_q	(1 << 6)
#define FLAG_r	(1 << 7)
#define FLAG_s	(1 << 8)
#define FLAG_S	(1 << 9)
#define FLAG_t	(1 << 10)
#define FLAG_T	(1 << 11)
#define FLAG_U	(1 << 12)
#define FLAG_w	(1 << 13)
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +000067
Bernhard Reutner-Fischer5b6f7762006-12-13 16:50:15 +000068/* XXX: FIXME: the following variables should be static, but gcc currently
Denis Vlasenko6a1d6612006-12-16 22:18:44 +000069 * creates a much bigger object if we do this. [which version of gcc? --vda] */
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +000070int context, status;
71char *start, *label[2];
72struct stat stb1, stb2;
73char **dl;
Denis Vlasenko3bba5452006-12-30 17:57:03 +000074USE_FEATURE_DIFF_DIR(static int dl_count;)
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +000075
/*
 * One candidate match produced by stone(); candidates are stored in
 * clist and chained backwards through 'pred' (see unravel()).
 */
struct cand {
	int x;		/* line index in the first (pruned) file */
	int y;		/* line index in the second (pruned) file */
	int pred;	/* index in clist of the preceding candidate */
};
81
/*
 * Per-line record. prepare() stores the line hash in 'value' and
 * prune() stores the line position in 'serial'; file[0] and file[1]
 * hold the arrays for the two inputs.
 */
struct line {
	int serial;
	int value;
} *file[2];
86
/*
 * Change record collected while producing a "context" or "unified"
 * diff. The field names mirror the parameters of change(), which is
 * the routine that fills these records in.
 */
struct context_vec {
	int a;		/* start line in old file */
	int b;		/* end line in old file */
	int c;		/* start line in new file */
	int d;		/* end line in new file */
};
98
/* Working storage shared by the diff routines in this file */
static int *J;			/* will be overlaid on class */
static int *class;		/* will be overlaid on file[0] */
static int *klist;		/* will be overlaid on file[0] after class */
static int *member;		/* will be overlaid on file[1] */
static int clen;		/* number of candidates currently stored in clist */
static int len[2];		/* number of lines read from each file */
static int pref;		/* length of common prefix */
static int suff;		/* length of common suffix */
static int slen[2];		/* line counts left after pruning */
static int anychange;		/* set to 1 by change() */
static long *ixnew;		/* will be overlaid on file[1] */
static long *ixold;		/* will be overlaid on klist */
static struct cand *clist;	/* merely a free storage pot for candidates */
static int clistlen;		/* the length of clist */
static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
static struct context_vec *context_vec_start;
static struct context_vec *context_vec_end;
static struct context_vec *context_vec_ptr;
116
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000117
/*
 * Print the "Only in DIR: ENTRY" line.
 * DIR is the first dirlen bytes of path; when dirlen is greater than
 * one, the last of those bytes is left out of the output.
 */
static void print_only(const char *path, size_t dirlen, const char *entry)
{
	size_t shown = (dirlen > 1) ? dirlen - 1 : dirlen;

	printf("Only in %.*s: %s\n", (int) shown, path, entry);
}
124
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000125
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000126static void print_status(int val, char *path1, char *path2, char *entry)
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000127{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000128 const char *const _entry = entry ? entry : "";
Bernhard Reutner-Fischer5fb0fec2006-04-06 11:28:19 +0000129 char *_path1 = entry ? concat_path_file(path1, _entry) : path1;
130 char *_path2 = entry ? concat_path_file(path2, _entry) : path2;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000131
132 switch (val) {
133 case D_ONLY:
134 print_only(path1, strlen(path1), entry);
135 break;
136 case D_COMMON:
137 printf("Common subdirectories: %s and %s\n", _path1, _path2);
138 break;
139 case D_BINARY:
140 printf("Binary files %s and %s differ\n", _path1, _path2);
141 break;
142 case D_DIFFER:
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000143 if (option_mask32 & FLAG_q)
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000144 printf("Files %s and %s differ\n", _path1, _path2);
145 break;
146 case D_SAME:
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000147 if (option_mask32 & FLAG_s)
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000148 printf("Files %s and %s are identical\n", _path1, _path2);
149 break;
150 case D_MISMATCH1:
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000151 printf("File %s is a %s while file %s is a %s\n",
152 _path1, "directory", _path2, "regular file");
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000153 break;
154 case D_MISMATCH2:
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000155 printf("File %s is a %s while file %s is a %s\n",
156 _path1, "regular file", _path2, "directory");
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000157 break;
158 case D_SKIPPED1:
159 printf("File %s is not a regular file or directory and was skipped\n",
160 _path1);
161 break;
162 case D_SKIPPED2:
163 printf("File %s is not a regular file or directory and was skipped\n",
164 _path2);
165 break;
166 }
167 if (entry) {
168 free(_path1);
169 free(_path2);
170 }
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000171}
172
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000173
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000174/*
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000175 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
176 */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000177static int readhash(FILE * f)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000178{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000179 int i, t, space;
180 int sum;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000181
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000182 sum = 1;
183 space = 0;
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000184 if (!(option_mask32 & FLAG_b) && !(option_mask32 & FLAG_w)) {
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000185 if (FLAG_i)
186 for (i = 0; (t = getc(f)) != '\n'; i++) {
187 if (t == EOF) {
188 if (i == 0)
Denis Vlasenko079f8af2006-11-27 16:49:31 +0000189 return 0;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000190 break;
191 }
192 sum = sum * 127 + t;
193 } else
194 for (i = 0; (t = getc(f)) != '\n'; i++) {
195 if (t == EOF) {
196 if (i == 0)
Denis Vlasenko079f8af2006-11-27 16:49:31 +0000197 return 0;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000198 break;
199 }
200 sum = sum * 127 + t;
201 }
202 } else {
203 for (i = 0;;) {
204 switch (t = getc(f)) {
205 case '\t':
206 case '\r':
207 case '\v':
208 case '\f':
209 case ' ':
210 space++;
211 continue;
212 default:
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000213 if (space && !(option_mask32 & FLAG_w)) {
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000214 i++;
215 space = 0;
216 }
217 sum = sum * 127 + t;
218 i++;
219 continue;
220 case EOF:
221 if (i == 0)
Denis Vlasenko079f8af2006-11-27 16:49:31 +0000222 return 0;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000223 /* FALLTHROUGH */
224 case '\n':
225 break;
226 }
227 break;
228 }
229 }
230 /*
231 * There is a remote possibility that we end up with a zero sum.
232 * Zero is used as an EOF marker, so return 1 instead.
233 */
234 return (sum == 0 ? 1 : sum);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000235}
236
237
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000238/*
239 * Check to see if the given files differ.
240 * Returns 0 if they are the same, 1 if different, and -1 on error.
241 */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000242static int files_differ(FILE * f1, FILE * f2, int flags)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000243{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000244 size_t i, j;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000245
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000246 if ((flags & (D_EMPTY1 | D_EMPTY2)) || stb1.st_size != stb2.st_size ||
247 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
Denis Vlasenko079f8af2006-11-27 16:49:31 +0000248 return 1;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000249 while (1) {
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000250 i = fread(bb_common_bufsiz1, 1, BUFSIZ/2, f1);
251 j = fread(bb_common_bufsiz1 + BUFSIZ/2, 1, BUFSIZ/2, f2);
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000252 if (i != j)
Denis Vlasenko079f8af2006-11-27 16:49:31 +0000253 return 1;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000254 if (i == 0 && j == 0) {
255 if (ferror(f1) || ferror(f2))
Denis Vlasenko079f8af2006-11-27 16:49:31 +0000256 return 1;
257 return 0;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000258 }
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000259 if (memcmp(bb_common_bufsiz1,
260 bb_common_bufsiz1 + BUFSIZ/2, i) != 0)
Denis Vlasenko079f8af2006-11-27 16:49:31 +0000261 return 1;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000262 }
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000263}
264
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000265
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000266static void prepare(int i, FILE * fd, off_t filesize)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000267{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000268 struct line *p;
269 int h;
270 size_t j, sz;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000271
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000272 rewind(fd);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000273
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000274 sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25;
275 if (sz < 100)
276 sz = 100;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000277
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000278 p = xmalloc((sz + 3) * sizeof(struct line));
279 for (j = 0; (h = readhash(fd));) {
280 if (j == sz) {
281 sz = sz * 3 / 2;
282 p = xrealloc(p, (sz + 3) * sizeof(struct line));
283 }
284 p[++j].value = h;
285 }
286 len[i] = j;
287 file[i] = p;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000288}
289
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000290
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000291static void prune(void)
292{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000293 int i, j;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000294
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000295 for (pref = 0; pref < len[0] && pref < len[1] &&
296 file[0][pref + 1].value == file[1][pref + 1].value; pref++);
297 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
298 file[0][len[0] - suff].value == file[1][len[1] - suff].value;
299 suff++);
300 for (j = 0; j < 2; j++) {
301 sfile[j] = file[j] + pref;
302 slen[j] = len[j] - pref - suff;
303 for (i = 0; i <= slen[j]; i++)
304 sfile[j][i].serial = i;
305 }
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000306}
307
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000308
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000309static void equiv(struct line *a, int n, struct line *b, int m, int *c)
310{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000311 int i, j;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000312
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000313 i = j = 1;
314 while (i <= n && j <= m) {
315 if (a[i].value < b[j].value)
316 a[i++].value = 0;
317 else if (a[i].value == b[j].value)
318 a[i++].value = j;
319 else
320 j++;
321 }
322 while (i <= n)
323 a[i++].value = 0;
324 b[m + 1].value = 0;
325 j = 0;
326 while (++j <= m) {
327 c[j] = -b[j].serial;
328 while (b[j + 1].value == b[j].value) {
329 j++;
330 c[j] = b[j].serial;
331 }
332 }
333 c[j] = -1;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000334}
335
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000336
/*
 * Integer square root estimate.
 * Starts from 1 and repeatedly averages the estimate with n divided by
 * it, stopping once two successive estimates differ by at most 1.
 * Returns 0 for n == 0.
 */
static int isqrt(int n)
{
	int prev, cur;

	if (n == 0)
		return 0;

	cur = 1;
	do {
		prev = cur;
		cur = (n / prev + prev) / 2;
	} while (cur - prev > 1 || cur - prev < -1);

	return cur;
}
353
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000354
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000355static int newcand(int x, int y, int pred)
356{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000357 struct cand *q;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000358
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000359 if (clen == clistlen) {
360 clistlen = clistlen * 11 / 10;
361 clist = xrealloc(clist, clistlen * sizeof(struct cand));
362 }
363 q = clist + clen;
364 q->x = x;
365 q->y = y;
366 q->pred = pred;
Denis Vlasenkod9e15f22006-11-27 16:49:55 +0000367 return clen++;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000368}
369
370
371static int search(int *c, int k, int y)
372{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000373 int i, j, l, t;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000374
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000375 if (clist[c[k]].y < y) /* quick look for typical case */
Denis Vlasenkod9e15f22006-11-27 16:49:55 +0000376 return k + 1;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000377 i = 0;
378 j = k + 1;
379 while (1) {
380 l = i + j;
381 if ((l >>= 1) <= i)
382 break;
383 t = clist[c[l]].y;
384 if (t > y)
385 j = l;
386 else if (t < y)
387 i = l;
388 else
Denis Vlasenkod9e15f22006-11-27 16:49:55 +0000389 return l;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000390 }
Denis Vlasenkod9e15f22006-11-27 16:49:55 +0000391 return l + 1;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000392}
393
394
395static int stone(int *a, int n, int *b, int *c)
396{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000397 int i, k, y, j, l;
398 int oldc, tc, oldl;
399 unsigned int numtries;
400
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +0000401#if ENABLE_FEATURE_DIFF_MINIMAL
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000402 const unsigned int bound =
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000403 (option_mask32 & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n));
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000404#else
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000405 const unsigned int bound = MAX(256, isqrt(n));
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000406#endif
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000407 k = 0;
408 c[0] = newcand(0, 0, 0);
409 for (i = 1; i <= n; i++) {
410 j = a[i];
411 if (j == 0)
412 continue;
413 y = -b[j];
414 oldl = 0;
415 oldc = c[0];
416 numtries = 0;
417 do {
418 if (y <= clist[oldc].y)
419 continue;
420 l = search(c, k, y);
421 if (l != oldl + 1)
422 oldc = c[l - 1];
423 if (l <= k) {
424 if (clist[c[l]].y <= y)
425 continue;
426 tc = c[l];
427 c[l] = newcand(i, y, oldc);
428 oldc = tc;
429 oldl = l;
430 numtries++;
431 } else {
432 c[l] = newcand(i, y, oldc);
433 k++;
434 break;
435 }
436 } while ((y = b[++j]) > 0 && numtries < bound);
437 }
Denis Vlasenkod9e15f22006-11-27 16:49:55 +0000438 return k;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000439}
440
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000441
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000442static void unravel(int p)
443{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000444 struct cand *q;
445 int i;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000446
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000447 for (i = 0; i <= len[0]; i++)
448 J[i] = i <= pref ? i : i > len[0] - suff ? i + len[1] - len[0] : 0;
449 for (q = clist + p; q->y != 0; q = clist + q->pred)
450 J[q->x + pref] = q->y + pref;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000451}
452
453
454static void unsort(struct line *f, int l, int *b)
455{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000456 int *a, i;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000457
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000458 a = xmalloc((l + 1) * sizeof(int));
459 for (i = 1; i <= l; i++)
460 a[f[i].serial] = f[i].value;
461 for (i = 1; i <= l; i++)
462 b[i] = a[i];
463 free(a);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000464}
465
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000466
/*
 * Consume characters from f up to and including the next newline, or
 * up to EOF. Returns the number of getc() calls made, so the
 * terminating newline (or the EOF hit) is counted as well.
 */
static int skipline(FILE * f)
{
	int count = 1;
	int ch;

	for (;;) {
		ch = getc(f);
		if (ch == '\n' || ch == EOF)
			break;
		count++;
	}
	return count;
}
475
476
477/*
478 * Check does double duty:
479 * 1. ferret out any fortuitous correspondences due
480 * to confounding by hashing (which result in "jackpot")
481 * 2. collect random access indexes to the two files
482 */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000483static void check(FILE * f1, FILE * f2)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000484{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000485 int i, j, jackpot, c, d;
486 long ctold, ctnew;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000487
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000488 rewind(f1);
489 rewind(f2);
490 j = 1;
491 ixold[0] = ixnew[0] = 0;
492 jackpot = 0;
493 ctold = ctnew = 0;
494 for (i = 1; i <= len[0]; i++) {
495 if (J[i] == 0) {
496 ixold[i] = ctold += skipline(f1);
497 continue;
498 }
499 while (j < J[i]) {
500 ixnew[j] = ctnew += skipline(f2);
501 j++;
502 }
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000503 if ((option_mask32 & FLAG_b) || (option_mask32 & FLAG_w)
504 || (option_mask32 & FLAG_i)) {
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000505 while (1) {
506 c = getc(f1);
507 d = getc(f2);
508 /*
509 * GNU diff ignores a missing newline
510 * in one file if bflag || wflag.
511 */
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000512 if (((option_mask32 & FLAG_b) || (option_mask32 & FLAG_w)) &&
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000513 ((c == EOF && d == '\n') || (c == '\n' && d == EOF))) {
514 break;
515 }
516 ctold++;
517 ctnew++;
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000518 if ((option_mask32 & FLAG_b) && isspace(c) && isspace(d)) {
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000519 do {
520 if (c == '\n')
521 break;
522 ctold++;
523 } while (isspace(c = getc(f1)));
524 do {
525 if (d == '\n')
526 break;
527 ctnew++;
528 } while (isspace(d = getc(f2)));
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000529 } else if (option_mask32 & FLAG_w) {
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000530 while (isspace(c) && c != '\n') {
531 c = getc(f1);
532 ctold++;
533 }
534 while (isspace(d) && d != '\n') {
535 d = getc(f2);
536 ctnew++;
537 }
538 }
539 if (c != d) {
540 jackpot++;
541 J[i] = 0;
542 if (c != '\n' && c != EOF)
543 ctold += skipline(f1);
544 if (d != '\n' && c != EOF)
545 ctnew += skipline(f2);
546 break;
547 }
548 if (c == '\n' || c == EOF)
549 break;
550 }
551 } else {
552 while (1) {
553 ctold++;
554 ctnew++;
555 if ((c = getc(f1)) != (d = getc(f2))) {
556 J[i] = 0;
557 if (c != '\n' && c != EOF)
558 ctold += skipline(f1);
559 if (d != '\n' && c != EOF)
560 ctnew += skipline(f2);
561 break;
562 }
563 if (c == '\n' || c == EOF)
564 break;
565 }
566 }
567 ixold[i] = ctold;
568 ixnew[j] = ctnew;
569 j++;
570 }
571 for (; j <= len[1]; j++)
572 ixnew[j] = ctnew += skipline(f2);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000573}
574
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000575
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000576/* shellsort CACM #201 */
577static void sort(struct line *a, int n)
578{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000579 struct line *ai, *aim, w;
580 int j, m = 0, k;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000581
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000582 if (n == 0)
583 return;
584 for (j = 1; j <= n; j *= 2)
585 m = 2 * j - 1;
586 for (m /= 2; m != 0; m /= 2) {
587 k = n - m;
588 for (j = 1; j <= k; j++) {
589 for (ai = &a[j]; ai > a; ai -= m) {
590 aim = &ai[m];
591 if (aim < ai)
592 break; /* wraparound */
593 if (aim->value > ai[0].value ||
594 (aim->value == ai[0].value && aim->serial > ai[0].serial))
595 break;
596 w.value = ai[0].value;
597 ai[0].value = aim->value;
598 aim->value = w.value;
599 w.serial = ai[0].serial;
600 ai[0].serial = aim->serial;
601 aim->serial = w.serial;
602 }
603 }
604 }
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000605}
606
607
/*
 * Print the line range a..b the way a unified diff hunk header wants
 * it: "start,count" for several lines, just the line number for a
 * single line, and "b,0" for an empty range (a > b).
 */
static void uni_range(int a, int b)
{
	if (a > b) {
		printf("%d,0", b);
		return;
	}
	if (a == b) {
		printf("%d", b);
		return;
	}
	printf("%d,%d", a, b - a + 1);
}
617
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000618
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000619static int fetch(long *f, int a, int b, FILE * lb, int ch)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000620{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000621 int i, j, c, lastc, col, nc;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000622
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000623 if (a > b)
Denis Vlasenko079f8af2006-11-27 16:49:31 +0000624 return 0;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000625 for (i = a; i <= b; i++) {
626 fseek(lb, f[i - 1], SEEK_SET);
627 nc = f[i] - f[i - 1];
628 if (ch != '\0') {
629 putchar(ch);
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000630 if (option_mask32 & FLAG_T)
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000631 putchar('\t');
632 }
633 col = 0;
634 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
635 if ((c = getc(lb)) == EOF) {
636 puts("\n\\ No newline at end of file");
Denis Vlasenko079f8af2006-11-27 16:49:31 +0000637 return 0;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000638 }
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000639 if (c == '\t' && (option_mask32 & FLAG_t)) {
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000640 do {
641 putchar(' ');
642 } while (++col & 7);
643 } else {
644 putchar(c);
645 col++;
646 }
647 }
648 }
Denis Vlasenko079f8af2006-11-27 16:49:31 +0000649 return 0;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000650}
651
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000652
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000653static int asciifile(FILE * f)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000654{
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +0000655#if ENABLE_FEATURE_DIFF_BINARY
Bernhard Reutner-Fischer5fb0fec2006-04-06 11:28:19 +0000656 int i, cnt;
657#endif
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000658
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000659 if ((option_mask32 & FLAG_a) || f == NULL)
Denis Vlasenko079f8af2006-11-27 16:49:31 +0000660 return 1;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000661
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +0000662#if ENABLE_FEATURE_DIFF_BINARY
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000663 rewind(f);
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000664 cnt = fread(bb_common_bufsiz1, 1, BUFSIZ, f);
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +0000665 for (i = 0; i < cnt; i++) {
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000666 if (!isprint(bb_common_bufsiz1[i])
667 && !isspace(bb_common_bufsiz1[i])) {
Denis Vlasenko079f8af2006-11-27 16:49:31 +0000668 return 0;
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +0000669 }
670 }
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000671#endif
Denis Vlasenko079f8af2006-11-27 16:49:31 +0000672 return 1;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000673}
674
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000675
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000676/* dump accumulated "unified" diff changes */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000677static void dump_unified_vec(FILE * f1, FILE * f2)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000678{
679 struct context_vec *cvp = context_vec_start;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000680 int lowa, upb, lowc, upd;
681 int a, b, c, d;
682 char ch;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000683
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000684 if (context_vec_start > context_vec_ptr)
685 return;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000686
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000687 b = d = 0; /* gcc */
688 lowa = MAX(1, cvp->a - context);
689 upb = MIN(len[0], context_vec_ptr->b + context);
690 lowc = MAX(1, cvp->c - context);
691 upd = MIN(len[1], context_vec_ptr->d + context);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000692
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000693 fputs("@@ -", stdout);
694 uni_range(lowa, upb);
695 fputs(" +", stdout);
696 uni_range(lowc, upd);
697 fputs(" @@", stdout);
698 putchar('\n');
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000699
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000700 /*
701 * Output changes in "unified" diff format--the old and new lines
702 * are printed together.
703 */
704 for (; cvp <= context_vec_ptr; cvp++) {
705 a = cvp->a;
706 b = cvp->b;
707 c = cvp->c;
708 d = cvp->d;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000709
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000710 /*
711 * c: both new and old changes
712 * d: only changes in the old file
713 * a: only changes in the new file
714 */
715 if (a <= b && c <= d)
716 ch = 'c';
717 else
718 ch = (a <= b) ? 'd' : 'a';
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000719 if (ch == 'c' || ch == 'd') {
720 fetch(ixold, lowa, a - 1, f1, ' ');
721 fetch(ixold, a, b, f1, '-');
722 }
723 if (ch == 'a')
724 fetch(ixnew, lowc, c - 1, f2, ' ');
725 if (ch == 'c' || ch == 'a')
726 fetch(ixnew, c, d, f2, '+');
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000727 lowa = b + 1;
728 lowc = d + 1;
729 }
730 fetch(ixnew, d + 1, upd, f2, ' ');
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000731
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000732 context_vec_ptr = context_vec_start - 1;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000733}
734
735
736static void print_header(const char *file1, const char *file2)
737{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000738 if (label[0] != NULL)
739 printf("%s %s\n", "---", label[0]);
740 else
741 printf("%s %s\t%s", "---", file1, ctime(&stb1.st_mtime));
742 if (label[1] != NULL)
743 printf("%s %s\n", "+++", label[1]);
744 else
745 printf("%s %s\t%s", "+++", file2, ctime(&stb2.st_mtime));
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000746}
747
748
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000749/*
750 * Indicate that there is a difference between lines a and b of the from file
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000751 * to get to lines c to d of the to file. If a is greater than b then there
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000752 * are no lines in the from file involved and this means that there were
753 * lines appended (beginning at b). If c is greater than d then there are
754 * lines missing from the to file.
755 */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000756static void change(char *file1, FILE * f1, char *file2, FILE * f2, int a,
757 int b, int c, int d)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000758{
759 static size_t max_context = 64;
760
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000761 if ((a > b && c > d) || (option_mask32 & FLAG_q)) {
762 anychange = 1;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000763 return;
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000764 }
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000765
766 /*
767 * Allocate change records as needed.
768 */
769 if (context_vec_ptr == context_vec_end - 1) {
770 ptrdiff_t offset = context_vec_ptr - context_vec_start;
771
772 max_context <<= 1;
773 context_vec_start = xrealloc(context_vec_start,
774 max_context *
775 sizeof(struct context_vec));
776 context_vec_end = context_vec_start + max_context;
777 context_vec_ptr = context_vec_start + offset;
778 }
779 if (anychange == 0) {
780 /*
781 * Print the context/unidiff header first time through.
782 */
783 print_header(file1, file2);
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000784 } else if (a > context_vec_ptr->b + (2 * context) + 1 &&
785 c > context_vec_ptr->d + (2 * context) + 1) {
786 /*
787 * If this change is more than 'context' lines from the
788 * previous change, dump the record and reset it.
789 */
790 dump_unified_vec(f1, f2);
791 }
792 context_vec_ptr++;
793 context_vec_ptr->a = a;
794 context_vec_ptr->b = b;
795 context_vec_ptr->c = c;
796 context_vec_ptr->d = d;
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000797 anychange = 1;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000798}
799
800
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000801static void output(char *file1, FILE * f1, char *file2, FILE * f2)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000802{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000803 /* Note that j0 and j1 can't be used as they are defined in math.h.
804 * This also allows the rather amusing variable 'j00'... */
805 int m, i0, i1, j00, j01;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000806
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000807 rewind(f1);
808 rewind(f2);
809 m = len[0];
810 J[0] = 0;
811 J[m + 1] = len[1] + 1;
812 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
813 while (i0 <= m && J[i0] == J[i0 - 1] + 1)
814 i0++;
815 j00 = J[i0 - 1] + 1;
816 i1 = i0 - 1;
817 while (i1 < m && J[i1 + 1] == 0)
818 i1++;
819 j01 = J[i1 + 1] - 1;
820 J[i1] = j01;
821 change(file1, f1, file2, f2, i0, i1, j00, j01);
822 }
823 if (m == 0) {
824 change(file1, f1, file2, f2, 1, 0, 1, len[1]);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000825 }
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000826 if (anychange != 0 && !(option_mask32 & FLAG_q)) {
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000827 dump_unified_vec(f1, f2);
828 }
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000829}
830
831/*
Denis Vlasenko9213a9e2006-09-17 16:28:10 +0000832 * The following code uses an algorithm due to Harold Stone,
833 * which finds a pair of longest identical subsequences in
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000834 * the two files.
835 *
836 * The major goal is to generate the match vector J.
837 * J[i] is the index of the line in file1 corresponding
838 * to line i of file0. J[i] = 0 if there is no
839 * such line in file1.
840 *
841 * Lines are hashed so as to work in core. All potential
842 * matches are located by sorting the lines of each file
843 * on the hash (called ``value''). In particular, this
844 * collects the equivalence classes in file1 together.
845 * Subroutine equiv replaces the value of each line in
846 * file0 by the index of the first element of its
847 * matching equivalence in (the reordered) file1.
848 * To save space equiv squeezes file1 into a single
849 * array member in which the equivalence classes
850 * are simply concatenated, except that their first
851 * members are flagged by changing sign.
852 *
853 * Next the indices that point into member are unsorted into
854 * array class according to the original order of file0.
855 *
856 * The cleverness lies in routine stone. This marches
857 * through the lines of file0, developing a vector klist
858 * of "k-candidates". At step i a k-candidate is a matched
859 * pair of lines x,y (x in file0 y in file1) such that
860 * there is a common subsequence of length k
861 * between the first i lines of file0 and the first y
862 * lines of file1, but there is no such subsequence for
863 * any smaller y. x is the earliest possible mate to y
864 * that occurs in such a subsequence.
865 *
866 * Whenever any of the members of the equivalence class of
867 * lines in file1 matable to a line in file0 has serial number
868 * less than the y of some k-candidate, that k-candidate
869 * with the smallest such y is replaced. The new
870 * k-candidate is chained (via pred) to the current
871 * k-1 candidate so that the actual subsequence can
872 * be recovered. When a member has serial number greater
873 * than the y of all k-candidates, the klist is extended.
874 * At the end, the longest subsequence is pulled out
875 * and placed in the array J by unravel.
876 *
877 * With J in hand, the matches there recorded are
878 * checked against reality to assure that no spurious
879 * matches have crept in due to hashing. If they have,
880 * they are broken, and "jackpot" is recorded--a harmless
881 * matter except that a true match for a spuriously
882 * mated line may now be unnecessarily reported as a change.
883 *
884 * Much of the complexity of the program comes simply
885 * from trying to minimize core utilization and
886 * maximize the range of doable problems by dynamically
887 * allocating what is needed and reusing what is not.
888 * The core requirements for problems larger than somewhat
889 * are (in words) 2*length(file0) + length(file1) +
890 * 3*(number of k-candidates installed), typically about
891 * 6n words for files of length n.
892 */
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000893static int diffreg(char *ofile1, char *ofile2, int flags)
894{
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000895 char *file1 = ofile1;
896 char *file2 = ofile2;
897 FILE *f1 = NULL;
898 FILE *f2 = NULL;
899 int rval = D_SAME;
900 int i;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000901
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000902 anychange = 0;
903 context_vec_ptr = context_vec_start - 1;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000904
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000905 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
906 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
Denis Vlasenko9f739442006-12-16 23:49:13 +0000907 if (LONE_DASH(file1) && LONE_DASH(file2))
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000908 goto closem;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000909
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000910 f1 = stdin;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000911 if (flags & D_EMPTY1)
Rob Landleyd921b2e2006-08-03 15:41:12 +0000912 f1 = xfopen(bb_dev_null, "r");
Denis Vlasenko9f739442006-12-16 23:49:13 +0000913 else if (NOT_LONE_DASH(file1))
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000914 f1 = xfopen(file1, "r");
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000915
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000916 f2 = stdin;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000917 if (flags & D_EMPTY2)
Rob Landleyd921b2e2006-08-03 15:41:12 +0000918 f2 = xfopen(bb_dev_null, "r");
Denis Vlasenko9f739442006-12-16 23:49:13 +0000919 else if (NOT_LONE_DASH(file2))
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000920 f2 = xfopen(file2, "r");
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000921
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000922 i = files_differ(f1, f2, flags);
923 if (i == 0)
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000924 goto closem;
925 else if (i != 1) { /* 1 == ok */
926 /* error */
927 status |= 2;
928 goto closem;
929 }
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000930
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000931 if (!asciifile(f1) || !asciifile(f2)) {
932 rval = D_BINARY;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000933 status |= 1;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000934 goto closem;
935 }
936
937 prepare(0, f1, stb1.st_size);
938 prepare(1, f2, stb2.st_size);
939 prune();
940 sort(sfile[0], slen[0]);
941 sort(sfile[1], slen[1]);
942
943 member = (int *) file[1];
944 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
945 member = xrealloc(member, (slen[1] + 2) * sizeof(int));
946
947 class = (int *) file[0];
948 unsort(sfile[0], slen[0], class);
949 class = xrealloc(class, (slen[0] + 2) * sizeof(int));
950
951 klist = xmalloc((slen[0] + 2) * sizeof(int));
952 clen = 0;
953 clistlen = 100;
954 clist = xmalloc(clistlen * sizeof(struct cand));
955 i = stone(class, slen[0], member, klist);
956 free(member);
957 free(class);
958
959 J = xrealloc(J, (len[0] + 2) * sizeof(int));
960 unravel(klist[i]);
961 free(clist);
962 free(klist);
963
964 ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long));
965 ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long));
966 check(f1, f2);
967 output(file1, f1, file2, f2);
968
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000969 closem:
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000970 if (anychange) {
971 status |= 1;
972 if (rval == D_SAME)
973 rval = D_DIFFER;
974 }
975 if (f1 != NULL)
976 fclose(f1);
977 if (f2 != NULL)
978 fclose(f2);
979 if (file1 != ofile1)
980 free(file1);
981 if (file2 != ofile2)
982 free(file2);
Denis Vlasenkod9e15f22006-11-27 16:49:55 +0000983 return rval;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000984}
985
Denis Vlasenko6a1d6612006-12-16 22:18:44 +0000986
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000987#if ENABLE_FEATURE_DIFF_DIR
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000988static void do_diff(char *dir1, char *path1, char *dir2, char *path2)
989{
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000990 int flags = D_HEADER;
991 int val;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +0000992
Rob Landleyd921b2e2006-08-03 15:41:12 +0000993 char *fullpath1 = xasprintf("%s/%s", dir1, path1);
994 char *fullpath2 = xasprintf("%s/%s", dir2, path2);
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +0000995
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +0000996 if (stat(fullpath1, &stb1) != 0) {
997 flags |= D_EMPTY1;
998 memset(&stb1, 0, sizeof(stb1));
Rob Landleyd921b2e2006-08-03 15:41:12 +0000999 fullpath1 = xasprintf("%s/%s", dir1, path2);
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001000 }
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001001 if (stat(fullpath2, &stb2) != 0) {
1002 flags |= D_EMPTY2;
1003 memset(&stb2, 0, sizeof(stb2));
1004 stb2.st_mode = stb1.st_mode;
Rob Landleyd921b2e2006-08-03 15:41:12 +00001005 fullpath2 = xasprintf("%s/%s", dir2, path1);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001006 }
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001007
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001008 if (stb1.st_mode == 0)
1009 stb1.st_mode = stb2.st_mode;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001010
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001011 if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
1012 printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2);
1013 return;
1014 }
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001015
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001016 if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode))
1017 val = D_SKIPPED1;
1018 else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode))
1019 val = D_SKIPPED2;
1020 else
1021 val = diffreg(fullpath1, fullpath2, flags);
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001022
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001023 print_status(val, fullpath1, fullpath2, NULL);
1024}
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001025#endif
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001026
Denis Vlasenko6a1d6612006-12-16 22:18:44 +00001027
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001028#if ENABLE_FEATURE_DIFF_DIR
/* qsort() comparison callback for an array of C strings: p1 and p2
 * point at the array elements, which are ordered with strcmp(). */
static int dir_strcmp(const void *p1, const void *p2)
{
	const char *lhs = *(char *const *) p1;
	const char *rhs = *(char *const *) p2;

	return strcmp(lhs, rhs);
}
1033
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001034
Denis Vlasenko6a1d6612006-12-16 22:18:44 +00001035/* This function adds a filename to dl, the directory listing. */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001036static int add_to_dirlist(const char *filename,
Bernhard Reutner-Fischer5b6f7762006-12-13 16:50:15 +00001037 struct stat ATTRIBUTE_UNUSED * sb, void *userdata,
1038 int depth ATTRIBUTE_UNUSED)
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001039{
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001040 dl_count++;
1041 dl = xrealloc(dl, dl_count * sizeof(char *));
Rob Landleyd921b2e2006-08-03 15:41:12 +00001042 dl[dl_count - 1] = xstrdup(filename);
Denis Vlasenko6a1d6612006-12-16 22:18:44 +00001043 if (option_mask32 & FLAG_r) {
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001044 int *pp = (int *) userdata;
1045 int path_len = *pp + 1;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001046
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001047 dl[dl_count - 1] = &(dl[dl_count - 1])[path_len];
1048 }
1049 return TRUE;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001050}
1051
Denis Vlasenko6a1d6612006-12-16 22:18:44 +00001052
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001053/* This returns a sorted directory listing. */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001054static char **get_dir(char *path)
1055{
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001056 int i;
Bernhard Reutner-Fischer5fb0fec2006-04-06 11:28:19 +00001057 char **retval;
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001058
1059 /* If -r has been set, then the recursive_action function will be
1060 * used. Unfortunately, this outputs the root directory along with
1061 * the recursed paths, so use void *userdata to specify the string
1062 * length of the root directory. It can then be removed in
1063 * add_to_dirlist. */
1064
1065 int path_len = strlen(path);
1066 void *userdata = &path_len;
Bernhard Reutner-Fischer5fb0fec2006-04-06 11:28:19 +00001067
Rob Landleyd921b2e2006-08-03 15:41:12 +00001068 /* Reset dl_count - there's no need to free dl as xrealloc does
Bernhard Reutner-Fischer5fb0fec2006-04-06 11:28:19 +00001069 * the job nicely. */
1070 dl_count = 0;
1071
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001072 /* Now fill dl with a listing. */
Denis Vlasenko6a1d6612006-12-16 22:18:44 +00001073 if (option_mask32 & FLAG_r)
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001074 recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL,
Denis Vlasenko8c35d652006-10-27 23:42:25 +00001075 userdata, 0);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001076 else {
1077 DIR *dp;
1078 struct dirent *ep;
Bernhard Reutner-Fischercb448162006-04-12 07:35:12 +00001079
Rob Landleyd921b2e2006-08-03 15:41:12 +00001080 dp = warn_opendir(path);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001081 while ((ep = readdir(dp))) {
Denis Vlasenkobf66fbc2006-12-21 13:23:14 +00001082 if (!strcmp(ep->d_name, "..") || LONE_CHAR(ep->d_name, '.'))
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001083 continue;
Denis Vlasenko8c35d652006-10-27 23:42:25 +00001084 add_to_dirlist(ep->d_name, NULL, NULL, 0);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001085 }
1086 closedir(dp);
1087 }
1088
1089 /* Sort dl alphabetically. */
1090 qsort(dl, dl_count, sizeof(char *), dir_strcmp);
1091
1092 /* Copy dl so that we can return it. */
Bernhard Reutner-Fischer5fb0fec2006-04-06 11:28:19 +00001093 retval = xmalloc(dl_count * sizeof(char *));
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001094 for (i = 0; i < dl_count; i++)
Rob Landleyd921b2e2006-08-03 15:41:12 +00001095 retval[i] = xstrdup(dl[i]);
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001096
1097 return retval;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001098}
1099
Denis Vlasenko6a1d6612006-12-16 22:18:44 +00001100
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001101static void diffdir(char *p1, char *p2)
1102{
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001103 char **dirlist1, **dirlist2;
1104 char *dp1, *dp2;
1105 int dirlist1_count, dirlist2_count;
1106 int pos;
1107
1108 /* Check for trailing slashes. */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001109
Bernhard Reutner-Fischer56b95692006-12-14 11:27:58 +00001110 dp1 = last_char_is(p1, '/');
1111 if (dp1 != NULL)
1112 *dp1 = '\0';
1113 dp2 = last_char_is(p2, '/');
1114 if (dp2 != NULL)
1115 *dp2 = '\0';
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001116
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001117 /* Get directory listings for p1 and p2. */
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001118
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001119 dirlist1 = get_dir(p1);
1120 dirlist1_count = dl_count;
1121 dirlist1[dirlist1_count] = NULL;
1122 dirlist2 = get_dir(p2);
1123 dirlist2_count = dl_count;
1124 dirlist2[dirlist2_count] = NULL;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001125
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001126 /* If -S was set, find the starting point. */
1127 if (start) {
1128 while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0)
1129 dirlist1++;
1130 while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0)
1131 dirlist2++;
1132 if ((*dirlist1 == NULL) || (*dirlist2 == NULL))
Bernhard Reutner-Fischer19008b82006-06-07 20:17:41 +00001133 bb_error_msg(bb_msg_invalid_arg, "NULL", "-S");
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001134 }
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001135
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001136 /* Now that both dirlist1 and dirlist2 contain sorted directory
1137 * listings, we can start to go through dirlist1. If both listings
1138 * contain the same file, then do a normal diff. Otherwise, behaviour
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001139 * is determined by whether the -N flag is set. */
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001140 while (*dirlist1 != NULL || *dirlist2 != NULL) {
1141 dp1 = *dirlist1;
1142 dp2 = *dirlist2;
1143 pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2);
1144 if (pos == 0) {
1145 do_diff(p1, dp1, p2, dp2);
1146 dirlist1++;
1147 dirlist2++;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001148 } else if (pos < 0) {
Denis Vlasenko6a1d6612006-12-16 22:18:44 +00001149 if (option_mask32 & FLAG_N)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001150 do_diff(p1, dp1, p2, NULL);
1151 else
1152 print_only(p1, strlen(p1) + 1, dp1);
1153 dirlist1++;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001154 } else {
Denis Vlasenko6a1d6612006-12-16 22:18:44 +00001155 if (option_mask32 & FLAG_N)
Bernhard Reutner-Fischer693a9362006-04-06 08:15:24 +00001156 do_diff(p1, NULL, p2, dp2);
1157 else
1158 print_only(p2, strlen(p2) + 1, dp2);
1159 dirlist2++;
1160 }
1161 }
1162}
1163#endif
1164
1165
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001166int diff_main(int argc, char **argv)
1167{
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001168 int gotstdin = 0;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001169
1170 char *U_opt;
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001171 llist_t *L_arg = NULL;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001172
Denis Vlasenko67b23e62006-10-03 21:00:06 +00001173 opt_complementary = "L::";
Denis Vlasenko6a1d6612006-12-16 22:18:44 +00001174 getopt32(argc, argv, "abdiL:NqrsS:tTU:wu"
1175 "p" /* ignored (for compatibility) */,
Denis Vlasenko7d219aa2006-10-05 10:17:08 +00001176 &L_arg, &start, &U_opt);
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001177
Denis Vlasenko6a1d6612006-12-16 22:18:44 +00001178 if (option_mask32 & FLAG_L) {
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001179 while (L_arg) {
1180 if (label[0] == NULL)
1181 label[0] = L_arg->data;
1182 else if (label[1] == NULL)
1183 label[1] = L_arg->data;
1184 else
1185 bb_show_usage();
1186
1187 L_arg = L_arg->link;
1188 }
1189
1190 /* If both label[0] and label[1] were set, they need to be swapped. */
1191 if (label[0] && label[1]) {
1192 char *tmp;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001193
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001194 tmp = label[1];
1195 label[1] = label[0];
1196 label[0] = tmp;
1197 }
1198 }
1199
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001200 context = 3; /* This is the default number of lines of context. */
Denis Vlasenko6a1d6612006-12-16 22:18:44 +00001201 if (option_mask32 & FLAG_U) {
1202 context = xatou_range(U_opt, 1, INT_MAX);
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001203 }
1204 argc -= optind;
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001205 argv += optind;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001206
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001207 /*
1208 * Do sanity checks, fill in stb1 and stb2 and call the appropriate
1209 * driver routine. Both drivers use the contents of stb1 and stb2.
1210 */
1211 if (argc < 2) {
Denis Vlasenkoea620772006-10-14 02:23:43 +00001212 bb_error_msg("missing filename");
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001213 bb_show_usage();
1214 }
Denis Vlasenko9f739442006-12-16 23:49:13 +00001215 if (LONE_DASH(argv[0])) {
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001216 fstat(STDIN_FILENO, &stb1);
1217 gotstdin = 1;
1218 } else
1219 xstat(argv[0], &stb1);
Denis Vlasenko9f739442006-12-16 23:49:13 +00001220 if (LONE_DASH(argv[1])) {
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001221 fstat(STDIN_FILENO, &stb2);
1222 gotstdin = 1;
1223 } else
1224 xstat(argv[1], &stb2);
1225 if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode)))
Denis Vlasenkoea620772006-10-14 02:23:43 +00001226 bb_error_msg_and_die("can't compare - to a directory");
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001227 if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001228#if ENABLE_FEATURE_DIFF_DIR
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001229 diffdir(argv[0], argv[1]);
1230#else
Denis Vlasenkoea620772006-10-14 02:23:43 +00001231 bb_error_msg_and_die("directory comparison not supported");
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001232#endif
Bernhard Reutner-Fischerbbc225e2006-05-29 12:12:45 +00001233 } else {
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001234 if (S_ISDIR(stb1.st_mode)) {
1235 argv[0] = concat_path_file(argv[0], argv[1]);
Bernhard Reutner-Fischerd2c306e2006-05-29 12:10:23 +00001236 xstat(argv[0], &stb1);
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001237 }
1238 if (S_ISDIR(stb2.st_mode)) {
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001239 argv[1] = concat_path_file(argv[1], argv[0]);
Bernhard Reutner-Fischerd2c306e2006-05-29 12:10:23 +00001240 xstat(argv[1], &stb2);
Bernhard Reutner-Fischerbc142142006-04-06 16:07:08 +00001241 }
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001242 print_status(diffreg(argv[0], argv[1], 0), argv[0], argv[1], NULL);
1243 }
Denis Vlasenko6a1d6612006-12-16 22:18:44 +00001244 return status;
Bernhard Reutner-Fischer8f7d3892006-04-06 08:11:08 +00001245}