blob: 8ac09e64175f6f8d49b81132aa69ce7690bb34ea [file] [log] [blame]
Erik Andersenfb002d02000-03-05 08:07:00 +00001/* vi: set sw=4 ts=4: */
2/*
3 * Copyright (c) 1988, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the University of
17 * California, Berkeley and its contributors.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#if 0
36#ifndef lint
37static const char copyright[] = "@(#) Copyright (c) 1988, 1993\n\
38 The Regents of the University of California. All rights reserved.\n";
39#endif /* not lint */
40
41#ifndef lint
42#if 0
43static char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95";
44#endif
45static const char rcsid[] =
46
47 "$Id: tr.c,v 1.1 2000/03/05 08:07:00 erik Exp $";
48#endif /* not lint */
49#endif /* #if 0 */
50
51#include "internal.h"
52#include <locale.h>
53#include <sys/types.h>
54#include <sys/cdefs.h>
55#include <sys/types.h>
56
57#include <err.h>
58#include <stdio.h>
59#include <stdlib.h>
60#include <string.h>
61#include <unistd.h>
62
63#include <ctype.h>
64#include <err.h>
65#include <stddef.h>
66
/*
 * Parse state for one of the two operand strings given to tr.
 */
typedef struct {
	enum {
		STRING1,
		STRING2
	} which;			/* which operand this state describes */
	enum {
		EOS,			/* operand exhausted */
		INFINITE,		/* unbounded repeat of lastch */
		NORMAL,			/* reading plain characters */
		RANGE,			/* inside a "a-z" range */
		SEQUENCE,		/* inside a counted "[c*n]" repeat */
		SET				/* walking an OOBCH-terminated set */
	} state;
	int cnt;			/* character count */
	int lastch;			/* last character */
	int equiv[2];		/* equivalence set */
	int *set;			/* set of characters */
	char *str;			/* user's string */
} STR;
76
77#include <limits.h>
78#define NCHARS (UCHAR_MAX + 1) /* Number of possible characters. */
79#define OOBCH (UCHAR_MAX + 1) /* Out of band character value. */
80
81static int next __P((STR *));
82
83static int string1[NCHARS] = {
84 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ASCII */
85 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
86 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
87 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
88 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
89 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
90 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
91 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
92 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
93 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
94 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
95 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
96 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
97 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
98 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
99 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
100 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
101 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
102 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
103 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
104 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
105 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
106 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
107 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
108 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
109 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
110 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
111 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
112 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
113 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
114 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
115 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
116}, string2[NCHARS];
117
118STR s1 = { STRING1, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL };
119STR s2 = { STRING2, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL };
120
121static void setup(string, arg, str, cflag)
122int *string;
123char *arg;
124STR *str;
125int cflag;
126{
127 register int cnt, *p;
128
129 str->str = arg;
130 bzero(string, NCHARS * sizeof(int));
131
132 while (next(str))
133 string[str->lastch] = 1;
134 if (cflag)
135 for (p = string, cnt = NCHARS; cnt--; ++p)
136 *p = !*p;
137}
138
/*
 * Print the four-line synopsis to stderr and terminate with status 1.
 * Does not return.
 */
static void tr_usage(void)
{
	const char *translate = "usage: tr [-csu] string1 string2";
	const char *delete_only = " tr [-cu] -d string1";
	const char *squeeze_only = " tr [-cu] -s string1";
	const char *delete_squeeze = " tr [-cu] -ds string1 string2";

	(void) fprintf(stderr, "%s\n%s\n%s\n%s\n",
				   translate, delete_only, squeeze_only, delete_squeeze);
	exit(1);
}
148
149
150extern int tr_main(argc, argv)
151int argc;
152char **argv;
153{
154 register int ch, cnt, lastch, *p;
155 int cflag, dflag, sflag, isstring2;
156
157 (void) setlocale(LC_CTYPE, "");
158
159 cflag = dflag = sflag = 0;
160 while ((ch = getopt(argc, argv, "cdsu")) != -1)
161 switch ((char) ch) {
162 case 'c':
163 cflag = 1;
164 break;
165 case 'd':
166 dflag = 1;
167 break;
168 case 's':
169 sflag = 1;
170 break;
171 case 'u':
172 setbuf(stdout, (char *) NULL);
173 break;
174 case '?':
175 default:
176 tr_usage();
177 }
178 argc -= optind;
179 argv += optind;
180
181 switch (argc) {
182 case 0:
183 default:
184 tr_usage();
185 /* NOTREACHED */
186 case 1:
187 isstring2 = 0;
188 break;
189 case 2:
190 isstring2 = 1;
191 break;
192 }
193
194 /*
195 * tr -ds [-c] string1 string2
196 * Delete all characters (or complemented characters) in string1.
197 * Squeeze all characters in string2.
198 */
199 if (dflag && sflag) {
200 if (!isstring2)
201 tr_usage();
202
203 setup(string1, argv[0], &s1, cflag);
204 setup(string2, argv[1], &s2, 0);
205
206 for (lastch = OOBCH; (ch = getchar()) != EOF;)
207 if (!string1[ch] && (!string2[ch] || lastch != ch)) {
208 lastch = ch;
209 (void) putchar(ch);
210 }
211 exit(0);
212 }
213
214 /*
215 * tr -d [-c] string1
216 * Delete all characters (or complemented characters) in string1.
217 */
218 if (dflag) {
219 if (isstring2)
220 tr_usage();
221
222 setup(string1, argv[0], &s1, cflag);
223
224 while ((ch = getchar()) != EOF)
225 if (!string1[ch])
226 (void) putchar(ch);
227 exit(0);
228 }
229
230 /*
231 * tr -s [-c] string1
232 * Squeeze all characters (or complemented characters) in string1.
233 */
234 if (sflag && !isstring2) {
235 setup(string1, argv[0], &s1, cflag);
236
237 for (lastch = OOBCH; (ch = getchar()) != EOF;)
238 if (!string1[ch] || lastch != ch) {
239 lastch = ch;
240 (void) putchar(ch);
241 }
242 exit(0);
243 }
244
245 /*
246 * tr [-cs] string1 string2
247 * Replace all characters (or complemented characters) in string1 with
248 * the character in the same position in string2. If the -s option is
249 * specified, squeeze all the characters in string2.
250 */
251 if (!isstring2)
252 tr_usage();
253
254 s1.str = argv[0];
255 s2.str = argv[1];
256
257 if (cflag)
258 for (cnt = NCHARS, p = string1; cnt--;)
259 *p++ = OOBCH;
260
261 if (!next(&s2))
262 errx(1, "empty string2");
263
264 /* If string2 runs out of characters, use the last one specified. */
265 if (sflag)
266 while (next(&s1)) {
267 string1[s1.lastch] = ch = s2.lastch;
268 string2[ch] = 1;
269 (void) next(&s2);
270 } else
271 while (next(&s1)) {
272 string1[s1.lastch] = ch = s2.lastch;
273 (void) next(&s2);
274 }
275
276 if (cflag)
277 for (cnt = 0, p = string1; cnt < NCHARS; ++p, ++cnt)
278 *p = *p == OOBCH ? ch : cnt;
279
280 if (sflag)
281 for (lastch = OOBCH; (ch = getchar()) != EOF;) {
282 ch = string1[ch];
283 if (!string2[ch] || lastch != ch) {
284 lastch = ch;
285 (void) putchar(ch);
286 }
287 } else
288 while ((ch = getchar()) != EOF)
289 (void) putchar(string1[ch]);
290 exit(0);
291}
292
293static int backslash __P((STR *));
294static int bracket __P((STR *));
295static int c_class __P((const void *, const void *));
296static void genclass __P((STR *));
297static void genequiv __P((STR *));
298static int genrange __P((STR *));
299static void genseq __P((STR *));
300
301static int next(s)
302register STR *s;
303{
304 register int ch;
305
306 switch (s->state) {
307 case EOS:
308 return (0);
309 case INFINITE:
310 return (1);
311 case NORMAL:
312 switch (ch = (u_char) * s->str) {
313 case '\0':
314 s->state = EOS;
315 return (0);
316 case '\\':
317 s->lastch = backslash(s);
318 break;
319 case '[':
320 if (bracket(s))
321 return (next(s));
322 /* FALLTHROUGH */
323 default:
324 ++s->str;
325 s->lastch = ch;
326 break;
327 }
328
329 /* We can start a range at any time. */
330 if (s->str[0] == '-' && genrange(s))
331 return (next(s));
332 return (1);
333 case RANGE:
334 if (s->cnt-- == 0) {
335 s->state = NORMAL;
336 return (next(s));
337 }
338 ++s->lastch;
339 return (1);
340 case SEQUENCE:
341 if (s->cnt-- == 0) {
342 s->state = NORMAL;
343 return (next(s));
344 }
345 return (1);
346 case SET:
347 if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
348 s->state = NORMAL;
349 return (next(s));
350 }
351 return (1);
352 }
353 /* NOTREACHED */
354 return (0);
355}
356
357static int bracket(s)
358register STR *s;
359{
360 register char *p;
361
362 switch (s->str[1]) {
363 case ':': /* "[:class:]" */
364 if ((p = strstr(s->str + 2, ":]")) == NULL)
365 return (0);
366 *p = '\0';
367 s->str += 2;
368 genclass(s);
369 s->str = p + 2;
370 return (1);
371 case '=': /* "[=equiv=]" */
372 if ((p = strstr(s->str + 2, "=]")) == NULL)
373 return (0);
374 s->str += 2;
375 genequiv(s);
376 return (1);
377 default: /* "[\###*n]" or "[#*n]" */
378 if ((p = strpbrk(s->str + 2, "*]")) == NULL)
379 return (0);
380 if (p[0] != '*' || index(p, ']') == NULL)
381 return (0);
382 s->str += 1;
383 genseq(s);
384 return (1);
385 }
386 /* NOTREACHED */
387}
388
/*
 * One named character class: its name, the <ctype.h>-style predicate that
 * decides membership, and the OOBCH-terminated member list built for it.
 */
typedef struct {
	char *name;				/* class name as written inside "[:name:]" */
	int (*func)(int);		/* membership test */
	int *set;				/* generated member list */
} CLASS;
394
395static CLASS classes[] = {
396#undef isalnum
397 {"alnum", isalnum,},
398#undef isalpha
399 {"alpha", isalpha,},
400/*#undef isblank
401 { "blank", isblank, },*/
402#undef iscntrl
403 {"cntrl", iscntrl,},
404#undef isdigit
405 {"digit", isdigit,},
406#undef isgraph
407 {"graph", isgraph,},
408#undef islower
409 {"lower", islower,},
410#undef isprint
411 {"print", isprint,},
412#undef ispunct
413 {"punct", ispunct,},
414#undef isspace
415 {"space", isspace,},
416#undef isupper
417 {"upper", isupper,},
418#undef isxdigit
419 {"xdigit", isxdigit,},
420};
421
422static void genclass(s)
423STR *s;
424{
425 register int cnt, (*func) __P((int));
426 CLASS *cp, tmp;
427 int *p;
428
429 tmp.name = s->str;
430 if ((cp = (CLASS *) bsearch(&tmp, classes, sizeof(classes) /
431 sizeof(CLASS), sizeof(CLASS),
432 c_class)) == NULL) errx(1,
433 "unknown class %s",
434 s->str);
435
436 if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
437 errx(1, "malloc");
438 bzero(p, (NCHARS + 1) * sizeof(int));
439
440 for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
441 if ((func) (cnt))
442 *p++ = cnt;
443 *p = OOBCH;
444
445 s->cnt = 0;
446 s->state = SET;
447 s->set = cp->set;
448}
449
450static int c_class(a, b)
451const void *a, *b;
452{
453 return (strcmp(((CLASS *) a)->name, ((CLASS *) b)->name));
454}
455
456/*
457 * English doesn't have any equivalence classes, so for now
458 * we just syntax check and grab the character.
459 */
460static void genequiv(s)
461STR *s;
462{
463 if (*s->str == '\\') {
464 s->equiv[0] = backslash(s);
465 if (*s->str != '=')
466 errx(1, "misplaced equivalence equals sign");
467 } else {
468 s->equiv[0] = s->str[0];
469 if (s->str[1] != '=')
470 errx(1, "misplaced equivalence equals sign");
471 }
472 s->str += 2;
473 s->cnt = 0;
474 s->state = SET;
475 s->set = s->equiv;
476}
477
478static int genrange(s)
479STR *s;
480{
481 int stopval;
482 char *savestart;
483
484 savestart = s->str;
485 stopval = *++s->str == '\\' ? backslash(s) : (u_char) * s->str++;
486 if (stopval < (u_char) s->lastch) {
487 s->str = savestart;
488 return (0);
489 }
490 s->cnt = stopval - s->lastch + 1;
491 s->state = RANGE;
492 --s->lastch;
493 return (1);
494}
495
496static void genseq(s)
497STR *s;
498{
499 char *ep;
500
501 if (s->which == STRING1)
502 errx(1, "sequences only valid in string2");
503
504 if (*s->str == '\\')
505 s->lastch = backslash(s);
506 else
507 s->lastch = *s->str++;
508 if (*s->str != '*')
509 errx(1, "misplaced sequence asterisk");
510
511 switch (*++s->str) {
512 case '\\':
513 s->cnt = backslash(s);
514 break;
515 case ']':
516 s->cnt = 0;
517 ++s->str;
518 break;
519 default:
520 if (isdigit((u_char) * s->str)) {
521 s->cnt = strtol(s->str, &ep, 0);
522 if (*ep == ']') {
523 s->str = ep + 1;
524 break;
525 }
526 }
527 errx(1, "illegal sequence count");
528 /* NOTREACHED */
529 }
530
531 s->state = s->cnt ? SEQUENCE : INFINITE;
532}
533
534/*
535 * Translate \??? into a character. Up to 3 octal digits, if no digits either
536 * an escape code or a literal character.
537 */
538static int backslash(s)
539register STR *s;
540{
541 register int ch, cnt, val;
542
543 for (cnt = val = 0;;) {
544 ch = (u_char) * ++s->str;
545 if (!isascii(ch) || !isdigit(ch))
546 break;
547 val = val * 8 + ch - '0';
548 if (++cnt == 3) {
549 ++s->str;
550 break;
551 }
552 }
553 if (cnt)
554 return (val);
555 if (ch != '\0')
556 ++s->str;
557 switch (ch) {
558 case 'a': /* escape characters */
559 return ('\7');
560 case 'b':
561 return ('\b');
562 case 'f':
563 return ('\f');
564 case 'n':
565 return ('\n');
566 case 'r':
567 return ('\r');
568 case 't':
569 return ('\t');
570 case 'v':
571 return ('\13');
572 case '\0': /* \" -> \ */
573 s->state = EOS;
574 return ('\\');
575 default: /* \x" -> x */
576 return (ch);
577 }
578}