blob: 3bfa4808056d4dc0c694ee30bc886f4ce5814d66 [file] [log] [blame]
Erik Andersenfb002d02000-03-05 08:07:00 +00001/* vi: set sw=4 ts=4: */
2/*
3 * Copyright (c) 1988, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the University of
17 * California, Berkeley and its contributors.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#if 0
36#ifndef lint
37static const char copyright[] = "@(#) Copyright (c) 1988, 1993\n\
38 The Regents of the University of California. All rights reserved.\n";
39#endif /* not lint */
40
41#ifndef lint
42#if 0
43static char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95";
44#endif
45static const char rcsid[] =
46
Erik Andersen0d068a22000-03-21 22:32:57 +000047 "$Id: tr.c,v 1.2 2000/03/21 22:32:57 erik Exp $";
Erik Andersenfb002d02000-03-05 08:07:00 +000048#endif /* not lint */
49#endif /* #if 0 */
50
51#include "internal.h"
52#include <locale.h>
53#include <sys/types.h>
54#include <sys/cdefs.h>
55#include <sys/types.h>
56
57#include <err.h>
58#include <stdio.h>
59#include <stdlib.h>
60#include <string.h>
61#include <unistd.h>
62
63#include <ctype.h>
64#include <err.h>
65#include <stddef.h>
66
/*
 * Parsing state for one of tr's two operand strings.  next() advances the
 * state and reports each character it produces through lastch.
 */
typedef struct {
	enum { STRING1, STRING2 } which;	/* which operand this is */
	enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state;	/* current parse mode */
	int cnt;			/* items left (RANGE, SEQUENCE) or index into set (SET) */
	int lastch;			/* character most recently produced */
	int equiv[2];			/* one-element list used for "[=c=]" */
	int *set;			/* list of characters walked while in SET mode */
	char *str;			/* not yet parsed part of the user's string */
} STR;
76
77#include <limits.h>
78#define NCHARS (UCHAR_MAX + 1) /* Number of possible characters. */
79#define OOBCH (UCHAR_MAX + 1) /* Out of band character value. */
80
81static int next __P((STR *));
82
83static int string1[NCHARS] = {
84 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ASCII */
85 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
86 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
87 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
88 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
89 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
90 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
91 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
92 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
93 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
94 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
95 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
96 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
97 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
98 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
99 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
100 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
101 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
102 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
103 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
104 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
105 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
106 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
107 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
108 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
109 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
110 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
111 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
112 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
113 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
114 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
115 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
116}, string2[NCHARS];
117
118STR s1 = { STRING1, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL };
119STR s2 = { STRING2, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL };
120
121static void setup(string, arg, str, cflag)
122int *string;
123char *arg;
124STR *str;
125int cflag;
126{
127 register int cnt, *p;
128
129 str->str = arg;
130 bzero(string, NCHARS * sizeof(int));
131
132 while (next(str))
133 string[str->lastch] = 1;
134 if (cflag)
135 for (p = string, cnt = NCHARS; cnt--; ++p)
136 *p = !*p;
137}
138
/* Write the four-line usage summary to stderr, then exit with status 1. */
static void tr_usage(void)
{
	fprintf(stderr, "%s\n%s\n%s\n%s\n",
			"usage: tr [-csu] string1 string2",
			" tr [-cu] -d string1",
			" tr [-cu] -s string1",
			" tr [-cu] -ds string1 string2");
	exit(1);
}
148
149
150extern int tr_main(argc, argv)
151int argc;
152char **argv;
153{
154 register int ch, cnt, lastch, *p;
155 int cflag, dflag, sflag, isstring2;
156
157 (void) setlocale(LC_CTYPE, "");
158
159 cflag = dflag = sflag = 0;
160 while ((ch = getopt(argc, argv, "cdsu")) != -1)
161 switch ((char) ch) {
162 case 'c':
163 cflag = 1;
164 break;
165 case 'd':
166 dflag = 1;
167 break;
168 case 's':
169 sflag = 1;
170 break;
171 case 'u':
172 setbuf(stdout, (char *) NULL);
173 break;
174 case '?':
175 default:
176 tr_usage();
177 }
178 argc -= optind;
179 argv += optind;
180
181 switch (argc) {
182 case 0:
183 default:
184 tr_usage();
185 /* NOTREACHED */
186 case 1:
187 isstring2 = 0;
188 break;
189 case 2:
190 isstring2 = 1;
191 break;
192 }
193
194 /*
195 * tr -ds [-c] string1 string2
196 * Delete all characters (or complemented characters) in string1.
197 * Squeeze all characters in string2.
198 */
199 if (dflag && sflag) {
200 if (!isstring2)
201 tr_usage();
202
203 setup(string1, argv[0], &s1, cflag);
204 setup(string2, argv[1], &s2, 0);
205
206 for (lastch = OOBCH; (ch = getchar()) != EOF;)
207 if (!string1[ch] && (!string2[ch] || lastch != ch)) {
208 lastch = ch;
209 (void) putchar(ch);
210 }
211 exit(0);
212 }
213
214 /*
215 * tr -d [-c] string1
216 * Delete all characters (or complemented characters) in string1.
217 */
218 if (dflag) {
219 if (isstring2)
220 tr_usage();
221
222 setup(string1, argv[0], &s1, cflag);
223
224 while ((ch = getchar()) != EOF)
225 if (!string1[ch])
226 (void) putchar(ch);
227 exit(0);
228 }
229
230 /*
231 * tr -s [-c] string1
232 * Squeeze all characters (or complemented characters) in string1.
233 */
234 if (sflag && !isstring2) {
235 setup(string1, argv[0], &s1, cflag);
236
237 for (lastch = OOBCH; (ch = getchar()) != EOF;)
238 if (!string1[ch] || lastch != ch) {
239 lastch = ch;
240 (void) putchar(ch);
241 }
242 exit(0);
243 }
244
245 /*
246 * tr [-cs] string1 string2
247 * Replace all characters (or complemented characters) in string1 with
248 * the character in the same position in string2. If the -s option is
249 * specified, squeeze all the characters in string2.
250 */
251 if (!isstring2)
252 tr_usage();
253
254 s1.str = argv[0];
255 s2.str = argv[1];
256
257 if (cflag)
258 for (cnt = NCHARS, p = string1; cnt--;)
259 *p++ = OOBCH;
260
261 if (!next(&s2))
262 errx(1, "empty string2");
263
264 /* If string2 runs out of characters, use the last one specified. */
265 if (sflag)
266 while (next(&s1)) {
267 string1[s1.lastch] = ch = s2.lastch;
268 string2[ch] = 1;
269 (void) next(&s2);
270 } else
271 while (next(&s1)) {
272 string1[s1.lastch] = ch = s2.lastch;
273 (void) next(&s2);
274 }
275
276 if (cflag)
277 for (cnt = 0, p = string1; cnt < NCHARS; ++p, ++cnt)
278 *p = *p == OOBCH ? ch : cnt;
279
280 if (sflag)
281 for (lastch = OOBCH; (ch = getchar()) != EOF;) {
282 ch = string1[ch];
283 if (!string2[ch] || lastch != ch) {
284 lastch = ch;
285 (void) putchar(ch);
286 }
287 } else
288 while ((ch = getchar()) != EOF)
289 (void) putchar(string1[ch]);
290 exit(0);
291}
292
293static int backslash __P((STR *));
294static int bracket __P((STR *));
295static int c_class __P((const void *, const void *));
296static void genclass __P((STR *));
297static void genequiv __P((STR *));
298static int genrange __P((STR *));
299static void genseq __P((STR *));
300
301static int next(s)
302register STR *s;
303{
304 register int ch;
305
306 switch (s->state) {
307 case EOS:
308 return (0);
309 case INFINITE:
310 return (1);
311 case NORMAL:
312 switch (ch = (u_char) * s->str) {
313 case '\0':
314 s->state = EOS;
315 return (0);
316 case '\\':
317 s->lastch = backslash(s);
318 break;
319 case '[':
320 if (bracket(s))
321 return (next(s));
322 /* FALLTHROUGH */
323 default:
324 ++s->str;
325 s->lastch = ch;
326 break;
327 }
328
329 /* We can start a range at any time. */
330 if (s->str[0] == '-' && genrange(s))
331 return (next(s));
332 return (1);
333 case RANGE:
334 if (s->cnt-- == 0) {
335 s->state = NORMAL;
336 return (next(s));
337 }
338 ++s->lastch;
339 return (1);
340 case SEQUENCE:
341 if (s->cnt-- == 0) {
342 s->state = NORMAL;
343 return (next(s));
344 }
345 return (1);
346 case SET:
347 if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
348 s->state = NORMAL;
349 return (next(s));
350 }
351 return (1);
352 }
353 /* NOTREACHED */
354 return (0);
355}
356
357static int bracket(s)
358register STR *s;
359{
360 register char *p;
361
362 switch (s->str[1]) {
363 case ':': /* "[:class:]" */
364 if ((p = strstr(s->str + 2, ":]")) == NULL)
365 return (0);
366 *p = '\0';
367 s->str += 2;
368 genclass(s);
369 s->str = p + 2;
370 return (1);
371 case '=': /* "[=equiv=]" */
372 if ((p = strstr(s->str + 2, "=]")) == NULL)
373 return (0);
374 s->str += 2;
375 genequiv(s);
376 return (1);
377 default: /* "[\###*n]" or "[#*n]" */
378 if ((p = strpbrk(s->str + 2, "*]")) == NULL)
379 return (0);
380 if (p[0] != '*' || index(p, ']') == NULL)
381 return (0);
382 s->str += 1;
383 genseq(s);
384 return (1);
385 }
386 /* NOTREACHED */
387}
388
/*
 * One named character class as used by "[:name:]".
 */
typedef struct {
	const char *name;		/* class name; only ever read, so const */
	int (*func)(int);		/* <ctype.h>-style membership predicate */
	int *set;			/* list of member characters, filled in by genclass() */
} CLASS;
394
395static CLASS classes[] = {
396#undef isalnum
397 {"alnum", isalnum,},
398#undef isalpha
399 {"alpha", isalpha,},
400/*#undef isblank
401 { "blank", isblank, },*/
402#undef iscntrl
403 {"cntrl", iscntrl,},
404#undef isdigit
405 {"digit", isdigit,},
406#undef isgraph
407 {"graph", isgraph,},
408#undef islower
409 {"lower", islower,},
410#undef isprint
411 {"print", isprint,},
412#undef ispunct
413 {"punct", ispunct,},
414#undef isspace
415 {"space", isspace,},
416#undef isupper
417 {"upper", isupper,},
418#undef isxdigit
419 {"xdigit", isxdigit,},
420};
421
422static void genclass(s)
423STR *s;
424{
425 register int cnt, (*func) __P((int));
426 CLASS *cp, tmp;
427 int *p;
428
429 tmp.name = s->str;
430 if ((cp = (CLASS *) bsearch(&tmp, classes, sizeof(classes) /
431 sizeof(CLASS), sizeof(CLASS),
432 c_class)) == NULL) errx(1,
433 "unknown class %s",
434 s->str);
435
Erik Andersen0d068a22000-03-21 22:32:57 +0000436 cp->set = p = xmalloc((NCHARS + 1) * sizeof(int));
Erik Andersenfb002d02000-03-05 08:07:00 +0000437 bzero(p, (NCHARS + 1) * sizeof(int));
438
439 for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
440 if ((func) (cnt))
441 *p++ = cnt;
442 *p = OOBCH;
443
444 s->cnt = 0;
445 s->state = SET;
446 s->set = cp->set;
447}
448
449static int c_class(a, b)
450const void *a, *b;
451{
452 return (strcmp(((CLASS *) a)->name, ((CLASS *) b)->name));
453}
454
455/*
456 * English doesn't have any equivalence classes, so for now
457 * we just syntax check and grab the character.
458 */
459static void genequiv(s)
460STR *s;
461{
462 if (*s->str == '\\') {
463 s->equiv[0] = backslash(s);
464 if (*s->str != '=')
465 errx(1, "misplaced equivalence equals sign");
466 } else {
467 s->equiv[0] = s->str[0];
468 if (s->str[1] != '=')
469 errx(1, "misplaced equivalence equals sign");
470 }
471 s->str += 2;
472 s->cnt = 0;
473 s->state = SET;
474 s->set = s->equiv;
475}
476
477static int genrange(s)
478STR *s;
479{
480 int stopval;
481 char *savestart;
482
483 savestart = s->str;
484 stopval = *++s->str == '\\' ? backslash(s) : (u_char) * s->str++;
485 if (stopval < (u_char) s->lastch) {
486 s->str = savestart;
487 return (0);
488 }
489 s->cnt = stopval - s->lastch + 1;
490 s->state = RANGE;
491 --s->lastch;
492 return (1);
493}
494
495static void genseq(s)
496STR *s;
497{
498 char *ep;
499
500 if (s->which == STRING1)
501 errx(1, "sequences only valid in string2");
502
503 if (*s->str == '\\')
504 s->lastch = backslash(s);
505 else
506 s->lastch = *s->str++;
507 if (*s->str != '*')
508 errx(1, "misplaced sequence asterisk");
509
510 switch (*++s->str) {
511 case '\\':
512 s->cnt = backslash(s);
513 break;
514 case ']':
515 s->cnt = 0;
516 ++s->str;
517 break;
518 default:
519 if (isdigit((u_char) * s->str)) {
520 s->cnt = strtol(s->str, &ep, 0);
521 if (*ep == ']') {
522 s->str = ep + 1;
523 break;
524 }
525 }
526 errx(1, "illegal sequence count");
527 /* NOTREACHED */
528 }
529
530 s->state = s->cnt ? SEQUENCE : INFINITE;
531}
532
533/*
534 * Translate \??? into a character. Up to 3 octal digits, if no digits either
535 * an escape code or a literal character.
536 */
537static int backslash(s)
538register STR *s;
539{
540 register int ch, cnt, val;
541
542 for (cnt = val = 0;;) {
543 ch = (u_char) * ++s->str;
544 if (!isascii(ch) || !isdigit(ch))
545 break;
546 val = val * 8 + ch - '0';
547 if (++cnt == 3) {
548 ++s->str;
549 break;
550 }
551 }
552 if (cnt)
553 return (val);
554 if (ch != '\0')
555 ++s->str;
556 switch (ch) {
557 case 'a': /* escape characters */
558 return ('\7');
559 case 'b':
560 return ('\b');
561 case 'f':
562 return ('\f');
563 case 'n':
564 return ('\n');
565 case 'r':
566 return ('\r');
567 case 't':
568 return ('\t');
569 case 'v':
570 return ('\13');
571 case '\0': /* \" -> \ */
572 s->state = EOS;
573 return ('\\');
574 default: /* \x" -> x */
575 return (ch);
576 }
577}