Blame - tr.c - codeaurora/busybox

blob: 8ac09e64175f6f8d49b81132aa69ce7690bb34ea [file] [log] [blame]

Erik Andersen	fb002d0	2000-03-05 08:07:00 +0000	[diff] [blame]	1	/* vi: set sw=4 ts=4: */
				2	/*
				3	* Copyright (c) 1988, 1993
				4	* The Regents of the University of California. All rights reserved.
				5	*
				6	* Redistribution and use in source and binary forms, with or without
				7	* modification, are permitted provided that the following conditions
				8	* are met:
				9	* 1. Redistributions of source code must retain the above copyright
				10	* notice, this list of conditions and the following disclaimer.
				11	* 2. Redistributions in binary form must reproduce the above copyright
				12	* notice, this list of conditions and the following disclaimer in the
				13	* documentation and/or other materials provided with the distribution.
				14	* 3. All advertising materials mentioning features or use of this software
				15	* must display the following acknowledgement:
				16	* This product includes software developed by the University of
				17	* California, Berkeley and its contributors.
				18	* 4. Neither the name of the University nor the names of its contributors
				19	* may be used to endorse or promote products derived from this software
				20	* without specific prior written permission.
				21	*
				22	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
				23	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				24	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				25	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
				26	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				27	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
				28	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
				29	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
				30	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
				31	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
				32	* SUCH DAMAGE.
				33	*/
				34
				35	#if 0
				36	#ifndef lint
				37	static const char copyright[] = "@(#) Copyright (c) 1988, 1993\n\
				38	The Regents of the University of California. All rights reserved.\n";
				39	#endif /* not lint */
				40
				41	#ifndef lint
				42	#if 0
				43	static char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95";
				44	#endif
				45	static const char rcsid[] =
				46
				47	"$Id: tr.c,v 1.1 2000/03/05 08:07:00 erik Exp $";
				48	#endif /* not lint */
				49	#endif /* #if 0 */
				50
				51	#include "internal.h"
				52	#include <locale.h>
				53	#include <sys/types.h>
				54	#include <sys/cdefs.h>
				55	#include <sys/types.h>
				56
				57	#include <err.h>
				58	#include <stdio.h>
				59	#include <stdlib.h>
				60	#include <string.h>
				61	#include <unistd.h>
				62
				63	#include <ctype.h>
				64	#include <err.h>
				65	#include <stddef.h>
				66
				67	typedef struct {
				68	enum { STRING1, STRING2 } which;
				69	enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state;
				70	int cnt; /* character count */
				71	int lastch; /* last character */
				72	int equiv[2]; /* equivalence set */
				73	int set; / set of characters */
				74	char str; / user's string */
				75	} STR;
				76
				77	#include <limits.h>
				78	#define NCHARS (UCHAR_MAX + 1) /* Number of possible characters. */
				79	#define OOBCH (UCHAR_MAX + 1) /* Out of band character value. */
				80
				81	static int next __P((STR *));
				82
				83	static int string1[NCHARS] = {
				84	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ASCII */
				85	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
				86	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
				87	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
				88	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
				89	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
				90	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
				91	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
				92	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
				93	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
				94	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
				95	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
				96	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
				97	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
				98	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
				99	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
				100	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
				101	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
				102	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
				103	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
				104	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
				105	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
				106	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
				107	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
				108	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
				109	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
				110	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
				111	0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
				112	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
				113	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
				114	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
				115	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
				116	}, string2[NCHARS];
				117
				118	STR s1 = { STRING1, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL };
				119	STR s2 = { STRING2, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL };
				120
				121	static void setup(string, arg, str, cflag)
				122	int *string;
				123	char *arg;
				124	STR *str;
				125	int cflag;
				126	{
				127	register int cnt, *p;
				128
				129	str->str = arg;
				130	bzero(string, NCHARS * sizeof(int));
				131
				132	while (next(str))
				133	string[str->lastch] = 1;
				134	if (cflag)
				135	for (p = string, cnt = NCHARS; cnt--; ++p)
				136	p = !p;
				137	}
				138
				139	static void tr_usage()
				140	{
				141	(void) fprintf(stderr, "%s\n%s\n%s\n%s\n",
				142	"usage: tr [-csu] string1 string2",
				143	" tr [-cu] -d string1",
				144	" tr [-cu] -s string1",
				145	" tr [-cu] -ds string1 string2");
				146	exit(1);
				147	}
				148
				149
				150	extern int tr_main(argc, argv)
				151	int argc;
				152	char **argv;
				153	{
				154	register int ch, cnt, lastch, *p;
				155	int cflag, dflag, sflag, isstring2;
				156
				157	(void) setlocale(LC_CTYPE, "");
				158
				159	cflag = dflag = sflag = 0;
				160	while ((ch = getopt(argc, argv, "cdsu")) != -1)
				161	switch ((char) ch) {
				162	case 'c':
				163	cflag = 1;
				164	break;
				165	case 'd':
				166	dflag = 1;
				167	break;
				168	case 's':
				169	sflag = 1;
				170	break;
				171	case 'u':
				172	setbuf(stdout, (char *) NULL);
				173	break;
				174	case '?':
				175	default:
				176	tr_usage();
				177	}
				178	argc -= optind;
				179	argv += optind;
				180
				181	switch (argc) {
				182	case 0:
				183	default:
				184	tr_usage();
				185	/* NOTREACHED */
				186	case 1:
				187	isstring2 = 0;
				188	break;
				189	case 2:
				190	isstring2 = 1;
				191	break;
				192	}
				193
				194	/*
				195	* tr -ds [-c] string1 string2
				196	* Delete all characters (or complemented characters) in string1.
				197	* Squeeze all characters in string2.
				198	*/
				199	if (dflag && sflag) {
				200	if (!isstring2)
				201	tr_usage();
				202
				203	setup(string1, argv[0], &s1, cflag);
				204	setup(string2, argv[1], &s2, 0);
				205
				206	for (lastch = OOBCH; (ch = getchar()) != EOF;)
				207	if (!string1[ch] && (!string2[ch] \|\| lastch != ch)) {
				208	lastch = ch;
				209	(void) putchar(ch);
				210	}
				211	exit(0);
				212	}
				213
				214	/*
				215	* tr -d [-c] string1
				216	* Delete all characters (or complemented characters) in string1.
				217	*/
				218	if (dflag) {
				219	if (isstring2)
				220	tr_usage();
				221
				222	setup(string1, argv[0], &s1, cflag);
				223
				224	while ((ch = getchar()) != EOF)
				225	if (!string1[ch])
				226	(void) putchar(ch);
				227	exit(0);
				228	}
				229
				230	/*
				231	* tr -s [-c] string1
				232	* Squeeze all characters (or complemented characters) in string1.
				233	*/
				234	if (sflag && !isstring2) {
				235	setup(string1, argv[0], &s1, cflag);
				236
				237	for (lastch = OOBCH; (ch = getchar()) != EOF;)
				238	if (!string1[ch] \|\| lastch != ch) {
				239	lastch = ch;
				240	(void) putchar(ch);
				241	}
				242	exit(0);
				243	}
				244
				245	/*
				246	* tr [-cs] string1 string2
				247	* Replace all characters (or complemented characters) in string1 with
				248	* the character in the same position in string2. If the -s option is
				249	* specified, squeeze all the characters in string2.
				250	*/
				251	if (!isstring2)
				252	tr_usage();
				253
				254	s1.str = argv[0];
				255	s2.str = argv[1];
				256
				257	if (cflag)
				258	for (cnt = NCHARS, p = string1; cnt--;)
				259	*p++ = OOBCH;
				260
				261	if (!next(&s2))
				262	errx(1, "empty string2");
				263
				264	/* If string2 runs out of characters, use the last one specified. */
				265	if (sflag)
				266	while (next(&s1)) {
				267	string1[s1.lastch] = ch = s2.lastch;
				268	string2[ch] = 1;
				269	(void) next(&s2);
				270	} else
				271	while (next(&s1)) {
				272	string1[s1.lastch] = ch = s2.lastch;
				273	(void) next(&s2);
				274	}
				275
				276	if (cflag)
				277	for (cnt = 0, p = string1; cnt < NCHARS; ++p, ++cnt)
				278	p = p == OOBCH ? ch : cnt;
				279
				280	if (sflag)
				281	for (lastch = OOBCH; (ch = getchar()) != EOF;) {
				282	ch = string1[ch];
				283	if (!string2[ch] \|\| lastch != ch) {
				284	lastch = ch;
				285	(void) putchar(ch);
				286	}
				287	} else
				288	while ((ch = getchar()) != EOF)
				289	(void) putchar(string1[ch]);
				290	exit(0);
				291	}
				292
				293	static int backslash __P((STR *));
				294	static int bracket __P((STR *));
				295	static int c_class __P((const void , const void ));
				296	static void genclass __P((STR *));
				297	static void genequiv __P((STR *));
				298	static int genrange __P((STR *));
				299	static void genseq __P((STR *));
				300
				301	static int next(s)
				302	register STR *s;
				303	{
				304	register int ch;
				305
				306	switch (s->state) {
				307	case EOS:
				308	return (0);
				309	case INFINITE:
				310	return (1);
				311	case NORMAL:
				312	switch (ch = (u_char) * s->str) {
				313	case '\0':
				314	s->state = EOS;
				315	return (0);
				316	case '\\':
				317	s->lastch = backslash(s);
				318	break;
				319	case '[':
				320	if (bracket(s))
				321	return (next(s));
				322	/* FALLTHROUGH */
				323	default:
				324	++s->str;
				325	s->lastch = ch;
				326	break;
				327	}
				328
				329	/* We can start a range at any time. */
				330	if (s->str[0] == '-' && genrange(s))
				331	return (next(s));
				332	return (1);
				333	case RANGE:
				334	if (s->cnt-- == 0) {
				335	s->state = NORMAL;
				336	return (next(s));
				337	}
				338	++s->lastch;
				339	return (1);
				340	case SEQUENCE:
				341	if (s->cnt-- == 0) {
				342	s->state = NORMAL;
				343	return (next(s));
				344	}
				345	return (1);
				346	case SET:
				347	if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
				348	s->state = NORMAL;
				349	return (next(s));
				350	}
				351	return (1);
				352	}
				353	/* NOTREACHED */
				354	return (0);
				355	}
				356
				357	static int bracket(s)
				358	register STR *s;
				359	{
				360	register char *p;
				361
				362	switch (s->str[1]) {
				363	case ':': /* "[:class:]" */
				364	if ((p = strstr(s->str + 2, ":]")) == NULL)
				365	return (0);
				366	*p = '\0';
				367	s->str += 2;
				368	genclass(s);
				369	s->str = p + 2;
				370	return (1);
				371	case '=': /* "[=equiv=]" */
				372	if ((p = strstr(s->str + 2, "=]")) == NULL)
				373	return (0);
				374	s->str += 2;
				375	genequiv(s);
				376	return (1);
				377	default: /* "[\###n]" or "[#n]" */
				378	if ((p = strpbrk(s->str + 2, "*]")) == NULL)
				379	return (0);
				380	if (p[0] != '*' \|\| index(p, ']') == NULL)
				381	return (0);
				382	s->str += 1;
				383	genseq(s);
				384	return (1);
				385	}
				386	/* NOTREACHED */
				387	}
				388
				389	typedef struct {
				390	char *name;
				391	int (*func) __P((int));
				392	int *set;
				393	} CLASS;
				394
				395	static CLASS classes[] = {
				396	#undef isalnum
				397	{"alnum", isalnum,},
				398	#undef isalpha
				399	{"alpha", isalpha,},
				400	/*#undef isblank
				401	{ "blank", isblank, },*/
				402	#undef iscntrl
				403	{"cntrl", iscntrl,},
				404	#undef isdigit
				405	{"digit", isdigit,},
				406	#undef isgraph
				407	{"graph", isgraph,},
				408	#undef islower
				409	{"lower", islower,},
				410	#undef isprint
				411	{"print", isprint,},
				412	#undef ispunct
				413	{"punct", ispunct,},
				414	#undef isspace
				415	{"space", isspace,},
				416	#undef isupper
				417	{"upper", isupper,},
				418	#undef isxdigit
				419	{"xdigit", isxdigit,},
				420	};
				421
				422	static void genclass(s)
				423	STR *s;
				424	{
				425	register int cnt, (*func) __P((int));
				426	CLASS *cp, tmp;
				427	int *p;
				428
				429	tmp.name = s->str;
				430	if ((cp = (CLASS *) bsearch(&tmp, classes, sizeof(classes) /
				431	sizeof(CLASS), sizeof(CLASS),
				432	c_class)) == NULL) errx(1,
				433	"unknown class %s",
				434	s->str);
				435
				436	if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
				437	errx(1, "malloc");
				438	bzero(p, (NCHARS + 1) * sizeof(int));
				439
				440	for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
				441	if ((func) (cnt))
				442	*p++ = cnt;
				443	*p = OOBCH;
				444
				445	s->cnt = 0;
				446	s->state = SET;
				447	s->set = cp->set;
				448	}
				449
				450	static int c_class(a, b)
				451	const void a, b;
				452	{
				453	return (strcmp(((CLASS ) a)->name, ((CLASS ) b)->name));
				454	}
				455
				456	/*
				457	* English doesn't have any equivalence classes, so for now
				458	* we just syntax check and grab the character.
				459	*/
				460	static void genequiv(s)
				461	STR *s;
				462	{
				463	if (*s->str == '\\') {
				464	s->equiv[0] = backslash(s);
				465	if (*s->str != '=')
				466	errx(1, "misplaced equivalence equals sign");
				467	} else {
				468	s->equiv[0] = s->str[0];
				469	if (s->str[1] != '=')
				470	errx(1, "misplaced equivalence equals sign");
				471	}
				472	s->str += 2;
				473	s->cnt = 0;
				474	s->state = SET;
				475	s->set = s->equiv;
				476	}
				477
				478	static int genrange(s)
				479	STR *s;
				480	{
				481	int stopval;
				482	char *savestart;
				483
				484	savestart = s->str;
				485	stopval = ++s->str == '\\' ? backslash(s) : (u_char) s->str++;
				486	if (stopval < (u_char) s->lastch) {
				487	s->str = savestart;
				488	return (0);
				489	}
				490	s->cnt = stopval - s->lastch + 1;
				491	s->state = RANGE;
				492	--s->lastch;
				493	return (1);
				494	}
				495
				496	static void genseq(s)
				497	STR *s;
				498	{
				499	char *ep;
				500
				501	if (s->which == STRING1)
				502	errx(1, "sequences only valid in string2");
				503
				504	if (*s->str == '\\')
				505	s->lastch = backslash(s);
				506	else
				507	s->lastch = *s->str++;
				508	if (s->str != '')
				509	errx(1, "misplaced sequence asterisk");
				510
				511	switch (*++s->str) {
				512	case '\\':
				513	s->cnt = backslash(s);
				514	break;
				515	case ']':
				516	s->cnt = 0;
				517	++s->str;
				518	break;
				519	default:
				520	if (isdigit((u_char) * s->str)) {
				521	s->cnt = strtol(s->str, &ep, 0);
				522	if (*ep == ']') {
				523	s->str = ep + 1;
				524	break;
				525	}
				526	}
				527	errx(1, "illegal sequence count");
				528	/* NOTREACHED */
				529	}
				530
				531	s->state = s->cnt ? SEQUENCE : INFINITE;
				532	}
				533
				534	/*
				535	* Translate \??? into a character. Up to 3 octal digits, if no digits either
				536	* an escape code or a literal character.
				537	*/
				538	static int backslash(s)
				539	register STR *s;
				540	{
				541	register int ch, cnt, val;
				542
				543	for (cnt = val = 0;;) {
				544	ch = (u_char) * ++s->str;
				545	if (!isascii(ch) \|\| !isdigit(ch))
				546	break;
				547	val = val * 8 + ch - '0';
				548	if (++cnt == 3) {
				549	++s->str;
				550	break;
				551	}
				552	}
				553	if (cnt)
				554	return (val);
				555	if (ch != '\0')
				556	++s->str;
				557	switch (ch) {
				558	case 'a': /* escape characters */
				559	return ('\7');
				560	case 'b':
				561	return ('\b');
				562	case 'f':
				563	return ('\f');
				564	case 'n':
				565	return ('\n');
				566	case 'r':
				567	return ('\r');
				568	case 't':
				569	return ('\t');
				570	case 'v':
				571	return ('\13');
				572	case '\0': /* \" -> \ */
				573	s->state = EOS;
				574	return ('\\');
				575	default: /* \x" -> x */
				576	return (ch);
				577	}
				578	}