blob: 6eb86750d0417afb091d0fd89f96b20595908b9d [file] [log] [blame]
Erik Andersenfb002d02000-03-05 08:07:00 +00001/* vi: set sw=4 ts=4: */
2/*
Erik Andersen8f8d6d52000-05-01 22:30:37 +00003 * Mini tr implementation for busybox
Erik Andersenfb002d02000-03-05 08:07:00 +00004 *
Rob Landleycd545282006-06-30 16:35:40 +00005 ** Copyright (c) 1987,1997, Prentice Hall All rights reserved.
6 *
7 * The name of Prentice Hall may not be used to endorse or promote
8 * products derived from this software without specific prior
9 * written permission.
10 *
Erik Andersen5afc8642000-05-02 00:07:56 +000011 * Copyright (c) Michiel Huisjes
12 *
Eric Andersenc7bda1c2004-03-15 08:29:22 +000013 * This version of tr is adapted from Minix tr and was modified
Eric Andersencb81e642003-07-14 21:21:08 +000014 * by Erik Andersen <andersen@codepoet.org> to be used in busybox.
Erik Andersenfb002d02000-03-05 08:07:00 +000015 *
Rob Landleycd545282006-06-30 16:35:40 +000016 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
Erik Andersenfb002d02000-03-05 08:07:00 +000017 */
18
Eric Andersencbe31da2001-02-20 06:14:08 +000019#include "busybox.h"
Erik Andersen330fd2b2000-05-19 05:35:19 +000020
/*
 * gcc warns about a plain char used as an array subscript even when the
 * build uses -funsigned-char; subscripts are cast through this type to
 * keep it quiet.
 */
#define GCC4_IS_STUPID int

/* Largest byte value (octal 0377 == 255); lookup tables hold ASCII+1 entries. */
#define ASCII 0377

/*
 * State shared by the functions in this file.
 * Option flags set in tr_main: -c (complement), -d (delete), -s (squeeze).
 */
static char com_fl;
static char del_fl;
static char sq_fl;

/* Aliases for the buffers reserved in tr_main. */
static char *poutput;	/* translated bytes waiting to be written */
static char *pvector;	/* input byte -> output byte */
static char *pinvec;	/* nonzero for bytes that appear in STRING1 */
static char *poutvec;	/* nonzero for bytes that appear in STRING2 */
Erik Andersenfb002d02000-03-05 08:07:00 +000031
Eric Anderseneaecbf32001-10-31 10:41:31 +000032static void convert(void)
Erik Andersenfb002d02000-03-05 08:07:00 +000033{
Rob Landleycd545282006-06-30 16:35:40 +000034 int read_chars = 0, in_index = 0, out_index = 0, c, coded, last = -1;
Erik Andersenfb002d02000-03-05 08:07:00 +000035
Erik Andersen8f8d6d52000-05-01 22:30:37 +000036 for (;;) {
Rob Landleycd545282006-06-30 16:35:40 +000037 // If we're out of input, flush output and read more input.
38
Erik Andersen8f8d6d52000-05-01 22:30:37 +000039 if (in_index == read_chars) {
Rob Landleycd545282006-06-30 16:35:40 +000040 if (out_index) {
41 if (write(1, (char *) poutput, out_index) != out_index)
42 bb_error_msg_and_die(bb_msg_write_error);
43 out_index = 0;
44 }
45
46 if ((read_chars = read(0, bb_common_bufsiz1, BUFSIZ)) <= 0) {
Mark Whitley59ab0252001-01-23 22:30:04 +000047 if (write(1, (char *) poutput, out_index) != out_index)
Manuel Novoa III cad53642003-03-19 09:13:01 +000048 bb_error_msg(bb_msg_write_error);
Erik Andersen8f8d6d52000-05-01 22:30:37 +000049 exit(0);
Erik Andersenfb002d02000-03-05 08:07:00 +000050 }
Erik Andersen8f8d6d52000-05-01 22:30:37 +000051 in_index = 0;
52 }
Rob Landleycd545282006-06-30 16:35:40 +000053 c = bb_common_bufsiz1[in_index++];
Mark Whitley59ab0252001-01-23 22:30:04 +000054 coded = pvector[c];
55 if (del_fl && pinvec[c])
Erik Andersen8f8d6d52000-05-01 22:30:37 +000056 continue;
Mark Whitley59ab0252001-01-23 22:30:04 +000057 if (sq_fl && last == coded && (pinvec[c] || poutvec[coded]))
Erik Andersen8f8d6d52000-05-01 22:30:37 +000058 continue;
Mark Whitley59ab0252001-01-23 22:30:04 +000059 poutput[out_index++] = last = coded;
Erik Andersenfb002d02000-03-05 08:07:00 +000060 }
61
Erik Andersenfb002d02000-03-05 08:07:00 +000062 /* NOTREACHED */
Erik Andersen8f8d6d52000-05-01 22:30:37 +000063}
64
Rob Landleyab58d5c2006-06-30 19:04:09 +000065static void map(char *string1, unsigned int string1_len,
66 char *string2, unsigned int string2_len)
Erik Andersen8f8d6d52000-05-01 22:30:37 +000067{
Rob Landleyab58d5c2006-06-30 19:04:09 +000068 char last = '0';
Eric Andersen00143ba2000-07-13 16:40:41 +000069 unsigned int i, j;
Erik Andersen8f8d6d52000-05-01 22:30:37 +000070
Eric Andersen00143ba2000-07-13 16:40:41 +000071 for (j = 0, i = 0; i < string1_len; i++) {
72 if (string2_len <= j)
Rob Landleyab58d5c2006-06-30 19:04:09 +000073 pvector[(GCC4_IS_STUPID)string1[i]] = last;
Erik Andersen8f8d6d52000-05-01 22:30:37 +000074 else
Rob Landleyab58d5c2006-06-30 19:04:09 +000075 pvector[(GCC4_IS_STUPID)string1[i]] = last = string2[j++];
Erik Andersen8f8d6d52000-05-01 22:30:37 +000076 }
77}
78
Mark Whitley8b7a0d82001-05-24 21:31:09 +000079/* supported constructs:
80 * Ranges, e.g., [0-9] ==> 0123456789
81 * Escapes, e.g., \a ==> Control-G
Rob Landleyf1048142005-10-08 21:21:08 +000082 * Character classes, e.g. [:upper:] ==> A ... Z
Mark Whitley8b7a0d82001-05-24 21:31:09 +000083 */
Rob Landleyab58d5c2006-06-30 19:04:09 +000084static unsigned int expand(const char *arg, char *buffer)
Erik Andersen8f8d6d52000-05-01 22:30:37 +000085{
Rob Landleyab58d5c2006-06-30 19:04:09 +000086 char *buffer_start = buffer;
Erik Andersen8f8d6d52000-05-01 22:30:37 +000087 int i, ac;
88
89 while (*arg) {
90 if (*arg == '\\') {
91 arg++;
Manuel Novoa III cad53642003-03-19 09:13:01 +000092 *buffer++ = bb_process_escape_sequence(&arg);
Eric Andersen5a4a46a2001-07-09 21:32:29 +000093 } else if (*(arg+1) == '-') {
94 ac = *(arg+2);
95 if(ac == 0) {
96 *buffer++ = *arg++;
97 continue;
98 }
99 i = *arg;
100 while (i <= ac)
101 *buffer++ = i++;
102 arg += 3; /* Skip the assumed a-z */
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000103 } else if (*arg == '[') {
104 arg++;
Rob Landley998dbee2006-04-19 22:22:06 +0000105 i = *arg++;
106 if (ENABLE_FEATURE_TR_CLASSES && i == ':') {
Rob Landleyf1048142005-10-08 21:21:08 +0000107 if (strncmp(arg, "alpha", 5) == 0) {
108 for (i = 'A'; i <= 'Z'; i++)
109 *buffer++ = i;
110 for (i = 'a'; i <= 'z'; i++)
111 *buffer++ = i;
112 }
113 else if (strncmp(arg, "alnum", 5) == 0) {
Rob Landley998dbee2006-04-19 22:22:06 +0000114 for (i = '0'; i <= '9'; i++)
115 *buffer++ = i;
Rob Landleyf1048142005-10-08 21:21:08 +0000116 for (i = 'A'; i <= 'Z'; i++)
117 *buffer++ = i;
118 for (i = 'a'; i <= 'z'; i++)
119 *buffer++ = i;
Rob Landleyf1048142005-10-08 21:21:08 +0000120 }
121 else if (strncmp(arg, "digit", 5) == 0)
122 for (i = '0'; i <= '9'; i++)
123 *buffer++ = i;
124 else if (strncmp(arg, "lower", 5) == 0)
125 for (i = 'a'; i <= 'z'; i++)
126 *buffer++ = i;
127 else if (strncmp(arg, "upper", 5) == 0)
128 for (i = 'A'; i <= 'Z'; i++)
129 *buffer++ = i;
Rob Landley998dbee2006-04-19 22:22:06 +0000130 else if (strncmp(arg, "space", 5) == 0) {
131 const char s[] = "\t\n\v\f\r ";
132 strcat((char*)buffer, s);
133 buffer += sizeof(s) - 1;
134 }
135 else if (strncmp(arg, "blank", 5) == 0) {
136 *buffer++ = '\t';
137 *buffer++ = ' ';
138 }
Rob Landleyf1048142005-10-08 21:21:08 +0000139 /* gcc gives a warning if braces aren't used here */
140 else if (strncmp(arg, "punct", 5) == 0) {
141 for (i = 0; i <= ASCII; i++)
142 if (isprint(i) && (!isalnum(i)) && (!isspace(i)))
143 *buffer++ = i;
144 }
145 else if (strncmp(arg, "cntrl", 5) == 0) {
146 for (i = 0; i <= ASCII; i++)
147 if (iscntrl(i))
148 *buffer++ = i;
149 }
150 else {
Rob Landley998dbee2006-04-19 22:22:06 +0000151 *buffer++ = '[';
152 *buffer++ = ':';
Rob Landleyf1048142005-10-08 21:21:08 +0000153 continue;
154 }
155 break;
156 }
Rob Landley998dbee2006-04-19 22:22:06 +0000157 if (ENABLE_FEATURE_TR_EQUIV && i == '=') {
Rob Landleyf1048142005-10-08 21:21:08 +0000158 *buffer++ = *arg;
159 /* skip the closing =] */
160 arg += 3;
161 continue;
162 }
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000163 if (*arg++ != '-') {
164 *buffer++ = '[';
165 arg -= 2;
166 continue;
167 }
168 ac = *arg++;
169 while (i <= ac)
170 *buffer++ = i++;
Mark Whitley8b7a0d82001-05-24 21:31:09 +0000171 arg++; /* Skip the assumed ']' */
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000172 } else
173 *buffer++ = *arg++;
174 }
Eric Andersen00143ba2000-07-13 16:40:41 +0000175
176 return (buffer - buffer_start);
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000177}
178
Rob Landleyab58d5c2006-06-30 19:04:09 +0000179static int complement(char *buffer, int buffer_len)
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000180{
"Robert P. J. Day"68229832006-07-01 13:08:46 +0000181 short i, j, ix;
Eric Andersenfad04fd2000-07-14 06:49:52 +0000182 char conv[ASCII + 2];
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000183
Eric Andersen1ca20a72001-03-21 07:34:27 +0000184 ix = 0;
Eric Andersen00143ba2000-07-13 16:40:41 +0000185 for (i = 0; i <= ASCII; i++) {
186 for (j = 0; j < buffer_len; j++)
187 if (buffer[j] == i)
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000188 break;
Eric Andersen00143ba2000-07-13 16:40:41 +0000189 if (j == buffer_len)
Eric Andersen1ca20a72001-03-21 07:34:27 +0000190 conv[ix++] = i & ASCII;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000191 }
Eric Andersen1ca20a72001-03-21 07:34:27 +0000192 memcpy(buffer, conv, ix);
193 return ix;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000194}
195
Rob Landleydfba7412006-03-06 20:47:33 +0000196int tr_main(int argc, char **argv)
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000197{
"Robert P. J. Day"68229832006-07-01 13:08:46 +0000198 unsigned char *ptr;
Eric Andersenfad04fd2000-07-14 06:49:52 +0000199 int output_length=0, input_length;
Eric Andersenf6aa13d2001-03-23 17:08:21 +0000200 int idx = 1;
Eric Andersenfad04fd2000-07-14 06:49:52 +0000201 int i;
Eric Andersenbdfd0d72001-10-24 05:00:29 +0000202 RESERVE_CONFIG_BUFFER(output, BUFSIZ);
Rob Landley998f4492006-04-10 16:40:47 +0000203 RESERVE_CONFIG_BUFFER(vector, ASCII+1);
Eric Andersenbdfd0d72001-10-24 05:00:29 +0000204 RESERVE_CONFIG_BUFFER(invec, ASCII+1);
205 RESERVE_CONFIG_BUFFER(outvec, ASCII+1);
Mark Whitley59ab0252001-01-23 22:30:04 +0000206
207 /* ... but make them available globally */
Rob Landleyab58d5c2006-06-30 19:04:09 +0000208 poutput = output;
209 pvector = vector;
210 pinvec = invec;
211 poutvec = outvec;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000212
Eric Andersenf6aa13d2001-03-23 17:08:21 +0000213 if (argc > 1 && argv[idx][0] == '-') {
214 for (ptr = (unsigned char *) &argv[idx][1]; *ptr; ptr++) {
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000215 switch (*ptr) {
216 case 'c':
217 com_fl = TRUE;
218 break;
219 case 'd':
220 del_fl = TRUE;
221 break;
222 case 's':
223 sq_fl = TRUE;
224 break;
225 default:
Manuel Novoa III cad53642003-03-19 09:13:01 +0000226 bb_show_usage();
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000227 }
228 }
Eric Andersenf6aa13d2001-03-23 17:08:21 +0000229 idx++;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000230 }
231 for (i = 0; i <= ASCII; i++) {
232 vector[i] = i;
233 invec[i] = outvec[i] = FALSE;
234 }
235
Eric Andersenf6aa13d2001-03-23 17:08:21 +0000236 if (argv[idx] != NULL) {
Rob Landleycd545282006-06-30 16:35:40 +0000237 input_length = expand(argv[idx++], bb_common_bufsiz1);
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000238 if (com_fl)
Rob Landleycd545282006-06-30 16:35:40 +0000239 input_length = complement(bb_common_bufsiz1, input_length);
Eric Andersenf6aa13d2001-03-23 17:08:21 +0000240 if (argv[idx] != NULL) {
241 if (*argv[idx] == '\0')
Manuel Novoa III cad53642003-03-19 09:13:01 +0000242 bb_error_msg_and_die("STRING2 cannot be empty");
Rob Landleyab58d5c2006-06-30 19:04:09 +0000243 output_length = expand(argv[idx], output);
244 map(bb_common_bufsiz1, input_length, output, output_length);
Eric Andersena03d86c2000-07-10 16:38:50 +0000245 }
Eric Andersen00143ba2000-07-13 16:40:41 +0000246 for (i = 0; i < input_length; i++)
Rob Landleyab58d5c2006-06-30 19:04:09 +0000247 invec[(GCC4_IS_STUPID)bb_common_bufsiz1[i]] = TRUE;
Eric Andersen00143ba2000-07-13 16:40:41 +0000248 for (i = 0; i < output_length; i++)
Rob Landleyab58d5c2006-06-30 19:04:09 +0000249 outvec[(GCC4_IS_STUPID)output[i]] = TRUE;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000250 }
251 convert();
Erik Andersenfb002d02000-03-05 08:07:00 +0000252 return (0);
253}