blob: 752b13bf863affe1a90ae1be8a4bed5d4eb089b5 [file] [log] [blame]
Erik Andersenfb002d02000-03-05 08:07:00 +00001/* vi: set sw=4 ts=4: */
2/*
Erik Andersen8f8d6d52000-05-01 22:30:37 +00003 * Mini tr implementation for busybox
Erik Andersenfb002d02000-03-05 08:07:00 +00004 *
Erik Andersen5afc8642000-05-02 00:07:56 +00005 * Copyright (c) Michiel Huisjes
6 *
Eric Andersenc7bda1c2004-03-15 08:29:22 +00007 * This version of tr is adapted from Minix tr and was modified
Eric Andersencb81e642003-07-14 21:21:08 +00008 * by Erik Andersen <andersen@codepoet.org> to be used in busybox.
Erik Andersenfb002d02000-03-05 08:07:00 +00009 *
Erik Andersen8f8d6d52000-05-01 22:30:37 +000010 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Eric Andersenc7bda1c2004-03-15 08:29:22 +000023 *
Erik Andersen8f8d6d52000-05-01 22:30:37 +000024 * Original copyright notice is retained at the end of this file.
Erik Andersenfb002d02000-03-05 08:07:00 +000025 */
26
Erik Andersenfb002d02000-03-05 08:07:00 +000027#include <stdio.h>
Erik Andersenfb002d02000-03-05 08:07:00 +000028#include <string.h>
Erik Andersen8f8d6d52000-05-01 22:30:37 +000029#include <stdlib.h>
Erik Andersenfb002d02000-03-05 08:07:00 +000030#include <unistd.h>
Rob Landleyf1048142005-10-08 21:21:08 +000031#include <ctype.h>
Erik Andersen8f8d6d52000-05-01 22:30:37 +000032#include <sys/types.h>
Eric Andersencbe31da2001-02-20 06:14:08 +000033#include "busybox.h"
Erik Andersen330fd2b2000-05-19 05:35:19 +000034
/* Highest byte value handled (octal 0377 == 255); the lookup tables
 * reserved in tr_main have ASCII+1 entries, one per possible byte. */
#define ASCII 0377

/* some "globals" shared across this file */
/* Option flags; tr_main sets them from the -c, -d and -s switches. */
static char com_fl, del_fl, sq_fl;
/* Read position in the input buffer / write position in the output
 * buffer, both maintained by convert(). */
static short in_index, out_index;
/* these last are pointers to static buffers declared in tr_main */
/* Output staging buffer that convert() flushes to fd 1. */
static unsigned char *poutput;
/* Translation table: pvector[byte] is the byte emitted for it. */
static unsigned char *pvector;
/* Membership flags: pinvec[b] is set when b is in the first set,
 * poutvec[b] when b is in the second set. */
static unsigned char *pinvec, *poutvec;

/* The shared scratch buffer holds the expanded first set during setup
 * and is then reused by convert() as its read buffer. */
#define input bb_common_bufsiz1
Erik Andersenfb002d02000-03-05 08:07:00 +000046
Eric Anderseneaecbf32001-10-31 10:41:31 +000047static void convert(void)
Erik Andersenfb002d02000-03-05 08:07:00 +000048{
Erik Andersen8f8d6d52000-05-01 22:30:37 +000049 short read_chars = 0;
50 short c, coded;
51 short last = -1;
Erik Andersenfb002d02000-03-05 08:07:00 +000052
Erik Andersen8f8d6d52000-05-01 22:30:37 +000053 for (;;) {
54 if (in_index == read_chars) {
"Vladimir N. Oleynik"6f347ef2005-10-15 10:23:55 +000055 if ((read_chars = read(0, input, BUFSIZ)) <= 0) {
Mark Whitley59ab0252001-01-23 22:30:04 +000056 if (write(1, (char *) poutput, out_index) != out_index)
Manuel Novoa III cad53642003-03-19 09:13:01 +000057 bb_error_msg(bb_msg_write_error);
Erik Andersen8f8d6d52000-05-01 22:30:37 +000058 exit(0);
Erik Andersenfb002d02000-03-05 08:07:00 +000059 }
Erik Andersen8f8d6d52000-05-01 22:30:37 +000060 in_index = 0;
61 }
"Vladimir N. Oleynik"6f347ef2005-10-15 10:23:55 +000062 c = input[in_index++];
Mark Whitley59ab0252001-01-23 22:30:04 +000063 coded = pvector[c];
64 if (del_fl && pinvec[c])
Erik Andersen8f8d6d52000-05-01 22:30:37 +000065 continue;
Mark Whitley59ab0252001-01-23 22:30:04 +000066 if (sq_fl && last == coded && (pinvec[c] || poutvec[coded]))
Erik Andersen8f8d6d52000-05-01 22:30:37 +000067 continue;
Mark Whitley59ab0252001-01-23 22:30:04 +000068 poutput[out_index++] = last = coded;
Erik Andersen8f8d6d52000-05-01 22:30:37 +000069 if (out_index == BUFSIZ) {
Eric Andersen5a4a46a2001-07-09 21:32:29 +000070 if (write(1, (char *) poutput, out_index) != out_index)
Manuel Novoa III cad53642003-03-19 09:13:01 +000071 bb_error_msg_and_die(bb_msg_write_error);
Erik Andersen8f8d6d52000-05-01 22:30:37 +000072 out_index = 0;
73 }
Erik Andersenfb002d02000-03-05 08:07:00 +000074 }
75
Erik Andersenfb002d02000-03-05 08:07:00 +000076 /* NOTREACHED */
Erik Andersen8f8d6d52000-05-01 22:30:37 +000077}
78
Eric Andersen00143ba2000-07-13 16:40:41 +000079static void map(register unsigned char *string1, unsigned int string1_len,
80 register unsigned char *string2, unsigned int string2_len)
Erik Andersen8f8d6d52000-05-01 22:30:37 +000081{
82 unsigned char last = '0';
Eric Andersen00143ba2000-07-13 16:40:41 +000083 unsigned int i, j;
Erik Andersen8f8d6d52000-05-01 22:30:37 +000084
Eric Andersen00143ba2000-07-13 16:40:41 +000085 for (j = 0, i = 0; i < string1_len; i++) {
86 if (string2_len <= j)
Mark Whitley59ab0252001-01-23 22:30:04 +000087 pvector[string1[i]] = last;
Erik Andersen8f8d6d52000-05-01 22:30:37 +000088 else
Mark Whitley59ab0252001-01-23 22:30:04 +000089 pvector[string1[i]] = last = string2[j++];
Erik Andersen8f8d6d52000-05-01 22:30:37 +000090 }
91}
92
Mark Whitley8b7a0d82001-05-24 21:31:09 +000093/* supported constructs:
94 * Ranges, e.g., [0-9] ==> 0123456789
95 * Escapes, e.g., \a ==> Control-G
Rob Landleyf1048142005-10-08 21:21:08 +000096 * Character classes, e.g. [:upper:] ==> A ... Z
Mark Whitley8b7a0d82001-05-24 21:31:09 +000097 */
Eric Andersene5dfced2001-04-09 22:48:12 +000098static unsigned int expand(const char *arg, register unsigned char *buffer)
Erik Andersen8f8d6d52000-05-01 22:30:37 +000099{
Eric Andersen00143ba2000-07-13 16:40:41 +0000100 unsigned char *buffer_start = buffer;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000101 int i, ac;
102
103 while (*arg) {
104 if (*arg == '\\') {
105 arg++;
Manuel Novoa III cad53642003-03-19 09:13:01 +0000106 *buffer++ = bb_process_escape_sequence(&arg);
Eric Andersen5a4a46a2001-07-09 21:32:29 +0000107 } else if (*(arg+1) == '-') {
108 ac = *(arg+2);
109 if(ac == 0) {
110 *buffer++ = *arg++;
111 continue;
112 }
113 i = *arg;
114 while (i <= ac)
115 *buffer++ = i++;
116 arg += 3; /* Skip the assumed a-z */
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000117 } else if (*arg == '[') {
118 arg++;
Rob Landley998dbee2006-04-19 22:22:06 +0000119 i = *arg++;
120 if (ENABLE_FEATURE_TR_CLASSES && i == ':') {
Rob Landleyf1048142005-10-08 21:21:08 +0000121 if (strncmp(arg, "alpha", 5) == 0) {
122 for (i = 'A'; i <= 'Z'; i++)
123 *buffer++ = i;
124 for (i = 'a'; i <= 'z'; i++)
125 *buffer++ = i;
126 }
127 else if (strncmp(arg, "alnum", 5) == 0) {
Rob Landley998dbee2006-04-19 22:22:06 +0000128 for (i = '0'; i <= '9'; i++)
129 *buffer++ = i;
Rob Landleyf1048142005-10-08 21:21:08 +0000130 for (i = 'A'; i <= 'Z'; i++)
131 *buffer++ = i;
132 for (i = 'a'; i <= 'z'; i++)
133 *buffer++ = i;
Rob Landleyf1048142005-10-08 21:21:08 +0000134 }
135 else if (strncmp(arg, "digit", 5) == 0)
136 for (i = '0'; i <= '9'; i++)
137 *buffer++ = i;
138 else if (strncmp(arg, "lower", 5) == 0)
139 for (i = 'a'; i <= 'z'; i++)
140 *buffer++ = i;
141 else if (strncmp(arg, "upper", 5) == 0)
142 for (i = 'A'; i <= 'Z'; i++)
143 *buffer++ = i;
Rob Landley998dbee2006-04-19 22:22:06 +0000144 else if (strncmp(arg, "space", 5) == 0) {
145 const char s[] = "\t\n\v\f\r ";
146 strcat((char*)buffer, s);
147 buffer += sizeof(s) - 1;
148 }
149 else if (strncmp(arg, "blank", 5) == 0) {
150 *buffer++ = '\t';
151 *buffer++ = ' ';
152 }
Rob Landleyf1048142005-10-08 21:21:08 +0000153 /* gcc gives a warning if braces aren't used here */
154 else if (strncmp(arg, "punct", 5) == 0) {
155 for (i = 0; i <= ASCII; i++)
156 if (isprint(i) && (!isalnum(i)) && (!isspace(i)))
157 *buffer++ = i;
158 }
159 else if (strncmp(arg, "cntrl", 5) == 0) {
160 for (i = 0; i <= ASCII; i++)
161 if (iscntrl(i))
162 *buffer++ = i;
163 }
164 else {
Rob Landley998dbee2006-04-19 22:22:06 +0000165 *buffer++ = '[';
166 *buffer++ = ':';
Rob Landleyf1048142005-10-08 21:21:08 +0000167 continue;
168 }
169 break;
170 }
Rob Landley998dbee2006-04-19 22:22:06 +0000171 if (ENABLE_FEATURE_TR_EQUIV && i == '=') {
Rob Landleyf1048142005-10-08 21:21:08 +0000172 *buffer++ = *arg;
173 /* skip the closing =] */
174 arg += 3;
175 continue;
176 }
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000177 if (*arg++ != '-') {
178 *buffer++ = '[';
179 arg -= 2;
180 continue;
181 }
182 ac = *arg++;
183 while (i <= ac)
184 *buffer++ = i++;
Mark Whitley8b7a0d82001-05-24 21:31:09 +0000185 arg++; /* Skip the assumed ']' */
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000186 } else
187 *buffer++ = *arg++;
188 }
Eric Andersen00143ba2000-07-13 16:40:41 +0000189
190 return (buffer - buffer_start);
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000191}
192
Eric Andersenfad04fd2000-07-14 06:49:52 +0000193static int complement(unsigned char *buffer, int buffer_len)
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000194{
Eric Andersen1ca20a72001-03-21 07:34:27 +0000195 register short i, j, ix;
Eric Andersenfad04fd2000-07-14 06:49:52 +0000196 char conv[ASCII + 2];
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000197
Eric Andersen1ca20a72001-03-21 07:34:27 +0000198 ix = 0;
Eric Andersen00143ba2000-07-13 16:40:41 +0000199 for (i = 0; i <= ASCII; i++) {
200 for (j = 0; j < buffer_len; j++)
201 if (buffer[j] == i)
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000202 break;
Eric Andersen00143ba2000-07-13 16:40:41 +0000203 if (j == buffer_len)
Eric Andersen1ca20a72001-03-21 07:34:27 +0000204 conv[ix++] = i & ASCII;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000205 }
Eric Andersen1ca20a72001-03-21 07:34:27 +0000206 memcpy(buffer, conv, ix);
207 return ix;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000208}
209
Rob Landleydfba7412006-03-06 20:47:33 +0000210int tr_main(int argc, char **argv)
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000211{
212 register unsigned char *ptr;
Eric Andersenfad04fd2000-07-14 06:49:52 +0000213 int output_length=0, input_length;
Eric Andersenf6aa13d2001-03-23 17:08:21 +0000214 int idx = 1;
Eric Andersenfad04fd2000-07-14 06:49:52 +0000215 int i;
Eric Andersenbdfd0d72001-10-24 05:00:29 +0000216 RESERVE_CONFIG_BUFFER(output, BUFSIZ);
Rob Landley998f4492006-04-10 16:40:47 +0000217 RESERVE_CONFIG_BUFFER(vector, ASCII+1);
Eric Andersenbdfd0d72001-10-24 05:00:29 +0000218 RESERVE_CONFIG_BUFFER(invec, ASCII+1);
219 RESERVE_CONFIG_BUFFER(outvec, ASCII+1);
Mark Whitley59ab0252001-01-23 22:30:04 +0000220
221 /* ... but make them available globally */
Eric Andersen5e678872006-01-30 19:48:23 +0000222 poutput = (unsigned char*)output;
223 pvector = (unsigned char*)vector;
224 pinvec = (unsigned char*)invec;
225 poutvec = (unsigned char*)outvec;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000226
Eric Andersenf6aa13d2001-03-23 17:08:21 +0000227 if (argc > 1 && argv[idx][0] == '-') {
228 for (ptr = (unsigned char *) &argv[idx][1]; *ptr; ptr++) {
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000229 switch (*ptr) {
230 case 'c':
231 com_fl = TRUE;
232 break;
233 case 'd':
234 del_fl = TRUE;
235 break;
236 case 's':
237 sq_fl = TRUE;
238 break;
239 default:
Manuel Novoa III cad53642003-03-19 09:13:01 +0000240 bb_show_usage();
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000241 }
242 }
Eric Andersenf6aa13d2001-03-23 17:08:21 +0000243 idx++;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000244 }
245 for (i = 0; i <= ASCII; i++) {
246 vector[i] = i;
247 invec[i] = outvec[i] = FALSE;
248 }
249
Eric Andersenf6aa13d2001-03-23 17:08:21 +0000250 if (argv[idx] != NULL) {
Eric Andersen5e678872006-01-30 19:48:23 +0000251 input_length = expand(argv[idx++], (unsigned char*)input);
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000252 if (com_fl)
Eric Andersen5e678872006-01-30 19:48:23 +0000253 input_length = complement((unsigned char*)input, input_length);
Eric Andersenf6aa13d2001-03-23 17:08:21 +0000254 if (argv[idx] != NULL) {
255 if (*argv[idx] == '\0')
Manuel Novoa III cad53642003-03-19 09:13:01 +0000256 bb_error_msg_and_die("STRING2 cannot be empty");
Eric Andersen5e678872006-01-30 19:48:23 +0000257 output_length = expand(argv[idx], (unsigned char*)output);
258 map((unsigned char*)input, input_length, (unsigned char*)output, output_length);
Eric Andersena03d86c2000-07-10 16:38:50 +0000259 }
Eric Andersen00143ba2000-07-13 16:40:41 +0000260 for (i = 0; i < input_length; i++)
Eric Andersened438062004-03-12 22:10:40 +0000261 invec[(unsigned char)input[i]] = TRUE;
Eric Andersen00143ba2000-07-13 16:40:41 +0000262 for (i = 0; i < output_length; i++)
Eric Andersened438062004-03-12 22:10:40 +0000263 outvec[(unsigned char)output[i]] = TRUE;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000264 }
265 convert();
Erik Andersenfb002d02000-03-05 08:07:00 +0000266 return (0);
267}
268
Erik Andersenfb002d02000-03-05 08:07:00 +0000269/*
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000270 * Copyright (c) 1987,1997, Prentice Hall
271 * All rights reserved.
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000272 *
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000273 * Redistribution and use of the MINIX operating system in source and
274 * binary forms, with or without modification, are permitted provided
275 * that the following conditions are met:
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000276 *
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000277 * Redistributions of source code must retain the above copyright
278 * notice, this list of conditions and the following disclaimer.
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000279 *
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000280 * Redistributions in binary form must reproduce the above
281 * copyright notice, this list of conditions and the following
282 * disclaimer in the documentation and/or other materials provided
283 * with the distribution.
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000284 *
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000285 * Neither the name of Prentice Hall nor the names of the software
286 * authors or contributors may be used to endorse or promote
287 * products derived from this software without specific prior
288 * written permission.
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000289 *
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000290 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND
291 * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
292 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
293 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
294 * IN NO EVENT SHALL PRENTICE HALL OR ANY AUTHORS OR CONTRIBUTORS BE
295 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
296 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
297 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
298 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
299 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
300 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
301 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
302 *
Erik Andersenfb002d02000-03-05 08:07:00 +0000303 */
Erik Andersenfb002d02000-03-05 08:07:00 +0000304