/*
 * cut.c - minimalist version of cut
 *
 * Copyright (C) 1999,2000,2001 by Lineo, inc.
 * Written by Mark Whitley <markw@lineo.com>, <markw@codepoet.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
22
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000023#include <stdio.h>
Mark Whitley807f0fd2000-08-02 18:30:11 +000024#include <stdlib.h>
25#include <unistd.h> /* getopt */
26#include <string.h>
Mark Whitleyb6967632001-05-18 23:04:51 +000027#include <limits.h>
Eric Andersen3570a342000-09-25 21:45:58 +000028#include "busybox.h"
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000029
30
/* globals from other files (set by getopt() while parsing the command line) */
extern int optind;
extern char *optarg;


/* option vars */
static char part = 0; /* which list type was given: (b)yte, (c)har, (f)ields; 0 = none yet */
static unsigned int supress_non_delimited_lines = 0; /* non-zero once -s was seen */
static char delim = '\t'; /* field delimiter (-d), default is tab */
40
/*
 * One parsed entry of the -b / -c / -f list: either a single position or a
 * range of positions. Positions are stored zero-based.
 */
struct cut_list {
	int startpos;	/* first selected position */
	int endpos;	/* last selected position; NON_RANGE when the entry
			 * is a single position, EOL when it is open-ended */
};
45
/* sentinel positions used while parsing and walking the lists */
static const int BOL = 0;		/* list gave no start: begin at the start of the line */
static const int EOL = INT_MAX;		/* list gave no end: run to the end of the line */
static const int NON_RANGE = -1;	/* endpos marker: entry is a single position */

static struct cut_list *cut_lists = NULL; /* growable array holding a series of lists */
static unsigned int nlists = 0; /* number of elements in above list */
52
53
54static int cmpfunc(const void *a, const void *b)
55{
56 struct cut_list *la = (struct cut_list *)a;
57 struct cut_list *lb = (struct cut_list *)b;
58
59 if (la->startpos > lb->startpos)
60 return 1;
61 if (la->startpos < lb->startpos)
62 return -1;
63 return 0;
64}
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000065
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000066
67/*
Mark Whitleyb6967632001-05-18 23:04:51 +000068 * parse_lists() - parses a list and puts values into startpos and endpos.
Mark Whitley807f0fd2000-08-02 18:30:11 +000069 * valid list formats: N, N-, N-M, -M
Mark Whitleyb6967632001-05-18 23:04:51 +000070 * more than one list can be seperated by commas
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000071 */
Mark Whitleyb6967632001-05-18 23:04:51 +000072static void parse_lists(char *lists)
Mark Whitley807f0fd2000-08-02 18:30:11 +000073{
Mark Whitleyb6967632001-05-18 23:04:51 +000074 char *ltok = NULL;
75 char *ntok = NULL;
76 char *junk;
77 int s = 0, e = 0;
Mark Whitley807f0fd2000-08-02 18:30:11 +000078
Mark Whitleyb6967632001-05-18 23:04:51 +000079 /* take apart the lists, one by one (they are seperated with commas */
80 while ((ltok = strsep(&lists, ",")) != NULL) {
81
82 /* it's actually legal to pass an empty list */
83 if (strlen(ltok) == 0)
84 continue;
85
86 /* get the start pos */
87 ntok = strsep(&ltok, "-");
88 if (ntok == NULL) {
89 fprintf(stderr, "Help ntok is null for starting position! What do I do?\n");
90 } else if (strlen(ntok) == 0) {
91 s = BOL;
92 } else {
93 s = strtoul(ntok, &junk, 10);
94 if(*junk != '\0' || s < 0)
Manuel Novoa III cad53642003-03-19 09:13:01 +000095 bb_error_msg_and_die("invalid byte or field list");
Mark Whitleyb6967632001-05-18 23:04:51 +000096
97 /* account for the fact that arrays are zero based, while the user
98 * expects the first char on the line to be char # 1 */
99 if (s != 0)
100 s--;
101 }
102
103 /* get the end pos */
104 ntok = strsep(&ltok, "-");
105 if (ntok == NULL) {
106 e = NON_RANGE;
107 } else if (strlen(ntok) == 0) {
108 e = EOL;
109 } else {
110 e = strtoul(ntok, &junk, 10);
111 if(*junk != '\0' || e < 0)
Manuel Novoa III cad53642003-03-19 09:13:01 +0000112 bb_error_msg_and_die("invalid byte or field list");
Mark Whitleyb6967632001-05-18 23:04:51 +0000113 /* if the user specified and end position of 0, that means "til the
114 * end of the line */
115 if (e == 0)
116 e = INT_MAX;
117 e--; /* again, arrays are zero based, lines are 1 based */
118 if (e == s)
119 e = NON_RANGE;
120 }
121
122 /* if there's something left to tokenize, the user past an invalid list */
123 if (ltok)
Manuel Novoa III cad53642003-03-19 09:13:01 +0000124 bb_error_msg_and_die("invalid byte or field list");
Mark Whitleyb6967632001-05-18 23:04:51 +0000125
126 /* add the new list */
127 cut_lists = xrealloc(cut_lists, sizeof(struct cut_list) * (++nlists));
128 cut_lists[nlists-1].startpos = s;
129 cut_lists[nlists-1].endpos = e;
130 }
131
132 /* make sure we got some cut positions out of all that */
133 if (nlists == 0)
Manuel Novoa III cad53642003-03-19 09:13:01 +0000134 bb_error_msg_and_die("missing list of positions");
Mark Whitleyb6967632001-05-18 23:04:51 +0000135
136 /* now that the lists are parsed, we need to sort them to make life easier
137 * on us when it comes time to print the chars / fields / lines */
138 qsort(cut_lists, nlists, sizeof(struct cut_list), cmpfunc);
139
140}
141
142
143static void cut_line_by_chars(const char *line)
144{
145 int c, l;
146 /* set up a list so we can keep track of what's been printed */
147 char *printed = xcalloc(strlen(line), sizeof(char));
148
149 /* print the chars specified in each cut list */
150 for (c = 0; c < nlists; c++) {
151 l = cut_lists[c].startpos;
152 while (l < strlen(line)) {
153 if (!printed[l]) {
154 putchar(line[l]);
155 printed[l] = 'X';
Mark Whitley807f0fd2000-08-02 18:30:11 +0000156 }
Mark Whitleyb6967632001-05-18 23:04:51 +0000157 l++;
158 if (cut_lists[c].endpos == NON_RANGE || l > cut_lists[c].endpos)
159 break;
160 }
161 }
162 putchar('\n'); /* cuz we were handed a chomped line */
163 free(printed);
164}
165
166
167static void cut_line_by_fields(char *line)
168{
169 int c, f;
170 int ndelim = -1; /* zero-based / one-based problem */
171 int nfields_printed = 0;
172 char *field = NULL;
173 char d[2] = { delim, 0 };
174 char *printed;
175
176 /* test the easy case first: does this line contain any delimiters? */
177 if (strchr(line, delim) == NULL) {
178 if (!supress_non_delimited_lines)
179 puts(line);
180 return;
181 }
182
183 /* set up a list so we can keep track of what's been printed */
184 printed = xcalloc(strlen(line), sizeof(char));
185
186 /* process each list on this line, for as long as we've got a line to process */
187 for (c = 0; c < nlists && line; c++) {
188 f = cut_lists[c].startpos;
189 do {
190
191 /* find the field we're looking for */
192 while (line && ndelim < f) {
193 field = strsep(&line, d);
194 ndelim++;
195 }
196
197 /* we found it, and it hasn't been printed yet */
198 if (field && ndelim == f && !printed[ndelim]) {
199 /* if this isn't our first time through, we need to print the
200 * delimiter after the last field that was printed */
201 if (nfields_printed > 0)
202 putchar(delim);
203 fputs(field, stdout);
204 printed[ndelim] = 'X';
205 nfields_printed++;
206 }
207
208 f++;
209
210 /* keep going as long as we have a line to work with, this is a
211 * list, and we're not at the end of that list */
212 } while (line && cut_lists[c].endpos != NON_RANGE && f <= cut_lists[c].endpos);
213 }
214
215 /* if we printed anything at all, we need to finish it with a newline cuz
216 * we were handed a chomped line */
217 putchar('\n');
218
219 free(printed);
220}
221
222
223static void cut_file_by_lines(const char *line, unsigned int linenum)
224{
225 static int c = 0;
226 static int l = -1;
227
228 /* I can't initialize this above cuz the "initializer isn't
229 * constant" *sigh* */
230 if (l == -1)
231 l = cut_lists[c].startpos;
232
233 /* get out if we have no more lists to process or if the lines are lower
234 * than what we're interested in */
235 if (c >= nlists || linenum < l)
236 return;
237
238 /* if the line we're looking for is lower than the one we were passed, it
239 * means we displayed it already, so move on */
240 while (l < linenum) {
241 l++;
242 /* move on to the next list if we're at the end of this one */
243 if (cut_lists[c].endpos == NON_RANGE || l > cut_lists[c].endpos) {
244 c++;
245 /* get out if there's no more lists to process */
246 if (c >= nlists)
247 return;
248 l = cut_lists[c].startpos;
249 /* get out if the current line is lower than the one we just became
250 * interested in */
251 if (linenum < l)
252 return;
Mark Whitley807f0fd2000-08-02 18:30:11 +0000253 }
254 }
255
Mark Whitleyb6967632001-05-18 23:04:51 +0000256 /* If we made it here, it means we've found the line we're looking for, so print it */
257 puts(line);
Mark Whitley807f0fd2000-08-02 18:30:11 +0000258}
Erik Andersen7ab9c7e2000-05-12 19:41:47 +0000259
260
Mark Whitley807f0fd2000-08-02 18:30:11 +0000261/*
262 * snippy-snip
263 */
264static void cut_file(FILE *file)
265{
Mark Whitleyb6967632001-05-18 23:04:51 +0000266 char *line = NULL;
267 unsigned int linenum = 0; /* keep these zero-based to be consistent */
Mark Whitley807f0fd2000-08-02 18:30:11 +0000268
269 /* go through every line in the file */
Manuel Novoa III cad53642003-03-19 09:13:01 +0000270 while ((line = bb_get_chomped_line_from_file(file)) != NULL) {
Mark Whitleyb6967632001-05-18 23:04:51 +0000271
272 /* cut based on chars/bytes XXX: only works when sizeof(char) == byte */
273 if (part == 'c' || part == 'b')
274 cut_line_by_chars(line);
275
Mark Whitley807f0fd2000-08-02 18:30:11 +0000276 /* cut based on fields */
277 else if (part == 'f') {
Mark Whitleyb6967632001-05-18 23:04:51 +0000278 if (delim == '\n')
279 cut_file_by_lines(line, linenum);
280 else
281 cut_line_by_fields(line);
Mark Whitley807f0fd2000-08-02 18:30:11 +0000282 }
Mark Whitleyb6967632001-05-18 23:04:51 +0000283
284 linenum++;
285 free(line);
Mark Whitley807f0fd2000-08-02 18:30:11 +0000286 }
287}
288
Mark Whitleyb6967632001-05-18 23:04:51 +0000289
Mark Whitley807f0fd2000-08-02 18:30:11 +0000290extern int cut_main(int argc, char **argv)
291{
292 int opt;
293
294 while ((opt = getopt(argc, argv, "b:c:d:f:ns")) > 0) {
295 switch (opt) {
296 case 'b':
297 case 'c':
298 case 'f':
299 /* make sure they didn't ask for two types of lists */
300 if (part != 0) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000301 bb_error_msg_and_die("only one type of list may be specified");
Mark Whitley807f0fd2000-08-02 18:30:11 +0000302 }
303 part = (char)opt;
Mark Whitleyb6967632001-05-18 23:04:51 +0000304 parse_lists(optarg);
Mark Whitley807f0fd2000-08-02 18:30:11 +0000305 break;
306 case 'd':
307 if (strlen(optarg) > 1) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000308 bb_error_msg_and_die("the delimiter must be a single character");
Mark Whitley807f0fd2000-08-02 18:30:11 +0000309 }
310 delim = optarg[0];
311 break;
312 case 'n':
313 /* no-op */
314 break;
315 case 's':
316 supress_non_delimited_lines++;
317 break;
318 }
319 }
320
321 if (part == 0) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000322 bb_error_msg_and_die("you must specify a list of bytes, characters, or fields");
Mark Whitley807f0fd2000-08-02 18:30:11 +0000323 }
324
Mark Whitleyb6967632001-05-18 23:04:51 +0000325 /* non-field (char or byte) cutting has some special handling */
326 if (part != 'f') {
327 if (supress_non_delimited_lines) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000328 bb_error_msg_and_die("suppressing non-delimited lines makes sense"
Mark Whitleyb6967632001-05-18 23:04:51 +0000329 " only when operating on fields");
330 }
331 if (delim != '\t' && part != 'f') {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000332 bb_error_msg_and_die("a delimiter may be specified only when operating on fields");
Mark Whitleyb6967632001-05-18 23:04:51 +0000333 }
Mark Whitley807f0fd2000-08-02 18:30:11 +0000334 }
335
336 /* argv[(optind)..(argc-1)] should be names of file to process. If no
337 * files were specified or '-' was specified, take input from stdin.
338 * Otherwise, we process all the files specified. */
339 if (argv[optind] == NULL || (strcmp(argv[optind], "-") == 0)) {
340 cut_file(stdin);
341 }
342 else {
343 int i;
344 FILE *file;
345 for (i = optind; i < argc; i++) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000346 file = bb_wfopen(argv[i], "r");
Eric Andersen34506362001-08-02 05:02:46 +0000347 if(file) {
Mark Whitley807f0fd2000-08-02 18:30:11 +0000348 cut_file(file);
349 fclose(file);
350 }
351 }
352 }
353
Matt Kraai3e856ce2000-12-01 02:55:13 +0000354 return EXIT_SUCCESS;
Mark Whitley807f0fd2000-08-02 18:30:11 +0000355}