blob: 839bab68ebb331cd2b22be897f6d7413c007eb2c [file] [log] [blame]
/* vi: set sw=8 ts=8: */
/*
 * cut.c - minimalist version of cut
 *
 * Copyright (C) 1999,2000,2001 by Lineo, inc.
 * Written by Mark Whitley <markw@codepoet.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
23
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000024#include <stdio.h>
Mark Whitley807f0fd2000-08-02 18:30:11 +000025#include <stdlib.h>
Eric Andersen8876fb22003-06-20 09:01:58 +000026#include <unistd.h>
Mark Whitley807f0fd2000-08-02 18:30:11 +000027#include <string.h>
Mark Whitleyb6967632001-05-18 23:04:51 +000028#include <limits.h>
Eric Andersen3570a342000-09-25 21:45:58 +000029#include "busybox.h"
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000030
31
Mark Whitleyb6967632001-05-18 23:04:51 +000032/* option vars */
/* option vars */
static const char optstring[] = "b:c:f:d:sn";

/* Flag bits tested against the value returned by bb_getopt_ulflags().
 * NOTE(review): presumably bit N corresponds to the N-th letter of
 * optstring -- confirm against the bb_getopt_ulflags() documentation. */
#define OPT_BYTE_FLGS    1	/* -b */
#define OPT_CHAR_FLGS    2	/* -c */
#define OPT_FIELDS_FLGS  4	/* -f */
#define OPT_DELIM_FLGS   8	/* -d */
#define OPT_SUPRESS_FLGS 16	/* -s */

static char part; /* selected mode: OPT_BYTE_FLGS, OPT_CHAR_FLGS or OPT_FIELDS_FLGS */
static unsigned int supress_non_delimited_lines; /* non-zero when -s was given */
static char delim = '\t'; /* delimiter, default is tab */

/* One entry of the position list given to -b / -c / -f.  Positions are
 * stored zero-based. */
struct cut_list {
	int startpos;	/* first position selected */
	int endpos;	/* last position selected, EOL, or NON_RANGE */
};

/*
 * Sentinel positions.  These are enumeration constants rather than
 * "static const int" objects so that they are true integer constant
 * expressions in C and can be used in static initializers, case labels
 * and the like.
 */
enum {
	BOL = 0,		/* open-ended start: beginning of line */
	EOL = INT_MAX,		/* open-ended end: rest of the line */
	NON_RANGE = -1		/* endpos value of a single-position entry */
};

static struct cut_list *cut_lists = NULL; /* growable array holding a series of lists */
static unsigned int nlists = 0; /* number of elements in above list */
54
55
56static int cmpfunc(const void *a, const void *b)
57{
58 struct cut_list *la = (struct cut_list *)a;
59 struct cut_list *lb = (struct cut_list *)b;
60
61 if (la->startpos > lb->startpos)
62 return 1;
63 if (la->startpos < lb->startpos)
64 return -1;
65 return 0;
66}
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000067
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000068
69/*
Mark Whitleyb6967632001-05-18 23:04:51 +000070 * parse_lists() - parses a list and puts values into startpos and endpos.
Eric Andersenc7bda1c2004-03-15 08:29:22 +000071 * valid list formats: N, N-, N-M, -M
Eric Andersenaff114c2004-04-14 17:51:38 +000072 * more than one list can be separated by commas
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000073 */
Mark Whitleyb6967632001-05-18 23:04:51 +000074static void parse_lists(char *lists)
Mark Whitley807f0fd2000-08-02 18:30:11 +000075{
Mark Whitleyb6967632001-05-18 23:04:51 +000076 char *ltok = NULL;
77 char *ntok = NULL;
78 char *junk;
79 int s = 0, e = 0;
Mark Whitley807f0fd2000-08-02 18:30:11 +000080
Eric Andersenaff114c2004-04-14 17:51:38 +000081 /* take apart the lists, one by one (they are separated with commas */
Mark Whitleyb6967632001-05-18 23:04:51 +000082 while ((ltok = strsep(&lists, ",")) != NULL) {
83
84 /* it's actually legal to pass an empty list */
85 if (strlen(ltok) == 0)
86 continue;
87
88 /* get the start pos */
89 ntok = strsep(&ltok, "-");
90 if (ntok == NULL) {
91 fprintf(stderr, "Help ntok is null for starting position! What do I do?\n");
92 } else if (strlen(ntok) == 0) {
93 s = BOL;
94 } else {
95 s = strtoul(ntok, &junk, 10);
96 if(*junk != '\0' || s < 0)
Manuel Novoa III cad53642003-03-19 09:13:01 +000097 bb_error_msg_and_die("invalid byte or field list");
Eric Andersenc7bda1c2004-03-15 08:29:22 +000098
Mark Whitleyb6967632001-05-18 23:04:51 +000099 /* account for the fact that arrays are zero based, while the user
100 * expects the first char on the line to be char # 1 */
101 if (s != 0)
102 s--;
103 }
104
105 /* get the end pos */
106 ntok = strsep(&ltok, "-");
107 if (ntok == NULL) {
108 e = NON_RANGE;
109 } else if (strlen(ntok) == 0) {
110 e = EOL;
111 } else {
112 e = strtoul(ntok, &junk, 10);
113 if(*junk != '\0' || e < 0)
Manuel Novoa III cad53642003-03-19 09:13:01 +0000114 bb_error_msg_and_die("invalid byte or field list");
Mark Whitleyb6967632001-05-18 23:04:51 +0000115 /* if the user specified and end position of 0, that means "til the
116 * end of the line */
117 if (e == 0)
118 e = INT_MAX;
119 e--; /* again, arrays are zero based, lines are 1 based */
120 if (e == s)
121 e = NON_RANGE;
122 }
123
124 /* if there's something left to tokenize, the user past an invalid list */
125 if (ltok)
Manuel Novoa III cad53642003-03-19 09:13:01 +0000126 bb_error_msg_and_die("invalid byte or field list");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000127
Mark Whitleyb6967632001-05-18 23:04:51 +0000128 /* add the new list */
129 cut_lists = xrealloc(cut_lists, sizeof(struct cut_list) * (++nlists));
130 cut_lists[nlists-1].startpos = s;
131 cut_lists[nlists-1].endpos = e;
132 }
133
134 /* make sure we got some cut positions out of all that */
135 if (nlists == 0)
Manuel Novoa III cad53642003-03-19 09:13:01 +0000136 bb_error_msg_and_die("missing list of positions");
Mark Whitleyb6967632001-05-18 23:04:51 +0000137
138 /* now that the lists are parsed, we need to sort them to make life easier
139 * on us when it comes time to print the chars / fields / lines */
140 qsort(cut_lists, nlists, sizeof(struct cut_list), cmpfunc);
141
142}
143
144
145static void cut_line_by_chars(const char *line)
146{
147 int c, l;
148 /* set up a list so we can keep track of what's been printed */
149 char *printed = xcalloc(strlen(line), sizeof(char));
150
151 /* print the chars specified in each cut list */
152 for (c = 0; c < nlists; c++) {
153 l = cut_lists[c].startpos;
154 while (l < strlen(line)) {
155 if (!printed[l]) {
156 putchar(line[l]);
157 printed[l] = 'X';
Mark Whitley807f0fd2000-08-02 18:30:11 +0000158 }
Mark Whitleyb6967632001-05-18 23:04:51 +0000159 l++;
160 if (cut_lists[c].endpos == NON_RANGE || l > cut_lists[c].endpos)
161 break;
162 }
163 }
164 putchar('\n'); /* cuz we were handed a chomped line */
165 free(printed);
166}
167
168
169static void cut_line_by_fields(char *line)
170{
171 int c, f;
172 int ndelim = -1; /* zero-based / one-based problem */
173 int nfields_printed = 0;
174 char *field = NULL;
175 char d[2] = { delim, 0 };
176 char *printed;
177
178 /* test the easy case first: does this line contain any delimiters? */
179 if (strchr(line, delim) == NULL) {
180 if (!supress_non_delimited_lines)
181 puts(line);
182 return;
183 }
184
185 /* set up a list so we can keep track of what's been printed */
186 printed = xcalloc(strlen(line), sizeof(char));
187
188 /* process each list on this line, for as long as we've got a line to process */
189 for (c = 0; c < nlists && line; c++) {
190 f = cut_lists[c].startpos;
191 do {
192
193 /* find the field we're looking for */
194 while (line && ndelim < f) {
195 field = strsep(&line, d);
196 ndelim++;
197 }
198
199 /* we found it, and it hasn't been printed yet */
200 if (field && ndelim == f && !printed[ndelim]) {
201 /* if this isn't our first time through, we need to print the
202 * delimiter after the last field that was printed */
203 if (nfields_printed > 0)
204 putchar(delim);
205 fputs(field, stdout);
206 printed[ndelim] = 'X';
207 nfields_printed++;
208 }
209
210 f++;
211
212 /* keep going as long as we have a line to work with, this is a
213 * list, and we're not at the end of that list */
214 } while (line && cut_lists[c].endpos != NON_RANGE && f <= cut_lists[c].endpos);
215 }
216
217 /* if we printed anything at all, we need to finish it with a newline cuz
218 * we were handed a chomped line */
219 putchar('\n');
220
221 free(printed);
222}
223
224
225static void cut_file_by_lines(const char *line, unsigned int linenum)
226{
227 static int c = 0;
228 static int l = -1;
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000229
Mark Whitleyb6967632001-05-18 23:04:51 +0000230 /* I can't initialize this above cuz the "initializer isn't
231 * constant" *sigh* */
232 if (l == -1)
233 l = cut_lists[c].startpos;
234
235 /* get out if we have no more lists to process or if the lines are lower
236 * than what we're interested in */
237 if (c >= nlists || linenum < l)
238 return;
239
240 /* if the line we're looking for is lower than the one we were passed, it
241 * means we displayed it already, so move on */
242 while (l < linenum) {
243 l++;
244 /* move on to the next list if we're at the end of this one */
245 if (cut_lists[c].endpos == NON_RANGE || l > cut_lists[c].endpos) {
246 c++;
247 /* get out if there's no more lists to process */
248 if (c >= nlists)
249 return;
250 l = cut_lists[c].startpos;
251 /* get out if the current line is lower than the one we just became
252 * interested in */
253 if (linenum < l)
254 return;
Mark Whitley807f0fd2000-08-02 18:30:11 +0000255 }
256 }
257
Mark Whitleyb6967632001-05-18 23:04:51 +0000258 /* If we made it here, it means we've found the line we're looking for, so print it */
259 puts(line);
Mark Whitley807f0fd2000-08-02 18:30:11 +0000260}
Erik Andersen7ab9c7e2000-05-12 19:41:47 +0000261
262
Mark Whitley807f0fd2000-08-02 18:30:11 +0000263/*
264 * snippy-snip
265 */
266static void cut_file(FILE *file)
267{
Mark Whitleyb6967632001-05-18 23:04:51 +0000268 char *line = NULL;
269 unsigned int linenum = 0; /* keep these zero-based to be consistent */
Mark Whitley807f0fd2000-08-02 18:30:11 +0000270
271 /* go through every line in the file */
Manuel Novoa III cad53642003-03-19 09:13:01 +0000272 while ((line = bb_get_chomped_line_from_file(file)) != NULL) {
Mark Whitleyb6967632001-05-18 23:04:51 +0000273
274 /* cut based on chars/bytes XXX: only works when sizeof(char) == byte */
Eric Andersen8876fb22003-06-20 09:01:58 +0000275 if ((part & (OPT_CHAR_FLGS | OPT_BYTE_FLGS)))
Mark Whitleyb6967632001-05-18 23:04:51 +0000276 cut_line_by_chars(line);
277
Mark Whitley807f0fd2000-08-02 18:30:11 +0000278 /* cut based on fields */
Eric Andersen8876fb22003-06-20 09:01:58 +0000279 else {
Mark Whitleyb6967632001-05-18 23:04:51 +0000280 if (delim == '\n')
281 cut_file_by_lines(line, linenum);
282 else
283 cut_line_by_fields(line);
Mark Whitley807f0fd2000-08-02 18:30:11 +0000284 }
Mark Whitleyb6967632001-05-18 23:04:51 +0000285
286 linenum++;
287 free(line);
Mark Whitley807f0fd2000-08-02 18:30:11 +0000288 }
289}
290
Mark Whitleyb6967632001-05-18 23:04:51 +0000291
Mark Whitley807f0fd2000-08-02 18:30:11 +0000292extern int cut_main(int argc, char **argv)
293{
Eric Andersen8876fb22003-06-20 09:01:58 +0000294 unsigned long opt;
295 char *sopt, *sdopt;
Mark Whitley807f0fd2000-08-02 18:30:11 +0000296
"Vladimir N. Oleynik"f704b272005-10-14 09:56:52 +0000297 bb_opt_complementally = "b--bcf:c--bcf:f--bcf";
Eric Andersen8876fb22003-06-20 09:01:58 +0000298 opt = bb_getopt_ulflags(argc, argv, optstring, &sopt, &sopt, &sopt, &sdopt);
299 part = opt & (OPT_BYTE_FLGS|OPT_CHAR_FLGS|OPT_FIELDS_FLGS);
300 if(part == 0)
301 bb_error_msg_and_die("you must specify a list of bytes, characters, or fields");
Mike Frysinger348e84c2005-05-11 00:39:03 +0000302 if(opt & BB_GETOPT_ERROR)
Eric Andersenc9e70242003-06-20 09:16:00 +0000303 bb_error_msg_and_die("only one type of list may be specified");
Eric Andersen8876fb22003-06-20 09:01:58 +0000304 parse_lists(sopt);
305 if((opt & (OPT_DELIM_FLGS))) {
306 if (strlen(sdopt) > 1) {
Eric Andersenc9e70242003-06-20 09:16:00 +0000307 bb_error_msg_and_die("the delimiter must be a single character");
308 }
Eric Andersen8876fb22003-06-20 09:01:58 +0000309 delim = sdopt[0];
Mark Whitley807f0fd2000-08-02 18:30:11 +0000310 }
Eric Andersen8876fb22003-06-20 09:01:58 +0000311 supress_non_delimited_lines = opt & OPT_SUPRESS_FLGS;
Mark Whitley807f0fd2000-08-02 18:30:11 +0000312
Mark Whitleyb6967632001-05-18 23:04:51 +0000313 /* non-field (char or byte) cutting has some special handling */
Eric Andersen8876fb22003-06-20 09:01:58 +0000314 if (part != OPT_FIELDS_FLGS) {
Mark Whitleyb6967632001-05-18 23:04:51 +0000315 if (supress_non_delimited_lines) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000316 bb_error_msg_and_die("suppressing non-delimited lines makes sense"
Mark Whitleyb6967632001-05-18 23:04:51 +0000317 " only when operating on fields");
318 }
Eric Andersen8876fb22003-06-20 09:01:58 +0000319 if (delim != '\t') {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000320 bb_error_msg_and_die("a delimiter may be specified only when operating on fields");
Mark Whitleyb6967632001-05-18 23:04:51 +0000321 }
Mark Whitley807f0fd2000-08-02 18:30:11 +0000322 }
323
324 /* argv[(optind)..(argc-1)] should be names of file to process. If no
325 * files were specified or '-' was specified, take input from stdin.
326 * Otherwise, we process all the files specified. */
327 if (argv[optind] == NULL || (strcmp(argv[optind], "-") == 0)) {
328 cut_file(stdin);
329 }
330 else {
331 int i;
332 FILE *file;
333 for (i = optind; i < argc; i++) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000334 file = bb_wfopen(argv[i], "r");
Eric Andersen34506362001-08-02 05:02:46 +0000335 if(file) {
Mark Whitley807f0fd2000-08-02 18:30:11 +0000336 cut_file(file);
337 fclose(file);
338 }
339 }
340 }
341
Matt Kraai3e856ce2000-12-01 02:55:13 +0000342 return EXIT_SUCCESS;
Mark Whitley807f0fd2000-08-02 18:30:11 +0000343}