blob: d88a891b021ae50e4856f34d37a53c88d52a6d34 [file] [log] [blame]
"Robert P. J. Day"63fc1a92006-07-02 19:47:05 +00001/* vi: set sw=4 ts=4: */
/*
 * cut.c - minimalist version of cut
 *
 * Copyright (C) 1999,2000,2001 by Lineo, inc.
 * Written by Mark Whitley <markw@codepoet.org>
 * debloated by Bernhard Fischer
 *
 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
 */
11
Eric Andersen3570a342000-09-25 21:45:58 +000012#include "busybox.h"
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000013
/*
 * Command-line option state.
 *
 * The CUT_OPT_* bits follow the order of the option letters in optstring:
 * bit 0 is 'b', bit 1 is 'c', bit 2 is 'f', bit 3 is 'd', bit 4 is 's'.
 */
static const char optstring[] = "b:c:f:d:sn";

#define CUT_OPT_BYTE_FLGS      (1 << 0)	/* -b LIST */
#define CUT_OPT_CHAR_FLGS      (1 << 1)	/* -c LIST */
#define CUT_OPT_FIELDS_FLGS    (1 << 2)	/* -f LIST */
#define CUT_OPT_DELIM_FLGS     (1 << 3)	/* -d CHAR */
#define CUT_OPT_SUPPRESS_FLGS  (1 << 4)	/* -s */

/* bitmask of the CUT_OPT_* flags seen on the command line */
static unsigned long opt;

/* field delimiter; a tab unless overridden with -d */
static char delim = '\t';
Mark Whitleyb6967632001-05-18 23:04:51 +000025
/* One entry of a parsed position list; both positions are zero-based. */
struct cut_list {
	int startpos;	/* first position selected by this entry */
	int endpos;	/* last position selected, or NON_RANGE for a single one */
};
30
/* Special position values stored in struct cut_list. */
enum {
	NON_RANGE = -1,		/* endpos marker: entry is a single position */
	BOL = 0,		/* beginning of line */
	EOL = INT_MAX		/* open-ended range, runs to the end of line */
};
Mark Whitleyb6967632001-05-18 23:04:51 +000036
/* growable array of parsed list entries, filled in by cut_main() */
static struct cut_list *cut_lists = NULL;
/* number of entries currently stored in cut_lists */
static unsigned int nlists = 0;
Mark Whitleyb6967632001-05-18 23:04:51 +000040
41
42static int cmpfunc(const void *a, const void *b)
43{
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +000044 return (((struct cut_list *) a)->startpos -
45 ((struct cut_list *) b)->startpos);
Mark Whitleyb6967632001-05-18 23:04:51 +000046
47}
48
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +000049static void cut_file(FILE * file)
Mark Whitley807f0fd2000-08-02 18:30:11 +000050{
Mark Whitleyb6967632001-05-18 23:04:51 +000051 char *line = NULL;
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +000052 unsigned int linenum = 0; /* keep these zero-based to be consistent */
Mark Whitley807f0fd2000-08-02 18:30:11 +000053
54 /* go through every line in the file */
Manuel Novoa III cad53642003-03-19 09:13:01 +000055 while ((line = bb_get_chomped_line_from_file(file)) != NULL) {
Mark Whitleyb6967632001-05-18 23:04:51 +000056
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +000057 /* set up a list so we can keep track of what's been printed */
58 char * printed = xzalloc(strlen(line) * sizeof(char));
59 char * orig_line = line;
60 unsigned int cl_pos = 0;
61 int spos;
62
Mark Whitleyb6967632001-05-18 23:04:51 +000063 /* cut based on chars/bytes XXX: only works when sizeof(char) == byte */
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +000064 if ((opt & (CUT_OPT_CHAR_FLGS | CUT_OPT_BYTE_FLGS))) {
65 /* print the chars specified in each cut list */
66 for (; cl_pos < nlists; cl_pos++) {
67 spos = cut_lists[cl_pos].startpos;
68 while (spos < strlen(line)) {
69 if (!printed[spos]) {
70 printed[spos] = 'X';
71 putchar(line[spos]);
72 }
73 spos++;
74 if (spos > cut_lists[cl_pos].endpos
75 || cut_lists[cl_pos].endpos == NON_RANGE)
76 break;
77 }
78 }
79 } else if (delim == '\n') { /* cut by lines */
80 spos = cut_lists[cl_pos].startpos;
Mark Whitleyb6967632001-05-18 23:04:51 +000081
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +000082 /* get out if we have no more lists to process or if the lines
83 * are lower than what we're interested in */
84 if (linenum < spos || cl_pos >= nlists)
85 goto next_line;
86
87 /* if the line we're looking for is lower than the one we were
88 * passed, it means we displayed it already, so move on */
89 while (spos < linenum) {
90 spos++;
91 /* go to the next list if we're at the end of this one */
92 if (spos > cut_lists[cl_pos].endpos
93 || cut_lists[cl_pos].endpos == NON_RANGE) {
94 cl_pos++;
95 /* get out if there's no more lists to process */
96 if (cl_pos >= nlists)
97 goto next_line;
98 spos = cut_lists[cl_pos].startpos;
99 /* get out if the current line is lower than the one
100 * we just became interested in */
101 if (linenum < spos)
102 goto next_line;
103 }
104 }
105
106 /* If we made it here, it means we've found the line we're
107 * looking for, so print it */
108 puts(line);
109 goto next_line;
110 } else { /* cut by fields */
111 int ndelim = -1; /* zero-based / one-based problem */
112 int nfields_printed = 0;
113 char *field = NULL;
114 const char delimiter[2] = { delim, 0 };
115
116 /* does this line contain any delimiters? */
117 if (strchr(line, delim) == NULL) {
118 if (!(opt & CUT_OPT_SUPPRESS_FLGS))
119 puts(line);
120 goto next_line;
121 }
122
123 /* process each list on this line, for as long as we've got
124 * a line to process */
125 for (; cl_pos < nlists && line; cl_pos++) {
126 spos = cut_lists[cl_pos].startpos;
127 do {
128
129 /* find the field we're looking for */
130 while (line && ndelim < spos) {
131 field = strsep(&line, delimiter);
132 ndelim++;
133 }
134
135 /* we found it, and it hasn't been printed yet */
136 if (field && ndelim == spos && !printed[ndelim]) {
137 /* if this isn't our first time through, we need to
138 * print the delimiter after the last field that was
139 * printed */
140 if (nfields_printed > 0)
141 putchar(delim);
142 fputs(field, stdout);
143 printed[ndelim] = 'X';
144 nfields_printed++; /* shouldn't overflow.. */
145 }
146
147 spos++;
148
149 /* keep going as long as we have a line to work with,
150 * this is a list, and we're not at the end of that
151 * list */
152 } while (spos <= cut_lists[cl_pos].endpos && line
153 && cut_lists[cl_pos].endpos != NON_RANGE);
154 }
Mark Whitley807f0fd2000-08-02 18:30:11 +0000155 }
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +0000156 /* if we printed anything at all, we need to finish it with a
157 * newline cuz we were handed a chomped line */
158 putchar('\n');
159 next_line:
Mark Whitleyb6967632001-05-18 23:04:51 +0000160 linenum++;
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +0000161 free(printed);
162 free(orig_line);
Mark Whitley807f0fd2000-08-02 18:30:11 +0000163 }
164}
165
/*
 * Convert the decimal string ntok to a non-negative int.
 *
 * Terminates via bb_error_msg_and_die() (presumably does not return) when
 * ntok contains no digits, has trailing characters after the number, or
 * denotes a value that does not fit in an int.
 *
 * The range check is done on the unsigned long result of strtoul() before
 * narrowing: converting first could wrap a huge value into a small positive
 * int that would then be accepted silently.  strtoul() reports overflow by
 * returning ULONG_MAX, which the same check rejects.
 */
static int getval(char *ntok)
{
	char *junk;
	unsigned long val = strtoul(ntok, &junk, 10);

	if (junk == ntok || *junk != '\0' || val > INT_MAX)
		bb_error_msg_and_die("invalid byte or field list");
	return (int)val;
}
175
/* shared tail of the error messages for options that only apply to -f */
static const char _op_on_field[] = " only when operating on fields";
Mark Whitleyb6967632001-05-18 23:04:51 +0000177
Rob Landleydfba7412006-03-06 20:47:33 +0000178int cut_main(int argc, char **argv)
Mark Whitley807f0fd2000-08-02 18:30:11 +0000179{
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +0000180 char *sopt, *ltok;
Mark Whitley807f0fd2000-08-02 18:30:11 +0000181
"Vladimir N. Oleynik"f704b272005-10-14 09:56:52 +0000182 bb_opt_complementally = "b--bcf:c--bcf:f--bcf";
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +0000183 opt =
184 bb_getopt_ulflags(argc, argv, optstring, &sopt, &sopt, &sopt, &ltok);
185 if (!(opt & (CUT_OPT_BYTE_FLGS | CUT_OPT_CHAR_FLGS | CUT_OPT_FIELDS_FLGS)))
186 bb_error_msg_and_die
187 ("expected a list of bytes, characters, or fields");
188 if (opt & BB_GETOPT_ERROR)
Eric Andersenc9e70242003-06-20 09:16:00 +0000189 bb_error_msg_and_die("only one type of list may be specified");
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +0000190
191 if ((opt & (CUT_OPT_DELIM_FLGS))) {
192 if (strlen(ltok) > 1) {
Eric Andersenc9e70242003-06-20 09:16:00 +0000193 bb_error_msg_and_die("the delimiter must be a single character");
194 }
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +0000195 delim = ltok[0];
Mark Whitley807f0fd2000-08-02 18:30:11 +0000196 }
Mark Whitley807f0fd2000-08-02 18:30:11 +0000197
Mark Whitleyb6967632001-05-18 23:04:51 +0000198 /* non-field (char or byte) cutting has some special handling */
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +0000199 if (!(opt & CUT_OPT_FIELDS_FLGS)) {
200 if (opt & CUT_OPT_SUPPRESS_FLGS) {
201 bb_error_msg_and_die
202 ("suppressing non-delimited lines makes sense%s",
203 _op_on_field);
Mark Whitleyb6967632001-05-18 23:04:51 +0000204 }
Eric Andersen8876fb22003-06-20 09:01:58 +0000205 if (delim != '\t') {
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +0000206 bb_error_msg_and_die
207 ("a delimiter may be specified%s", _op_on_field);
Mark Whitleyb6967632001-05-18 23:04:51 +0000208 }
Mark Whitley807f0fd2000-08-02 18:30:11 +0000209 }
210
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +0000211 /*
212 * parse list and put values into startpos and endpos.
213 * valid list formats: N, N-, N-M, -M
214 * more than one list can be separated by commas
215 */
216 {
217 char *ntok;
218 int s = 0, e = 0;
219
220 /* take apart the lists, one by one (they are separated with commas */
221 while ((ltok = strsep(&sopt, ",")) != NULL) {
222
223 /* it's actually legal to pass an empty list */
224 if (strlen(ltok) == 0)
225 continue;
226
227 /* get the start pos */
228 ntok = strsep(&ltok, "-");
229 if (ntok == NULL) {
230 bb_error_msg
231 ("internal error: ntok is null for start pos!?\n");
232 } else if (strlen(ntok) == 0) {
233 s = BOL;
234 } else {
235 s = getval(ntok);
236 /* account for the fact that arrays are zero based, while
237 * the user expects the first char on the line to be char #1 */
238 if (s != 0)
239 s--;
240 }
241
242 /* get the end pos */
243 ntok = strsep(&ltok, "-");
244 if (ntok == NULL) {
245 e = NON_RANGE;
246 } else if (strlen(ntok) == 0) {
247 e = EOL;
248 } else {
249 e = getval(ntok);
250 /* if the user specified and end position of 0, that means "til the
251 * end of the line */
252 if (e == 0)
253 e = EOL;
254 e--; /* again, arrays are zero based, lines are 1 based */
255 if (e == s)
256 e = NON_RANGE;
257 }
258
259 /* if there's something left to tokenize, the user passed
260 * an invalid list */
261 if (ltok)
262 bb_error_msg_and_die("invalid byte or field list");
263
264 /* add the new list */
265 cut_lists =
266 xrealloc(cut_lists, sizeof(struct cut_list) * (++nlists));
267 cut_lists[nlists - 1].startpos = s;
268 cut_lists[nlists - 1].endpos = e;
269 }
270
271 /* make sure we got some cut positions out of all that */
272 if (nlists == 0)
273 bb_error_msg_and_die("missing list of positions");
274
275 /* now that the lists are parsed, we need to sort them to make life
276 * easier on us when it comes time to print the chars / fields / lines
277 */
278 qsort(cut_lists, nlists, sizeof(struct cut_list), cmpfunc);
279 }
280
Mark Whitley807f0fd2000-08-02 18:30:11 +0000281 /* argv[(optind)..(argc-1)] should be names of file to process. If no
282 * files were specified or '-' was specified, take input from stdin.
283 * Otherwise, we process all the files specified. */
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +0000284 if (argv[optind] == NULL
285 || (argv[optind][0] == '-' && argv[optind][1] == '\0')) {
Mark Whitley807f0fd2000-08-02 18:30:11 +0000286 cut_file(stdin);
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +0000287 } else {
Mark Whitley807f0fd2000-08-02 18:30:11 +0000288 FILE *file;
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +0000289
290 for (; optind < argc; optind++) {
291 file = bb_wfopen(argv[optind], "r");
292 if (file) {
Mark Whitley807f0fd2000-08-02 18:30:11 +0000293 cut_file(file);
294 fclose(file);
295 }
296 }
297 }
Bernhard Reutner-Fischer73561cc2006-08-28 23:31:54 +0000298 if (ENABLE_FEATURE_CLEAN_UP)
299 free(cut_lists);
Matt Kraai3e856ce2000-12-01 02:55:13 +0000300 return EXIT_SUCCESS;
Mark Whitley807f0fd2000-08-02 18:30:11 +0000301}