blob: 6e81aa8f4e2d758e9a900f01f8218f37b7cab064 [file] [log] [blame]
Glenn L McGrath545106f2002-11-11 06:21:00 +00001/* vi: set sw=4 ts=4: */
2/*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
Bernhard Reutner-Fischer86f5c992006-01-22 22:55:11 +00007 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
Glenn L McGrath545106f2002-11-11 06:21:00 +00008 */
9
Glenn L McGrath545106f2002-11-11 06:21:00 +000010#include "busybox.h"
Rob Landleyd921b2e2006-08-03 15:41:12 +000011#include "xregex.h"
12#include <math.h>
Glenn L McGrath545106f2002-11-11 06:21:00 +000013
14
/* size limits */
#define MAXVARFMT	240	/* size passed to fmt_num() when formatting a number */
#define MINNVBLOCK	64	/* minimum number of vars in one temporary-variable block */

/* variable flags: primary type */
#define VF_NUMBER	0x0001	/* 1 = primary type is number */
#define VF_ARRAY	0x0002	/* 1 = it's an array */

/* variable flags: state */
#define VF_CACHED	0x0100	/* 1 = num/str value has cached str/num eq */
#define VF_USER		0x0200	/* 1 = user input (may be numeric string) */
#define VF_SPECIAL	0x0400	/* 1 = requires extra handling when changed */
#define VF_WALK		0x0800	/* 1 = variable has alloc'd x.walker list */
#define VF_FSTR		0x1000	/* 1 = string points to fstring buffer */
#define VF_CHILD	0x2000	/* 1 = function arg; x.parent points to source */
#define VF_DIRTY	0x4000	/* 1 = variable was set explicitly */

/* flags that survive a value change (clrvar() keeps exactly these) */
#define VF_DONTTOUCH	(VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
32
/* Variable */
typedef struct var_s {
	unsigned short type;		/* VF_* flags */
	double number;			/* numeric value (or cached conversion) */
	char *string;			/* string value (or cached conversion), may be NULL */
	union {
		int aidx;		/* func arg idx (for compilation stage) */
		struct xhash_s *array;	/* array ptr */
		struct var_s *parent;	/* for func args, ptr to actual parameter */
		char **walker;		/* list of array elements (for..in) */
	} x;
} var;

/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
	struct node_s *first;
	struct node_s *last;
	char *programname;
} chain;

/* Function */
typedef struct func_s {
	unsigned short nargs;
	struct chain_s body;
} func;

/* I/O stream */
typedef struct rstream_s {
	FILE *F;
	char *buffer;
	int adv;
	int size;
	int pos;
	unsigned short is_pipe;
} rstream;

/* Hash bucket entry. The payload union is the first member and the
 * item's name is stored inline after the fixed part (hash_find()
 * allocates sizeof(hash_item) + strlen(name) + 1 bytes).
 */
typedef struct hash_item_s {
	union {
		struct var_s v;		/* variable/array hash */
		struct rstream_s rs;	/* redirect streams hash */
		struct func_s f;	/* functions hash */
	} data;
	struct hash_item_s *next;	/* next in chain */
	char name[1];			/* really it's longer */
} hash_item;

/* Chained hash table */
typedef struct xhash_s {
	unsigned int nel;		/* num of elements */
	unsigned int csize;		/* current hash size */
	unsigned int nprime;		/* next hash size in PRIMES[] */
	unsigned int glen;		/* summary length of item names */
	struct hash_item_s **items;
} xhash;

/* Tree node */
typedef struct node_s {
	uint32_t info;			/* operation class, flags, priority, opcode */
	unsigned short lineno;		/* source line the node was created at */
	union {
		struct node_s *n;
		var *v;
		int i;
		char *s;
		regex_t *re;
	} l;
	union {
		struct node_s *n;
		regex_t *ire;
		func *f;
		int argno;
	} r;
	union {
		struct node_s *n;
	} a;
} node;

/* Block of temporary variables.
 * nv is a C99 flexible array member (the original used the GNU
 * zero-length-array extension); blocks are allocated as
 * sizeof(nvblock) + size * sizeof(var).
 */
typedef struct nvblock_s {
	int size;			/* number of vars in nv[] */
	var *pos;			/* first unused var in nv[] */
	struct nvblock_s *prev;
	struct nvblock_s *next;
	var nv[];
} nvblock;

/* Splitter: an OC_REGEXP node together with storage for its two
 * compiled regexes (see mk_re_node(): re[0] plain, re[1] ignore-case).
 */
typedef struct tsplitter_s {
	node n;
	regex_t re[2];
} tsplitter;
122
/* simple token classes */
/* Order and hex values are very important!!! See next_token():
 * class N is bit N-1, and tokenlist[] is scanned in the same order.
 */
#define TC_SEQSTART	(1 << 0)	/* ( */
#define TC_SEQTERM	(1 << 1)	/* ) */
#define TC_REGEXP	(1 << 2)	/* /.../ */
#define TC_OUTRDR	(1 << 3)	/* | > >> */
#define TC_UOPPOST	(1 << 4)	/* unary postfix operator */
#define TC_UOPPRE1	(1 << 5)	/* unary prefix operator */
#define TC_BINOPX	(1 << 6)	/* two-opnd operator */
#define TC_IN		(1 << 7)
#define TC_COMMA	(1 << 8)
#define TC_PIPE		(1 << 9)	/* input redirection pipe */
#define TC_UOPPRE2	(1 << 10)	/* unary prefix operator */
#define TC_ARRTERM	(1 << 11)	/* ] */
#define TC_GRPSTART	(1 << 12)	/* { */
#define TC_GRPTERM	(1 << 13)	/* } */
#define TC_SEMICOL	(1 << 14)
#define TC_NEWLINE	(1 << 15)
#define TC_STATX	(1 << 16)	/* ctl statement (for, next...) */
#define TC_WHILE	(1 << 17)
#define TC_ELSE		(1 << 18)
#define TC_BUILTIN	(1 << 19)
#define TC_GETLINE	(1 << 20)
#define TC_FUNCDECL	(1 << 21)	/* `function' `func' */
#define TC_BEGIN	(1 << 22)
#define TC_END		(1 << 23)
#define TC_EOF		(1 << 24)
#define TC_VARIABLE	(1 << 25)
#define TC_ARRAY	(1 << 26)
#define TC_FUNCTION	(1 << 27)
#define TC_STRING	(1 << 28)
#define TC_NUMBER	(1 << 29)

#define TC_UOPPRE	(TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP	(TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP	(TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND	(TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
			 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT	(TC_STATX | TC_WHILE)
#define TC_OPTERM	(TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD		(TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
			 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM	(TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
			 TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ	(TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ	(TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1	(TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
			 TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2	(TC_OPERAND | TC_UOPPRE)
185
/* operand-handling flags stored in a token's/node's info word */
#define OF_RES1		0x010000
#define OF_RES2		0x020000
#define OF_STR1		0x040000
#define OF_STR2		0x080000
#define OF_NUM1		0x100000
#define OF_CHECKED	0x200000

/* combined operator flags */
#define	xx	0
#define	xV	OF_RES2
#define	xS	(OF_RES2 | OF_STR2)
#define	Vx	OF_RES1
#define	VV	(OF_RES1 | OF_RES2)
#define	Nx	(OF_RES1 | OF_NUM1)
#define	NV	(OF_RES1 | OF_NUM1 | OF_RES2)
#define	Sx	(OF_RES1 | OF_STR1)
#define	SV	(OF_RES1 | OF_STR1 | OF_RES2)
#define	SS	(OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

#define OPCLSMASK	0xFF00
#define OPNMASK		0x007F

/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() places its argument in the top byte of a 32-bit info word.  The
 * argument is parenthesised and widened to uint32_t before shifting:
 * builtin values such as 0x83 do not fit a signed int once shifted by 24.
 */
#define P(x)		((uint32_t)(x) << 24)
#define PRIMASK		0x7F000000
#define PRIMASK2	0x7E000000
215
/* Operation classes (bits 8..15 of an info word, see OPCLSMASK) */

#define	SHIFT_TIL_THIS	0x0600	/* same value as OC_WALKINIT */
#define	RECUR_FROM_THIS	0x1000	/* same value as OC_BINARY */

enum {
	OC_DELETE	= 0x0100,
	OC_EXEC		= 0x0200,
	OC_NEWSOURCE	= 0x0300,
	OC_PRINT	= 0x0400,
	OC_PRINTF	= 0x0500,
	OC_WALKINIT	= 0x0600,

	OC_BR		= 0x0700,
	OC_BREAK	= 0x0800,
	OC_CONTINUE	= 0x0900,
	OC_EXIT		= 0x0a00,
	OC_NEXT		= 0x0b00,
	OC_NEXTFILE	= 0x0c00,
	OC_TEST		= 0x0d00,
	OC_WALKNEXT	= 0x0e00,

	OC_BINARY	= 0x1000,
	OC_BUILTIN	= 0x1100,
	OC_COLON	= 0x1200,
	OC_COMMA	= 0x1300,
	OC_COMPARE	= 0x1400,
	OC_CONCAT	= 0x1500,
	OC_FBLTIN	= 0x1600,
	OC_FIELD	= 0x1700,
	OC_FNARG	= 0x1800,
	OC_FUNC		= 0x1900,
	OC_GETLINE	= 0x1a00,
	OC_IN		= 0x1b00,
	OC_LAND		= 0x1c00,
	OC_LOR		= 0x1d00,
	OC_MATCH	= 0x1e00,
	OC_MOVE		= 0x1f00,
	OC_PGETLINE	= 0x2000,
	OC_REGEXP	= 0x2100,
	OC_REPLACE	= 0x2200,
	OC_RETURN	= 0x2300,
	OC_SPRINTF	= 0x2400,
	OC_TERNARY	= 0x2500,
	OC_UNARY	= 0x2600,
	OC_VAR		= 0x2700,
	OC_DONE		= 0x2800,

	ST_IF		= 0x3000,
	ST_DO		= 0x3100,
	ST_FOR		= 0x3200,
	ST_WHILE	= 0x3300
};

/* simple builtins (opcode combined with OC_FBLTIN in tokeninfo[]) */
enum {
	F_in = 0,	/* int */
	F_rn,		/* rand */
	F_co,		/* cos */
	F_ex,		/* exp */
	F_lg,		/* log */
	F_si,		/* sin */
	F_sq,		/* sqrt */
	F_sr,		/* srand */
	F_ti,		/* systime */
	F_le,		/* length */
	F_sy,		/* system */
	F_ff,		/* fflush */
	F_cl		/* close */
};

/* builtins (opcode combined with OC_BUILTIN in tokeninfo[]) */
enum {
	B_a2 = 0,	/* atan2 */
	B_ix,		/* index */
	B_ma,		/* match */
	B_sp,		/* split */
	B_ss,		/* substr */
	B_ti,		/* strftime */
	B_lo,		/* tolower */
	B_up,		/* toupper */
	B_ge,		/* gensub */
	B_gs,		/* gsub */
	B_su		/* sub */
};
254
/* tokens and their corresponding info values */

#define	NTC	"\377"	/* switch to next token class (tc<<1) */
#define	NTCC	'\377'

#define	OC_B	OC_BUILTIN

/* Each token is stored as one length byte followed by its text; NTC
 * separates token classes, which appear in TC_* bit order.  Entry k
 * here pairs with entry k of tokeninfo[].
 */
static char * const tokenlist =
	"\1("		NTC				/* SEQSTART */
	"\1)"		NTC				/* SEQTERM */
	"\1/"		NTC				/* REGEXP */
	"\2>>"		"\1>"		"\1|"		NTC	/* OUTRDR */
	"\2++"		"\2--"		NTC			/* UOPPOST */
	"\2++"		"\2--"		"\1$"		NTC	/* UOPPRE1 */
	"\2=="		"\1="		"\2+="		"\2-="	/* BINOPX */
	"\2*="		"\2/="		"\2%="		"\2^="
	"\1+"		"\1-"		"\3**="		"\2**"
	"\1/"		"\1%"		"\1^"		"\1*"
	"\2!="		"\2>="		"\2<="		"\1>"
	"\1<"		"\2!~"		"\1~"		"\2&&"
	"\2||"		"\1?"		"\1:"		NTC
	"\2in"		NTC				/* IN */
	"\1,"		NTC				/* COMMA */
	"\1|"		NTC				/* PIPE */
	"\1+"		"\1-"		"\1!"		NTC	/* UOPPRE2 */
	"\1]"		NTC				/* ARRTERM */
	"\1{"		NTC				/* GRPSTART */
	"\1}"		NTC				/* GRPTERM */
	"\1;"		NTC				/* SEMICOL */
	"\1\n"		NTC				/* NEWLINE */
	"\2if"		"\2do"		"\3for"		"\5break"	/* STATX */
	"\10continue"	"\6delete"	"\5print"
	"\6printf"	"\4next"	"\10nextfile"
	"\6return"	"\4exit"	NTC
	"\5while"	NTC				/* WHILE */
	"\4else"	NTC				/* ELSE */

	"\5close"	"\6system"	"\6fflush"	"\5atan2"	/* BUILTIN */
	"\3cos"		"\3exp"		"\3int"		"\3log"
	"\4rand"	"\3sin"		"\4sqrt"	"\5srand"
	"\6gensub"	"\4gsub"	"\5index"	"\6length"
	"\5match"	"\5split"	"\7sprintf"	"\3sub"
	"\6substr"	"\7systime"	"\10strftime"
	"\7tolower"	"\7toupper"	NTC
	"\7getline"	NTC				/* GETLINE */
	"\4func"	"\10function"	NTC		/* FUNCDECL */
	"\5BEGIN"	NTC				/* BEGIN */
	"\3END"		"\0"				/* END */
	;
304
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000305static const uint32_t tokeninfo[] = {
Glenn L McGrath545106f2002-11-11 06:21:00 +0000306
307 0,
308 0,
309 OC_REGEXP,
310 xS|'a', xS|'w', xS|'|',
311 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
312 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
313 OC_FIELD|xV|P(5),
314 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
315 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
316 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
317 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
318 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
319 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
320 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
321 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
322 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
323 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
324 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
325 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
326 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
327 OC_COLON|xx|P(67)|':',
328 OC_IN|SV|P(49),
329 OC_COMMA|SS|P(80),
330 OC_PGETLINE|SV|P(37),
331 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
332 OC_UNARY|xV|P(19)|'!',
333 0,
334 0,
335 0,
336 0,
337 0,
338 ST_IF, ST_DO, ST_FOR, OC_BREAK,
339 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
340 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
341 OC_RETURN|Vx, OC_EXIT|Nx,
342 ST_WHILE,
343 0,
344
345 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
346 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
347 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
348 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
349 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
350 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
351 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
352 OC_GETLINE|SV|P(0),
353 0, 0,
354 0,
355 0
356};
357
/* internal variable names and their initial values */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
enum {
	CONVFMT = 0,
	OFMT,
	FS,
	OFS,
	ORS,
	RS,
	RT,
	FILENAME,
	SUBSEP,
	ARGIND,
	ARGC,
	ARGV,
	ERRNO,
	FNR,
	NR,
	NF,
	IGNORECASE,
	ENVIRON,
	F0,
	_intvarcount_
};

/* NUL-separated names in enum order; in this table the asterisk sits
 * right after the terminating NUL of the name it marks, and an empty
 * name ends the list.
 */
static char * vNames =
	"CONVFMT\0"
	"OFMT\0"
	"FS\0*"
	"OFS\0"
	"ORS\0"
	"RS\0*"
	"RT\0"
	"FILENAME\0"
	"SUBSEP\0"
	"ARGIND\0"
	"ARGC\0"
	"ARGV\0"
	"ERRNO\0"
	"FNR\0"
	"NR\0"
	"NF\0*"
	"IGNORECASE\0*"
	"ENVIRON\0"
	"$\0*"
	"\0";

/* NUL-separated initial string values for the leading variables
 * (CONVFMT .. SUBSEP), followed by a '\377' end marker.
 */
static char * vValues =
	"%.6g\0"
	"%.6g\0"
	" \0"
	" \0"
	"\n\0"
	"\n\0"
	"\0"
	"\0"
	"\034\0"
	"\377";
382
/* hash size may grow to these values */
/* FIRST_PRIME is the initial bucket count used by hash_init(); it must
 * expand to a plain expression (no trailing ';') so it can be used
 * anywhere a constant is expected.
 */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
Glenn L McGrath545106f2002-11-11 06:21:00 +0000387
388/* globals */
389
390extern char **environ;
391
392static var * V[_intvarcount_];
393static chain beginseq, mainseq, endseq, *seq;
394static int nextrec, nextfile;
395static node *break_ptr, *continue_ptr;
396static rstream *iF;
397static xhash *vhash, *ahash, *fdhash, *fnhash;
398static char *programname;
399static short lineno;
400static int is_f0_split;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000401static int nfields;
402static var *Fields;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000403static tsplitter fsplitter, rsplitter;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000404static nvblock *cb;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000405static char *pos;
406static char *buf;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000407static int icase;
408static int exiting;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000409
410static struct {
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000411 uint32_t tclass;
412 uint32_t info;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000413 char *string;
414 double number;
415 short lineno;
416 int rollback;
417} t;
418
419/* function prototypes */
Glenn L McGrath545106f2002-11-11 06:21:00 +0000420static void handle_special(var *);
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000421static node *parse_expr(uint32_t);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000422static void chain_group(void);
423static var *evaluate(node *, var *);
424static rstream *next_input_file(void);
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000425static int fmt_num(char *, int, const char *, double, int);
Bernhard Reutner-Fischer86f5c992006-01-22 22:55:11 +0000426static int awk_exit(int) ATTRIBUTE_NORETURN;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000427
/* ---- error handling ---- */

/* message texts passed to syntax_error() / runtime_error() */
static const char EMSG_INTERNAL_ERROR[]	= "Internal error";
static const char EMSG_UNEXP_EOS[]	= "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[]	= "Unexpected token";
static const char EMSG_DIV_BY_ZERO[]	= "Division by zero";
static const char EMSG_INV_FMT[]	= "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[]	= "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[]	= "Not an array";
static const char EMSG_POSSIBLE_ERROR[]	= "Possible syntax error";
static const char EMSG_UNDEF_FUNC[]	= "Call to undefined function";
#ifndef CONFIG_FEATURE_AWK_MATH
static const char EMSG_NO_MATH[]	= "Math support is not compiled in";
#endif
442
Bernhard Reutner-Fischer86f5c992006-01-22 22:55:11 +0000443static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
Glenn L McGrathd4036f82002-11-28 09:30:40 +0000444static void syntax_error(const char * const message)
445{
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000446 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000447}
448
449#define runtime_error(x) syntax_error(x)
450
451
/* ---- hash stuff ---- */

/* Hash a NUL-terminated name: for every character, h = h * 63 + c
 * (written as a shift and a subtract), in unsigned arithmetic.
 * The result is NOT reduced; callers take it modulo the table size.
 */
static unsigned int hashidx(const char *name)
{
	unsigned int h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = (h << 6) - h + *p;

	return h;
}
461
462/* create new hash */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000463static xhash *hash_init(void)
464{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000465 xhash *newhash;
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000466
Rob Landley9ffd4232006-05-21 18:30:35 +0000467 newhash = (xhash *)xzalloc(sizeof(xhash));
Glenn L McGrath545106f2002-11-11 06:21:00 +0000468 newhash->csize = FIRST_PRIME;
Rob Landley9ffd4232006-05-21 18:30:35 +0000469 newhash->items = (hash_item **)xzalloc(newhash->csize * sizeof(hash_item *));
Glenn L McGrath545106f2002-11-11 06:21:00 +0000470
471 return newhash;
472}
473
474/* find item in hash, return ptr to data, NULL if not found */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000475static void *hash_search(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000476{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000477 hash_item *hi;
478
479 hi = hash->items [ hashidx(name) % hash->csize ];
480 while (hi) {
481 if (strcmp(hi->name, name) == 0)
482 return &(hi->data);
483 hi = hi->next;
484 }
485 return NULL;
486}
487
488/* grow hash if it becomes too big */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000489static void hash_rebuild(xhash *hash)
490{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000491 unsigned int newsize, i, idx;
492 hash_item **newitems, *hi, *thi;
493
494 if (hash->nprime == NPRIMES)
495 return;
496
497 newsize = PRIMES[hash->nprime++];
Rob Landley9ffd4232006-05-21 18:30:35 +0000498 newitems = (hash_item **)xzalloc(newsize * sizeof(hash_item *));
Glenn L McGrath545106f2002-11-11 06:21:00 +0000499
500 for (i=0; i<hash->csize; i++) {
501 hi = hash->items[i];
502 while (hi) {
503 thi = hi;
504 hi = thi->next;
505 idx = hashidx(thi->name) % newsize;
506 thi->next = newitems[idx];
507 newitems[idx] = thi;
508 }
509 }
510
511 free(hash->items);
512 hash->csize = newsize;
513 hash->items = newitems;
514}
515
516/* find item in hash, add it if necessary. Return ptr to data */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000517static void *hash_find(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000518{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000519 hash_item *hi;
520 unsigned int idx;
521 int l;
522
523 hi = hash_search(hash, name);
524 if (! hi) {
525 if (++hash->nel / hash->csize > 10)
526 hash_rebuild(hash);
527
Rob Landleya3896512006-05-07 20:20:34 +0000528 l = strlen(name) + 1;
Rob Landley9ffd4232006-05-21 18:30:35 +0000529 hi = xzalloc(sizeof(hash_item) + l);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000530 memcpy(hi->name, name, l);
531
532 idx = hashidx(name) % hash->csize;
533 hi->next = hash->items[idx];
534 hash->items[idx] = hi;
535 hash->glen += l;
536 }
537 return &(hi->data);
538}
539
/* Typed wrappers around hash_find(); each creates the item when it is
 * missing.  The whole expansion is parenthesised so the cast applies to
 * the call result even when the macro is followed by -> or [].
 */
#define findvar(hash, name)	((var *)     hash_find((hash), (name)))
#define newvar(name)		((var *)     hash_find(vhash, (name)))
#define newfile(name)		((rstream *) hash_find(fdhash, (name)))
#define newfunc(name)		((func *)    hash_find(fnhash, (name)))
544
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000545static void hash_remove(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000546{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000547 hash_item *hi, **phi;
548
549 phi = &(hash->items[ hashidx(name) % hash->csize ]);
550 while (*phi) {
551 hi = *phi;
552 if (strcmp(hi->name, name) == 0) {
Rob Landleya3896512006-05-07 20:20:34 +0000553 hash->glen -= (strlen(name) + 1);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000554 hash->nel--;
555 *phi = hi->next;
556 free(hi);
557 break;
558 }
559 phi = &(hi->next);
560 }
561}
562
563/* ------ some useful functions ------ */
564
Mike Frysinger10a11e22005-09-27 02:23:02 +0000565static void skip_spaces(char **s)
566{
"Robert P. J. Day"68229832006-07-01 13:08:46 +0000567 char *p = *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000568
569 while(*p == ' ' || *p == '\t' ||
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000570 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
Mike Frysingerde2b9382005-09-27 03:18:00 +0000571 p++;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000572 }
573 *s = p;
574}
575
/* Return the NUL-terminated word at *s and advance *s to the byte just
 * past that word's terminating NUL.
 */
static char *nextword(char **s)
{
	char *start = *s;
	char *cur = start;

	while (*cur != '\0')
		cur++;
	*s = cur + 1;

	return start;
}
/* Fetch the next character from *s, decoding a backslash escape via
 * bb_process_escape_sequence().  If that call returns a backslash
 * without advancing *s, the character following the backslash is
 * returned as-is.
 */
static char nextchar(char **s)
{
	char c = *((*s)++);

	if (c == '\\') {
		char *after_bs = *s;

		c = bb_process_escape_sequence((const char**)s);
		if (c == '\\' && *s == after_bs)
			c = *((*s)++);
	}
	return c;
}
595
Rob Landley88621d72006-08-29 19:41:06 +0000596static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000597{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000598 return (isalnum(c) || c == '_');
599}
600
/* Open `path' with `mode' via xfopen(); the name "-" yields stdin
 * instead (regardless of `mode').
 */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return xfopen(path, mode);
}
605
606/* -------- working with variables (set/get/copy/etc) -------- */
607
Mike Frysinger10a11e22005-09-27 02:23:02 +0000608static xhash *iamarray(var *v)
609{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000610 var *a = v;
611
612 while (a->type & VF_CHILD)
613 a = a->x.parent;
614
615 if (! (a->type & VF_ARRAY)) {
616 a->type |= VF_ARRAY;
617 a->x.array = hash_init();
618 }
619 return a->x.array;
620}
621
Mike Frysinger10a11e22005-09-27 02:23:02 +0000622static void clear_array(xhash *array)
623{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000624 unsigned int i;
625 hash_item *hi, *thi;
626
627 for (i=0; i<array->csize; i++) {
628 hi = array->items[i];
629 while (hi) {
630 thi = hi;
631 hi = hi->next;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000632 free(thi->data.v.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000633 free(thi);
634 }
635 array->items[i] = NULL;
636 }
637 array->glen = array->nel = 0;
638}
639
640/* clear a variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000641static var *clrvar(var *v)
642{
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000643 if (!(v->type & VF_FSTR))
Glenn L McGrath545106f2002-11-11 06:21:00 +0000644 free(v->string);
645
646 v->type &= VF_DONTTOUCH;
647 v->type |= VF_DIRTY;
648 v->string = NULL;
649 return v;
650}
651
652/* assign string value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000653static var *setvar_p(var *v, char *value)
654{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000655 clrvar(v);
656 v->string = value;
657 handle_special(v);
658
659 return v;
660}
661
662/* same as setvar_p but make a copy of string */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000663static var *setvar_s(var *v, const char *value)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000664{
Rob Landleyd921b2e2006-08-03 15:41:12 +0000665 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000666}
667
668/* same as setvar_s but set USER flag */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000669static var *setvar_u(var *v, const char *value)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000670{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000671 setvar_s(v, value);
672 v->type |= VF_USER;
673 return v;
674}
675
676/* set array element to user string */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000677static void setari_u(var *a, int idx, const char *s)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000678{
"Robert P. J. Day"68229832006-07-01 13:08:46 +0000679 var *v;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000680 static char sidx[12];
681
682 sprintf(sidx, "%d", idx);
683 v = findvar(iamarray(a), sidx);
684 setvar_u(v, s);
685}
686
687/* assign numeric value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000688static var *setvar_i(var *v, double value)
689{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000690 clrvar(v);
691 v->type |= VF_NUMBER;
692 v->number = value;
693 handle_special(v);
694 return v;
695}
696
Mike Frysinger10a11e22005-09-27 02:23:02 +0000697static char *getvar_s(var *v)
698{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000699 /* if v is numeric and has no cached string, convert it to string */
700 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
701 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
Rob Landleyd921b2e2006-08-03 15:41:12 +0000702 v->string = xstrdup(buf);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000703 v->type |= VF_CACHED;
704 }
705 return (v->string == NULL) ? "" : v->string;
706}
707
Mike Frysinger10a11e22005-09-27 02:23:02 +0000708static double getvar_i(var *v)
709{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000710 char *s;
711
712 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
713 v->number = 0;
714 s = v->string;
715 if (s && *s) {
716 v->number = strtod(s, &s);
717 if (v->type & VF_USER) {
718 skip_spaces(&s);
719 if (*s != '\0')
720 v->type &= ~VF_USER;
721 }
722 } else {
723 v->type &= ~VF_USER;
724 }
725 v->type |= VF_CACHED;
726 }
727 return v->number;
728}
729
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000730static var *copyvar(var *dest, const var *src)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000731{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000732 if (dest != src) {
733 clrvar(dest);
734 dest->type |= (src->type & ~VF_DONTTOUCH);
735 dest->number = src->number;
736 if (src->string)
Rob Landleyd921b2e2006-08-03 15:41:12 +0000737 dest->string = xstrdup(src->string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000738 }
739 handle_special(dest);
740 return dest;
741}
742
Mike Frysinger10a11e22005-09-27 02:23:02 +0000743static var *incvar(var *v)
744{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000745 return setvar_i(v, getvar_i(v)+1.);
746}
747
748/* return true if v is number or numeric string */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000749static int is_numeric(var *v)
750{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000751 getvar_i(v);
752 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
753}
754
755/* return 1 when value of v corresponds to true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000756static int istrue(var *v)
757{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000758 if (is_numeric(v))
759 return (v->number == 0) ? 0 : 1;
760 else
761 return (v->string && *(v->string)) ? 1 : 0;
762}
763
Eric Andersenaff114c2004-04-14 17:51:38 +0000764/* temporary variables allocator. Last allocated should be first freed */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000765static var *nvalloc(int n)
766{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000767 nvblock *pb = NULL;
768 var *v, *r;
769 int size;
770
771 while (cb) {
772 pb = cb;
773 if ((cb->pos - cb->nv) + n <= cb->size) break;
774 cb = cb->next;
775 }
776
777 if (! cb) {
778 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
779 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
780 cb->size = size;
781 cb->pos = cb->nv;
782 cb->prev = pb;
783 cb->next = NULL;
784 if (pb) pb->next = cb;
785 }
786
787 v = r = cb->pos;
788 cb->pos += n;
789
790 while (v < cb->pos) {
791 v->type = 0;
792 v->string = NULL;
793 v++;
794 }
795
796 return r;
797}
798
Mike Frysinger10a11e22005-09-27 02:23:02 +0000799static void nvfree(var *v)
800{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000801 var *p;
802
803 if (v < cb->nv || v >= cb->pos)
804 runtime_error(EMSG_INTERNAL_ERROR);
805
806 for (p=v; p<cb->pos; p++) {
807 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
808 clear_array(iamarray(p));
809 free(p->x.array->items);
810 free(p->x.array);
811 }
812 if (p->type & VF_WALK)
813 free(p->x.walker);
814
815 clrvar(p);
816 }
817
818 cb->pos = v;
819 while (cb->prev && cb->pos == cb->nv) {
820 cb = cb->prev;
821 }
822}
823
824/* ------- awk program text parsing ------- */
825
826/* Parse next token pointed by global pos, place results into global t.
827 * If token isn't expected, give away. Return token class
828 */
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000829static uint32_t next_token(uint32_t expected)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000830{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000831 char *p, *pp, *s;
832 char *tl;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000833 uint32_t tc;
834 const uint32_t *ti;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000835 int l;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000836 static int concat_inserted;
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000837 static uint32_t save_tclass, save_info;
838 static uint32_t ltclass = TC_OPTERM;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000839
840 if (t.rollback) {
841
842 t.rollback = FALSE;
843
844 } else if (concat_inserted) {
845
846 concat_inserted = FALSE;
847 t.tclass = save_tclass;
848 t.info = save_info;
849
850 } else {
851
852 p = pos;
853
854 readnext:
855 skip_spaces(&p);
856 lineno = t.lineno;
857 if (*p == '#')
858 while (*p != '\n' && *p != '\0') p++;
859
860 if (*p == '\n')
861 t.lineno++;
862
863 if (*p == '\0') {
864 tc = TC_EOF;
865
866 } else if (*p == '\"') {
867 /* it's a string */
868 t.string = s = ++p;
869 while (*p != '\"') {
870 if (*p == '\0' || *p == '\n')
871 syntax_error(EMSG_UNEXP_EOS);
872 *(s++) = nextchar(&p);
873 }
874 p++;
875 *s = '\0';
876 tc = TC_STRING;
877
878 } else if ((expected & TC_REGEXP) && *p == '/') {
879 /* it's regexp */
880 t.string = s = ++p;
881 while (*p != '/') {
882 if (*p == '\0' || *p == '\n')
883 syntax_error(EMSG_UNEXP_EOS);
884 if ((*s++ = *p++) == '\\') {
885 pp = p;
Manuel Novoa III cad53642003-03-19 09:13:01 +0000886 *(s-1) = bb_process_escape_sequence((const char **)&p);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000887 if (*pp == '\\') *s++ = '\\';
888 if (p == pp) *s++ = *p++;
889 }
890 }
891 p++;
892 *s = '\0';
893 tc = TC_REGEXP;
894
895 } else if (*p == '.' || isdigit(*p)) {
896 /* it's a number */
897 t.number = strtod(p, &p);
898 if (*p == '.')
899 syntax_error(EMSG_UNEXP_TOKEN);
900 tc = TC_NUMBER;
901
902 } else {
903 /* search for something known */
904 tl = tokenlist;
905 tc = 0x00000001;
906 ti = tokeninfo;
907 while (*tl) {
908 l = *(tl++);
909 if (l == NTCC) {
910 tc <<= 1;
911 continue;
912 }
913 /* if token class is expected, token
914 * matches and it's not a longer word,
915 * then this is what we are looking for
916 */
917 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
918 *tl == *p && strncmp(p, tl, l) == 0 &&
919 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
920 t.info = *ti;
921 p += l;
922 break;
923 }
924 ti++;
925 tl += l;
926 }
927
928 if (! *tl) {
929 /* it's a name (var/array/function),
930 * otherwise it's something wrong
931 */
932 if (! isalnum_(*p))
933 syntax_error(EMSG_UNEXP_TOKEN);
934
935 t.string = --p;
936 while(isalnum_(*(++p))) {
937 *(p-1) = *p;
938 }
939 *(p-1) = '\0';
940 tc = TC_VARIABLE;
Bernhard Reutner-Fischerbb204622005-10-17 14:21:06 +0000941 /* also consume whitespace between functionname and bracket */
Rob Landley46e351d2006-02-14 16:05:32 +0000942 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000943 if (*p == '(') {
944 tc = TC_FUNCTION;
945 } else {
Glenn L McGrath545106f2002-11-11 06:21:00 +0000946 if (*p == '[') {
947 p++;
948 tc = TC_ARRAY;
949 }
950 }
951 }
952 }
953 pos = p;
954
955 /* skipping newlines in some cases */
956 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
957 goto readnext;
958
959 /* insert concatenation operator when needed */
960 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
961 concat_inserted = TRUE;
962 save_tclass = tc;
963 save_info = t.info;
964 tc = TC_BINOP;
965 t.info = OC_CONCAT | SS | P(35);
966 }
967
968 t.tclass = tc;
969 }
970 ltclass = t.tclass;
971
972 /* Are we ready for this? */
973 if (! (ltclass & expected))
974 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
975 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
976
977 return ltclass;
978}
979
980static void rollback_token(void) { t.rollback = TRUE; }
981
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000982static node *new_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000983{
"Robert P. J. Day"68229832006-07-01 13:08:46 +0000984 node *n;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000985
Rob Landley9ffd4232006-05-21 18:30:35 +0000986 n = (node *)xzalloc(sizeof(node));
Glenn L McGrath545106f2002-11-11 06:21:00 +0000987 n->info = info;
988 n->lineno = lineno;
989 return n;
990}
991
Mike Frysinger10a11e22005-09-27 02:23:02 +0000992static node *mk_re_node(char *s, node *n, regex_t *re)
993{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000994 n->info = OC_REGEXP;
995 n->l.re = re;
996 n->r.ire = re + 1;
997 xregcomp(re, s, REG_EXTENDED);
998 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
999
1000 return n;
1001}
1002
Mike Frysinger10a11e22005-09-27 02:23:02 +00001003static node *condition(void)
1004{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001005 next_token(TC_SEQSTART);
1006 return parse_expr(TC_SEQTERM);
1007}
1008
1009/* parse expression terminated by given argument, return ptr
1010 * to built subtree. Terminator is eaten by parse_expr */
/*
 * Build an expression tree from tokens until a token whose class is in
 * iexp is read; that terminating token is consumed by the loop condition.
 * A stack-local sentinel node (sn) acts as the root and the function
 * returns sn.r.n, which is still NULL when the terminator is the very
 * first token read.
 * cn is the most recently created node, xtc the set of token classes
 * accepted next, glptr a getline node still waiting for a possible '<'.
 */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001011static node *parse_expr(uint32_t iexp)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001012{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001013 node sn;
1014 node *cn = &sn;
1015 node *vn, *glptr;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001016 uint32_t tc, xtc;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001017 var *v;
1018
/* NOTE(review): the sentinel's info is PRIMASK, presumably so the
 * priority climb below never walks past it - confirm against PRIMASK2 */
1019 sn.info = PRIMASK;
1020 sn.r.n = glptr = NULL;
1021 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1022
1023 while (! ((tc = next_token(xtc)) & iexp)) {
1024 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1025 /* input redirection (<) attached to glptr node */
1026 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
Glenn L McGrath4bded582004-02-22 11:55:09 +00001027 cn->a.n = glptr;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001028 xtc = TC_OPERAND | TC_UOPPRE;
1029 glptr = NULL;
1030
1031 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1032 /* for binary and postfix-unary operators, jump back over
1033 * previous operators with higher priority */
1034 vn = cn;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001035 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
Glenn L McGrath545106f2002-11-11 06:21:00 +00001036 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1037 vn = vn->a.n;
/* NOTE(review): the ternary's info word is raised by P(6) only after the
 * climb above compared the unmodified value - purpose not visible here */
1038 if ((t.info & OPCLSMASK) == OC_TERNARY)
1039 t.info += P(6);
/* splice the new operator node into the slot vn occupied under its
 * parent; vn itself becomes the operator's left (binary) or right
 * (postfix) child below */
1040 cn = vn->a.n->r.n = new_node(t.info);
1041 cn->a.n = vn->a.n;
1042 if (tc & TC_BINOP) {
1043 cn->l.n = vn;
1044 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1045 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1046 /* it's a pipe */
1047 next_token(TC_GETLINE);
1048 /* give maximum priority to this pipe */
1049 cn->info &= ~PRIMASK;
1050 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1051 }
1052 } else {
1053 cn->r.n = vn;
1054 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1055 }
1056 vn->a.n = cn;
1057
1058 } else {
1059 /* for operands and prefix-unary operators, attach them
1060 * to last node */
1061 vn = cn;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001062 cn = vn->r.n = new_node(t.info);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001063 cn->a.n = vn;
1064 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1065 if (tc & (TC_OPERAND | TC_REGEXP)) {
Rob Landleyed830e82005-06-07 02:43:52 +00001066 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001067 /* one should be very careful with switch on tclass -
Glenn L McGrath545106f2002-11-11 06:21:00 +00001068 * only simple tclasses should be used! */
1069 switch (tc) {
1070 case TC_VARIABLE:
1071 case TC_ARRAY:
/* a name found in ahash becomes an OC_FNARG node addressed by its
 * stored argument index; any other name is bound through newvar() */
1072 cn->info = OC_VAR;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001073 if ((v = hash_search(ahash, t.string)) != NULL) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001074 cn->info = OC_FNARG;
1075 cn->l.i = v->x.aidx;
1076 } else {
Mike Frysingerde2b9382005-09-27 03:18:00 +00001077 cn->l.v = newvar(t.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001078 }
/* array reference: the subscript is parsed recursively up to TC_ARRTERM
 * and hangs off the node's right pointer */
1079 if (tc & TC_ARRAY) {
1080 cn->info |= xS;
1081 cn->r.n = parse_expr(TC_ARRTERM);
1082 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00001083 break;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001084
Glenn L McGrath545106f2002-11-11 06:21:00 +00001085 case TC_NUMBER:
1086 case TC_STRING:
/* a literal is stored in a freshly allocated var owned by the node */
1087 cn->info = OC_VAR;
Rob Landley9ffd4232006-05-21 18:30:35 +00001088 v = cn->l.v = xzalloc(sizeof(var));
Glenn L McGrath545106f2002-11-11 06:21:00 +00001089 if (tc & TC_NUMBER)
1090 setvar_i(v, t.number);
1091 else
1092 setvar_s(v, t.string);
1093 break;
1094
1095 case TC_REGEXP:
/* storage for both the case-sensitive and the REG_ICASE compilation */
1096 mk_re_node(t.string, cn,
Rob Landley9ffd4232006-05-21 18:30:35 +00001097 (regex_t *)xzalloc(sizeof(regex_t)*2));
Glenn L McGrath545106f2002-11-11 06:21:00 +00001098 break;
1099
1100 case TC_FUNCTION:
Mike Frysingerde2b9382005-09-27 03:18:00 +00001101 cn->info = OC_FUNC;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001102 cn->r.f = newfunc(t.string);
1103 cn->l.n = condition();
1104 break;
1105
1106 case TC_SEQSTART:
/* the sub-expression replaces the node created above, which is dropped
 * without being freed.
 * NOTE(review): parse_expr() returns NULL when TC_SEQTERM is the first
 * token, in which case the next line would dereference NULL - confirm */
1107 cn = vn->r.n = parse_expr(TC_SEQTERM);
1108 cn->a.n = vn;
1109 break;
1110
1111 case TC_GETLINE:
/* remember the getline node so a following '<' can be attached to it */
1112 glptr = cn;
1113 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1114 break;
1115
1116 case TC_BUILTIN:
1117 cn->l.n = condition();
1118 break;
1119 }
1120 }
1121 }
1122 }
1123 return sn.r.n;
1124}
1125
1126/* add node to chain. Return ptr to alloc'd node */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001127static node *chain_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001128{
"Robert P. J. Day"68229832006-07-01 13:08:46 +00001129 node *n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001130
1131 if (! seq->first)
1132 seq->first = seq->last = new_node(0);
1133
1134 if (seq->programname != programname) {
1135 seq->programname = programname;
1136 n = chain_node(OC_NEWSOURCE);
Rob Landleyd921b2e2006-08-03 15:41:12 +00001137 n->l.s = xstrdup(programname);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001138 }
1139
1140 n = seq->last;
1141 n->info = info;
1142 seq->last = n->a.n = new_node(OC_DONE);
1143
1144 return n;
1145}
1146
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001147static void chain_expr(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001148{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001149 node *n;
1150
1151 n = chain_node(info);
1152 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1153 if (t.tclass & TC_GRPTERM)
1154 rollback_token();
1155}
1156
Mike Frysinger10a11e22005-09-27 02:23:02 +00001157static node *chain_loop(node *nn)
1158{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001159 node *n, *n2, *save_brk, *save_cont;
1160
1161 save_brk = break_ptr;
1162 save_cont = continue_ptr;
1163
1164 n = chain_node(OC_BR | Vx);
1165 continue_ptr = new_node(OC_EXEC);
1166 break_ptr = new_node(OC_EXEC);
1167 chain_group();
1168 n2 = chain_node(OC_EXEC | Vx);
1169 n2->l.n = nn;
1170 n2->a.n = n;
1171 continue_ptr->a.n = n2;
1172 break_ptr->a.n = n->r.n = seq->last;
1173
1174 continue_ptr = save_cont;
1175 break_ptr = save_brk;
1176
1177 return n;
1178}
1179
1180/* parse group and attach it to chain */
/*
 * Parse one statement, or a whole group opened by TC_GRPSTART, and append
 * the resulting nodes to the current sequence (seq). Leading newlines are
 * skipped. Plain expression statements are handed to chain_expr(); control
 * statements are lowered into OC_BR / OC_EXEC nodes wired together through
 * their a.n and r.n pointers.
 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001181static void chain_group(void)
1182{
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001183 uint32_t c;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001184 node *n, *n2, *n3;
1185
1186 do {
1187 c = next_token(TC_GRPSEQ);
1188 } while (c & TC_NEWLINE);
1189
1190 if (c & TC_GRPSTART) {
/* group: parse nested statements until the closing TC_GRPTERM,
 * ignoring newlines between them */
1191 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
Mike Frysingerde2b9382005-09-27 03:18:00 +00001192 if (t.tclass & TC_NEWLINE) continue;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001193 rollback_token();
1194 chain_group();
1195 }
1196 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1197 rollback_token();
1198 chain_expr(OC_EXEC | Vx);
1199 } else { /* TC_STATEMNT */
1200 switch (t.info & OPCLSMASK) {
1201 case ST_IF:
/* n: branch on the condition; n2: jump node chained after the "then"
 * part; n->r.n: the node following that jump. With an else part, n2 is
 * redirected past it; otherwise the peeked token is rolled back */
1202 n = chain_node(OC_BR | Vx);
1203 n->l.n = condition();
1204 chain_group();
1205 n2 = chain_node(OC_EXEC);
1206 n->r.n = seq->last;
1207 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1208 chain_group();
1209 n2->a.n = seq->last;
1210 } else {
1211 rollback_token();
1212 }
1213 break;
1214
1215 case ST_WHILE:
/* the condition is parsed before the loop is chained and then attached
 * to the loop head */
1216 n2 = condition();
1217 n = chain_loop(NULL);
1218 n->l.n = n2;
1219 break;
1220
1221 case ST_DO:
/* n2 is chained in front of the loop head and redirected to the head's
 * successor, so the first pass enters the body without testing the
 * condition, which is only parsed after the body */
1222 n2 = chain_node(OC_EXEC);
1223 n = chain_loop(NULL);
1224 n2->a.n = n->a.n;
1225 next_token(TC_WHILE);
1226 n->l.n = condition();
1227 break;
1228
1229 case ST_FOR:
1230 next_token(TC_SEQSTART);
1231 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1232 if (t.tclass & TC_SEQTERM) { /* for-in */
/* the single expression must be an OC_IN node; its operands feed an
 * OC_WALKINIT node and the loop head is turned into OC_WALKNEXT */
1233 if ((n2->info & OPCLSMASK) != OC_IN)
1234 syntax_error(EMSG_UNEXP_TOKEN);
1235 n = chain_node(OC_WALKINIT | VV);
1236 n->l.n = n2->l.n;
1237 n->r.n = n2->r.n;
1238 n = chain_loop(NULL);
1239 n->info = OC_WALKNEXT | Vx;
1240 n->l.n = n2->l.n;
1241 } else { /* for(;;) */
/* initializer is chained as a plain statement; n2 then holds the
 * condition and n3 the step expression passed to chain_loop(). A missing
 * condition (NULL) downgrades the loop head to a plain OC_EXEC */
1242 n = chain_node(OC_EXEC | Vx);
1243 n->l.n = n2;
1244 n2 = parse_expr(TC_SEMICOL);
1245 n3 = parse_expr(TC_SEQTERM);
1246 n = chain_loop(n3);
1247 n->l.n = n2;
1248 if (! n2)
1249 n->info = OC_EXEC;
1250 }
1251 break;
1252
1253 case OC_PRINT:
1254 case OC_PRINTF:
/* argument list goes to l.n; when an output redirection token follows,
 * its info bits are merged into the node and the target expression is
 * stored in r.n */
1255 n = chain_node(t.info);
1256 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1257 if (t.tclass & TC_OUTRDR) {
1258 n->info |= t.info;
1259 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1260 }
1261 if (t.tclass & TC_GRPTERM)
1262 rollback_token();
1263 break;
1264
1265 case OC_BREAK:
/* jump to the break node of the innermost loop being parsed */
1266 n = chain_node(OC_EXEC);
1267 n->a.n = break_ptr;
1268 break;
1269
1270 case OC_CONTINUE:
/* jump to the continue node of the innermost loop being parsed */
1271 n = chain_node(OC_EXEC);
1272 n->a.n = continue_ptr;
1273 break;
1274
1275 /* delete, next, nextfile, return, exit */
1276 default:
1277 chain_expr(t.info);
1278
1279 }
1280 }
1281}
1282
Mike Frysinger10a11e22005-09-27 02:23:02 +00001283static void parse_program(char *p)
1284{
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001285 uint32_t tclass;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001286 node *cn;
1287 func *f;
1288 var *v;
1289
1290 pos = p;
1291 t.lineno = 1;
1292 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1293 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1294
1295 if (tclass & TC_OPTERM)
1296 continue;
1297
1298 seq = &mainseq;
1299 if (tclass & TC_BEGIN) {
1300 seq = &beginseq;
1301 chain_group();
1302
1303 } else if (tclass & TC_END) {
1304 seq = &endseq;
1305 chain_group();
1306
1307 } else if (tclass & TC_FUNCDECL) {
1308 next_token(TC_FUNCTION);
1309 pos++;
1310 f = newfunc(t.string);
1311 f->body.first = NULL;
1312 f->nargs = 0;
1313 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1314 v = findvar(ahash, t.string);
1315 v->x.aidx = (f->nargs)++;
1316
1317 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1318 break;
1319 }
1320 seq = &(f->body);
1321 chain_group();
1322 clear_array(ahash);
1323
1324 } else if (tclass & TC_OPSEQ) {
1325 rollback_token();
1326 cn = chain_node(OC_TEST);
1327 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1328 if (t.tclass & TC_GRPSTART) {
1329 rollback_token();
1330 chain_group();
1331 } else {
1332 chain_node(OC_PRINT);
1333 }
1334 cn->r.n = mainseq.last;
1335
1336 } else /* if (tclass & TC_GRPSTART) */ {
1337 rollback_token();
1338 chain_group();
1339 }
1340 }
1341}
1342
1343
1344/* -------- program execution part -------- */
1345
Mike Frysinger10a11e22005-09-27 02:23:02 +00001346static node *mk_splitter(char *s, tsplitter *spl)
1347{
"Robert P. J. Day"68229832006-07-01 13:08:46 +00001348 regex_t *re, *ire;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001349 node *n;
1350
1351 re = &spl->re[0];
1352 ire = &spl->re[1];
1353 n = &spl->n;
1354 if ((n->info && OPCLSMASK) == OC_REGEXP) {
1355 regfree(re);
1356 regfree(ire);
1357 }
Rob Landleya3896512006-05-07 20:20:34 +00001358 if (strlen(s) > 1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001359 mk_re_node(s, n, re);
1360 } else {
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001361 n->info = (uint32_t) *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001362 }
1363
1364 return n;
1365}
1366
1367/* use node as a regular expression. Supplied with node ptr and regex_t
Eric Andersenaff114c2004-04-14 17:51:38 +00001368 * storage space. Return ptr to regex (if result points to preg, it should
Glenn L McGrath545106f2002-11-11 06:21:00 +00001369 * be later regfree'd manually
1370 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001371static regex_t *as_regex(node *op, regex_t *preg)
1372{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001373 var *v;
1374 char *s;
1375
1376 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1377 return icase ? op->r.ire : op->l.re;
1378 } else {
1379 v = nvalloc(1);
1380 s = getvar_s(evaluate(op, v));
1381 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1382 nvfree(v);
1383 return preg;
1384 }
1385}
1386
/* Gradually growing buffer.
 * Nothing happens while *b is allocated and n is below the recorded
 * capacity *size. Otherwise *b is reallocated to n + (n >> 1) + 80 bytes
 * and that new capacity is written to *size. */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1393
1394/* resize field storage space */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001395static void fsrealloc(int size)
1396{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001397 static int maxfields = 0;
1398 int i;
1399
1400 if (size >= maxfields) {
1401 i = maxfields;
1402 maxfields = size + 16;
1403 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1404 for (; i<maxfields; i++) {
1405 Fields[i].type = VF_SPECIAL;
1406 Fields[i].string = NULL;
1407 }
1408 }
1409
1410 if (size < nfields) {
1411 for (i=size; i<nfields; i++) {
1412 clrvar(Fields+i);
1413 }
1414 }
1415 nfields = size;
1416}
1417
/*
 * Split string s into fields according to splitter node spl.
 * *slist receives a newly allocated buffer in which the fields are stored
 * back to back, each NUL-terminated; the return value is the number of
 * fields. Four modes are chosen from spl->info: regex split, null split
 * (separator character 0: every character is a field), single-character
 * split and space split (separator ' ': runs of whitespace separate
 * fields, leading whitespace is ignored).
 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001418static int awk_split(char *s, node *spl, char **slist)
1419{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001420 int l, n=0;
1421 char c[4];
1422 char *s1;
1423 regmatch_t pmatch[2];
1424
1425 /* in worst case, each char would be a separate field */
/* NOTE(review): this relies on xstrndup() sizing the buffer from the
 * requested length rather than from strlen(s) - confirm against the
 * libbb implementation in use */
Rob Landleyd921b2e2006-08-03 15:41:12 +00001426 *slist = s1 = xstrndup(s, strlen(s) * 2 + 3);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001427
/* c[0..1] hold the separator character; the string starting at c+2 is
 * empty, or "\n" when RS is the empty string */
1428 c[0] = c[1] = (char)spl->info;
1429 c[2] = c[3] = '\0';
1430 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1431
1432 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1433 while (*s) {
/* l: distance to the next character listed in c+2 (or to the end of
 * the string). A regex match starting no later than that ends the
 * field; an empty match at offset 0 is widened to one character so the
 * loop always advances */
1434 l = strcspn(s, c+2);
1435 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1436 pmatch[0].rm_so <= l) {
1437 l = pmatch[0].rm_so;
1438 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1439 } else {
1440 pmatch[0].rm_eo = l;
1441 if (*(s+l)) pmatch[0].rm_eo++;
1442 }
1443
1444 memcpy(s1, s, l);
1445 *(s1+l) = '\0';
1446 nextword(&s1);
1447 s += pmatch[0].rm_eo;
1448 n++;
1449 }
1450 } else if (c[0] == '\0') { /* null split */
1451 while(*s) {
1452 *(s1++) = *(s++);
1453 *(s1++) = '\0';
1454 n++;
1455 }
1456 } else if (c[0] != ' ') { /* single-character split */
/* works in place on the copy made above: each separator found by
 * strpbrk() is overwritten with NUL. With icase set, both the upper and
 * the lower case form of the separator are accepted */
1457 if (icase) {
1458 c[0] = toupper(c[0]);
1459 c[1] = tolower(c[1]);
1460 }
1461 if (*s1) n++;
1462 while ((s1 = strpbrk(s1, c))) {
1463 *(s1++) = '\0';
1464 n++;
1465 }
1466 } else { /* space split */
1467 while (*s) {
1468 while (isspace(*s)) s++;
1469 if (! *s) break;
1470 n++;
1471 while (*s && !isspace(*s))
1472 *(s1++) = *(s++);
1473 *(s1++) = '\0';
1474 }
1475 }
1476 return n;
1477}
1478
Mike Frysinger10a11e22005-09-27 02:23:02 +00001479static void split_f0(void)
1480{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001481 static char *fstrings = NULL;
1482 int i, n;
1483 char *s;
1484
1485 if (is_f0_split)
1486 return;
1487
1488 is_f0_split = TRUE;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00001489 free(fstrings);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001490 fsrealloc(0);
1491 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1492 fsrealloc(n);
1493 s = fstrings;
1494 for (i=0; i<n; i++) {
1495 Fields[i].string = nextword(&s);
1496 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1497 }
1498
1499 /* set NF manually to avoid side effects */
1500 clrvar(V[NF]);
1501 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1502 V[NF]->number = nfields;
1503}
1504
1505/* perform additional actions when some internal variables changed */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001506static void handle_special(var *v)
1507{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001508 int n;
1509 char *b, *sep, *s;
1510 int sl, l, len, i, bsize;
1511
1512 if (! (v->type & VF_SPECIAL))
1513 return;
1514
1515 if (v == V[NF]) {
1516 n = (int)getvar_i(v);
1517 fsrealloc(n);
1518
1519 /* recalculate $0 */
1520 sep = getvar_s(V[OFS]);
Rob Landleya3896512006-05-07 20:20:34 +00001521 sl = strlen(sep);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001522 b = NULL;
1523 len = 0;
1524 for (i=0; i<n; i++) {
1525 s = getvar_s(&Fields[i]);
Rob Landleya3896512006-05-07 20:20:34 +00001526 l = strlen(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001527 if (b) {
1528 memcpy(b+len, sep, sl);
1529 len += sl;
1530 }
1531 qrealloc(&b, len+l+sl, &bsize);
1532 memcpy(b+len, s, l);
1533 len += l;
1534 }
Glenn L McGrathca29ffc2004-09-24 09:24:27 +00001535 if (b) b[len] = '\0';
Glenn L McGrath545106f2002-11-11 06:21:00 +00001536 setvar_p(V[F0], b);
1537 is_f0_split = TRUE;
1538
1539 } else if (v == V[F0]) {
1540 is_f0_split = FALSE;
1541
1542 } else if (v == V[FS]) {
1543 mk_splitter(getvar_s(v), &fsplitter);
1544
1545 } else if (v == V[RS]) {
1546 mk_splitter(getvar_s(v), &rsplitter);
1547
1548 } else if (v == V[IGNORECASE]) {
1549 icase = istrue(v);
1550
1551 } else { /* $n */
1552 n = getvar_i(V[NF]);
1553 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1554 /* right here v is invalid. Just to note... */
1555 }
1556}
1557
1558/* step through func/builtin/etc arguments */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001559static node *nextarg(node **pn)
1560{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001561 node *n;
1562
1563 n = *pn;
1564 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1565 *pn = n->r.n;
1566 n = n->l.n;
1567 } else {
1568 *pn = NULL;
1569 }
1570 return n;
1571}
1572
Mike Frysinger10a11e22005-09-27 02:23:02 +00001573static void hashwalk_init(var *v, xhash *array)
1574{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001575 char **w;
1576 hash_item *hi;
1577 int i;
1578
1579 if (v->type & VF_WALK)
1580 free(v->x.walker);
1581
1582 v->type |= VF_WALK;
Rob Landley9ffd4232006-05-21 18:30:35 +00001583 w = v->x.walker = (char **)xzalloc(2 + 2*sizeof(char *) + array->glen);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001584 *w = *(w+1) = (char *)(w + 2);
1585 for (i=0; i<array->csize; i++) {
1586 hi = array->items[i];
1587 while(hi) {
1588 strcpy(*w, hi->name);
1589 nextword(w);
1590 hi = hi->next;
1591 }
1592 }
1593}
1594
Mike Frysinger10a11e22005-09-27 02:23:02 +00001595static int hashwalk_next(var *v)
1596{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001597 char **w;
1598
1599 w = v->x.walker;
1600 if (*(w+1) == *w)
1601 return FALSE;
1602
1603 setvar_s(v, nextword(w+1));
1604 return TRUE;
1605}
1606
1607/* evaluate node, return 1 when result is true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001608static int ptest(node *pattern)
1609{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001610 static var v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001611 return istrue(evaluate(pattern, &v));
1612}
1613
1614/* read next record from stream rsm into a variable v */
/*
 * Read the next record from stream rsm into variable v.
 * Returns 1 when a record was stored, 0 when no data is left and -1 after
 * a read error (ERRNO is set in that case). On success RT receives the
 * text that terminated the record and v is tagged VF_USER.
 * The stream keeps its own buffer: rsm->buffer is the allocation,
 * rsm->adv the offset of the unconsumed data inside it, rsm->pos the
 * number of unconsumed bytes and rsm->size the allocated capacity.
 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001615static int awk_getline(rstream *rsm, var *v)
1616{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001617 char *b;
1618 regmatch_t pmatch[2];
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001619 int a, p, pp=0, size;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001620 int fd, so, eo, r, rp;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001621 char c, *m, *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001622
1623 /* we're using our own buffer since we need access to accumulating
1624 * characters
1625 */
1626 fd = fileno(rsm->F);
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001627 m = rsm->buffer;
1628 a = rsm->adv;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001629 p = rsm->pos;
1630 size = rsm->size;
1631 c = (char) rsplitter.n.info;
1632 rp = 0;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001633
1634 if (! m) qrealloc(&m, 256, &size);
/* each pass first looks for a record terminator in the data already
 * buffered (so = start of terminator, eo = end of terminator, both
 * relative to b) and breaks out when one is found; otherwise more input
 * is read and the search is repeated */
Glenn L McGrath545106f2002-11-11 06:21:00 +00001635 do {
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001636 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001637 so = eo = p;
1638 r = 1;
1639 if (p > 0) {
1640 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
/* regex RS: a match that reaches the end of the buffered data is not
 * accepted yet, more input is read first */
1641 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1642 b, 1, pmatch, 0) == 0) {
1643 so = pmatch[0].rm_so;
1644 eo = pmatch[0].rm_eo;
1645 if (b[eo] != '\0')
1646 break;
1647 }
1648 } else if (c != '\0') {
/* single-character RS: search only the part added by the last read
 * (from offset pp); an embedded NUL also ends the record */
1649 s = strchr(b+pp, c);
Rob Landley46e351d2006-02-14 16:05:32 +00001650 if (! s) s = memchr(b+pp, '\0', p - pp);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001651 if (s) {
1652 so = eo = s-b;
1653 eo++;
1654 break;
1655 }
1656 } else {
/* separator character 0: leading newlines are skipped (rp) and the
 * record ends at the first "\n\n"; the whole run of newlines forms the
 * terminator */
1657 while (b[rp] == '\n')
1658 rp++;
1659 s = strstr(b+rp, "\n\n");
1660 if (s) {
1661 so = eo = s-b;
1662 while (b[eo] == '\n') eo++;
1663 if (b[eo] != '\0')
1664 break;
1665 }
1666 }
1667 }
1668
/* no terminator yet: move the pending data to the start of the buffer,
 * make room and read more */
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001669 if (a > 0) {
1670 memmove(m, (const void *)(m+a), p+1);
1671 b = m;
1672 a = 0;
1673 }
1674
1675 qrealloc(&m, a+p+128, &size);
1676 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001677 pp = p;
1678 p += safe_read(fd, b+p, size-p-1);
/* p < pp means safe_read() returned a negative value */
1679 if (p < pp) {
1680 p = 0;
1681 r = 0;
1682 setvar_i(V[ERRNO], errno);
1683 }
1684 b[p] = '\0';
1685
1686 } while (p > pp);
1687
/* p == 0: nothing buffered, so r drops from 1 to 0 (no more data) or
 * from 0 to -1 (read error) */
1688 if (p == 0) {
1689 r--;
1690 } else {
/* temporarily NUL-terminate the record and then the terminator text so
 * they can be copied out as C strings; the overwritten bytes are put
 * back afterwards */
1691 c = b[so]; b[so] = '\0';
1692 setvar_s(v, b+rp);
1693 v->type |= VF_USER;
1694 b[so] = c;
1695 c = b[eo]; b[eo] = '\0';
1696 setvar_s(V[RT], b+so);
1697 b[eo] = c;
1698 }
1699
/* save the buffer state; the consumed record and terminator are skipped
 * by advancing adv and shrinking pos */
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001700 rsm->buffer = m;
1701 rsm->adv = a + eo;
1702 rsm->pos = p - eo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001703 rsm->size = size;
1704
1705 return r;
1706}
1707
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00001708static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001709{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001710 int r=0;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00001711 char c;
1712 const char *s=format;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001713
1714 if (int_as_int && n == (int)n) {
1715 r = snprintf(b, size, "%d", (int)n);
1716 } else {
1717 do { c = *s; } while (*s && *++s);
1718 if (strchr("diouxX", c)) {
1719 r = snprintf(b, size, format, (int)n);
1720 } else if (strchr("eEfgG", c)) {
1721 r = snprintf(b, size, format, n);
1722 } else {
1723 runtime_error(EMSG_INV_FMT);
1724 }
1725 }
1726 return r;
1727}
1728
1729
1730/* formatted output into an allocated buffer, return ptr to buffer */
/*
 * Format the argument list n printf-style.
 * The first argument is the format string; it is duplicated so it can be
 * chopped up in place. The format is processed in chunks, each holding
 * literal text plus at most one conversion, and every chunk is formatted
 * with the next argument. Returns a newly allocated NUL-terminated string
 * that the caller has to free.
 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001731static char *awk_printf(node *n)
1732{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001733 char *b = NULL;
1734 char *fmt, *s, *s1, *f;
1735 int i, j, incr, bsize;
1736 char c, c1;
1737 var *v, *arg;
1738
1739 v = nvalloc(1);
Rob Landleyd921b2e2006-08-03 15:41:12 +00001740 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
Glenn L McGrath545106f2002-11-11 06:21:00 +00001741
/* i is the number of output bytes produced so far */
1742 i = 0;
1743 while (*f) {
/* s marks the start of the chunk; f is moved past literal text and
 * "%%" pairs up to the next conversion, then past flags/width/precision
 * to the conversion letter */
1744 s = f;
1745 while (*f && (*f != '%' || *(++f) == '%'))
1746 f++;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001747 while (*f && !isalpha(*f))
Glenn L McGrath545106f2002-11-11 06:21:00 +00001748 f++;
1749
/* reserve the chunk length plus MAXVARFMT bytes for the expansion, then
 * cut the format right after the conversion letter (c); the character
 * overwritten by the NUL is kept in c1 and restored below */
1750 incr = (f - s) + MAXVARFMT;
1751 qrealloc(&b, incr+i, &bsize);
1752 c = *f; if (c != '\0') f++;
1753 c1 = *f ; *f = '\0';
1754 arg = evaluate(nextarg(&n), v);
1755
1756 j = i;
/* NOTE(review): the two sprintf() calls below are unbounded; only
 * MAXVARFMT (plus strlen for %s) extra bytes were reserved, so a large
 * explicit field width looks able to overrun b - confirm */
1757 if (c == 'c' || !c) {
1758 i += sprintf(b+i, s,
1759 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1760
1761 } else if (c == 's') {
1762 s1 = getvar_s(arg);
Rob Landleya3896512006-05-07 20:20:34 +00001763 qrealloc(&b, incr+i+strlen(s1), &bsize);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001764 i += sprintf(b+i, s, s1);
1765
1766 } else {
1767 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1768 }
1769 *f = c1;
1770
1771 /* if there was an error while sprintf, return value is negative */
1772 if (i < j) i = j;
1773
1774 }
1775
/* trim (or create, for an empty format) the buffer to its final size */
1776 b = xrealloc(b, i+1);
1777 free(fmt);
1778 nvfree(v);
1779 b[i] = '\0';
1780 return b;
1781}
1782
1783/* common substitution routine
1784 * replace (nm) substring of (src) that match (n) with (repl), store
1785 * result into (dest), return number of substitutions. If nm=0, replace
1786 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1787 * subexpression matching (\1-\9)
1788 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001789static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1790{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001791 char *ds = NULL;
1792 char *sp, *s;
1793 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1794 regmatch_t pmatch[10];
1795 regex_t sreg, *re;
1796
1797 re = as_regex(rn, &sreg);
1798 if (! src) src = V[F0];
1799 if (! dest) dest = V[F0];
1800
1801 i = di = 0;
1802 sp = getvar_s(src);
Rob Landleya3896512006-05-07 20:20:34 +00001803 rl = strlen(repl);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001804 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1805 so = pmatch[0].rm_so;
1806 eo = pmatch[0].rm_eo;
1807
1808 qrealloc(&ds, di + eo + rl, &dssize);
1809 memcpy(ds + di, sp, eo);
1810 di += eo;
1811 if (++i >= nm) {
1812 /* replace */
1813 di -= (eo - so);
1814 nbs = 0;
1815 for (s = repl; *s; s++) {
1816 ds[di++] = c = *s;
1817 if (c == '\\') {
1818 nbs++;
1819 continue;
1820 }
1821 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1822 di -= ((nbs + 3) >> 1);
1823 j = 0;
1824 if (c != '&') {
1825 j = c - '0';
1826 nbs++;
1827 }
1828 if (nbs % 2) {
1829 ds[di++] = c;
1830 } else {
1831 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1832 qrealloc(&ds, di + rl + n, &dssize);
1833 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1834 di += n;
1835 }
1836 }
1837 nbs = 0;
1838 }
1839 }
1840
1841 sp += eo;
1842 if (i == nm) break;
1843 if (eo == so) {
1844 if (! (ds[di++] = *sp++)) break;
1845 }
1846 }
1847
1848 qrealloc(&ds, di + strlen(sp), &dssize);
1849 strcpy(ds + di, sp);
1850 setvar_p(dest, ds);
1851 if (re == &sreg) regfree(re);
1852 return i;
1853}
1854
Mike Frysinger10a11e22005-09-27 02:23:02 +00001855static var *exec_builtin(node *op, var *res)
1856{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001857 int (*to_xxx)(int);
1858 var *tv;
1859 node *an[4];
1860 var *av[4];
1861 char *as[4];
1862 regmatch_t pmatch[2];
1863 regex_t sreg, *re;
1864 static tsplitter tspl;
1865 node *spl;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001866 uint32_t isr, info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001867 int nargs;
1868 time_t tt;
1869 char *s, *s1;
1870 int i, l, ll, n;
1871
1872 tv = nvalloc(4);
1873 isr = info = op->info;
1874 op = op->l.n;
1875
1876 av[2] = av[3] = NULL;
1877 for (i=0 ; i<4 && op ; i++) {
1878 an[i] = nextarg(&op);
1879 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1880 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1881 isr >>= 1;
1882 }
1883
1884 nargs = i;
1885 if (nargs < (info >> 30))
1886 runtime_error(EMSG_TOO_FEW_ARGS);
1887
1888 switch (info & OPNMASK) {
1889
1890 case B_a2:
1891#ifdef CONFIG_FEATURE_AWK_MATH
1892 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1893#else
1894 runtime_error(EMSG_NO_MATH);
1895#endif
1896 break;
1897
1898 case B_sp:
1899 if (nargs > 2) {
1900 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1901 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1902 } else {
1903 spl = &fsplitter.n;
1904 }
1905
1906 n = awk_split(as[0], spl, &s);
1907 s1 = s;
1908 clear_array(iamarray(av[1]));
1909 for (i=1; i<=n; i++)
1910 setari_u(av[1], i, nextword(&s1));
1911 free(s);
1912 setvar_i(res, n);
1913 break;
1914
1915 case B_ss:
Rob Landleya3896512006-05-07 20:20:34 +00001916 l = strlen(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001917 i = getvar_i(av[1]) - 1;
1918 if (i>l) i=l; if (i<0) i=0;
1919 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1920 if (n<0) n=0;
1921 s = xmalloc(n+1);
1922 strncpy(s, as[0]+i, n);
1923 s[n] = '\0';
1924 setvar_p(res, s);
1925 break;
1926
1927 case B_lo:
1928 to_xxx = tolower;
1929 goto lo_cont;
1930
1931 case B_up:
1932 to_xxx = toupper;
1933lo_cont:
Rob Landleyd921b2e2006-08-03 15:41:12 +00001934 s1 = s = xstrdup(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001935 while (*s1) {
1936 *s1 = (*to_xxx)(*s1);
1937 s1++;
1938 }
1939 setvar_p(res, s);
1940 break;
1941
1942 case B_ix:
1943 n = 0;
Rob Landleya3896512006-05-07 20:20:34 +00001944 ll = strlen(as[1]);
1945 l = strlen(as[0]) - ll;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001946 if (ll > 0 && l >= 0) {
1947 if (! icase) {
1948 s = strstr(as[0], as[1]);
1949 if (s) n = (s - as[0]) + 1;
1950 } else {
1951 /* this piece of code is terribly slow and
1952 * really should be rewritten
1953 */
1954 for (i=0; i<=l; i++) {
1955 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1956 n = i+1;
1957 break;
1958 }
1959 }
1960 }
1961 }
1962 setvar_i(res, n);
1963 break;
1964
1965 case B_ti:
1966 if (nargs > 1)
1967 tt = getvar_i(av[1]);
1968 else
1969 time(&tt);
1970 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1971 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1972 buf[i] = '\0';
1973 setvar_s(res, buf);
1974 break;
1975
1976 case B_ma:
1977 re = as_regex(an[1], &sreg);
1978 n = regexec(re, as[0], 1, pmatch, 0);
1979 if (n == 0) {
1980 pmatch[0].rm_so++;
1981 pmatch[0].rm_eo++;
1982 } else {
1983 pmatch[0].rm_so = 0;
1984 pmatch[0].rm_eo = -1;
1985 }
1986 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1987 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1988 setvar_i(res, pmatch[0].rm_so);
1989 if (re == &sreg) regfree(re);
1990 break;
1991
1992 case B_ge:
1993 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
1994 break;
1995
1996 case B_gs:
1997 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
1998 break;
1999
2000 case B_su:
2001 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2002 break;
2003 }
2004
2005 nvfree(tv);
2006 return res;
2007}
2008
2009/*
2010 * Evaluate node - the heart of the program. Supplied with subtree
2011 * and place where to store result. returns ptr to result.
2012 */
2013#define XC(n) ((n) >> 8)
2014
Mike Frysinger10a11e22005-09-27 02:23:02 +00002015static var *evaluate(node *op, var *res)
2016{
Mike Frysingerde2b9382005-09-27 03:18:00 +00002017 /* This procedure is recursive so we should count every byte */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002018 static var *fnargs = NULL;
2019 static unsigned int seed = 1;
2020 static regex_t sreg;
2021 node *op1;
2022 var *v1;
2023 union {
2024 var *v;
2025 char *s;
2026 double d;
2027 int i;
2028 } L, R;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00002029 uint32_t opinfo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002030 short opn;
2031 union {
2032 char *s;
2033 rstream *rsm;
2034 FILE *F;
2035 var *v;
2036 regex_t *re;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00002037 uint32_t info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002038 } X;
2039
2040 if (! op)
2041 return setvar_s(res, NULL);
2042
2043 v1 = nvalloc(2);
2044
2045 while (op) {
2046
2047 opinfo = op->info;
2048 opn = (short)(opinfo & OPNMASK);
2049 lineno = op->lineno;
2050
Mike Frysingerde2b9382005-09-27 03:18:00 +00002051 /* execute inevitable things */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002052 op1 = op->l.n;
2053 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2054 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2055 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2056 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2057 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2058
2059 switch (XC(opinfo & OPCLSMASK)) {
2060
2061 /* -- iterative node type -- */
2062
2063 /* test pattern */
2064 case XC( OC_TEST ):
2065 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2066 /* it's range pattern */
2067 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2068 op->info |= OF_CHECKED;
2069 if (ptest(op1->r.n))
2070 op->info &= ~OF_CHECKED;
2071
2072 op = op->a.n;
2073 } else {
2074 op = op->r.n;
2075 }
2076 } else {
2077 op = (ptest(op1)) ? op->a.n : op->r.n;
2078 }
2079 break;
2080
2081 /* just evaluate an expression, also used as unconditional jump */
2082 case XC( OC_EXEC ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002083 break;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002084
2085 /* branch, used in if-else and various loops */
2086 case XC( OC_BR ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002087 op = istrue(L.v) ? op->a.n : op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002088 break;
2089
2090 /* initialize for-in loop */
2091 case XC( OC_WALKINIT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002092 hashwalk_init(L.v, iamarray(R.v));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002093 break;
2094
2095 /* get next array item */
2096 case XC( OC_WALKNEXT ):
2097 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2098 break;
2099
2100 case XC( OC_PRINT ):
2101 case XC( OC_PRINTF ):
2102 X.F = stdout;
Mike Frysingerde2b9382005-09-27 03:18:00 +00002103 if (op->r.n) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002104 X.rsm = newfile(R.s);
2105 if (! X.rsm->F) {
2106 if (opn == '|') {
2107 if((X.rsm->F = popen(R.s, "w")) == NULL)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002108 bb_perror_msg_and_die("popen");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002109 X.rsm->is_pipe = 1;
2110 } else {
Rob Landleyd921b2e2006-08-03 15:41:12 +00002111 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002112 }
2113 }
2114 X.F = X.rsm->F;
2115 }
2116
2117 if ((opinfo & OPCLSMASK) == OC_PRINT) {
Mike Frysingerde2b9382005-09-27 03:18:00 +00002118 if (! op1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002119 fputs(getvar_s(V[F0]), X.F);
2120 } else {
2121 while (op1) {
2122 L.v = evaluate(nextarg(&op1), v1);
2123 if (L.v->type & VF_NUMBER) {
2124 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2125 getvar_i(L.v), TRUE);
2126 fputs(buf, X.F);
2127 } else {
2128 fputs(getvar_s(L.v), X.F);
2129 }
2130
2131 if (op1) fputs(getvar_s(V[OFS]), X.F);
2132 }
2133 }
2134 fputs(getvar_s(V[ORS]), X.F);
2135
2136 } else { /* OC_PRINTF */
2137 L.s = awk_printf(op1);
2138 fputs(L.s, X.F);
2139 free(L.s);
2140 }
2141 fflush(X.F);
2142 break;
2143
2144 case XC( OC_DELETE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002145 X.info = op1->info & OPCLSMASK;
2146 if (X.info == OC_VAR) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002147 R.v = op1->l.v;
2148 } else if (X.info == OC_FNARG) {
2149 R.v = &fnargs[op1->l.i];
2150 } else {
2151 runtime_error(EMSG_NOT_ARRAY);
2152 }
2153
Mike Frysingerde2b9382005-09-27 03:18:00 +00002154 if (op1->r.n) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002155 clrvar(L.v);
2156 L.s = getvar_s(evaluate(op1->r.n, v1));
2157 hash_remove(iamarray(R.v), L.s);
2158 } else {
2159 clear_array(iamarray(R.v));
2160 }
2161 break;
2162
2163 case XC( OC_NEWSOURCE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002164 programname = op->l.s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002165 break;
2166
2167 case XC( OC_RETURN ):
2168 copyvar(res, L.v);
2169 break;
2170
2171 case XC( OC_NEXTFILE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002172 nextfile = TRUE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002173 case XC( OC_NEXT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002174 nextrec = TRUE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002175 case XC( OC_DONE ):
2176 clrvar(res);
2177 break;
2178
2179 case XC( OC_EXIT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002180 awk_exit(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002181
2182 /* -- recursive node type -- */
2183
2184 case XC( OC_VAR ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002185 L.v = op->l.v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002186 if (L.v == V[NF])
2187 split_f0();
2188 goto v_cont;
2189
2190 case XC( OC_FNARG ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002191 L.v = &fnargs[op->l.i];
Glenn L McGrath545106f2002-11-11 06:21:00 +00002192
2193v_cont:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002194 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002195 break;
2196
2197 case XC( OC_IN ):
2198 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2199 break;
2200
2201 case XC( OC_REGEXP ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002202 op1 = op;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002203 L.s = getvar_s(V[F0]);
2204 goto re_cont;
2205
2206 case XC( OC_MATCH ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002207 op1 = op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002208re_cont:
2209 X.re = as_regex(op1, &sreg);
2210 R.i = regexec(X.re, L.s, 0, NULL, 0);
2211 if (X.re == &sreg) regfree(X.re);
2212 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2213 break;
2214
2215 case XC( OC_MOVE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002216 /* if source is a temporary string, jusk relink it to dest */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002217 if (R.v == v1+1 && R.v->string) {
2218 res = setvar_p(L.v, R.v->string);
2219 R.v->string = NULL;
2220 } else {
Mike Frysingerde2b9382005-09-27 03:18:00 +00002221 res = copyvar(L.v, R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002222 }
2223 break;
2224
2225 case XC( OC_TERNARY ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002226 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002227 runtime_error(EMSG_POSSIBLE_ERROR);
2228 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2229 break;
2230
2231 case XC( OC_FUNC ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002232 if (! op->r.f->body.first)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002233 runtime_error(EMSG_UNDEF_FUNC);
2234
2235 X.v = R.v = nvalloc(op->r.f->nargs+1);
2236 while (op1) {
2237 L.v = evaluate(nextarg(&op1), v1);
2238 copyvar(R.v, L.v);
2239 R.v->type |= VF_CHILD;
2240 R.v->x.parent = L.v;
2241 if (++R.v - X.v >= op->r.f->nargs)
2242 break;
2243 }
2244
2245 R.v = fnargs;
2246 fnargs = X.v;
2247
2248 L.s = programname;
2249 res = evaluate(op->r.f->body.first, res);
2250 programname = L.s;
2251
2252 nvfree(fnargs);
2253 fnargs = R.v;
2254 break;
2255
2256 case XC( OC_GETLINE ):
2257 case XC( OC_PGETLINE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002258 if (op1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002259 X.rsm = newfile(L.s);
2260 if (! X.rsm->F) {
2261 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2262 X.rsm->F = popen(L.s, "r");
2263 X.rsm->is_pipe = TRUE;
2264 } else {
Rob Landleyd921b2e2006-08-03 15:41:12 +00002265 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002266 }
2267 }
2268 } else {
2269 if (! iF) iF = next_input_file();
2270 X.rsm = iF;
2271 }
2272
2273 if (! X.rsm->F) {
2274 setvar_i(V[ERRNO], errno);
2275 setvar_i(res, -1);
2276 break;
2277 }
2278
2279 if (! op->r.n)
2280 R.v = V[F0];
2281
2282 L.i = awk_getline(X.rsm, R.v);
2283 if (L.i > 0) {
2284 if (! op1) {
2285 incvar(V[FNR]);
2286 incvar(V[NR]);
2287 }
2288 }
2289 setvar_i(res, L.i);
2290 break;
2291
Mike Frysingerde2b9382005-09-27 03:18:00 +00002292 /* simple builtins */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002293 case XC( OC_FBLTIN ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002294 switch (opn) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002295
2296 case F_in:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002297 R.d = (int)L.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002298 break;
2299
2300 case F_rn:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002301 R.d = (double)rand() / (double)RAND_MAX;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002302 break;
2303
2304#ifdef CONFIG_FEATURE_AWK_MATH
2305 case F_co:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002306 R.d = cos(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002307 break;
2308
2309 case F_ex:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002310 R.d = exp(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002311 break;
2312
2313 case F_lg:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002314 R.d = log(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002315 break;
2316
2317 case F_si:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002318 R.d = sin(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002319 break;
2320
2321 case F_sq:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002322 R.d = sqrt(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002323 break;
2324#else
2325 case F_co:
2326 case F_ex:
2327 case F_lg:
2328 case F_si:
2329 case F_sq:
2330 runtime_error(EMSG_NO_MATH);
2331 break;
2332#endif
2333
2334 case F_sr:
2335 R.d = (double)seed;
2336 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2337 srand(seed);
2338 break;
2339
2340 case F_ti:
2341 R.d = time(NULL);
2342 break;
2343
2344 case F_le:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002345 if (! op1)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002346 L.s = getvar_s(V[F0]);
Rob Landleya3896512006-05-07 20:20:34 +00002347 R.d = strlen(L.s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002348 break;
2349
2350 case F_sy:
2351 fflush(NULL);
Rob Landley51843362006-01-09 05:26:58 +00002352 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002353 break;
2354
2355 case F_ff:
2356 if (! op1)
2357 fflush(stdout);
2358 else {
2359 if (L.s && *L.s) {
2360 X.rsm = newfile(L.s);
2361 fflush(X.rsm->F);
2362 } else {
2363 fflush(NULL);
2364 }
2365 }
2366 break;
2367
2368 case F_cl:
2369 X.rsm = (rstream *)hash_search(fdhash, L.s);
2370 if (X.rsm) {
2371 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00002372 free(X.rsm->buffer);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002373 hash_remove(fdhash, L.s);
2374 }
2375 if (R.i != 0)
2376 setvar_i(V[ERRNO], errno);
2377 R.d = (double)R.i;
2378 break;
2379 }
2380 setvar_i(res, R.d);
2381 break;
2382
2383 case XC( OC_BUILTIN ):
2384 res = exec_builtin(op, res);
2385 break;
2386
2387 case XC( OC_SPRINTF ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002388 setvar_p(res, awk_printf(op1));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002389 break;
2390
2391 case XC( OC_UNARY ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002392 X.v = R.v;
2393 L.d = R.d = getvar_i(R.v);
2394 switch (opn) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002395 case 'P':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002396 L.d = ++R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002397 goto r_op_change;
2398 case 'p':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002399 R.d++;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002400 goto r_op_change;
2401 case 'M':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002402 L.d = --R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002403 goto r_op_change;
2404 case 'm':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002405 R.d--;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002406 goto r_op_change;
2407 case '!':
2408 L.d = istrue(X.v) ? 0 : 1;
2409 break;
2410 case '-':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002411 L.d = -R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002412 break;
2413 r_op_change:
2414 setvar_i(X.v, R.d);
2415 }
2416 setvar_i(res, L.d);
2417 break;
2418
2419 case XC( OC_FIELD ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002420 R.i = (int)getvar_i(R.v);
2421 if (R.i == 0) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002422 res = V[F0];
2423 } else {
2424 split_f0();
2425 if (R.i > nfields)
2426 fsrealloc(R.i);
2427
2428 res = &Fields[R.i-1];
2429 }
2430 break;
2431
2432 /* concatenation (" ") and index joining (",") */
2433 case XC( OC_CONCAT ):
2434 case XC( OC_COMMA ):
Rob Landleya3896512006-05-07 20:20:34 +00002435 opn = strlen(L.s) + strlen(R.s) + 2;
Mike Frysingerde2b9382005-09-27 03:18:00 +00002436 X.s = (char *)xmalloc(opn);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002437 strcpy(X.s, L.s);
2438 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2439 L.s = getvar_s(V[SUBSEP]);
Rob Landleya3896512006-05-07 20:20:34 +00002440 X.s = (char *)xrealloc(X.s, opn + strlen(L.s));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002441 strcat(X.s, L.s);
2442 }
2443 strcat(X.s, R.s);
2444 setvar_p(res, X.s);
2445 break;
2446
2447 case XC( OC_LAND ):
2448 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2449 break;
2450
2451 case XC( OC_LOR ):
2452 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2453 break;
2454
2455 case XC( OC_BINARY ):
2456 case XC( OC_REPLACE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002457 R.d = getvar_i(R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002458 switch (opn) {
2459 case '+':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002460 L.d += R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002461 break;
2462 case '-':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002463 L.d -= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002464 break;
2465 case '*':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002466 L.d *= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002467 break;
2468 case '/':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002469 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2470 L.d /= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002471 break;
2472 case '&':
2473#ifdef CONFIG_FEATURE_AWK_MATH
Mike Frysingerde2b9382005-09-27 03:18:00 +00002474 L.d = pow(L.d, R.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002475#else
2476 runtime_error(EMSG_NO_MATH);
2477#endif
2478 break;
2479 case '%':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002480 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2481 L.d -= (int)(L.d / R.d) * R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002482 break;
2483 }
2484 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2485 break;
2486
2487 case XC( OC_COMPARE ):
2488 if (is_numeric(L.v) && is_numeric(R.v)) {
2489 L.d = getvar_i(L.v) - getvar_i(R.v);
2490 } else {
2491 L.s = getvar_s(L.v);
2492 R.s = getvar_s(R.v);
2493 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2494 }
2495 switch (opn & 0xfe) {
2496 case 0:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002497 R.i = (L.d > 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002498 break;
2499 case 2:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002500 R.i = (L.d >= 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002501 break;
2502 case 4:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002503 R.i = (L.d == 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002504 break;
2505 }
2506 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2507 break;
2508
2509 default:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002510 runtime_error(EMSG_POSSIBLE_ERROR);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002511 }
2512 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2513 op = op->a.n;
2514 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2515 break;
2516 if (nextrec)
2517 break;
2518 }
2519 nvfree(v1);
2520 return res;
2521}
2522
2523
2524/* -------- main & co. -------- */
2525
Mike Frysinger10a11e22005-09-27 02:23:02 +00002526static int awk_exit(int r)
2527{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002528 unsigned int i;
2529 hash_item *hi;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002530 static var tv;
2531
2532 if (! exiting) {
2533 exiting = TRUE;
Glenn L McGrathca29ffc2004-09-24 09:24:27 +00002534 nextrec = FALSE;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002535 evaluate(endseq.first, &tv);
2536 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002537
2538 /* waiting for children */
2539 for (i=0; i<fdhash->csize; i++) {
2540 hi = fdhash->items[i];
2541 while(hi) {
2542 if (hi->data.rs.F && hi->data.rs.is_pipe)
2543 pclose(hi->data.rs.F);
2544 hi = hi->next;
2545 }
2546 }
2547
2548 exit(r);
2549}
2550
2551/* if expr looks like "var=value", perform assignment and return 1,
2552 * otherwise return 0 */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00002553static int is_assignment(const char *expr)
Mike Frysinger10a11e22005-09-27 02:23:02 +00002554{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002555 char *exprc, *s, *s0, *s1;
2556
Rob Landleyd921b2e2006-08-03 15:41:12 +00002557 exprc = xstrdup(expr);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002558 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2559 free(exprc);
2560 return FALSE;
2561 }
2562
2563 *(s++) = '\0';
2564 s0 = s1 = s;
2565 while (*s)
2566 *(s1++) = nextchar(&s);
2567
2568 *s1 = '\0';
2569 setvar_u(newvar(exprc), s0);
2570 free(exprc);
2571 return TRUE;
2572}
2573
2574/* switch to next input file */
Mike Frysinger10a11e22005-09-27 02:23:02 +00002575static rstream *next_input_file(void)
2576{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002577 static rstream rsm;
2578 FILE *F = NULL;
2579 char *fname, *ind;
2580 static int files_happen = FALSE;
2581
2582 if (rsm.F) fclose(rsm.F);
2583 rsm.F = NULL;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002584 rsm.pos = rsm.adv = 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002585
2586 do {
2587 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2588 if (files_happen)
2589 return NULL;
2590 fname = "-";
2591 F = stdin;
2592 } else {
2593 ind = getvar_s(incvar(V[ARGIND]));
2594 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2595 if (fname && *fname && !is_assignment(fname))
2596 F = afopen(fname, "r");
2597 }
2598 } while (!F);
2599
2600 files_happen = TRUE;
2601 setvar_s(V[FILENAME], fname);
2602 rsm.F = F;
2603 return &rsm;
2604}
2605
Rob Landleydfba7412006-03-06 20:47:33 +00002606int awk_main(int argc, char **argv)
Mike Frysinger10a11e22005-09-27 02:23:02 +00002607{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002608 char *s, *s1;
Rob Landley46e351d2006-02-14 16:05:32 +00002609 int i, j, c, flen;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002610 var *v;
2611 static var tv;
2612 char **envp;
2613 static int from_file = FALSE;
2614 rstream *rsm;
2615 FILE *F, *stdfiles[3];
2616 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2617
2618 /* allocate global buffer */
2619 buf = xmalloc(MAXVARFMT+1);
2620
2621 vhash = hash_init();
2622 ahash = hash_init();
2623 fdhash = hash_init();
2624 fnhash = hash_init();
2625
2626 /* initialize variables */
2627 for (i=0; *vNames; i++) {
2628 V[i] = v = newvar(nextword(&vNames));
2629 if (*vValues != '\377')
2630 setvar_s(v, nextword(&vValues));
2631 else
2632 setvar_i(v, 0);
2633
2634 if (*vNames == '*') {
2635 v->type |= VF_SPECIAL;
2636 vNames++;
2637 }
2638 }
2639
2640 handle_special(V[FS]);
2641 handle_special(V[RS]);
2642
2643 stdfiles[0] = stdin;
2644 stdfiles[1] = stdout;
2645 stdfiles[2] = stderr;
2646 for (i=0; i<3; i++) {
2647 rsm = newfile(nextword(&stdnames));
2648 rsm->F = stdfiles[i];
2649 }
2650
2651 for (envp=environ; *envp; envp++) {
Rob Landleyd921b2e2006-08-03 15:41:12 +00002652 s = xstrdup(*envp);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002653 s1 = strchr(s, '=');
Eric Andersen67776be2004-07-30 23:52:08 +00002654 if (!s1) {
2655 goto keep_going;
2656 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002657 *(s1++) = '\0';
2658 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
Eric Andersen67776be2004-07-30 23:52:08 +00002659keep_going:
Glenn L McGrath545106f2002-11-11 06:21:00 +00002660 free(s);
2661 }
2662
2663 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2664 switch (c) {
2665 case 'F':
2666 setvar_s(V[FS], optarg);
2667 break;
2668 case 'v':
2669 if (! is_assignment(optarg))
Manuel Novoa III cad53642003-03-19 09:13:01 +00002670 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002671 break;
2672 case 'f':
2673 from_file = TRUE;
2674 F = afopen(programname = optarg, "r");
2675 s = NULL;
2676 /* one byte is reserved for some trick in next_token */
Rob Landley46e351d2006-02-14 16:05:32 +00002677 if (fseek(F, 0, SEEK_END) == 0) {
2678 flen = ftell(F);
2679 s = (char *)xmalloc(flen+4);
2680 fseek(F, 0, SEEK_SET);
2681 i = 1 + fread(s+1, 1, flen, F);
2682 } else {
2683 for (i=j=1; j>0; i+=j) {
2684 s = (char *)xrealloc(s, i+4096);
2685 j = fread(s+i, 1, 4094, F);
2686 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002687 }
2688 s[i] = '\0';
2689 fclose(F);
2690 parse_program(s+1);
2691 free(s);
2692 break;
2693 case 'W':
Denis Vlasenko6d655be2006-09-06 19:02:46 +00002694 bb_error_msg("Warning: unrecognized option '-W %s' ignored", optarg);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002695 break;
2696
2697 default:
Manuel Novoa III cad53642003-03-19 09:13:01 +00002698 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002699 }
2700 }
2701
2702 if (!from_file) {
2703 if (argc == optind)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002704 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002705 programname="cmd. line";
2706 parse_program(argv[optind++]);
2707
2708 }
2709
2710 /* fill in ARGV array */
2711 setvar_i(V[ARGC], argc - optind + 1);
2712 setari_u(V[ARGV], 0, "awk");
2713 for(i=optind; i < argc; i++)
2714 setari_u(V[ARGV], i+1-optind, argv[i]);
2715
2716 evaluate(beginseq.first, &tv);
2717 if (! mainseq.first && ! endseq.first)
2718 awk_exit(EXIT_SUCCESS);
2719
2720 /* input file could already be opened in BEGIN block */
2721 if (! iF) iF = next_input_file();
2722
2723 /* passing through input files */
2724 while (iF) {
2725
2726 nextfile = FALSE;
2727 setvar_i(V[FNR], 0);
2728
2729 while ((c = awk_getline(iF, V[F0])) > 0) {
2730
2731 nextrec = FALSE;
2732 incvar(V[NR]);
2733 incvar(V[FNR]);
2734 evaluate(mainseq.first, &tv);
2735
2736 if (nextfile)
2737 break;
2738 }
2739
2740 if (c < 0)
2741 runtime_error(strerror(errno));
2742
2743 iF = next_input_file();
2744
2745 }
2746
Glenn L McGrath545106f2002-11-11 06:21:00 +00002747 awk_exit(EXIT_SUCCESS);
2748
2749 return 0;
2750}
2751