/* vi: set sw=4 ts=4: */
/*
 * awk implementation for busybox
 *
 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
 *
 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <math.h>
#include <ctype.h>
#include <getopt.h>

#include "xregex.h"
#include "busybox.h"


#define MAXVARFMT	240
#define MINNVBLOCK	64

/* variable flags (stored in var.type) */
#define VF_NUMBER	0x0001	/* 1 = primary type is number */
#define VF_ARRAY	0x0002	/* 1 = it's an array */

#define VF_CACHED	0x0100	/* 1 = num/str value has cached str/num eq */
#define VF_USER		0x0200	/* 1 = user input (may be numeric string) */
#define VF_SPECIAL	0x0400	/* 1 = requires extra handling when changed */
#define VF_WALK		0x0800	/* 1 = variable has alloc'd x.walker list */
#define VF_FSTR		0x1000	/* 1 = string points to fstring buffer */
#define VF_CHILD	0x2000	/* 1 = function arg; x.parent points to source */
#define VF_DIRTY	0x4000	/* 1 = variable was set explicitly */

/* these flags are static, don't change them when value is changed */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)

/* Variable.
 * 'type' holds the VF_* flags. Which member of the union 'x' is meaningful
 * follows from those flags: VF_ARRAY -> x.array, VF_CHILD -> x.parent,
 * VF_WALK -> x.walker.
 */
typedef struct var_s {
	unsigned short type;		/* flags */
	double number;
	char *string;
	union {
		int aidx;				/* func arg idx (for compilation stage) */
		struct xhash_s *array;	/* array ptr */
		struct var_s *parent;	/* for func args, ptr to actual parameter */
		char **walker;			/* list of array elements (for..in) */
	} x;
} var;

/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
	struct node_s *first;
	struct node_s *last;
	char *programname;
} chain;

/* Function: argument count plus the node chain that forms its body */
typedef struct func_s {
	unsigned short nargs;
	struct chain_s body;
} func;

/* I/O stream */
typedef struct rstream_s {
	FILE *F;
	char *buffer;
	int adv;
	int size;
	int pos;
	unsigned short is_pipe;
} rstream;

79typedef struct hash_item_s {
80 union {
81 struct var_s v; /* variable/array hash */
82 struct rstream_s rs; /* redirect streams hash */
83 struct func_s f; /* functions hash */
84 } data;
85 struct hash_item_s *next; /* next in chain */
86 char name[1]; /* really it's longer */
87} hash_item;
88
/* Chained hash table */
typedef struct xhash_s {
	unsigned int nel;		/* num of elements */
	unsigned int csize;		/* current hash size */
	unsigned int nprime;	/* next hash size in PRIMES[] */
	unsigned int glen;		/* summary length of item names */
	struct hash_item_s **items;
} xhash;

97/* Tree node */
98typedef struct node_s {
Mike Frysingerf87b3e32005-09-27 04:16:22 +000099 uint32_t info;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000100 unsigned short lineno;
101 union {
102 struct node_s *n;
103 var *v;
104 int i;
105 char *s;
106 regex_t *re;
107 } l;
108 union {
109 struct node_s *n;
110 regex_t *ire;
111 func *f;
112 int argno;
113 } r;
114 union {
115 struct node_s *n;
116 } a;
117} node;
118
119/* Block of temporary variables */
120typedef struct nvblock_s {
121 int size;
122 var *pos;
123 struct nvblock_s *prev;
124 struct nvblock_s *next;
125 var nv[0];
126} nvblock;
127
128typedef struct tsplitter_s {
129 node n;
130 regex_t re[2];
131} tsplitter;
132
/* simple token classes */
/* Order and hex values are very important!!! See next_token():
 * the scanner starts at class bit 1 and shifts left once per NTC marker
 * in tokenlist, so the bit order here must follow the tokenlist order.
 */
#define	TC_SEQSTART	 1				/* ( */
#define	TC_SEQTERM	(1 << 1)		/* ) */
#define	TC_REGEXP	(1 << 2)		/* /.../ */
#define	TC_OUTRDR	(1 << 3)		/* | > >> */
#define	TC_UOPPOST	(1 << 4)		/* unary postfix operator */
#define	TC_UOPPRE1	(1 << 5)		/* unary prefix operator */
#define	TC_BINOPX	(1 << 6)		/* two-opnd operator */
#define	TC_IN		(1 << 7)
#define	TC_COMMA	(1 << 8)
#define	TC_PIPE		(1 << 9)		/* input redirection pipe */
#define	TC_UOPPRE2	(1 << 10)		/* unary prefix operator */
#define	TC_ARRTERM	(1 << 11)		/* ] */
#define	TC_GRPSTART	(1 << 12)		/* { */
#define	TC_GRPTERM	(1 << 13)		/* } */
#define	TC_SEMICOL	(1 << 14)
#define	TC_NEWLINE	(1 << 15)
#define	TC_STATX	(1 << 16)		/* ctl statement (for, next...) */
#define	TC_WHILE	(1 << 17)
#define	TC_ELSE		(1 << 18)
#define	TC_BUILTIN	(1 << 19)
#define	TC_GETLINE	(1 << 20)
#define	TC_FUNCDECL	(1 << 21)		/* `function' `func' */
#define	TC_BEGIN	(1 << 22)
#define	TC_END		(1 << 23)
#define	TC_EOF		(1 << 24)
#define	TC_VARIABLE	(1 << 25)
#define	TC_ARRAY	(1 << 26)
#define	TC_FUNCTION	(1 << 27)
#define	TC_STRING	(1 << 28)
#define	TC_NUMBER	(1 << 29)

#define	TC_UOPPRE	(TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define	TC_BINOP	(TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define	TC_UNARYOP	(TC_UOPPRE | TC_UOPPOST)
#define	TC_OPERAND	(TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
					TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define	TC_STATEMNT	(TC_STATX | TC_WHILE)
#define	TC_OPTERM	(TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define	TC_WORD		(TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
					TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define	TC_NOTERM	(TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
					TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define	TC_OPSEQ	(TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define	TC_GRPSEQ	(TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define	TC_CONCAT1	(TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
					TC_STRING | TC_NUMBER | TC_UOPPOST)
#define	TC_CONCAT2	(TC_OPERAND | TC_UOPPRE)

/* operand flags, stored in bits 16..21 of an info word */
#define	OF_RES1		0x010000
#define	OF_RES2		0x020000
#define	OF_STR1		0x040000
#define	OF_STR2		0x080000
#define	OF_NUM1		0x100000
#define	OF_CHECKED	0x200000

/* combined operator flags */
#define	xx	0
#define	xV	OF_RES2
#define	xS	(OF_RES2 | OF_STR2)
#define	Vx	OF_RES1
#define	VV	(OF_RES1 | OF_RES2)
#define	Nx	(OF_RES1 | OF_NUM1)
#define	NV	(OF_RES1 | OF_NUM1 | OF_RES2)
#define	Sx	(OF_RES1 | OF_STR1)
#define	SV	(OF_RES1 | OF_STR1 | OF_RES2)
#define	SS	(OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

#define	OPCLSMASK	0xFF00
#define	OPNMASK		0x007F

/* Operator priority is stored in the highest byte (even: r->l, odd: l->r
 * grouping).
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() parenthesizes its argument and shifts as uint32_t: builtin values such
 * as 0x83 have bit 7 set, and shifting them left by 24 as a signed int would
 * overflow.
 */
#define	P(x)		((uint32_t)(x) << 24)
#define	PRIMASK		0x7F000000
#define	PRIMASK2	0x7E000000

/* Operation classes (bits 8..15 of an info word, see OPCLSMASK) */

/* Class boundaries: SHIFT_TIL_THIS has the value of OC_WALKINIT and
 * RECUR_FROM_THIS the value of OC_BINARY.
 */
#define	SHIFT_TIL_THIS	0x0600
#define	RECUR_FROM_THIS	0x1000

enum {
	OC_DELETE=0x0100,	OC_EXEC=0x0200,		OC_NEWSOURCE=0x0300,
	OC_PRINT=0x0400,	OC_PRINTF=0x0500,	OC_WALKINIT=0x0600,

	OC_BR=0x0700,		OC_BREAK=0x0800,	OC_CONTINUE=0x0900,
	OC_EXIT=0x0a00,		OC_NEXT=0x0b00,		OC_NEXTFILE=0x0c00,
	OC_TEST=0x0d00,		OC_WALKNEXT=0x0e00,

	OC_BINARY=0x1000,	OC_BUILTIN=0x1100,	OC_COLON=0x1200,
	OC_COMMA=0x1300,	OC_COMPARE=0x1400,	OC_CONCAT=0x1500,
	OC_FBLTIN=0x1600,	OC_FIELD=0x1700,	OC_FNARG=0x1800,
	OC_FUNC=0x1900,		OC_GETLINE=0x1a00,	OC_IN=0x1b00,
	OC_LAND=0x1c00,		OC_LOR=0x1d00,		OC_MATCH=0x1e00,
	OC_MOVE=0x1f00,		OC_PGETLINE=0x2000,	OC_REGEXP=0x2100,
	OC_REPLACE=0x2200,	OC_RETURN=0x2300,	OC_SPRINTF=0x2400,
	OC_TERNARY=0x2500,	OC_UNARY=0x2600,	OC_VAR=0x2700,
	OC_DONE=0x2800,

	ST_IF=0x3000,		ST_DO=0x3100,		ST_FOR=0x3200,
	ST_WHILE=0x3300
};

/* simple builtins (low bits of an OC_FBLTIN info word in tokeninfo[]) */
enum {
	F_in=0,	F_rn,	F_co,	F_ex,	F_lg,	F_si,	F_sq,	F_sr,
	F_ti,	F_le,	F_sy,	F_ff,	F_cl
};

/* builtins (low bits of an OC_BUILTIN info word in tokeninfo[]) */
enum {
	B_a2=0,	B_ix,	B_ma,	B_sp,	B_ss,	B_ti,	B_lo,	B_up,
	B_ge,	B_gs,	B_su
};

/* tokens and their corresponding info values */

#define	NTC		"\377"		/* switch to next token class (tc<<1) */
#define	NTCC	'\377'

#define	OC_B	OC_BUILTIN

/* Token text table scanned by next_token().
 * Each entry is one length byte followed by that many characters of token
 * text; NTC ends a token class. Entries appear in the same order as the
 * values in tokeninfo[], and an empty entry terminates the list.
 */
static char * const tokenlist =
	"\1("		NTC
	"\1)"		NTC
	"\1/"		NTC									/* REGEXP */
	"\2>>"		"\1>"		"\1|"		NTC			/* OUTRDR */
	"\2++"		"\2--"		NTC						/* UOPPOST */
	"\2++"		"\2--"		"\1$"		NTC			/* UOPPRE1 */
	"\2=="		"\1="		"\2+="		"\2-="		/* BINOPX */
	"\2*="		"\2/="		"\2%="		"\2^="
	"\1+"		"\1-"		"\3**="		"\2**"
	"\1/"		"\1%"		"\1^"		"\1*"
	"\2!="		"\2>="		"\2<="		"\1>"
	"\1<"		"\2!~"		"\1~"		"\2&&"
	"\2||"		"\1?"		"\1:"		NTC
	"\2in"		NTC
	"\1,"		NTC
	"\1|"		NTC
	"\1+"		"\1-"		"\1!"		NTC			/* UOPPRE2 */
	"\1]"		NTC
	"\1{"		NTC
	"\1}"		NTC
	"\1;"		NTC
	"\1\n"		NTC
	"\2if"		"\2do"		"\3for"		"\5break"	/* STATX */
	"\10continue"			"\6delete"	"\5print"
	"\6printf"	"\4next"	"\10nextfile"
	"\6return"	"\4exit"	NTC
	"\5while"	NTC
	"\4else"	NTC

	"\5close"	"\6system"	"\6fflush"	"\5atan2"	/* BUILTIN */
	"\3cos"		"\3exp"		"\3int"		"\3log"
	"\4rand"	"\3sin"		"\4sqrt"	"\5srand"
	"\6gensub"	"\4gsub"	"\5index"	"\6length"
	"\5match"	"\5split"	"\7sprintf"	"\3sub"
	"\6substr"	"\7systime"	"\10strftime"
	"\7tolower"	"\7toupper"	NTC
	"\7getline"	NTC
	"\4func"	"\10function"	NTC
	"\5BEGIN"	NTC
	"\3END"		"\0"
	;
314
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000315static const uint32_t tokeninfo[] = {
Glenn L McGrath545106f2002-11-11 06:21:00 +0000316
317 0,
318 0,
319 OC_REGEXP,
320 xS|'a', xS|'w', xS|'|',
321 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
322 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
323 OC_FIELD|xV|P(5),
324 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
325 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
326 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
327 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
328 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
329 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
330 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
331 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
332 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
333 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
334 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
335 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
336 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
337 OC_COLON|xx|P(67)|':',
338 OC_IN|SV|P(49),
339 OC_COMMA|SS|P(80),
340 OC_PGETLINE|SV|P(37),
341 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
342 OC_UNARY|xV|P(19)|'!',
343 0,
344 0,
345 0,
346 0,
347 0,
348 ST_IF, ST_DO, ST_FOR, OC_BREAK,
349 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
350 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
351 OC_RETURN|Vx, OC_EXIT|Nx,
352 ST_WHILE,
353 0,
354
355 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
356 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
357 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
358 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
359 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
360 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
361 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
362 OC_GETLINE|SV|P(0),
363 0, 0,
364 0,
365 0
366};
367
/* internal variable names and their initial values */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
enum {
	CONVFMT=0,	OFMT,		FS,			OFS,
	ORS,		RS,			RT,			FILENAME,
	SUBSEP,		ARGIND,		ARGC,		ARGV,
	ERRNO,		FNR,
	NR,			NF,			IGNORECASE,
	ENVIRON,	F0,			_intvarcount_
};

/* NUL-separated names in enum order; an empty name ends the list */
static char * vNames =
	"CONVFMT\0"	"OFMT\0"	"FS\0*"		"OFS\0"
	"ORS\0"		"RS\0*"		"RT\0"		"FILENAME\0"
	"SUBSEP\0"	"ARGIND\0"	"ARGC\0"	"ARGV\0"
	"ERRNO\0"	"FNR\0"
	"NR\0"		"NF\0*"		"IGNORECASE\0*"
	"ENVIRON\0"	"$\0*"		"\0";

/* NUL-separated initial string values for the leading variables;
 * the list ends with a '\377' byte
 */
static char * vValues =
	"%.6g\0"	"%.6g\0"	" \0"		" \0"
	"\n\0"		"\n\0"		"\0"		"\0"
	"\034\0"
	"\377";

/* Initial hash size; the hash may then grow to the PRIMES[] values.
 * FIRST_PRIME has no trailing ';' so it can be used inside expressions.
 */
#define	FIRST_PRIME	61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(unsigned int) };

398/* globals */
399
400extern char **environ;
401
402static var * V[_intvarcount_];
403static chain beginseq, mainseq, endseq, *seq;
404static int nextrec, nextfile;
405static node *break_ptr, *continue_ptr;
406static rstream *iF;
407static xhash *vhash, *ahash, *fdhash, *fnhash;
408static char *programname;
409static short lineno;
410static int is_f0_split;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000411static int nfields;
412static var *Fields;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000413static tsplitter fsplitter, rsplitter;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000414static nvblock *cb;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000415static char *pos;
416static char *buf;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000417static int icase;
418static int exiting;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000419
420static struct {
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000421 uint32_t tclass;
422 uint32_t info;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000423 char *string;
424 double number;
425 short lineno;
426 int rollback;
427} t;
428
429/* function prototypes */
Glenn L McGrath545106f2002-11-11 06:21:00 +0000430static void handle_special(var *);
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000431static node *parse_expr(uint32_t);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000432static void chain_group(void);
433static var *evaluate(node *, var *);
434static rstream *next_input_file(void);
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000435static int fmt_num(char *, int, const char *, double, int);
Bernhard Reutner-Fischer86f5c992006-01-22 22:55:11 +0000436static int awk_exit(int) ATTRIBUTE_NORETURN;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000437
/* ---- error handling ---- */

static const char EMSG_INTERNAL_ERROR[] = "Internal error";
static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
static const char EMSG_INV_FMT[] = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
#ifndef CONFIG_FEATURE_AWK_MATH
static const char EMSG_NO_MATH[] = "Math support is not compiled in";
#endif

Bernhard Reutner-Fischer86f5c992006-01-22 22:55:11 +0000453static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
Glenn L McGrathd4036f82002-11-28 09:30:40 +0000454static void syntax_error(const char * const message)
455{
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000456 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000457}
458
459#define runtime_error(x) syntax_error(x)
460
461
/* ---- hash stuff ---- */

/* Hash NAME: for every character c, idx becomes c + idx*64 - idx
 * (i.e. idx*63 + c) in unsigned arithmetic. The empty string hashes to 0.
 * Callers reduce the result modulo the table size.
 */
static unsigned int hashidx(const char *name)
{
	unsigned int idx = 0;

	while (*name)
		idx = *name++ + (idx << 6) - idx;
	return idx;
}

472/* create new hash */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000473static xhash *hash_init(void)
474{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000475 xhash *newhash;
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000476
Rob Landley9ffd4232006-05-21 18:30:35 +0000477 newhash = (xhash *)xzalloc(sizeof(xhash));
Glenn L McGrath545106f2002-11-11 06:21:00 +0000478 newhash->csize = FIRST_PRIME;
Rob Landley9ffd4232006-05-21 18:30:35 +0000479 newhash->items = (hash_item **)xzalloc(newhash->csize * sizeof(hash_item *));
Glenn L McGrath545106f2002-11-11 06:21:00 +0000480
481 return newhash;
482}
483
484/* find item in hash, return ptr to data, NULL if not found */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000485static void *hash_search(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000486{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000487 hash_item *hi;
488
489 hi = hash->items [ hashidx(name) % hash->csize ];
490 while (hi) {
491 if (strcmp(hi->name, name) == 0)
492 return &(hi->data);
493 hi = hi->next;
494 }
495 return NULL;
496}
497
498/* grow hash if it becomes too big */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000499static void hash_rebuild(xhash *hash)
500{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000501 unsigned int newsize, i, idx;
502 hash_item **newitems, *hi, *thi;
503
504 if (hash->nprime == NPRIMES)
505 return;
506
507 newsize = PRIMES[hash->nprime++];
Rob Landley9ffd4232006-05-21 18:30:35 +0000508 newitems = (hash_item **)xzalloc(newsize * sizeof(hash_item *));
Glenn L McGrath545106f2002-11-11 06:21:00 +0000509
510 for (i=0; i<hash->csize; i++) {
511 hi = hash->items[i];
512 while (hi) {
513 thi = hi;
514 hi = thi->next;
515 idx = hashidx(thi->name) % newsize;
516 thi->next = newitems[idx];
517 newitems[idx] = thi;
518 }
519 }
520
521 free(hash->items);
522 hash->csize = newsize;
523 hash->items = newitems;
524}
525
526/* find item in hash, add it if necessary. Return ptr to data */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000527static void *hash_find(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000528{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000529 hash_item *hi;
530 unsigned int idx;
531 int l;
532
533 hi = hash_search(hash, name);
534 if (! hi) {
535 if (++hash->nel / hash->csize > 10)
536 hash_rebuild(hash);
537
Rob Landleya3896512006-05-07 20:20:34 +0000538 l = strlen(name) + 1;
Rob Landley9ffd4232006-05-21 18:30:35 +0000539 hi = xzalloc(sizeof(hash_item) + l);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000540 memcpy(hi->name, name, l);
541
542 idx = hashidx(name) % hash->csize;
543 hi->next = hash->items[idx];
544 hash->items[idx] = hi;
545 hash->glen += l;
546 }
547 return &(hi->data);
548}
549
/* Typed wrappers around hash_find(). Each expansion is fully parenthesized
 * so the cast binds before any operator applied to the macro's result.
 */
#define findvar(hash, name) ((var *)     hash_find ( (hash) , (name) ))
#define newvar(name)        ((var *)     hash_find ( vhash , (name) ))
#define newfile(name)       ((rstream *) hash_find ( fdhash , (name) ))
#define newfunc(name)       ((func *)    hash_find ( fnhash , (name) ))
554
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000555static void hash_remove(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000556{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000557 hash_item *hi, **phi;
558
559 phi = &(hash->items[ hashidx(name) % hash->csize ]);
560 while (*phi) {
561 hi = *phi;
562 if (strcmp(hi->name, name) == 0) {
Rob Landleya3896512006-05-07 20:20:34 +0000563 hash->glen -= (strlen(name) + 1);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000564 hash->nel--;
565 *phi = hi->next;
566 free(hi);
567 break;
568 }
569 phi = &(hi->next);
570 }
571}
572
573/* ------ some useful functions ------ */
574
Mike Frysinger10a11e22005-09-27 02:23:02 +0000575static void skip_spaces(char **s)
576{
"Robert P. J. Day"68229832006-07-01 13:08:46 +0000577 char *p = *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000578
579 while(*p == ' ' || *p == '\t' ||
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000580 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
Mike Frysingerde2b9382005-09-27 03:18:00 +0000581 p++;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000582 }
583 *s = p;
584}
585
/* Return the NUL-terminated word *s points at and advance *s to the byte
 * just past its terminating NUL (the start of the next word in a
 * NUL-separated list).
 */
static char *nextword(char **s)
{
	char *p = *s;

	*s = p + strlen(p) + 1;

	return p;
}

/* Return the next character of *s and advance *s. A backslash is passed to
 * bb_process_escape_sequence(); if that yields a backslash without moving
 * *s, the character following the backslash is returned as is.
 */
static char nextchar(char **s)
{
	char c, *pps;

	c = *((*s)++);
	pps = *s;		/* position right after the first character */
	if (c == '\\') c = bb_process_escape_sequence((const char**)s);
	if (c == '\\' && *s == pps) c = *((*s)++);
	return c;
}

/* Return non-zero if c is alphanumeric or an underscore.
 * c is narrowed to unsigned char first: callers pass plain char values,
 * which may be negative, and isalnum() is only defined for unsigned char
 * values and EOF.
 */
static inline int isalnum_(int c)
{
	return (isalnum((unsigned char)c) || c == '_');
}

/* Open PATH with MODE through bb_xfopen(); the path "-" yields stdin */
static FILE *afopen(const char *path, const char *mode)
{
	if (strcmp(path, "-") == 0)
		return stdin;
	return bb_xfopen(path, mode);
}

616/* -------- working with variables (set/get/copy/etc) -------- */
617
Mike Frysinger10a11e22005-09-27 02:23:02 +0000618static xhash *iamarray(var *v)
619{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000620 var *a = v;
621
622 while (a->type & VF_CHILD)
623 a = a->x.parent;
624
625 if (! (a->type & VF_ARRAY)) {
626 a->type |= VF_ARRAY;
627 a->x.array = hash_init();
628 }
629 return a->x.array;
630}
631
Mike Frysinger10a11e22005-09-27 02:23:02 +0000632static void clear_array(xhash *array)
633{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000634 unsigned int i;
635 hash_item *hi, *thi;
636
637 for (i=0; i<array->csize; i++) {
638 hi = array->items[i];
639 while (hi) {
640 thi = hi;
641 hi = hi->next;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000642 free(thi->data.v.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000643 free(thi);
644 }
645 array->items[i] = NULL;
646 }
647 array->glen = array->nel = 0;
648}
649
650/* clear a variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000651static var *clrvar(var *v)
652{
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000653 if (!(v->type & VF_FSTR))
Glenn L McGrath545106f2002-11-11 06:21:00 +0000654 free(v->string);
655
656 v->type &= VF_DONTTOUCH;
657 v->type |= VF_DIRTY;
658 v->string = NULL;
659 return v;
660}
661
662/* assign string value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000663static var *setvar_p(var *v, char *value)
664{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000665 clrvar(v);
666 v->string = value;
667 handle_special(v);
668
669 return v;
670}
671
672/* same as setvar_p but make a copy of string */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000673static var *setvar_s(var *v, const char *value)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000674{
Manuel Novoa III cad53642003-03-19 09:13:01 +0000675 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000676}
677
678/* same as setvar_s but set USER flag */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000679static var *setvar_u(var *v, const char *value)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000680{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000681 setvar_s(v, value);
682 v->type |= VF_USER;
683 return v;
684}
685
686/* set array element to user string */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000687static void setari_u(var *a, int idx, const char *s)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000688{
"Robert P. J. Day"68229832006-07-01 13:08:46 +0000689 var *v;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000690 static char sidx[12];
691
692 sprintf(sidx, "%d", idx);
693 v = findvar(iamarray(a), sidx);
694 setvar_u(v, s);
695}
696
697/* assign numeric value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000698static var *setvar_i(var *v, double value)
699{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000700 clrvar(v);
701 v->type |= VF_NUMBER;
702 v->number = value;
703 handle_special(v);
704 return v;
705}
706
Mike Frysinger10a11e22005-09-27 02:23:02 +0000707static char *getvar_s(var *v)
708{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000709 /* if v is numeric and has no cached string, convert it to string */
710 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
711 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
Manuel Novoa III cad53642003-03-19 09:13:01 +0000712 v->string = bb_xstrdup(buf);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000713 v->type |= VF_CACHED;
714 }
715 return (v->string == NULL) ? "" : v->string;
716}
717
Mike Frysinger10a11e22005-09-27 02:23:02 +0000718static double getvar_i(var *v)
719{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000720 char *s;
721
722 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
723 v->number = 0;
724 s = v->string;
725 if (s && *s) {
726 v->number = strtod(s, &s);
727 if (v->type & VF_USER) {
728 skip_spaces(&s);
729 if (*s != '\0')
730 v->type &= ~VF_USER;
731 }
732 } else {
733 v->type &= ~VF_USER;
734 }
735 v->type |= VF_CACHED;
736 }
737 return v->number;
738}
739
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000740static var *copyvar(var *dest, const var *src)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000741{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000742 if (dest != src) {
743 clrvar(dest);
744 dest->type |= (src->type & ~VF_DONTTOUCH);
745 dest->number = src->number;
746 if (src->string)
Manuel Novoa III cad53642003-03-19 09:13:01 +0000747 dest->string = bb_xstrdup(src->string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000748 }
749 handle_special(dest);
750 return dest;
751}
752
Mike Frysinger10a11e22005-09-27 02:23:02 +0000753static var *incvar(var *v)
754{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000755 return setvar_i(v, getvar_i(v)+1.);
756}
757
758/* return true if v is number or numeric string */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000759static int is_numeric(var *v)
760{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000761 getvar_i(v);
762 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
763}
764
765/* return 1 when value of v corresponds to true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000766static int istrue(var *v)
767{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000768 if (is_numeric(v))
769 return (v->number == 0) ? 0 : 1;
770 else
771 return (v->string && *(v->string)) ? 1 : 0;
772}
773
Eric Andersenaff114c2004-04-14 17:51:38 +0000774/* temporary variables allocator. Last allocated should be first freed */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000775static var *nvalloc(int n)
776{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000777 nvblock *pb = NULL;
778 var *v, *r;
779 int size;
780
781 while (cb) {
782 pb = cb;
783 if ((cb->pos - cb->nv) + n <= cb->size) break;
784 cb = cb->next;
785 }
786
787 if (! cb) {
788 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
789 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
790 cb->size = size;
791 cb->pos = cb->nv;
792 cb->prev = pb;
793 cb->next = NULL;
794 if (pb) pb->next = cb;
795 }
796
797 v = r = cb->pos;
798 cb->pos += n;
799
800 while (v < cb->pos) {
801 v->type = 0;
802 v->string = NULL;
803 v++;
804 }
805
806 return r;
807}
808
Mike Frysinger10a11e22005-09-27 02:23:02 +0000809static void nvfree(var *v)
810{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000811 var *p;
812
813 if (v < cb->nv || v >= cb->pos)
814 runtime_error(EMSG_INTERNAL_ERROR);
815
816 for (p=v; p<cb->pos; p++) {
817 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
818 clear_array(iamarray(p));
819 free(p->x.array->items);
820 free(p->x.array);
821 }
822 if (p->type & VF_WALK)
823 free(p->x.walker);
824
825 clrvar(p);
826 }
827
828 cb->pos = v;
829 while (cb->prev && cb->pos == cb->nv) {
830 cb = cb->prev;
831 }
832}
833
834/* ------- awk program text parsing ------- */
835
836/* Parse next token pointed by global pos, place results into global t.
837 * If token isn't expected, give away. Return token class
838 */
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000839static uint32_t next_token(uint32_t expected)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000840{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000841 char *p, *pp, *s;
842 char *tl;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000843 uint32_t tc;
844 const uint32_t *ti;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000845 int l;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000846 static int concat_inserted;
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000847 static uint32_t save_tclass, save_info;
848 static uint32_t ltclass = TC_OPTERM;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000849
850 if (t.rollback) {
851
852 t.rollback = FALSE;
853
854 } else if (concat_inserted) {
855
856 concat_inserted = FALSE;
857 t.tclass = save_tclass;
858 t.info = save_info;
859
860 } else {
861
862 p = pos;
863
864 readnext:
865 skip_spaces(&p);
866 lineno = t.lineno;
867 if (*p == '#')
868 while (*p != '\n' && *p != '\0') p++;
869
870 if (*p == '\n')
871 t.lineno++;
872
873 if (*p == '\0') {
874 tc = TC_EOF;
875
876 } else if (*p == '\"') {
877 /* it's a string */
878 t.string = s = ++p;
879 while (*p != '\"') {
880 if (*p == '\0' || *p == '\n')
881 syntax_error(EMSG_UNEXP_EOS);
882 *(s++) = nextchar(&p);
883 }
884 p++;
885 *s = '\0';
886 tc = TC_STRING;
887
888 } else if ((expected & TC_REGEXP) && *p == '/') {
889 /* it's regexp */
890 t.string = s = ++p;
891 while (*p != '/') {
892 if (*p == '\0' || *p == '\n')
893 syntax_error(EMSG_UNEXP_EOS);
894 if ((*s++ = *p++) == '\\') {
895 pp = p;
Manuel Novoa III cad53642003-03-19 09:13:01 +0000896 *(s-1) = bb_process_escape_sequence((const char **)&p);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000897 if (*pp == '\\') *s++ = '\\';
898 if (p == pp) *s++ = *p++;
899 }
900 }
901 p++;
902 *s = '\0';
903 tc = TC_REGEXP;
904
905 } else if (*p == '.' || isdigit(*p)) {
906 /* it's a number */
907 t.number = strtod(p, &p);
908 if (*p == '.')
909 syntax_error(EMSG_UNEXP_TOKEN);
910 tc = TC_NUMBER;
911
912 } else {
913 /* search for something known */
914 tl = tokenlist;
915 tc = 0x00000001;
916 ti = tokeninfo;
917 while (*tl) {
918 l = *(tl++);
919 if (l == NTCC) {
920 tc <<= 1;
921 continue;
922 }
923 /* if token class is expected, token
924 * matches and it's not a longer word,
925 * then this is what we are looking for
926 */
927 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
928 *tl == *p && strncmp(p, tl, l) == 0 &&
929 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
930 t.info = *ti;
931 p += l;
932 break;
933 }
934 ti++;
935 tl += l;
936 }
937
938 if (! *tl) {
939 /* it's a name (var/array/function),
940 * otherwise it's something wrong
941 */
942 if (! isalnum_(*p))
943 syntax_error(EMSG_UNEXP_TOKEN);
944
945 t.string = --p;
946 while(isalnum_(*(++p))) {
947 *(p-1) = *p;
948 }
949 *(p-1) = '\0';
950 tc = TC_VARIABLE;
Bernhard Reutner-Fischerbb204622005-10-17 14:21:06 +0000951 /* also consume whitespace between functionname and bracket */
Rob Landley46e351d2006-02-14 16:05:32 +0000952 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000953 if (*p == '(') {
954 tc = TC_FUNCTION;
955 } else {
Glenn L McGrath545106f2002-11-11 06:21:00 +0000956 if (*p == '[') {
957 p++;
958 tc = TC_ARRAY;
959 }
960 }
961 }
962 }
963 pos = p;
964
965 /* skipping newlines in some cases */
966 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
967 goto readnext;
968
969 /* insert concatenation operator when needed */
970 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
971 concat_inserted = TRUE;
972 save_tclass = tc;
973 save_info = t.info;
974 tc = TC_BINOP;
975 t.info = OC_CONCAT | SS | P(35);
976 }
977
978 t.tclass = tc;
979 }
980 ltclass = t.tclass;
981
982 /* Are we ready for this? */
983 if (! (ltclass & expected))
984 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
985 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
986
987 return ltclass;
988}
989
990static void rollback_token(void) { t.rollback = TRUE; }
991
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000992static node *new_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000993{
"Robert P. J. Day"68229832006-07-01 13:08:46 +0000994 node *n;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000995
Rob Landley9ffd4232006-05-21 18:30:35 +0000996 n = (node *)xzalloc(sizeof(node));
Glenn L McGrath545106f2002-11-11 06:21:00 +0000997 n->info = info;
998 n->lineno = lineno;
999 return n;
1000}
1001
Mike Frysinger10a11e22005-09-27 02:23:02 +00001002static node *mk_re_node(char *s, node *n, regex_t *re)
1003{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001004 n->info = OC_REGEXP;
1005 n->l.re = re;
1006 n->r.ire = re + 1;
1007 xregcomp(re, s, REG_EXTENDED);
1008 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1009
1010 return n;
1011}
1012
Mike Frysinger10a11e22005-09-27 02:23:02 +00001013static node *condition(void)
1014{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001015 next_token(TC_SEQSTART);
1016 return parse_expr(TC_SEQTERM);
1017}
1018
/* Expression parser.
 *   iexp    - mask of token classes that terminate the expression.
 *   returns - root of the built subtree (sn.r.n), NULL for an empty expression.
 * sn is a stack-allocated sentinel root; cn is the node attached most
 * recently. Every node's a.n link points back at the node it was attached
 * under, which lets an operator walk up the chain to find its place.
 * xtc is the mask of token classes accepted by the next next_token() call. */
1019/* parse expression terminated by given argument, return ptr
1020 * to built subtree. Terminator is eaten by parse_expr */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001021static node *parse_expr(uint32_t iexp)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001022{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001023 node sn;
1024 node *cn = &sn;
1025 node *vn, *glptr;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001026 uint32_t tc, xtc;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001027 var *v;
1028
/* the sentinel's info is PRIMASK, which bounds the upward walk below */
1029 sn.info = PRIMASK;
1030 sn.r.n = glptr = NULL;
1031 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1032
/* consume tokens until one of the terminating classes shows up */
1033 while (! ((tc = next_token(xtc)) & iexp)) {
/* glptr is only non-NULL right after a TC_GETLINE operand (set below) */
1034 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1035 /* input redirection (<) attached to glptr node */
1036 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
Glenn L McGrath4bded582004-02-22 11:55:09 +00001037 cn->a.n = glptr;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001038 xtc = TC_OPERAND | TC_UOPPRE;
1039 glptr = NULL;
1040
1041 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1042 /* for binary and postfix-unary operators, jump back over
1043 * previous operators with higher priority */
1044 vn = cn;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001045 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
Glenn L McGrath545106f2002-11-11 06:21:00 +00001046 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1047 vn = vn->a.n;
1048 if ((t.info & OPCLSMASK) == OC_TERNARY)
1049 t.info += P(6);
/* splice the new operator node in between vn and vn's former parent */
1050 cn = vn->a.n->r.n = new_node(t.info);
1051 cn->a.n = vn->a.n;
1052 if (tc & TC_BINOP) {
/* binary: vn becomes the left operand, an operand must follow */
1053 cn->l.n = vn;
1054 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1055 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1056 /* it's a pipe */
1057 next_token(TC_GETLINE);
1058 /* give maximum priority to this pipe */
1059 cn->info &= ~PRIMASK;
1060 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1061 }
1062 } else {
/* postfix unary: vn becomes the (right) operand */
1063 cn->r.n = vn;
1064 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1065 }
1066 vn->a.n = cn;
1067
1068 } else {
1069 /* for operands and prefix-unary operators, attach them
1070 * to last node */
1071 vn = cn;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001072 cn = vn->r.n = new_node(t.info);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001073 cn->a.n = vn;
1074 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1075 if (tc & (TC_OPERAND | TC_REGEXP)) {
Rob Landleyed830e82005-06-07 02:43:52 +00001076 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001077 /* one should be very careful with switch on tclass -
Glenn L McGrath545106f2002-11-11 06:21:00 +00001078 * only simple tclasses should be used! */
1079 switch (tc) {
1080 case TC_VARIABLE:
1081 case TC_ARRAY:
1082 cn->info = OC_VAR;
/* a name found in ahash is a function argument: store its index */
Mike Frysingerde2b9382005-09-27 03:18:00 +00001083 if ((v = hash_search(ahash, t.string)) != NULL) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001084 cn->info = OC_FNARG;
1085 cn->l.i = v->x.aidx;
1086 } else {
Mike Frysingerde2b9382005-09-27 03:18:00 +00001087 cn->l.v = newvar(t.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001088 }
/* array reference: the subscript expression goes into r.n */
1089 if (tc & TC_ARRAY) {
1090 cn->info |= xS;
1091 cn->r.n = parse_expr(TC_ARRTERM);
1092 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00001093 break;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001094
Glenn L McGrath545106f2002-11-11 06:21:00 +00001095 case TC_NUMBER:
1096 case TC_STRING:
/* literal: stored in a freshly allocated anonymous var */
1097 cn->info = OC_VAR;
Rob Landley9ffd4232006-05-21 18:30:35 +00001098 v = cn->l.v = xzalloc(sizeof(var));
Glenn L McGrath545106f2002-11-11 06:21:00 +00001099 if (tc & TC_NUMBER)
1100 setvar_i(v, t.number);
1101 else
1102 setvar_s(v, t.string);
1103 break;
1104
1105 case TC_REGEXP:
/* room for two regex_t: mk_re_node compiles re[0] and re[1] */
1106 mk_re_node(t.string, cn,
Rob Landley9ffd4232006-05-21 18:30:35 +00001107 (regex_t *)xzalloc(sizeof(regex_t)*2));
Glenn L McGrath545106f2002-11-11 06:21:00 +00001108 break;
1109
1110 case TC_FUNCTION:
/* user function call: r.f is the function, l.n the argument list */
Mike Frysingerde2b9382005-09-27 03:18:00 +00001111 cn->info = OC_FUNC;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001112 cn->r.f = newfunc(t.string);
1113 cn->l.n = condition();
1114 break;
1115
1116 case TC_SEQSTART:
/* parenthesised subexpression replaces the node created above */
1117 cn = vn->r.n = parse_expr(TC_SEQTERM);
1118 cn->a.n = vn;
1119 break;
1120
1121 case TC_GETLINE:
/* remember the node so a following '<' can be attached to it */
1122 glptr = cn;
1123 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1124 break;
1125
1126 case TC_BUILTIN:
/* builtin call: argument list goes into l.n */
1127 cn->l.n = condition();
1128 break;
1129 }
1130 }
1131 }
1132 }
1133 return sn.r.n;
1134}
1135
1136/* add node to chain. Return ptr to alloc'd node */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001137static node *chain_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001138{
"Robert P. J. Day"68229832006-07-01 13:08:46 +00001139 node *n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001140
1141 if (! seq->first)
1142 seq->first = seq->last = new_node(0);
1143
1144 if (seq->programname != programname) {
1145 seq->programname = programname;
1146 n = chain_node(OC_NEWSOURCE);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001147 n->l.s = bb_xstrdup(programname);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001148 }
1149
1150 n = seq->last;
1151 n->info = info;
1152 seq->last = n->a.n = new_node(OC_DONE);
1153
1154 return n;
1155}
1156
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001157static void chain_expr(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001158{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001159 node *n;
1160
1161 n = chain_node(info);
1162 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1163 if (t.tclass & TC_GRPTERM)
1164 rollback_token();
1165}
1166
Mike Frysinger10a11e22005-09-27 02:23:02 +00001167static node *chain_loop(node *nn)
1168{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001169 node *n, *n2, *save_brk, *save_cont;
1170
1171 save_brk = break_ptr;
1172 save_cont = continue_ptr;
1173
1174 n = chain_node(OC_BR | Vx);
1175 continue_ptr = new_node(OC_EXEC);
1176 break_ptr = new_node(OC_EXEC);
1177 chain_group();
1178 n2 = chain_node(OC_EXEC | Vx);
1179 n2->l.n = nn;
1180 n2->a.n = n;
1181 continue_ptr->a.n = n2;
1182 break_ptr->a.n = n->r.n = seq->last;
1183
1184 continue_ptr = save_cont;
1185 break_ptr = save_brk;
1186
1187 return n;
1188}
1189
/* Parses one statement (or a brace-delimited group of statements) and
 * appends the resulting nodes to the current sequence via chain_node().
 * Recurses for nested groups and for the bodies of if/else and loops. */
1190/* parse group and attach it to chain */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001191static void chain_group(void)
1192{
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001193 uint32_t c;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001194 node *n, *n2, *n3;
1195
/* skip newlines in front of the statement */
1196 do {
1197 c = next_token(TC_GRPSEQ);
1198 } while (c & TC_NEWLINE);
1199
1200 if (c & TC_GRPSTART) {
/* group: chain statements until the group terminator */
1201 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
Mike Frysingerde2b9382005-09-27 03:18:00 +00001202 if (t.tclass & TC_NEWLINE) continue;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001203 rollback_token();
1204 chain_group();
1205 }
1206 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
/* plain expression statement */
1207 rollback_token();
1208 chain_expr(OC_EXEC | Vx);
1209 } else { /* TC_STATEMNT */
1210 switch (t.info & OPCLSMASK) {
1211 case ST_IF:
/* n: branch on the condition; n2: node chained after the 'then' body.
 * n->r.n (the branch's other target) is the node following n2; when an
 * else part exists, n2->a.n is redirected to the node after it */
1212 n = chain_node(OC_BR | Vx);
1213 n->l.n = condition();
1214 chain_group();
1215 n2 = chain_node(OC_EXEC);
1216 n->r.n = seq->last;
1217 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1218 chain_group();
1219 n2->a.n = seq->last;
1220 } else {
1221 rollback_token();
1222 }
1223 break;
1224
1225 case ST_WHILE:
/* condition is parsed first, then attached to the loop head */
1226 n2 = condition();
1227 n = chain_loop(NULL);
1228 n->l.n = n2;
1229 break;
1230
1231 case ST_DO:
/* n2 is chained ahead of the loop head and its a.n is pointed at the
 * head's a.n, so the head's test is bypassed on entry; the condition
 * is parsed after the body */
1232 n2 = chain_node(OC_EXEC);
1233 n = chain_loop(NULL);
1234 n2->a.n = n->a.n;
1235 next_token(TC_WHILE);
1236 n->l.n = condition();
1237 break;
1238
1239 case ST_FOR:
1240 next_token(TC_SEQSTART);
1241 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1242 if (t.tclass & TC_SEQTERM) { /* for-in */
/* the only expression allowed here is an OC_IN node */
1243 if ((n2->info & OPCLSMASK) != OC_IN)
1244 syntax_error(EMSG_UNEXP_TOKEN);
1245 n = chain_node(OC_WALKINIT | VV);
1246 n->l.n = n2->l.n;
1247 n->r.n = n2->r.n;
/* loop head is turned from OC_BR into OC_WALKNEXT on the same variable */
1248 n = chain_loop(NULL);
1249 n->info = OC_WALKNEXT | Vx;
1250 n->l.n = n2->l.n;
1251 } else { /* for(;;) */
/* first expression runs once; n2 = condition, n3 = per-iteration expression */
1252 n = chain_node(OC_EXEC | Vx);
1253 n->l.n = n2;
1254 n2 = parse_expr(TC_SEMICOL);
1255 n3 = parse_expr(TC_SEQTERM);
1256 n = chain_loop(n3);
1257 n->l.n = n2;
/* empty condition: loop head becomes a plain OC_EXEC instead of a branch */
1258 if (! n2)
1259 n->info = OC_EXEC;
1260 }
1261 break;
1262
1263 case OC_PRINT:
1264 case OC_PRINTF:
/* l.n = argument list; on an output redirection token its info bits are
 * merged into the node and r.n receives the redirection target */
1265 n = chain_node(t.info);
1266 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1267 if (t.tclass & TC_OUTRDR) {
1268 n->info |= t.info;
1269 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1270 }
1271 if (t.tclass & TC_GRPTERM)
1272 rollback_token();
1273 break;
1274
1275 case OC_BREAK:
/* jump to the break node of the loop currently being parsed */
1276 n = chain_node(OC_EXEC);
1277 n->a.n = break_ptr;
1278 break;
1279
1280 case OC_CONTINUE:
/* jump to the continue node of the loop currently being parsed */
1281 n = chain_node(OC_EXEC);
1282 n->a.n = continue_ptr;
1283 break;
1284
1285 /* delete, next, nextfile, return, exit */
1286 default:
1287 chain_expr(t.info);
1288
1289 }
1290 }
1291}
1292
Mike Frysinger10a11e22005-09-27 02:23:02 +00001293static void parse_program(char *p)
1294{
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001295 uint32_t tclass;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001296 node *cn;
1297 func *f;
1298 var *v;
1299
1300 pos = p;
1301 t.lineno = 1;
1302 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1303 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1304
1305 if (tclass & TC_OPTERM)
1306 continue;
1307
1308 seq = &mainseq;
1309 if (tclass & TC_BEGIN) {
1310 seq = &beginseq;
1311 chain_group();
1312
1313 } else if (tclass & TC_END) {
1314 seq = &endseq;
1315 chain_group();
1316
1317 } else if (tclass & TC_FUNCDECL) {
1318 next_token(TC_FUNCTION);
1319 pos++;
1320 f = newfunc(t.string);
1321 f->body.first = NULL;
1322 f->nargs = 0;
1323 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1324 v = findvar(ahash, t.string);
1325 v->x.aidx = (f->nargs)++;
1326
1327 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1328 break;
1329 }
1330 seq = &(f->body);
1331 chain_group();
1332 clear_array(ahash);
1333
1334 } else if (tclass & TC_OPSEQ) {
1335 rollback_token();
1336 cn = chain_node(OC_TEST);
1337 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1338 if (t.tclass & TC_GRPSTART) {
1339 rollback_token();
1340 chain_group();
1341 } else {
1342 chain_node(OC_PRINT);
1343 }
1344 cn->r.n = mainseq.last;
1345
1346 } else /* if (tclass & TC_GRPSTART) */ {
1347 rollback_token();
1348 chain_group();
1349 }
1350 }
1351}
1352
1353
1354/* -------- program execution part -------- */
1355
Mike Frysinger10a11e22005-09-27 02:23:02 +00001356static node *mk_splitter(char *s, tsplitter *spl)
1357{
"Robert P. J. Day"68229832006-07-01 13:08:46 +00001358 regex_t *re, *ire;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001359 node *n;
1360
1361 re = &spl->re[0];
1362 ire = &spl->re[1];
1363 n = &spl->n;
1364 if ((n->info && OPCLSMASK) == OC_REGEXP) {
1365 regfree(re);
1366 regfree(ire);
1367 }
Rob Landleya3896512006-05-07 20:20:34 +00001368 if (strlen(s) > 1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001369 mk_re_node(s, n, re);
1370 } else {
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001371 n->info = (uint32_t) *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001372 }
1373
1374 return n;
1375}
1376
1377/* use node as a regular expression. Supplied with node ptr and regex_t
Eric Andersenaff114c2004-04-14 17:51:38 +00001378 * storage space. Return ptr to regex (if result points to preg, it should
Glenn L McGrath545106f2002-11-11 06:21:00 +00001379 * be later regfree'd manually
1380 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001381static regex_t *as_regex(node *op, regex_t *preg)
1382{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001383 var *v;
1384 char *s;
1385
1386 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1387 return icase ? op->r.ire : op->l.re;
1388 } else {
1389 v = nvalloc(1);
1390 s = getvar_s(evaluate(op, v));
1391 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1392 nvfree(v);
1393 return preg;
1394 }
1395}
1396
/* Gradually growing buffer: make sure *b exists and that *size is greater
 * than n. When (re)allocation is needed the new capacity is n + n/2 + 80
 * and is stored in *size. *size is not read while *b is NULL. */
static void qrealloc(char **b, int n, int *size)
{
	if (*b && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1403
1404/* resize field storage space */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001405static void fsrealloc(int size)
1406{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001407 static int maxfields = 0;
1408 int i;
1409
1410 if (size >= maxfields) {
1411 i = maxfields;
1412 maxfields = size + 16;
1413 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1414 for (; i<maxfields; i++) {
1415 Fields[i].type = VF_SPECIAL;
1416 Fields[i].string = NULL;
1417 }
1418 }
1419
1420 if (size < nfields) {
1421 for (i=size; i<nfields; i++) {
1422 clrvar(Fields+i);
1423 }
1424 }
1425 nfields = size;
1426}
1427
/* Split string s into fields according to splitter node spl.
 *   *slist  - receives a newly allocated buffer holding the fields as
 *             consecutive NUL-terminated strings (caller frees it).
 *   returns - number of fields.
 * Four modes, chosen from spl->info: regular expression, empty separator
 * (one field per character), a single literal character, or a single space
 * (fields separated by runs of whitespace). */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001428static int awk_split(char *s, node *spl, char **slist)
1429{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001430 int l, n=0;
1431 char c[4];
1432 char *s1;
1433 regmatch_t pmatch[2];
1434
1435 /* in worst case, each char would be a separate field */
Rob Landleya3896512006-05-07 20:20:34 +00001436 *slist = s1 = bb_xstrndup(s, strlen(s) * 2 + 3);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001437
/* c[0], c[1]: separator character (two copies, for the case-folded variant
 * below); c+2 is "" normally, or "\n" when RS is empty, so that newline
 * also separates fields in that case */
1438 c[0] = c[1] = (char)spl->info;
1439 c[2] = c[3] = '\0';
1440 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1441
1442 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1443 while (*s) {
/* l: distance to the next newline when RS is empty, else to end of string */
1444 l = strcspn(s, c+2);
1445 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1446 pmatch[0].rm_so <= l) {
/* separator matched before that point: field ends at match start;
 * a match that ends at offset 0 is widened to one character so that
 * the scan always advances */
1447 l = pmatch[0].rm_so;
1448 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1449 } else {
/* no usable match: field runs to l; step over the newline if present */
1450 pmatch[0].rm_eo = l;
1451 if (*(s+l)) pmatch[0].rm_eo++;
1452 }
1453
1454 memcpy(s1, s, l);
1455 *(s1+l) = '\0';
1456 nextword(&s1);
1457 s += pmatch[0].rm_eo;
1458 n++;
1459 }
1460 } else if (c[0] == '\0') { /* null split */
/* every character becomes its own field */
1461 while(*s) {
1462 *(s1++) = *(s++);
1463 *(s1++) = '\0';
1464 n++;
1465 }
1466 } else if (c[0] != ' ') { /* single-character split */
/* works in place on the copy made above; with icase both upper- and
 * lower-case forms of the separator are accepted */
1467 if (icase) {
1468 c[0] = toupper(c[0]);
1469 c[1] = tolower(c[1]);
1470 }
1471 if (*s1) n++;
1472 while ((s1 = strpbrk(s1, c))) {
1473 *(s1++) = '\0';
1474 n++;
1475 }
1476 } else { /* space split */
/* leading whitespace is skipped, so no empty fields are produced */
1477 while (*s) {
1478 while (isspace(*s)) s++;
1479 if (! *s) break;
1480 n++;
1481 while (*s && !isspace(*s))
1482 *(s1++) = *(s++);
1483 *(s1++) = '\0';
1484 }
1485 }
1486 return n;
1487}
1488
Mike Frysinger10a11e22005-09-27 02:23:02 +00001489static void split_f0(void)
1490{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001491 static char *fstrings = NULL;
1492 int i, n;
1493 char *s;
1494
1495 if (is_f0_split)
1496 return;
1497
1498 is_f0_split = TRUE;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00001499 free(fstrings);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001500 fsrealloc(0);
1501 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1502 fsrealloc(n);
1503 s = fstrings;
1504 for (i=0; i<n; i++) {
1505 Fields[i].string = nextword(&s);
1506 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1507 }
1508
1509 /* set NF manually to avoid side effects */
1510 clrvar(V[NF]);
1511 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1512 V[NF]->number = nfields;
1513}
1514
1515/* perform additional actions when some internal variables changed */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001516static void handle_special(var *v)
1517{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001518 int n;
1519 char *b, *sep, *s;
1520 int sl, l, len, i, bsize;
1521
1522 if (! (v->type & VF_SPECIAL))
1523 return;
1524
1525 if (v == V[NF]) {
1526 n = (int)getvar_i(v);
1527 fsrealloc(n);
1528
1529 /* recalculate $0 */
1530 sep = getvar_s(V[OFS]);
Rob Landleya3896512006-05-07 20:20:34 +00001531 sl = strlen(sep);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001532 b = NULL;
1533 len = 0;
1534 for (i=0; i<n; i++) {
1535 s = getvar_s(&Fields[i]);
Rob Landleya3896512006-05-07 20:20:34 +00001536 l = strlen(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001537 if (b) {
1538 memcpy(b+len, sep, sl);
1539 len += sl;
1540 }
1541 qrealloc(&b, len+l+sl, &bsize);
1542 memcpy(b+len, s, l);
1543 len += l;
1544 }
Glenn L McGrathca29ffc2004-09-24 09:24:27 +00001545 if (b) b[len] = '\0';
Glenn L McGrath545106f2002-11-11 06:21:00 +00001546 setvar_p(V[F0], b);
1547 is_f0_split = TRUE;
1548
1549 } else if (v == V[F0]) {
1550 is_f0_split = FALSE;
1551
1552 } else if (v == V[FS]) {
1553 mk_splitter(getvar_s(v), &fsplitter);
1554
1555 } else if (v == V[RS]) {
1556 mk_splitter(getvar_s(v), &rsplitter);
1557
1558 } else if (v == V[IGNORECASE]) {
1559 icase = istrue(v);
1560
1561 } else { /* $n */
1562 n = getvar_i(V[NF]);
1563 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1564 /* right here v is invalid. Just to note... */
1565 }
1566}
1567
1568/* step through func/builtin/etc arguments */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001569static node *nextarg(node **pn)
1570{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001571 node *n;
1572
1573 n = *pn;
1574 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1575 *pn = n->r.n;
1576 n = n->l.n;
1577 } else {
1578 *pn = NULL;
1579 }
1580 return n;
1581}
1582
Mike Frysinger10a11e22005-09-27 02:23:02 +00001583static void hashwalk_init(var *v, xhash *array)
1584{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001585 char **w;
1586 hash_item *hi;
1587 int i;
1588
1589 if (v->type & VF_WALK)
1590 free(v->x.walker);
1591
1592 v->type |= VF_WALK;
Rob Landley9ffd4232006-05-21 18:30:35 +00001593 w = v->x.walker = (char **)xzalloc(2 + 2*sizeof(char *) + array->glen);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001594 *w = *(w+1) = (char *)(w + 2);
1595 for (i=0; i<array->csize; i++) {
1596 hi = array->items[i];
1597 while(hi) {
1598 strcpy(*w, hi->name);
1599 nextword(w);
1600 hi = hi->next;
1601 }
1602 }
1603}
1604
Mike Frysinger10a11e22005-09-27 02:23:02 +00001605static int hashwalk_next(var *v)
1606{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001607 char **w;
1608
1609 w = v->x.walker;
1610 if (*(w+1) == *w)
1611 return FALSE;
1612
1613 setvar_s(v, nextword(w+1));
1614 return TRUE;
1615}
1616
1617/* evaluate node, return 1 when result is true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001618static int ptest(node *pattern)
1619{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001620 static var v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001621 return istrue(evaluate(pattern, &v));
1622}
1623
/* Reads one record from rsm into v and stores the matched separator text
 * in RT.
 * Returns 1 when a record was read, 0 when no data is left, and -1 when the
 * read failed (ERRNO is set from errno in that case).
 * Buffer state kept in rsm between calls: buffer (m), adv (a: offset of the
 * unconsumed data inside the buffer), pos (p: number of unconsumed bytes),
 * size (allocated size). */
1624/* read next record from stream rsm into a variable v */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001625static int awk_getline(rstream *rsm, var *v)
1626{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001627 char *b;
1628 regmatch_t pmatch[2];
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001629 int a, p, pp=0, size;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001630 int fd, so, eo, r, rp;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001631 char c, *m, *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001632
1633 /* we're using our own buffer since we need access to accumulating
1634 * characters
1635 */
1636 fd = fileno(rsm->F);
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001637 m = rsm->buffer;
1638 a = rsm->adv;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001639 p = rsm->pos;
1640 size = rsm->size;
/* c: the separator character when RS is not a regex; rp: offset where the
 * record text starts (only advanced in the empty-RS mode below) */
1641 c = (char) rsplitter.n.info;
1642 rp = 0;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001643
1644 if (! m) qrealloc(&m, 256, &size);
/* each pass: look for a separator in the buffered data (b[0..p)); if none
 * is found, read more input and try again. so/eo become the start and end
 * offsets of the separator; they default to p (record = all buffered data) */
Glenn L McGrath545106f2002-11-11 06:21:00 +00001645 do {
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001646 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001647 so = eo = p;
1648 r = 1;
1649 if (p > 0) {
1650 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
/* regex separator: a match is accepted only if data follows it.
 * NOTE(review): presumably because a match touching the end of the
 * buffered data could still grow with more input - confirm */
1651 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1652 b, 1, pmatch, 0) == 0) {
1653 so = pmatch[0].rm_so;
1654 eo = pmatch[0].rm_eo;
1655 if (b[eo] != '\0')
1656 break;
1657 }
1658 } else if (c != '\0') {
/* single-character separator: search only the data added by the last
 * read (from pp); an embedded NUL byte also ends the record */
1659 s = strchr(b+pp, c);
Rob Landley46e351d2006-02-14 16:05:32 +00001660 if (! s) s = memchr(b+pp, '\0', p - pp);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001661 if (s) {
1662 so = eo = s-b;
1663 eo++;
1664 break;
1665 }
1666 } else {
/* empty RS: leading newlines are skipped and the record ends at a run
 * of two or more newlines that is followed by further data */
1667 while (b[rp] == '\n')
1668 rp++;
1669 s = strstr(b+rp, "\n\n");
1670 if (s) {
1671 so = eo = s-b;
1672 while (b[eo] == '\n') eo++;
1673 if (b[eo] != '\0')
1674 break;
1675 }
1676 }
1677 }
1678
/* no separator yet: move the unconsumed data (p bytes plus terminator)
 * to the front of the buffer before reading more */
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001679 if (a > 0) {
1680 memmove(m, (const void *)(m+a), p+1);
1681 b = m;
1682 a = 0;
1683 }
1684
1685 qrealloc(&m, a+p+128, &size);
1686 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001687 pp = p;
1688 p += safe_read(fd, b+p, size-p-1);
/* p < pp means safe_read returned a negative value: drop the buffered
 * data, record errno and make the final result -1 (r-- below) */
1689 if (p < pp) {
1690 p = 0;
1691 r = 0;
1692 setvar_i(V[ERRNO], errno);
1693 }
1694 b[p] = '\0';
1695
/* keep looping only while the read delivered new data */
1696 } while (p > pp);
1697
1698 if (p == 0) {
1699 r--;
1700 } else {
/* temporarily NUL-terminate at so / eo to copy out the record
 * (b[rp..so)) and the separator text (b[so..eo)), then restore */
1701 c = b[so]; b[so] = '\0';
1702 setvar_s(v, b+rp);
1703 v->type |= VF_USER;
1704 b[so] = c;
1705 c = b[eo]; b[eo] = '\0';
1706 setvar_s(V[RT], b+so);
1707 b[eo] = c;
1708 }
1709
/* save buffer state: everything up to eo has been consumed */
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001710 rsm->buffer = m;
1711 rsm->adv = a + eo;
1712 rsm->pos = p - eo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001713 rsm->size = size;
1714
1715 return r;
1716}
1717
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00001718static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001719{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001720 int r=0;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00001721 char c;
1722 const char *s=format;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001723
1724 if (int_as_int && n == (int)n) {
1725 r = snprintf(b, size, "%d", (int)n);
1726 } else {
1727 do { c = *s; } while (*s && *++s);
1728 if (strchr("diouxX", c)) {
1729 r = snprintf(b, size, format, (int)n);
1730 } else if (strchr("eEfgG", c)) {
1731 r = snprintf(b, size, format, n);
1732 } else {
1733 runtime_error(EMSG_INV_FMT);
1734 }
1735 }
1736 return r;
1737}
1738
1739
/* n is the argument list: the first argument is the format, the remaining
 * ones are consumed one per conversion. The caller owns the returned,
 * NUL-terminated buffer. */
1740/* formatted output into an allocated buffer, return ptr to buffer */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001741static char *awk_printf(node *n)
1742{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001743 char *b = NULL;
1744 char *fmt, *s, *s1, *f;
1745 int i, j, incr, bsize;
1746 char c, c1;
1747 var *v, *arg;
1748
/* work on a private copy of the format: it is cut temporarily below */
1749 v = nvalloc(1);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001750 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
Glenn L McGrath545106f2002-11-11 06:21:00 +00001751
/* i: number of bytes produced so far. Each pass handles one chunk s..f:
 * literal text followed by at most one conversion specification */
1752 i = 0;
1753 while (*f) {
1754 s = f;
/* advance past literal text and "%%" pairs, stopping just after a lone '%' */
1755 while (*f && (*f != '%' || *(++f) == '%'))
1756 f++;
/* skip flags/width/precision up to the conversion letter */
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001757 while (*f && !isalpha(*f))
Glenn L McGrath545106f2002-11-11 06:21:00 +00001758 f++;
1759
/* reserve the chunk length plus MAXVARFMT bytes for the formatted value */
1760 incr = (f - s) + MAXVARFMT;
1761 qrealloc(&b, incr+i, &bsize);
/* c: conversion letter ('\0' if the chunk has none); the format is
 * NUL-terminated right after it for the duration of this chunk */
1762 c = *f; if (c != '\0') f++;
1763 c1 = *f ; *f = '\0';
1764 arg = evaluate(nextarg(&n), v);
1765
1766 j = i;
1767 if (c == 'c' || !c) {
/* %c (or trailing text): a numeric argument is used as a character
 * code, otherwise the first character of the string value */
1768 i += sprintf(b+i, s,
1769 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1770
1771 } else if (c == 's') {
/* %s: make room for the string's length on top of the reserve */
1772 s1 = getvar_s(arg);
Rob Landleya3896512006-05-07 20:20:34 +00001773 qrealloc(&b, incr+i+strlen(s1), &bsize);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001774 i += sprintf(b+i, s, s1);
1775
1776 } else {
/* everything else is handed to fmt_num as a numeric conversion */
1777 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1778 }
1779 *f = c1;
1780
1781 /* if there was an error while sprintf, return value is negative */
1782 if (i < j) i = j;
1783
1784 }
1785
/* resize to the exact length and terminate */
1786 b = xrealloc(b, i+1);
1787 free(fmt);
1788 nvfree(v);
1789 b[i] = '\0';
1790 return b;
1791}
1792
/* Locals: i counts matches found so far, di is the length of the output
 * built in ds, sp is the unscanned remainder of the source string. */
1793/* common substitution routine
1794 * replace (nm) substring of (src) that match (n) with (repl), store
1795 * result into (dest), return number of substitutions. If nm=0, replace
1796 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1797 * subexpression matching (\1-\9)
1798 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001799static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1800{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001801 char *ds = NULL;
1802 char *sp, *s;
1803 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1804 regmatch_t pmatch[10];
1805 regex_t sreg, *re;
1806
/* re points at sreg only when as_regex had to compile it (freed at the end) */
1807 re = as_regex(rn, &sreg);
1808 if (! src) src = V[F0];
1809 if (! dest) dest = V[F0];
1810
1811 i = di = 0;
1812 sp = getvar_s(src);
Rob Landleya3896512006-05-07 20:20:34 +00001813 rl = strlen(repl);
/* REG_NOTBOL is passed for every search that does not start at the
 * beginning of the source string */
Glenn L McGrath545106f2002-11-11 06:21:00 +00001814 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1815 so = pmatch[0].rm_so;
1816 eo = pmatch[0].rm_eo;
1817
/* copy everything up to the end of the match verbatim first */
1818 qrealloc(&ds, di + eo + rl, &dssize);
1819 memcpy(ds + di, sp, eo);
1820 di += eo;
1821 if (++i >= nm) {
1822 /* replace */
/* drop the matched text again and emit the replacement in its place;
 * nbs counts the backslashes immediately preceding the current char */
1823 di -= (eo - so);
1824 nbs = 0;
1825 for (s = repl; *s; s++) {
1826 ds[di++] = c = *s;
1827 if (c == '\\') {
1828 nbs++;
1829 continue;
1830 }
1831 if (c == '&' || (ex && c >= '0' && c <= '9')) {
/* rewind over the character just stored plus half (rounded up) of the
 * backslashes before it */
1832 di -= ((nbs + 3) >> 1);
1833 j = 0;
1834 if (c != '&') {
1835 j = c - '0';
1836 nbs++;
1837 }
/* odd count: the character is emitted literally; even count: it is
 * replaced by the text of match group j (0 = whole match) */
1838 if (nbs % 2) {
1839 ds[di++] = c;
1840 } else {
1841 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1842 qrealloc(&ds, di + rl + n, &dssize);
1843 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1844 di += n;
1845 }
1846 }
1847 nbs = 0;
1848 }
1849 }
1850
1851 sp += eo;
/* nm-th match handled: done (never true for nm == 0, i.e. replace all) */
1852 if (i == nm) break;
/* empty match: copy one source character so the scan advances; stop at
 * the end of the string */
1853 if (eo == so) {
1854 if (! (ds[di++] = *sp++)) break;
1855 }
1856 }
1857
/* append the unscanned remainder and store the result */
1858 qrealloc(&ds, di + strlen(sp), &dssize);
1859 strcpy(ds + di, sp);
1860 setvar_p(dest, ds);
1861 if (re == &sreg) regfree(re);
1862 return i;
1863}
1864
Mike Frysinger10a11e22005-09-27 02:23:02 +00001865static var *exec_builtin(node *op, var *res)
1866{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001867 int (*to_xxx)(int);
1868 var *tv;
1869 node *an[4];
1870 var *av[4];
1871 char *as[4];
1872 regmatch_t pmatch[2];
1873 regex_t sreg, *re;
1874 static tsplitter tspl;
1875 node *spl;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001876 uint32_t isr, info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001877 int nargs;
1878 time_t tt;
1879 char *s, *s1;
1880 int i, l, ll, n;
1881
1882 tv = nvalloc(4);
1883 isr = info = op->info;
1884 op = op->l.n;
1885
1886 av[2] = av[3] = NULL;
1887 for (i=0 ; i<4 && op ; i++) {
1888 an[i] = nextarg(&op);
1889 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1890 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1891 isr >>= 1;
1892 }
1893
1894 nargs = i;
1895 if (nargs < (info >> 30))
1896 runtime_error(EMSG_TOO_FEW_ARGS);
1897
1898 switch (info & OPNMASK) {
1899
1900 case B_a2:
1901#ifdef CONFIG_FEATURE_AWK_MATH
1902 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1903#else
1904 runtime_error(EMSG_NO_MATH);
1905#endif
1906 break;
1907
1908 case B_sp:
1909 if (nargs > 2) {
1910 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1911 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1912 } else {
1913 spl = &fsplitter.n;
1914 }
1915
1916 n = awk_split(as[0], spl, &s);
1917 s1 = s;
1918 clear_array(iamarray(av[1]));
1919 for (i=1; i<=n; i++)
1920 setari_u(av[1], i, nextword(&s1));
1921 free(s);
1922 setvar_i(res, n);
1923 break;
1924
1925 case B_ss:
Rob Landleya3896512006-05-07 20:20:34 +00001926 l = strlen(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001927 i = getvar_i(av[1]) - 1;
1928 if (i>l) i=l; if (i<0) i=0;
1929 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1930 if (n<0) n=0;
1931 s = xmalloc(n+1);
1932 strncpy(s, as[0]+i, n);
1933 s[n] = '\0';
1934 setvar_p(res, s);
1935 break;
1936
1937 case B_lo:
1938 to_xxx = tolower;
1939 goto lo_cont;
1940
1941 case B_up:
1942 to_xxx = toupper;
1943lo_cont:
Manuel Novoa III cad53642003-03-19 09:13:01 +00001944 s1 = s = bb_xstrdup(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001945 while (*s1) {
1946 *s1 = (*to_xxx)(*s1);
1947 s1++;
1948 }
1949 setvar_p(res, s);
1950 break;
1951
1952 case B_ix:
1953 n = 0;
Rob Landleya3896512006-05-07 20:20:34 +00001954 ll = strlen(as[1]);
1955 l = strlen(as[0]) - ll;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001956 if (ll > 0 && l >= 0) {
1957 if (! icase) {
1958 s = strstr(as[0], as[1]);
1959 if (s) n = (s - as[0]) + 1;
1960 } else {
1961 /* this piece of code is terribly slow and
1962 * really should be rewritten
1963 */
1964 for (i=0; i<=l; i++) {
1965 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1966 n = i+1;
1967 break;
1968 }
1969 }
1970 }
1971 }
1972 setvar_i(res, n);
1973 break;
1974
1975 case B_ti:
1976 if (nargs > 1)
1977 tt = getvar_i(av[1]);
1978 else
1979 time(&tt);
1980 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1981 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1982 buf[i] = '\0';
1983 setvar_s(res, buf);
1984 break;
1985
1986 case B_ma:
1987 re = as_regex(an[1], &sreg);
1988 n = regexec(re, as[0], 1, pmatch, 0);
1989 if (n == 0) {
1990 pmatch[0].rm_so++;
1991 pmatch[0].rm_eo++;
1992 } else {
1993 pmatch[0].rm_so = 0;
1994 pmatch[0].rm_eo = -1;
1995 }
1996 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1997 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1998 setvar_i(res, pmatch[0].rm_so);
1999 if (re == &sreg) regfree(re);
2000 break;
2001
2002 case B_ge:
2003 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2004 break;
2005
2006 case B_gs:
2007 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2008 break;
2009
2010 case B_su:
2011 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2012 break;
2013 }
2014
2015 nvfree(tv);
2016 return res;
2017}
2018
2019/*
2020 * Evaluate node - the heart of the program. Supplied with subtree
2021 * and place where to store result. returns ptr to result.
2022 */
2023#define XC(n) ((n) >> 8)
2024
Mike Frysinger10a11e22005-09-27 02:23:02 +00002025static var *evaluate(node *op, var *res)
2026{
Mike Frysingerde2b9382005-09-27 03:18:00 +00002027 /* This procedure is recursive so we should count every byte */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002028 static var *fnargs = NULL;
2029 static unsigned int seed = 1;
2030 static regex_t sreg;
2031 node *op1;
2032 var *v1;
2033 union {
2034 var *v;
2035 char *s;
2036 double d;
2037 int i;
2038 } L, R;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00002039 uint32_t opinfo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002040 short opn;
2041 union {
2042 char *s;
2043 rstream *rsm;
2044 FILE *F;
2045 var *v;
2046 regex_t *re;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00002047 uint32_t info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002048 } X;
2049
2050 if (! op)
2051 return setvar_s(res, NULL);
2052
2053 v1 = nvalloc(2);
2054
2055 while (op) {
2056
2057 opinfo = op->info;
2058 opn = (short)(opinfo & OPNMASK);
2059 lineno = op->lineno;
2060
Mike Frysingerde2b9382005-09-27 03:18:00 +00002061 /* execute inevitable things */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002062 op1 = op->l.n;
2063 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2064 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2065 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2066 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2067 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2068
2069 switch (XC(opinfo & OPCLSMASK)) {
2070
2071 /* -- iterative node type -- */
2072
2073 /* test pattern */
2074 case XC( OC_TEST ):
2075 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2076 /* it's range pattern */
2077 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2078 op->info |= OF_CHECKED;
2079 if (ptest(op1->r.n))
2080 op->info &= ~OF_CHECKED;
2081
2082 op = op->a.n;
2083 } else {
2084 op = op->r.n;
2085 }
2086 } else {
2087 op = (ptest(op1)) ? op->a.n : op->r.n;
2088 }
2089 break;
2090
2091 /* just evaluate an expression, also used as unconditional jump */
2092 case XC( OC_EXEC ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002093 break;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002094
2095 /* branch, used in if-else and various loops */
2096 case XC( OC_BR ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002097 op = istrue(L.v) ? op->a.n : op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002098 break;
2099
2100 /* initialize for-in loop */
2101 case XC( OC_WALKINIT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002102 hashwalk_init(L.v, iamarray(R.v));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002103 break;
2104
2105 /* get next array item */
2106 case XC( OC_WALKNEXT ):
2107 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2108 break;
2109
2110 case XC( OC_PRINT ):
2111 case XC( OC_PRINTF ):
2112 X.F = stdout;
Mike Frysingerde2b9382005-09-27 03:18:00 +00002113 if (op->r.n) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002114 X.rsm = newfile(R.s);
2115 if (! X.rsm->F) {
2116 if (opn == '|') {
2117 if((X.rsm->F = popen(R.s, "w")) == NULL)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002118 bb_perror_msg_and_die("popen");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002119 X.rsm->is_pipe = 1;
2120 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002121 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002122 }
2123 }
2124 X.F = X.rsm->F;
2125 }
2126
2127 if ((opinfo & OPCLSMASK) == OC_PRINT) {
Mike Frysingerde2b9382005-09-27 03:18:00 +00002128 if (! op1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002129 fputs(getvar_s(V[F0]), X.F);
2130 } else {
2131 while (op1) {
2132 L.v = evaluate(nextarg(&op1), v1);
2133 if (L.v->type & VF_NUMBER) {
2134 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2135 getvar_i(L.v), TRUE);
2136 fputs(buf, X.F);
2137 } else {
2138 fputs(getvar_s(L.v), X.F);
2139 }
2140
2141 if (op1) fputs(getvar_s(V[OFS]), X.F);
2142 }
2143 }
2144 fputs(getvar_s(V[ORS]), X.F);
2145
2146 } else { /* OC_PRINTF */
2147 L.s = awk_printf(op1);
2148 fputs(L.s, X.F);
2149 free(L.s);
2150 }
2151 fflush(X.F);
2152 break;
2153
2154 case XC( OC_DELETE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002155 X.info = op1->info & OPCLSMASK;
2156 if (X.info == OC_VAR) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002157 R.v = op1->l.v;
2158 } else if (X.info == OC_FNARG) {
2159 R.v = &fnargs[op1->l.i];
2160 } else {
2161 runtime_error(EMSG_NOT_ARRAY);
2162 }
2163
Mike Frysingerde2b9382005-09-27 03:18:00 +00002164 if (op1->r.n) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002165 clrvar(L.v);
2166 L.s = getvar_s(evaluate(op1->r.n, v1));
2167 hash_remove(iamarray(R.v), L.s);
2168 } else {
2169 clear_array(iamarray(R.v));
2170 }
2171 break;
2172
2173 case XC( OC_NEWSOURCE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002174 programname = op->l.s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002175 break;
2176
2177 case XC( OC_RETURN ):
2178 copyvar(res, L.v);
2179 break;
2180
2181 case XC( OC_NEXTFILE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002182 nextfile = TRUE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002183 case XC( OC_NEXT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002184 nextrec = TRUE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002185 case XC( OC_DONE ):
2186 clrvar(res);
2187 break;
2188
2189 case XC( OC_EXIT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002190 awk_exit(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002191
2192 /* -- recursive node type -- */
2193
2194 case XC( OC_VAR ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002195 L.v = op->l.v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002196 if (L.v == V[NF])
2197 split_f0();
2198 goto v_cont;
2199
2200 case XC( OC_FNARG ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002201 L.v = &fnargs[op->l.i];
Glenn L McGrath545106f2002-11-11 06:21:00 +00002202
2203v_cont:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002204 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002205 break;
2206
2207 case XC( OC_IN ):
2208 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2209 break;
2210
2211 case XC( OC_REGEXP ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002212 op1 = op;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002213 L.s = getvar_s(V[F0]);
2214 goto re_cont;
2215
2216 case XC( OC_MATCH ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002217 op1 = op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002218re_cont:
2219 X.re = as_regex(op1, &sreg);
2220 R.i = regexec(X.re, L.s, 0, NULL, 0);
2221 if (X.re == &sreg) regfree(X.re);
2222 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2223 break;
2224
2225 case XC( OC_MOVE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002226 /* if source is a temporary string, jusk relink it to dest */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002227 if (R.v == v1+1 && R.v->string) {
2228 res = setvar_p(L.v, R.v->string);
2229 R.v->string = NULL;
2230 } else {
Mike Frysingerde2b9382005-09-27 03:18:00 +00002231 res = copyvar(L.v, R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002232 }
2233 break;
2234
2235 case XC( OC_TERNARY ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002236 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002237 runtime_error(EMSG_POSSIBLE_ERROR);
2238 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2239 break;
2240
2241 case XC( OC_FUNC ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002242 if (! op->r.f->body.first)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002243 runtime_error(EMSG_UNDEF_FUNC);
2244
2245 X.v = R.v = nvalloc(op->r.f->nargs+1);
2246 while (op1) {
2247 L.v = evaluate(nextarg(&op1), v1);
2248 copyvar(R.v, L.v);
2249 R.v->type |= VF_CHILD;
2250 R.v->x.parent = L.v;
2251 if (++R.v - X.v >= op->r.f->nargs)
2252 break;
2253 }
2254
2255 R.v = fnargs;
2256 fnargs = X.v;
2257
2258 L.s = programname;
2259 res = evaluate(op->r.f->body.first, res);
2260 programname = L.s;
2261
2262 nvfree(fnargs);
2263 fnargs = R.v;
2264 break;
2265
2266 case XC( OC_GETLINE ):
2267 case XC( OC_PGETLINE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002268 if (op1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002269 X.rsm = newfile(L.s);
2270 if (! X.rsm->F) {
2271 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2272 X.rsm->F = popen(L.s, "r");
2273 X.rsm->is_pipe = TRUE;
2274 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002275 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002276 }
2277 }
2278 } else {
2279 if (! iF) iF = next_input_file();
2280 X.rsm = iF;
2281 }
2282
2283 if (! X.rsm->F) {
2284 setvar_i(V[ERRNO], errno);
2285 setvar_i(res, -1);
2286 break;
2287 }
2288
2289 if (! op->r.n)
2290 R.v = V[F0];
2291
2292 L.i = awk_getline(X.rsm, R.v);
2293 if (L.i > 0) {
2294 if (! op1) {
2295 incvar(V[FNR]);
2296 incvar(V[NR]);
2297 }
2298 }
2299 setvar_i(res, L.i);
2300 break;
2301
Mike Frysingerde2b9382005-09-27 03:18:00 +00002302 /* simple builtins */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002303 case XC( OC_FBLTIN ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002304 switch (opn) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002305
2306 case F_in:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002307 R.d = (int)L.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002308 break;
2309
2310 case F_rn:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002311 R.d = (double)rand() / (double)RAND_MAX;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002312 break;
2313
2314#ifdef CONFIG_FEATURE_AWK_MATH
2315 case F_co:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002316 R.d = cos(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002317 break;
2318
2319 case F_ex:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002320 R.d = exp(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002321 break;
2322
2323 case F_lg:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002324 R.d = log(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002325 break;
2326
2327 case F_si:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002328 R.d = sin(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002329 break;
2330
2331 case F_sq:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002332 R.d = sqrt(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002333 break;
2334#else
2335 case F_co:
2336 case F_ex:
2337 case F_lg:
2338 case F_si:
2339 case F_sq:
2340 runtime_error(EMSG_NO_MATH);
2341 break;
2342#endif
2343
2344 case F_sr:
2345 R.d = (double)seed;
2346 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2347 srand(seed);
2348 break;
2349
2350 case F_ti:
2351 R.d = time(NULL);
2352 break;
2353
2354 case F_le:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002355 if (! op1)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002356 L.s = getvar_s(V[F0]);
Rob Landleya3896512006-05-07 20:20:34 +00002357 R.d = strlen(L.s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002358 break;
2359
2360 case F_sy:
2361 fflush(NULL);
Rob Landley51843362006-01-09 05:26:58 +00002362 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002363 break;
2364
2365 case F_ff:
2366 if (! op1)
2367 fflush(stdout);
2368 else {
2369 if (L.s && *L.s) {
2370 X.rsm = newfile(L.s);
2371 fflush(X.rsm->F);
2372 } else {
2373 fflush(NULL);
2374 }
2375 }
2376 break;
2377
2378 case F_cl:
2379 X.rsm = (rstream *)hash_search(fdhash, L.s);
2380 if (X.rsm) {
2381 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00002382 free(X.rsm->buffer);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002383 hash_remove(fdhash, L.s);
2384 }
2385 if (R.i != 0)
2386 setvar_i(V[ERRNO], errno);
2387 R.d = (double)R.i;
2388 break;
2389 }
2390 setvar_i(res, R.d);
2391 break;
2392
2393 case XC( OC_BUILTIN ):
2394 res = exec_builtin(op, res);
2395 break;
2396
2397 case XC( OC_SPRINTF ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002398 setvar_p(res, awk_printf(op1));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002399 break;
2400
2401 case XC( OC_UNARY ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002402 X.v = R.v;
2403 L.d = R.d = getvar_i(R.v);
2404 switch (opn) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002405 case 'P':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002406 L.d = ++R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002407 goto r_op_change;
2408 case 'p':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002409 R.d++;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002410 goto r_op_change;
2411 case 'M':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002412 L.d = --R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002413 goto r_op_change;
2414 case 'm':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002415 R.d--;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002416 goto r_op_change;
2417 case '!':
2418 L.d = istrue(X.v) ? 0 : 1;
2419 break;
2420 case '-':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002421 L.d = -R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002422 break;
2423 r_op_change:
2424 setvar_i(X.v, R.d);
2425 }
2426 setvar_i(res, L.d);
2427 break;
2428
2429 case XC( OC_FIELD ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002430 R.i = (int)getvar_i(R.v);
2431 if (R.i == 0) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002432 res = V[F0];
2433 } else {
2434 split_f0();
2435 if (R.i > nfields)
2436 fsrealloc(R.i);
2437
2438 res = &Fields[R.i-1];
2439 }
2440 break;
2441
2442 /* concatenation (" ") and index joining (",") */
2443 case XC( OC_CONCAT ):
2444 case XC( OC_COMMA ):
Rob Landleya3896512006-05-07 20:20:34 +00002445 opn = strlen(L.s) + strlen(R.s) + 2;
Mike Frysingerde2b9382005-09-27 03:18:00 +00002446 X.s = (char *)xmalloc(opn);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002447 strcpy(X.s, L.s);
2448 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2449 L.s = getvar_s(V[SUBSEP]);
Rob Landleya3896512006-05-07 20:20:34 +00002450 X.s = (char *)xrealloc(X.s, opn + strlen(L.s));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002451 strcat(X.s, L.s);
2452 }
2453 strcat(X.s, R.s);
2454 setvar_p(res, X.s);
2455 break;
2456
2457 case XC( OC_LAND ):
2458 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2459 break;
2460
2461 case XC( OC_LOR ):
2462 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2463 break;
2464
2465 case XC( OC_BINARY ):
2466 case XC( OC_REPLACE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002467 R.d = getvar_i(R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002468 switch (opn) {
2469 case '+':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002470 L.d += R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002471 break;
2472 case '-':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002473 L.d -= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002474 break;
2475 case '*':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002476 L.d *= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002477 break;
2478 case '/':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002479 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2480 L.d /= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002481 break;
2482 case '&':
2483#ifdef CONFIG_FEATURE_AWK_MATH
Mike Frysingerde2b9382005-09-27 03:18:00 +00002484 L.d = pow(L.d, R.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002485#else
2486 runtime_error(EMSG_NO_MATH);
2487#endif
2488 break;
2489 case '%':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002490 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2491 L.d -= (int)(L.d / R.d) * R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002492 break;
2493 }
2494 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2495 break;
2496
2497 case XC( OC_COMPARE ):
2498 if (is_numeric(L.v) && is_numeric(R.v)) {
2499 L.d = getvar_i(L.v) - getvar_i(R.v);
2500 } else {
2501 L.s = getvar_s(L.v);
2502 R.s = getvar_s(R.v);
2503 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2504 }
2505 switch (opn & 0xfe) {
2506 case 0:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002507 R.i = (L.d > 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002508 break;
2509 case 2:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002510 R.i = (L.d >= 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002511 break;
2512 case 4:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002513 R.i = (L.d == 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002514 break;
2515 }
2516 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2517 break;
2518
2519 default:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002520 runtime_error(EMSG_POSSIBLE_ERROR);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002521 }
2522 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2523 op = op->a.n;
2524 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2525 break;
2526 if (nextrec)
2527 break;
2528 }
2529 nvfree(v1);
2530 return res;
2531}
2532
2533
2534/* -------- main & co. -------- */
2535
Mike Frysinger10a11e22005-09-27 02:23:02 +00002536static int awk_exit(int r)
2537{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002538 unsigned int i;
2539 hash_item *hi;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002540 static var tv;
2541
2542 if (! exiting) {
2543 exiting = TRUE;
Glenn L McGrathca29ffc2004-09-24 09:24:27 +00002544 nextrec = FALSE;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002545 evaluate(endseq.first, &tv);
2546 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002547
2548 /* waiting for children */
2549 for (i=0; i<fdhash->csize; i++) {
2550 hi = fdhash->items[i];
2551 while(hi) {
2552 if (hi->data.rs.F && hi->data.rs.is_pipe)
2553 pclose(hi->data.rs.F);
2554 hi = hi->next;
2555 }
2556 }
2557
2558 exit(r);
2559}
2560
2561/* if expr looks like "var=value", perform assignment and return 1,
2562 * otherwise return 0 */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00002563static int is_assignment(const char *expr)
Mike Frysinger10a11e22005-09-27 02:23:02 +00002564{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002565 char *exprc, *s, *s0, *s1;
2566
Manuel Novoa III cad53642003-03-19 09:13:01 +00002567 exprc = bb_xstrdup(expr);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002568 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2569 free(exprc);
2570 return FALSE;
2571 }
2572
2573 *(s++) = '\0';
2574 s0 = s1 = s;
2575 while (*s)
2576 *(s1++) = nextchar(&s);
2577
2578 *s1 = '\0';
2579 setvar_u(newvar(exprc), s0);
2580 free(exprc);
2581 return TRUE;
2582}
2583
2584/* switch to next input file */
Mike Frysinger10a11e22005-09-27 02:23:02 +00002585static rstream *next_input_file(void)
2586{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002587 static rstream rsm;
2588 FILE *F = NULL;
2589 char *fname, *ind;
2590 static int files_happen = FALSE;
2591
2592 if (rsm.F) fclose(rsm.F);
2593 rsm.F = NULL;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002594 rsm.pos = rsm.adv = 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002595
2596 do {
2597 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2598 if (files_happen)
2599 return NULL;
2600 fname = "-";
2601 F = stdin;
2602 } else {
2603 ind = getvar_s(incvar(V[ARGIND]));
2604 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2605 if (fname && *fname && !is_assignment(fname))
2606 F = afopen(fname, "r");
2607 }
2608 } while (!F);
2609
2610 files_happen = TRUE;
2611 setvar_s(V[FILENAME], fname);
2612 rsm.F = F;
2613 return &rsm;
2614}
2615
Rob Landleydfba7412006-03-06 20:47:33 +00002616int awk_main(int argc, char **argv)
Mike Frysinger10a11e22005-09-27 02:23:02 +00002617{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002618 char *s, *s1;
Rob Landley46e351d2006-02-14 16:05:32 +00002619 int i, j, c, flen;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002620 var *v;
2621 static var tv;
2622 char **envp;
2623 static int from_file = FALSE;
2624 rstream *rsm;
2625 FILE *F, *stdfiles[3];
2626 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2627
2628 /* allocate global buffer */
2629 buf = xmalloc(MAXVARFMT+1);
2630
2631 vhash = hash_init();
2632 ahash = hash_init();
2633 fdhash = hash_init();
2634 fnhash = hash_init();
2635
2636 /* initialize variables */
2637 for (i=0; *vNames; i++) {
2638 V[i] = v = newvar(nextword(&vNames));
2639 if (*vValues != '\377')
2640 setvar_s(v, nextword(&vValues));
2641 else
2642 setvar_i(v, 0);
2643
2644 if (*vNames == '*') {
2645 v->type |= VF_SPECIAL;
2646 vNames++;
2647 }
2648 }
2649
2650 handle_special(V[FS]);
2651 handle_special(V[RS]);
2652
2653 stdfiles[0] = stdin;
2654 stdfiles[1] = stdout;
2655 stdfiles[2] = stderr;
2656 for (i=0; i<3; i++) {
2657 rsm = newfile(nextword(&stdnames));
2658 rsm->F = stdfiles[i];
2659 }
2660
2661 for (envp=environ; *envp; envp++) {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002662 s = bb_xstrdup(*envp);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002663 s1 = strchr(s, '=');
Eric Andersen67776be2004-07-30 23:52:08 +00002664 if (!s1) {
2665 goto keep_going;
2666 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002667 *(s1++) = '\0';
2668 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
Eric Andersen67776be2004-07-30 23:52:08 +00002669keep_going:
Glenn L McGrath545106f2002-11-11 06:21:00 +00002670 free(s);
2671 }
2672
2673 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2674 switch (c) {
2675 case 'F':
2676 setvar_s(V[FS], optarg);
2677 break;
2678 case 'v':
2679 if (! is_assignment(optarg))
Manuel Novoa III cad53642003-03-19 09:13:01 +00002680 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002681 break;
2682 case 'f':
2683 from_file = TRUE;
2684 F = afopen(programname = optarg, "r");
2685 s = NULL;
2686 /* one byte is reserved for some trick in next_token */
Rob Landley46e351d2006-02-14 16:05:32 +00002687 if (fseek(F, 0, SEEK_END) == 0) {
2688 flen = ftell(F);
2689 s = (char *)xmalloc(flen+4);
2690 fseek(F, 0, SEEK_SET);
2691 i = 1 + fread(s+1, 1, flen, F);
2692 } else {
2693 for (i=j=1; j>0; i+=j) {
2694 s = (char *)xrealloc(s, i+4096);
2695 j = fread(s+i, 1, 4094, F);
2696 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002697 }
2698 s[i] = '\0';
2699 fclose(F);
2700 parse_program(s+1);
2701 free(s);
2702 break;
2703 case 'W':
Manuel Novoa III cad53642003-03-19 09:13:01 +00002704 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002705 break;
2706
2707 default:
Manuel Novoa III cad53642003-03-19 09:13:01 +00002708 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002709 }
2710 }
2711
2712 if (!from_file) {
2713 if (argc == optind)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002714 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002715 programname="cmd. line";
2716 parse_program(argv[optind++]);
2717
2718 }
2719
2720 /* fill in ARGV array */
2721 setvar_i(V[ARGC], argc - optind + 1);
2722 setari_u(V[ARGV], 0, "awk");
2723 for(i=optind; i < argc; i++)
2724 setari_u(V[ARGV], i+1-optind, argv[i]);
2725
2726 evaluate(beginseq.first, &tv);
2727 if (! mainseq.first && ! endseq.first)
2728 awk_exit(EXIT_SUCCESS);
2729
2730 /* input file could already be opened in BEGIN block */
2731 if (! iF) iF = next_input_file();
2732
2733 /* passing through input files */
2734 while (iF) {
2735
2736 nextfile = FALSE;
2737 setvar_i(V[FNR], 0);
2738
2739 while ((c = awk_getline(iF, V[F0])) > 0) {
2740
2741 nextrec = FALSE;
2742 incvar(V[NR]);
2743 incvar(V[FNR]);
2744 evaluate(mainseq.first, &tv);
2745
2746 if (nextfile)
2747 break;
2748 }
2749
2750 if (c < 0)
2751 runtime_error(strerror(errno));
2752
2753 iF = next_input_file();
2754
2755 }
2756
Glenn L McGrath545106f2002-11-11 06:21:00 +00002757 awk_exit(EXIT_SUCCESS);
2758
2759 return 0;
2760}
2761