/* vi: set sw=4 ts=4: */
/*
 * awk implementation for busybox
 *
 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
22
23#include <stdio.h>
24#include <stdlib.h>
25#include <unistd.h>
26#include <errno.h>
27#include <string.h>
28#include <time.h>
29#include <math.h>
30#include <ctype.h>
31#include <getopt.h>
32#include <regex.h>
33
34#include "busybox.h"
35
36
/* MAXVARFMT: size limit passed to fmt_num() when a number is formatted
 * into the shared buffer (see getvar_s()).
 * MINNVBLOCK: smallest number of var slots allocated per temporary block
 * (see nvalloc()). */
37#define MAXVARFMT 240
38#define MINNVBLOCK 64
39
/* Bits stored in var.type. The low byte describes the primary type,
 * the high byte holds state/bookkeeping bits. */
40/* variable flags */
41#define VF_NUMBER 0x0001 /* 1 = primary type is number */
42#define VF_ARRAY 0x0002 /* 1 = it's an array */
43
44#define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
45#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
46#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
47#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
48#define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
49#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
50#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
51
/* clrvar() masks var.type with this set, so these bits survive a value
 * change while all other bits are reset. */
52/* these flags are static, don't change them when value is changed */
53#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
54
/*
 * An awk value: numeric and/or string representation plus VF_* flags.
 * Which member of the union x is meaningful depends on the flags:
 * VF_ARRAY -> x.array, VF_CHILD -> x.parent, VF_WALK -> x.walker
 * (see iamarray() and nvfree()).
 */
55/* Variable */
56typedef struct var_s {
57 unsigned short type; /* flags */
58 double number; /* numeric value (valid when VF_NUMBER or cached) */
59 char *string; /* string value; NULL is read as "" by getvar_s() */
60 union {
61 int aidx; /* func arg index (on compilation stage) */
62 struct xhash_s *array; /* array ptr */
63 struct var_s *parent; /* for func args, ptr to actual parameter */
64 char **walker; /* list of array elements (for..in) */
65 } x;
66} var;
67
/*
 * A sequence of nodes built by chain_node(). `last` always points at a
 * not-yet-used trailing node that the next chain_node() call fills in.
 */
68/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
69typedef struct chain_s {
70 struct node_s *first; /* first node of the chain, NULL until used */
71 struct node_s *last; /* trailing placeholder node */
72 char *programname; /* source name last recorded for this chain */
73} chain;
74
/* A user-defined function, stored in the functions hash (fnhash). */
75/* Function */
76typedef struct func_s {
77 unsigned short nargs; /* presumably the declared parameter count -- TODO confirm */
78 struct chain_s body; /* node chain holding the function body */
79} func;
80
/*
 * State of one input/output stream; entries live in the redirect-streams
 * hash (fdhash) and the current input is reachable through the global iF.
 * NOTE(review): the code using buffer/adv/size/pos is outside this part of
 * the file; the field notes below are read off the names only -- confirm.
 */
81/* I/O stream */
82typedef struct rstream_s {
83 FILE *F; /* underlying stdio stream */
84 char *buffer; /* presumably the read buffer */
Glenn L McGrath00ed36f2003-10-30 13:36:39 +000085 int adv;
Glenn L McGrath545106f2002-11-11 06:21:00 +000086 int size;
87 int pos;
88 unsigned short is_pipe; /* presumably nonzero when opened as a pipe */
89} rstream;
90
/*
 * One entry of an xhash bucket chain.
 *
 * `data` must remain the FIRST member: hash_search() returns &item->data
 * and hash_find() stores that result back into a hash_item pointer, so
 * the address of `data` and the address of the item have to coincide.
 *
 * `name` is a trailing variable-length key; hash_find() allocates
 * sizeof(hash_item) plus the key length (including NUL) for each entry.
 */
91typedef struct hash_item_s {
92 union {
93 struct var_s v; /* variable/array hash */
94 struct rstream_s rs; /* redirect streams hash */
95 struct func_s f; /* functions hash */
96 } data;
97 struct hash_item_s *next; /* next in chain */
98 char name[1]; /* really it's longer */
99} hash_item;
100
/*
 * Chained hash table keyed by string. hash_find() triggers hash_rebuild()
 * once nel/csize exceeds 10; hash_rebuild() moves to PRIMES[nprime] and
 * advances nprime until all sizes are used up.
 */
101typedef struct xhash_s {
102 unsigned int nel; /* num of elements */
103 unsigned int csize; /* current hash size */
104 unsigned int nprime; /* next hash size in PRIMES[] */
105 unsigned int glen; /* summary length of item names */
106 struct hash_item_s **items; /* csize bucket heads */
107} xhash;
108
/*
 * Node of the parsed program. `info` combines an operation class
 * (OPCLSMASK), operand flags (OF_*), a priority (P()/PRIMASK) and a low
 * opcode byte. The meaning of the l/r/a unions depends on the operation:
 * e.g. mk_re_node() stores two compiled regexes in l.re / r.ire, and
 * chain_node() links consecutive chain nodes through a.n, while
 * parse_expr() uses a.n as the link to the node an operand hangs under.
 */
109/* Tree node */
110typedef struct node_s {
111 unsigned long info; /* operation class, flags, priority, opcode */
112 unsigned short lineno; /* source line recorded by new_node() */
113 union {
114 struct node_s *n;
115 var *v;
116 int i;
117 char *s;
118 regex_t *re;
119 } l;
120 union {
121 struct node_s *n;
122 regex_t *ire;
123 func *f;
124 int argno;
125 } r;
126 union {
127 struct node_s *n;
128 } a;
129} node;
130
/*
 * Blocks form a doubly linked list managed by nvalloc()/nvfree(); the
 * global cb points at the block currently allocated from. The slots
 * follow the header in the same allocation (nvalloc() allocates
 * sizeof(nvblock) + size * sizeof(var)).
 */
131/* Block of temporary variables */
132typedef struct nvblock_s {
133 int size; /* number of var slots in nv[] */
134 var *pos; /* first free slot; == nv when the block is empty */
135 struct nvblock_s *prev;
136 struct nvblock_s *next;
137 var nv[0]; /* zero-length trailing array: the slots themselves */
138} nvblock;
139
/*
 * A node bundled with storage for two compiled regexes; the globals
 * fsplitter and rsplitter have this type.
 * NOTE(review): presumably re[0]/re[1] are handed to mk_re_node(), which
 * compiles a case-sensitive and a REG_ICASE variant into re and re+1 --
 * the call site is outside this part of the file; confirm.
 */
140typedef struct tsplitter_s {
141 node n;
142 regex_t re[2];
143} tsplitter;
144
/*
 * Token classes are single bits so they can be OR-ed into "expected" sets.
 * next_token() scans tokenlist starting with class value 1 and shifts the
 * class left by one at every NTC marker, so the N-th group of tokenlist
 * corresponds to bit N-1 below. Classes from TC_EOF upwards have no entry
 * in tokenlist; next_token() assigns them directly.
 */
145/* simple token classes */
146/* Order and hex values are very important!!! See next_token() */
147#define TC_SEQSTART 1 /* ( */
148#define TC_SEQTERM (1 << 1) /* ) */
149#define TC_REGEXP (1 << 2) /* /.../ */
150#define TC_OUTRDR (1 << 3) /* | > >> */
151#define TC_UOPPOST (1 << 4) /* unary postfix operator */
152#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
153#define TC_BINOPX (1 << 6) /* two-opnd operator */
154#define TC_IN (1 << 7)
155#define TC_COMMA (1 << 8)
156#define TC_PIPE (1 << 9) /* input redirection pipe */
157#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
158#define TC_ARRTERM (1 << 11) /* ] */
159#define TC_GRPSTART (1 << 12) /* { */
160#define TC_GRPTERM (1 << 13) /* } */
161#define TC_SEMICOL (1 << 14)
162#define TC_NEWLINE (1 << 15)
163#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
164#define TC_WHILE (1 << 17)
165#define TC_ELSE (1 << 18)
166#define TC_BUILTIN (1 << 19)
167#define TC_GETLINE (1 << 20)
168#define TC_FUNCDECL (1 << 21) /* `function' `func' */
169#define TC_BEGIN (1 << 22)
170#define TC_END (1 << 23)
171#define TC_EOF (1 << 24)
172#define TC_VARIABLE (1 << 25)
173#define TC_ARRAY (1 << 26)
174#define TC_FUNCTION (1 << 27)
175#define TC_STRING (1 << 28)
176#define TC_NUMBER (1 << 29)
177
178#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
179
180/* combined token classes */
181#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
183#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
184 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
185
186#define TC_STATEMNT (TC_STATX | TC_WHILE)
187#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
188
/* next_token() always tries these classes, whether or not the caller
 * listed them as expected */
189/* word tokens, cannot mean something else if not expected */
190#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
191 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
192
193/* discard newlines after these */
194#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
195 TC_BINOP | TC_OPTERM)
196
197/* what can expression begin with */
198#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199/* what can group begin with */
200#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
201
202/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203/* operator is inserted between them */
204#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
205 TC_STRING | TC_NUMBER | TC_UOPPOST)
206#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
207
/* operand flags; they occupy bits 16..21 of a node's info word */
#define OF_RES1		0x010000
#define OF_RES2		0x020000
#define OF_STR1		0x040000
#define OF_STR2		0x080000
#define OF_NUM1		0x100000
#define OF_CHECKED	0x200000

/* combined operator flags */
#define xx	0
#define xV	OF_RES2
#define xS	(OF_RES2 | OF_STR2)
#define Vx	OF_RES1
#define VV	(OF_RES1 | OF_RES2)
#define Nx	(OF_RES1 | OF_NUM1)
#define NV	(OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx	(OF_RES1 | OF_STR1)
#define SV	(OF_RES1 | OF_STR1 | OF_RES2)
#define SS	(OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* masks selecting the operation class and the low opcode bits of info */
#define OPCLSMASK	0xFF00
#define OPNMASK		0x007F

/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() shifts in unsigned long: builtin descriptors such as 0xd6 set bit 7,
 * and shifting that into bit 31 of a signed int overflows (undefined, and
 * in practice sign-extends into the upper half of a 64-bit info word).
 * The argument is parenthesized so that any expression may be passed.
 */
#define P(x)		((unsigned long)(x) << 24)
#define PRIMASK		0x7F000000
#define PRIMASK2	0x7E000000
237
/*
 * The operation class occupies bits 8..15 of a node's info word and is
 * extracted with OPCLSMASK. ST_* values are statement keywords; within
 * this part of the file they are only switched on by chain_group().
 * NOTE(review): SHIFT_TIL_THIS / RECUR_FROM_THIS mark boundaries inside
 * the OC_* numbering (0x0600 == OC_WALKINIT, 0x1000 == OC_BINARY); their
 * consumer is outside this part of the file.
 */
238/* Operation classes */
239
240#define SHIFT_TIL_THIS 0x0600
241#define RECUR_FROM_THIS 0x1000
242
243enum {
244 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
245 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
246
247 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
248 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
249 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
250
251 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
252 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
253 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
254 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
255 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
256 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
257 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
258 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
259 OC_DONE=0x2800,
260
261 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
262 ST_WHILE=0x3300
263};
264
/* low-byte opcodes combined with OC_FBLTIN in tokeninfo[] */
265/* simple builtins */
266enum {
267 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
268 F_ti, F_le, F_sy, F_ff, F_cl
269};
270
/* low-byte opcodes combined with OC_BUILTIN (OC_B) in tokeninfo[] */
271/* builtins */
272enum {
273 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
274 B_ge, B_gs, B_su
275};
276
/*
 * tokenlist is a packed table read by next_token(): every token is stored
 * as one length byte (written as an octal escape, e.g. "\10" == 8)
 * followed by the token text. An NTC byte ends a token class and makes
 * next_token() shift its class bit left; a zero length byte ends the
 * table. The order of groups must match the TC_* bit order, and the order
 * of tokens must match tokeninfo[], which is walked in parallel.
 */
277/* tokens and their corresponding info values */
278
279#define NTC "\377" /* switch to next token class (tc<<1) */
280#define NTCC '\377'
281
282#define OC_B OC_BUILTIN
283
284static char * const tokenlist =
285 "\1(" NTC
286 "\1)" NTC
287 "\1/" NTC /* REGEXP */
288 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
289 "\2++" "\2--" NTC /* UOPPOST */
290 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
291 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
292 "\2*=" "\2/=" "\2%=" "\2^="
293 "\1+" "\1-" "\3**=" "\2**"
294 "\1/" "\1%" "\1^" "\1*"
295 "\2!=" "\2>=" "\2<=" "\1>"
296 "\1<" "\2!~" "\1~" "\2&&"
297 "\2||" "\1?" "\1:" NTC
298 "\2in" NTC
299 "\1," NTC
300 "\1|" NTC
301 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
302 "\1]" NTC
303 "\1{" NTC
304 "\1}" NTC
305 "\1;" NTC
306 "\1\n" NTC
307 "\2if" "\2do" "\3for" "\5break" /* STATX */
308 "\10continue" "\6delete" "\5print"
309 "\6printf" "\4next" "\10nextfile"
310 "\6return" "\4exit" NTC
311 "\5while" NTC
312 "\4else" NTC
313
314 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
315 "\3cos" "\3exp" "\3int" "\3log"
316 "\4rand" "\3sin" "\4sqrt" "\5srand"
317 "\6gensub" "\4gsub" "\5index" "\6length"
318 "\5match" "\5split" "\7sprintf" "\3sub"
319 "\6substr" "\7systime" "\10strftime"
320 "\7tolower" "\7toupper" NTC
321 "\7getline" NTC
322 "\4func" "\10function" NTC
323 "\5BEGIN" NTC
324 "\3END" "\0"
325 ;
326
/*
 * Info word for every token of tokenlist, in exactly the same order (one
 * entry per token; NTC markers have no entry). next_token() advances
 * through both tables in lock-step and copies the matching entry into
 * t.info. Each value is an OC_* / ST_* class OR-ed with operand flags, a
 * P() priority (or builtin argument descriptor) and a low opcode byte;
 * 0 means the token carries no extra information.
 */
327static unsigned long tokeninfo[] = {
328
329 0,
330 0,
331 OC_REGEXP,
332 xS|'a', xS|'w', xS|'|',
333 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
334 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
335 OC_FIELD|xV|P(5),
336 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
337 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
338 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
339 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
340 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
341 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
342 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
343 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
344 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
345 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
346 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
347 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
348 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
349 OC_COLON|xx|P(67)|':',
350 OC_IN|SV|P(49),
351 OC_COMMA|SS|P(80),
352 OC_PGETLINE|SV|P(37),
353 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
354 OC_UNARY|xV|P(19)|'!',
355 0,
356 0,
357 0,
358 0,
359 0,
360 ST_IF, ST_DO, ST_FOR, OC_BREAK,
361 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
362 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
363 OC_RETURN|Vx, OC_EXIT|Nx,
364 ST_WHILE,
365 0,
366
367 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
372 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
373 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
374 OC_GETLINE|SV|P(0),
375 0, 0,
376 0,
377 0
378};
379
/*
 * The enum gives the index of each built-in variable in V[]; vNames lists
 * the same variables, in the same order, as consecutive NUL-terminated
 * strings ended by an empty string.
 *
 * NOTE(review): as written here each '*' marker sits AFTER the NUL of
 * FS, RS, NF, IGNORECASE and $, i.e. textually it prefixes the FOLLOWING
 * name (OFS, RT, IGNORECASE, ENVIRON, and the terminator). The code that
 * parses these markers is outside this part of the file -- verify which
 * variables are meant to be SPECIAL.
 *
 * NOTE(review): vValues holds initial values as NUL-terminated strings in
 * enum order; the trailing "\377" presumably ends the string-valued
 * entries -- its consumer is outside this part of the file.
 */
380/* internal variable names and their initial values */
381/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
382enum {
383 CONVFMT=0, OFMT, FS, OFS,
384 ORS, RS, RT, FILENAME,
385 SUBSEP, ARGIND, ARGC, ARGV,
386 ERRNO, FNR,
387 NR, NF, IGNORECASE,
388 ENVIRON, F0, _intvarcount_
389};
390
391static char * vNames =
392 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
393 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
394 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
395 "ERRNO\0" "FNR\0"
396 "NR\0" "NF\0*" "IGNORECASE\0*"
397 "ENVIRON\0" "$\0*" "\0";
398
399static char * vValues =
400 "%.6g\0" "%.6g\0" " \0" " \0"
401 "\n\0" "\n\0" "\0" "\0"
402 "\034\0"
403 "\377";
404
/*
 * Hash sizing: a new table starts with FIRST_PRIME buckets (hash_init())
 * and hash_rebuild() steps through PRIMES[] as the table fills up.
 * FIRST_PRIME must not carry a trailing semicolon, otherwise it cannot be
 * used inside an expression.
 */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);
409
/*
 * File-wide interpreter state. Grounded in the code visible in this part
 * of the file:
 *   V[]            built-in variables, indexed by the CONVFMT.._intvarcount_ enum
 *   seq            chain currently being appended to by chain_node()
 *   break_ptr / continue_ptr   jump targets of the innermost loop (chain_loop())
 *   vhash, ahash, fdhash, fnhash   variables, function-argument names,
 *                  redirect streams and functions (see newvar/newfile/newfunc
 *                  and the ahash lookup in parse_expr())
 *   programname, lineno   position reported by syntax_error()
 *   cb             current block of temporaries (nvalloc()/nvfree())
 *   pos            read position of next_token() in the program text
 *   buf            scratch buffer used by getvar_s() for number formatting
 *   t              the current token as produced by next_token()
 * The remaining variables are used outside this part of the file.
 */
410/* globals */
411
412extern char **environ;
413
414static var * V[_intvarcount_];
415static chain beginseq, mainseq, endseq, *seq;
416static int nextrec, nextfile;
417static node *break_ptr, *continue_ptr;
418static rstream *iF;
419static xhash *vhash, *ahash, *fdhash, *fnhash;
420static char *programname;
421static short lineno;
422static int is_f0_split;
423static int nfields = 0;
424static var *Fields = NULL;
425static tsplitter fsplitter, rsplitter;
426static nvblock *cb = NULL;
427static char *pos;
428static char *buf;
429static int icase = FALSE;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +0000430static int exiting = FALSE;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000431
432static struct {
433 unsigned long tclass; /* TC_* class of the current token */
434 unsigned long info; /* tokeninfo[] entry (or synthesized info) */
435 char *string; /* text of a string, regexp or name token */
436 double number; /* value of a TC_NUMBER token */
437 short lineno; /* line counter maintained while scanning */
438 int rollback; /* set by rollback_token(): re-deliver this token */
439} t;
440
/* Forward declarations for functions used before their definition;
 * xregcomp() is defined outside this file. */
441/* function prototypes */
442extern void xregcomp(regex_t *preg, const char *regex, int cflags);
443static void handle_special(var *);
444static node *parse_expr(unsigned long);
445static void chain_group(void);
446static var *evaluate(node *, var *);
447static rstream *next_input_file(void);
448static int fmt_num(char *, int, char *, double, int);
449static int awk_exit(int);
450
451/* ---- error handling ---- */
452
/* Message texts passed to syntax_error() / runtime_error() */
453static const char EMSG_INTERNAL_ERROR[] = "Internal error";
454static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
455static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
456static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
457static const char EMSG_INV_FMT[] = "Invalid format specifier";
458static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
459static const char EMSG_NOT_ARRAY[] = "Not an array";
460static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
461static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
/* only needed when math support is configured out */
462#ifndef CONFIG_FEATURE_AWK_MATH
463static const char EMSG_NO_MATH[] = "Math support is not compiled in";
464#endif
465
Glenn L McGrathd4036f82002-11-28 09:30:40 +0000466static void syntax_error(const char * const message)
467{
Manuel Novoa III cad53642003-03-19 09:13:01 +0000468 bb_error_msg("%s:%i: %s", programname, lineno, message);
Glenn L McGrath00ed36f2003-10-30 13:36:39 +0000469 exit(1);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000470}
471
472#define runtime_error(x) syntax_error(x)
473
474
475/* ---- hash stuff ---- */
476
/*
 * Hash a NUL-terminated name: for every character c the running value h
 * becomes c + h*63 (written as c + (h << 6) - h), in unsigned arithmetic.
 * The caller reduces the result modulo the table size.
 */
static unsigned int hashidx(char *name)
{
	unsigned int h = 0;

	for (; *name != '\0'; name++)
		h = *name + (h << 6) - h;

	return h;
}
484
485/* create new hash */
486static xhash *hash_init(void) {
487
488 xhash *newhash;
489
490 newhash = (xhash *)xcalloc(1, sizeof(xhash));
491 newhash->csize = FIRST_PRIME;
492 newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
493
494 return newhash;
495}
496
497/* find item in hash, return ptr to data, NULL if not found */
498static void *hash_search(xhash *hash, char *name) {
499
500 hash_item *hi;
501
502 hi = hash->items [ hashidx(name) % hash->csize ];
503 while (hi) {
504 if (strcmp(hi->name, name) == 0)
505 return &(hi->data);
506 hi = hi->next;
507 }
508 return NULL;
509}
510
511/* grow hash if it becomes too big */
512static void hash_rebuild(xhash *hash) {
513
514 unsigned int newsize, i, idx;
515 hash_item **newitems, *hi, *thi;
516
517 if (hash->nprime == NPRIMES)
518 return;
519
520 newsize = PRIMES[hash->nprime++];
521 newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
522
523 for (i=0; i<hash->csize; i++) {
524 hi = hash->items[i];
525 while (hi) {
526 thi = hi;
527 hi = thi->next;
528 idx = hashidx(thi->name) % newsize;
529 thi->next = newitems[idx];
530 newitems[idx] = thi;
531 }
532 }
533
534 free(hash->items);
535 hash->csize = newsize;
536 hash->items = newitems;
537}
538
539/* find item in hash, add it if necessary. Return ptr to data */
540static void *hash_find(xhash *hash, char *name) {
541
542 hash_item *hi;
543 unsigned int idx;
544 int l;
545
546 hi = hash_search(hash, name);
547 if (! hi) {
548 if (++hash->nel / hash->csize > 10)
549 hash_rebuild(hash);
550
Manuel Novoa III cad53642003-03-19 09:13:01 +0000551 l = bb_strlen(name) + 1;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000552 hi = xcalloc(sizeof(hash_item) + l, 1);
553 memcpy(hi->name, name, l);
554
555 idx = hashidx(name) % hash->csize;
556 hi->next = hash->items[idx];
557 hash->items[idx] = hi;
558 hash->glen += l;
559 }
560 return &(hi->data);
561}
562
/*
 * Typed wrappers around hash_find(): each returns the (possibly newly
 * created, zero-filled) entry for `name`, cast to the payload type kept
 * in the respective table. findvar takes the table explicitly; the others
 * use the global vhash / fdhash / fnhash.
 */
563#define findvar(hash, name) (var *) hash_find ( (hash) , (name) )
564#define newvar(name) (var *) hash_find ( vhash , (name) )
565#define newfile(name) (rstream *) hash_find ( fdhash , (name) )
566#define newfunc(name) (func *) hash_find ( fnhash , (name) )
567
568static void hash_remove(xhash *hash, char *name) {
569
570 hash_item *hi, **phi;
571
572 phi = &(hash->items[ hashidx(name) % hash->csize ]);
573 while (*phi) {
574 hi = *phi;
575 if (strcmp(hi->name, name) == 0) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000576 hash->glen -= (bb_strlen(name) + 1);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000577 hash->nel--;
578 *phi = hi->next;
579 free(hi);
580 break;
581 }
582 phi = &(hi->next);
583 }
584}
585
586/* ------ some useful functions ------ */
587
588static void skip_spaces(char **s) {
589
590 register char *p = *s;
591
592 while(*p == ' ' || *p == '\t' ||
593 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
594 p++;
595 }
596 *s = p;
597}
598
/*
 * Return the NUL-terminated word at *s and move *s just past its
 * terminating NUL, i.e. to the start of the next packed word.
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
607
/*
 * Fetch the next character from *s and advance *s.
 * A backslash is handed to bb_process_escape_sequence(); when that call
 * yields a backslash without having advanced *s, the character after the
 * backslash is returned as-is and consumed.
 */
static char nextchar(char **s)
{
	char c = *((*s)++);
	char *after = *s;

	if (c == '\\') {
		c = bb_process_escape_sequence((const char **)s);
		if (c == '\\' && *s == after)
			c = *((*s)++);
	}
	return c;
}
618
/*
 * Return 1 if c may be part of an identifier (letter, digit or '_'),
 * 0 otherwise.
 *
 * Callers pass plain char values, which are negative for bytes >= 0x80
 * where char is signed. isalnum() is only defined for EOF and values
 * representable as unsigned char, so the argument is converted first.
 */
static inline int isalnum_(int c)
{
	return (c == '_' || isalnum((unsigned char)c));
}
623
/*
 * Open `path` with the given stdio `mode`; the path "-" stands for
 * standard input and returns stdin without opening anything.
 * All other paths are opened through bb_xfopen().
 */
static FILE *afopen(const char *path, const char *mode)
{
	if (strcmp(path, "-") == 0)
		return stdin;

	return bb_xfopen(path, mode);
}
628
629/* -------- working with variables (set/get/copy/etc) -------- */
630
631static xhash *iamarray(var *v) {
632
633 var *a = v;
634
635 while (a->type & VF_CHILD)
636 a = a->x.parent;
637
638 if (! (a->type & VF_ARRAY)) {
639 a->type |= VF_ARRAY;
640 a->x.array = hash_init();
641 }
642 return a->x.array;
643}
644
645static void clear_array(xhash *array) {
646
647 unsigned int i;
648 hash_item *hi, *thi;
649
650 for (i=0; i<array->csize; i++) {
651 hi = array->items[i];
652 while (hi) {
653 thi = hi;
654 hi = hi->next;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000655 free(thi->data.v.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000656 free(thi);
657 }
658 array->items[i] = NULL;
659 }
660 array->glen = array->nel = 0;
661}
662
663/* clear a variable */
664static var *clrvar(var *v) {
665
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000666 if (!(v->type & VF_FSTR))
Glenn L McGrath545106f2002-11-11 06:21:00 +0000667 free(v->string);
668
669 v->type &= VF_DONTTOUCH;
670 v->type |= VF_DIRTY;
671 v->string = NULL;
672 return v;
673}
674
675/* assign string value to variable */
676static var *setvar_p(var *v, char *value) {
677
678 clrvar(v);
679 v->string = value;
680 handle_special(v);
681
682 return v;
683}
684
685/* same as setvar_p but make a copy of string */
686static var *setvar_s(var *v, char *value) {
687
Manuel Novoa III cad53642003-03-19 09:13:01 +0000688 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000689}
690
691/* same as setvar_s but set USER flag */
692static var *setvar_u(var *v, char *value) {
693
694 setvar_s(v, value);
695 v->type |= VF_USER;
696 return v;
697}
698
699/* set array element to user string */
700static void setari_u(var *a, int idx, char *s) {
701
702 register var *v;
703 static char sidx[12];
704
705 sprintf(sidx, "%d", idx);
706 v = findvar(iamarray(a), sidx);
707 setvar_u(v, s);
708}
709
710/* assign numeric value to variable */
711static var *setvar_i(var *v, double value) {
712
713 clrvar(v);
714 v->type |= VF_NUMBER;
715 v->number = value;
716 handle_special(v);
717 return v;
718}
719
720static char *getvar_s(var *v) {
721
722 /* if v is numeric and has no cached string, convert it to string */
723 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
724 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
Manuel Novoa III cad53642003-03-19 09:13:01 +0000725 v->string = bb_xstrdup(buf);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000726 v->type |= VF_CACHED;
727 }
728 return (v->string == NULL) ? "" : v->string;
729}
730
731static double getvar_i(var *v) {
732
733 char *s;
734
735 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
736 v->number = 0;
737 s = v->string;
738 if (s && *s) {
739 v->number = strtod(s, &s);
740 if (v->type & VF_USER) {
741 skip_spaces(&s);
742 if (*s != '\0')
743 v->type &= ~VF_USER;
744 }
745 } else {
746 v->type &= ~VF_USER;
747 }
748 v->type |= VF_CACHED;
749 }
750 return v->number;
751}
752
753static var *copyvar(var *dest, var *src) {
754
755 if (dest != src) {
756 clrvar(dest);
757 dest->type |= (src->type & ~VF_DONTTOUCH);
758 dest->number = src->number;
759 if (src->string)
Manuel Novoa III cad53642003-03-19 09:13:01 +0000760 dest->string = bb_xstrdup(src->string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000761 }
762 handle_special(dest);
763 return dest;
764}
765
766static var *incvar(var *v) {
767
768 return setvar_i(v, getvar_i(v)+1.);
769}
770
771/* return true if v is number or numeric string */
772static int is_numeric(var *v) {
773
774 getvar_i(v);
775 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
776}
777
778/* return 1 when value of v corresponds to true, 0 otherwise */
779static int istrue(var *v) {
780
781 if (is_numeric(v))
782 return (v->number == 0) ? 0 : 1;
783 else
784 return (v->string && *(v->string)) ? 1 : 0;
785}
786
787/* temporary varables allocator. Last allocated should be first freed */
788static var *nvalloc(int n) {
789
790 nvblock *pb = NULL;
791 var *v, *r;
792 int size;
793
794 while (cb) {
795 pb = cb;
796 if ((cb->pos - cb->nv) + n <= cb->size) break;
797 cb = cb->next;
798 }
799
800 if (! cb) {
801 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
802 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
803 cb->size = size;
804 cb->pos = cb->nv;
805 cb->prev = pb;
806 cb->next = NULL;
807 if (pb) pb->next = cb;
808 }
809
810 v = r = cb->pos;
811 cb->pos += n;
812
813 while (v < cb->pos) {
814 v->type = 0;
815 v->string = NULL;
816 v++;
817 }
818
819 return r;
820}
821
822static void nvfree(var *v) {
823
824 var *p;
825
826 if (v < cb->nv || v >= cb->pos)
827 runtime_error(EMSG_INTERNAL_ERROR);
828
829 for (p=v; p<cb->pos; p++) {
830 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
831 clear_array(iamarray(p));
832 free(p->x.array->items);
833 free(p->x.array);
834 }
835 if (p->type & VF_WALK)
836 free(p->x.walker);
837
838 clrvar(p);
839 }
840
841 cb->pos = v;
842 while (cb->prev && cb->pos == cb->nv) {
843 cb = cb->prev;
844 }
845}
846
847/* ------- awk program text parsing ------- */
848
849/* Parse next token pointed by global pos, place results into global t.
850 * If token isn't expected, give away. Return token class
851 */
852static unsigned long next_token(unsigned long expected) {
853
854 char *p, *pp, *s;
855 char *tl;
856 unsigned long tc, *ti;
857 int l;
858 static int concat_inserted = FALSE;
859 static unsigned long save_tclass, save_info;
860 static unsigned long ltclass = TC_OPTERM;
861
862 if (t.rollback) {
863
864 t.rollback = FALSE;
865
866 } else if (concat_inserted) {
867
868 concat_inserted = FALSE;
869 t.tclass = save_tclass;
870 t.info = save_info;
871
872 } else {
873
874 p = pos;
875
876 readnext:
877 skip_spaces(&p);
878 lineno = t.lineno;
879 if (*p == '#')
880 while (*p != '\n' && *p != '\0') p++;
881
882 if (*p == '\n')
883 t.lineno++;
884
885 if (*p == '\0') {
886 tc = TC_EOF;
887
888 } else if (*p == '\"') {
889 /* it's a string */
890 t.string = s = ++p;
891 while (*p != '\"') {
892 if (*p == '\0' || *p == '\n')
893 syntax_error(EMSG_UNEXP_EOS);
894 *(s++) = nextchar(&p);
895 }
896 p++;
897 *s = '\0';
898 tc = TC_STRING;
899
900 } else if ((expected & TC_REGEXP) && *p == '/') {
901 /* it's regexp */
902 t.string = s = ++p;
903 while (*p != '/') {
904 if (*p == '\0' || *p == '\n')
905 syntax_error(EMSG_UNEXP_EOS);
906 if ((*s++ = *p++) == '\\') {
907 pp = p;
Manuel Novoa III cad53642003-03-19 09:13:01 +0000908 *(s-1) = bb_process_escape_sequence((const char **)&p);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000909 if (*pp == '\\') *s++ = '\\';
910 if (p == pp) *s++ = *p++;
911 }
912 }
913 p++;
914 *s = '\0';
915 tc = TC_REGEXP;
916
917 } else if (*p == '.' || isdigit(*p)) {
918 /* it's a number */
919 t.number = strtod(p, &p);
920 if (*p == '.')
921 syntax_error(EMSG_UNEXP_TOKEN);
922 tc = TC_NUMBER;
923
924 } else {
925 /* search for something known */
926 tl = tokenlist;
927 tc = 0x00000001;
928 ti = tokeninfo;
929 while (*tl) {
930 l = *(tl++);
931 if (l == NTCC) {
932 tc <<= 1;
933 continue;
934 }
935 /* if token class is expected, token
936 * matches and it's not a longer word,
937 * then this is what we are looking for
938 */
939 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
940 *tl == *p && strncmp(p, tl, l) == 0 &&
941 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
942 t.info = *ti;
943 p += l;
944 break;
945 }
946 ti++;
947 tl += l;
948 }
949
950 if (! *tl) {
951 /* it's a name (var/array/function),
952 * otherwise it's something wrong
953 */
954 if (! isalnum_(*p))
955 syntax_error(EMSG_UNEXP_TOKEN);
956
957 t.string = --p;
958 while(isalnum_(*(++p))) {
959 *(p-1) = *p;
960 }
961 *(p-1) = '\0';
962 tc = TC_VARIABLE;
963 if (*p == '(') {
964 tc = TC_FUNCTION;
965 } else {
966 skip_spaces(&p);
967 if (*p == '[') {
968 p++;
969 tc = TC_ARRAY;
970 }
971 }
972 }
973 }
974 pos = p;
975
976 /* skipping newlines in some cases */
977 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
978 goto readnext;
979
980 /* insert concatenation operator when needed */
981 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
982 concat_inserted = TRUE;
983 save_tclass = tc;
984 save_info = t.info;
985 tc = TC_BINOP;
986 t.info = OC_CONCAT | SS | P(35);
987 }
988
989 t.tclass = tc;
990 }
991 ltclass = t.tclass;
992
993 /* Are we ready for this? */
994 if (! (ltclass & expected))
995 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
996 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
997
998 return ltclass;
999}
1000
1001static void rollback_token(void) { t.rollback = TRUE; }
1002
1003static node *new_node(unsigned long info) {
1004
1005 register node *n;
1006
1007 n = (node *)xcalloc(sizeof(node), 1);
1008 n->info = info;
1009 n->lineno = lineno;
1010 return n;
1011}
1012
1013static node *mk_re_node(char *s, node *n, regex_t *re) {
1014
1015 n->info = OC_REGEXP;
1016 n->l.re = re;
1017 n->r.ire = re + 1;
1018 xregcomp(re, s, REG_EXTENDED);
1019 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1020
1021 return n;
1022}
1023
1024static node *condition(void) {
1025
1026 next_token(TC_SEQSTART);
1027 return parse_expr(TC_SEQTERM);
1028}
1029
/*
 * Overview (added in review):
 *  - `iexp` is the set of token classes that end the expression.
 *  - `sn` is a local dummy root; the finished tree hangs on sn.r.n and
 *    that pointer is the return value (NULL for an empty expression).
 *  - `cn` is the node added last, `xtc` the classes accepted next.
 *  - Within this function a.n links every node to the node it hangs
 *    under, which is what the priority climb walks along.
 *  - `glptr` remembers a getline node so that a following '<' can be
 *    attached to it as input redirection.
 */
1030/* parse expression terminated by given argument, return ptr
1031 * to built subtree. Terminator is eaten by parse_expr */
1032static node *parse_expr(unsigned long iexp) {
1033
1034 node sn;
1035 node *cn = &sn;
1036 node *vn, *glptr;
1037 unsigned long tc, xtc;
1038 var *v;
1039
/* the dummy root gets the maximum priority value, so the climb below
 * always stops before walking past it */
1040 sn.info = PRIMASK;
1041 sn.r.n = glptr = NULL;
1042 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1043
1044 while (! ((tc = next_token(xtc)) & iexp)) {
/* the info value tested here is the tokeninfo[] entry of the "<" token */
1045 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1046 /* input redirection (<) attached to glptr node */
1047 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1048 xtc = TC_OPERAND | TC_UOPPRE;
1049 glptr = NULL;
1050
1051 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1052 /* for binary and postfix-unary operators, jump back over
1053 * previous operators with higher priority */
1054 vn = cn;
1055 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1056 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1057 vn = vn->a.n;
/* the ternary's priority is raised only after the climb above */
1058 if ((t.info & OPCLSMASK) == OC_TERNARY)
1059 t.info += P(6);
/* splice the new operator in between vn and the node vn hung under */
1060 cn = vn->a.n->r.n = new_node(t.info);
1061 cn->a.n = vn->a.n;
1062 if (tc & TC_BINOP) {
1063 cn->l.n = vn;
1064 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1065 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1066 /* it's a pipe */
1067 next_token(TC_GETLINE);
1068 /* give maximum priority to this pipe */
1069 cn->info &= ~PRIMASK;
1070 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1071 }
1072 } else {
1073 cn->r.n = vn;
1074 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1075 }
1076 vn->a.n = cn;
1077
1078 } else {
1079 /* for operands and prefix-unary operators, attach them
1080 * to last node */
1081 vn = cn;
1082 cn = vn->r.n = new_node(t.info);
1083 cn->a.n = vn;
1084 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1085 if (tc & (TC_OPERAND | TC_REGEXP)) {
1086 xtc = TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1087 /* one should be very careful with switch on tclass -
1088 * only simple tclasses should be used! */
1089 switch (tc) {
1090 case TC_VARIABLE:
1091 case TC_ARRAY:
1092 cn->info = OC_VAR;
/* a name found in ahash is a function argument and is referenced
 * by its index; any other name is a variable from vhash */
1093 if ((v = hash_search(ahash, t.string)) != NULL) {
1094 cn->info = OC_FNARG;
1095 cn->l.i = v->x.aidx;
1096 } else {
1097 cn->l.v = newvar(t.string);
1098 }
1099 if (tc & TC_ARRAY) {
/* next_token() already consumed the '['; parse the subscript up to ']' */
1100 cn->info |= xS;
1101 cn->r.n = parse_expr(TC_ARRTERM);
1102 }
1103 xtc = TC_UOPPOST | TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1104 break;
1105
1106 case TC_NUMBER:
1107 case TC_STRING:
/* constants are stored as anonymous variables owned by the node */
1108 cn->info = OC_VAR;
1109 v = cn->l.v = xcalloc(sizeof(var), 1);
1110 if (tc & TC_NUMBER)
1111 setvar_i(v, t.number);
1112 else
1113 setvar_s(v, t.string);
1114 break;
1115
1116 case TC_REGEXP:
1117 mk_re_node(t.string, cn,
1118 (regex_t *)xcalloc(sizeof(regex_t),2));
1119 break;
1120
1121 case TC_FUNCTION:
/* newfunc() creates the entry if the function is not known yet */
1122 cn->info = OC_FUNC;
1123 cn->r.f = newfunc(t.string);
1124 cn->l.n = condition();
1125 break;
1126
1127 case TC_SEQSTART:
/* the parenthesized subtree replaces the node allocated above */
1128 cn = vn->r.n = parse_expr(TC_SEQTERM);
1129 cn->a.n = vn;
1130 break;
1131
1132 case TC_GETLINE:
1133 glptr = cn;
1134 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1135 break;
1136
1137 case TC_BUILTIN:
1138 cn->l.n = condition();
1139 break;
1140 }
1141 }
1142 }
1143 }
1144 return sn.r.n;
1145}
1146
1147/* add node to chain. Return ptr to alloc'd node */
1148static node *chain_node(unsigned long info) {
1149
1150 register node *n;
1151
1152 if (! seq->first)
1153 seq->first = seq->last = new_node(0);
1154
1155 if (seq->programname != programname) {
1156 seq->programname = programname;
1157 n = chain_node(OC_NEWSOURCE);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001158 n->l.s = bb_xstrdup(programname);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001159 }
1160
1161 n = seq->last;
1162 n->info = info;
1163 seq->last = n->a.n = new_node(OC_DONE);
1164
1165 return n;
1166}
1167
1168static void chain_expr(unsigned long info) {
1169
1170 node *n;
1171
1172 n = chain_node(info);
1173 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1174 if (t.tclass & TC_GRPTERM)
1175 rollback_token();
1176}
1177
1178static node *chain_loop(node *nn) {
1179
1180 node *n, *n2, *save_brk, *save_cont;
1181
1182 save_brk = break_ptr;
1183 save_cont = continue_ptr;
1184
1185 n = chain_node(OC_BR | Vx);
1186 continue_ptr = new_node(OC_EXEC);
1187 break_ptr = new_node(OC_EXEC);
1188 chain_group();
1189 n2 = chain_node(OC_EXEC | Vx);
1190 n2->l.n = nn;
1191 n2->a.n = n;
1192 continue_ptr->a.n = n2;
1193 break_ptr->a.n = n->r.n = seq->last;
1194
1195 continue_ptr = save_cont;
1196 break_ptr = save_brk;
1197
1198 return n;
1199}
1200
1201/* parse group and attach it to chain */
1202static void chain_group(void) {
1203
1204 unsigned long c;
1205 node *n, *n2, *n3;
1206
1207 do {
1208 c = next_token(TC_GRPSEQ);
1209 } while (c & TC_NEWLINE);
1210
1211 if (c & TC_GRPSTART) {
1212 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1213 rollback_token();
1214 chain_group();
1215 }
1216 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1217 rollback_token();
1218 chain_expr(OC_EXEC | Vx);
1219 } else { /* TC_STATEMNT */
1220 switch (t.info & OPCLSMASK) {
1221 case ST_IF:
1222 n = chain_node(OC_BR | Vx);
1223 n->l.n = condition();
1224 chain_group();
1225 n2 = chain_node(OC_EXEC);
1226 n->r.n = seq->last;
1227 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1228 chain_group();
1229 n2->a.n = seq->last;
1230 } else {
1231 rollback_token();
1232 }
1233 break;
1234
1235 case ST_WHILE:
1236 n2 = condition();
1237 n = chain_loop(NULL);
1238 n->l.n = n2;
1239 break;
1240
1241 case ST_DO:
1242 n2 = chain_node(OC_EXEC);
1243 n = chain_loop(NULL);
1244 n2->a.n = n->a.n;
1245 next_token(TC_WHILE);
1246 n->l.n = condition();
1247 break;
1248
1249 case ST_FOR:
1250 next_token(TC_SEQSTART);
1251 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1252 if (t.tclass & TC_SEQTERM) { /* for-in */
1253 if ((n2->info & OPCLSMASK) != OC_IN)
1254 syntax_error(EMSG_UNEXP_TOKEN);
1255 n = chain_node(OC_WALKINIT | VV);
1256 n->l.n = n2->l.n;
1257 n->r.n = n2->r.n;
1258 n = chain_loop(NULL);
1259 n->info = OC_WALKNEXT | Vx;
1260 n->l.n = n2->l.n;
1261 } else { /* for(;;) */
1262 n = chain_node(OC_EXEC | Vx);
1263 n->l.n = n2;
1264 n2 = parse_expr(TC_SEMICOL);
1265 n3 = parse_expr(TC_SEQTERM);
1266 n = chain_loop(n3);
1267 n->l.n = n2;
1268 if (! n2)
1269 n->info = OC_EXEC;
1270 }
1271 break;
1272
1273 case OC_PRINT:
1274 case OC_PRINTF:
1275 n = chain_node(t.info);
1276 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1277 if (t.tclass & TC_OUTRDR) {
1278 n->info |= t.info;
1279 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1280 }
1281 if (t.tclass & TC_GRPTERM)
1282 rollback_token();
1283 break;
1284
1285 case OC_BREAK:
1286 n = chain_node(OC_EXEC);
1287 n->a.n = break_ptr;
1288 break;
1289
1290 case OC_CONTINUE:
1291 n = chain_node(OC_EXEC);
1292 n->a.n = continue_ptr;
1293 break;
1294
1295 /* delete, next, nextfile, return, exit */
1296 default:
1297 chain_expr(t.info);
1298
1299 }
1300 }
1301}
1302
1303static void parse_program(char *p) {
1304
1305 unsigned long tclass;
1306 node *cn;
1307 func *f;
1308 var *v;
1309
1310 pos = p;
1311 t.lineno = 1;
1312 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1313 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1314
1315 if (tclass & TC_OPTERM)
1316 continue;
1317
1318 seq = &mainseq;
1319 if (tclass & TC_BEGIN) {
1320 seq = &beginseq;
1321 chain_group();
1322
1323 } else if (tclass & TC_END) {
1324 seq = &endseq;
1325 chain_group();
1326
1327 } else if (tclass & TC_FUNCDECL) {
1328 next_token(TC_FUNCTION);
1329 pos++;
1330 f = newfunc(t.string);
1331 f->body.first = NULL;
1332 f->nargs = 0;
1333 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1334 v = findvar(ahash, t.string);
1335 v->x.aidx = (f->nargs)++;
1336
1337 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1338 break;
1339 }
1340 seq = &(f->body);
1341 chain_group();
1342 clear_array(ahash);
1343
1344 } else if (tclass & TC_OPSEQ) {
1345 rollback_token();
1346 cn = chain_node(OC_TEST);
1347 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1348 if (t.tclass & TC_GRPSTART) {
1349 rollback_token();
1350 chain_group();
1351 } else {
1352 chain_node(OC_PRINT);
1353 }
1354 cn->r.n = mainseq.last;
1355
1356 } else /* if (tclass & TC_GRPSTART) */ {
1357 rollback_token();
1358 chain_group();
1359 }
1360 }
1361}
1362
1363
1364/* -------- program execution part -------- */
1365
1366static node *mk_splitter(char *s, tsplitter *spl) {
1367
1368 register regex_t *re, *ire;
1369 node *n;
1370
1371 re = &spl->re[0];
1372 ire = &spl->re[1];
1373 n = &spl->n;
1374 if ((n->info && OPCLSMASK) == OC_REGEXP) {
1375 regfree(re);
1376 regfree(ire);
1377 }
Manuel Novoa III cad53642003-03-19 09:13:01 +00001378 if (bb_strlen(s) > 1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001379 mk_re_node(s, n, re);
1380 } else {
1381 n->info = (unsigned long) *s;
1382 }
1383
1384 return n;
1385}
1386
1387/* use node as a regular expression. Supplied with node ptr and regex_t
1388 * storage space. Return ptr to regex (if result points to preg, it shuold
1389 * be later regfree'd manually
1390 */
1391static regex_t *as_regex(node *op, regex_t *preg) {
1392
1393 var *v;
1394 char *s;
1395
1396 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1397 return icase ? op->r.ire : op->l.re;
1398 } else {
1399 v = nvalloc(1);
1400 s = getvar_s(evaluate(op, v));
1401 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1402 nvfree(v);
1403 return preg;
1404 }
1405}
1406
/*
 * Gradually growing buffer.
 *
 * Make sure *b is allocated and that *size is greater than n.  When the
 * buffer has to grow, the new size is n + n/2 + 80 and is stored in
 * *size.  *size is not read while *b is NULL.
 */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1413
1414/* resize field storage space */
1415static void fsrealloc(int size) {
1416
1417 static int maxfields = 0;
1418 int i;
1419
1420 if (size >= maxfields) {
1421 i = maxfields;
1422 maxfields = size + 16;
1423 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1424 for (; i<maxfields; i++) {
1425 Fields[i].type = VF_SPECIAL;
1426 Fields[i].string = NULL;
1427 }
1428 }
1429
1430 if (size < nfields) {
1431 for (i=size; i<nfields; i++) {
1432 clrvar(Fields+i);
1433 }
1434 }
1435 nfields = size;
1436}
1437
1438static int awk_split(char *s, node *spl, char **slist) {
1439
1440 int l, n=0;
1441 char c[4];
1442 char *s1;
1443 regmatch_t pmatch[2];
1444
1445 /* in worst case, each char would be a separate field */
Manuel Novoa III cad53642003-03-19 09:13:01 +00001446 *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001447
1448 c[0] = c[1] = (char)spl->info;
1449 c[2] = c[3] = '\0';
1450 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1451
1452 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1453 while (*s) {
1454 l = strcspn(s, c+2);
1455 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1456 pmatch[0].rm_so <= l) {
1457 l = pmatch[0].rm_so;
1458 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1459 } else {
1460 pmatch[0].rm_eo = l;
1461 if (*(s+l)) pmatch[0].rm_eo++;
1462 }
1463
1464 memcpy(s1, s, l);
1465 *(s1+l) = '\0';
1466 nextword(&s1);
1467 s += pmatch[0].rm_eo;
1468 n++;
1469 }
1470 } else if (c[0] == '\0') { /* null split */
1471 while(*s) {
1472 *(s1++) = *(s++);
1473 *(s1++) = '\0';
1474 n++;
1475 }
1476 } else if (c[0] != ' ') { /* single-character split */
1477 if (icase) {
1478 c[0] = toupper(c[0]);
1479 c[1] = tolower(c[1]);
1480 }
1481 if (*s1) n++;
1482 while ((s1 = strpbrk(s1, c))) {
1483 *(s1++) = '\0';
1484 n++;
1485 }
1486 } else { /* space split */
1487 while (*s) {
1488 while (isspace(*s)) s++;
1489 if (! *s) break;
1490 n++;
1491 while (*s && !isspace(*s))
1492 *(s1++) = *(s++);
1493 *(s1++) = '\0';
1494 }
1495 }
1496 return n;
1497}
1498
1499static void split_f0(void) {
1500
1501 static char *fstrings = NULL;
1502 int i, n;
1503 char *s;
1504
1505 if (is_f0_split)
1506 return;
1507
1508 is_f0_split = TRUE;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00001509 free(fstrings);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001510 fsrealloc(0);
1511 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1512 fsrealloc(n);
1513 s = fstrings;
1514 for (i=0; i<n; i++) {
1515 Fields[i].string = nextword(&s);
1516 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1517 }
1518
1519 /* set NF manually to avoid side effects */
1520 clrvar(V[NF]);
1521 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1522 V[NF]->number = nfields;
1523}
1524
1525/* perform additional actions when some internal variables changed */
1526static void handle_special(var *v) {
1527
1528 int n;
1529 char *b, *sep, *s;
1530 int sl, l, len, i, bsize;
1531
1532 if (! (v->type & VF_SPECIAL))
1533 return;
1534
1535 if (v == V[NF]) {
1536 n = (int)getvar_i(v);
1537 fsrealloc(n);
1538
1539 /* recalculate $0 */
1540 sep = getvar_s(V[OFS]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001541 sl = bb_strlen(sep);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001542 b = NULL;
1543 len = 0;
1544 for (i=0; i<n; i++) {
1545 s = getvar_s(&Fields[i]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001546 l = bb_strlen(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001547 if (b) {
1548 memcpy(b+len, sep, sl);
1549 len += sl;
1550 }
1551 qrealloc(&b, len+l+sl, &bsize);
1552 memcpy(b+len, s, l);
1553 len += l;
1554 }
1555 b[len] = '\0';
1556 setvar_p(V[F0], b);
1557 is_f0_split = TRUE;
1558
1559 } else if (v == V[F0]) {
1560 is_f0_split = FALSE;
1561
1562 } else if (v == V[FS]) {
1563 mk_splitter(getvar_s(v), &fsplitter);
1564
1565 } else if (v == V[RS]) {
1566 mk_splitter(getvar_s(v), &rsplitter);
1567
1568 } else if (v == V[IGNORECASE]) {
1569 icase = istrue(v);
1570
1571 } else { /* $n */
1572 n = getvar_i(V[NF]);
1573 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1574 /* right here v is invalid. Just to note... */
1575 }
1576}
1577
1578/* step through func/builtin/etc arguments */
1579static node *nextarg(node **pn) {
1580
1581 node *n;
1582
1583 n = *pn;
1584 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1585 *pn = n->r.n;
1586 n = n->l.n;
1587 } else {
1588 *pn = NULL;
1589 }
1590 return n;
1591}
1592
1593static void hashwalk_init(var *v, xhash *array) {
1594
1595 char **w;
1596 hash_item *hi;
1597 int i;
1598
1599 if (v->type & VF_WALK)
1600 free(v->x.walker);
1601
1602 v->type |= VF_WALK;
1603 w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1604 *w = *(w+1) = (char *)(w + 2);
1605 for (i=0; i<array->csize; i++) {
1606 hi = array->items[i];
1607 while(hi) {
1608 strcpy(*w, hi->name);
1609 nextword(w);
1610 hi = hi->next;
1611 }
1612 }
1613}
1614
1615static int hashwalk_next(var *v) {
1616
1617 char **w;
1618
1619 w = v->x.walker;
1620 if (*(w+1) == *w)
1621 return FALSE;
1622
1623 setvar_s(v, nextword(w+1));
1624 return TRUE;
1625}
1626
1627/* evaluate node, return 1 when result is true, 0 otherwise */
1628static int ptest(node *pattern) {
1629 static var v;
1630
1631 return istrue(evaluate(pattern, &v));
1632}
1633
1634/* read next record from stream rsm into a variable v */
1635static int awk_getline(rstream *rsm, var *v) {
1636
1637 char *b;
1638 regmatch_t pmatch[2];
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001639 int a, p, pp=0, size;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001640 int fd, so, eo, r, rp;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001641 char c, *m, *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001642
1643 /* we're using our own buffer since we need access to accumulating
1644 * characters
1645 */
1646 fd = fileno(rsm->F);
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001647 m = rsm->buffer;
1648 a = rsm->adv;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001649 p = rsm->pos;
1650 size = rsm->size;
1651 c = (char) rsplitter.n.info;
1652 rp = 0;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001653
1654 if (! m) qrealloc(&m, 256, &size);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001655 do {
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001656 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001657 so = eo = p;
1658 r = 1;
1659 if (p > 0) {
1660 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1661 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1662 b, 1, pmatch, 0) == 0) {
1663 so = pmatch[0].rm_so;
1664 eo = pmatch[0].rm_eo;
1665 if (b[eo] != '\0')
1666 break;
1667 }
1668 } else if (c != '\0') {
1669 s = strchr(b+pp, c);
1670 if (s) {
1671 so = eo = s-b;
1672 eo++;
1673 break;
1674 }
1675 } else {
1676 while (b[rp] == '\n')
1677 rp++;
1678 s = strstr(b+rp, "\n\n");
1679 if (s) {
1680 so = eo = s-b;
1681 while (b[eo] == '\n') eo++;
1682 if (b[eo] != '\0')
1683 break;
1684 }
1685 }
1686 }
1687
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001688 if (a > 0) {
1689 memmove(m, (const void *)(m+a), p+1);
1690 b = m;
1691 a = 0;
1692 }
1693
1694 qrealloc(&m, a+p+128, &size);
1695 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001696 pp = p;
1697 p += safe_read(fd, b+p, size-p-1);
1698 if (p < pp) {
1699 p = 0;
1700 r = 0;
1701 setvar_i(V[ERRNO], errno);
1702 }
1703 b[p] = '\0';
1704
1705 } while (p > pp);
1706
1707 if (p == 0) {
1708 r--;
1709 } else {
1710 c = b[so]; b[so] = '\0';
1711 setvar_s(v, b+rp);
1712 v->type |= VF_USER;
1713 b[so] = c;
1714 c = b[eo]; b[eo] = '\0';
1715 setvar_s(V[RT], b+so);
1716 b[eo] = c;
1717 }
1718
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001719 rsm->buffer = m;
1720 rsm->adv = a + eo;
1721 rsm->pos = p - eo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001722 rsm->size = size;
1723
1724 return r;
1725}
1726
1727static int fmt_num(char *b, int size, char *format, double n, int int_as_int) {
1728
1729 int r=0;
1730 char c, *s=format;
1731
1732 if (int_as_int && n == (int)n) {
1733 r = snprintf(b, size, "%d", (int)n);
1734 } else {
1735 do { c = *s; } while (*s && *++s);
1736 if (strchr("diouxX", c)) {
1737 r = snprintf(b, size, format, (int)n);
1738 } else if (strchr("eEfgG", c)) {
1739 r = snprintf(b, size, format, n);
1740 } else {
1741 runtime_error(EMSG_INV_FMT);
1742 }
1743 }
1744 return r;
1745}
1746
1747
1748/* formatted output into an allocated buffer, return ptr to buffer */
1749static char *awk_printf(node *n) {
1750
1751 char *b = NULL;
1752 char *fmt, *s, *s1, *f;
1753 int i, j, incr, bsize;
1754 char c, c1;
1755 var *v, *arg;
1756
1757 v = nvalloc(1);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001758 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
Glenn L McGrath545106f2002-11-11 06:21:00 +00001759
1760 i = 0;
1761 while (*f) {
1762 s = f;
1763 while (*f && (*f != '%' || *(++f) == '%'))
1764 f++;
1765 while (*f && !isalpha(*f))
1766 f++;
1767
1768 incr = (f - s) + MAXVARFMT;
1769 qrealloc(&b, incr+i, &bsize);
1770 c = *f; if (c != '\0') f++;
1771 c1 = *f ; *f = '\0';
1772 arg = evaluate(nextarg(&n), v);
1773
1774 j = i;
1775 if (c == 'c' || !c) {
1776 i += sprintf(b+i, s,
1777 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1778
1779 } else if (c == 's') {
1780 s1 = getvar_s(arg);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001781 qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001782 i += sprintf(b+i, s, s1);
1783
1784 } else {
1785 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1786 }
1787 *f = c1;
1788
1789 /* if there was an error while sprintf, return value is negative */
1790 if (i < j) i = j;
1791
1792 }
1793
1794 b = xrealloc(b, i+1);
1795 free(fmt);
1796 nvfree(v);
1797 b[i] = '\0';
1798 return b;
1799}
1800
1801/* common substitution routine
1802 * replace (nm) substring of (src) that match (n) with (repl), store
1803 * result into (dest), return number of substitutions. If nm=0, replace
1804 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1805 * subexpression matching (\1-\9)
1806 */
1807static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex) {
1808
1809 char *ds = NULL;
1810 char *sp, *s;
1811 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1812 regmatch_t pmatch[10];
1813 regex_t sreg, *re;
1814
1815 re = as_regex(rn, &sreg);
1816 if (! src) src = V[F0];
1817 if (! dest) dest = V[F0];
1818
1819 i = di = 0;
1820 sp = getvar_s(src);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001821 rl = bb_strlen(repl);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001822 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1823 so = pmatch[0].rm_so;
1824 eo = pmatch[0].rm_eo;
1825
1826 qrealloc(&ds, di + eo + rl, &dssize);
1827 memcpy(ds + di, sp, eo);
1828 di += eo;
1829 if (++i >= nm) {
1830 /* replace */
1831 di -= (eo - so);
1832 nbs = 0;
1833 for (s = repl; *s; s++) {
1834 ds[di++] = c = *s;
1835 if (c == '\\') {
1836 nbs++;
1837 continue;
1838 }
1839 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1840 di -= ((nbs + 3) >> 1);
1841 j = 0;
1842 if (c != '&') {
1843 j = c - '0';
1844 nbs++;
1845 }
1846 if (nbs % 2) {
1847 ds[di++] = c;
1848 } else {
1849 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1850 qrealloc(&ds, di + rl + n, &dssize);
1851 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1852 di += n;
1853 }
1854 }
1855 nbs = 0;
1856 }
1857 }
1858
1859 sp += eo;
1860 if (i == nm) break;
1861 if (eo == so) {
1862 if (! (ds[di++] = *sp++)) break;
1863 }
1864 }
1865
1866 qrealloc(&ds, di + strlen(sp), &dssize);
1867 strcpy(ds + di, sp);
1868 setvar_p(dest, ds);
1869 if (re == &sreg) regfree(re);
1870 return i;
1871}
1872
1873static var *exec_builtin(node *op, var *res) {
1874
1875 int (*to_xxx)(int);
1876 var *tv;
1877 node *an[4];
1878 var *av[4];
1879 char *as[4];
1880 regmatch_t pmatch[2];
1881 regex_t sreg, *re;
1882 static tsplitter tspl;
1883 node *spl;
1884 unsigned long isr, info;
1885 int nargs;
1886 time_t tt;
1887 char *s, *s1;
1888 int i, l, ll, n;
1889
1890 tv = nvalloc(4);
1891 isr = info = op->info;
1892 op = op->l.n;
1893
1894 av[2] = av[3] = NULL;
1895 for (i=0 ; i<4 && op ; i++) {
1896 an[i] = nextarg(&op);
1897 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1898 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1899 isr >>= 1;
1900 }
1901
1902 nargs = i;
1903 if (nargs < (info >> 30))
1904 runtime_error(EMSG_TOO_FEW_ARGS);
1905
1906 switch (info & OPNMASK) {
1907
1908 case B_a2:
1909#ifdef CONFIG_FEATURE_AWK_MATH
1910 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1911#else
1912 runtime_error(EMSG_NO_MATH);
1913#endif
1914 break;
1915
1916 case B_sp:
1917 if (nargs > 2) {
1918 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1919 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1920 } else {
1921 spl = &fsplitter.n;
1922 }
1923
1924 n = awk_split(as[0], spl, &s);
1925 s1 = s;
1926 clear_array(iamarray(av[1]));
1927 for (i=1; i<=n; i++)
1928 setari_u(av[1], i, nextword(&s1));
1929 free(s);
1930 setvar_i(res, n);
1931 break;
1932
1933 case B_ss:
Manuel Novoa III cad53642003-03-19 09:13:01 +00001934 l = bb_strlen(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001935 i = getvar_i(av[1]) - 1;
1936 if (i>l) i=l; if (i<0) i=0;
1937 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1938 if (n<0) n=0;
1939 s = xmalloc(n+1);
1940 strncpy(s, as[0]+i, n);
1941 s[n] = '\0';
1942 setvar_p(res, s);
1943 break;
1944
1945 case B_lo:
1946 to_xxx = tolower;
1947 goto lo_cont;
1948
1949 case B_up:
1950 to_xxx = toupper;
1951lo_cont:
Manuel Novoa III cad53642003-03-19 09:13:01 +00001952 s1 = s = bb_xstrdup(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001953 while (*s1) {
1954 *s1 = (*to_xxx)(*s1);
1955 s1++;
1956 }
1957 setvar_p(res, s);
1958 break;
1959
1960 case B_ix:
1961 n = 0;
Manuel Novoa III cad53642003-03-19 09:13:01 +00001962 ll = bb_strlen(as[1]);
1963 l = bb_strlen(as[0]) - ll;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001964 if (ll > 0 && l >= 0) {
1965 if (! icase) {
1966 s = strstr(as[0], as[1]);
1967 if (s) n = (s - as[0]) + 1;
1968 } else {
1969 /* this piece of code is terribly slow and
1970 * really should be rewritten
1971 */
1972 for (i=0; i<=l; i++) {
1973 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1974 n = i+1;
1975 break;
1976 }
1977 }
1978 }
1979 }
1980 setvar_i(res, n);
1981 break;
1982
1983 case B_ti:
1984 if (nargs > 1)
1985 tt = getvar_i(av[1]);
1986 else
1987 time(&tt);
1988 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1989 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1990 buf[i] = '\0';
1991 setvar_s(res, buf);
1992 break;
1993
1994 case B_ma:
1995 re = as_regex(an[1], &sreg);
1996 n = regexec(re, as[0], 1, pmatch, 0);
1997 if (n == 0) {
1998 pmatch[0].rm_so++;
1999 pmatch[0].rm_eo++;
2000 } else {
2001 pmatch[0].rm_so = 0;
2002 pmatch[0].rm_eo = -1;
2003 }
2004 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2005 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2006 setvar_i(res, pmatch[0].rm_so);
2007 if (re == &sreg) regfree(re);
2008 break;
2009
2010 case B_ge:
2011 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2012 break;
2013
2014 case B_gs:
2015 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2016 break;
2017
2018 case B_su:
2019 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2020 break;
2021 }
2022
2023 nvfree(tv);
2024 return res;
2025}
2026
2027/*
2028 * Evaluate node - the heart of the program. Supplied with subtree
2029 * and place where to store result. returns ptr to result.
2030 */
2031#define XC(n) ((n) >> 8)
2032
2033static var *evaluate(node *op, var *res) {
2034
2035 /* This procedure is recursive so we should count every byte */
2036 static var *fnargs = NULL;
2037 static unsigned int seed = 1;
2038 static regex_t sreg;
2039 node *op1;
2040 var *v1;
2041 union {
2042 var *v;
2043 char *s;
2044 double d;
2045 int i;
2046 } L, R;
2047 unsigned long opinfo;
2048 short opn;
2049 union {
2050 char *s;
2051 rstream *rsm;
2052 FILE *F;
2053 var *v;
2054 regex_t *re;
2055 unsigned long info;
2056 } X;
2057
2058 if (! op)
2059 return setvar_s(res, NULL);
2060
2061 v1 = nvalloc(2);
2062
2063 while (op) {
2064
2065 opinfo = op->info;
2066 opn = (short)(opinfo & OPNMASK);
2067 lineno = op->lineno;
2068
2069 /* execute inevitable things */
2070 op1 = op->l.n;
2071 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2072 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2073 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2074 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2075 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2076
2077 switch (XC(opinfo & OPCLSMASK)) {
2078
2079 /* -- iterative node type -- */
2080
2081 /* test pattern */
2082 case XC( OC_TEST ):
2083 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2084 /* it's range pattern */
2085 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2086 op->info |= OF_CHECKED;
2087 if (ptest(op1->r.n))
2088 op->info &= ~OF_CHECKED;
2089
2090 op = op->a.n;
2091 } else {
2092 op = op->r.n;
2093 }
2094 } else {
2095 op = (ptest(op1)) ? op->a.n : op->r.n;
2096 }
2097 break;
2098
2099 /* just evaluate an expression, also used as unconditional jump */
2100 case XC( OC_EXEC ):
2101 break;
2102
2103 /* branch, used in if-else and various loops */
2104 case XC( OC_BR ):
2105 op = istrue(L.v) ? op->a.n : op->r.n;
2106 break;
2107
2108 /* initialize for-in loop */
2109 case XC( OC_WALKINIT ):
2110 hashwalk_init(L.v, iamarray(R.v));
2111 break;
2112
2113 /* get next array item */
2114 case XC( OC_WALKNEXT ):
2115 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2116 break;
2117
2118 case XC( OC_PRINT ):
2119 case XC( OC_PRINTF ):
2120 X.F = stdout;
2121 if (op->r.n) {
2122 X.rsm = newfile(R.s);
2123 if (! X.rsm->F) {
2124 if (opn == '|') {
2125 if((X.rsm->F = popen(R.s, "w")) == NULL)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002126 bb_perror_msg_and_die("popen");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002127 X.rsm->is_pipe = 1;
2128 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002129 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002130 }
2131 }
2132 X.F = X.rsm->F;
2133 }
2134
2135 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2136 if (! op1) {
2137 fputs(getvar_s(V[F0]), X.F);
2138 } else {
2139 while (op1) {
2140 L.v = evaluate(nextarg(&op1), v1);
2141 if (L.v->type & VF_NUMBER) {
2142 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2143 getvar_i(L.v), TRUE);
2144 fputs(buf, X.F);
2145 } else {
2146 fputs(getvar_s(L.v), X.F);
2147 }
2148
2149 if (op1) fputs(getvar_s(V[OFS]), X.F);
2150 }
2151 }
2152 fputs(getvar_s(V[ORS]), X.F);
2153
2154 } else { /* OC_PRINTF */
2155 L.s = awk_printf(op1);
2156 fputs(L.s, X.F);
2157 free(L.s);
2158 }
2159 fflush(X.F);
2160 break;
2161
2162 case XC( OC_DELETE ):
2163 X.info = op1->info & OPCLSMASK;
2164 if (X.info == OC_VAR) {
2165 R.v = op1->l.v;
2166 } else if (X.info == OC_FNARG) {
2167 R.v = &fnargs[op1->l.i];
2168 } else {
2169 runtime_error(EMSG_NOT_ARRAY);
2170 }
2171
2172 if (op1->r.n) {
2173 clrvar(L.v);
2174 L.s = getvar_s(evaluate(op1->r.n, v1));
2175 hash_remove(iamarray(R.v), L.s);
2176 } else {
2177 clear_array(iamarray(R.v));
2178 }
2179 break;
2180
2181 case XC( OC_NEWSOURCE ):
2182 programname = op->l.s;
2183 break;
2184
2185 case XC( OC_RETURN ):
2186 copyvar(res, L.v);
2187 break;
2188
2189 case XC( OC_NEXTFILE ):
2190 nextfile = TRUE;
2191 case XC( OC_NEXT ):
2192 nextrec = TRUE;
2193 case XC( OC_DONE ):
2194 clrvar(res);
2195 break;
2196
2197 case XC( OC_EXIT ):
2198 awk_exit(L.d);
2199
2200 /* -- recursive node type -- */
2201
2202 case XC( OC_VAR ):
2203 L.v = op->l.v;
2204 if (L.v == V[NF])
2205 split_f0();
2206 goto v_cont;
2207
2208 case XC( OC_FNARG ):
2209 L.v = &fnargs[op->l.i];
2210
2211v_cont:
2212 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2213 break;
2214
2215 case XC( OC_IN ):
2216 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2217 break;
2218
2219 case XC( OC_REGEXP ):
2220 op1 = op;
2221 L.s = getvar_s(V[F0]);
2222 goto re_cont;
2223
2224 case XC( OC_MATCH ):
2225 op1 = op->r.n;
2226re_cont:
2227 X.re = as_regex(op1, &sreg);
2228 R.i = regexec(X.re, L.s, 0, NULL, 0);
2229 if (X.re == &sreg) regfree(X.re);
2230 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2231 break;
2232
2233 case XC( OC_MOVE ):
2234 /* if source is a temporary string, jusk relink it to dest */
2235 if (R.v == v1+1 && R.v->string) {
2236 res = setvar_p(L.v, R.v->string);
2237 R.v->string = NULL;
2238 } else {
2239 res = copyvar(L.v, R.v);
2240 }
2241 break;
2242
2243 case XC( OC_TERNARY ):
2244 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2245 runtime_error(EMSG_POSSIBLE_ERROR);
2246 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2247 break;
2248
2249 case XC( OC_FUNC ):
2250 if (! op->r.f->body.first)
2251 runtime_error(EMSG_UNDEF_FUNC);
2252
2253 X.v = R.v = nvalloc(op->r.f->nargs+1);
2254 while (op1) {
2255 L.v = evaluate(nextarg(&op1), v1);
2256 copyvar(R.v, L.v);
2257 R.v->type |= VF_CHILD;
2258 R.v->x.parent = L.v;
2259 if (++R.v - X.v >= op->r.f->nargs)
2260 break;
2261 }
2262
2263 R.v = fnargs;
2264 fnargs = X.v;
2265
2266 L.s = programname;
2267 res = evaluate(op->r.f->body.first, res);
2268 programname = L.s;
2269
2270 nvfree(fnargs);
2271 fnargs = R.v;
2272 break;
2273
2274 case XC( OC_GETLINE ):
2275 case XC( OC_PGETLINE ):
2276 if (op1) {
2277 X.rsm = newfile(L.s);
2278 if (! X.rsm->F) {
2279 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2280 X.rsm->F = popen(L.s, "r");
2281 X.rsm->is_pipe = TRUE;
2282 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002283 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002284 }
2285 }
2286 } else {
2287 if (! iF) iF = next_input_file();
2288 X.rsm = iF;
2289 }
2290
2291 if (! X.rsm->F) {
2292 setvar_i(V[ERRNO], errno);
2293 setvar_i(res, -1);
2294 break;
2295 }
2296
2297 if (! op->r.n)
2298 R.v = V[F0];
2299
2300 L.i = awk_getline(X.rsm, R.v);
2301 if (L.i > 0) {
2302 if (! op1) {
2303 incvar(V[FNR]);
2304 incvar(V[NR]);
2305 }
2306 }
2307 setvar_i(res, L.i);
2308 break;
2309
2310 /* simple builtins */
2311 case XC( OC_FBLTIN ):
2312 switch (opn) {
2313
2314 case F_in:
2315 R.d = (int)L.d;
2316 break;
2317
2318 case F_rn:
2319 R.d = (double)rand() / (double)RAND_MAX;
2320 break;
2321
2322#ifdef CONFIG_FEATURE_AWK_MATH
2323 case F_co:
2324 R.d = cos(L.d);
2325 break;
2326
2327 case F_ex:
2328 R.d = exp(L.d);
2329 break;
2330
2331 case F_lg:
2332 R.d = log(L.d);
2333 break;
2334
2335 case F_si:
2336 R.d = sin(L.d);
2337 break;
2338
2339 case F_sq:
2340 R.d = sqrt(L.d);
2341 break;
2342#else
2343 case F_co:
2344 case F_ex:
2345 case F_lg:
2346 case F_si:
2347 case F_sq:
2348 runtime_error(EMSG_NO_MATH);
2349 break;
2350#endif
2351
2352 case F_sr:
2353 R.d = (double)seed;
2354 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2355 srand(seed);
2356 break;
2357
2358 case F_ti:
2359 R.d = time(NULL);
2360 break;
2361
2362 case F_le:
2363 if (! op1)
2364 L.s = getvar_s(V[F0]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00002365 R.d = bb_strlen(L.s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002366 break;
2367
2368 case F_sy:
2369 fflush(NULL);
2370 R.d = (L.s && *L.s) ? system(L.s) : 0;
2371 break;
2372
2373 case F_ff:
2374 if (! op1)
2375 fflush(stdout);
2376 else {
2377 if (L.s && *L.s) {
2378 X.rsm = newfile(L.s);
2379 fflush(X.rsm->F);
2380 } else {
2381 fflush(NULL);
2382 }
2383 }
2384 break;
2385
2386 case F_cl:
2387 X.rsm = (rstream *)hash_search(fdhash, L.s);
2388 if (X.rsm) {
2389 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00002390 free(X.rsm->buffer);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002391 hash_remove(fdhash, L.s);
2392 }
2393 if (R.i != 0)
2394 setvar_i(V[ERRNO], errno);
2395 R.d = (double)R.i;
2396 break;
2397 }
2398 setvar_i(res, R.d);
2399 break;
2400
2401 case XC( OC_BUILTIN ):
2402 res = exec_builtin(op, res);
2403 break;
2404
2405 case XC( OC_SPRINTF ):
2406 setvar_p(res, awk_printf(op1));
2407 break;
2408
2409 case XC( OC_UNARY ):
2410 X.v = R.v;
2411 L.d = R.d = getvar_i(R.v);
2412 switch (opn) {
2413 case 'P':
2414 L.d = ++R.d;
2415 goto r_op_change;
2416 case 'p':
2417 R.d++;
2418 goto r_op_change;
2419 case 'M':
2420 L.d = --R.d;
2421 goto r_op_change;
2422 case 'm':
2423 R.d--;
2424 goto r_op_change;
2425 case '!':
2426 L.d = istrue(X.v) ? 0 : 1;
2427 break;
2428 case '-':
2429 L.d = -R.d;
2430 break;
2431 r_op_change:
2432 setvar_i(X.v, R.d);
2433 }
2434 setvar_i(res, L.d);
2435 break;
2436
2437 case XC( OC_FIELD ):
2438 R.i = (int)getvar_i(R.v);
2439 if (R.i == 0) {
2440 res = V[F0];
2441 } else {
2442 split_f0();
2443 if (R.i > nfields)
2444 fsrealloc(R.i);
2445
2446 res = &Fields[R.i-1];
2447 }
2448 break;
2449
2450 /* concatenation (" ") and index joining (",") */
2451 case XC( OC_CONCAT ):
2452 case XC( OC_COMMA ):
Manuel Novoa III cad53642003-03-19 09:13:01 +00002453 opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002454 X.s = (char *)xmalloc(opn);
2455 strcpy(X.s, L.s);
2456 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2457 L.s = getvar_s(V[SUBSEP]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00002458 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002459 strcat(X.s, L.s);
2460 }
2461 strcat(X.s, R.s);
2462 setvar_p(res, X.s);
2463 break;
2464
2465 case XC( OC_LAND ):
2466 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2467 break;
2468
2469 case XC( OC_LOR ):
2470 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2471 break;
2472
2473 case XC( OC_BINARY ):
2474 case XC( OC_REPLACE ):
2475 R.d = getvar_i(R.v);
2476 switch (opn) {
2477 case '+':
2478 L.d += R.d;
2479 break;
2480 case '-':
2481 L.d -= R.d;
2482 break;
2483 case '*':
2484 L.d *= R.d;
2485 break;
2486 case '/':
2487 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2488 L.d /= R.d;
2489 break;
2490 case '&':
2491#ifdef CONFIG_FEATURE_AWK_MATH
2492 L.d = pow(L.d, R.d);
2493#else
2494 runtime_error(EMSG_NO_MATH);
2495#endif
2496 break;
2497 case '%':
2498 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2499 L.d -= (int)(L.d / R.d) * R.d;
2500 break;
2501 }
2502 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2503 break;
2504
2505 case XC( OC_COMPARE ):
2506 if (is_numeric(L.v) && is_numeric(R.v)) {
2507 L.d = getvar_i(L.v) - getvar_i(R.v);
2508 } else {
2509 L.s = getvar_s(L.v);
2510 R.s = getvar_s(R.v);
2511 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2512 }
2513 switch (opn & 0xfe) {
2514 case 0:
2515 R.i = (L.d > 0);
2516 break;
2517 case 2:
2518 R.i = (L.d >= 0);
2519 break;
2520 case 4:
2521 R.i = (L.d == 0);
2522 break;
2523 }
2524 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2525 break;
2526
2527 default:
2528 runtime_error(EMSG_POSSIBLE_ERROR);
2529 }
2530 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2531 op = op->a.n;
2532 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2533 break;
2534 if (nextrec)
2535 break;
2536 }
2537 nvfree(v1);
2538 return res;
2539}
2540
2541
2542/* -------- main & co. -------- */
2543
2544static int awk_exit(int r) {
2545
2546 unsigned int i;
2547 hash_item *hi;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002548 static var tv;
2549
2550 if (! exiting) {
2551 exiting = TRUE;
2552 evaluate(endseq.first, &tv);
2553 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002554
2555 /* waiting for children */
2556 for (i=0; i<fdhash->csize; i++) {
2557 hi = fdhash->items[i];
2558 while(hi) {
2559 if (hi->data.rs.F && hi->data.rs.is_pipe)
2560 pclose(hi->data.rs.F);
2561 hi = hi->next;
2562 }
2563 }
2564
2565 exit(r);
2566}
2567
2568/* if expr looks like "var=value", perform assignment and return 1,
2569 * otherwise return 0 */
2570static int is_assignment(char *expr) {
2571
2572 char *exprc, *s, *s0, *s1;
2573
Manuel Novoa III cad53642003-03-19 09:13:01 +00002574 exprc = bb_xstrdup(expr);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002575 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2576 free(exprc);
2577 return FALSE;
2578 }
2579
2580 *(s++) = '\0';
2581 s0 = s1 = s;
2582 while (*s)
2583 *(s1++) = nextchar(&s);
2584
2585 *s1 = '\0';
2586 setvar_u(newvar(exprc), s0);
2587 free(exprc);
2588 return TRUE;
2589}
2590
2591/* switch to next input file */
2592static rstream *next_input_file(void) {
2593
2594 static rstream rsm;
2595 FILE *F = NULL;
2596 char *fname, *ind;
2597 static int files_happen = FALSE;
2598
2599 if (rsm.F) fclose(rsm.F);
2600 rsm.F = NULL;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002601 rsm.pos = rsm.adv = 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002602
2603 do {
2604 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2605 if (files_happen)
2606 return NULL;
2607 fname = "-";
2608 F = stdin;
2609 } else {
2610 ind = getvar_s(incvar(V[ARGIND]));
2611 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2612 if (fname && *fname && !is_assignment(fname))
2613 F = afopen(fname, "r");
2614 }
2615 } while (!F);
2616
2617 files_happen = TRUE;
2618 setvar_s(V[FILENAME], fname);
2619 rsm.F = F;
2620 return &rsm;
2621}
2622
2623extern int awk_main(int argc, char **argv) {
2624
2625 char *s, *s1;
2626 int i, j, c;
2627 var *v;
2628 static var tv;
2629 char **envp;
2630 static int from_file = FALSE;
2631 rstream *rsm;
2632 FILE *F, *stdfiles[3];
2633 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2634
2635 /* allocate global buffer */
2636 buf = xmalloc(MAXVARFMT+1);
2637
2638 vhash = hash_init();
2639 ahash = hash_init();
2640 fdhash = hash_init();
2641 fnhash = hash_init();
2642
2643 /* initialize variables */
2644 for (i=0; *vNames; i++) {
2645 V[i] = v = newvar(nextword(&vNames));
2646 if (*vValues != '\377')
2647 setvar_s(v, nextword(&vValues));
2648 else
2649 setvar_i(v, 0);
2650
2651 if (*vNames == '*') {
2652 v->type |= VF_SPECIAL;
2653 vNames++;
2654 }
2655 }
2656
2657 handle_special(V[FS]);
2658 handle_special(V[RS]);
2659
2660 stdfiles[0] = stdin;
2661 stdfiles[1] = stdout;
2662 stdfiles[2] = stderr;
2663 for (i=0; i<3; i++) {
2664 rsm = newfile(nextword(&stdnames));
2665 rsm->F = stdfiles[i];
2666 }
2667
2668 for (envp=environ; *envp; envp++) {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002669 s = bb_xstrdup(*envp);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002670 s1 = strchr(s, '=');
2671 *(s1++) = '\0';
2672 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2673 free(s);
2674 }
2675
2676 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2677 switch (c) {
2678 case 'F':
2679 setvar_s(V[FS], optarg);
2680 break;
2681 case 'v':
2682 if (! is_assignment(optarg))
Manuel Novoa III cad53642003-03-19 09:13:01 +00002683 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002684 break;
2685 case 'f':
2686 from_file = TRUE;
2687 F = afopen(programname = optarg, "r");
2688 s = NULL;
2689 /* one byte is reserved for some trick in next_token */
2690 for (i=j=1; j>0; i+=j) {
2691 s = (char *)xrealloc(s, i+4096);
2692 j = fread(s+i, 1, 4094, F);
2693 }
2694 s[i] = '\0';
2695 fclose(F);
2696 parse_program(s+1);
2697 free(s);
2698 break;
2699 case 'W':
Manuel Novoa III cad53642003-03-19 09:13:01 +00002700 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002701 break;
2702
2703 default:
Manuel Novoa III cad53642003-03-19 09:13:01 +00002704 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002705 }
2706 }
2707
2708 if (!from_file) {
2709 if (argc == optind)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002710 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002711 programname="cmd. line";
2712 parse_program(argv[optind++]);
2713
2714 }
2715
2716 /* fill in ARGV array */
2717 setvar_i(V[ARGC], argc - optind + 1);
2718 setari_u(V[ARGV], 0, "awk");
2719 for(i=optind; i < argc; i++)
2720 setari_u(V[ARGV], i+1-optind, argv[i]);
2721
2722 evaluate(beginseq.first, &tv);
2723 if (! mainseq.first && ! endseq.first)
2724 awk_exit(EXIT_SUCCESS);
2725
2726 /* input file could already be opened in BEGIN block */
2727 if (! iF) iF = next_input_file();
2728
2729 /* passing through input files */
2730 while (iF) {
2731
2732 nextfile = FALSE;
2733 setvar_i(V[FNR], 0);
2734
2735 while ((c = awk_getline(iF, V[F0])) > 0) {
2736
2737 nextrec = FALSE;
2738 incvar(V[NR]);
2739 incvar(V[FNR]);
2740 evaluate(mainseq.first, &tv);
2741
2742 if (nextfile)
2743 break;
2744 }
2745
2746 if (c < 0)
2747 runtime_error(strerror(errno));
2748
2749 iF = next_input_file();
2750
2751 }
2752
Glenn L McGrath545106f2002-11-11 06:21:00 +00002753 awk_exit(EXIT_SUCCESS);
2754
2755 return 0;
2756}
2757