blob: 087be44a5d3f588708d8070e633f77528ef407f8 [file] [log] [blame]
Glenn L McGrath545106f2002-11-11 06:21:00 +00001/* vi: set sw=4 ts=4: */
2/*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 */
22
23#include <stdio.h>
24#include <stdlib.h>
25#include <unistd.h>
26#include <errno.h>
27#include <string.h>
28#include <time.h>
29#include <math.h>
30#include <ctype.h>
31#include <getopt.h>
Glenn L McGrath545106f2002-11-11 06:21:00 +000032
"Vladimir N. Oleynik"23f62fc2005-09-14 16:59:11 +000033#include "xregex.h"
Glenn L McGrath545106f2002-11-11 06:21:00 +000034#include "busybox.h"
35
36
#define MAXVARFMT   240
#define MINNVBLOCK  64

/* bits stored in var.type */
#define VF_NUMBER   (1 << 0)    /* primary type is number */
#define VF_ARRAY    (1 << 1)    /* variable is an array */

#define VF_CACHED   (1 << 8)    /* num/str value has cached str/num eq */
#define VF_USER     (1 << 9)    /* user input (may be numeric string) */
#define VF_SPECIAL  (1 << 10)   /* requires extra handling when changed */
#define VF_WALK     (1 << 11)   /* variable has alloc'd x.walker list */
#define VF_FSTR     (1 << 12)   /* string points to fstring buffer */
#define VF_CHILD    (1 << 13)   /* function arg; x.parent points to source */
#define VF_DIRTY    (1 << 14)   /* variable was set explicitly */

/* bits that must survive when the value of a variable is replaced */
#define VF_DONTTOUCH \
	(VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
54
/* An awk variable: scalar value, array handle or function-argument proxy */
typedef struct var_s {
	unsigned short type;            /* VF_* flag bits */
	double number;                  /* numeric value */
	char *string;                   /* string value */
	union {
		int aidx;                   /* func arg index (on compilation stage) */
		struct xhash_s *array;      /* array ptr */
		struct var_s *parent;       /* for func args, ptr to actual parameter */
		char **walker;              /* list of array elements (for..in) */
	} x;
} var;
67
/* Chain of nodes (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
	struct node_s *first;       /* first node of the chain */
	struct node_s *last;        /* last node of the chain */
	char *programname;
} chain;
74
75/* Function */
76typedef struct func_s {
77 unsigned short nargs;
78 struct chain_s body;
79} func;
80
/* State kept for one I/O stream */
typedef struct rstream_s {
	FILE *F;
	char *buffer;
	int adv;
	int size;
	int pos;
	unsigned short is_pipe;
} rstream;
90
91typedef struct hash_item_s {
92 union {
93 struct var_s v; /* variable/array hash */
94 struct rstream_s rs; /* redirect streams hash */
95 struct func_s f; /* functions hash */
96 } data;
97 struct hash_item_s *next; /* next in chain */
98 char name[1]; /* really it's longer */
99} hash_item;
100
/* Chained hash table header */
typedef struct xhash_s {
	unsigned int nel;               /* num of elements */
	unsigned int csize;             /* current hash size */
	unsigned int nprime;            /* next hash size in PRIMES[] */
	unsigned int glen;              /* summary length of item names */
	struct hash_item_s **items;     /* bucket array, csize entries */
} xhash;
108
109/* Tree node */
110typedef struct node_s {
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000111 uint32_t info;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000112 unsigned short lineno;
113 union {
114 struct node_s *n;
115 var *v;
116 int i;
117 char *s;
118 regex_t *re;
119 } l;
120 union {
121 struct node_s *n;
122 regex_t *ire;
123 func *f;
124 int argno;
125 } r;
126 union {
127 struct node_s *n;
128 } a;
129} node;
130
131/* Block of temporary variables */
132typedef struct nvblock_s {
133 int size;
134 var *pos;
135 struct nvblock_s *prev;
136 struct nvblock_s *next;
137 var nv[0];
138} nvblock;
139
140typedef struct tsplitter_s {
141 node n;
142 regex_t re[2];
143} tsplitter;
144
/* simple token classes */
/* Order and hex values are very important!!! See next_token() */
#define TC_SEQSTART     (1 << 0)    /* ( */
#define TC_SEQTERM      (1 << 1)    /* ) */
#define TC_REGEXP       (1 << 2)    /* /.../ */
#define TC_OUTRDR       (1 << 3)    /* | > >> */
#define TC_UOPPOST      (1 << 4)    /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)    /* unary prefix operator */
#define TC_BINOPX       (1 << 6)    /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)    /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)   /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)   /* ] */
#define TC_GRPSTART     (1 << 12)   /* { */
#define TC_GRPTERM      (1 << 13)   /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)   /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)   /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

#define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
                         TC_BUILTIN | TC_GETLINE | TC_SEQSTART | \
                         TC_STRING | TC_NUMBER)

#define TC_STATEMNT     (TC_STATX | TC_WHILE)
#define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
                         TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
                         TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
                         TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
207
/* operator flag bits (stored above the operation class in node info) */
#define OF_RES1     (1 << 16)
#define OF_RES2     (1 << 17)
#define OF_STR1     (1 << 18)
#define OF_STR2     (1 << 19)
#define OF_NUM1     (1 << 20)
#define OF_CHECKED  (1 << 21)

/* combined operator flags */
#define xx  0
#define xV  OF_RES2
#define xS  (OF_RES2 | OF_STR2)
#define Vx  OF_RES1
#define VV  (OF_RES1 | OF_RES2)
#define Nx  (OF_RES1 | OF_NUM1)
#define NV  (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx  (OF_RES1 | OF_STR1)
#define SV  (OF_RES1 | OF_STR1 | OF_RES2)
#define SS  (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

#define OPCLSMASK   0xFF00
#define OPNMASK     0x007F
229
/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() places its argument in bits 24..31. The argument is parenthesized so
 * that any expression may be passed, and the shift is done on an unsigned
 * value: builtin descriptors such as P(0x83) set bit 31, which is undefined
 * behaviour when shifting a signed int.
 */
#define P(x)        ((unsigned)(x) << 24)
#define PRIMASK     0x7F000000
#define PRIMASK2    0x7E000000
237
/* Operation classes (selected from node info by OPCLSMASK) */

#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

enum {
	OC_DELETE   = 0x0100,
	OC_EXEC     = 0x0200,
	OC_NEWSOURCE = 0x0300,
	OC_PRINT    = 0x0400,
	OC_PRINTF   = 0x0500,
	OC_WALKINIT = 0x0600,

	OC_BR       = 0x0700,
	OC_BREAK    = 0x0800,
	OC_CONTINUE = 0x0900,
	OC_EXIT     = 0x0a00,
	OC_NEXT     = 0x0b00,
	OC_NEXTFILE = 0x0c00,
	OC_TEST     = 0x0d00,
	OC_WALKNEXT = 0x0e00,

	OC_BINARY   = 0x1000,
	OC_BUILTIN  = 0x1100,
	OC_COLON    = 0x1200,
	OC_COMMA    = 0x1300,
	OC_COMPARE  = 0x1400,
	OC_CONCAT   = 0x1500,
	OC_FBLTIN   = 0x1600,
	OC_FIELD    = 0x1700,
	OC_FNARG    = 0x1800,
	OC_FUNC     = 0x1900,
	OC_GETLINE  = 0x1a00,
	OC_IN       = 0x1b00,
	OC_LAND     = 0x1c00,
	OC_LOR      = 0x1d00,
	OC_MATCH    = 0x1e00,
	OC_MOVE     = 0x1f00,
	OC_PGETLINE = 0x2000,
	OC_REGEXP   = 0x2100,
	OC_REPLACE  = 0x2200,
	OC_RETURN   = 0x2300,
	OC_SPRINTF  = 0x2400,
	OC_TERNARY  = 0x2500,
	OC_UNARY    = 0x2600,
	OC_VAR      = 0x2700,
	OC_DONE     = 0x2800,

	ST_IF       = 0x3000,
	ST_DO       = 0x3100,
	ST_FOR      = 0x3200,
	ST_WHILE    = 0x3300
};
264
/* simple builtins (low bits of OC_FBLTIN entries in tokeninfo) */
enum {
	F_in = 0,
	F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
	F_ti, F_le, F_sy, F_ff, F_cl
};

/* builtins (low bits of OC_BUILTIN entries in tokeninfo) */
enum {
	B_a2 = 0,
	B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
	B_ge, B_gs, B_su
};
276
/* tokens and their corresponding info values */

#define NTC     "\377"  /* switch to next token class (tc<<1) */
#define NTCC    '\377'

#define OC_B    OC_BUILTIN

/* Each entry is one length byte followed by the token text. An NTC byte
 * advances to the next token class; the list ends with a zero byte.
 * Entries are walked in parallel with tokeninfo[], so order matters.
 */
static char * const tokenlist =
	"\1("       NTC                                 /* SEQSTART */
	"\1)"       NTC                                 /* SEQTERM */
	"\1/"       NTC                                 /* REGEXP */
	"\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
	"\2++"      "\2--"      NTC                     /* UOPPOST */
	"\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
	"\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
	"\2*="      "\2/="      "\2%="      "\2^="
	"\1+"       "\1-"       "\3**="     "\2**"
	"\1/"       "\1%"       "\1^"       "\1*"
	"\2!="      "\2>="      "\2<="      "\1>"
	"\1<"       "\2!~"      "\1~"       "\2&&"
	"\2||"      "\1?"       "\1:"       NTC
	"\2in"      NTC                                 /* IN */
	"\1,"       NTC                                 /* COMMA */
	"\1|"       NTC                                 /* PIPE */
	"\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
	"\1]"       NTC                                 /* ARRTERM */
	"\1{"       NTC                                 /* GRPSTART */
	"\1}"       NTC                                 /* GRPTERM */
	"\1;"       NTC                                 /* SEMICOL */
	"\1\n"      NTC                                 /* NEWLINE */
	"\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
	"\10continue"           "\6delete"  "\5print"
	"\6printf"  "\4next"    "\10nextfile"
	"\6return"  "\4exit"    NTC
	"\5while"   NTC                                 /* WHILE */
	"\4else"    NTC                                 /* ELSE */

	"\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
	"\3cos"     "\3exp"     "\3int"     "\3log"
	"\4rand"    "\3sin"     "\4sqrt"    "\5srand"
	"\6gensub"  "\4gsub"    "\5index"   "\6length"
	"\5match"   "\5split"   "\7sprintf" "\3sub"
	"\6substr"  "\7systime" "\10strftime"
	"\7tolower" "\7toupper" NTC
	"\7getline" NTC                                 /* GETLINE */
	"\4func"    "\10function"           NTC         /* FUNCDECL */
	"\5BEGIN"   NTC                                 /* BEGIN */
	"\3END"     "\0"                                /* END */
	;
326
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000327static uint32_t tokeninfo[] = {
Glenn L McGrath545106f2002-11-11 06:21:00 +0000328
329 0,
330 0,
331 OC_REGEXP,
332 xS|'a', xS|'w', xS|'|',
333 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
334 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
335 OC_FIELD|xV|P(5),
336 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
337 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
338 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
339 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
340 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
341 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
342 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
343 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
344 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
345 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
346 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
347 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
348 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
349 OC_COLON|xx|P(67)|':',
350 OC_IN|SV|P(49),
351 OC_COMMA|SS|P(80),
352 OC_PGETLINE|SV|P(37),
353 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
354 OC_UNARY|xV|P(19)|'!',
355 0,
356 0,
357 0,
358 0,
359 0,
360 ST_IF, ST_DO, ST_FOR, OC_BREAK,
361 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
362 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
363 OC_RETURN|Vx, OC_EXIT|Nx,
364 ST_WHILE,
365 0,
366
367 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
372 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
373 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
374 OC_GETLINE|SV|P(0),
375 0, 0,
376 0,
377 0
378};
379
/* internal variable names and their initial values */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
enum {
	CONVFMT = 0,
	OFMT,       FS,         OFS,
	ORS,        RS,         RT,         FILENAME,
	SUBSEP,     ARGIND,     ARGC,       ARGV,
	ERRNO,      FNR,
	NR,         NF,         IGNORECASE,
	ENVIRON,    F0,         _intvarcount_
};

/* NUL-separated names, in the order of the enum above */
static char * vNames =
	"CONVFMT\0"     "OFMT\0"        "FS\0*"         "OFS\0"
	"ORS\0"         "RS\0*"         "RT\0"          "FILENAME\0"
	"SUBSEP\0"      "ARGIND\0"      "ARGC\0"        "ARGV\0"
	"ERRNO\0"       "FNR\0"
	"NR\0"          "NF\0*"         "IGNORECASE\0*"
	"ENVIRON\0"     "$\0*"          "\0";

/* NUL-separated initial values for the leading variables; \377 ends the list */
static char * vValues =
	"%.6g\0"        "%.6g\0"        " \0"           " \0"
	"\n\0"          "\n\0"          "\0"            "\0"
	"\034\0"
	"\377";
404
/* hash size may grow to these values */
/* FIRST_PRIME is the initial bucket count; PRIMES[] lists the sizes used
 * for each later growth step. The macro has no trailing semicolon, so it
 * can be used inside any expression. */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);
409
410/* globals */
411
412extern char **environ;
413
414static var * V[_intvarcount_];
415static chain beginseq, mainseq, endseq, *seq;
416static int nextrec, nextfile;
417static node *break_ptr, *continue_ptr;
418static rstream *iF;
419static xhash *vhash, *ahash, *fdhash, *fnhash;
420static char *programname;
421static short lineno;
422static int is_f0_split;
423static int nfields = 0;
424static var *Fields = NULL;
425static tsplitter fsplitter, rsplitter;
426static nvblock *cb = NULL;
427static char *pos;
428static char *buf;
429static int icase = FALSE;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +0000430static int exiting = FALSE;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000431
/* Current token, filled in by next_token() */
static struct {
	uint32_t tclass;        /* token class (TC_* bit) */
	uint32_t info;          /* info word taken from tokeninfo[] */
	char *string;           /* text of a string, regexp or name token */
	double number;          /* value of a number token */
	short lineno;
	int rollback;           /* set by rollback_token() */
} t;
440
441/* function prototypes */
Glenn L McGrath545106f2002-11-11 06:21:00 +0000442static void handle_special(var *);
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000443static node *parse_expr(uint32_t);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000444static void chain_group(void);
445static var *evaluate(node *, var *);
446static rstream *next_input_file(void);
447static int fmt_num(char *, int, char *, double, int);
448static int awk_exit(int);
449
/* ---- error handling ---- */

/* message texts handed to syntax_error() / runtime_error() */
static const char EMSG_INTERNAL_ERROR[]  = "Internal error";
static const char EMSG_UNEXP_EOS[]       = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[]     = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[]     = "Division by zero";
static const char EMSG_INV_FMT[]         = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[]    = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[]       = "Not an array";
static const char EMSG_POSSIBLE_ERROR[]  = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[]      = "Call to undefined function";
#ifndef CONFIG_FEATURE_AWK_MATH
static const char EMSG_NO_MATH[]         = "Math support is not compiled in";
#endif
464
Glenn L McGrathd4036f82002-11-28 09:30:40 +0000465static void syntax_error(const char * const message)
466{
Manuel Novoa III cad53642003-03-19 09:13:01 +0000467 bb_error_msg("%s:%i: %s", programname, lineno, message);
Glenn L McGrath00ed36f2003-10-30 13:36:39 +0000468 exit(1);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000469}
470
471#define runtime_error(x) syntax_error(x)
472
473
474/* ---- hash stuff ---- */
475
/* Compute the hash of a NUL-terminated name: h = c + h*64 - h for every
 * character c. Callers reduce the result modulo the table size. */
static unsigned int hashidx(char *name)
{
	unsigned int h = 0;
	const char *cp;

	for (cp = name; *cp != '\0'; cp++)
		h = *cp + (h << 6) - h;
	return h;
}
483
484/* create new hash */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000485static xhash *hash_init(void)
486{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000487 xhash *newhash;
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000488
Glenn L McGrath545106f2002-11-11 06:21:00 +0000489 newhash = (xhash *)xcalloc(1, sizeof(xhash));
490 newhash->csize = FIRST_PRIME;
491 newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
492
493 return newhash;
494}
495
496/* find item in hash, return ptr to data, NULL if not found */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000497static void *hash_search(xhash *hash, char *name)
498{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000499 hash_item *hi;
500
501 hi = hash->items [ hashidx(name) % hash->csize ];
502 while (hi) {
503 if (strcmp(hi->name, name) == 0)
504 return &(hi->data);
505 hi = hi->next;
506 }
507 return NULL;
508}
509
510/* grow hash if it becomes too big */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000511static void hash_rebuild(xhash *hash)
512{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000513 unsigned int newsize, i, idx;
514 hash_item **newitems, *hi, *thi;
515
516 if (hash->nprime == NPRIMES)
517 return;
518
519 newsize = PRIMES[hash->nprime++];
520 newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
521
522 for (i=0; i<hash->csize; i++) {
523 hi = hash->items[i];
524 while (hi) {
525 thi = hi;
526 hi = thi->next;
527 idx = hashidx(thi->name) % newsize;
528 thi->next = newitems[idx];
529 newitems[idx] = thi;
530 }
531 }
532
533 free(hash->items);
534 hash->csize = newsize;
535 hash->items = newitems;
536}
537
538/* find item in hash, add it if necessary. Return ptr to data */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000539static void *hash_find(xhash *hash, char *name)
540{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000541 hash_item *hi;
542 unsigned int idx;
543 int l;
544
545 hi = hash_search(hash, name);
546 if (! hi) {
547 if (++hash->nel / hash->csize > 10)
548 hash_rebuild(hash);
549
Manuel Novoa III cad53642003-03-19 09:13:01 +0000550 l = bb_strlen(name) + 1;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000551 hi = xcalloc(sizeof(hash_item) + l, 1);
552 memcpy(hi->name, name, l);
553
554 idx = hashidx(name) % hash->csize;
555 hi->next = hash->items[idx];
556 hash->items[idx] = hi;
557 hash->glen += l;
558 }
559 return &(hi->data);
560}
561
/* Typed wrappers around hash_find(). Each expansion is parenthesized as a
 * whole so the cast applies to the call result even when the macro is
 * followed by a postfix operator such as -> */
#define findvar(hash, name) ((var *) hash_find ( (hash) , (name) ))
#define newvar(name) ((var *) hash_find ( vhash , (name) ))
#define newfile(name) ((rstream *) hash_find ( fdhash , (name) ))
#define newfunc(name) ((func *) hash_find ( fnhash , (name) ))
566
Mike Frysinger10a11e22005-09-27 02:23:02 +0000567static void hash_remove(xhash *hash, char *name)
568{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000569 hash_item *hi, **phi;
570
571 phi = &(hash->items[ hashidx(name) % hash->csize ]);
572 while (*phi) {
573 hi = *phi;
574 if (strcmp(hi->name, name) == 0) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000575 hash->glen -= (bb_strlen(name) + 1);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000576 hash->nel--;
577 *phi = hi->next;
578 free(hi);
579 break;
580 }
581 phi = &(hi->next);
582 }
583}
584
585/* ------ some useful functions ------ */
586
Mike Frysinger10a11e22005-09-27 02:23:02 +0000587static void skip_spaces(char **s)
588{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000589 register char *p = *s;
590
591 while(*p == ' ' || *p == '\t' ||
592 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
Mike Frysingerde2b9382005-09-27 03:18:00 +0000593 p++;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000594 }
595 *s = p;
596}
597
/* Return the NUL-terminated word at *s and move *s just past its
 * terminating NUL. */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
606
/* Fetch the next character from *s, advancing *s.
 * A backslash is handed to bb_process_escape_sequence(); if that call
 * returns a backslash without advancing *s, the character following the
 * backslash is returned instead. */
static char nextchar(char **s)
{
	char ch = *((*s)++);
	char *after = *s;

	if (ch == '\\') {
		ch = bb_process_escape_sequence((const char**)s);
		if (ch == '\\' && *s == after)
			ch = *((*s)++);
	}
	return ch;
}
617
/* Return non-zero if c is alphanumeric or an underscore.
 * Callers pass plain char values, which may be negative; the value is
 * converted to unsigned char first because isalnum() is only defined for
 * EOF and values representable as unsigned char. */
static inline int isalnum_(int c)
{
	return (isalnum((unsigned char)c) || c == '_');
}
622
/* Open 'path' with bb_xfopen(); the single-character name "-" yields stdin */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;
	return bb_xfopen(path, mode);
}
627
628/* -------- working with variables (set/get/copy/etc) -------- */
629
Mike Frysinger10a11e22005-09-27 02:23:02 +0000630static xhash *iamarray(var *v)
631{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000632 var *a = v;
633
634 while (a->type & VF_CHILD)
635 a = a->x.parent;
636
637 if (! (a->type & VF_ARRAY)) {
638 a->type |= VF_ARRAY;
639 a->x.array = hash_init();
640 }
641 return a->x.array;
642}
643
Mike Frysinger10a11e22005-09-27 02:23:02 +0000644static void clear_array(xhash *array)
645{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000646 unsigned int i;
647 hash_item *hi, *thi;
648
649 for (i=0; i<array->csize; i++) {
650 hi = array->items[i];
651 while (hi) {
652 thi = hi;
653 hi = hi->next;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000654 free(thi->data.v.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000655 free(thi);
656 }
657 array->items[i] = NULL;
658 }
659 array->glen = array->nel = 0;
660}
661
662/* clear a variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000663static var *clrvar(var *v)
664{
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000665 if (!(v->type & VF_FSTR))
Glenn L McGrath545106f2002-11-11 06:21:00 +0000666 free(v->string);
667
668 v->type &= VF_DONTTOUCH;
669 v->type |= VF_DIRTY;
670 v->string = NULL;
671 return v;
672}
673
674/* assign string value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000675static var *setvar_p(var *v, char *value)
676{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000677 clrvar(v);
678 v->string = value;
679 handle_special(v);
680
681 return v;
682}
683
684/* same as setvar_p but make a copy of string */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000685static var *setvar_s(var *v, char *value)
686{
Manuel Novoa III cad53642003-03-19 09:13:01 +0000687 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000688}
689
690/* same as setvar_s but set USER flag */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000691static var *setvar_u(var *v, char *value)
692{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000693 setvar_s(v, value);
694 v->type |= VF_USER;
695 return v;
696}
697
698/* set array element to user string */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000699static void setari_u(var *a, int idx, char *s)
700{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000701 register var *v;
702 static char sidx[12];
703
704 sprintf(sidx, "%d", idx);
705 v = findvar(iamarray(a), sidx);
706 setvar_u(v, s);
707}
708
709/* assign numeric value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000710static var *setvar_i(var *v, double value)
711{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000712 clrvar(v);
713 v->type |= VF_NUMBER;
714 v->number = value;
715 handle_special(v);
716 return v;
717}
718
Mike Frysinger10a11e22005-09-27 02:23:02 +0000719static char *getvar_s(var *v)
720{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000721 /* if v is numeric and has no cached string, convert it to string */
722 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
723 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
Manuel Novoa III cad53642003-03-19 09:13:01 +0000724 v->string = bb_xstrdup(buf);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000725 v->type |= VF_CACHED;
726 }
727 return (v->string == NULL) ? "" : v->string;
728}
729
Mike Frysinger10a11e22005-09-27 02:23:02 +0000730static double getvar_i(var *v)
731{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000732 char *s;
733
734 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
735 v->number = 0;
736 s = v->string;
737 if (s && *s) {
738 v->number = strtod(s, &s);
739 if (v->type & VF_USER) {
740 skip_spaces(&s);
741 if (*s != '\0')
742 v->type &= ~VF_USER;
743 }
744 } else {
745 v->type &= ~VF_USER;
746 }
747 v->type |= VF_CACHED;
748 }
749 return v->number;
750}
751
Mike Frysinger10a11e22005-09-27 02:23:02 +0000752static var *copyvar(var *dest, var *src)
753{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000754 if (dest != src) {
755 clrvar(dest);
756 dest->type |= (src->type & ~VF_DONTTOUCH);
757 dest->number = src->number;
758 if (src->string)
Manuel Novoa III cad53642003-03-19 09:13:01 +0000759 dest->string = bb_xstrdup(src->string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000760 }
761 handle_special(dest);
762 return dest;
763}
764
Mike Frysinger10a11e22005-09-27 02:23:02 +0000765static var *incvar(var *v)
766{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000767 return setvar_i(v, getvar_i(v)+1.);
768}
769
770/* return true if v is number or numeric string */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000771static int is_numeric(var *v)
772{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000773 getvar_i(v);
774 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
775}
776
777/* return 1 when value of v corresponds to true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000778static int istrue(var *v)
779{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000780 if (is_numeric(v))
781 return (v->number == 0) ? 0 : 1;
782 else
783 return (v->string && *(v->string)) ? 1 : 0;
784}
785
Eric Andersenaff114c2004-04-14 17:51:38 +0000786/* temporary variables allocator. Last allocated should be first freed */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000787static var *nvalloc(int n)
788{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000789 nvblock *pb = NULL;
790 var *v, *r;
791 int size;
792
793 while (cb) {
794 pb = cb;
795 if ((cb->pos - cb->nv) + n <= cb->size) break;
796 cb = cb->next;
797 }
798
799 if (! cb) {
800 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
801 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
802 cb->size = size;
803 cb->pos = cb->nv;
804 cb->prev = pb;
805 cb->next = NULL;
806 if (pb) pb->next = cb;
807 }
808
809 v = r = cb->pos;
810 cb->pos += n;
811
812 while (v < cb->pos) {
813 v->type = 0;
814 v->string = NULL;
815 v++;
816 }
817
818 return r;
819}
820
Mike Frysinger10a11e22005-09-27 02:23:02 +0000821static void nvfree(var *v)
822{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000823 var *p;
824
825 if (v < cb->nv || v >= cb->pos)
826 runtime_error(EMSG_INTERNAL_ERROR);
827
828 for (p=v; p<cb->pos; p++) {
829 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
830 clear_array(iamarray(p));
831 free(p->x.array->items);
832 free(p->x.array);
833 }
834 if (p->type & VF_WALK)
835 free(p->x.walker);
836
837 clrvar(p);
838 }
839
840 cb->pos = v;
841 while (cb->prev && cb->pos == cb->nv) {
842 cb = cb->prev;
843 }
844}
845
846/* ------- awk program text parsing ------- */
847
848/* Parse next token pointed by global pos, place results into global t.
849 * If token isn't expected, give away. Return token class
850 */
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000851static uint32_t next_token(uint32_t expected)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000852{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000853 char *p, *pp, *s;
854 char *tl;
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000855 uint32_t tc, *ti;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000856 int l;
857 static int concat_inserted = FALSE;
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000858 static uint32_t save_tclass, save_info;
859 static uint32_t ltclass = TC_OPTERM;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000860
861 if (t.rollback) {
862
863 t.rollback = FALSE;
864
865 } else if (concat_inserted) {
866
867 concat_inserted = FALSE;
868 t.tclass = save_tclass;
869 t.info = save_info;
870
871 } else {
872
873 p = pos;
874
875 readnext:
876 skip_spaces(&p);
877 lineno = t.lineno;
878 if (*p == '#')
879 while (*p != '\n' && *p != '\0') p++;
880
881 if (*p == '\n')
882 t.lineno++;
883
884 if (*p == '\0') {
885 tc = TC_EOF;
886
887 } else if (*p == '\"') {
888 /* it's a string */
889 t.string = s = ++p;
890 while (*p != '\"') {
891 if (*p == '\0' || *p == '\n')
892 syntax_error(EMSG_UNEXP_EOS);
893 *(s++) = nextchar(&p);
894 }
895 p++;
896 *s = '\0';
897 tc = TC_STRING;
898
899 } else if ((expected & TC_REGEXP) && *p == '/') {
900 /* it's regexp */
901 t.string = s = ++p;
902 while (*p != '/') {
903 if (*p == '\0' || *p == '\n')
904 syntax_error(EMSG_UNEXP_EOS);
905 if ((*s++ = *p++) == '\\') {
906 pp = p;
Manuel Novoa III cad53642003-03-19 09:13:01 +0000907 *(s-1) = bb_process_escape_sequence((const char **)&p);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000908 if (*pp == '\\') *s++ = '\\';
909 if (p == pp) *s++ = *p++;
910 }
911 }
912 p++;
913 *s = '\0';
914 tc = TC_REGEXP;
915
916 } else if (*p == '.' || isdigit(*p)) {
917 /* it's a number */
918 t.number = strtod(p, &p);
919 if (*p == '.')
920 syntax_error(EMSG_UNEXP_TOKEN);
921 tc = TC_NUMBER;
922
923 } else {
924 /* search for something known */
925 tl = tokenlist;
926 tc = 0x00000001;
927 ti = tokeninfo;
928 while (*tl) {
929 l = *(tl++);
930 if (l == NTCC) {
931 tc <<= 1;
932 continue;
933 }
934 /* if token class is expected, token
935 * matches and it's not a longer word,
936 * then this is what we are looking for
937 */
938 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
939 *tl == *p && strncmp(p, tl, l) == 0 &&
940 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
941 t.info = *ti;
942 p += l;
943 break;
944 }
945 ti++;
946 tl += l;
947 }
948
949 if (! *tl) {
950 /* it's a name (var/array/function),
951 * otherwise it's something wrong
952 */
953 if (! isalnum_(*p))
954 syntax_error(EMSG_UNEXP_TOKEN);
955
956 t.string = --p;
957 while(isalnum_(*(++p))) {
958 *(p-1) = *p;
959 }
960 *(p-1) = '\0';
961 tc = TC_VARIABLE;
962 if (*p == '(') {
963 tc = TC_FUNCTION;
964 } else {
965 skip_spaces(&p);
966 if (*p == '[') {
967 p++;
968 tc = TC_ARRAY;
969 }
970 }
971 }
972 }
973 pos = p;
974
975 /* skipping newlines in some cases */
976 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
977 goto readnext;
978
979 /* insert concatenation operator when needed */
980 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
981 concat_inserted = TRUE;
982 save_tclass = tc;
983 save_info = t.info;
984 tc = TC_BINOP;
985 t.info = OC_CONCAT | SS | P(35);
986 }
987
988 t.tclass = tc;
989 }
990 ltclass = t.tclass;
991
992 /* Are we ready for this? */
993 if (! (ltclass & expected))
994 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
995 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
996
997 return ltclass;
998}
999
1000static void rollback_token(void) { t.rollback = TRUE; }
1001
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001002static node *new_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001003{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001004 register node *n;
1005
1006 n = (node *)xcalloc(sizeof(node), 1);
1007 n->info = info;
1008 n->lineno = lineno;
1009 return n;
1010}
1011
Mike Frysinger10a11e22005-09-27 02:23:02 +00001012static node *mk_re_node(char *s, node *n, regex_t *re)
1013{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001014 n->info = OC_REGEXP;
1015 n->l.re = re;
1016 n->r.ire = re + 1;
1017 xregcomp(re, s, REG_EXTENDED);
1018 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1019
1020 return n;
1021}
1022
Mike Frysinger10a11e22005-09-27 02:23:02 +00001023static node *condition(void)
1024{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001025 next_token(TC_SEQSTART);
1026 return parse_expr(TC_SEQTERM);
1027}
1028
1029/* parse expression terminated by given argument, return ptr
1030 * to built subtree. Terminator is eaten by parse_expr */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001031static node *parse_expr(uint32_t iexp)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001032{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001033 node sn;
1034 node *cn = &sn;
1035 node *vn, *glptr;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001036 uint32_t tc, xtc;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001037 var *v;
1038
1039 sn.info = PRIMASK;
1040 sn.r.n = glptr = NULL;
1041 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1042
1043 while (! ((tc = next_token(xtc)) & iexp)) {
1044 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1045 /* input redirection (<) attached to glptr node */
1046 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
Glenn L McGrath4bded582004-02-22 11:55:09 +00001047 cn->a.n = glptr;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001048 xtc = TC_OPERAND | TC_UOPPRE;
1049 glptr = NULL;
1050
1051 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1052 /* for binary and postfix-unary operators, jump back over
1053 * previous operators with higher priority */
1054 vn = cn;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001055 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
Glenn L McGrath545106f2002-11-11 06:21:00 +00001056 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1057 vn = vn->a.n;
1058 if ((t.info & OPCLSMASK) == OC_TERNARY)
1059 t.info += P(6);
1060 cn = vn->a.n->r.n = new_node(t.info);
1061 cn->a.n = vn->a.n;
1062 if (tc & TC_BINOP) {
1063 cn->l.n = vn;
1064 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1065 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1066 /* it's a pipe */
1067 next_token(TC_GETLINE);
1068 /* give maximum priority to this pipe */
1069 cn->info &= ~PRIMASK;
1070 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1071 }
1072 } else {
1073 cn->r.n = vn;
1074 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1075 }
1076 vn->a.n = cn;
1077
1078 } else {
1079 /* for operands and prefix-unary operators, attach them
1080 * to last node */
1081 vn = cn;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001082 cn = vn->r.n = new_node(t.info);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001083 cn->a.n = vn;
1084 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1085 if (tc & (TC_OPERAND | TC_REGEXP)) {
Rob Landleyed830e82005-06-07 02:43:52 +00001086 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001087 /* one should be very careful with switch on tclass -
Glenn L McGrath545106f2002-11-11 06:21:00 +00001088 * only simple tclasses should be used! */
1089 switch (tc) {
1090 case TC_VARIABLE:
1091 case TC_ARRAY:
1092 cn->info = OC_VAR;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001093 if ((v = hash_search(ahash, t.string)) != NULL) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001094 cn->info = OC_FNARG;
1095 cn->l.i = v->x.aidx;
1096 } else {
Mike Frysingerde2b9382005-09-27 03:18:00 +00001097 cn->l.v = newvar(t.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001098 }
1099 if (tc & TC_ARRAY) {
1100 cn->info |= xS;
1101 cn->r.n = parse_expr(TC_ARRTERM);
1102 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00001103 break;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001104
Glenn L McGrath545106f2002-11-11 06:21:00 +00001105 case TC_NUMBER:
1106 case TC_STRING:
1107 cn->info = OC_VAR;
1108 v = cn->l.v = xcalloc(sizeof(var), 1);
1109 if (tc & TC_NUMBER)
1110 setvar_i(v, t.number);
1111 else
1112 setvar_s(v, t.string);
1113 break;
1114
1115 case TC_REGEXP:
1116 mk_re_node(t.string, cn,
1117 (regex_t *)xcalloc(sizeof(regex_t),2));
1118 break;
1119
1120 case TC_FUNCTION:
Mike Frysingerde2b9382005-09-27 03:18:00 +00001121 cn->info = OC_FUNC;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001122 cn->r.f = newfunc(t.string);
1123 cn->l.n = condition();
1124 break;
1125
1126 case TC_SEQSTART:
1127 cn = vn->r.n = parse_expr(TC_SEQTERM);
1128 cn->a.n = vn;
1129 break;
1130
1131 case TC_GETLINE:
1132 glptr = cn;
1133 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1134 break;
1135
1136 case TC_BUILTIN:
1137 cn->l.n = condition();
1138 break;
1139 }
1140 }
1141 }
1142 }
1143 return sn.r.n;
1144}
1145
1146/* add node to chain. Return ptr to alloc'd node */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001147static node *chain_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001148{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001149 register node *n;
1150
1151 if (! seq->first)
1152 seq->first = seq->last = new_node(0);
1153
1154 if (seq->programname != programname) {
1155 seq->programname = programname;
1156 n = chain_node(OC_NEWSOURCE);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001157 n->l.s = bb_xstrdup(programname);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001158 }
1159
1160 n = seq->last;
1161 n->info = info;
1162 seq->last = n->a.n = new_node(OC_DONE);
1163
1164 return n;
1165}
1166
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001167static void chain_expr(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001168{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001169 node *n;
1170
1171 n = chain_node(info);
1172 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1173 if (t.tclass & TC_GRPTERM)
1174 rollback_token();
1175}
1176
Mike Frysinger10a11e22005-09-27 02:23:02 +00001177static node *chain_loop(node *nn)
1178{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001179 node *n, *n2, *save_brk, *save_cont;
1180
1181 save_brk = break_ptr;
1182 save_cont = continue_ptr;
1183
1184 n = chain_node(OC_BR | Vx);
1185 continue_ptr = new_node(OC_EXEC);
1186 break_ptr = new_node(OC_EXEC);
1187 chain_group();
1188 n2 = chain_node(OC_EXEC | Vx);
1189 n2->l.n = nn;
1190 n2->a.n = n;
1191 continue_ptr->a.n = n2;
1192 break_ptr->a.n = n->r.n = seq->last;
1193
1194 continue_ptr = save_cont;
1195 break_ptr = save_brk;
1196
1197 return n;
1198}
1199
1200/* parse group and attach it to chain */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001201static void chain_group(void)
1202{
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001203 uint32_t c;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001204 node *n, *n2, *n3;
1205
1206 do {
1207 c = next_token(TC_GRPSEQ);
1208 } while (c & TC_NEWLINE);
1209
1210 if (c & TC_GRPSTART) {
1211 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
Mike Frysingerde2b9382005-09-27 03:18:00 +00001212 if (t.tclass & TC_NEWLINE) continue;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001213 rollback_token();
1214 chain_group();
1215 }
1216 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1217 rollback_token();
1218 chain_expr(OC_EXEC | Vx);
1219 } else { /* TC_STATEMNT */
1220 switch (t.info & OPCLSMASK) {
1221 case ST_IF:
1222 n = chain_node(OC_BR | Vx);
1223 n->l.n = condition();
1224 chain_group();
1225 n2 = chain_node(OC_EXEC);
1226 n->r.n = seq->last;
1227 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1228 chain_group();
1229 n2->a.n = seq->last;
1230 } else {
1231 rollback_token();
1232 }
1233 break;
1234
1235 case ST_WHILE:
1236 n2 = condition();
1237 n = chain_loop(NULL);
1238 n->l.n = n2;
1239 break;
1240
1241 case ST_DO:
1242 n2 = chain_node(OC_EXEC);
1243 n = chain_loop(NULL);
1244 n2->a.n = n->a.n;
1245 next_token(TC_WHILE);
1246 n->l.n = condition();
1247 break;
1248
1249 case ST_FOR:
1250 next_token(TC_SEQSTART);
1251 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1252 if (t.tclass & TC_SEQTERM) { /* for-in */
1253 if ((n2->info & OPCLSMASK) != OC_IN)
1254 syntax_error(EMSG_UNEXP_TOKEN);
1255 n = chain_node(OC_WALKINIT | VV);
1256 n->l.n = n2->l.n;
1257 n->r.n = n2->r.n;
1258 n = chain_loop(NULL);
1259 n->info = OC_WALKNEXT | Vx;
1260 n->l.n = n2->l.n;
1261 } else { /* for(;;) */
1262 n = chain_node(OC_EXEC | Vx);
1263 n->l.n = n2;
1264 n2 = parse_expr(TC_SEMICOL);
1265 n3 = parse_expr(TC_SEQTERM);
1266 n = chain_loop(n3);
1267 n->l.n = n2;
1268 if (! n2)
1269 n->info = OC_EXEC;
1270 }
1271 break;
1272
1273 case OC_PRINT:
1274 case OC_PRINTF:
1275 n = chain_node(t.info);
1276 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1277 if (t.tclass & TC_OUTRDR) {
1278 n->info |= t.info;
1279 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1280 }
1281 if (t.tclass & TC_GRPTERM)
1282 rollback_token();
1283 break;
1284
1285 case OC_BREAK:
1286 n = chain_node(OC_EXEC);
1287 n->a.n = break_ptr;
1288 break;
1289
1290 case OC_CONTINUE:
1291 n = chain_node(OC_EXEC);
1292 n->a.n = continue_ptr;
1293 break;
1294
1295 /* delete, next, nextfile, return, exit */
1296 default:
1297 chain_expr(t.info);
1298
1299 }
1300 }
1301}
1302
Mike Frysinger10a11e22005-09-27 02:23:02 +00001303static void parse_program(char *p)
1304{
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001305 uint32_t tclass;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001306 node *cn;
1307 func *f;
1308 var *v;
1309
1310 pos = p;
1311 t.lineno = 1;
1312 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1313 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1314
1315 if (tclass & TC_OPTERM)
1316 continue;
1317
1318 seq = &mainseq;
1319 if (tclass & TC_BEGIN) {
1320 seq = &beginseq;
1321 chain_group();
1322
1323 } else if (tclass & TC_END) {
1324 seq = &endseq;
1325 chain_group();
1326
1327 } else if (tclass & TC_FUNCDECL) {
1328 next_token(TC_FUNCTION);
1329 pos++;
1330 f = newfunc(t.string);
1331 f->body.first = NULL;
1332 f->nargs = 0;
1333 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1334 v = findvar(ahash, t.string);
1335 v->x.aidx = (f->nargs)++;
1336
1337 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1338 break;
1339 }
1340 seq = &(f->body);
1341 chain_group();
1342 clear_array(ahash);
1343
1344 } else if (tclass & TC_OPSEQ) {
1345 rollback_token();
1346 cn = chain_node(OC_TEST);
1347 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1348 if (t.tclass & TC_GRPSTART) {
1349 rollback_token();
1350 chain_group();
1351 } else {
1352 chain_node(OC_PRINT);
1353 }
1354 cn->r.n = mainseq.last;
1355
1356 } else /* if (tclass & TC_GRPSTART) */ {
1357 rollback_token();
1358 chain_group();
1359 }
1360 }
1361}
1362
1363
1364/* -------- program execution part -------- */
1365
Mike Frysinger10a11e22005-09-27 02:23:02 +00001366static node *mk_splitter(char *s, tsplitter *spl)
1367{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001368 register regex_t *re, *ire;
1369 node *n;
1370
1371 re = &spl->re[0];
1372 ire = &spl->re[1];
1373 n = &spl->n;
1374 if ((n->info && OPCLSMASK) == OC_REGEXP) {
1375 regfree(re);
1376 regfree(ire);
1377 }
Manuel Novoa III cad53642003-03-19 09:13:01 +00001378 if (bb_strlen(s) > 1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001379 mk_re_node(s, n, re);
1380 } else {
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001381 n->info = (uint32_t) *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001382 }
1383
1384 return n;
1385}
1386
1387/* use node as a regular expression. Supplied with node ptr and regex_t
Eric Andersenaff114c2004-04-14 17:51:38 +00001388 * storage space. Return ptr to regex (if result points to preg, it should
Glenn L McGrath545106f2002-11-11 06:21:00 +00001389 * be later regfree'd manually
1390 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001391static regex_t *as_regex(node *op, regex_t *preg)
1392{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001393 var *v;
1394 char *s;
1395
1396 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1397 return icase ? op->r.ire : op->l.re;
1398 } else {
1399 v = nvalloc(1);
1400 s = getvar_s(evaluate(op, v));
1401 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1402 nvfree(v);
1403 return preg;
1404 }
1405}
1406
/* gradually increasing buffer: make sure *b is allocated and holds more
 * than n bytes; when it grows, the new capacity n + n/2 + 80 is stored
 * in *size */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n>>1) + 80;
	*b = xrealloc(*b, *size);
}
1413
1414/* resize field storage space */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001415static void fsrealloc(int size)
1416{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001417 static int maxfields = 0;
1418 int i;
1419
1420 if (size >= maxfields) {
1421 i = maxfields;
1422 maxfields = size + 16;
1423 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1424 for (; i<maxfields; i++) {
1425 Fields[i].type = VF_SPECIAL;
1426 Fields[i].string = NULL;
1427 }
1428 }
1429
1430 if (size < nfields) {
1431 for (i=size; i<nfields; i++) {
1432 clrvar(Fields+i);
1433 }
1434 }
1435 nfields = size;
1436}
1437
Mike Frysinger10a11e22005-09-27 02:23:02 +00001438static int awk_split(char *s, node *spl, char **slist)
1439{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001440 int l, n=0;
1441 char c[4];
1442 char *s1;
1443 regmatch_t pmatch[2];
1444
1445 /* in worst case, each char would be a separate field */
Manuel Novoa III cad53642003-03-19 09:13:01 +00001446 *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001447
1448 c[0] = c[1] = (char)spl->info;
1449 c[2] = c[3] = '\0';
1450 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1451
1452 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1453 while (*s) {
1454 l = strcspn(s, c+2);
1455 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1456 pmatch[0].rm_so <= l) {
1457 l = pmatch[0].rm_so;
1458 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1459 } else {
1460 pmatch[0].rm_eo = l;
1461 if (*(s+l)) pmatch[0].rm_eo++;
1462 }
1463
1464 memcpy(s1, s, l);
1465 *(s1+l) = '\0';
1466 nextword(&s1);
1467 s += pmatch[0].rm_eo;
1468 n++;
1469 }
1470 } else if (c[0] == '\0') { /* null split */
1471 while(*s) {
1472 *(s1++) = *(s++);
1473 *(s1++) = '\0';
1474 n++;
1475 }
1476 } else if (c[0] != ' ') { /* single-character split */
1477 if (icase) {
1478 c[0] = toupper(c[0]);
1479 c[1] = tolower(c[1]);
1480 }
1481 if (*s1) n++;
1482 while ((s1 = strpbrk(s1, c))) {
1483 *(s1++) = '\0';
1484 n++;
1485 }
1486 } else { /* space split */
1487 while (*s) {
1488 while (isspace(*s)) s++;
1489 if (! *s) break;
1490 n++;
1491 while (*s && !isspace(*s))
1492 *(s1++) = *(s++);
1493 *(s1++) = '\0';
1494 }
1495 }
1496 return n;
1497}
1498
Mike Frysinger10a11e22005-09-27 02:23:02 +00001499static void split_f0(void)
1500{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001501 static char *fstrings = NULL;
1502 int i, n;
1503 char *s;
1504
1505 if (is_f0_split)
1506 return;
1507
1508 is_f0_split = TRUE;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00001509 free(fstrings);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001510 fsrealloc(0);
1511 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1512 fsrealloc(n);
1513 s = fstrings;
1514 for (i=0; i<n; i++) {
1515 Fields[i].string = nextword(&s);
1516 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1517 }
1518
1519 /* set NF manually to avoid side effects */
1520 clrvar(V[NF]);
1521 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1522 V[NF]->number = nfields;
1523}
1524
1525/* perform additional actions when some internal variables changed */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001526static void handle_special(var *v)
1527{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001528 int n;
1529 char *b, *sep, *s;
1530 int sl, l, len, i, bsize;
1531
1532 if (! (v->type & VF_SPECIAL))
1533 return;
1534
1535 if (v == V[NF]) {
1536 n = (int)getvar_i(v);
1537 fsrealloc(n);
1538
1539 /* recalculate $0 */
1540 sep = getvar_s(V[OFS]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001541 sl = bb_strlen(sep);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001542 b = NULL;
1543 len = 0;
1544 for (i=0; i<n; i++) {
1545 s = getvar_s(&Fields[i]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001546 l = bb_strlen(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001547 if (b) {
1548 memcpy(b+len, sep, sl);
1549 len += sl;
1550 }
1551 qrealloc(&b, len+l+sl, &bsize);
1552 memcpy(b+len, s, l);
1553 len += l;
1554 }
Glenn L McGrathca29ffc2004-09-24 09:24:27 +00001555 if (b) b[len] = '\0';
Glenn L McGrath545106f2002-11-11 06:21:00 +00001556 setvar_p(V[F0], b);
1557 is_f0_split = TRUE;
1558
1559 } else if (v == V[F0]) {
1560 is_f0_split = FALSE;
1561
1562 } else if (v == V[FS]) {
1563 mk_splitter(getvar_s(v), &fsplitter);
1564
1565 } else if (v == V[RS]) {
1566 mk_splitter(getvar_s(v), &rsplitter);
1567
1568 } else if (v == V[IGNORECASE]) {
1569 icase = istrue(v);
1570
1571 } else { /* $n */
1572 n = getvar_i(V[NF]);
1573 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1574 /* right here v is invalid. Just to note... */
1575 }
1576}
1577
1578/* step through func/builtin/etc arguments */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001579static node *nextarg(node **pn)
1580{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001581 node *n;
1582
1583 n = *pn;
1584 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1585 *pn = n->r.n;
1586 n = n->l.n;
1587 } else {
1588 *pn = NULL;
1589 }
1590 return n;
1591}
1592
Mike Frysinger10a11e22005-09-27 02:23:02 +00001593static void hashwalk_init(var *v, xhash *array)
1594{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001595 char **w;
1596 hash_item *hi;
1597 int i;
1598
1599 if (v->type & VF_WALK)
1600 free(v->x.walker);
1601
1602 v->type |= VF_WALK;
1603 w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1604 *w = *(w+1) = (char *)(w + 2);
1605 for (i=0; i<array->csize; i++) {
1606 hi = array->items[i];
1607 while(hi) {
1608 strcpy(*w, hi->name);
1609 nextword(w);
1610 hi = hi->next;
1611 }
1612 }
1613}
1614
Mike Frysinger10a11e22005-09-27 02:23:02 +00001615static int hashwalk_next(var *v)
1616{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001617 char **w;
1618
1619 w = v->x.walker;
1620 if (*(w+1) == *w)
1621 return FALSE;
1622
1623 setvar_s(v, nextword(w+1));
1624 return TRUE;
1625}
1626
1627/* evaluate node, return 1 when result is true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001628static int ptest(node *pattern)
1629{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001630 static var v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001631 return istrue(evaluate(pattern, &v));
1632}
1633
1634/* read next record from stream rsm into a variable v */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001635static int awk_getline(rstream *rsm, var *v)
1636{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001637 char *b;
1638 regmatch_t pmatch[2];
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001639 int a, p, pp=0, size;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001640 int fd, so, eo, r, rp;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001641 char c, *m, *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001642
1643 /* we're using our own buffer since we need access to accumulating
1644 * characters
1645 */
1646 fd = fileno(rsm->F);
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001647 m = rsm->buffer;
1648 a = rsm->adv;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001649 p = rsm->pos;
1650 size = rsm->size;
1651 c = (char) rsplitter.n.info;
1652 rp = 0;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001653
1654 if (! m) qrealloc(&m, 256, &size);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001655 do {
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001656 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001657 so = eo = p;
1658 r = 1;
1659 if (p > 0) {
1660 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1661 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1662 b, 1, pmatch, 0) == 0) {
1663 so = pmatch[0].rm_so;
1664 eo = pmatch[0].rm_eo;
1665 if (b[eo] != '\0')
1666 break;
1667 }
1668 } else if (c != '\0') {
1669 s = strchr(b+pp, c);
1670 if (s) {
1671 so = eo = s-b;
1672 eo++;
1673 break;
1674 }
1675 } else {
1676 while (b[rp] == '\n')
1677 rp++;
1678 s = strstr(b+rp, "\n\n");
1679 if (s) {
1680 so = eo = s-b;
1681 while (b[eo] == '\n') eo++;
1682 if (b[eo] != '\0')
1683 break;
1684 }
1685 }
1686 }
1687
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001688 if (a > 0) {
1689 memmove(m, (const void *)(m+a), p+1);
1690 b = m;
1691 a = 0;
1692 }
1693
1694 qrealloc(&m, a+p+128, &size);
1695 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001696 pp = p;
1697 p += safe_read(fd, b+p, size-p-1);
1698 if (p < pp) {
1699 p = 0;
1700 r = 0;
1701 setvar_i(V[ERRNO], errno);
1702 }
1703 b[p] = '\0';
1704
1705 } while (p > pp);
1706
1707 if (p == 0) {
1708 r--;
1709 } else {
1710 c = b[so]; b[so] = '\0';
1711 setvar_s(v, b+rp);
1712 v->type |= VF_USER;
1713 b[so] = c;
1714 c = b[eo]; b[eo] = '\0';
1715 setvar_s(V[RT], b+so);
1716 b[eo] = c;
1717 }
1718
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001719 rsm->buffer = m;
1720 rsm->adv = a + eo;
1721 rsm->pos = p - eo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001722 rsm->size = size;
1723
1724 return r;
1725}
1726
Mike Frysinger10a11e22005-09-27 02:23:02 +00001727static int fmt_num(char *b, int size, char *format, double n, int int_as_int)
1728{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001729 int r=0;
1730 char c, *s=format;
1731
1732 if (int_as_int && n == (int)n) {
1733 r = snprintf(b, size, "%d", (int)n);
1734 } else {
1735 do { c = *s; } while (*s && *++s);
1736 if (strchr("diouxX", c)) {
1737 r = snprintf(b, size, format, (int)n);
1738 } else if (strchr("eEfgG", c)) {
1739 r = snprintf(b, size, format, n);
1740 } else {
1741 runtime_error(EMSG_INV_FMT);
1742 }
1743 }
1744 return r;
1745}
1746
1747
1748/* formatted output into an allocated buffer, return ptr to buffer */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001749static char *awk_printf(node *n)
1750{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001751 char *b = NULL;
1752 char *fmt, *s, *s1, *f;
1753 int i, j, incr, bsize;
1754 char c, c1;
1755 var *v, *arg;
1756
1757 v = nvalloc(1);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001758 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
Glenn L McGrath545106f2002-11-11 06:21:00 +00001759
1760 i = 0;
1761 while (*f) {
1762 s = f;
1763 while (*f && (*f != '%' || *(++f) == '%'))
1764 f++;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001765 while (*f && !isalpha(*f))
Glenn L McGrath545106f2002-11-11 06:21:00 +00001766 f++;
1767
1768 incr = (f - s) + MAXVARFMT;
1769 qrealloc(&b, incr+i, &bsize);
1770 c = *f; if (c != '\0') f++;
1771 c1 = *f ; *f = '\0';
1772 arg = evaluate(nextarg(&n), v);
1773
1774 j = i;
1775 if (c == 'c' || !c) {
1776 i += sprintf(b+i, s,
1777 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1778
1779 } else if (c == 's') {
1780 s1 = getvar_s(arg);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001781 qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001782 i += sprintf(b+i, s, s1);
1783
1784 } else {
1785 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1786 }
1787 *f = c1;
1788
1789 /* if there was an error while sprintf, return value is negative */
1790 if (i < j) i = j;
1791
1792 }
1793
1794 b = xrealloc(b, i+1);
1795 free(fmt);
1796 nvfree(v);
1797 b[i] = '\0';
1798 return b;
1799}
1800
1801/* common substitution routine
1802 * replace (nm) substring of (src) that match (n) with (repl), store
1803 * result into (dest), return number of substitutions. If nm=0, replace
1804 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1805 * subexpression matching (\1-\9)
1806 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001807static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1808{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001809 char *ds = NULL;
1810 char *sp, *s;
1811 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1812 regmatch_t pmatch[10];
1813 regex_t sreg, *re;
1814
1815 re = as_regex(rn, &sreg);
1816 if (! src) src = V[F0];
1817 if (! dest) dest = V[F0];
1818
1819 i = di = 0;
1820 sp = getvar_s(src);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001821 rl = bb_strlen(repl);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001822 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1823 so = pmatch[0].rm_so;
1824 eo = pmatch[0].rm_eo;
1825
1826 qrealloc(&ds, di + eo + rl, &dssize);
1827 memcpy(ds + di, sp, eo);
1828 di += eo;
1829 if (++i >= nm) {
1830 /* replace */
1831 di -= (eo - so);
1832 nbs = 0;
1833 for (s = repl; *s; s++) {
1834 ds[di++] = c = *s;
1835 if (c == '\\') {
1836 nbs++;
1837 continue;
1838 }
1839 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1840 di -= ((nbs + 3) >> 1);
1841 j = 0;
1842 if (c != '&') {
1843 j = c - '0';
1844 nbs++;
1845 }
1846 if (nbs % 2) {
1847 ds[di++] = c;
1848 } else {
1849 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1850 qrealloc(&ds, di + rl + n, &dssize);
1851 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1852 di += n;
1853 }
1854 }
1855 nbs = 0;
1856 }
1857 }
1858
1859 sp += eo;
1860 if (i == nm) break;
1861 if (eo == so) {
1862 if (! (ds[di++] = *sp++)) break;
1863 }
1864 }
1865
1866 qrealloc(&ds, di + strlen(sp), &dssize);
1867 strcpy(ds + di, sp);
1868 setvar_p(dest, ds);
1869 if (re == &sreg) regfree(re);
1870 return i;
1871}
1872
Mike Frysinger10a11e22005-09-27 02:23:02 +00001873static var *exec_builtin(node *op, var *res)
1874{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001875 int (*to_xxx)(int);
1876 var *tv;
1877 node *an[4];
1878 var *av[4];
1879 char *as[4];
1880 regmatch_t pmatch[2];
1881 regex_t sreg, *re;
1882 static tsplitter tspl;
1883 node *spl;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001884 uint32_t isr, info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001885 int nargs;
1886 time_t tt;
1887 char *s, *s1;
1888 int i, l, ll, n;
1889
1890 tv = nvalloc(4);
1891 isr = info = op->info;
1892 op = op->l.n;
1893
1894 av[2] = av[3] = NULL;
1895 for (i=0 ; i<4 && op ; i++) {
1896 an[i] = nextarg(&op);
1897 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1898 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1899 isr >>= 1;
1900 }
1901
1902 nargs = i;
1903 if (nargs < (info >> 30))
1904 runtime_error(EMSG_TOO_FEW_ARGS);
1905
1906 switch (info & OPNMASK) {
1907
1908 case B_a2:
1909#ifdef CONFIG_FEATURE_AWK_MATH
1910 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1911#else
1912 runtime_error(EMSG_NO_MATH);
1913#endif
1914 break;
1915
1916 case B_sp:
1917 if (nargs > 2) {
1918 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1919 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1920 } else {
1921 spl = &fsplitter.n;
1922 }
1923
1924 n = awk_split(as[0], spl, &s);
1925 s1 = s;
1926 clear_array(iamarray(av[1]));
1927 for (i=1; i<=n; i++)
1928 setari_u(av[1], i, nextword(&s1));
1929 free(s);
1930 setvar_i(res, n);
1931 break;
1932
1933 case B_ss:
Manuel Novoa III cad53642003-03-19 09:13:01 +00001934 l = bb_strlen(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001935 i = getvar_i(av[1]) - 1;
1936 if (i>l) i=l; if (i<0) i=0;
1937 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1938 if (n<0) n=0;
1939 s = xmalloc(n+1);
1940 strncpy(s, as[0]+i, n);
1941 s[n] = '\0';
1942 setvar_p(res, s);
1943 break;
1944
1945 case B_lo:
1946 to_xxx = tolower;
1947 goto lo_cont;
1948
1949 case B_up:
1950 to_xxx = toupper;
1951lo_cont:
Manuel Novoa III cad53642003-03-19 09:13:01 +00001952 s1 = s = bb_xstrdup(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001953 while (*s1) {
1954 *s1 = (*to_xxx)(*s1);
1955 s1++;
1956 }
1957 setvar_p(res, s);
1958 break;
1959
1960 case B_ix:
1961 n = 0;
Manuel Novoa III cad53642003-03-19 09:13:01 +00001962 ll = bb_strlen(as[1]);
1963 l = bb_strlen(as[0]) - ll;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001964 if (ll > 0 && l >= 0) {
1965 if (! icase) {
1966 s = strstr(as[0], as[1]);
1967 if (s) n = (s - as[0]) + 1;
1968 } else {
1969 /* this piece of code is terribly slow and
1970 * really should be rewritten
1971 */
1972 for (i=0; i<=l; i++) {
1973 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1974 n = i+1;
1975 break;
1976 }
1977 }
1978 }
1979 }
1980 setvar_i(res, n);
1981 break;
1982
1983 case B_ti:
1984 if (nargs > 1)
1985 tt = getvar_i(av[1]);
1986 else
1987 time(&tt);
1988 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1989 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1990 buf[i] = '\0';
1991 setvar_s(res, buf);
1992 break;
1993
1994 case B_ma:
1995 re = as_regex(an[1], &sreg);
1996 n = regexec(re, as[0], 1, pmatch, 0);
1997 if (n == 0) {
1998 pmatch[0].rm_so++;
1999 pmatch[0].rm_eo++;
2000 } else {
2001 pmatch[0].rm_so = 0;
2002 pmatch[0].rm_eo = -1;
2003 }
2004 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2005 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2006 setvar_i(res, pmatch[0].rm_so);
2007 if (re == &sreg) regfree(re);
2008 break;
2009
2010 case B_ge:
2011 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2012 break;
2013
2014 case B_gs:
2015 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2016 break;
2017
2018 case B_su:
2019 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2020 break;
2021 }
2022
2023 nvfree(tv);
2024 return res;
2025}
2026
2027/*
2028 * Evaluate node - the heart of the program. Supplied with subtree
2029 * and place where to store result. returns ptr to result.
2030 */
2031#define XC(n) ((n) >> 8)
2032
Mike Frysinger10a11e22005-09-27 02:23:02 +00002033static var *evaluate(node *op, var *res)
2034{
Mike Frysingerde2b9382005-09-27 03:18:00 +00002035 /* This procedure is recursive so we should count every byte */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002036 static var *fnargs = NULL;
2037 static unsigned int seed = 1;
2038 static regex_t sreg;
2039 node *op1;
2040 var *v1;
2041 union {
2042 var *v;
2043 char *s;
2044 double d;
2045 int i;
2046 } L, R;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00002047 uint32_t opinfo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002048 short opn;
2049 union {
2050 char *s;
2051 rstream *rsm;
2052 FILE *F;
2053 var *v;
2054 regex_t *re;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00002055 uint32_t info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002056 } X;
2057
2058 if (! op)
2059 return setvar_s(res, NULL);
2060
2061 v1 = nvalloc(2);
2062
2063 while (op) {
2064
2065 opinfo = op->info;
2066 opn = (short)(opinfo & OPNMASK);
2067 lineno = op->lineno;
2068
Mike Frysingerde2b9382005-09-27 03:18:00 +00002069 /* execute inevitable things */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002070 op1 = op->l.n;
2071 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2072 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2073 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2074 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2075 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2076
2077 switch (XC(opinfo & OPCLSMASK)) {
2078
2079 /* -- iterative node type -- */
2080
2081 /* test pattern */
2082 case XC( OC_TEST ):
2083 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2084 /* it's range pattern */
2085 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2086 op->info |= OF_CHECKED;
2087 if (ptest(op1->r.n))
2088 op->info &= ~OF_CHECKED;
2089
2090 op = op->a.n;
2091 } else {
2092 op = op->r.n;
2093 }
2094 } else {
2095 op = (ptest(op1)) ? op->a.n : op->r.n;
2096 }
2097 break;
2098
2099 /* just evaluate an expression, also used as unconditional jump */
2100 case XC( OC_EXEC ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002101 break;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002102
2103 /* branch, used in if-else and various loops */
2104 case XC( OC_BR ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002105 op = istrue(L.v) ? op->a.n : op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002106 break;
2107
2108 /* initialize for-in loop */
2109 case XC( OC_WALKINIT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002110 hashwalk_init(L.v, iamarray(R.v));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002111 break;
2112
2113 /* get next array item */
2114 case XC( OC_WALKNEXT ):
2115 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2116 break;
2117
2118 case XC( OC_PRINT ):
2119 case XC( OC_PRINTF ):
2120 X.F = stdout;
Mike Frysingerde2b9382005-09-27 03:18:00 +00002121 if (op->r.n) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002122 X.rsm = newfile(R.s);
2123 if (! X.rsm->F) {
2124 if (opn == '|') {
2125 if((X.rsm->F = popen(R.s, "w")) == NULL)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002126 bb_perror_msg_and_die("popen");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002127 X.rsm->is_pipe = 1;
2128 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002129 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002130 }
2131 }
2132 X.F = X.rsm->F;
2133 }
2134
2135 if ((opinfo & OPCLSMASK) == OC_PRINT) {
Mike Frysingerde2b9382005-09-27 03:18:00 +00002136 if (! op1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002137 fputs(getvar_s(V[F0]), X.F);
2138 } else {
2139 while (op1) {
2140 L.v = evaluate(nextarg(&op1), v1);
2141 if (L.v->type & VF_NUMBER) {
2142 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2143 getvar_i(L.v), TRUE);
2144 fputs(buf, X.F);
2145 } else {
2146 fputs(getvar_s(L.v), X.F);
2147 }
2148
2149 if (op1) fputs(getvar_s(V[OFS]), X.F);
2150 }
2151 }
2152 fputs(getvar_s(V[ORS]), X.F);
2153
2154 } else { /* OC_PRINTF */
2155 L.s = awk_printf(op1);
2156 fputs(L.s, X.F);
2157 free(L.s);
2158 }
2159 fflush(X.F);
2160 break;
2161
2162 case XC( OC_DELETE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002163 X.info = op1->info & OPCLSMASK;
2164 if (X.info == OC_VAR) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002165 R.v = op1->l.v;
2166 } else if (X.info == OC_FNARG) {
2167 R.v = &fnargs[op1->l.i];
2168 } else {
2169 runtime_error(EMSG_NOT_ARRAY);
2170 }
2171
Mike Frysingerde2b9382005-09-27 03:18:00 +00002172 if (op1->r.n) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002173 clrvar(L.v);
2174 L.s = getvar_s(evaluate(op1->r.n, v1));
2175 hash_remove(iamarray(R.v), L.s);
2176 } else {
2177 clear_array(iamarray(R.v));
2178 }
2179 break;
2180
2181 case XC( OC_NEWSOURCE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002182 programname = op->l.s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002183 break;
2184
2185 case XC( OC_RETURN ):
2186 copyvar(res, L.v);
2187 break;
2188
2189 case XC( OC_NEXTFILE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002190 nextfile = TRUE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002191 case XC( OC_NEXT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002192 nextrec = TRUE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002193 case XC( OC_DONE ):
2194 clrvar(res);
2195 break;
2196
2197 case XC( OC_EXIT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002198 awk_exit(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002199
2200 /* -- recursive node type -- */
2201
2202 case XC( OC_VAR ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002203 L.v = op->l.v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002204 if (L.v == V[NF])
2205 split_f0();
2206 goto v_cont;
2207
2208 case XC( OC_FNARG ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002209 L.v = &fnargs[op->l.i];
Glenn L McGrath545106f2002-11-11 06:21:00 +00002210
2211v_cont:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002212 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002213 break;
2214
2215 case XC( OC_IN ):
2216 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2217 break;
2218
2219 case XC( OC_REGEXP ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002220 op1 = op;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002221 L.s = getvar_s(V[F0]);
2222 goto re_cont;
2223
2224 case XC( OC_MATCH ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002225 op1 = op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002226re_cont:
2227 X.re = as_regex(op1, &sreg);
2228 R.i = regexec(X.re, L.s, 0, NULL, 0);
2229 if (X.re == &sreg) regfree(X.re);
2230 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2231 break;
2232
2233 case XC( OC_MOVE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002234 /* if source is a temporary string, just relink it to dest */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002235 if (R.v == v1+1 && R.v->string) {
2236 res = setvar_p(L.v, R.v->string);
2237 R.v->string = NULL;
2238 } else {
Mike Frysingerde2b9382005-09-27 03:18:00 +00002239 res = copyvar(L.v, R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002240 }
2241 break;
2242
2243 case XC( OC_TERNARY ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002244 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002245 runtime_error(EMSG_POSSIBLE_ERROR);
2246 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2247 break;
2248
2249 case XC( OC_FUNC ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002250 if (! op->r.f->body.first)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002251 runtime_error(EMSG_UNDEF_FUNC);
2252
2253 X.v = R.v = nvalloc(op->r.f->nargs+1);
2254 while (op1) {
2255 L.v = evaluate(nextarg(&op1), v1);
2256 copyvar(R.v, L.v);
2257 R.v->type |= VF_CHILD;
2258 R.v->x.parent = L.v;
2259 if (++R.v - X.v >= op->r.f->nargs)
2260 break;
2261 }
2262
2263 R.v = fnargs;
2264 fnargs = X.v;
2265
2266 L.s = programname;
2267 res = evaluate(op->r.f->body.first, res);
2268 programname = L.s;
2269
2270 nvfree(fnargs);
2271 fnargs = R.v;
2272 break;
2273
2274 case XC( OC_GETLINE ):
2275 case XC( OC_PGETLINE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002276 if (op1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002277 X.rsm = newfile(L.s);
2278 if (! X.rsm->F) {
2279 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2280 X.rsm->F = popen(L.s, "r");
2281 X.rsm->is_pipe = TRUE;
2282 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002283 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002284 }
2285 }
2286 } else {
2287 if (! iF) iF = next_input_file();
2288 X.rsm = iF;
2289 }
2290
2291 if (! X.rsm->F) {
2292 setvar_i(V[ERRNO], errno);
2293 setvar_i(res, -1);
2294 break;
2295 }
2296
2297 if (! op->r.n)
2298 R.v = V[F0];
2299
2300 L.i = awk_getline(X.rsm, R.v);
2301 if (L.i > 0) {
2302 if (! op1) {
2303 incvar(V[FNR]);
2304 incvar(V[NR]);
2305 }
2306 }
2307 setvar_i(res, L.i);
2308 break;
2309
Mike Frysingerde2b9382005-09-27 03:18:00 +00002310 /* simple builtins */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002311 case XC( OC_FBLTIN ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002312 switch (opn) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002313
2314 case F_in:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002315 R.d = (int)L.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002316 break;
2317
2318 case F_rn:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002319 R.d = (double)rand() / (double)RAND_MAX;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002320 break;
2321
2322#ifdef CONFIG_FEATURE_AWK_MATH
2323 case F_co:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002324 R.d = cos(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002325 break;
2326
2327 case F_ex:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002328 R.d = exp(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002329 break;
2330
2331 case F_lg:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002332 R.d = log(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002333 break;
2334
2335 case F_si:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002336 R.d = sin(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002337 break;
2338
2339 case F_sq:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002340 R.d = sqrt(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002341 break;
2342#else
2343 case F_co:
2344 case F_ex:
2345 case F_lg:
2346 case F_si:
2347 case F_sq:
2348 runtime_error(EMSG_NO_MATH);
2349 break;
2350#endif
2351
2352 case F_sr:
2353 R.d = (double)seed;
2354 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2355 srand(seed);
2356 break;
2357
2358 case F_ti:
2359 R.d = time(NULL);
2360 break;
2361
2362 case F_le:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002363 if (! op1)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002364 L.s = getvar_s(V[F0]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00002365 R.d = bb_strlen(L.s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002366 break;
2367
2368 case F_sy:
2369 fflush(NULL);
2370 R.d = (L.s && *L.s) ? system(L.s) : 0;
2371 break;
2372
2373 case F_ff:
2374 if (! op1)
2375 fflush(stdout);
2376 else {
2377 if (L.s && *L.s) {
2378 X.rsm = newfile(L.s);
2379 fflush(X.rsm->F);
2380 } else {
2381 fflush(NULL);
2382 }
2383 }
2384 break;
2385
2386 case F_cl:
2387 X.rsm = (rstream *)hash_search(fdhash, L.s);
2388 if (X.rsm) {
2389 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00002390 free(X.rsm->buffer);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002391 hash_remove(fdhash, L.s);
2392 }
2393 if (R.i != 0)
2394 setvar_i(V[ERRNO], errno);
2395 R.d = (double)R.i;
2396 break;
2397 }
2398 setvar_i(res, R.d);
2399 break;
2400
2401 case XC( OC_BUILTIN ):
2402 res = exec_builtin(op, res);
2403 break;
2404
2405 case XC( OC_SPRINTF ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002406 setvar_p(res, awk_printf(op1));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002407 break;
2408
2409 case XC( OC_UNARY ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002410 X.v = R.v;
2411 L.d = R.d = getvar_i(R.v);
2412 switch (opn) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002413 case 'P':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002414 L.d = ++R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002415 goto r_op_change;
2416 case 'p':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002417 R.d++;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002418 goto r_op_change;
2419 case 'M':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002420 L.d = --R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002421 goto r_op_change;
2422 case 'm':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002423 R.d--;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002424 goto r_op_change;
2425 case '!':
2426 L.d = istrue(X.v) ? 0 : 1;
2427 break;
2428 case '-':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002429 L.d = -R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002430 break;
2431 r_op_change:
2432 setvar_i(X.v, R.d);
2433 }
2434 setvar_i(res, L.d);
2435 break;
2436
2437 case XC( OC_FIELD ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002438 R.i = (int)getvar_i(R.v);
2439 if (R.i == 0) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002440 res = V[F0];
2441 } else {
2442 split_f0();
2443 if (R.i > nfields)
2444 fsrealloc(R.i);
2445
2446 res = &Fields[R.i-1];
2447 }
2448 break;
2449
2450 /* concatenation (" ") and index joining (",") */
2451 case XC( OC_CONCAT ):
2452 case XC( OC_COMMA ):
Manuel Novoa III cad53642003-03-19 09:13:01 +00002453 opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
Mike Frysingerde2b9382005-09-27 03:18:00 +00002454 X.s = (char *)xmalloc(opn);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002455 strcpy(X.s, L.s);
2456 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2457 L.s = getvar_s(V[SUBSEP]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00002458 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002459 strcat(X.s, L.s);
2460 }
2461 strcat(X.s, R.s);
2462 setvar_p(res, X.s);
2463 break;
2464
2465 case XC( OC_LAND ):
2466 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2467 break;
2468
2469 case XC( OC_LOR ):
2470 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2471 break;
2472
2473 case XC( OC_BINARY ):
2474 case XC( OC_REPLACE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002475 R.d = getvar_i(R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002476 switch (opn) {
2477 case '+':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002478 L.d += R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002479 break;
2480 case '-':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002481 L.d -= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002482 break;
2483 case '*':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002484 L.d *= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002485 break;
2486 case '/':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002487 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2488 L.d /= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002489 break;
2490 case '&':
2491#ifdef CONFIG_FEATURE_AWK_MATH
Mike Frysingerde2b9382005-09-27 03:18:00 +00002492 L.d = pow(L.d, R.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002493#else
2494 runtime_error(EMSG_NO_MATH);
2495#endif
2496 break;
2497 case '%':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002498 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2499 L.d -= (int)(L.d / R.d) * R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002500 break;
2501 }
2502 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2503 break;
2504
2505 case XC( OC_COMPARE ):
2506 if (is_numeric(L.v) && is_numeric(R.v)) {
2507 L.d = getvar_i(L.v) - getvar_i(R.v);
2508 } else {
2509 L.s = getvar_s(L.v);
2510 R.s = getvar_s(R.v);
2511 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2512 }
2513 switch (opn & 0xfe) {
2514 case 0:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002515 R.i = (L.d > 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002516 break;
2517 case 2:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002518 R.i = (L.d >= 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002519 break;
2520 case 4:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002521 R.i = (L.d == 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002522 break;
2523 }
2524 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2525 break;
2526
2527 default:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002528 runtime_error(EMSG_POSSIBLE_ERROR);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002529 }
2530 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2531 op = op->a.n;
2532 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2533 break;
2534 if (nextrec)
2535 break;
2536 }
2537 nvfree(v1);
2538 return res;
2539}
2540
2541
2542/* -------- main & co. -------- */
2543
Mike Frysinger10a11e22005-09-27 02:23:02 +00002544static int awk_exit(int r)
2545{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002546 unsigned int i;
2547 hash_item *hi;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002548 static var tv;
2549
2550 if (! exiting) {
2551 exiting = TRUE;
Glenn L McGrathca29ffc2004-09-24 09:24:27 +00002552 nextrec = FALSE;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002553 evaluate(endseq.first, &tv);
2554 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002555
2556 /* waiting for children */
2557 for (i=0; i<fdhash->csize; i++) {
2558 hi = fdhash->items[i];
2559 while(hi) {
2560 if (hi->data.rs.F && hi->data.rs.is_pipe)
2561 pclose(hi->data.rs.F);
2562 hi = hi->next;
2563 }
2564 }
2565
2566 exit(r);
2567}
2568
2569/* if expr looks like "var=value", perform assignment and return 1,
2570 * otherwise return 0 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00002571static int is_assignment(char *expr)
2572{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002573 char *exprc, *s, *s0, *s1;
2574
Manuel Novoa III cad53642003-03-19 09:13:01 +00002575 exprc = bb_xstrdup(expr);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002576 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2577 free(exprc);
2578 return FALSE;
2579 }
2580
2581 *(s++) = '\0';
2582 s0 = s1 = s;
2583 while (*s)
2584 *(s1++) = nextchar(&s);
2585
2586 *s1 = '\0';
2587 setvar_u(newvar(exprc), s0);
2588 free(exprc);
2589 return TRUE;
2590}
2591
2592/* switch to next input file */
Mike Frysinger10a11e22005-09-27 02:23:02 +00002593static rstream *next_input_file(void)
2594{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002595 static rstream rsm;
2596 FILE *F = NULL;
2597 char *fname, *ind;
2598 static int files_happen = FALSE;
2599
2600 if (rsm.F) fclose(rsm.F);
2601 rsm.F = NULL;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002602 rsm.pos = rsm.adv = 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002603
2604 do {
2605 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2606 if (files_happen)
2607 return NULL;
2608 fname = "-";
2609 F = stdin;
2610 } else {
2611 ind = getvar_s(incvar(V[ARGIND]));
2612 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2613 if (fname && *fname && !is_assignment(fname))
2614 F = afopen(fname, "r");
2615 }
2616 } while (!F);
2617
2618 files_happen = TRUE;
2619 setvar_s(V[FILENAME], fname);
2620 rsm.F = F;
2621 return &rsm;
2622}
2623
Mike Frysinger10a11e22005-09-27 02:23:02 +00002624extern int awk_main(int argc, char **argv)
2625{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002626 char *s, *s1;
2627 int i, j, c;
2628 var *v;
2629 static var tv;
2630 char **envp;
2631 static int from_file = FALSE;
2632 rstream *rsm;
2633 FILE *F, *stdfiles[3];
2634 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2635
2636 /* allocate global buffer */
2637 buf = xmalloc(MAXVARFMT+1);
2638
2639 vhash = hash_init();
2640 ahash = hash_init();
2641 fdhash = hash_init();
2642 fnhash = hash_init();
2643
2644 /* initialize variables */
2645 for (i=0; *vNames; i++) {
2646 V[i] = v = newvar(nextword(&vNames));
2647 if (*vValues != '\377')
2648 setvar_s(v, nextword(&vValues));
2649 else
2650 setvar_i(v, 0);
2651
2652 if (*vNames == '*') {
2653 v->type |= VF_SPECIAL;
2654 vNames++;
2655 }
2656 }
2657
2658 handle_special(V[FS]);
2659 handle_special(V[RS]);
2660
2661 stdfiles[0] = stdin;
2662 stdfiles[1] = stdout;
2663 stdfiles[2] = stderr;
2664 for (i=0; i<3; i++) {
2665 rsm = newfile(nextword(&stdnames));
2666 rsm->F = stdfiles[i];
2667 }
2668
2669 for (envp=environ; *envp; envp++) {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002670 s = bb_xstrdup(*envp);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002671 s1 = strchr(s, '=');
Eric Andersen67776be2004-07-30 23:52:08 +00002672 if (!s1) {
2673 goto keep_going;
2674 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002675 *(s1++) = '\0';
2676 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
Eric Andersen67776be2004-07-30 23:52:08 +00002677keep_going:
Glenn L McGrath545106f2002-11-11 06:21:00 +00002678 free(s);
2679 }
2680
2681 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2682 switch (c) {
2683 case 'F':
2684 setvar_s(V[FS], optarg);
2685 break;
2686 case 'v':
2687 if (! is_assignment(optarg))
Manuel Novoa III cad53642003-03-19 09:13:01 +00002688 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002689 break;
2690 case 'f':
2691 from_file = TRUE;
2692 F = afopen(programname = optarg, "r");
2693 s = NULL;
2694 /* one byte is reserved for some trick in next_token */
2695 for (i=j=1; j>0; i+=j) {
2696 s = (char *)xrealloc(s, i+4096);
2697 j = fread(s+i, 1, 4094, F);
2698 }
2699 s[i] = '\0';
2700 fclose(F);
2701 parse_program(s+1);
2702 free(s);
2703 break;
2704 case 'W':
Manuel Novoa III cad53642003-03-19 09:13:01 +00002705 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002706 break;
2707
2708 default:
Manuel Novoa III cad53642003-03-19 09:13:01 +00002709 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002710 }
2711 }
2712
2713 if (!from_file) {
2714 if (argc == optind)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002715 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002716 programname="cmd. line";
2717 parse_program(argv[optind++]);
2718
2719 }
2720
2721 /* fill in ARGV array */
2722 setvar_i(V[ARGC], argc - optind + 1);
2723 setari_u(V[ARGV], 0, "awk");
2724 for(i=optind; i < argc; i++)
2725 setari_u(V[ARGV], i+1-optind, argv[i]);
2726
2727 evaluate(beginseq.first, &tv);
2728 if (! mainseq.first && ! endseq.first)
2729 awk_exit(EXIT_SUCCESS);
2730
2731 /* input file could already be opened in BEGIN block */
2732 if (! iF) iF = next_input_file();
2733
2734 /* passing through input files */
2735 while (iF) {
2736
2737 nextfile = FALSE;
2738 setvar_i(V[FNR], 0);
2739
2740 while ((c = awk_getline(iF, V[F0])) > 0) {
2741
2742 nextrec = FALSE;
2743 incvar(V[NR]);
2744 incvar(V[FNR]);
2745 evaluate(mainseq.first, &tv);
2746
2747 if (nextfile)
2748 break;
2749 }
2750
2751 if (c < 0)
2752 runtime_error(strerror(errno));
2753
2754 iF = next_input_file();
2755
2756 }
2757
Glenn L McGrath545106f2002-11-11 06:21:00 +00002758 awk_exit(EXIT_SUCCESS);
2759
2760 return 0;
2761}
2762