blob: 65856aa55a2337961999ba8ce015747fb26609b7 [file] [log] [blame]
Glenn L McGrath545106f2002-11-11 06:21:00 +00001/* vi: set sw=4 ts=4: */
2/*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
Bernhard Reutner-Fischer86f5c992006-01-22 22:55:11 +00007 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
Glenn L McGrath545106f2002-11-11 06:21:00 +00008 */
9
10#include <stdio.h>
11#include <stdlib.h>
12#include <unistd.h>
13#include <errno.h>
14#include <string.h>
15#include <time.h>
16#include <math.h>
17#include <ctype.h>
18#include <getopt.h>
Glenn L McGrath545106f2002-11-11 06:21:00 +000019
"Vladimir N. Oleynik"23f62fc2005-09-14 16:59:11 +000020#include "xregex.h"
Glenn L McGrath545106f2002-11-11 06:21:00 +000021#include "busybox.h"
22
23
24#define MAXVARFMT 240
25#define MINNVBLOCK 64
26
27/* variable flags */
28#define VF_NUMBER 0x0001 /* 1 = primary type is number */
29#define VF_ARRAY 0x0002 /* 1 = it's an array */
30
31#define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
32#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
33#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
34#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
35#define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
36#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
37#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
38
39/* these flags are static, don't change them when value is changed */
40#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
41
/* Variable: one awk value. The VF_* bits in `type' say which of the
 * number/string members is primary and how the union `x' is used. */
typedef struct var_s {
	unsigned short type;		/* flags */
	double number;			/* numeric value (or cached conversion of string) */
	char *string;			/* string value (or cached conversion of number); NULL reads as "" */
	union {
		int aidx;		/* func arg index (on compilation stage) */
		struct xhash_s *array;	/* array ptr */
		struct var_s *parent;	/* for func args, ptr to actual parameter */
		char **walker;		/* list of array elements (for..in) */
	} x;
} var;
54
/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
	struct node_s *first;	/* first node of the chain */
	struct node_s *last;	/* last node of the chain */
	char *programname;	/* NOTE(review): presumably the program source name used in diagnostics -- confirm */
} chain;
61
/* Function: entry stored in the functions hash */
typedef struct func_s {
	unsigned short nargs;	/* argument count (NOTE(review): presumably declared parameters -- confirm) */
	struct chain_s body;	/* node chain holding the function body */
} func;
67
/* I/O stream: entry stored in the redirect streams hash.
 * NOTE(review): buffer/adv/size/pos look like read-buffer bookkeeping;
 * their exact meaning is not visible here -- confirm against the reader. */
typedef struct rstream_s {
	FILE *F;		/* underlying stdio stream */
	char *buffer;
	int adv;
	int size;
	int pos;
	unsigned short is_pipe;	/* NOTE(review): presumably nonzero when F is a pipe -- confirm */
} rstream;
77
/* Hash table entry: payload, chain link, then the key.
 * hash_find() allocates sizeof(hash_item) plus the key length (including
 * its NUL), so `name' really extends past the declared one byte.
 * `data' must stay the first member: hash_find() stores the data pointer
 * returned by hash_search() in a hash_item pointer. */
typedef struct hash_item_s {
	union {
		struct var_s v;			/* variable/array hash */
		struct rstream_s rs;		/* redirect streams hash */
		struct func_s f;		/* functions hash */
	} data;
	struct hash_item_s *next;		/* next in chain */
	char name[1];				/* really it's longer */
} hash_item;
87
/* Chained hash table keyed by NUL-terminated names */
typedef struct xhash_s {
	unsigned int nel;		/* num of elements */
	unsigned int csize;		/* current hash size (number of buckets) */
	unsigned int nprime;		/* index in PRIMES[] of the next hash size */
	unsigned int glen;		/* total length of item names, each counted with its NUL */
	struct hash_item_s **items;	/* bucket array, csize entries */
} xhash;
95
/* Tree node of the parsed program.
 * `info' carries the operation class (OC_*, ST_*), operand flags (OF_*),
 * priority (P()) and a low-byte opcode, as laid out in tokeninfo[].
 * Which member of each union is valid depends on `info'. */
typedef struct node_s {
	uint32_t info;
	unsigned short lineno;		/* source line, copied from global lineno by new_node() */
	union {
		struct node_s *n;
		var *v;
		int i;
		char *s;
		regex_t *re;		/* case-sensitive regex (see mk_re_node) */
	} l;				/* left operand */
	union {
		struct node_s *n;
		regex_t *ire;		/* case-insensitive regex (see mk_re_node) */
		func *f;
		int argno;
	} r;				/* right operand */
	union {
		struct node_s *n;
	} a;				/* NOTE(review): looks like a link to the enclosing node, used by parse_expr to walk back -- confirm */
} node;
117
/* Block of temporary variables; blocks form a doubly linked list and
 * are handed out stack-fashion by nvalloc()/nvfree(). */
typedef struct nvblock_s {
	int size;			/* capacity of nv[] in vars */
	var *pos;			/* first unused var in nv[] */
	struct nvblock_s *prev;
	struct nvblock_s *next;
	var nv[0];			/* storage, allocated together with the header */
} nvblock;
126
/* Splitter: a node plus storage for two compiled regexes.
 * NOTE(review): presumably re[0]/re[1] are the case-sensitive and
 * case-insensitive variants filled in by mk_re_node() -- confirm. */
typedef struct tsplitter_s {
	node n;
	regex_t re[2];
} tsplitter;
131
132/* simple token classes */
133/* Order and hex values are very important!!! See next_token() */
134#define TC_SEQSTART 1 /* ( */
135#define TC_SEQTERM (1 << 1) /* ) */
136#define TC_REGEXP (1 << 2) /* /.../ */
137#define TC_OUTRDR (1 << 3) /* | > >> */
138#define TC_UOPPOST (1 << 4) /* unary postfix operator */
139#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
140#define TC_BINOPX (1 << 6) /* two-opnd operator */
141#define TC_IN (1 << 7)
142#define TC_COMMA (1 << 8)
143#define TC_PIPE (1 << 9) /* input redirection pipe */
144#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
145#define TC_ARRTERM (1 << 11) /* ] */
146#define TC_GRPSTART (1 << 12) /* { */
147#define TC_GRPTERM (1 << 13) /* } */
148#define TC_SEMICOL (1 << 14)
149#define TC_NEWLINE (1 << 15)
150#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
151#define TC_WHILE (1 << 17)
152#define TC_ELSE (1 << 18)
153#define TC_BUILTIN (1 << 19)
154#define TC_GETLINE (1 << 20)
155#define TC_FUNCDECL (1 << 21) /* `function' `func' */
156#define TC_BEGIN (1 << 22)
157#define TC_END (1 << 23)
158#define TC_EOF (1 << 24)
159#define TC_VARIABLE (1 << 25)
160#define TC_ARRAY (1 << 26)
161#define TC_FUNCTION (1 << 27)
162#define TC_STRING (1 << 28)
163#define TC_NUMBER (1 << 29)
164
165#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
166
167/* combined token classes */
168#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
169#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
170#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
171 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
172
173#define TC_STATEMNT (TC_STATX | TC_WHILE)
174#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
175
176/* word tokens, cannot mean something else if not expected */
177#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
178 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
179
180/* discard newlines after these */
181#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
182 TC_BINOP | TC_OPTERM)
183
184/* what can expression begin with */
185#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
186/* what can group begin with */
187#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
188
189/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
190/* operator is inserted between them */
191#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
192 TC_STRING | TC_NUMBER | TC_UOPPOST)
193#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
194
195#define OF_RES1 0x010000
196#define OF_RES2 0x020000
197#define OF_STR1 0x040000
198#define OF_STR2 0x080000
199#define OF_NUM1 0x100000
200#define OF_CHECKED 0x200000
201
202/* combined operator flags */
203#define xx 0
204#define xV OF_RES2
205#define xS (OF_RES2 | OF_STR2)
206#define Vx OF_RES1
207#define VV (OF_RES1 | OF_RES2)
208#define Nx (OF_RES1 | OF_NUM1)
209#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
210#define Sx (OF_RES1 | OF_STR1)
211#define SV (OF_RES1 | OF_STR1 | OF_RES2)
212#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
213
214#define OPCLSMASK 0xFF00
215#define OPNMASK 0x007F
216
/* operator priority is the highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * The shift is done on an unsigned 32-bit value: builtin descriptors such
 * as P(0x83) set bit 31, which would overflow a signed int (undefined
 * behaviour). The argument is parenthesized so expressions are safe too.
 */
#define P(x)		((uint32_t)(x) << 24)
#define PRIMASK		0x7F000000
#define PRIMASK2	0x7E000000
224
225/* Operation classes */
226
227#define SHIFT_TIL_THIS 0x0600
228#define RECUR_FROM_THIS 0x1000
229
230enum {
231 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
232 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
233
234 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
235 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
236 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
237
238 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
239 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
240 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
241 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
242 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
243 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
244 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
245 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
246 OC_DONE=0x2800,
247
248 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
249 ST_WHILE=0x3300
250};
251
252/* simple builtins */
253enum {
254 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
255 F_ti, F_le, F_sy, F_ff, F_cl
256};
257
258/* builtins */
259enum {
260 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
261 B_ge, B_gs, B_su
262};
263
264/* tokens and their corresponding info values */
265
266#define NTC "\377" /* switch to next token class (tc<<1) */
267#define NTCC '\377'
268
269#define OC_B OC_BUILTIN
270
271static char * const tokenlist =
272 "\1(" NTC
273 "\1)" NTC
274 "\1/" NTC /* REGEXP */
275 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
276 "\2++" "\2--" NTC /* UOPPOST */
277 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
278 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
279 "\2*=" "\2/=" "\2%=" "\2^="
280 "\1+" "\1-" "\3**=" "\2**"
281 "\1/" "\1%" "\1^" "\1*"
282 "\2!=" "\2>=" "\2<=" "\1>"
283 "\1<" "\2!~" "\1~" "\2&&"
284 "\2||" "\1?" "\1:" NTC
285 "\2in" NTC
286 "\1," NTC
287 "\1|" NTC
288 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
289 "\1]" NTC
290 "\1{" NTC
291 "\1}" NTC
292 "\1;" NTC
293 "\1\n" NTC
294 "\2if" "\2do" "\3for" "\5break" /* STATX */
295 "\10continue" "\6delete" "\5print"
296 "\6printf" "\4next" "\10nextfile"
297 "\6return" "\4exit" NTC
298 "\5while" NTC
299 "\4else" NTC
300
301 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
302 "\3cos" "\3exp" "\3int" "\3log"
303 "\4rand" "\3sin" "\4sqrt" "\5srand"
304 "\6gensub" "\4gsub" "\5index" "\6length"
305 "\5match" "\5split" "\7sprintf" "\3sub"
306 "\6substr" "\7systime" "\10strftime"
307 "\7tolower" "\7toupper" NTC
308 "\7getline" NTC
309 "\4func" "\10function" NTC
310 "\5BEGIN" NTC
311 "\3END" "\0"
312 ;
313
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000314static const uint32_t tokeninfo[] = {
Glenn L McGrath545106f2002-11-11 06:21:00 +0000315
316 0,
317 0,
318 OC_REGEXP,
319 xS|'a', xS|'w', xS|'|',
320 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
321 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
322 OC_FIELD|xV|P(5),
323 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
324 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
325 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
326 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
327 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
328 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
329 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
330 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
331 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
332 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
333 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
334 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
335 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
336 OC_COLON|xx|P(67)|':',
337 OC_IN|SV|P(49),
338 OC_COMMA|SS|P(80),
339 OC_PGETLINE|SV|P(37),
340 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
341 OC_UNARY|xV|P(19)|'!',
342 0,
343 0,
344 0,
345 0,
346 0,
347 ST_IF, ST_DO, ST_FOR, OC_BREAK,
348 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
349 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
350 OC_RETURN|Vx, OC_EXIT|Nx,
351 ST_WHILE,
352 0,
353
354 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
355 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
356 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
357 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
358 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
359 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
360 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
361 OC_GETLINE|SV|P(0),
362 0, 0,
363 0,
364 0
365};
366
367/* internal variable names and their initial values */
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000368/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
Glenn L McGrath545106f2002-11-11 06:21:00 +0000369enum {
370 CONVFMT=0, OFMT, FS, OFS,
371 ORS, RS, RT, FILENAME,
372 SUBSEP, ARGIND, ARGC, ARGV,
373 ERRNO, FNR,
374 NR, NF, IGNORECASE,
375 ENVIRON, F0, _intvarcount_
376};
377
378static char * vNames =
379 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000380 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
Glenn L McGrath545106f2002-11-11 06:21:00 +0000381 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
382 "ERRNO\0" "FNR\0"
383 "NR\0" "NF\0*" "IGNORECASE\0*"
384 "ENVIRON\0" "$\0*" "\0";
385
386static char * vValues =
387 "%.6g\0" "%.6g\0" " \0" " \0"
388 "\n\0" "\n\0" "\0" "\0"
389 "\034\0"
390 "\377";
391
/* Initial bucket count of a new hash; it may later grow to the values in
 * PRIMES[], in order. (No trailing semicolon, so the macro is usable
 * inside expressions as well as in plain assignments.) */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);
396
397/* globals */
398
399extern char **environ;
400
401static var * V[_intvarcount_];
402static chain beginseq, mainseq, endseq, *seq;
403static int nextrec, nextfile;
404static node *break_ptr, *continue_ptr;
405static rstream *iF;
406static xhash *vhash, *ahash, *fdhash, *fnhash;
407static char *programname;
408static short lineno;
409static int is_f0_split;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000410static int nfields;
411static var *Fields;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000412static tsplitter fsplitter, rsplitter;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000413static nvblock *cb;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000414static char *pos;
415static char *buf;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000416static int icase;
417static int exiting;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000418
419static struct {
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000420 uint32_t tclass;
421 uint32_t info;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000422 char *string;
423 double number;
424 short lineno;
425 int rollback;
426} t;
427
428/* function prototypes */
Glenn L McGrath545106f2002-11-11 06:21:00 +0000429static void handle_special(var *);
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000430static node *parse_expr(uint32_t);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000431static void chain_group(void);
432static var *evaluate(node *, var *);
433static rstream *next_input_file(void);
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000434static int fmt_num(char *, int, const char *, double, int);
Bernhard Reutner-Fischer86f5c992006-01-22 22:55:11 +0000435static int awk_exit(int) ATTRIBUTE_NORETURN;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000436
437/* ---- error handling ---- */
438
439static const char EMSG_INTERNAL_ERROR[] = "Internal error";
440static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
441static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
442static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
443static const char EMSG_INV_FMT[] = "Invalid format specifier";
444static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
445static const char EMSG_NOT_ARRAY[] = "Not an array";
446static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
447static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
448#ifndef CONFIG_FEATURE_AWK_MATH
449static const char EMSG_NO_MATH[] = "Math support is not compiled in";
450#endif
451
static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;

/* Report `message' as "<programname>:<lineno>: <message>" using the
 * globals programname and lineno. Declared ATTRIBUTE_NORETURN: control
 * does not come back from bb_error_msg_and_die(). */
static void syntax_error(const char * const message)
{
	bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
}
457
458#define runtime_error(x) syntax_error(x)
459
460
/* ---- hash stuff ---- */

/* Hash a NUL-terminated name. Each byte is folded in as
 * hash = byte + hash * 63 (spelled (hash << 6) - hash), with unsigned
 * wrap-around. The caller reduces the result modulo the table size. */
static unsigned int hashidx(const char *name)
{
	unsigned int hash = 0;
	const char *cp;

	for (cp = name; *cp != '\0'; cp++)
		hash = *cp + (hash << 6) - hash;

	return hash;
}
470
471/* create new hash */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000472static xhash *hash_init(void)
473{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000474 xhash *newhash;
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000475
Glenn L McGrath545106f2002-11-11 06:21:00 +0000476 newhash = (xhash *)xcalloc(1, sizeof(xhash));
477 newhash->csize = FIRST_PRIME;
478 newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
479
480 return newhash;
481}
482
483/* find item in hash, return ptr to data, NULL if not found */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000484static void *hash_search(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000485{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000486 hash_item *hi;
487
488 hi = hash->items [ hashidx(name) % hash->csize ];
489 while (hi) {
490 if (strcmp(hi->name, name) == 0)
491 return &(hi->data);
492 hi = hi->next;
493 }
494 return NULL;
495}
496
497/* grow hash if it becomes too big */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000498static void hash_rebuild(xhash *hash)
499{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000500 unsigned int newsize, i, idx;
501 hash_item **newitems, *hi, *thi;
502
503 if (hash->nprime == NPRIMES)
504 return;
505
506 newsize = PRIMES[hash->nprime++];
507 newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
508
509 for (i=0; i<hash->csize; i++) {
510 hi = hash->items[i];
511 while (hi) {
512 thi = hi;
513 hi = thi->next;
514 idx = hashidx(thi->name) % newsize;
515 thi->next = newitems[idx];
516 newitems[idx] = thi;
517 }
518 }
519
520 free(hash->items);
521 hash->csize = newsize;
522 hash->items = newitems;
523}
524
525/* find item in hash, add it if necessary. Return ptr to data */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000526static void *hash_find(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000527{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000528 hash_item *hi;
529 unsigned int idx;
530 int l;
531
532 hi = hash_search(hash, name);
533 if (! hi) {
534 if (++hash->nel / hash->csize > 10)
535 hash_rebuild(hash);
536
Manuel Novoa III cad53642003-03-19 09:13:01 +0000537 l = bb_strlen(name) + 1;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000538 hi = xcalloc(sizeof(hash_item) + l, 1);
539 memcpy(hi->name, name, l);
540
541 idx = hashidx(name) % hash->csize;
542 hi->next = hash->items[idx];
543 hash->items[idx] = hi;
544 hash->glen += l;
545 }
546 return &(hi->data);
547}
548
549#define findvar(hash, name) (var *) hash_find ( (hash) , (name) )
550#define newvar(name) (var *) hash_find ( vhash , (name) )
551#define newfile(name) (rstream *) hash_find ( fdhash , (name) )
552#define newfunc(name) (func *) hash_find ( fnhash , (name) )
553
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000554static void hash_remove(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000555{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000556 hash_item *hi, **phi;
557
558 phi = &(hash->items[ hashidx(name) % hash->csize ]);
559 while (*phi) {
560 hi = *phi;
561 if (strcmp(hi->name, name) == 0) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000562 hash->glen -= (bb_strlen(name) + 1);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000563 hash->nel--;
564 *phi = hi->next;
565 free(hi);
566 break;
567 }
568 phi = &(hi->next);
569 }
570}
571
572/* ------ some useful functions ------ */
573
Mike Frysinger10a11e22005-09-27 02:23:02 +0000574static void skip_spaces(char **s)
575{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000576 register char *p = *s;
577
578 while(*p == ' ' || *p == '\t' ||
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000579 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
Mike Frysingerde2b9382005-09-27 03:18:00 +0000580 p++;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000581 }
582 *s = p;
583}
584
/* Return the NUL-terminated word at *s and move *s to the byte just
 * after that word's terminating NUL. */
static char *nextword(char **s)
{
	char *start = *s;
	char *end = start;

	while (*end != '\0')
		end++;
	*s = end + 1;

	return start;
}
593
/* Fetch the next character from *s, advancing *s. A backslash is handed
 * to bb_process_escape_sequence(); when that yields a backslash without
 * consuming anything, the character after the backslash is returned as is. */
static char nextchar(char **s)
{
	char *after;
	char ch = **s;

	(*s)++;
	if (ch != '\\')
		return ch;

	after = *s;
	ch = bb_process_escape_sequence((const char **)s);
	if (ch == '\\' && *s == after) {
		ch = **s;
		(*s)++;
	}
	return ch;
}
604
/* Return 1 when c may appear in an identifier (alphanumeric or '_'),
 * 0 otherwise. Callers pass plain char values, which are negative for
 * bytes >= 0x80 where char is signed; isalnum() is only defined for
 * unsigned char values and EOF, hence the cast. */
static inline int isalnum_(int c)
{
	return (isalnum((unsigned char)c) || c == '_');
}
609
/* Open `path' with `mode' through bb_xfopen(); the path "-" stands for
 * standard input and is returned without opening anything. */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return bb_xfopen(path, mode);
}
614
615/* -------- working with variables (set/get/copy/etc) -------- */
616
Mike Frysinger10a11e22005-09-27 02:23:02 +0000617static xhash *iamarray(var *v)
618{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000619 var *a = v;
620
621 while (a->type & VF_CHILD)
622 a = a->x.parent;
623
624 if (! (a->type & VF_ARRAY)) {
625 a->type |= VF_ARRAY;
626 a->x.array = hash_init();
627 }
628 return a->x.array;
629}
630
Mike Frysinger10a11e22005-09-27 02:23:02 +0000631static void clear_array(xhash *array)
632{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000633 unsigned int i;
634 hash_item *hi, *thi;
635
636 for (i=0; i<array->csize; i++) {
637 hi = array->items[i];
638 while (hi) {
639 thi = hi;
640 hi = hi->next;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000641 free(thi->data.v.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000642 free(thi);
643 }
644 array->items[i] = NULL;
645 }
646 array->glen = array->nel = 0;
647}
648
649/* clear a variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000650static var *clrvar(var *v)
651{
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000652 if (!(v->type & VF_FSTR))
Glenn L McGrath545106f2002-11-11 06:21:00 +0000653 free(v->string);
654
655 v->type &= VF_DONTTOUCH;
656 v->type |= VF_DIRTY;
657 v->string = NULL;
658 return v;
659}
660
661/* assign string value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000662static var *setvar_p(var *v, char *value)
663{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000664 clrvar(v);
665 v->string = value;
666 handle_special(v);
667
668 return v;
669}
670
671/* same as setvar_p but make a copy of string */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000672static var *setvar_s(var *v, const char *value)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000673{
Manuel Novoa III cad53642003-03-19 09:13:01 +0000674 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000675}
676
677/* same as setvar_s but set USER flag */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000678static var *setvar_u(var *v, const char *value)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000679{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000680 setvar_s(v, value);
681 v->type |= VF_USER;
682 return v;
683}
684
685/* set array element to user string */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000686static void setari_u(var *a, int idx, const char *s)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000687{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000688 register var *v;
689 static char sidx[12];
690
691 sprintf(sidx, "%d", idx);
692 v = findvar(iamarray(a), sidx);
693 setvar_u(v, s);
694}
695
696/* assign numeric value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000697static var *setvar_i(var *v, double value)
698{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000699 clrvar(v);
700 v->type |= VF_NUMBER;
701 v->number = value;
702 handle_special(v);
703 return v;
704}
705
Mike Frysinger10a11e22005-09-27 02:23:02 +0000706static char *getvar_s(var *v)
707{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000708 /* if v is numeric and has no cached string, convert it to string */
709 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
710 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
Manuel Novoa III cad53642003-03-19 09:13:01 +0000711 v->string = bb_xstrdup(buf);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000712 v->type |= VF_CACHED;
713 }
714 return (v->string == NULL) ? "" : v->string;
715}
716
Mike Frysinger10a11e22005-09-27 02:23:02 +0000717static double getvar_i(var *v)
718{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000719 char *s;
720
721 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
722 v->number = 0;
723 s = v->string;
724 if (s && *s) {
725 v->number = strtod(s, &s);
726 if (v->type & VF_USER) {
727 skip_spaces(&s);
728 if (*s != '\0')
729 v->type &= ~VF_USER;
730 }
731 } else {
732 v->type &= ~VF_USER;
733 }
734 v->type |= VF_CACHED;
735 }
736 return v->number;
737}
738
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000739static var *copyvar(var *dest, const var *src)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000740{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000741 if (dest != src) {
742 clrvar(dest);
743 dest->type |= (src->type & ~VF_DONTTOUCH);
744 dest->number = src->number;
745 if (src->string)
Manuel Novoa III cad53642003-03-19 09:13:01 +0000746 dest->string = bb_xstrdup(src->string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000747 }
748 handle_special(dest);
749 return dest;
750}
751
Mike Frysinger10a11e22005-09-27 02:23:02 +0000752static var *incvar(var *v)
753{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000754 return setvar_i(v, getvar_i(v)+1.);
755}
756
757/* return true if v is number or numeric string */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000758static int is_numeric(var *v)
759{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000760 getvar_i(v);
761 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
762}
763
764/* return 1 when value of v corresponds to true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000765static int istrue(var *v)
766{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000767 if (is_numeric(v))
768 return (v->number == 0) ? 0 : 1;
769 else
770 return (v->string && *(v->string)) ? 1 : 0;
771}
772
Eric Andersenaff114c2004-04-14 17:51:38 +0000773/* temporary variables allocator. Last allocated should be first freed */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000774static var *nvalloc(int n)
775{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000776 nvblock *pb = NULL;
777 var *v, *r;
778 int size;
779
780 while (cb) {
781 pb = cb;
782 if ((cb->pos - cb->nv) + n <= cb->size) break;
783 cb = cb->next;
784 }
785
786 if (! cb) {
787 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
788 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
789 cb->size = size;
790 cb->pos = cb->nv;
791 cb->prev = pb;
792 cb->next = NULL;
793 if (pb) pb->next = cb;
794 }
795
796 v = r = cb->pos;
797 cb->pos += n;
798
799 while (v < cb->pos) {
800 v->type = 0;
801 v->string = NULL;
802 v++;
803 }
804
805 return r;
806}
807
Mike Frysinger10a11e22005-09-27 02:23:02 +0000808static void nvfree(var *v)
809{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000810 var *p;
811
812 if (v < cb->nv || v >= cb->pos)
813 runtime_error(EMSG_INTERNAL_ERROR);
814
815 for (p=v; p<cb->pos; p++) {
816 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
817 clear_array(iamarray(p));
818 free(p->x.array->items);
819 free(p->x.array);
820 }
821 if (p->type & VF_WALK)
822 free(p->x.walker);
823
824 clrvar(p);
825 }
826
827 cb->pos = v;
828 while (cb->prev && cb->pos == cb->nv) {
829 cb = cb->prev;
830 }
831}
832
833/* ------- awk program text parsing ------- */
834
835/* Parse next token pointed by global pos, place results into global t.
836 * If token isn't expected, give away. Return token class
837 */
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000838static uint32_t next_token(uint32_t expected)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000839{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000840 char *p, *pp, *s;
841 char *tl;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000842 uint32_t tc;
843 const uint32_t *ti;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000844 int l;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000845 static int concat_inserted;
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000846 static uint32_t save_tclass, save_info;
847 static uint32_t ltclass = TC_OPTERM;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000848
849 if (t.rollback) {
850
851 t.rollback = FALSE;
852
853 } else if (concat_inserted) {
854
855 concat_inserted = FALSE;
856 t.tclass = save_tclass;
857 t.info = save_info;
858
859 } else {
860
861 p = pos;
862
863 readnext:
864 skip_spaces(&p);
865 lineno = t.lineno;
866 if (*p == '#')
867 while (*p != '\n' && *p != '\0') p++;
868
869 if (*p == '\n')
870 t.lineno++;
871
872 if (*p == '\0') {
873 tc = TC_EOF;
874
875 } else if (*p == '\"') {
876 /* it's a string */
877 t.string = s = ++p;
878 while (*p != '\"') {
879 if (*p == '\0' || *p == '\n')
880 syntax_error(EMSG_UNEXP_EOS);
881 *(s++) = nextchar(&p);
882 }
883 p++;
884 *s = '\0';
885 tc = TC_STRING;
886
887 } else if ((expected & TC_REGEXP) && *p == '/') {
888 /* it's regexp */
889 t.string = s = ++p;
890 while (*p != '/') {
891 if (*p == '\0' || *p == '\n')
892 syntax_error(EMSG_UNEXP_EOS);
893 if ((*s++ = *p++) == '\\') {
894 pp = p;
Manuel Novoa III cad53642003-03-19 09:13:01 +0000895 *(s-1) = bb_process_escape_sequence((const char **)&p);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000896 if (*pp == '\\') *s++ = '\\';
897 if (p == pp) *s++ = *p++;
898 }
899 }
900 p++;
901 *s = '\0';
902 tc = TC_REGEXP;
903
904 } else if (*p == '.' || isdigit(*p)) {
905 /* it's a number */
906 t.number = strtod(p, &p);
907 if (*p == '.')
908 syntax_error(EMSG_UNEXP_TOKEN);
909 tc = TC_NUMBER;
910
911 } else {
912 /* search for something known */
913 tl = tokenlist;
914 tc = 0x00000001;
915 ti = tokeninfo;
916 while (*tl) {
917 l = *(tl++);
918 if (l == NTCC) {
919 tc <<= 1;
920 continue;
921 }
922 /* if token class is expected, token
923 * matches and it's not a longer word,
924 * then this is what we are looking for
925 */
926 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
927 *tl == *p && strncmp(p, tl, l) == 0 &&
928 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
929 t.info = *ti;
930 p += l;
931 break;
932 }
933 ti++;
934 tl += l;
935 }
936
937 if (! *tl) {
938 /* it's a name (var/array/function),
939 * otherwise it's something wrong
940 */
941 if (! isalnum_(*p))
942 syntax_error(EMSG_UNEXP_TOKEN);
943
944 t.string = --p;
945 while(isalnum_(*(++p))) {
946 *(p-1) = *p;
947 }
948 *(p-1) = '\0';
949 tc = TC_VARIABLE;
Bernhard Reutner-Fischerbb204622005-10-17 14:21:06 +0000950 /* also consume whitespace between functionname and bracket */
Rob Landley46e351d2006-02-14 16:05:32 +0000951 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000952 if (*p == '(') {
953 tc = TC_FUNCTION;
954 } else {
Glenn L McGrath545106f2002-11-11 06:21:00 +0000955 if (*p == '[') {
956 p++;
957 tc = TC_ARRAY;
958 }
959 }
960 }
961 }
962 pos = p;
963
964 /* skipping newlines in some cases */
965 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
966 goto readnext;
967
968 /* insert concatenation operator when needed */
969 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
970 concat_inserted = TRUE;
971 save_tclass = tc;
972 save_info = t.info;
973 tc = TC_BINOP;
974 t.info = OC_CONCAT | SS | P(35);
975 }
976
977 t.tclass = tc;
978 }
979 ltclass = t.tclass;
980
981 /* Are we ready for this? */
982 if (! (ltclass & expected))
983 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
984 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
985
986 return ltclass;
987}
988
989static void rollback_token(void) { t.rollback = TRUE; }
990
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000991static node *new_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000992{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000993 register node *n;
994
995 n = (node *)xcalloc(sizeof(node), 1);
996 n->info = info;
997 n->lineno = lineno;
998 return n;
999}
1000
Mike Frysinger10a11e22005-09-27 02:23:02 +00001001static node *mk_re_node(char *s, node *n, regex_t *re)
1002{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001003 n->info = OC_REGEXP;
1004 n->l.re = re;
1005 n->r.ire = re + 1;
1006 xregcomp(re, s, REG_EXTENDED);
1007 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1008
1009 return n;
1010}
1011
Mike Frysinger10a11e22005-09-27 02:23:02 +00001012static node *condition(void)
1013{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001014 next_token(TC_SEQSTART);
1015 return parse_expr(TC_SEQTERM);
1016}
1017
1018/* parse expression terminated by given argument, return ptr
1019 * to built subtree. Terminator is eaten by parse_expr */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001020static node *parse_expr(uint32_t iexp)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001021{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001022 node sn;
1023 node *cn = &sn;
1024 node *vn, *glptr;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001025 uint32_t tc, xtc;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001026 var *v;
1027
1028 sn.info = PRIMASK;
1029 sn.r.n = glptr = NULL;
1030 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1031
1032 while (! ((tc = next_token(xtc)) & iexp)) {
1033 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1034 /* input redirection (<) attached to glptr node */
1035 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
Glenn L McGrath4bded582004-02-22 11:55:09 +00001036 cn->a.n = glptr;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001037 xtc = TC_OPERAND | TC_UOPPRE;
1038 glptr = NULL;
1039
1040 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1041 /* for binary and postfix-unary operators, jump back over
1042 * previous operators with higher priority */
1043 vn = cn;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001044 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
Glenn L McGrath545106f2002-11-11 06:21:00 +00001045 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1046 vn = vn->a.n;
1047 if ((t.info & OPCLSMASK) == OC_TERNARY)
1048 t.info += P(6);
1049 cn = vn->a.n->r.n = new_node(t.info);
1050 cn->a.n = vn->a.n;
1051 if (tc & TC_BINOP) {
1052 cn->l.n = vn;
1053 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1054 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1055 /* it's a pipe */
1056 next_token(TC_GETLINE);
1057 /* give maximum priority to this pipe */
1058 cn->info &= ~PRIMASK;
1059 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1060 }
1061 } else {
1062 cn->r.n = vn;
1063 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1064 }
1065 vn->a.n = cn;
1066
1067 } else {
1068 /* for operands and prefix-unary operators, attach them
1069 * to last node */
1070 vn = cn;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001071 cn = vn->r.n = new_node(t.info);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001072 cn->a.n = vn;
1073 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1074 if (tc & (TC_OPERAND | TC_REGEXP)) {
Rob Landleyed830e82005-06-07 02:43:52 +00001075 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001076 /* one should be very careful with switch on tclass -
Glenn L McGrath545106f2002-11-11 06:21:00 +00001077 * only simple tclasses should be used! */
1078 switch (tc) {
1079 case TC_VARIABLE:
1080 case TC_ARRAY:
1081 cn->info = OC_VAR;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001082 if ((v = hash_search(ahash, t.string)) != NULL) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001083 cn->info = OC_FNARG;
1084 cn->l.i = v->x.aidx;
1085 } else {
Mike Frysingerde2b9382005-09-27 03:18:00 +00001086 cn->l.v = newvar(t.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001087 }
1088 if (tc & TC_ARRAY) {
1089 cn->info |= xS;
1090 cn->r.n = parse_expr(TC_ARRTERM);
1091 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00001092 break;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001093
Glenn L McGrath545106f2002-11-11 06:21:00 +00001094 case TC_NUMBER:
1095 case TC_STRING:
1096 cn->info = OC_VAR;
1097 v = cn->l.v = xcalloc(sizeof(var), 1);
1098 if (tc & TC_NUMBER)
1099 setvar_i(v, t.number);
1100 else
1101 setvar_s(v, t.string);
1102 break;
1103
1104 case TC_REGEXP:
1105 mk_re_node(t.string, cn,
1106 (regex_t *)xcalloc(sizeof(regex_t),2));
1107 break;
1108
1109 case TC_FUNCTION:
Mike Frysingerde2b9382005-09-27 03:18:00 +00001110 cn->info = OC_FUNC;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001111 cn->r.f = newfunc(t.string);
1112 cn->l.n = condition();
1113 break;
1114
1115 case TC_SEQSTART:
1116 cn = vn->r.n = parse_expr(TC_SEQTERM);
1117 cn->a.n = vn;
1118 break;
1119
1120 case TC_GETLINE:
1121 glptr = cn;
1122 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1123 break;
1124
1125 case TC_BUILTIN:
1126 cn->l.n = condition();
1127 break;
1128 }
1129 }
1130 }
1131 }
1132 return sn.r.n;
1133}
1134
1135/* add node to chain. Return ptr to alloc'd node */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001136static node *chain_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001137{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001138 register node *n;
1139
1140 if (! seq->first)
1141 seq->first = seq->last = new_node(0);
1142
1143 if (seq->programname != programname) {
1144 seq->programname = programname;
1145 n = chain_node(OC_NEWSOURCE);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001146 n->l.s = bb_xstrdup(programname);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001147 }
1148
1149 n = seq->last;
1150 n->info = info;
1151 seq->last = n->a.n = new_node(OC_DONE);
1152
1153 return n;
1154}
1155
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001156static void chain_expr(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001157{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001158 node *n;
1159
1160 n = chain_node(info);
1161 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1162 if (t.tclass & TC_GRPTERM)
1163 rollback_token();
1164}
1165
Mike Frysinger10a11e22005-09-27 02:23:02 +00001166static node *chain_loop(node *nn)
1167{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001168 node *n, *n2, *save_brk, *save_cont;
1169
1170 save_brk = break_ptr;
1171 save_cont = continue_ptr;
1172
1173 n = chain_node(OC_BR | Vx);
1174 continue_ptr = new_node(OC_EXEC);
1175 break_ptr = new_node(OC_EXEC);
1176 chain_group();
1177 n2 = chain_node(OC_EXEC | Vx);
1178 n2->l.n = nn;
1179 n2->a.n = n;
1180 continue_ptr->a.n = n2;
1181 break_ptr->a.n = n->r.n = seq->last;
1182
1183 continue_ptr = save_cont;
1184 break_ptr = save_brk;
1185
1186 return n;
1187}
1188
1189/* parse group and attach it to chain */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001190static void chain_group(void)
1191{
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001192 uint32_t c;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001193 node *n, *n2, *n3;
1194
1195 do {
1196 c = next_token(TC_GRPSEQ);
1197 } while (c & TC_NEWLINE);
1198
1199 if (c & TC_GRPSTART) {
1200 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
Mike Frysingerde2b9382005-09-27 03:18:00 +00001201 if (t.tclass & TC_NEWLINE) continue;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001202 rollback_token();
1203 chain_group();
1204 }
1205 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1206 rollback_token();
1207 chain_expr(OC_EXEC | Vx);
1208 } else { /* TC_STATEMNT */
1209 switch (t.info & OPCLSMASK) {
1210 case ST_IF:
1211 n = chain_node(OC_BR | Vx);
1212 n->l.n = condition();
1213 chain_group();
1214 n2 = chain_node(OC_EXEC);
1215 n->r.n = seq->last;
1216 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1217 chain_group();
1218 n2->a.n = seq->last;
1219 } else {
1220 rollback_token();
1221 }
1222 break;
1223
1224 case ST_WHILE:
1225 n2 = condition();
1226 n = chain_loop(NULL);
1227 n->l.n = n2;
1228 break;
1229
1230 case ST_DO:
1231 n2 = chain_node(OC_EXEC);
1232 n = chain_loop(NULL);
1233 n2->a.n = n->a.n;
1234 next_token(TC_WHILE);
1235 n->l.n = condition();
1236 break;
1237
1238 case ST_FOR:
1239 next_token(TC_SEQSTART);
1240 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1241 if (t.tclass & TC_SEQTERM) { /* for-in */
1242 if ((n2->info & OPCLSMASK) != OC_IN)
1243 syntax_error(EMSG_UNEXP_TOKEN);
1244 n = chain_node(OC_WALKINIT | VV);
1245 n->l.n = n2->l.n;
1246 n->r.n = n2->r.n;
1247 n = chain_loop(NULL);
1248 n->info = OC_WALKNEXT | Vx;
1249 n->l.n = n2->l.n;
1250 } else { /* for(;;) */
1251 n = chain_node(OC_EXEC | Vx);
1252 n->l.n = n2;
1253 n2 = parse_expr(TC_SEMICOL);
1254 n3 = parse_expr(TC_SEQTERM);
1255 n = chain_loop(n3);
1256 n->l.n = n2;
1257 if (! n2)
1258 n->info = OC_EXEC;
1259 }
1260 break;
1261
1262 case OC_PRINT:
1263 case OC_PRINTF:
1264 n = chain_node(t.info);
1265 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1266 if (t.tclass & TC_OUTRDR) {
1267 n->info |= t.info;
1268 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1269 }
1270 if (t.tclass & TC_GRPTERM)
1271 rollback_token();
1272 break;
1273
1274 case OC_BREAK:
1275 n = chain_node(OC_EXEC);
1276 n->a.n = break_ptr;
1277 break;
1278
1279 case OC_CONTINUE:
1280 n = chain_node(OC_EXEC);
1281 n->a.n = continue_ptr;
1282 break;
1283
1284 /* delete, next, nextfile, return, exit */
1285 default:
1286 chain_expr(t.info);
1287
1288 }
1289 }
1290}
1291
Mike Frysinger10a11e22005-09-27 02:23:02 +00001292static void parse_program(char *p)
1293{
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001294 uint32_t tclass;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001295 node *cn;
1296 func *f;
1297 var *v;
1298
1299 pos = p;
1300 t.lineno = 1;
1301 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1302 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1303
1304 if (tclass & TC_OPTERM)
1305 continue;
1306
1307 seq = &mainseq;
1308 if (tclass & TC_BEGIN) {
1309 seq = &beginseq;
1310 chain_group();
1311
1312 } else if (tclass & TC_END) {
1313 seq = &endseq;
1314 chain_group();
1315
1316 } else if (tclass & TC_FUNCDECL) {
1317 next_token(TC_FUNCTION);
1318 pos++;
1319 f = newfunc(t.string);
1320 f->body.first = NULL;
1321 f->nargs = 0;
1322 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1323 v = findvar(ahash, t.string);
1324 v->x.aidx = (f->nargs)++;
1325
1326 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1327 break;
1328 }
1329 seq = &(f->body);
1330 chain_group();
1331 clear_array(ahash);
1332
1333 } else if (tclass & TC_OPSEQ) {
1334 rollback_token();
1335 cn = chain_node(OC_TEST);
1336 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1337 if (t.tclass & TC_GRPSTART) {
1338 rollback_token();
1339 chain_group();
1340 } else {
1341 chain_node(OC_PRINT);
1342 }
1343 cn->r.n = mainseq.last;
1344
1345 } else /* if (tclass & TC_GRPSTART) */ {
1346 rollback_token();
1347 chain_group();
1348 }
1349 }
1350}
1351
1352
1353/* -------- program execution part -------- */
1354
Mike Frysinger10a11e22005-09-27 02:23:02 +00001355static node *mk_splitter(char *s, tsplitter *spl)
1356{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001357 register regex_t *re, *ire;
1358 node *n;
1359
1360 re = &spl->re[0];
1361 ire = &spl->re[1];
1362 n = &spl->n;
1363 if ((n->info && OPCLSMASK) == OC_REGEXP) {
1364 regfree(re);
1365 regfree(ire);
1366 }
Manuel Novoa III cad53642003-03-19 09:13:01 +00001367 if (bb_strlen(s) > 1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001368 mk_re_node(s, n, re);
1369 } else {
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001370 n->info = (uint32_t) *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001371 }
1372
1373 return n;
1374}
1375
1376/* use node as a regular expression. Supplied with node ptr and regex_t
Eric Andersenaff114c2004-04-14 17:51:38 +00001377 * storage space. Return ptr to regex (if result points to preg, it should
Glenn L McGrath545106f2002-11-11 06:21:00 +00001378 * be later regfree'd manually
1379 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001380static regex_t *as_regex(node *op, regex_t *preg)
1381{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001382 var *v;
1383 char *s;
1384
1385 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1386 return icase ? op->r.ire : op->l.re;
1387 } else {
1388 v = nvalloc(1);
1389 s = getvar_s(evaluate(op, v));
1390 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1391 nvfree(v);
1392 return preg;
1393 }
1394}
1395
/* Gradually growing buffer: make sure *b is allocated and that its
 * recorded capacity *size is strictly greater than n. When growth is
 * needed the new capacity is n + n/2 + 80. *size is not read while *b
 * is NULL. */
static void qrealloc(char **b, int n, int *size)
{
	if (*b && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1402
1403/* resize field storage space */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001404static void fsrealloc(int size)
1405{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001406 static int maxfields = 0;
1407 int i;
1408
1409 if (size >= maxfields) {
1410 i = maxfields;
1411 maxfields = size + 16;
1412 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1413 for (; i<maxfields; i++) {
1414 Fields[i].type = VF_SPECIAL;
1415 Fields[i].string = NULL;
1416 }
1417 }
1418
1419 if (size < nfields) {
1420 for (i=size; i<nfields; i++) {
1421 clrvar(Fields+i);
1422 }
1423 }
1424 nfields = size;
1425}
1426
Mike Frysinger10a11e22005-09-27 02:23:02 +00001427static int awk_split(char *s, node *spl, char **slist)
1428{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001429 int l, n=0;
1430 char c[4];
1431 char *s1;
1432 regmatch_t pmatch[2];
1433
1434 /* in worst case, each char would be a separate field */
Manuel Novoa III cad53642003-03-19 09:13:01 +00001435 *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001436
1437 c[0] = c[1] = (char)spl->info;
1438 c[2] = c[3] = '\0';
1439 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1440
1441 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1442 while (*s) {
1443 l = strcspn(s, c+2);
1444 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1445 pmatch[0].rm_so <= l) {
1446 l = pmatch[0].rm_so;
1447 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1448 } else {
1449 pmatch[0].rm_eo = l;
1450 if (*(s+l)) pmatch[0].rm_eo++;
1451 }
1452
1453 memcpy(s1, s, l);
1454 *(s1+l) = '\0';
1455 nextword(&s1);
1456 s += pmatch[0].rm_eo;
1457 n++;
1458 }
1459 } else if (c[0] == '\0') { /* null split */
1460 while(*s) {
1461 *(s1++) = *(s++);
1462 *(s1++) = '\0';
1463 n++;
1464 }
1465 } else if (c[0] != ' ') { /* single-character split */
1466 if (icase) {
1467 c[0] = toupper(c[0]);
1468 c[1] = tolower(c[1]);
1469 }
1470 if (*s1) n++;
1471 while ((s1 = strpbrk(s1, c))) {
1472 *(s1++) = '\0';
1473 n++;
1474 }
1475 } else { /* space split */
1476 while (*s) {
1477 while (isspace(*s)) s++;
1478 if (! *s) break;
1479 n++;
1480 while (*s && !isspace(*s))
1481 *(s1++) = *(s++);
1482 *(s1++) = '\0';
1483 }
1484 }
1485 return n;
1486}
1487
Mike Frysinger10a11e22005-09-27 02:23:02 +00001488static void split_f0(void)
1489{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001490 static char *fstrings = NULL;
1491 int i, n;
1492 char *s;
1493
1494 if (is_f0_split)
1495 return;
1496
1497 is_f0_split = TRUE;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00001498 free(fstrings);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001499 fsrealloc(0);
1500 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1501 fsrealloc(n);
1502 s = fstrings;
1503 for (i=0; i<n; i++) {
1504 Fields[i].string = nextword(&s);
1505 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1506 }
1507
1508 /* set NF manually to avoid side effects */
1509 clrvar(V[NF]);
1510 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1511 V[NF]->number = nfields;
1512}
1513
1514/* perform additional actions when some internal variables changed */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001515static void handle_special(var *v)
1516{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001517 int n;
1518 char *b, *sep, *s;
1519 int sl, l, len, i, bsize;
1520
1521 if (! (v->type & VF_SPECIAL))
1522 return;
1523
1524 if (v == V[NF]) {
1525 n = (int)getvar_i(v);
1526 fsrealloc(n);
1527
1528 /* recalculate $0 */
1529 sep = getvar_s(V[OFS]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001530 sl = bb_strlen(sep);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001531 b = NULL;
1532 len = 0;
1533 for (i=0; i<n; i++) {
1534 s = getvar_s(&Fields[i]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001535 l = bb_strlen(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001536 if (b) {
1537 memcpy(b+len, sep, sl);
1538 len += sl;
1539 }
1540 qrealloc(&b, len+l+sl, &bsize);
1541 memcpy(b+len, s, l);
1542 len += l;
1543 }
Glenn L McGrathca29ffc2004-09-24 09:24:27 +00001544 if (b) b[len] = '\0';
Glenn L McGrath545106f2002-11-11 06:21:00 +00001545 setvar_p(V[F0], b);
1546 is_f0_split = TRUE;
1547
1548 } else if (v == V[F0]) {
1549 is_f0_split = FALSE;
1550
1551 } else if (v == V[FS]) {
1552 mk_splitter(getvar_s(v), &fsplitter);
1553
1554 } else if (v == V[RS]) {
1555 mk_splitter(getvar_s(v), &rsplitter);
1556
1557 } else if (v == V[IGNORECASE]) {
1558 icase = istrue(v);
1559
1560 } else { /* $n */
1561 n = getvar_i(V[NF]);
1562 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1563 /* right here v is invalid. Just to note... */
1564 }
1565}
1566
1567/* step through func/builtin/etc arguments */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001568static node *nextarg(node **pn)
1569{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001570 node *n;
1571
1572 n = *pn;
1573 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1574 *pn = n->r.n;
1575 n = n->l.n;
1576 } else {
1577 *pn = NULL;
1578 }
1579 return n;
1580}
1581
Mike Frysinger10a11e22005-09-27 02:23:02 +00001582static void hashwalk_init(var *v, xhash *array)
1583{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001584 char **w;
1585 hash_item *hi;
1586 int i;
1587
1588 if (v->type & VF_WALK)
1589 free(v->x.walker);
1590
1591 v->type |= VF_WALK;
1592 w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1593 *w = *(w+1) = (char *)(w + 2);
1594 for (i=0; i<array->csize; i++) {
1595 hi = array->items[i];
1596 while(hi) {
1597 strcpy(*w, hi->name);
1598 nextword(w);
1599 hi = hi->next;
1600 }
1601 }
1602}
1603
Mike Frysinger10a11e22005-09-27 02:23:02 +00001604static int hashwalk_next(var *v)
1605{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001606 char **w;
1607
1608 w = v->x.walker;
1609 if (*(w+1) == *w)
1610 return FALSE;
1611
1612 setvar_s(v, nextword(w+1));
1613 return TRUE;
1614}
1615
1616/* evaluate node, return 1 when result is true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001617static int ptest(node *pattern)
1618{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001619 static var v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001620 return istrue(evaluate(pattern, &v));
1621}
1622
1623/* read next record from stream rsm into a variable v */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001624static int awk_getline(rstream *rsm, var *v)
1625{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001626 char *b;
1627 regmatch_t pmatch[2];
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001628 int a, p, pp=0, size;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001629 int fd, so, eo, r, rp;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001630 char c, *m, *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001631
1632 /* we're using our own buffer since we need access to accumulating
1633 * characters
1634 */
1635 fd = fileno(rsm->F);
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001636 m = rsm->buffer;
1637 a = rsm->adv;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001638 p = rsm->pos;
1639 size = rsm->size;
1640 c = (char) rsplitter.n.info;
1641 rp = 0;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001642
1643 if (! m) qrealloc(&m, 256, &size);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001644 do {
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001645 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001646 so = eo = p;
1647 r = 1;
1648 if (p > 0) {
1649 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1650 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1651 b, 1, pmatch, 0) == 0) {
1652 so = pmatch[0].rm_so;
1653 eo = pmatch[0].rm_eo;
1654 if (b[eo] != '\0')
1655 break;
1656 }
1657 } else if (c != '\0') {
1658 s = strchr(b+pp, c);
Rob Landley46e351d2006-02-14 16:05:32 +00001659 if (! s) s = memchr(b+pp, '\0', p - pp);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001660 if (s) {
1661 so = eo = s-b;
1662 eo++;
1663 break;
1664 }
1665 } else {
1666 while (b[rp] == '\n')
1667 rp++;
1668 s = strstr(b+rp, "\n\n");
1669 if (s) {
1670 so = eo = s-b;
1671 while (b[eo] == '\n') eo++;
1672 if (b[eo] != '\0')
1673 break;
1674 }
1675 }
1676 }
1677
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001678 if (a > 0) {
1679 memmove(m, (const void *)(m+a), p+1);
1680 b = m;
1681 a = 0;
1682 }
1683
1684 qrealloc(&m, a+p+128, &size);
1685 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001686 pp = p;
1687 p += safe_read(fd, b+p, size-p-1);
1688 if (p < pp) {
1689 p = 0;
1690 r = 0;
1691 setvar_i(V[ERRNO], errno);
1692 }
1693 b[p] = '\0';
1694
1695 } while (p > pp);
1696
1697 if (p == 0) {
1698 r--;
1699 } else {
1700 c = b[so]; b[so] = '\0';
1701 setvar_s(v, b+rp);
1702 v->type |= VF_USER;
1703 b[so] = c;
1704 c = b[eo]; b[eo] = '\0';
1705 setvar_s(V[RT], b+so);
1706 b[eo] = c;
1707 }
1708
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001709 rsm->buffer = m;
1710 rsm->adv = a + eo;
1711 rsm->pos = p - eo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001712 rsm->size = size;
1713
1714 return r;
1715}
1716
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00001717static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001718{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001719 int r=0;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00001720 char c;
1721 const char *s=format;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001722
1723 if (int_as_int && n == (int)n) {
1724 r = snprintf(b, size, "%d", (int)n);
1725 } else {
1726 do { c = *s; } while (*s && *++s);
1727 if (strchr("diouxX", c)) {
1728 r = snprintf(b, size, format, (int)n);
1729 } else if (strchr("eEfgG", c)) {
1730 r = snprintf(b, size, format, n);
1731 } else {
1732 runtime_error(EMSG_INV_FMT);
1733 }
1734 }
1735 return r;
1736}
1737
1738
1739/* formatted output into an allocated buffer, return ptr to buffer */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001740static char *awk_printf(node *n)
1741{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001742 char *b = NULL;
1743 char *fmt, *s, *s1, *f;
1744 int i, j, incr, bsize;
1745 char c, c1;
1746 var *v, *arg;
1747
1748 v = nvalloc(1);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001749 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
Glenn L McGrath545106f2002-11-11 06:21:00 +00001750
1751 i = 0;
1752 while (*f) {
1753 s = f;
1754 while (*f && (*f != '%' || *(++f) == '%'))
1755 f++;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001756 while (*f && !isalpha(*f))
Glenn L McGrath545106f2002-11-11 06:21:00 +00001757 f++;
1758
1759 incr = (f - s) + MAXVARFMT;
1760 qrealloc(&b, incr+i, &bsize);
1761 c = *f; if (c != '\0') f++;
1762 c1 = *f ; *f = '\0';
1763 arg = evaluate(nextarg(&n), v);
1764
1765 j = i;
1766 if (c == 'c' || !c) {
1767 i += sprintf(b+i, s,
1768 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1769
1770 } else if (c == 's') {
1771 s1 = getvar_s(arg);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001772 qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001773 i += sprintf(b+i, s, s1);
1774
1775 } else {
1776 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1777 }
1778 *f = c1;
1779
1780 /* if there was an error while sprintf, return value is negative */
1781 if (i < j) i = j;
1782
1783 }
1784
1785 b = xrealloc(b, i+1);
1786 free(fmt);
1787 nvfree(v);
1788 b[i] = '\0';
1789 return b;
1790}
1791
1792/* common substitution routine
1793 * replace (nm) substring of (src) that match (n) with (repl), store
1794 * result into (dest), return number of substitutions. If nm=0, replace
1795 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1796 * subexpression matching (\1-\9)
1797 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001798static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1799{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001800 char *ds = NULL;
1801 char *sp, *s;
1802 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1803 regmatch_t pmatch[10];
1804 regex_t sreg, *re;
1805
1806 re = as_regex(rn, &sreg);
1807 if (! src) src = V[F0];
1808 if (! dest) dest = V[F0];
1809
1810 i = di = 0;
1811 sp = getvar_s(src);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001812 rl = bb_strlen(repl);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001813 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1814 so = pmatch[0].rm_so;
1815 eo = pmatch[0].rm_eo;
1816
1817 qrealloc(&ds, di + eo + rl, &dssize);
1818 memcpy(ds + di, sp, eo);
1819 di += eo;
1820 if (++i >= nm) {
1821 /* replace */
1822 di -= (eo - so);
1823 nbs = 0;
1824 for (s = repl; *s; s++) {
1825 ds[di++] = c = *s;
1826 if (c == '\\') {
1827 nbs++;
1828 continue;
1829 }
1830 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1831 di -= ((nbs + 3) >> 1);
1832 j = 0;
1833 if (c != '&') {
1834 j = c - '0';
1835 nbs++;
1836 }
1837 if (nbs % 2) {
1838 ds[di++] = c;
1839 } else {
1840 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1841 qrealloc(&ds, di + rl + n, &dssize);
1842 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1843 di += n;
1844 }
1845 }
1846 nbs = 0;
1847 }
1848 }
1849
1850 sp += eo;
1851 if (i == nm) break;
1852 if (eo == so) {
1853 if (! (ds[di++] = *sp++)) break;
1854 }
1855 }
1856
1857 qrealloc(&ds, di + strlen(sp), &dssize);
1858 strcpy(ds + di, sp);
1859 setvar_p(dest, ds);
1860 if (re == &sreg) regfree(re);
1861 return i;
1862}
1863
Mike Frysinger10a11e22005-09-27 02:23:02 +00001864static var *exec_builtin(node *op, var *res)
1865{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001866 int (*to_xxx)(int);
1867 var *tv;
1868 node *an[4];
1869 var *av[4];
1870 char *as[4];
1871 regmatch_t pmatch[2];
1872 regex_t sreg, *re;
1873 static tsplitter tspl;
1874 node *spl;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001875 uint32_t isr, info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001876 int nargs;
1877 time_t tt;
1878 char *s, *s1;
1879 int i, l, ll, n;
1880
1881 tv = nvalloc(4);
1882 isr = info = op->info;
1883 op = op->l.n;
1884
1885 av[2] = av[3] = NULL;
1886 for (i=0 ; i<4 && op ; i++) {
1887 an[i] = nextarg(&op);
1888 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1889 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1890 isr >>= 1;
1891 }
1892
1893 nargs = i;
1894 if (nargs < (info >> 30))
1895 runtime_error(EMSG_TOO_FEW_ARGS);
1896
1897 switch (info & OPNMASK) {
1898
1899 case B_a2:
1900#ifdef CONFIG_FEATURE_AWK_MATH
1901 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1902#else
1903 runtime_error(EMSG_NO_MATH);
1904#endif
1905 break;
1906
1907 case B_sp:
1908 if (nargs > 2) {
1909 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1910 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1911 } else {
1912 spl = &fsplitter.n;
1913 }
1914
1915 n = awk_split(as[0], spl, &s);
1916 s1 = s;
1917 clear_array(iamarray(av[1]));
1918 for (i=1; i<=n; i++)
1919 setari_u(av[1], i, nextword(&s1));
1920 free(s);
1921 setvar_i(res, n);
1922 break;
1923
1924 case B_ss:
Manuel Novoa III cad53642003-03-19 09:13:01 +00001925 l = bb_strlen(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001926 i = getvar_i(av[1]) - 1;
1927 if (i>l) i=l; if (i<0) i=0;
1928 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1929 if (n<0) n=0;
1930 s = xmalloc(n+1);
1931 strncpy(s, as[0]+i, n);
1932 s[n] = '\0';
1933 setvar_p(res, s);
1934 break;
1935
1936 case B_lo:
1937 to_xxx = tolower;
1938 goto lo_cont;
1939
1940 case B_up:
1941 to_xxx = toupper;
1942lo_cont:
Manuel Novoa III cad53642003-03-19 09:13:01 +00001943 s1 = s = bb_xstrdup(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001944 while (*s1) {
1945 *s1 = (*to_xxx)(*s1);
1946 s1++;
1947 }
1948 setvar_p(res, s);
1949 break;
1950
1951 case B_ix:
1952 n = 0;
Manuel Novoa III cad53642003-03-19 09:13:01 +00001953 ll = bb_strlen(as[1]);
1954 l = bb_strlen(as[0]) - ll;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001955 if (ll > 0 && l >= 0) {
1956 if (! icase) {
1957 s = strstr(as[0], as[1]);
1958 if (s) n = (s - as[0]) + 1;
1959 } else {
1960 /* this piece of code is terribly slow and
1961 * really should be rewritten
1962 */
1963 for (i=0; i<=l; i++) {
1964 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1965 n = i+1;
1966 break;
1967 }
1968 }
1969 }
1970 }
1971 setvar_i(res, n);
1972 break;
1973
1974 case B_ti:
1975 if (nargs > 1)
1976 tt = getvar_i(av[1]);
1977 else
1978 time(&tt);
1979 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1980 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1981 buf[i] = '\0';
1982 setvar_s(res, buf);
1983 break;
1984
1985 case B_ma:
1986 re = as_regex(an[1], &sreg);
1987 n = regexec(re, as[0], 1, pmatch, 0);
1988 if (n == 0) {
1989 pmatch[0].rm_so++;
1990 pmatch[0].rm_eo++;
1991 } else {
1992 pmatch[0].rm_so = 0;
1993 pmatch[0].rm_eo = -1;
1994 }
1995 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1996 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1997 setvar_i(res, pmatch[0].rm_so);
1998 if (re == &sreg) regfree(re);
1999 break;
2000
2001 case B_ge:
2002 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2003 break;
2004
2005 case B_gs:
2006 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2007 break;
2008
2009 case B_su:
2010 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2011 break;
2012 }
2013
2014 nvfree(tv);
2015 return res;
2016}
2017
2018/*
2019 * Evaluate node - the heart of the program. Supplied with subtree
2020 * and place where to store result. returns ptr to result.
2021 */
2022#define XC(n) ((n) >> 8)
2023
Mike Frysinger10a11e22005-09-27 02:23:02 +00002024static var *evaluate(node *op, var *res)
2025{
Mike Frysingerde2b9382005-09-27 03:18:00 +00002026 /* This procedure is recursive so we should count every byte */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002027 static var *fnargs = NULL;
2028 static unsigned int seed = 1;
2029 static regex_t sreg;
2030 node *op1;
2031 var *v1;
2032 union {
2033 var *v;
2034 char *s;
2035 double d;
2036 int i;
2037 } L, R;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00002038 uint32_t opinfo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002039 short opn;
2040 union {
2041 char *s;
2042 rstream *rsm;
2043 FILE *F;
2044 var *v;
2045 regex_t *re;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00002046 uint32_t info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002047 } X;
2048
2049 if (! op)
2050 return setvar_s(res, NULL);
2051
2052 v1 = nvalloc(2);
2053
2054 while (op) {
2055
2056 opinfo = op->info;
2057 opn = (short)(opinfo & OPNMASK);
2058 lineno = op->lineno;
2059
Mike Frysingerde2b9382005-09-27 03:18:00 +00002060 /* execute inevitable things */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002061 op1 = op->l.n;
2062 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2063 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2064 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2065 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2066 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2067
2068 switch (XC(opinfo & OPCLSMASK)) {
2069
2070 /* -- iterative node type -- */
2071
2072 /* test pattern */
2073 case XC( OC_TEST ):
2074 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2075 /* it's range pattern */
2076 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2077 op->info |= OF_CHECKED;
2078 if (ptest(op1->r.n))
2079 op->info &= ~OF_CHECKED;
2080
2081 op = op->a.n;
2082 } else {
2083 op = op->r.n;
2084 }
2085 } else {
2086 op = (ptest(op1)) ? op->a.n : op->r.n;
2087 }
2088 break;
2089
2090 /* just evaluate an expression, also used as unconditional jump */
2091 case XC( OC_EXEC ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002092 break;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002093
2094 /* branch, used in if-else and various loops */
2095 case XC( OC_BR ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002096 op = istrue(L.v) ? op->a.n : op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002097 break;
2098
2099 /* initialize for-in loop */
2100 case XC( OC_WALKINIT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002101 hashwalk_init(L.v, iamarray(R.v));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002102 break;
2103
2104 /* get next array item */
2105 case XC( OC_WALKNEXT ):
2106 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2107 break;
2108
2109 case XC( OC_PRINT ):
2110 case XC( OC_PRINTF ):
2111 X.F = stdout;
Mike Frysingerde2b9382005-09-27 03:18:00 +00002112 if (op->r.n) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002113 X.rsm = newfile(R.s);
2114 if (! X.rsm->F) {
2115 if (opn == '|') {
2116 if((X.rsm->F = popen(R.s, "w")) == NULL)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002117 bb_perror_msg_and_die("popen");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002118 X.rsm->is_pipe = 1;
2119 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002120 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002121 }
2122 }
2123 X.F = X.rsm->F;
2124 }
2125
2126 if ((opinfo & OPCLSMASK) == OC_PRINT) {
Mike Frysingerde2b9382005-09-27 03:18:00 +00002127 if (! op1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002128 fputs(getvar_s(V[F0]), X.F);
2129 } else {
2130 while (op1) {
2131 L.v = evaluate(nextarg(&op1), v1);
2132 if (L.v->type & VF_NUMBER) {
2133 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2134 getvar_i(L.v), TRUE);
2135 fputs(buf, X.F);
2136 } else {
2137 fputs(getvar_s(L.v), X.F);
2138 }
2139
2140 if (op1) fputs(getvar_s(V[OFS]), X.F);
2141 }
2142 }
2143 fputs(getvar_s(V[ORS]), X.F);
2144
2145 } else { /* OC_PRINTF */
2146 L.s = awk_printf(op1);
2147 fputs(L.s, X.F);
2148 free(L.s);
2149 }
2150 fflush(X.F);
2151 break;
2152
2153 case XC( OC_DELETE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002154 X.info = op1->info & OPCLSMASK;
2155 if (X.info == OC_VAR) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002156 R.v = op1->l.v;
2157 } else if (X.info == OC_FNARG) {
2158 R.v = &fnargs[op1->l.i];
2159 } else {
2160 runtime_error(EMSG_NOT_ARRAY);
2161 }
2162
Mike Frysingerde2b9382005-09-27 03:18:00 +00002163 if (op1->r.n) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002164 clrvar(L.v);
2165 L.s = getvar_s(evaluate(op1->r.n, v1));
2166 hash_remove(iamarray(R.v), L.s);
2167 } else {
2168 clear_array(iamarray(R.v));
2169 }
2170 break;
2171
2172 case XC( OC_NEWSOURCE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002173 programname = op->l.s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002174 break;
2175
2176 case XC( OC_RETURN ):
2177 copyvar(res, L.v);
2178 break;
2179
2180 case XC( OC_NEXTFILE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002181 nextfile = TRUE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002182 case XC( OC_NEXT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002183 nextrec = TRUE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002184 case XC( OC_DONE ):
2185 clrvar(res);
2186 break;
2187
2188 case XC( OC_EXIT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002189 awk_exit(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002190
2191 /* -- recursive node type -- */
2192
2193 case XC( OC_VAR ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002194 L.v = op->l.v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002195 if (L.v == V[NF])
2196 split_f0();
2197 goto v_cont;
2198
2199 case XC( OC_FNARG ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002200 L.v = &fnargs[op->l.i];
Glenn L McGrath545106f2002-11-11 06:21:00 +00002201
2202v_cont:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002203 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002204 break;
2205
2206 case XC( OC_IN ):
2207 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2208 break;
2209
2210 case XC( OC_REGEXP ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002211 op1 = op;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002212 L.s = getvar_s(V[F0]);
2213 goto re_cont;
2214
2215 case XC( OC_MATCH ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002216 op1 = op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002217re_cont:
2218 X.re = as_regex(op1, &sreg);
2219 R.i = regexec(X.re, L.s, 0, NULL, 0);
2220 if (X.re == &sreg) regfree(X.re);
2221 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2222 break;
2223
2224 case XC( OC_MOVE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002225 /* if source is a temporary string, jusk relink it to dest */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002226 if (R.v == v1+1 && R.v->string) {
2227 res = setvar_p(L.v, R.v->string);
2228 R.v->string = NULL;
2229 } else {
Mike Frysingerde2b9382005-09-27 03:18:00 +00002230 res = copyvar(L.v, R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002231 }
2232 break;
2233
2234 case XC( OC_TERNARY ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002235 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002236 runtime_error(EMSG_POSSIBLE_ERROR);
2237 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2238 break;
2239
2240 case XC( OC_FUNC ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002241 if (! op->r.f->body.first)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002242 runtime_error(EMSG_UNDEF_FUNC);
2243
2244 X.v = R.v = nvalloc(op->r.f->nargs+1);
2245 while (op1) {
2246 L.v = evaluate(nextarg(&op1), v1);
2247 copyvar(R.v, L.v);
2248 R.v->type |= VF_CHILD;
2249 R.v->x.parent = L.v;
2250 if (++R.v - X.v >= op->r.f->nargs)
2251 break;
2252 }
2253
2254 R.v = fnargs;
2255 fnargs = X.v;
2256
2257 L.s = programname;
2258 res = evaluate(op->r.f->body.first, res);
2259 programname = L.s;
2260
2261 nvfree(fnargs);
2262 fnargs = R.v;
2263 break;
2264
2265 case XC( OC_GETLINE ):
2266 case XC( OC_PGETLINE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002267 if (op1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002268 X.rsm = newfile(L.s);
2269 if (! X.rsm->F) {
2270 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2271 X.rsm->F = popen(L.s, "r");
2272 X.rsm->is_pipe = TRUE;
2273 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002274 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002275 }
2276 }
2277 } else {
2278 if (! iF) iF = next_input_file();
2279 X.rsm = iF;
2280 }
2281
2282 if (! X.rsm->F) {
2283 setvar_i(V[ERRNO], errno);
2284 setvar_i(res, -1);
2285 break;
2286 }
2287
2288 if (! op->r.n)
2289 R.v = V[F0];
2290
2291 L.i = awk_getline(X.rsm, R.v);
2292 if (L.i > 0) {
2293 if (! op1) {
2294 incvar(V[FNR]);
2295 incvar(V[NR]);
2296 }
2297 }
2298 setvar_i(res, L.i);
2299 break;
2300
Mike Frysingerde2b9382005-09-27 03:18:00 +00002301 /* simple builtins */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002302 case XC( OC_FBLTIN ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002303 switch (opn) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002304
2305 case F_in:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002306 R.d = (int)L.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002307 break;
2308
2309 case F_rn:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002310 R.d = (double)rand() / (double)RAND_MAX;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002311 break;
2312
2313#ifdef CONFIG_FEATURE_AWK_MATH
2314 case F_co:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002315 R.d = cos(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002316 break;
2317
2318 case F_ex:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002319 R.d = exp(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002320 break;
2321
2322 case F_lg:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002323 R.d = log(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002324 break;
2325
2326 case F_si:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002327 R.d = sin(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002328 break;
2329
2330 case F_sq:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002331 R.d = sqrt(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002332 break;
2333#else
2334 case F_co:
2335 case F_ex:
2336 case F_lg:
2337 case F_si:
2338 case F_sq:
2339 runtime_error(EMSG_NO_MATH);
2340 break;
2341#endif
2342
2343 case F_sr:
2344 R.d = (double)seed;
2345 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2346 srand(seed);
2347 break;
2348
2349 case F_ti:
2350 R.d = time(NULL);
2351 break;
2352
2353 case F_le:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002354 if (! op1)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002355 L.s = getvar_s(V[F0]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00002356 R.d = bb_strlen(L.s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002357 break;
2358
2359 case F_sy:
2360 fflush(NULL);
Rob Landley51843362006-01-09 05:26:58 +00002361 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002362 break;
2363
2364 case F_ff:
2365 if (! op1)
2366 fflush(stdout);
2367 else {
2368 if (L.s && *L.s) {
2369 X.rsm = newfile(L.s);
2370 fflush(X.rsm->F);
2371 } else {
2372 fflush(NULL);
2373 }
2374 }
2375 break;
2376
2377 case F_cl:
2378 X.rsm = (rstream *)hash_search(fdhash, L.s);
2379 if (X.rsm) {
2380 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00002381 free(X.rsm->buffer);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002382 hash_remove(fdhash, L.s);
2383 }
2384 if (R.i != 0)
2385 setvar_i(V[ERRNO], errno);
2386 R.d = (double)R.i;
2387 break;
2388 }
2389 setvar_i(res, R.d);
2390 break;
2391
2392 case XC( OC_BUILTIN ):
2393 res = exec_builtin(op, res);
2394 break;
2395
2396 case XC( OC_SPRINTF ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002397 setvar_p(res, awk_printf(op1));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002398 break;
2399
2400 case XC( OC_UNARY ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002401 X.v = R.v;
2402 L.d = R.d = getvar_i(R.v);
2403 switch (opn) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002404 case 'P':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002405 L.d = ++R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002406 goto r_op_change;
2407 case 'p':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002408 R.d++;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002409 goto r_op_change;
2410 case 'M':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002411 L.d = --R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002412 goto r_op_change;
2413 case 'm':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002414 R.d--;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002415 goto r_op_change;
2416 case '!':
2417 L.d = istrue(X.v) ? 0 : 1;
2418 break;
2419 case '-':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002420 L.d = -R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002421 break;
2422 r_op_change:
2423 setvar_i(X.v, R.d);
2424 }
2425 setvar_i(res, L.d);
2426 break;
2427
2428 case XC( OC_FIELD ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002429 R.i = (int)getvar_i(R.v);
2430 if (R.i == 0) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002431 res = V[F0];
2432 } else {
2433 split_f0();
2434 if (R.i > nfields)
2435 fsrealloc(R.i);
2436
2437 res = &Fields[R.i-1];
2438 }
2439 break;
2440
2441 /* concatenation (" ") and index joining (",") */
2442 case XC( OC_CONCAT ):
2443 case XC( OC_COMMA ):
Manuel Novoa III cad53642003-03-19 09:13:01 +00002444 opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
Mike Frysingerde2b9382005-09-27 03:18:00 +00002445 X.s = (char *)xmalloc(opn);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002446 strcpy(X.s, L.s);
2447 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2448 L.s = getvar_s(V[SUBSEP]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00002449 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002450 strcat(X.s, L.s);
2451 }
2452 strcat(X.s, R.s);
2453 setvar_p(res, X.s);
2454 break;
2455
2456 case XC( OC_LAND ):
2457 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2458 break;
2459
2460 case XC( OC_LOR ):
2461 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2462 break;
2463
2464 case XC( OC_BINARY ):
2465 case XC( OC_REPLACE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002466 R.d = getvar_i(R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002467 switch (opn) {
2468 case '+':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002469 L.d += R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002470 break;
2471 case '-':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002472 L.d -= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002473 break;
2474 case '*':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002475 L.d *= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002476 break;
2477 case '/':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002478 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2479 L.d /= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002480 break;
2481 case '&':
2482#ifdef CONFIG_FEATURE_AWK_MATH
Mike Frysingerde2b9382005-09-27 03:18:00 +00002483 L.d = pow(L.d, R.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002484#else
2485 runtime_error(EMSG_NO_MATH);
2486#endif
2487 break;
2488 case '%':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002489 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2490 L.d -= (int)(L.d / R.d) * R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002491 break;
2492 }
2493 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2494 break;
2495
2496 case XC( OC_COMPARE ):
2497 if (is_numeric(L.v) && is_numeric(R.v)) {
2498 L.d = getvar_i(L.v) - getvar_i(R.v);
2499 } else {
2500 L.s = getvar_s(L.v);
2501 R.s = getvar_s(R.v);
2502 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2503 }
2504 switch (opn & 0xfe) {
2505 case 0:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002506 R.i = (L.d > 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002507 break;
2508 case 2:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002509 R.i = (L.d >= 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002510 break;
2511 case 4:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002512 R.i = (L.d == 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002513 break;
2514 }
2515 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2516 break;
2517
2518 default:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002519 runtime_error(EMSG_POSSIBLE_ERROR);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002520 }
2521 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2522 op = op->a.n;
2523 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2524 break;
2525 if (nextrec)
2526 break;
2527 }
2528 nvfree(v1);
2529 return res;
2530}
2531
2532
2533/* -------- main & co. -------- */
2534
Mike Frysinger10a11e22005-09-27 02:23:02 +00002535static int awk_exit(int r)
2536{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002537 unsigned int i;
2538 hash_item *hi;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002539 static var tv;
2540
2541 if (! exiting) {
2542 exiting = TRUE;
Glenn L McGrathca29ffc2004-09-24 09:24:27 +00002543 nextrec = FALSE;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002544 evaluate(endseq.first, &tv);
2545 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002546
2547 /* waiting for children */
2548 for (i=0; i<fdhash->csize; i++) {
2549 hi = fdhash->items[i];
2550 while(hi) {
2551 if (hi->data.rs.F && hi->data.rs.is_pipe)
2552 pclose(hi->data.rs.F);
2553 hi = hi->next;
2554 }
2555 }
2556
2557 exit(r);
2558}
2559
2560/* if expr looks like "var=value", perform assignment and return 1,
2561 * otherwise return 0 */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00002562static int is_assignment(const char *expr)
Mike Frysinger10a11e22005-09-27 02:23:02 +00002563{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002564 char *exprc, *s, *s0, *s1;
2565
Manuel Novoa III cad53642003-03-19 09:13:01 +00002566 exprc = bb_xstrdup(expr);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002567 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2568 free(exprc);
2569 return FALSE;
2570 }
2571
2572 *(s++) = '\0';
2573 s0 = s1 = s;
2574 while (*s)
2575 *(s1++) = nextchar(&s);
2576
2577 *s1 = '\0';
2578 setvar_u(newvar(exprc), s0);
2579 free(exprc);
2580 return TRUE;
2581}
2582
2583/* switch to next input file */
Mike Frysinger10a11e22005-09-27 02:23:02 +00002584static rstream *next_input_file(void)
2585{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002586 static rstream rsm;
2587 FILE *F = NULL;
2588 char *fname, *ind;
2589 static int files_happen = FALSE;
2590
2591 if (rsm.F) fclose(rsm.F);
2592 rsm.F = NULL;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002593 rsm.pos = rsm.adv = 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002594
2595 do {
2596 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2597 if (files_happen)
2598 return NULL;
2599 fname = "-";
2600 F = stdin;
2601 } else {
2602 ind = getvar_s(incvar(V[ARGIND]));
2603 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2604 if (fname && *fname && !is_assignment(fname))
2605 F = afopen(fname, "r");
2606 }
2607 } while (!F);
2608
2609 files_happen = TRUE;
2610 setvar_s(V[FILENAME], fname);
2611 rsm.F = F;
2612 return &rsm;
2613}
2614
Rob Landleydfba7412006-03-06 20:47:33 +00002615int awk_main(int argc, char **argv)
Mike Frysinger10a11e22005-09-27 02:23:02 +00002616{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002617 char *s, *s1;
Rob Landley46e351d2006-02-14 16:05:32 +00002618 int i, j, c, flen;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002619 var *v;
2620 static var tv;
2621 char **envp;
2622 static int from_file = FALSE;
2623 rstream *rsm;
2624 FILE *F, *stdfiles[3];
2625 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2626
2627 /* allocate global buffer */
2628 buf = xmalloc(MAXVARFMT+1);
2629
2630 vhash = hash_init();
2631 ahash = hash_init();
2632 fdhash = hash_init();
2633 fnhash = hash_init();
2634
2635 /* initialize variables */
2636 for (i=0; *vNames; i++) {
2637 V[i] = v = newvar(nextword(&vNames));
2638 if (*vValues != '\377')
2639 setvar_s(v, nextword(&vValues));
2640 else
2641 setvar_i(v, 0);
2642
2643 if (*vNames == '*') {
2644 v->type |= VF_SPECIAL;
2645 vNames++;
2646 }
2647 }
2648
2649 handle_special(V[FS]);
2650 handle_special(V[RS]);
2651
2652 stdfiles[0] = stdin;
2653 stdfiles[1] = stdout;
2654 stdfiles[2] = stderr;
2655 for (i=0; i<3; i++) {
2656 rsm = newfile(nextword(&stdnames));
2657 rsm->F = stdfiles[i];
2658 }
2659
2660 for (envp=environ; *envp; envp++) {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002661 s = bb_xstrdup(*envp);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002662 s1 = strchr(s, '=');
Eric Andersen67776be2004-07-30 23:52:08 +00002663 if (!s1) {
2664 goto keep_going;
2665 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002666 *(s1++) = '\0';
2667 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
Eric Andersen67776be2004-07-30 23:52:08 +00002668keep_going:
Glenn L McGrath545106f2002-11-11 06:21:00 +00002669 free(s);
2670 }
2671
2672 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2673 switch (c) {
2674 case 'F':
2675 setvar_s(V[FS], optarg);
2676 break;
2677 case 'v':
2678 if (! is_assignment(optarg))
Manuel Novoa III cad53642003-03-19 09:13:01 +00002679 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002680 break;
2681 case 'f':
2682 from_file = TRUE;
2683 F = afopen(programname = optarg, "r");
2684 s = NULL;
2685 /* one byte is reserved for some trick in next_token */
Rob Landley46e351d2006-02-14 16:05:32 +00002686 if (fseek(F, 0, SEEK_END) == 0) {
2687 flen = ftell(F);
2688 s = (char *)xmalloc(flen+4);
2689 fseek(F, 0, SEEK_SET);
2690 i = 1 + fread(s+1, 1, flen, F);
2691 } else {
2692 for (i=j=1; j>0; i+=j) {
2693 s = (char *)xrealloc(s, i+4096);
2694 j = fread(s+i, 1, 4094, F);
2695 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002696 }
2697 s[i] = '\0';
2698 fclose(F);
2699 parse_program(s+1);
2700 free(s);
2701 break;
2702 case 'W':
Manuel Novoa III cad53642003-03-19 09:13:01 +00002703 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002704 break;
2705
2706 default:
Manuel Novoa III cad53642003-03-19 09:13:01 +00002707 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002708 }
2709 }
2710
2711 if (!from_file) {
2712 if (argc == optind)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002713 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002714 programname="cmd. line";
2715 parse_program(argv[optind++]);
2716
2717 }
2718
2719 /* fill in ARGV array */
2720 setvar_i(V[ARGC], argc - optind + 1);
2721 setari_u(V[ARGV], 0, "awk");
2722 for(i=optind; i < argc; i++)
2723 setari_u(V[ARGV], i+1-optind, argv[i]);
2724
2725 evaluate(beginseq.first, &tv);
2726 if (! mainseq.first && ! endseq.first)
2727 awk_exit(EXIT_SUCCESS);
2728
2729 /* input file could already be opened in BEGIN block */
2730 if (! iF) iF = next_input_file();
2731
2732 /* passing through input files */
2733 while (iF) {
2734
2735 nextfile = FALSE;
2736 setvar_i(V[FNR], 0);
2737
2738 while ((c = awk_getline(iF, V[F0])) > 0) {
2739
2740 nextrec = FALSE;
2741 incvar(V[NR]);
2742 incvar(V[FNR]);
2743 evaluate(mainseq.first, &tv);
2744
2745 if (nextfile)
2746 break;
2747 }
2748
2749 if (c < 0)
2750 runtime_error(strerror(errno));
2751
2752 iF = next_input_file();
2753
2754 }
2755
Glenn L McGrath545106f2002-11-11 06:21:00 +00002756 awk_exit(EXIT_SUCCESS);
2757
2758 return 0;
2759}
2760