/* vi: set sw=4 ts=4: */
/*
 * awk implementation for busybox
 *
 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
 *
 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <time.h>
#include <math.h>
#include <ctype.h>
#include <getopt.h>

#include "xregex.h"
#include "busybox.h"


/* size handed to fmt_num() when a number is converted to its string form */
#define MAXVARFMT	240
/* smallest number of vars allocated per temporary-variable block */
#define MINNVBLOCK	64

/* variable flags */
#define VF_NUMBER	0x0001	/* 1 = primary type is number */
#define VF_ARRAY	0x0002	/* 1 = it's an array */

#define VF_CACHED	0x0100	/* 1 = num/str value has cached str/num eq */
#define VF_USER		0x0200	/* 1 = user input (may be numeric string) */
#define VF_SPECIAL	0x0400	/* 1 = requires extra handling when changed */
#define VF_WALK		0x0800	/* 1 = variable has alloc'd x.walker list */
#define VF_FSTR		0x1000	/* 1 = string points to fstring buffer */
#define VF_CHILD	0x2000	/* 1 = function arg; x.parent points to source */
#define VF_DIRTY	0x4000	/* 1 = variable was set explicitly */

/* these flags are static, don't change them when value is changed */
#define VF_DONTTOUCH	(VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)

/* Variable */
typedef struct var_s {
	unsigned short type;		/* flags (VF_xxx) */
	double number;
	char *string;
	union {
		int aidx;				/* func arg index (on compilation stage) */
		struct xhash_s *array;	/* array ptr */
		struct var_s *parent;	/* for func args, ptr to actual parameter */
		char **walker;			/* list of array elements (for..in) */
	} x;
} var;

/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
	struct node_s *first;
	struct node_s *last;
	char *programname;
} chain;

/* Function */
typedef struct func_s {
	unsigned short nargs;
	struct chain_s body;
} func;

/* I/O stream */
typedef struct rstream_s {
	FILE *F;
	char *buffer;
	int adv;
	int size;
	int pos;
	unsigned short is_pipe;
} rstream;

/*
 * Hash table entry.  `data' must stay the first member: the hash
 * functions hand out &item->data to their callers.
 */
typedef struct hash_item_s {
	union {
		struct var_s v;			/* variable/array hash */
		struct rstream_s rs;	/* redirect streams hash */
		struct func_s f;		/* functions hash */
	} data;
	struct hash_item_s *next;	/* next in chain */
	char name[1];				/* really it's longer */
} hash_item;

typedef struct xhash_s {
	unsigned int nel;			/* num of elements */
	unsigned int csize;			/* current hash size */
	unsigned int nprime;		/* next hash size in PRIMES[] */
	unsigned int glen;			/* summary length of item names */
	struct hash_item_s **items;
} xhash;

/* Tree node */
typedef struct node_s {
	uint32_t info;
	unsigned short lineno;
	union {
		struct node_s *n;
		var *v;
		int i;
		char *s;
		regex_t *re;
	} l;
	union {
		struct node_s *n;
		regex_t *ire;
		func *f;
		int argno;
	} r;
	union {
		struct node_s *n;
	} a;
} node;

/* Block of temporary variables */
typedef struct nvblock_s {
	int size;					/* number of vars that fit in nv[] */
	var *pos;					/* first unused slot in nv[] */
	struct nvblock_s *prev;
	struct nvblock_s *next;
	var nv[];					/* storage, allocated together with the header */
} nvblock;

/* A regexp node together with its two compiled forms (see mk_re_node()) */
typedef struct tsplitter_s {
	node n;
	regex_t re[2];
} tsplitter;

/* simple token classes */
/* Order and hex values are very important!!! See next_token() */
#define	TC_SEQSTART	 1				/* ( */
#define	TC_SEQTERM	(1 << 1)		/* ) */
#define	TC_REGEXP	(1 << 2)		/* /.../ */
#define	TC_OUTRDR	(1 << 3)		/* | > >> */
#define	TC_UOPPOST	(1 << 4)		/* unary postfix operator */
#define	TC_UOPPRE1	(1 << 5)		/* unary prefix operator */
#define	TC_BINOPX	(1 << 6)		/* two-opnd operator */
#define	TC_IN		(1 << 7)
#define	TC_COMMA	(1 << 8)
#define	TC_PIPE		(1 << 9)		/* input redirection pipe */
#define	TC_UOPPRE2	(1 << 10)		/* unary prefix operator */
#define	TC_ARRTERM	(1 << 11)		/* ] */
#define	TC_GRPSTART	(1 << 12)		/* { */
#define	TC_GRPTERM	(1 << 13)		/* } */
#define	TC_SEMICOL	(1 << 14)
#define	TC_NEWLINE	(1 << 15)
#define	TC_STATX	(1 << 16)		/* ctl statement (for, next...) */
#define	TC_WHILE	(1 << 17)
#define	TC_ELSE		(1 << 18)
#define	TC_BUILTIN	(1 << 19)
#define	TC_GETLINE	(1 << 20)
#define	TC_FUNCDECL	(1 << 21)		/* `function' `func' */
#define	TC_BEGIN	(1 << 22)
#define	TC_END		(1 << 23)
#define	TC_EOF		(1 << 24)
#define	TC_VARIABLE	(1 << 25)
#define	TC_ARRAY	(1 << 26)
#define	TC_FUNCTION	(1 << 27)
#define	TC_STRING	(1 << 28)
#define	TC_NUMBER	(1 << 29)

#define	TC_UOPPRE	(TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define	TC_BINOP	(TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define	TC_UNARYOP	(TC_UOPPRE | TC_UOPPOST)
#define	TC_OPERAND	(TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
					TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define	TC_STATEMNT	(TC_STATX | TC_WHILE)
#define	TC_OPTERM	(TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define	TC_WORD		(TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
					TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define	TC_NOTERM	(TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
					TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define	TC_OPSEQ	(TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define	TC_GRPSEQ	(TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define	TC_CONCAT1	(TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
					TC_STRING | TC_NUMBER | TC_UOPPOST)
#define	TC_CONCAT2	(TC_OPERAND | TC_UOPPRE)

#define	OF_RES1		0x010000
#define	OF_RES2		0x020000
#define	OF_STR1		0x040000
#define	OF_STR2		0x080000
#define	OF_NUM1		0x100000
#define	OF_CHECKED	0x200000

/* combined operator flags */
#define	xx	0
#define	xV	OF_RES2
#define	xS	(OF_RES2 | OF_STR2)
#define	Vx	OF_RES1
#define	VV	(OF_RES1 | OF_RES2)
#define	Nx	(OF_RES1 | OF_NUM1)
#define	NV	(OF_RES1 | OF_NUM1 | OF_RES2)
#define	Sx	(OF_RES1 | OF_STR1)
#define	SV	(OF_RES1 | OF_STR1 | OF_RES2)
#define	SS	(OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

#define	OPCLSMASK	0xFF00
#define	OPNMASK		0x007F

/* operator priority is the highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 */
/* The shift is done in uint32_t: values such as P(0x83) reach bit 31,
 * which a plain (signed) int shift may not touch. */
#define	P(x)		((uint32_t)(x) << 24)
#define	PRIMASK		0x7F000000
#define	PRIMASK2	0x7E000000

/* Operation classes */

#define	SHIFT_TIL_THIS	0x0600
#define	RECUR_FROM_THIS	0x1000

/* The class lives in bits 8..15 of a node's info word (see OPCLSMASK) */
enum {
	OC_DELETE=0x0100,	OC_EXEC=0x0200,		OC_NEWSOURCE=0x0300,
	OC_PRINT=0x0400,	OC_PRINTF=0x0500,	OC_WALKINIT=0x0600,

	OC_BR=0x0700,		OC_BREAK=0x0800,	OC_CONTINUE=0x0900,
	OC_EXIT=0x0a00,		OC_NEXT=0x0b00,		OC_NEXTFILE=0x0c00,
	OC_TEST=0x0d00,		OC_WALKNEXT=0x0e00,

	OC_BINARY=0x1000,	OC_BUILTIN=0x1100,	OC_COLON=0x1200,
	OC_COMMA=0x1300,	OC_COMPARE=0x1400,	OC_CONCAT=0x1500,
	OC_FBLTIN=0x1600,	OC_FIELD=0x1700,	OC_FNARG=0x1800,
	OC_FUNC=0x1900,		OC_GETLINE=0x1a00,	OC_IN=0x1b00,
	OC_LAND=0x1c00,		OC_LOR=0x1d00,		OC_MATCH=0x1e00,
	OC_MOVE=0x1f00,		OC_PGETLINE=0x2000,	OC_REGEXP=0x2100,
	OC_REPLACE=0x2200,	OC_RETURN=0x2300,	OC_SPRINTF=0x2400,
	OC_TERNARY=0x2500,	OC_UNARY=0x2600,	OC_VAR=0x2700,
	OC_DONE=0x2800,

	ST_IF=0x3000,		ST_DO=0x3100,		ST_FOR=0x3200,
	ST_WHILE=0x3300
};

/* simple builtins */
enum {
	F_in=0,	F_rn,	F_co,	F_ex,	F_lg,	F_si,	F_sq,	F_sr,
	F_ti,	F_le,	F_sy,	F_ff,	F_cl
};

/* builtins */
enum {
	B_a2=0,	B_ix,	B_ma,	B_sp,	B_ss,	B_ti,	B_lo,	B_up,
	B_ge,	B_gs,	B_su
};

/* tokens and their corresponding info values */

#define	NTC		"\377"		/* switch to next token class (tc<<1) */
#define	NTCC	'\377'

#define	OC_B	OC_BUILTIN

/*
 * Every token is stored as one length byte followed by its text; NTC
 * separates the token classes, which appear in TC_xxx bit order.
 * tokeninfo[] holds one entry per token, in the same order.
 */
static char * const tokenlist =
	"\1("		NTC
	"\1)"		NTC
	"\1/"		NTC									/* REGEXP */
	"\2>>"		"\1>"		"\1|"		NTC			/* OUTRDR */
	"\2++"		"\2--"		NTC						/* UOPPOST */
	"\2++"		"\2--"		"\1$"		NTC			/* UOPPRE1 */
	"\2=="		"\1="		"\2+="		"\2-="		/* BINOPX */
	"\2*="		"\2/="		"\2%="		"\2^="
	"\1+"		"\1-"		"\3**="		"\2**"
	"\1/"		"\1%"		"\1^"		"\1*"
	"\2!="		"\2>="		"\2<="		"\1>"
	"\1<"		"\2!~"		"\1~"		"\2&&"
	"\2||"		"\1?"		"\1:"		NTC
	"\2in"		NTC
	"\1,"		NTC
	"\1|"		NTC
	"\1+"		"\1-"		"\1!"		NTC			/* UOPPRE2 */
	"\1]"		NTC
	"\1{"		NTC
	"\1}"		NTC
	"\1;"		NTC
	"\1\n"		NTC
	"\2if"		"\2do"		"\3for"		"\5break"	/* STATX */
	"\10continue"			"\6delete"	"\5print"
	"\6printf"	"\4next"	"\10nextfile"
	"\6return"	"\4exit"	NTC
	"\5while"	NTC
	"\4else"	NTC

	"\5close"	"\6system"	"\6fflush"	"\5atan2"	/* BUILTIN */
	"\3cos"		"\3exp"		"\3int"		"\3log"
	"\4rand"	"\3sin"		"\4sqrt"	"\5srand"
	"\6gensub"	"\4gsub"	"\5index"	"\6length"
	"\5match"	"\5split"	"\7sprintf"	"\3sub"
	"\6substr"	"\7systime"	"\10strftime"
	"\7tolower"	"\7toupper"	NTC
	"\7getline"	NTC
	"\4func"	"\10function"	NTC
	"\5BEGIN"	NTC
	"\3END"		"\0"
	;
313
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000314static const uint32_t tokeninfo[] = {
Glenn L McGrath545106f2002-11-11 06:21:00 +0000315
316 0,
317 0,
318 OC_REGEXP,
319 xS|'a', xS|'w', xS|'|',
320 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
321 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
322 OC_FIELD|xV|P(5),
323 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
324 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
325 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
326 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
327 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
328 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
329 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
330 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
331 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
332 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
333 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
334 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
335 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
336 OC_COLON|xx|P(67)|':',
337 OC_IN|SV|P(49),
338 OC_COMMA|SS|P(80),
339 OC_PGETLINE|SV|P(37),
340 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
341 OC_UNARY|xV|P(19)|'!',
342 0,
343 0,
344 0,
345 0,
346 0,
347 ST_IF, ST_DO, ST_FOR, OC_BREAK,
348 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
349 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
350 OC_RETURN|Vx, OC_EXIT|Nx,
351 ST_WHILE,
352 0,
353
354 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
355 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
356 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
357 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
358 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
359 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
360 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
361 OC_GETLINE|SV|P(0),
362 0, 0,
363 0,
364 0
365};
366
/* internal variable names and their initial values */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
enum {
	CONVFMT=0,	OFMT,		FS,			OFS,
	ORS,		RS,			RT,			FILENAME,
	SUBSEP,		ARGIND,		ARGC,		ARGV,
	ERRNO,		FNR,
	NR,			NF,			IGNORECASE,
	ENVIRON,	F0,			_intvarcount_
};

/* NUL-separated names, listed in the order of the enum above */
static char * vNames =
	"CONVFMT\0"	"OFMT\0"	"FS\0*"		"OFS\0"
	"ORS\0"		"RS\0*"		"RT\0"		"FILENAME\0"
	"SUBSEP\0"	"ARGIND\0"	"ARGC\0"	"ARGV\0"
	"ERRNO\0"	"FNR\0"
	"NR\0"		"NF\0*"		"IGNORECASE\0*"
	"ENVIRON\0"	"$\0*"		"\0";

/* NUL-separated initial string values; the list ends with a \377 byte */
static char * vValues =
	"%.6g\0"	"%.6g\0"	" \0"		" \0"
	"\n\0"		"\n\0"		"\0"		"\0"
	"\034\0"
	"\377";

/* hash size may grow to these values */
/* FIRST_PRIME must expand to a bare expression: no trailing semicolon */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);

397/* globals */
398
399extern char **environ;
400
401static var * V[_intvarcount_];
402static chain beginseq, mainseq, endseq, *seq;
403static int nextrec, nextfile;
404static node *break_ptr, *continue_ptr;
405static rstream *iF;
406static xhash *vhash, *ahash, *fdhash, *fnhash;
407static char *programname;
408static short lineno;
409static int is_f0_split;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000410static int nfields;
411static var *Fields;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000412static tsplitter fsplitter, rsplitter;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000413static nvblock *cb;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000414static char *pos;
415static char *buf;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000416static int icase;
417static int exiting;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000418
419static struct {
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000420 uint32_t tclass;
421 uint32_t info;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000422 char *string;
423 double number;
424 short lineno;
425 int rollback;
426} t;
427
428/* function prototypes */
Glenn L McGrath545106f2002-11-11 06:21:00 +0000429static void handle_special(var *);
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000430static node *parse_expr(uint32_t);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000431static void chain_group(void);
432static var *evaluate(node *, var *);
433static rstream *next_input_file(void);
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000434static int fmt_num(char *, int, const char *, double, int);
Bernhard Reutner-Fischer86f5c992006-01-22 22:55:11 +0000435static int awk_exit(int) ATTRIBUTE_NORETURN;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000436
/* ---- error handling ---- */

static const char EMSG_INTERNAL_ERROR[] = "Internal error";
static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
static const char EMSG_INV_FMT[] = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
#ifndef CONFIG_FEATURE_AWK_MATH
static const char EMSG_NO_MATH[] = "Math support is not compiled in";
#endif

Bernhard Reutner-Fischer86f5c992006-01-22 22:55:11 +0000452static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
Glenn L McGrathd4036f82002-11-28 09:30:40 +0000453static void syntax_error(const char * const message)
454{
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000455 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000456}
457
458#define runtime_error(x) syntax_error(x)
459
460
/* ---- hash stuff ---- */

/*
 * Hash a NUL-terminated name.  Each character c updates the value as
 * idx = c + idx * 63 (written as (idx << 6) - idx), wrapping in
 * unsigned int.  Callers reduce the result modulo the table size.
 */
static unsigned int hashidx(const char *name)
{
	unsigned int idx = 0;

	while (*name)
		idx = *name++ + (idx << 6) - idx;
	return idx;
}

471/* create new hash */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000472static xhash *hash_init(void)
473{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000474 xhash *newhash;
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000475
Glenn L McGrath545106f2002-11-11 06:21:00 +0000476 newhash = (xhash *)xcalloc(1, sizeof(xhash));
477 newhash->csize = FIRST_PRIME;
478 newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
479
480 return newhash;
481}
482
483/* find item in hash, return ptr to data, NULL if not found */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000484static void *hash_search(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000485{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000486 hash_item *hi;
487
488 hi = hash->items [ hashidx(name) % hash->csize ];
489 while (hi) {
490 if (strcmp(hi->name, name) == 0)
491 return &(hi->data);
492 hi = hi->next;
493 }
494 return NULL;
495}
496
497/* grow hash if it becomes too big */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000498static void hash_rebuild(xhash *hash)
499{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000500 unsigned int newsize, i, idx;
501 hash_item **newitems, *hi, *thi;
502
503 if (hash->nprime == NPRIMES)
504 return;
505
506 newsize = PRIMES[hash->nprime++];
507 newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
508
509 for (i=0; i<hash->csize; i++) {
510 hi = hash->items[i];
511 while (hi) {
512 thi = hi;
513 hi = thi->next;
514 idx = hashidx(thi->name) % newsize;
515 thi->next = newitems[idx];
516 newitems[idx] = thi;
517 }
518 }
519
520 free(hash->items);
521 hash->csize = newsize;
522 hash->items = newitems;
523}
524
525/* find item in hash, add it if necessary. Return ptr to data */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000526static void *hash_find(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000527{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000528 hash_item *hi;
529 unsigned int idx;
530 int l;
531
532 hi = hash_search(hash, name);
533 if (! hi) {
534 if (++hash->nel / hash->csize > 10)
535 hash_rebuild(hash);
536
Manuel Novoa III cad53642003-03-19 09:13:01 +0000537 l = bb_strlen(name) + 1;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000538 hi = xcalloc(sizeof(hash_item) + l, 1);
539 memcpy(hi->name, name, l);
540
541 idx = hashidx(name) % hash->csize;
542 hi->next = hash->items[idx];
543 hash->items[idx] = hi;
544 hash->glen += l;
545 }
546 return &(hi->data);
547}
548
/* typed wrappers around hash_find(); fully parenthesized so that the
 * result can be used directly in a larger expression */
#define findvar(hash, name) ((var *)     hash_find((hash), (name)))
#define newvar(name)        ((var *)     hash_find(vhash, (name)))
#define newfile(name)       ((rstream *) hash_find(fdhash, (name)))
#define newfunc(name)       ((func *)    hash_find(fnhash, (name)))
553
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000554static void hash_remove(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000555{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000556 hash_item *hi, **phi;
557
558 phi = &(hash->items[ hashidx(name) % hash->csize ]);
559 while (*phi) {
560 hi = *phi;
561 if (strcmp(hi->name, name) == 0) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000562 hash->glen -= (bb_strlen(name) + 1);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000563 hash->nel--;
564 *phi = hi->next;
565 free(hi);
566 break;
567 }
568 phi = &(hi->next);
569 }
570}
571
572/* ------ some useful functions ------ */
573
Mike Frysinger10a11e22005-09-27 02:23:02 +0000574static void skip_spaces(char **s)
575{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000576 register char *p = *s;
577
578 while(*p == ' ' || *p == '\t' ||
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000579 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
Mike Frysingerde2b9382005-09-27 03:18:00 +0000580 p++;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000581 }
582 *s = p;
583}
584
/*
 * Return the NUL-terminated word *s points at and advance *s just
 * past that word's terminating NUL (for walking NUL-separated lists).
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;

	return word;
}

/*
 * Fetch one character from *s and advance *s.  A backslash is handed
 * to bb_process_escape_sequence(); if that returns a backslash without
 * moving *s, the character following the backslash is taken instead.
 */
static char nextchar(char **s)
{
	char c, *after;

	c = *((*s)++);
	after = *s;
	if (c == '\\')
		c = bb_process_escape_sequence((const char **)s);
	if (c == '\\' && *s == after)
		c = *((*s)++);
	return c;
}

/*
 * Non-zero if c may appear in a name: alphanumeric or underscore.
 * Callers pass plain chars, which may be negative; isalnum() is only
 * defined for unsigned char values and EOF, hence the cast.
 */
static inline int isalnum_(int c)
{
	return (isalnum((unsigned char)c) || c == '_');
}

/* Open `path' with bb_xfopen(); the path "-" yields stdin instead */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return bb_xfopen(path, mode);
}

615/* -------- working with variables (set/get/copy/etc) -------- */
616
Mike Frysinger10a11e22005-09-27 02:23:02 +0000617static xhash *iamarray(var *v)
618{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000619 var *a = v;
620
621 while (a->type & VF_CHILD)
622 a = a->x.parent;
623
624 if (! (a->type & VF_ARRAY)) {
625 a->type |= VF_ARRAY;
626 a->x.array = hash_init();
627 }
628 return a->x.array;
629}
630
Mike Frysinger10a11e22005-09-27 02:23:02 +0000631static void clear_array(xhash *array)
632{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000633 unsigned int i;
634 hash_item *hi, *thi;
635
636 for (i=0; i<array->csize; i++) {
637 hi = array->items[i];
638 while (hi) {
639 thi = hi;
640 hi = hi->next;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000641 free(thi->data.v.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000642 free(thi);
643 }
644 array->items[i] = NULL;
645 }
646 array->glen = array->nel = 0;
647}
648
649/* clear a variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000650static var *clrvar(var *v)
651{
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000652 if (!(v->type & VF_FSTR))
Glenn L McGrath545106f2002-11-11 06:21:00 +0000653 free(v->string);
654
655 v->type &= VF_DONTTOUCH;
656 v->type |= VF_DIRTY;
657 v->string = NULL;
658 return v;
659}
660
661/* assign string value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000662static var *setvar_p(var *v, char *value)
663{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000664 clrvar(v);
665 v->string = value;
666 handle_special(v);
667
668 return v;
669}
670
671/* same as setvar_p but make a copy of string */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000672static var *setvar_s(var *v, const char *value)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000673{
Manuel Novoa III cad53642003-03-19 09:13:01 +0000674 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000675}
676
677/* same as setvar_s but set USER flag */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000678static var *setvar_u(var *v, const char *value)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000679{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000680 setvar_s(v, value);
681 v->type |= VF_USER;
682 return v;
683}
684
685/* set array element to user string */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000686static void setari_u(var *a, int idx, const char *s)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000687{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000688 register var *v;
689 static char sidx[12];
690
691 sprintf(sidx, "%d", idx);
692 v = findvar(iamarray(a), sidx);
693 setvar_u(v, s);
694}
695
696/* assign numeric value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000697static var *setvar_i(var *v, double value)
698{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000699 clrvar(v);
700 v->type |= VF_NUMBER;
701 v->number = value;
702 handle_special(v);
703 return v;
704}
705
Mike Frysinger10a11e22005-09-27 02:23:02 +0000706static char *getvar_s(var *v)
707{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000708 /* if v is numeric and has no cached string, convert it to string */
709 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
710 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
Manuel Novoa III cad53642003-03-19 09:13:01 +0000711 v->string = bb_xstrdup(buf);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000712 v->type |= VF_CACHED;
713 }
714 return (v->string == NULL) ? "" : v->string;
715}
716
Mike Frysinger10a11e22005-09-27 02:23:02 +0000717static double getvar_i(var *v)
718{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000719 char *s;
720
721 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
722 v->number = 0;
723 s = v->string;
724 if (s && *s) {
725 v->number = strtod(s, &s);
726 if (v->type & VF_USER) {
727 skip_spaces(&s);
728 if (*s != '\0')
729 v->type &= ~VF_USER;
730 }
731 } else {
732 v->type &= ~VF_USER;
733 }
734 v->type |= VF_CACHED;
735 }
736 return v->number;
737}
738
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000739static var *copyvar(var *dest, const var *src)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000740{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000741 if (dest != src) {
742 clrvar(dest);
743 dest->type |= (src->type & ~VF_DONTTOUCH);
744 dest->number = src->number;
745 if (src->string)
Manuel Novoa III cad53642003-03-19 09:13:01 +0000746 dest->string = bb_xstrdup(src->string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000747 }
748 handle_special(dest);
749 return dest;
750}
751
Mike Frysinger10a11e22005-09-27 02:23:02 +0000752static var *incvar(var *v)
753{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000754 return setvar_i(v, getvar_i(v)+1.);
755}
756
757/* return true if v is number or numeric string */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000758static int is_numeric(var *v)
759{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000760 getvar_i(v);
761 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
762}
763
764/* return 1 when value of v corresponds to true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000765static int istrue(var *v)
766{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000767 if (is_numeric(v))
768 return (v->number == 0) ? 0 : 1;
769 else
770 return (v->string && *(v->string)) ? 1 : 0;
771}
772
Eric Andersenaff114c2004-04-14 17:51:38 +0000773/* temporary variables allocator. Last allocated should be first freed */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000774static var *nvalloc(int n)
775{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000776 nvblock *pb = NULL;
777 var *v, *r;
778 int size;
779
780 while (cb) {
781 pb = cb;
782 if ((cb->pos - cb->nv) + n <= cb->size) break;
783 cb = cb->next;
784 }
785
786 if (! cb) {
787 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
788 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
789 cb->size = size;
790 cb->pos = cb->nv;
791 cb->prev = pb;
792 cb->next = NULL;
793 if (pb) pb->next = cb;
794 }
795
796 v = r = cb->pos;
797 cb->pos += n;
798
799 while (v < cb->pos) {
800 v->type = 0;
801 v->string = NULL;
802 v++;
803 }
804
805 return r;
806}
807
Mike Frysinger10a11e22005-09-27 02:23:02 +0000808static void nvfree(var *v)
809{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000810 var *p;
811
812 if (v < cb->nv || v >= cb->pos)
813 runtime_error(EMSG_INTERNAL_ERROR);
814
815 for (p=v; p<cb->pos; p++) {
816 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
817 clear_array(iamarray(p));
818 free(p->x.array->items);
819 free(p->x.array);
820 }
821 if (p->type & VF_WALK)
822 free(p->x.walker);
823
824 clrvar(p);
825 }
826
827 cb->pos = v;
828 while (cb->prev && cb->pos == cb->nv) {
829 cb = cb->prev;
830 }
831}
832
833/* ------- awk program text parsing ------- */
834
835/* Parse next token pointed by global pos, place results into global t.
836 * If token isn't expected, give away. Return token class
837 */
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000838static uint32_t next_token(uint32_t expected)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000839{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000840 char *p, *pp, *s;
841 char *tl;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000842 uint32_t tc;
843 const uint32_t *ti;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000844 int l;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000845 static int concat_inserted;
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000846 static uint32_t save_tclass, save_info;
847 static uint32_t ltclass = TC_OPTERM;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000848
849 if (t.rollback) {
850
851 t.rollback = FALSE;
852
853 } else if (concat_inserted) {
854
855 concat_inserted = FALSE;
856 t.tclass = save_tclass;
857 t.info = save_info;
858
859 } else {
860
861 p = pos;
862
863 readnext:
864 skip_spaces(&p);
865 lineno = t.lineno;
866 if (*p == '#')
867 while (*p != '\n' && *p != '\0') p++;
868
869 if (*p == '\n')
870 t.lineno++;
871
872 if (*p == '\0') {
873 tc = TC_EOF;
874
875 } else if (*p == '\"') {
876 /* it's a string */
877 t.string = s = ++p;
878 while (*p != '\"') {
879 if (*p == '\0' || *p == '\n')
880 syntax_error(EMSG_UNEXP_EOS);
881 *(s++) = nextchar(&p);
882 }
883 p++;
884 *s = '\0';
885 tc = TC_STRING;
886
887 } else if ((expected & TC_REGEXP) && *p == '/') {
888 /* it's regexp */
889 t.string = s = ++p;
890 while (*p != '/') {
891 if (*p == '\0' || *p == '\n')
892 syntax_error(EMSG_UNEXP_EOS);
893 if ((*s++ = *p++) == '\\') {
894 pp = p;
Manuel Novoa III cad53642003-03-19 09:13:01 +0000895 *(s-1) = bb_process_escape_sequence((const char **)&p);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000896 if (*pp == '\\') *s++ = '\\';
897 if (p == pp) *s++ = *p++;
898 }
899 }
900 p++;
901 *s = '\0';
902 tc = TC_REGEXP;
903
904 } else if (*p == '.' || isdigit(*p)) {
905 /* it's a number */
906 t.number = strtod(p, &p);
907 if (*p == '.')
908 syntax_error(EMSG_UNEXP_TOKEN);
909 tc = TC_NUMBER;
910
911 } else {
912 /* search for something known */
913 tl = tokenlist;
914 tc = 0x00000001;
915 ti = tokeninfo;
916 while (*tl) {
917 l = *(tl++);
918 if (l == NTCC) {
919 tc <<= 1;
920 continue;
921 }
922 /* if token class is expected, token
923 * matches and it's not a longer word,
924 * then this is what we are looking for
925 */
926 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
927 *tl == *p && strncmp(p, tl, l) == 0 &&
928 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
929 t.info = *ti;
930 p += l;
931 break;
932 }
933 ti++;
934 tl += l;
935 }
936
937 if (! *tl) {
938 /* it's a name (var/array/function),
939 * otherwise it's something wrong
940 */
941 if (! isalnum_(*p))
942 syntax_error(EMSG_UNEXP_TOKEN);
943
944 t.string = --p;
945 while(isalnum_(*(++p))) {
946 *(p-1) = *p;
947 }
948 *(p-1) = '\0';
949 tc = TC_VARIABLE;
Bernhard Reutner-Fischerbb204622005-10-17 14:21:06 +0000950 /* also consume whitespace between functionname and bracket */
951 skip_spaces(&p);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000952 if (*p == '(') {
953 tc = TC_FUNCTION;
954 } else {
Glenn L McGrath545106f2002-11-11 06:21:00 +0000955 if (*p == '[') {
956 p++;
957 tc = TC_ARRAY;
958 }
959 }
960 }
961 }
962 pos = p;
963
964 /* skipping newlines in some cases */
965 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
966 goto readnext;
967
968 /* insert concatenation operator when needed */
969 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
970 concat_inserted = TRUE;
971 save_tclass = tc;
972 save_info = t.info;
973 tc = TC_BINOP;
974 t.info = OC_CONCAT | SS | P(35);
975 }
976
977 t.tclass = tc;
978 }
979 ltclass = t.tclass;
980
981 /* Are we ready for this? */
982 if (! (ltclass & expected))
983 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
984 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
985
986 return ltclass;
987}
988
989static void rollback_token(void) { t.rollback = TRUE; }
990
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000991static node *new_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000992{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000993 register node *n;
994
995 n = (node *)xcalloc(sizeof(node), 1);
996 n->info = info;
997 n->lineno = lineno;
998 return n;
999}
1000
Mike Frysinger10a11e22005-09-27 02:23:02 +00001001static node *mk_re_node(char *s, node *n, regex_t *re)
1002{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001003 n->info = OC_REGEXP;
1004 n->l.re = re;
1005 n->r.ire = re + 1;
1006 xregcomp(re, s, REG_EXTENDED);
1007 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1008
1009 return n;
1010}
1011
Mike Frysinger10a11e22005-09-27 02:23:02 +00001012static node *condition(void)
1013{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001014 next_token(TC_SEQSTART);
1015 return parse_expr(TC_SEQTERM);
1016}
1017
1018/* parse expression terminated by given argument, return ptr
1019 * to built subtree. Terminator is eaten by parse_expr */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001020static node *parse_expr(uint32_t iexp)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001021{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001022 node sn;
1023 node *cn = &sn;
1024 node *vn, *glptr;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001025 uint32_t tc, xtc;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001026 var *v;
1027
1028 sn.info = PRIMASK;
1029 sn.r.n = glptr = NULL;
1030 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1031
1032 while (! ((tc = next_token(xtc)) & iexp)) {
1033 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1034 /* input redirection (<) attached to glptr node */
1035 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
Glenn L McGrath4bded582004-02-22 11:55:09 +00001036 cn->a.n = glptr;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001037 xtc = TC_OPERAND | TC_UOPPRE;
1038 glptr = NULL;
1039
1040 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1041 /* for binary and postfix-unary operators, jump back over
1042 * previous operators with higher priority */
1043 vn = cn;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001044 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
Glenn L McGrath545106f2002-11-11 06:21:00 +00001045 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1046 vn = vn->a.n;
1047 if ((t.info & OPCLSMASK) == OC_TERNARY)
1048 t.info += P(6);
1049 cn = vn->a.n->r.n = new_node(t.info);
1050 cn->a.n = vn->a.n;
1051 if (tc & TC_BINOP) {
1052 cn->l.n = vn;
1053 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1054 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1055 /* it's a pipe */
1056 next_token(TC_GETLINE);
1057 /* give maximum priority to this pipe */
1058 cn->info &= ~PRIMASK;
1059 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1060 }
1061 } else {
1062 cn->r.n = vn;
1063 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1064 }
1065 vn->a.n = cn;
1066
1067 } else {
1068 /* for operands and prefix-unary operators, attach them
1069 * to last node */
1070 vn = cn;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001071 cn = vn->r.n = new_node(t.info);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001072 cn->a.n = vn;
1073 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1074 if (tc & (TC_OPERAND | TC_REGEXP)) {
Rob Landleyed830e82005-06-07 02:43:52 +00001075 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001076 /* one should be very careful with switch on tclass -
Glenn L McGrath545106f2002-11-11 06:21:00 +00001077 * only simple tclasses should be used! */
1078 switch (tc) {
1079 case TC_VARIABLE:
1080 case TC_ARRAY:
1081 cn->info = OC_VAR;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001082 if ((v = hash_search(ahash, t.string)) != NULL) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001083 cn->info = OC_FNARG;
1084 cn->l.i = v->x.aidx;
1085 } else {
Mike Frysingerde2b9382005-09-27 03:18:00 +00001086 cn->l.v = newvar(t.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001087 }
1088 if (tc & TC_ARRAY) {
1089 cn->info |= xS;
1090 cn->r.n = parse_expr(TC_ARRTERM);
1091 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00001092 break;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001093
Glenn L McGrath545106f2002-11-11 06:21:00 +00001094 case TC_NUMBER:
1095 case TC_STRING:
1096 cn->info = OC_VAR;
1097 v = cn->l.v = xcalloc(sizeof(var), 1);
1098 if (tc & TC_NUMBER)
1099 setvar_i(v, t.number);
1100 else
1101 setvar_s(v, t.string);
1102 break;
1103
1104 case TC_REGEXP:
1105 mk_re_node(t.string, cn,
1106 (regex_t *)xcalloc(sizeof(regex_t),2));
1107 break;
1108
1109 case TC_FUNCTION:
Mike Frysingerde2b9382005-09-27 03:18:00 +00001110 cn->info = OC_FUNC;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001111 cn->r.f = newfunc(t.string);
1112 cn->l.n = condition();
1113 break;
1114
1115 case TC_SEQSTART:
1116 cn = vn->r.n = parse_expr(TC_SEQTERM);
1117 cn->a.n = vn;
1118 break;
1119
1120 case TC_GETLINE:
1121 glptr = cn;
1122 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1123 break;
1124
1125 case TC_BUILTIN:
1126 cn->l.n = condition();
1127 break;
1128 }
1129 }
1130 }
1131 }
1132 return sn.r.n;
1133}
1134
1135/* add node to chain. Return ptr to alloc'd node */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001136static node *chain_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001137{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001138 register node *n;
1139
1140 if (! seq->first)
1141 seq->first = seq->last = new_node(0);
1142
1143 if (seq->programname != programname) {
1144 seq->programname = programname;
1145 n = chain_node(OC_NEWSOURCE);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001146 n->l.s = bb_xstrdup(programname);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001147 }
1148
1149 n = seq->last;
1150 n->info = info;
1151 seq->last = n->a.n = new_node(OC_DONE);
1152
1153 return n;
1154}
1155
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001156static void chain_expr(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001157{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001158 node *n;
1159
1160 n = chain_node(info);
1161 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1162 if (t.tclass & TC_GRPTERM)
1163 rollback_token();
1164}
1165
Mike Frysinger10a11e22005-09-27 02:23:02 +00001166static node *chain_loop(node *nn)
1167{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001168 node *n, *n2, *save_brk, *save_cont;
1169
1170 save_brk = break_ptr;
1171 save_cont = continue_ptr;
1172
1173 n = chain_node(OC_BR | Vx);
1174 continue_ptr = new_node(OC_EXEC);
1175 break_ptr = new_node(OC_EXEC);
1176 chain_group();
1177 n2 = chain_node(OC_EXEC | Vx);
1178 n2->l.n = nn;
1179 n2->a.n = n;
1180 continue_ptr->a.n = n2;
1181 break_ptr->a.n = n->r.n = seq->last;
1182
1183 continue_ptr = save_cont;
1184 break_ptr = save_brk;
1185
1186 return n;
1187}
1188
1189/* parse group and attach it to chain */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001190static void chain_group(void)
1191{
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001192 uint32_t c;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001193 node *n, *n2, *n3;
1194
1195 do {
1196 c = next_token(TC_GRPSEQ);
1197 } while (c & TC_NEWLINE);
1198
1199 if (c & TC_GRPSTART) {
1200 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
Mike Frysingerde2b9382005-09-27 03:18:00 +00001201 if (t.tclass & TC_NEWLINE) continue;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001202 rollback_token();
1203 chain_group();
1204 }
1205 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1206 rollback_token();
1207 chain_expr(OC_EXEC | Vx);
1208 } else { /* TC_STATEMNT */
1209 switch (t.info & OPCLSMASK) {
1210 case ST_IF:
1211 n = chain_node(OC_BR | Vx);
1212 n->l.n = condition();
1213 chain_group();
1214 n2 = chain_node(OC_EXEC);
1215 n->r.n = seq->last;
1216 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1217 chain_group();
1218 n2->a.n = seq->last;
1219 } else {
1220 rollback_token();
1221 }
1222 break;
1223
1224 case ST_WHILE:
1225 n2 = condition();
1226 n = chain_loop(NULL);
1227 n->l.n = n2;
1228 break;
1229
1230 case ST_DO:
1231 n2 = chain_node(OC_EXEC);
1232 n = chain_loop(NULL);
1233 n2->a.n = n->a.n;
1234 next_token(TC_WHILE);
1235 n->l.n = condition();
1236 break;
1237
1238 case ST_FOR:
1239 next_token(TC_SEQSTART);
1240 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1241 if (t.tclass & TC_SEQTERM) { /* for-in */
1242 if ((n2->info & OPCLSMASK) != OC_IN)
1243 syntax_error(EMSG_UNEXP_TOKEN);
1244 n = chain_node(OC_WALKINIT | VV);
1245 n->l.n = n2->l.n;
1246 n->r.n = n2->r.n;
1247 n = chain_loop(NULL);
1248 n->info = OC_WALKNEXT | Vx;
1249 n->l.n = n2->l.n;
1250 } else { /* for(;;) */
1251 n = chain_node(OC_EXEC | Vx);
1252 n->l.n = n2;
1253 n2 = parse_expr(TC_SEMICOL);
1254 n3 = parse_expr(TC_SEQTERM);
1255 n = chain_loop(n3);
1256 n->l.n = n2;
1257 if (! n2)
1258 n->info = OC_EXEC;
1259 }
1260 break;
1261
1262 case OC_PRINT:
1263 case OC_PRINTF:
1264 n = chain_node(t.info);
1265 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1266 if (t.tclass & TC_OUTRDR) {
1267 n->info |= t.info;
1268 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1269 }
1270 if (t.tclass & TC_GRPTERM)
1271 rollback_token();
1272 break;
1273
1274 case OC_BREAK:
1275 n = chain_node(OC_EXEC);
1276 n->a.n = break_ptr;
1277 break;
1278
1279 case OC_CONTINUE:
1280 n = chain_node(OC_EXEC);
1281 n->a.n = continue_ptr;
1282 break;
1283
1284 /* delete, next, nextfile, return, exit */
1285 default:
1286 chain_expr(t.info);
1287
1288 }
1289 }
1290}
1291
Mike Frysinger10a11e22005-09-27 02:23:02 +00001292static void parse_program(char *p)
1293{
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001294 uint32_t tclass;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001295 node *cn;
1296 func *f;
1297 var *v;
1298
1299 pos = p;
1300 t.lineno = 1;
1301 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1302 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1303
1304 if (tclass & TC_OPTERM)
1305 continue;
1306
1307 seq = &mainseq;
1308 if (tclass & TC_BEGIN) {
1309 seq = &beginseq;
1310 chain_group();
1311
1312 } else if (tclass & TC_END) {
1313 seq = &endseq;
1314 chain_group();
1315
1316 } else if (tclass & TC_FUNCDECL) {
1317 next_token(TC_FUNCTION);
1318 pos++;
1319 f = newfunc(t.string);
1320 f->body.first = NULL;
1321 f->nargs = 0;
1322 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1323 v = findvar(ahash, t.string);
1324 v->x.aidx = (f->nargs)++;
1325
1326 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1327 break;
1328 }
1329 seq = &(f->body);
1330 chain_group();
1331 clear_array(ahash);
1332
1333 } else if (tclass & TC_OPSEQ) {
1334 rollback_token();
1335 cn = chain_node(OC_TEST);
1336 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1337 if (t.tclass & TC_GRPSTART) {
1338 rollback_token();
1339 chain_group();
1340 } else {
1341 chain_node(OC_PRINT);
1342 }
1343 cn->r.n = mainseq.last;
1344
1345 } else /* if (tclass & TC_GRPSTART) */ {
1346 rollback_token();
1347 chain_group();
1348 }
1349 }
1350}
1351
1352
1353/* -------- program execution part -------- */
1354
Mike Frysinger10a11e22005-09-27 02:23:02 +00001355static node *mk_splitter(char *s, tsplitter *spl)
1356{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001357 register regex_t *re, *ire;
1358 node *n;
1359
1360 re = &spl->re[0];
1361 ire = &spl->re[1];
1362 n = &spl->n;
1363 if ((n->info && OPCLSMASK) == OC_REGEXP) {
1364 regfree(re);
1365 regfree(ire);
1366 }
Manuel Novoa III cad53642003-03-19 09:13:01 +00001367 if (bb_strlen(s) > 1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001368 mk_re_node(s, n, re);
1369 } else {
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001370 n->info = (uint32_t) *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001371 }
1372
1373 return n;
1374}
1375
1376/* use node as a regular expression. Supplied with node ptr and regex_t
Eric Andersenaff114c2004-04-14 17:51:38 +00001377 * storage space. Return ptr to regex (if result points to preg, it should
Glenn L McGrath545106f2002-11-11 06:21:00 +00001378 * be later regfree'd manually
1379 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001380static regex_t *as_regex(node *op, regex_t *preg)
1381{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001382 var *v;
1383 char *s;
1384
1385 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1386 return icase ? op->r.ire : op->l.re;
1387 } else {
1388 v = nvalloc(1);
1389 s = getvar_s(evaluate(op, v));
1390 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1391 nvfree(v);
1392 return preg;
1393 }
1394}
1395
/* Gradually growing buffer.
 *
 * Ensures *b is allocated and that *size is strictly greater than n.
 * When it has to (re)allocate, the new size is n + n/2 + 80 and is
 * stored in *size.  *size is not read when *b is NULL.
 */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1402
1403/* resize field storage space */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001404static void fsrealloc(int size)
1405{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001406 static int maxfields = 0;
1407 int i;
1408
1409 if (size >= maxfields) {
1410 i = maxfields;
1411 maxfields = size + 16;
1412 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1413 for (; i<maxfields; i++) {
1414 Fields[i].type = VF_SPECIAL;
1415 Fields[i].string = NULL;
1416 }
1417 }
1418
1419 if (size < nfields) {
1420 for (i=size; i<nfields; i++) {
1421 clrvar(Fields+i);
1422 }
1423 }
1424 nfields = size;
1425}
1426
Mike Frysinger10a11e22005-09-27 02:23:02 +00001427static int awk_split(char *s, node *spl, char **slist)
1428{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001429 int l, n=0;
1430 char c[4];
1431 char *s1;
1432 regmatch_t pmatch[2];
1433
1434 /* in worst case, each char would be a separate field */
Manuel Novoa III cad53642003-03-19 09:13:01 +00001435 *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001436
1437 c[0] = c[1] = (char)spl->info;
1438 c[2] = c[3] = '\0';
1439 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1440
1441 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1442 while (*s) {
1443 l = strcspn(s, c+2);
1444 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1445 pmatch[0].rm_so <= l) {
1446 l = pmatch[0].rm_so;
1447 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1448 } else {
1449 pmatch[0].rm_eo = l;
1450 if (*(s+l)) pmatch[0].rm_eo++;
1451 }
1452
1453 memcpy(s1, s, l);
1454 *(s1+l) = '\0';
1455 nextword(&s1);
1456 s += pmatch[0].rm_eo;
1457 n++;
1458 }
1459 } else if (c[0] == '\0') { /* null split */
1460 while(*s) {
1461 *(s1++) = *(s++);
1462 *(s1++) = '\0';
1463 n++;
1464 }
1465 } else if (c[0] != ' ') { /* single-character split */
1466 if (icase) {
1467 c[0] = toupper(c[0]);
1468 c[1] = tolower(c[1]);
1469 }
1470 if (*s1) n++;
1471 while ((s1 = strpbrk(s1, c))) {
1472 *(s1++) = '\0';
1473 n++;
1474 }
1475 } else { /* space split */
1476 while (*s) {
1477 while (isspace(*s)) s++;
1478 if (! *s) break;
1479 n++;
1480 while (*s && !isspace(*s))
1481 *(s1++) = *(s++);
1482 *(s1++) = '\0';
1483 }
1484 }
1485 return n;
1486}
1487
Mike Frysinger10a11e22005-09-27 02:23:02 +00001488static void split_f0(void)
1489{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001490 static char *fstrings = NULL;
1491 int i, n;
1492 char *s;
1493
1494 if (is_f0_split)
1495 return;
1496
1497 is_f0_split = TRUE;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00001498 free(fstrings);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001499 fsrealloc(0);
1500 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1501 fsrealloc(n);
1502 s = fstrings;
1503 for (i=0; i<n; i++) {
1504 Fields[i].string = nextword(&s);
1505 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1506 }
1507
1508 /* set NF manually to avoid side effects */
1509 clrvar(V[NF]);
1510 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1511 V[NF]->number = nfields;
1512}
1513
1514/* perform additional actions when some internal variables changed */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001515static void handle_special(var *v)
1516{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001517 int n;
1518 char *b, *sep, *s;
1519 int sl, l, len, i, bsize;
1520
1521 if (! (v->type & VF_SPECIAL))
1522 return;
1523
1524 if (v == V[NF]) {
1525 n = (int)getvar_i(v);
1526 fsrealloc(n);
1527
1528 /* recalculate $0 */
1529 sep = getvar_s(V[OFS]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001530 sl = bb_strlen(sep);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001531 b = NULL;
1532 len = 0;
1533 for (i=0; i<n; i++) {
1534 s = getvar_s(&Fields[i]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001535 l = bb_strlen(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001536 if (b) {
1537 memcpy(b+len, sep, sl);
1538 len += sl;
1539 }
1540 qrealloc(&b, len+l+sl, &bsize);
1541 memcpy(b+len, s, l);
1542 len += l;
1543 }
Glenn L McGrathca29ffc2004-09-24 09:24:27 +00001544 if (b) b[len] = '\0';
Glenn L McGrath545106f2002-11-11 06:21:00 +00001545 setvar_p(V[F0], b);
1546 is_f0_split = TRUE;
1547
1548 } else if (v == V[F0]) {
1549 is_f0_split = FALSE;
1550
1551 } else if (v == V[FS]) {
1552 mk_splitter(getvar_s(v), &fsplitter);
1553
1554 } else if (v == V[RS]) {
1555 mk_splitter(getvar_s(v), &rsplitter);
1556
1557 } else if (v == V[IGNORECASE]) {
1558 icase = istrue(v);
1559
1560 } else { /* $n */
1561 n = getvar_i(V[NF]);
1562 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1563 /* right here v is invalid. Just to note... */
1564 }
1565}
1566
1567/* step through func/builtin/etc arguments */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001568static node *nextarg(node **pn)
1569{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001570 node *n;
1571
1572 n = *pn;
1573 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1574 *pn = n->r.n;
1575 n = n->l.n;
1576 } else {
1577 *pn = NULL;
1578 }
1579 return n;
1580}
1581
Mike Frysinger10a11e22005-09-27 02:23:02 +00001582static void hashwalk_init(var *v, xhash *array)
1583{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001584 char **w;
1585 hash_item *hi;
1586 int i;
1587
1588 if (v->type & VF_WALK)
1589 free(v->x.walker);
1590
1591 v->type |= VF_WALK;
1592 w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1593 *w = *(w+1) = (char *)(w + 2);
1594 for (i=0; i<array->csize; i++) {
1595 hi = array->items[i];
1596 while(hi) {
1597 strcpy(*w, hi->name);
1598 nextword(w);
1599 hi = hi->next;
1600 }
1601 }
1602}
1603
Mike Frysinger10a11e22005-09-27 02:23:02 +00001604static int hashwalk_next(var *v)
1605{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001606 char **w;
1607
1608 w = v->x.walker;
1609 if (*(w+1) == *w)
1610 return FALSE;
1611
1612 setvar_s(v, nextword(w+1));
1613 return TRUE;
1614}
1615
1616/* evaluate node, return 1 when result is true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001617static int ptest(node *pattern)
1618{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001619 static var v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001620 return istrue(evaluate(pattern, &v));
1621}
1622
1623/* read next record from stream rsm into a variable v */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001624static int awk_getline(rstream *rsm, var *v)
1625{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001626 char *b;
1627 regmatch_t pmatch[2];
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001628 int a, p, pp=0, size;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001629 int fd, so, eo, r, rp;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001630 char c, *m, *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001631
1632 /* we're using our own buffer since we need access to accumulating
1633 * characters
1634 */
1635 fd = fileno(rsm->F);
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001636 m = rsm->buffer;
1637 a = rsm->adv;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001638 p = rsm->pos;
1639 size = rsm->size;
1640 c = (char) rsplitter.n.info;
1641 rp = 0;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001642
1643 if (! m) qrealloc(&m, 256, &size);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001644 do {
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001645 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001646 so = eo = p;
1647 r = 1;
1648 if (p > 0) {
1649 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1650 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1651 b, 1, pmatch, 0) == 0) {
1652 so = pmatch[0].rm_so;
1653 eo = pmatch[0].rm_eo;
1654 if (b[eo] != '\0')
1655 break;
1656 }
1657 } else if (c != '\0') {
1658 s = strchr(b+pp, c);
1659 if (s) {
1660 so = eo = s-b;
1661 eo++;
1662 break;
1663 }
1664 } else {
1665 while (b[rp] == '\n')
1666 rp++;
1667 s = strstr(b+rp, "\n\n");
1668 if (s) {
1669 so = eo = s-b;
1670 while (b[eo] == '\n') eo++;
1671 if (b[eo] != '\0')
1672 break;
1673 }
1674 }
1675 }
1676
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001677 if (a > 0) {
1678 memmove(m, (const void *)(m+a), p+1);
1679 b = m;
1680 a = 0;
1681 }
1682
1683 qrealloc(&m, a+p+128, &size);
1684 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001685 pp = p;
1686 p += safe_read(fd, b+p, size-p-1);
1687 if (p < pp) {
1688 p = 0;
1689 r = 0;
1690 setvar_i(V[ERRNO], errno);
1691 }
1692 b[p] = '\0';
1693
1694 } while (p > pp);
1695
1696 if (p == 0) {
1697 r--;
1698 } else {
1699 c = b[so]; b[so] = '\0';
1700 setvar_s(v, b+rp);
1701 v->type |= VF_USER;
1702 b[so] = c;
1703 c = b[eo]; b[eo] = '\0';
1704 setvar_s(V[RT], b+so);
1705 b[eo] = c;
1706 }
1707
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001708 rsm->buffer = m;
1709 rsm->adv = a + eo;
1710 rsm->pos = p - eo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001711 rsm->size = size;
1712
1713 return r;
1714}
1715
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00001716static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001717{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001718 int r=0;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00001719 char c;
1720 const char *s=format;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001721
1722 if (int_as_int && n == (int)n) {
1723 r = snprintf(b, size, "%d", (int)n);
1724 } else {
1725 do { c = *s; } while (*s && *++s);
1726 if (strchr("diouxX", c)) {
1727 r = snprintf(b, size, format, (int)n);
1728 } else if (strchr("eEfgG", c)) {
1729 r = snprintf(b, size, format, n);
1730 } else {
1731 runtime_error(EMSG_INV_FMT);
1732 }
1733 }
1734 return r;
1735}
1736
1737
1738/* formatted output into an allocated buffer, return ptr to buffer */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001739static char *awk_printf(node *n)
1740{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001741 char *b = NULL;
1742 char *fmt, *s, *s1, *f;
1743 int i, j, incr, bsize;
1744 char c, c1;
1745 var *v, *arg;
1746
1747 v = nvalloc(1);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001748 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
Glenn L McGrath545106f2002-11-11 06:21:00 +00001749
1750 i = 0;
1751 while (*f) {
1752 s = f;
1753 while (*f && (*f != '%' || *(++f) == '%'))
1754 f++;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001755 while (*f && !isalpha(*f))
Glenn L McGrath545106f2002-11-11 06:21:00 +00001756 f++;
1757
1758 incr = (f - s) + MAXVARFMT;
1759 qrealloc(&b, incr+i, &bsize);
1760 c = *f; if (c != '\0') f++;
1761 c1 = *f ; *f = '\0';
1762 arg = evaluate(nextarg(&n), v);
1763
1764 j = i;
1765 if (c == 'c' || !c) {
1766 i += sprintf(b+i, s,
1767 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1768
1769 } else if (c == 's') {
1770 s1 = getvar_s(arg);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001771 qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001772 i += sprintf(b+i, s, s1);
1773
1774 } else {
1775 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1776 }
1777 *f = c1;
1778
1779 /* if there was an error while sprintf, return value is negative */
1780 if (i < j) i = j;
1781
1782 }
1783
1784 b = xrealloc(b, i+1);
1785 free(fmt);
1786 nvfree(v);
1787 b[i] = '\0';
1788 return b;
1789}
1790
1791/* common substitution routine
1792 * replace (nm) substring of (src) that match (n) with (repl), store
1793 * result into (dest), return number of substitutions. If nm=0, replace
1794 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1795 * subexpression matching (\1-\9)
1796 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001797static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1798{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001799 char *ds = NULL;
1800 char *sp, *s;
1801 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1802 regmatch_t pmatch[10];
1803 regex_t sreg, *re;
1804
1805 re = as_regex(rn, &sreg);
1806 if (! src) src = V[F0];
1807 if (! dest) dest = V[F0];
1808
1809 i = di = 0;
1810 sp = getvar_s(src);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001811 rl = bb_strlen(repl);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001812 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1813 so = pmatch[0].rm_so;
1814 eo = pmatch[0].rm_eo;
1815
1816 qrealloc(&ds, di + eo + rl, &dssize);
1817 memcpy(ds + di, sp, eo);
1818 di += eo;
1819 if (++i >= nm) {
1820 /* replace */
1821 di -= (eo - so);
1822 nbs = 0;
1823 for (s = repl; *s; s++) {
1824 ds[di++] = c = *s;
1825 if (c == '\\') {
1826 nbs++;
1827 continue;
1828 }
1829 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1830 di -= ((nbs + 3) >> 1);
1831 j = 0;
1832 if (c != '&') {
1833 j = c - '0';
1834 nbs++;
1835 }
1836 if (nbs % 2) {
1837 ds[di++] = c;
1838 } else {
1839 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1840 qrealloc(&ds, di + rl + n, &dssize);
1841 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1842 di += n;
1843 }
1844 }
1845 nbs = 0;
1846 }
1847 }
1848
1849 sp += eo;
1850 if (i == nm) break;
1851 if (eo == so) {
1852 if (! (ds[di++] = *sp++)) break;
1853 }
1854 }
1855
1856 qrealloc(&ds, di + strlen(sp), &dssize);
1857 strcpy(ds + di, sp);
1858 setvar_p(dest, ds);
1859 if (re == &sreg) regfree(re);
1860 return i;
1861}
1862
Mike Frysinger10a11e22005-09-27 02:23:02 +00001863static var *exec_builtin(node *op, var *res)
1864{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001865 int (*to_xxx)(int);
1866 var *tv;
1867 node *an[4];
1868 var *av[4];
1869 char *as[4];
1870 regmatch_t pmatch[2];
1871 regex_t sreg, *re;
1872 static tsplitter tspl;
1873 node *spl;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001874 uint32_t isr, info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001875 int nargs;
1876 time_t tt;
1877 char *s, *s1;
1878 int i, l, ll, n;
1879
1880 tv = nvalloc(4);
1881 isr = info = op->info;
1882 op = op->l.n;
1883
1884 av[2] = av[3] = NULL;
1885 for (i=0 ; i<4 && op ; i++) {
1886 an[i] = nextarg(&op);
1887 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1888 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1889 isr >>= 1;
1890 }
1891
1892 nargs = i;
1893 if (nargs < (info >> 30))
1894 runtime_error(EMSG_TOO_FEW_ARGS);
1895
1896 switch (info & OPNMASK) {
1897
1898 case B_a2:
1899#ifdef CONFIG_FEATURE_AWK_MATH
1900 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1901#else
1902 runtime_error(EMSG_NO_MATH);
1903#endif
1904 break;
1905
1906 case B_sp:
1907 if (nargs > 2) {
1908 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1909 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1910 } else {
1911 spl = &fsplitter.n;
1912 }
1913
1914 n = awk_split(as[0], spl, &s);
1915 s1 = s;
1916 clear_array(iamarray(av[1]));
1917 for (i=1; i<=n; i++)
1918 setari_u(av[1], i, nextword(&s1));
1919 free(s);
1920 setvar_i(res, n);
1921 break;
1922
1923 case B_ss:
Manuel Novoa III cad53642003-03-19 09:13:01 +00001924 l = bb_strlen(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001925 i = getvar_i(av[1]) - 1;
1926 if (i>l) i=l; if (i<0) i=0;
1927 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1928 if (n<0) n=0;
1929 s = xmalloc(n+1);
1930 strncpy(s, as[0]+i, n);
1931 s[n] = '\0';
1932 setvar_p(res, s);
1933 break;
1934
1935 case B_lo:
1936 to_xxx = tolower;
1937 goto lo_cont;
1938
1939 case B_up:
1940 to_xxx = toupper;
1941lo_cont:
Manuel Novoa III cad53642003-03-19 09:13:01 +00001942 s1 = s = bb_xstrdup(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001943 while (*s1) {
1944 *s1 = (*to_xxx)(*s1);
1945 s1++;
1946 }
1947 setvar_p(res, s);
1948 break;
1949
1950 case B_ix:
1951 n = 0;
Manuel Novoa III cad53642003-03-19 09:13:01 +00001952 ll = bb_strlen(as[1]);
1953 l = bb_strlen(as[0]) - ll;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001954 if (ll > 0 && l >= 0) {
1955 if (! icase) {
1956 s = strstr(as[0], as[1]);
1957 if (s) n = (s - as[0]) + 1;
1958 } else {
1959 /* this piece of code is terribly slow and
1960 * really should be rewritten
1961 */
1962 for (i=0; i<=l; i++) {
1963 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1964 n = i+1;
1965 break;
1966 }
1967 }
1968 }
1969 }
1970 setvar_i(res, n);
1971 break;
1972
1973 case B_ti:
1974 if (nargs > 1)
1975 tt = getvar_i(av[1]);
1976 else
1977 time(&tt);
1978 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1979 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1980 buf[i] = '\0';
1981 setvar_s(res, buf);
1982 break;
1983
1984 case B_ma:
1985 re = as_regex(an[1], &sreg);
1986 n = regexec(re, as[0], 1, pmatch, 0);
1987 if (n == 0) {
1988 pmatch[0].rm_so++;
1989 pmatch[0].rm_eo++;
1990 } else {
1991 pmatch[0].rm_so = 0;
1992 pmatch[0].rm_eo = -1;
1993 }
1994 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1995 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1996 setvar_i(res, pmatch[0].rm_so);
1997 if (re == &sreg) regfree(re);
1998 break;
1999
2000 case B_ge:
2001 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2002 break;
2003
2004 case B_gs:
2005 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2006 break;
2007
2008 case B_su:
2009 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2010 break;
2011 }
2012
2013 nvfree(tv);
2014 return res;
2015}
2016
2017/*
2018 * Evaluate node - the heart of the program. Supplied with subtree
2019 * and place where to store result. returns ptr to result.
2020 */
2021#define XC(n) ((n) >> 8)
2022
Mike Frysinger10a11e22005-09-27 02:23:02 +00002023static var *evaluate(node *op, var *res)
2024{
Mike Frysingerde2b9382005-09-27 03:18:00 +00002025 /* This procedure is recursive so we should count every byte */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002026 static var *fnargs = NULL;
2027 static unsigned int seed = 1;
2028 static regex_t sreg;
2029 node *op1;
2030 var *v1;
2031 union {
2032 var *v;
2033 char *s;
2034 double d;
2035 int i;
2036 } L, R;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00002037 uint32_t opinfo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002038 short opn;
2039 union {
2040 char *s;
2041 rstream *rsm;
2042 FILE *F;
2043 var *v;
2044 regex_t *re;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00002045 uint32_t info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002046 } X;
2047
2048 if (! op)
2049 return setvar_s(res, NULL);
2050
2051 v1 = nvalloc(2);
2052
2053 while (op) {
2054
2055 opinfo = op->info;
2056 opn = (short)(opinfo & OPNMASK);
2057 lineno = op->lineno;
2058
Mike Frysingerde2b9382005-09-27 03:18:00 +00002059 /* execute inevitable things */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002060 op1 = op->l.n;
2061 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2062 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2063 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2064 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2065 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2066
2067 switch (XC(opinfo & OPCLSMASK)) {
2068
2069 /* -- iterative node type -- */
2070
2071 /* test pattern */
2072 case XC( OC_TEST ):
2073 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2074 /* it's range pattern */
2075 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2076 op->info |= OF_CHECKED;
2077 if (ptest(op1->r.n))
2078 op->info &= ~OF_CHECKED;
2079
2080 op = op->a.n;
2081 } else {
2082 op = op->r.n;
2083 }
2084 } else {
2085 op = (ptest(op1)) ? op->a.n : op->r.n;
2086 }
2087 break;
2088
2089 /* just evaluate an expression, also used as unconditional jump */
2090 case XC( OC_EXEC ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002091 break;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002092
2093 /* branch, used in if-else and various loops */
2094 case XC( OC_BR ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002095 op = istrue(L.v) ? op->a.n : op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002096 break;
2097
2098 /* initialize for-in loop */
2099 case XC( OC_WALKINIT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002100 hashwalk_init(L.v, iamarray(R.v));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002101 break;
2102
2103 /* get next array item */
2104 case XC( OC_WALKNEXT ):
2105 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2106 break;
2107
2108 case XC( OC_PRINT ):
2109 case XC( OC_PRINTF ):
2110 X.F = stdout;
Mike Frysingerde2b9382005-09-27 03:18:00 +00002111 if (op->r.n) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002112 X.rsm = newfile(R.s);
2113 if (! X.rsm->F) {
2114 if (opn == '|') {
2115 if((X.rsm->F = popen(R.s, "w")) == NULL)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002116 bb_perror_msg_and_die("popen");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002117 X.rsm->is_pipe = 1;
2118 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002119 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002120 }
2121 }
2122 X.F = X.rsm->F;
2123 }
2124
2125 if ((opinfo & OPCLSMASK) == OC_PRINT) {
Mike Frysingerde2b9382005-09-27 03:18:00 +00002126 if (! op1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002127 fputs(getvar_s(V[F0]), X.F);
2128 } else {
2129 while (op1) {
2130 L.v = evaluate(nextarg(&op1), v1);
2131 if (L.v->type & VF_NUMBER) {
2132 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2133 getvar_i(L.v), TRUE);
2134 fputs(buf, X.F);
2135 } else {
2136 fputs(getvar_s(L.v), X.F);
2137 }
2138
2139 if (op1) fputs(getvar_s(V[OFS]), X.F);
2140 }
2141 }
2142 fputs(getvar_s(V[ORS]), X.F);
2143
2144 } else { /* OC_PRINTF */
2145 L.s = awk_printf(op1);
2146 fputs(L.s, X.F);
2147 free(L.s);
2148 }
2149 fflush(X.F);
2150 break;
2151
2152 case XC( OC_DELETE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002153 X.info = op1->info & OPCLSMASK;
2154 if (X.info == OC_VAR) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002155 R.v = op1->l.v;
2156 } else if (X.info == OC_FNARG) {
2157 R.v = &fnargs[op1->l.i];
2158 } else {
2159 runtime_error(EMSG_NOT_ARRAY);
2160 }
2161
Mike Frysingerde2b9382005-09-27 03:18:00 +00002162 if (op1->r.n) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002163 clrvar(L.v);
2164 L.s = getvar_s(evaluate(op1->r.n, v1));
2165 hash_remove(iamarray(R.v), L.s);
2166 } else {
2167 clear_array(iamarray(R.v));
2168 }
2169 break;
2170
2171 case XC( OC_NEWSOURCE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002172 programname = op->l.s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002173 break;
2174
2175 case XC( OC_RETURN ):
2176 copyvar(res, L.v);
2177 break;
2178
2179 case XC( OC_NEXTFILE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002180 nextfile = TRUE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002181 case XC( OC_NEXT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002182 nextrec = TRUE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002183 case XC( OC_DONE ):
2184 clrvar(res);
2185 break;
2186
2187 case XC( OC_EXIT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002188 awk_exit(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002189
2190 /* -- recursive node type -- */
2191
2192 case XC( OC_VAR ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002193 L.v = op->l.v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002194 if (L.v == V[NF])
2195 split_f0();
2196 goto v_cont;
2197
2198 case XC( OC_FNARG ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002199 L.v = &fnargs[op->l.i];
Glenn L McGrath545106f2002-11-11 06:21:00 +00002200
2201v_cont:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002202 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002203 break;
2204
2205 case XC( OC_IN ):
2206 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2207 break;
2208
2209 case XC( OC_REGEXP ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002210 op1 = op;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002211 L.s = getvar_s(V[F0]);
2212 goto re_cont;
2213
2214 case XC( OC_MATCH ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002215 op1 = op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002216re_cont:
2217 X.re = as_regex(op1, &sreg);
2218 R.i = regexec(X.re, L.s, 0, NULL, 0);
2219 if (X.re == &sreg) regfree(X.re);
2220 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2221 break;
2222
2223 case XC( OC_MOVE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002224 /* if source is a temporary string, jusk relink it to dest */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002225 if (R.v == v1+1 && R.v->string) {
2226 res = setvar_p(L.v, R.v->string);
2227 R.v->string = NULL;
2228 } else {
Mike Frysingerde2b9382005-09-27 03:18:00 +00002229 res = copyvar(L.v, R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002230 }
2231 break;
2232
2233 case XC( OC_TERNARY ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002234 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002235 runtime_error(EMSG_POSSIBLE_ERROR);
2236 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2237 break;
2238
2239 case XC( OC_FUNC ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002240 if (! op->r.f->body.first)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002241 runtime_error(EMSG_UNDEF_FUNC);
2242
2243 X.v = R.v = nvalloc(op->r.f->nargs+1);
2244 while (op1) {
2245 L.v = evaluate(nextarg(&op1), v1);
2246 copyvar(R.v, L.v);
2247 R.v->type |= VF_CHILD;
2248 R.v->x.parent = L.v;
2249 if (++R.v - X.v >= op->r.f->nargs)
2250 break;
2251 }
2252
2253 R.v = fnargs;
2254 fnargs = X.v;
2255
2256 L.s = programname;
2257 res = evaluate(op->r.f->body.first, res);
2258 programname = L.s;
2259
2260 nvfree(fnargs);
2261 fnargs = R.v;
2262 break;
2263
2264 case XC( OC_GETLINE ):
2265 case XC( OC_PGETLINE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002266 if (op1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002267 X.rsm = newfile(L.s);
2268 if (! X.rsm->F) {
2269 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2270 X.rsm->F = popen(L.s, "r");
2271 X.rsm->is_pipe = TRUE;
2272 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002273 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002274 }
2275 }
2276 } else {
2277 if (! iF) iF = next_input_file();
2278 X.rsm = iF;
2279 }
2280
2281 if (! X.rsm->F) {
2282 setvar_i(V[ERRNO], errno);
2283 setvar_i(res, -1);
2284 break;
2285 }
2286
2287 if (! op->r.n)
2288 R.v = V[F0];
2289
2290 L.i = awk_getline(X.rsm, R.v);
2291 if (L.i > 0) {
2292 if (! op1) {
2293 incvar(V[FNR]);
2294 incvar(V[NR]);
2295 }
2296 }
2297 setvar_i(res, L.i);
2298 break;
2299
Mike Frysingerde2b9382005-09-27 03:18:00 +00002300 /* simple builtins */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002301 case XC( OC_FBLTIN ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002302 switch (opn) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002303
2304 case F_in:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002305 R.d = (int)L.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002306 break;
2307
2308 case F_rn:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002309 R.d = (double)rand() / (double)RAND_MAX;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002310 break;
2311
2312#ifdef CONFIG_FEATURE_AWK_MATH
2313 case F_co:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002314 R.d = cos(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002315 break;
2316
2317 case F_ex:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002318 R.d = exp(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002319 break;
2320
2321 case F_lg:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002322 R.d = log(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002323 break;
2324
2325 case F_si:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002326 R.d = sin(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002327 break;
2328
2329 case F_sq:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002330 R.d = sqrt(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002331 break;
2332#else
2333 case F_co:
2334 case F_ex:
2335 case F_lg:
2336 case F_si:
2337 case F_sq:
2338 runtime_error(EMSG_NO_MATH);
2339 break;
2340#endif
2341
2342 case F_sr:
2343 R.d = (double)seed;
2344 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2345 srand(seed);
2346 break;
2347
2348 case F_ti:
2349 R.d = time(NULL);
2350 break;
2351
2352 case F_le:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002353 if (! op1)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002354 L.s = getvar_s(V[F0]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00002355 R.d = bb_strlen(L.s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002356 break;
2357
2358 case F_sy:
2359 fflush(NULL);
Rob Landley51843362006-01-09 05:26:58 +00002360 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002361 break;
2362
2363 case F_ff:
2364 if (! op1)
2365 fflush(stdout);
2366 else {
2367 if (L.s && *L.s) {
2368 X.rsm = newfile(L.s);
2369 fflush(X.rsm->F);
2370 } else {
2371 fflush(NULL);
2372 }
2373 }
2374 break;
2375
2376 case F_cl:
2377 X.rsm = (rstream *)hash_search(fdhash, L.s);
2378 if (X.rsm) {
2379 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00002380 free(X.rsm->buffer);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002381 hash_remove(fdhash, L.s);
2382 }
2383 if (R.i != 0)
2384 setvar_i(V[ERRNO], errno);
2385 R.d = (double)R.i;
2386 break;
2387 }
2388 setvar_i(res, R.d);
2389 break;
2390
2391 case XC( OC_BUILTIN ):
2392 res = exec_builtin(op, res);
2393 break;
2394
2395 case XC( OC_SPRINTF ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002396 setvar_p(res, awk_printf(op1));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002397 break;
2398
2399 case XC( OC_UNARY ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002400 X.v = R.v;
2401 L.d = R.d = getvar_i(R.v);
2402 switch (opn) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002403 case 'P':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002404 L.d = ++R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002405 goto r_op_change;
2406 case 'p':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002407 R.d++;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002408 goto r_op_change;
2409 case 'M':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002410 L.d = --R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002411 goto r_op_change;
2412 case 'm':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002413 R.d--;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002414 goto r_op_change;
2415 case '!':
2416 L.d = istrue(X.v) ? 0 : 1;
2417 break;
2418 case '-':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002419 L.d = -R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002420 break;
2421 r_op_change:
2422 setvar_i(X.v, R.d);
2423 }
2424 setvar_i(res, L.d);
2425 break;
2426
2427 case XC( OC_FIELD ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002428 R.i = (int)getvar_i(R.v);
2429 if (R.i == 0) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002430 res = V[F0];
2431 } else {
2432 split_f0();
2433 if (R.i > nfields)
2434 fsrealloc(R.i);
2435
2436 res = &Fields[R.i-1];
2437 }
2438 break;
2439
2440 /* concatenation (" ") and index joining (",") */
2441 case XC( OC_CONCAT ):
2442 case XC( OC_COMMA ):
Manuel Novoa III cad53642003-03-19 09:13:01 +00002443 opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
Mike Frysingerde2b9382005-09-27 03:18:00 +00002444 X.s = (char *)xmalloc(opn);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002445 strcpy(X.s, L.s);
2446 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2447 L.s = getvar_s(V[SUBSEP]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00002448 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002449 strcat(X.s, L.s);
2450 }
2451 strcat(X.s, R.s);
2452 setvar_p(res, X.s);
2453 break;
2454
2455 case XC( OC_LAND ):
2456 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2457 break;
2458
2459 case XC( OC_LOR ):
2460 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2461 break;
2462
2463 case XC( OC_BINARY ):
2464 case XC( OC_REPLACE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002465 R.d = getvar_i(R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002466 switch (opn) {
2467 case '+':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002468 L.d += R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002469 break;
2470 case '-':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002471 L.d -= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002472 break;
2473 case '*':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002474 L.d *= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002475 break;
2476 case '/':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002477 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2478 L.d /= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002479 break;
2480 case '&':
2481#ifdef CONFIG_FEATURE_AWK_MATH
Mike Frysingerde2b9382005-09-27 03:18:00 +00002482 L.d = pow(L.d, R.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002483#else
2484 runtime_error(EMSG_NO_MATH);
2485#endif
2486 break;
2487 case '%':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002488 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2489 L.d -= (int)(L.d / R.d) * R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002490 break;
2491 }
2492 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2493 break;
2494
2495 case XC( OC_COMPARE ):
2496 if (is_numeric(L.v) && is_numeric(R.v)) {
2497 L.d = getvar_i(L.v) - getvar_i(R.v);
2498 } else {
2499 L.s = getvar_s(L.v);
2500 R.s = getvar_s(R.v);
2501 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2502 }
2503 switch (opn & 0xfe) {
2504 case 0:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002505 R.i = (L.d > 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002506 break;
2507 case 2:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002508 R.i = (L.d >= 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002509 break;
2510 case 4:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002511 R.i = (L.d == 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002512 break;
2513 }
2514 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2515 break;
2516
2517 default:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002518 runtime_error(EMSG_POSSIBLE_ERROR);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002519 }
2520 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2521 op = op->a.n;
2522 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2523 break;
2524 if (nextrec)
2525 break;
2526 }
2527 nvfree(v1);
2528 return res;
2529}
2530
2531
2532/* -------- main & co. -------- */
2533
Mike Frysinger10a11e22005-09-27 02:23:02 +00002534static int awk_exit(int r)
2535{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002536 unsigned int i;
2537 hash_item *hi;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002538 static var tv;
2539
2540 if (! exiting) {
2541 exiting = TRUE;
Glenn L McGrathca29ffc2004-09-24 09:24:27 +00002542 nextrec = FALSE;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002543 evaluate(endseq.first, &tv);
2544 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002545
2546 /* waiting for children */
2547 for (i=0; i<fdhash->csize; i++) {
2548 hi = fdhash->items[i];
2549 while(hi) {
2550 if (hi->data.rs.F && hi->data.rs.is_pipe)
2551 pclose(hi->data.rs.F);
2552 hi = hi->next;
2553 }
2554 }
2555
2556 exit(r);
2557}
2558
2559/* if expr looks like "var=value", perform assignment and return 1,
2560 * otherwise return 0 */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00002561static int is_assignment(const char *expr)
Mike Frysinger10a11e22005-09-27 02:23:02 +00002562{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002563 char *exprc, *s, *s0, *s1;
2564
Manuel Novoa III cad53642003-03-19 09:13:01 +00002565 exprc = bb_xstrdup(expr);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002566 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2567 free(exprc);
2568 return FALSE;
2569 }
2570
2571 *(s++) = '\0';
2572 s0 = s1 = s;
2573 while (*s)
2574 *(s1++) = nextchar(&s);
2575
2576 *s1 = '\0';
2577 setvar_u(newvar(exprc), s0);
2578 free(exprc);
2579 return TRUE;
2580}
2581
2582/* switch to next input file */
Mike Frysinger10a11e22005-09-27 02:23:02 +00002583static rstream *next_input_file(void)
2584{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002585 static rstream rsm;
2586 FILE *F = NULL;
2587 char *fname, *ind;
2588 static int files_happen = FALSE;
2589
2590 if (rsm.F) fclose(rsm.F);
2591 rsm.F = NULL;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002592 rsm.pos = rsm.adv = 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002593
2594 do {
2595 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2596 if (files_happen)
2597 return NULL;
2598 fname = "-";
2599 F = stdin;
2600 } else {
2601 ind = getvar_s(incvar(V[ARGIND]));
2602 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2603 if (fname && *fname && !is_assignment(fname))
2604 F = afopen(fname, "r");
2605 }
2606 } while (!F);
2607
2608 files_happen = TRUE;
2609 setvar_s(V[FILENAME], fname);
2610 rsm.F = F;
2611 return &rsm;
2612}
2613
Mike Frysinger10a11e22005-09-27 02:23:02 +00002614extern int awk_main(int argc, char **argv)
2615{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002616 char *s, *s1;
2617 int i, j, c;
2618 var *v;
2619 static var tv;
2620 char **envp;
2621 static int from_file = FALSE;
2622 rstream *rsm;
2623 FILE *F, *stdfiles[3];
2624 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2625
2626 /* allocate global buffer */
2627 buf = xmalloc(MAXVARFMT+1);
2628
2629 vhash = hash_init();
2630 ahash = hash_init();
2631 fdhash = hash_init();
2632 fnhash = hash_init();
2633
2634 /* initialize variables */
2635 for (i=0; *vNames; i++) {
2636 V[i] = v = newvar(nextword(&vNames));
2637 if (*vValues != '\377')
2638 setvar_s(v, nextword(&vValues));
2639 else
2640 setvar_i(v, 0);
2641
2642 if (*vNames == '*') {
2643 v->type |= VF_SPECIAL;
2644 vNames++;
2645 }
2646 }
2647
2648 handle_special(V[FS]);
2649 handle_special(V[RS]);
2650
2651 stdfiles[0] = stdin;
2652 stdfiles[1] = stdout;
2653 stdfiles[2] = stderr;
2654 for (i=0; i<3; i++) {
2655 rsm = newfile(nextword(&stdnames));
2656 rsm->F = stdfiles[i];
2657 }
2658
2659 for (envp=environ; *envp; envp++) {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002660 s = bb_xstrdup(*envp);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002661 s1 = strchr(s, '=');
Eric Andersen67776be2004-07-30 23:52:08 +00002662 if (!s1) {
2663 goto keep_going;
2664 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002665 *(s1++) = '\0';
2666 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
Eric Andersen67776be2004-07-30 23:52:08 +00002667keep_going:
Glenn L McGrath545106f2002-11-11 06:21:00 +00002668 free(s);
2669 }
2670
2671 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2672 switch (c) {
2673 case 'F':
2674 setvar_s(V[FS], optarg);
2675 break;
2676 case 'v':
2677 if (! is_assignment(optarg))
Manuel Novoa III cad53642003-03-19 09:13:01 +00002678 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002679 break;
2680 case 'f':
2681 from_file = TRUE;
2682 F = afopen(programname = optarg, "r");
2683 s = NULL;
2684 /* one byte is reserved for some trick in next_token */
2685 for (i=j=1; j>0; i+=j) {
2686 s = (char *)xrealloc(s, i+4096);
2687 j = fread(s+i, 1, 4094, F);
2688 }
2689 s[i] = '\0';
2690 fclose(F);
2691 parse_program(s+1);
2692 free(s);
2693 break;
2694 case 'W':
Manuel Novoa III cad53642003-03-19 09:13:01 +00002695 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002696 break;
2697
2698 default:
Manuel Novoa III cad53642003-03-19 09:13:01 +00002699 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002700 }
2701 }
2702
2703 if (!from_file) {
2704 if (argc == optind)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002705 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002706 programname="cmd. line";
2707 parse_program(argv[optind++]);
2708
2709 }
2710
2711 /* fill in ARGV array */
2712 setvar_i(V[ARGC], argc - optind + 1);
2713 setari_u(V[ARGV], 0, "awk");
2714 for(i=optind; i < argc; i++)
2715 setari_u(V[ARGV], i+1-optind, argv[i]);
2716
2717 evaluate(beginseq.first, &tv);
2718 if (! mainseq.first && ! endseq.first)
2719 awk_exit(EXIT_SUCCESS);
2720
2721 /* input file could already be opened in BEGIN block */
2722 if (! iF) iF = next_input_file();
2723
2724 /* passing through input files */
2725 while (iF) {
2726
2727 nextfile = FALSE;
2728 setvar_i(V[FNR], 0);
2729
2730 while ((c = awk_getline(iF, V[F0])) > 0) {
2731
2732 nextrec = FALSE;
2733 incvar(V[NR]);
2734 incvar(V[FNR]);
2735 evaluate(mainseq.first, &tv);
2736
2737 if (nextfile)
2738 break;
2739 }
2740
2741 if (c < 0)
2742 runtime_error(strerror(errno));
2743
2744 iF = next_input_file();
2745
2746 }
2747
Glenn L McGrath545106f2002-11-11 06:21:00 +00002748 awk_exit(EXIT_SUCCESS);
2749
2750 return 0;
2751}
2752