blob: 4bd872924d05d229ada2fe88e1ea06a620cb834e [file] [log] [blame]
Glenn L McGrath545106f2002-11-11 06:21:00 +00001/* vi: set sw=4 ts=4: */
2/*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 */
22
23#include <stdio.h>
24#include <stdlib.h>
25#include <unistd.h>
26#include <errno.h>
27#include <string.h>
28#include <time.h>
29#include <math.h>
30#include <ctype.h>
31#include <getopt.h>
Glenn L McGrath545106f2002-11-11 06:21:00 +000032
"Vladimir N. Oleynik"23f62fc2005-09-14 16:59:11 +000033#include "xregex.h"
Glenn L McGrath545106f2002-11-11 06:21:00 +000034#include "busybox.h"
35
36
/* size handed to fmt_num() when getvar_s() formats a number into buf */
#define MAXVARFMT 240
/* smallest number of vars nvalloc() puts into a freshly allocated nvblock */
#define MINNVBLOCK 64

/* variable flags (bits of var.type) */
#define VF_NUMBER 0x0001 /* 1 = primary type is number */
#define VF_ARRAY 0x0002 /* 1 = it's an array */

#define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
#define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */

/* these flags are static, don't change them when value is changed
 * (clrvar() keeps exactly these bits and clears every other one) */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
54
/* Variable: one awk value. It may hold a number, a string, or both when
 * one form is a cached conversion of the other (see VF_CACHED). */
typedef struct var_s {
	unsigned short type; /* flags (VF_*) */
	double number; /* numeric value, or cached conversion of string */
	char *string; /* string value, or cached conversion of number; NULL reads as "" */
	union {
		int aidx; /* func arg index (on compilation stage) */
		struct xhash_s *array; /* array ptr */
		struct var_s *parent; /* for func args, ptr to actual parameter */
		char **walker; /* list of array elements (for..in) */
	} x;
} var;
67
/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
	struct node_s *first; /* first node of the chain */
	struct node_s *last; /* last node of the chain */
	char *programname; /* NOTE(review): presumably the source the chain was parsed from -- confirm */
} chain;
74
/* Function: user-defined awk function, stored in the functions hash */
typedef struct func_s {
	unsigned short nargs; /* number of arguments */
	struct chain_s body; /* node chain forming the function body */
} func;
80
/* I/O stream: entry of the redirect streams hash (see hash_item.data.rs).
 * NOTE(review): the code that fills these fields is outside this chunk;
 * the per-field notes below are read off the names only. */
typedef struct rstream_s {
	FILE *F; /* underlying stdio stream */
	char *buffer; /* presumably the read buffer -- confirm */
	int adv; /* presumably an advance/offset into buffer -- confirm */
	int size; /* presumably the buffer size -- confirm */
	int pos; /* presumably the current position in buffer -- confirm */
	unsigned short is_pipe; /* presumably nonzero when F is a pipe -- confirm */
} rstream;
90
/* One entry of an xhash bucket chain.
 * `data` has to stay the first member: hash_search() returns &item->data
 * and hash_find() stores that pointer straight back into a hash_item *.
 */
typedef struct hash_item_s {
	union {
		struct var_s v; /* variable/array hash */
		struct rstream_s rs; /* redirect streams hash */
		struct func_s f; /* functions hash */
	} data;
	struct hash_item_s *next; /* next in chain */
	char name[1]; /* really it's longer: hash_find() allocates
	               * sizeof(hash_item) + strlen(name) + 1 bytes */
} hash_item;
100
/* Chained hash table keyed by NUL-terminated names */
typedef struct xhash_s {
	unsigned int nel; /* num of elements */
	unsigned int csize; /* current hash size (number of buckets in items[]) */
	unsigned int nprime; /* next hash size in PRIMES[] (index used by hash_rebuild) */
	unsigned int glen; /* summary length of item names (each counted with its NUL) */
	struct hash_item_s **items; /* bucket heads, csize entries */
} xhash;
108
/* Tree node of the parsed program.
 * The l/r/a unions are the node's operands; which member is valid depends
 * on the operation class stored in info.
 */
typedef struct node_s {
	uint32_t info; /* operation class, flags and priority (OC_*, OF_*, P()) */
	unsigned short lineno; /* source line, copied from global lineno by new_node() */
	union {
		struct node_s *n;
		var *v;
		int i;
		char *s;
		regex_t *re; /* case-sensitive regex (set by mk_re_node) */
	} l;
	union {
		struct node_s *n;
		regex_t *ire; /* case-insensitive regex (set by mk_re_node) */
		func *f;
		int argno;
	} r;
	union {
		struct node_s *n;
	} a;
} node;
130
/* Block of temporary variables.
 * Blocks form a doubly linked list; nvalloc() hands out vars from nv[]
 * upwards and nvfree() gives them back in reverse order.
 */
typedef struct nvblock_s {
	int size; /* number of vars in nv[] */
	var *pos; /* first unused var in nv[] */
	struct nvblock_s *prev;
	struct nvblock_s *next;
	var nv[0]; /* zero-length array: storage is allocated right behind the header */
} nvblock;
139
/* A node bundled with storage for two compiled regexes.
 * NOTE(review): presumably re[0]/re[1] are the case-sensitive and
 * case-insensitive pair that mk_re_node() fills in -- confirm at the
 * fsplitter/rsplitter call sites.
 */
typedef struct tsplitter_s {
	node n;
	regex_t re[2];
} tsplitter;
144
/* simple token classes */
/* Order and hex values are very important!!! See next_token() */
/* next_token() starts with class bit 1 and shifts it left once for every
 * NTC marker it meets in tokenlist, so the Nth group of tokenlist
 * corresponds to the Nth bit defined here. */
#define TC_SEQSTART 1 /* ( */
#define TC_SEQTERM (1 << 1) /* ) */
#define TC_REGEXP (1 << 2) /* /.../ */
#define TC_OUTRDR (1 << 3) /* | > >> */
#define TC_UOPPOST (1 << 4) /* unary postfix operator */
#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
#define TC_BINOPX (1 << 6) /* two-opnd operator */
#define TC_IN (1 << 7)
#define TC_COMMA (1 << 8)
#define TC_PIPE (1 << 9) /* input redirection pipe */
#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
#define TC_ARRTERM (1 << 11) /* ] */
#define TC_GRPSTART (1 << 12) /* { */
#define TC_GRPTERM (1 << 13) /* } */
#define TC_SEMICOL (1 << 14)
#define TC_NEWLINE (1 << 15)
#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
#define TC_WHILE (1 << 17)
#define TC_ELSE (1 << 18)
#define TC_BUILTIN (1 << 19)
#define TC_GETLINE (1 << 20)
#define TC_FUNCDECL (1 << 21) /* `function' `func' */
#define TC_BEGIN (1 << 22)
#define TC_END (1 << 23)
/* the classes below have no tokenlist group; next_token() assigns them directly */
#define TC_EOF (1 << 24)
#define TC_VARIABLE (1 << 25)
#define TC_ARRAY (1 << 26)
#define TC_FUNCTION (1 << 27)
#define TC_STRING (1 << 28)
#define TC_NUMBER (1 << 29)

#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
	TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT (TC_STATX | TC_WHILE)
#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
	TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
	TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
	TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
207
/* operator flags, or'ed into node.info / tokeninfo[] entries.
 * NOTE(review): the evaluator that reads these bits is outside this chunk;
 * judging by the names and the combined mnemonics below, RESn/STRn/NUMn
 * ask for operand n to be resolved (to a var / string / number) -- confirm.
 */
#define OF_RES1 0x010000
#define OF_RES2 0x020000
#define OF_STR1 0x040000
#define OF_STR2 0x080000
#define OF_NUM1 0x100000
#define OF_CHECKED 0x200000

/* combined operator flags */
/* two-letter mnemonic: first letter describes operand 1, second operand 2;
 * x = no flag, V = RES, S = RES|STR, N = RES|NUM */
#define xx 0
#define xV OF_RES2
#define xS (OF_RES2 | OF_STR2)
#define Vx OF_RES1
#define VV (OF_RES1 | OF_RES2)
#define Nx (OF_RES1 | OF_NUM1)
#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx (OF_RES1 | OF_STR1)
#define SV (OF_RES1 | OF_STR1 | OF_RES2)
#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
226
/* masks for the fields packed into an info word */
#define OPCLSMASK 0xFF00 /* operation class (OC_*, ST_*) */
#define OPNMASK 0x007F /* low bits: operator character / builtin number */

/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 */
/* The argument is parenthesized so that P(a | b) shifts the whole
 * expression, and converted to uint32_t so that values with bit 7 set
 * (e.g. P(0x83), P(0xd6) in tokeninfo[]) are not shifted into the sign
 * bit of a signed int. The resulting bit pattern is the same as before. */
#define P(x) ((uint32_t)(x) << 24)
#define PRIMASK 0x7F000000
#define PRIMASK2 0x7E000000
237
/* Operation classes */
/* The class occupies the OPCLSMASK byte of an info word. */

/* NOTE(review): these two are class values used as range boundaries by code
 * outside this chunk; SHIFT_TIL_THIS equals OC_WALKINIT and
 * RECUR_FROM_THIS equals OC_BINARY -- confirm their exact use there. */
#define SHIFT_TIL_THIS 0x0600
#define RECUR_FROM_THIS 0x1000

enum {
	OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
	OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,

	OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
	OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
	OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,

	OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
	OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
	OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
	OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
	OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
	OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
	OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
	OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
	OC_DONE=0x2800,

	/* statement kinds; tokeninfo[] pairs them with if/do/for/while */
	ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
	ST_WHILE=0x3300
};
264
/* simple builtins */
/* Sub-codes or'ed with OC_FBLTIN in tokeninfo[]. Matching tokeninfo[]
 * against tokenlist gives: in=int rn=rand co=cos ex=exp lg=log si=sin
 * sq=sqrt sr=srand ti=systime le=length sy=system ff=fflush cl=close */
enum {
	F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
	F_ti, F_le, F_sy, F_ff, F_cl
};

/* builtins */
/* Sub-codes or'ed with OC_BUILTIN (OC_B) in tokeninfo[]:
 * a2=atan2 ix=index ma=match sp=split ss=substr ti=strftime lo=tolower
 * up=toupper ge=gensub gs=gsub su=sub */
enum {
	B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
	B_ge, B_gs, B_su
};
276
/* tokens and their corresponding info values */

#define NTC "\377" /* switch to next token class (tc<<1) */
#define NTCC '\377'

#define OC_B OC_BUILTIN

/* Token table scanned by next_token().
 * Every entry is one length byte (written as an octal escape) followed by
 * that many characters of token text. An NTC byte in place of a length
 * byte ends the current token class; a zero byte ends the table.
 * Entries are tried in order, so within a class a longer token has to
 * come before any token that is a prefix of it (">>" before ">").
 * tokeninfo[] holds one info word per entry, in the same order.
 */
static char * const tokenlist =
	"\1(" NTC
	"\1)" NTC
	"\1/" NTC /* REGEXP */
	"\2>>" "\1>" "\1|" NTC /* OUTRDR */
	"\2++" "\2--" NTC /* UOPPOST */
	"\2++" "\2--" "\1$" NTC /* UOPPRE1 */
	"\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
	"\2*=" "\2/=" "\2%=" "\2^="
	"\1+" "\1-" "\3**=" "\2**"
	"\1/" "\1%" "\1^" "\1*"
	"\2!=" "\2>=" "\2<=" "\1>"
	"\1<" "\2!~" "\1~" "\2&&"
	"\2||" "\1?" "\1:" NTC
	"\2in" NTC
	"\1," NTC
	"\1|" NTC
	"\1+" "\1-" "\1!" NTC /* UOPPRE2 */
	"\1]" NTC
	"\1{" NTC
	"\1}" NTC
	"\1;" NTC
	"\1\n" NTC
	"\2if" "\2do" "\3for" "\5break" /* STATX */
	"\10continue" "\6delete" "\5print"
	"\6printf" "\4next" "\10nextfile"
	"\6return" "\4exit" NTC
	"\5while" NTC
	"\4else" NTC

	"\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
	"\3cos" "\3exp" "\3int" "\3log"
	"\4rand" "\3sin" "\4sqrt" "\5srand"
	"\6gensub" "\4gsub" "\5index" "\6length"
	"\5match" "\5split" "\7sprintf" "\3sub"
	"\6substr" "\7systime" "\10strftime"
	"\7tolower" "\7toupper" NTC
	"\7getline" NTC
	"\4func" "\10function" NTC
	"\5BEGIN" NTC
	"\3END" "\0"
	;
/* Info word for every token of tokenlist, in the same order: one entry per
 * token, none for the NTC class separators (next_token() advances its
 * index only when it skips a token entry). Each row below covers the
 * tokenlist entries at the same position.
 */
static const uint32_t tokeninfo[] = {

	0,
	0,
	OC_REGEXP,
	xS|'a', xS|'w', xS|'|',
	OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
	OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
	OC_FIELD|xV|P(5),
	OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
	OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
	OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
	OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
	OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
	OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
	OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
	OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
	OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
	OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
	OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
	OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
	OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
	OC_COLON|xx|P(67)|':',
	OC_IN|SV|P(49),
	OC_COMMA|SS|P(80),
	OC_PGETLINE|SV|P(37),
	OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
	OC_UNARY|xV|P(19)|'!',
	0,
	0,
	0,
	0,
	0,
	ST_IF, ST_DO, ST_FOR, OC_BREAK,
	OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
	OC_PRINTF, OC_NEXT, OC_NEXTFILE,
	OC_RETURN|Vx, OC_EXIT|Nx,
	ST_WHILE,
	0,

	OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
	OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
	OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
	OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
	OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
	OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
	OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
	OC_GETLINE|SV|P(0),
	0, 0,
	0,
	0
};
379
/* internal variable names and their initial values */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enum gives the index of each internal variable in V[]; vNames lists
 * the names in that same order as consecutive NUL-terminated strings. */
enum {
	CONVFMT=0, OFMT, FS, OFS,
	ORS, RS, RT, FILENAME,
	SUBSEP, ARGIND, ARGC, ARGV,
	ERRNO, FNR,
	NR, NF, IGNORECASE,
	ENVIRON, F0, _intvarcount_
};

/* NOTE(review): the code that walks this table is outside this chunk, so
 * which neighbouring name each '*' marker applies to should be confirmed
 * there before the table is edited. */
static char * vNames =
	"CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
	"ORS\0" "RS\0*" "RT\0" "FILENAME\0"
	"SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
	"ERRNO\0" "FNR\0"
	"NR\0" "NF\0*" "IGNORECASE\0*"
	"ENVIRON\0" "$\0*" "\0";

/* Nine consecutive NUL-terminated strings followed by a \377 byte.
 * NOTE(review): presumably the initial string values of the first nine
 * variables (CONVFMT..SUBSEP), with \377 as end marker -- confirm in the
 * initialisation code. */
static char * vValues =
	"%.6g\0" "%.6g\0" " \0" " \0"
	"\n\0" "\n\0" "\0" "\0"
	"\034\0"
	"\377";
404
/* hash size may grow to these values */
/* FIRST_PRIME is the bucket count of a new table (hash_init); each
 * hash_rebuild() then moves on to the next entry of PRIMES[]. */
/* No trailing semicolon in the macro, so it can be used inside expressions. */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);
409
/* globals */

extern char **environ;

static var * V[_intvarcount_]; /* internal variables, indexed by the CONVFMT..F0 enum */
static chain beginseq, mainseq, endseq, *seq;
static int nextrec, nextfile;
static node *break_ptr, *continue_ptr;
static rstream *iF;
static xhash *vhash, *ahash, *fdhash, *fnhash; /* vhash: variables (newvar), fdhash: streams (newfile), fnhash: functions (newfunc) */
static char *programname; /* printed by syntax_error() in front of the line number */
static short lineno; /* line printed by syntax_error(); next_token() copies t.lineno here */
static int is_f0_split;
static int nfields;
static var *Fields;
static tsplitter fsplitter, rsplitter;
static nvblock *cb; /* current block of temporary variables (nvalloc/nvfree) */
static char *pos; /* parse position: next_token() resumes here and stores it back */
static char *buf; /* scratch buffer; getvar_s() formats numbers into it */
static int icase;
static int exiting;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000431
/* The current token, filled in by next_token() */
static struct {
	uint32_t tclass; /* token class (TC_*) */
	uint32_t info; /* tokeninfo[] entry of the token */
	char *string; /* text of a string, regexp or name token */
	double number; /* value of a TC_NUMBER token */
	short lineno; /* line counter advanced while scanning */
	int rollback; /* set by rollback_token(): next_token() returns this token again */
} t;
440
/* function prototypes */
/* forward declarations for functions that are used before they are defined */
static void handle_special(var *);
static node *parse_expr(uint32_t);
static void chain_group(void);
static var *evaluate(node *, var *);
static rstream *next_input_file(void);
static int fmt_num(char *, int, const char *, double, int);
static int awk_exit(int) attribute_noreturn;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000449
/* ---- error handling ---- */

/* message texts passed to syntax_error() / runtime_error() */
static const char EMSG_INTERNAL_ERROR[] = "Internal error";
static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
static const char EMSG_INV_FMT[] = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
/* only defined when math support is configured out */
#ifndef CONFIG_FEATURE_AWK_MATH
static const char EMSG_NO_MATH[] = "Math support is not compiled in";
#endif
/* Report `message` as "<programname>:<lineno>: <message>" through
 * bb_error_msg_and_die(). Declared noreturn. */
static void syntax_error(const char * const message) attribute_noreturn;
static void syntax_error(const char * const message)
{
	bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
}

/* run-time errors are reported exactly like syntax errors */
#define runtime_error(x) syntax_error(x)
472
473
474/* ---- hash stuff ---- */
/* Hash a NUL-terminated name: for every character, hash = hash * 63 + char
 * (unsigned arithmetic, wraps around). The caller reduces the result
 * modulo the table size. */
static unsigned int hashidx(const char *name)
{
	unsigned int hash = 0;

	for (; *name != '\0'; name++)
		hash = (hash << 6) - hash + *name;

	return hash;
}
483
484/* create new hash */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000485static xhash *hash_init(void)
486{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000487 xhash *newhash;
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000488
Glenn L McGrath545106f2002-11-11 06:21:00 +0000489 newhash = (xhash *)xcalloc(1, sizeof(xhash));
490 newhash->csize = FIRST_PRIME;
491 newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
492
493 return newhash;
494}
495
496/* find item in hash, return ptr to data, NULL if not found */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000497static void *hash_search(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000498{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000499 hash_item *hi;
500
501 hi = hash->items [ hashidx(name) % hash->csize ];
502 while (hi) {
503 if (strcmp(hi->name, name) == 0)
504 return &(hi->data);
505 hi = hi->next;
506 }
507 return NULL;
508}
509
510/* grow hash if it becomes too big */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000511static void hash_rebuild(xhash *hash)
512{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000513 unsigned int newsize, i, idx;
514 hash_item **newitems, *hi, *thi;
515
516 if (hash->nprime == NPRIMES)
517 return;
518
519 newsize = PRIMES[hash->nprime++];
520 newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
521
522 for (i=0; i<hash->csize; i++) {
523 hi = hash->items[i];
524 while (hi) {
525 thi = hi;
526 hi = thi->next;
527 idx = hashidx(thi->name) % newsize;
528 thi->next = newitems[idx];
529 newitems[idx] = thi;
530 }
531 }
532
533 free(hash->items);
534 hash->csize = newsize;
535 hash->items = newitems;
536}
537
538/* find item in hash, add it if necessary. Return ptr to data */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000539static void *hash_find(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000540{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000541 hash_item *hi;
542 unsigned int idx;
543 int l;
544
545 hi = hash_search(hash, name);
546 if (! hi) {
547 if (++hash->nel / hash->csize > 10)
548 hash_rebuild(hash);
549
Manuel Novoa III cad53642003-03-19 09:13:01 +0000550 l = bb_strlen(name) + 1;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000551 hi = xcalloc(sizeof(hash_item) + l, 1);
552 memcpy(hi->name, name, l);
553
554 idx = hashidx(name) % hash->csize;
555 hi->next = hash->items[idx];
556 hash->items[idx] = hi;
557 hash->glen += l;
558 }
559 return &(hi->data);
560}
561
/* Typed wrappers around hash_find(): look the name up and create the item
 * when it is missing. newvar/newfile/newfunc use the global variable,
 * stream and function hashes.
 * Each expansion is wrapped in parentheses so that the result can be
 * followed directly by -> or used inside a larger expression. */
#define findvar(hash, name) ((var *) hash_find ( (hash) , (name) ))
#define newvar(name) ((var *) hash_find ( vhash , (name) ))
#define newfile(name) ((rstream *) hash_find ( fdhash , (name) ))
#define newfunc(name) ((func *) hash_find ( fnhash , (name) ))
566
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000567static void hash_remove(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000568{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000569 hash_item *hi, **phi;
570
571 phi = &(hash->items[ hashidx(name) % hash->csize ]);
572 while (*phi) {
573 hi = *phi;
574 if (strcmp(hi->name, name) == 0) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000575 hash->glen -= (bb_strlen(name) + 1);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000576 hash->nel--;
577 *phi = hi->next;
578 free(hi);
579 break;
580 }
581 phi = &(hi->next);
582 }
583}
584
585/* ------ some useful functions ------ */
586
Mike Frysinger10a11e22005-09-27 02:23:02 +0000587static void skip_spaces(char **s)
588{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000589 register char *p = *s;
590
591 while(*p == ' ' || *p == '\t' ||
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000592 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
Mike Frysingerde2b9382005-09-27 03:18:00 +0000593 p++;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000594 }
595 *s = p;
596}
597
/* Return the NUL-terminated word *s points at and move *s just past the
 * word's terminating NUL (i.e. to the start of the following word). */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
606
/* Fetch one character from *s and advance *s past it. A backslash is
 * handed to bb_process_escape_sequence(); when that call returns a
 * backslash without having moved *s, the character following the
 * backslash is returned as is. */
static char nextchar(char **s)
{
	char ch = *(*s)++;
	char *after_bs = *s;

	if (ch != '\\')
		return ch;

	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after_bs)
		ch = *(*s)++;
	return ch;
}
617
/* Return nonzero when c is a letter, a digit or an underscore.
 * Callers pass plain char values, which may be negative; the value is
 * converted to unsigned char first because isalnum() is only defined for
 * EOF and values representable as unsigned char. */
static inline int isalnum_(int c)
{
	return (isalnum((unsigned char)c) || c == '_');
}
622
/* Open `path` with bb_xfopen(); the single-character name "-" stands for
 * stdin and is returned without opening anything. */
static FILE *afopen(const char *path, const char *mode)
{
	if (strcmp(path, "-") == 0)
		return stdin;

	return bb_xfopen(path, mode);
}
627
628/* -------- working with variables (set/get/copy/etc) -------- */
629
Mike Frysinger10a11e22005-09-27 02:23:02 +0000630static xhash *iamarray(var *v)
631{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000632 var *a = v;
633
634 while (a->type & VF_CHILD)
635 a = a->x.parent;
636
637 if (! (a->type & VF_ARRAY)) {
638 a->type |= VF_ARRAY;
639 a->x.array = hash_init();
640 }
641 return a->x.array;
642}
643
Mike Frysinger10a11e22005-09-27 02:23:02 +0000644static void clear_array(xhash *array)
645{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000646 unsigned int i;
647 hash_item *hi, *thi;
648
649 for (i=0; i<array->csize; i++) {
650 hi = array->items[i];
651 while (hi) {
652 thi = hi;
653 hi = hi->next;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000654 free(thi->data.v.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000655 free(thi);
656 }
657 array->items[i] = NULL;
658 }
659 array->glen = array->nel = 0;
660}
661
662/* clear a variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000663static var *clrvar(var *v)
664{
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000665 if (!(v->type & VF_FSTR))
Glenn L McGrath545106f2002-11-11 06:21:00 +0000666 free(v->string);
667
668 v->type &= VF_DONTTOUCH;
669 v->type |= VF_DIRTY;
670 v->string = NULL;
671 return v;
672}
673
674/* assign string value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000675static var *setvar_p(var *v, char *value)
676{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000677 clrvar(v);
678 v->string = value;
679 handle_special(v);
680
681 return v;
682}
683
684/* same as setvar_p but make a copy of string */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000685static var *setvar_s(var *v, const char *value)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000686{
Manuel Novoa III cad53642003-03-19 09:13:01 +0000687 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000688}
689
690/* same as setvar_s but set USER flag */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000691static var *setvar_u(var *v, const char *value)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000692{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000693 setvar_s(v, value);
694 v->type |= VF_USER;
695 return v;
696}
697
698/* set array element to user string */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000699static void setari_u(var *a, int idx, const char *s)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000700{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000701 register var *v;
702 static char sidx[12];
703
704 sprintf(sidx, "%d", idx);
705 v = findvar(iamarray(a), sidx);
706 setvar_u(v, s);
707}
708
709/* assign numeric value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000710static var *setvar_i(var *v, double value)
711{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000712 clrvar(v);
713 v->type |= VF_NUMBER;
714 v->number = value;
715 handle_special(v);
716 return v;
717}
718
Mike Frysinger10a11e22005-09-27 02:23:02 +0000719static char *getvar_s(var *v)
720{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000721 /* if v is numeric and has no cached string, convert it to string */
722 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
723 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
Manuel Novoa III cad53642003-03-19 09:13:01 +0000724 v->string = bb_xstrdup(buf);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000725 v->type |= VF_CACHED;
726 }
727 return (v->string == NULL) ? "" : v->string;
728}
729
Mike Frysinger10a11e22005-09-27 02:23:02 +0000730static double getvar_i(var *v)
731{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000732 char *s;
733
734 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
735 v->number = 0;
736 s = v->string;
737 if (s && *s) {
738 v->number = strtod(s, &s);
739 if (v->type & VF_USER) {
740 skip_spaces(&s);
741 if (*s != '\0')
742 v->type &= ~VF_USER;
743 }
744 } else {
745 v->type &= ~VF_USER;
746 }
747 v->type |= VF_CACHED;
748 }
749 return v->number;
750}
751
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000752static var *copyvar(var *dest, const var *src)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000753{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000754 if (dest != src) {
755 clrvar(dest);
756 dest->type |= (src->type & ~VF_DONTTOUCH);
757 dest->number = src->number;
758 if (src->string)
Manuel Novoa III cad53642003-03-19 09:13:01 +0000759 dest->string = bb_xstrdup(src->string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000760 }
761 handle_special(dest);
762 return dest;
763}
764
Mike Frysinger10a11e22005-09-27 02:23:02 +0000765static var *incvar(var *v)
766{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000767 return setvar_i(v, getvar_i(v)+1.);
768}
769
770/* return true if v is number or numeric string */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000771static int is_numeric(var *v)
772{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000773 getvar_i(v);
774 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
775}
776
777/* return 1 when value of v corresponds to true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000778static int istrue(var *v)
779{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000780 if (is_numeric(v))
781 return (v->number == 0) ? 0 : 1;
782 else
783 return (v->string && *(v->string)) ? 1 : 0;
784}
785
Eric Andersenaff114c2004-04-14 17:51:38 +0000786/* temporary variables allocator. Last allocated should be first freed */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000787static var *nvalloc(int n)
788{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000789 nvblock *pb = NULL;
790 var *v, *r;
791 int size;
792
793 while (cb) {
794 pb = cb;
795 if ((cb->pos - cb->nv) + n <= cb->size) break;
796 cb = cb->next;
797 }
798
799 if (! cb) {
800 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
801 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
802 cb->size = size;
803 cb->pos = cb->nv;
804 cb->prev = pb;
805 cb->next = NULL;
806 if (pb) pb->next = cb;
807 }
808
809 v = r = cb->pos;
810 cb->pos += n;
811
812 while (v < cb->pos) {
813 v->type = 0;
814 v->string = NULL;
815 v++;
816 }
817
818 return r;
819}
820
Mike Frysinger10a11e22005-09-27 02:23:02 +0000821static void nvfree(var *v)
822{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000823 var *p;
824
825 if (v < cb->nv || v >= cb->pos)
826 runtime_error(EMSG_INTERNAL_ERROR);
827
828 for (p=v; p<cb->pos; p++) {
829 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
830 clear_array(iamarray(p));
831 free(p->x.array->items);
832 free(p->x.array);
833 }
834 if (p->type & VF_WALK)
835 free(p->x.walker);
836
837 clrvar(p);
838 }
839
840 cb->pos = v;
841 while (cb->prev && cb->pos == cb->nv) {
842 cb = cb->prev;
843 }
844}
845
846/* ------- awk program text parsing ------- */
847
848/* Parse next token pointed by global pos, place results into global t.
849 * If token isn't expected, give away. Return token class
850 */
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000851static uint32_t next_token(uint32_t expected)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000852{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000853 char *p, *pp, *s;
854 char *tl;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000855 uint32_t tc;
856 const uint32_t *ti;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000857 int l;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000858 static int concat_inserted;
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000859 static uint32_t save_tclass, save_info;
860 static uint32_t ltclass = TC_OPTERM;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000861
862 if (t.rollback) {
863
864 t.rollback = FALSE;
865
866 } else if (concat_inserted) {
867
868 concat_inserted = FALSE;
869 t.tclass = save_tclass;
870 t.info = save_info;
871
872 } else {
873
874 p = pos;
875
876 readnext:
877 skip_spaces(&p);
878 lineno = t.lineno;
879 if (*p == '#')
880 while (*p != '\n' && *p != '\0') p++;
881
882 if (*p == '\n')
883 t.lineno++;
884
885 if (*p == '\0') {
886 tc = TC_EOF;
887
888 } else if (*p == '\"') {
889 /* it's a string */
890 t.string = s = ++p;
891 while (*p != '\"') {
892 if (*p == '\0' || *p == '\n')
893 syntax_error(EMSG_UNEXP_EOS);
894 *(s++) = nextchar(&p);
895 }
896 p++;
897 *s = '\0';
898 tc = TC_STRING;
899
900 } else if ((expected & TC_REGEXP) && *p == '/') {
901 /* it's regexp */
902 t.string = s = ++p;
903 while (*p != '/') {
904 if (*p == '\0' || *p == '\n')
905 syntax_error(EMSG_UNEXP_EOS);
906 if ((*s++ = *p++) == '\\') {
907 pp = p;
Manuel Novoa III cad53642003-03-19 09:13:01 +0000908 *(s-1) = bb_process_escape_sequence((const char **)&p);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000909 if (*pp == '\\') *s++ = '\\';
910 if (p == pp) *s++ = *p++;
911 }
912 }
913 p++;
914 *s = '\0';
915 tc = TC_REGEXP;
916
917 } else if (*p == '.' || isdigit(*p)) {
918 /* it's a number */
919 t.number = strtod(p, &p);
920 if (*p == '.')
921 syntax_error(EMSG_UNEXP_TOKEN);
922 tc = TC_NUMBER;
923
924 } else {
925 /* search for something known */
926 tl = tokenlist;
927 tc = 0x00000001;
928 ti = tokeninfo;
929 while (*tl) {
930 l = *(tl++);
931 if (l == NTCC) {
932 tc <<= 1;
933 continue;
934 }
935 /* if token class is expected, token
936 * matches and it's not a longer word,
937 * then this is what we are looking for
938 */
939 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
940 *tl == *p && strncmp(p, tl, l) == 0 &&
941 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
942 t.info = *ti;
943 p += l;
944 break;
945 }
946 ti++;
947 tl += l;
948 }
949
950 if (! *tl) {
951 /* it's a name (var/array/function),
952 * otherwise it's something wrong
953 */
954 if (! isalnum_(*p))
955 syntax_error(EMSG_UNEXP_TOKEN);
956
957 t.string = --p;
958 while(isalnum_(*(++p))) {
959 *(p-1) = *p;
960 }
961 *(p-1) = '\0';
962 tc = TC_VARIABLE;
Bernhard Reutner-Fischerbb204622005-10-17 14:21:06 +0000963 /* also consume whitespace between functionname and bracket */
964 skip_spaces(&p);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000965 if (*p == '(') {
966 tc = TC_FUNCTION;
967 } else {
Glenn L McGrath545106f2002-11-11 06:21:00 +0000968 if (*p == '[') {
969 p++;
970 tc = TC_ARRAY;
971 }
972 }
973 }
974 }
975 pos = p;
976
977 /* skipping newlines in some cases */
978 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
979 goto readnext;
980
981 /* insert concatenation operator when needed */
982 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
983 concat_inserted = TRUE;
984 save_tclass = tc;
985 save_info = t.info;
986 tc = TC_BINOP;
987 t.info = OC_CONCAT | SS | P(35);
988 }
989
990 t.tclass = tc;
991 }
992 ltclass = t.tclass;
993
994 /* Are we ready for this? */
995 if (! (ltclass & expected))
996 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
997 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
998
999 return ltclass;
1000}
1001
1002static void rollback_token(void) { t.rollback = TRUE; }
1003
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001004static node *new_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001005{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001006 register node *n;
1007
1008 n = (node *)xcalloc(sizeof(node), 1);
1009 n->info = info;
1010 n->lineno = lineno;
1011 return n;
1012}
1013
Mike Frysinger10a11e22005-09-27 02:23:02 +00001014static node *mk_re_node(char *s, node *n, regex_t *re)
1015{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001016 n->info = OC_REGEXP;
1017 n->l.re = re;
1018 n->r.ire = re + 1;
1019 xregcomp(re, s, REG_EXTENDED);
1020 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1021
1022 return n;
1023}
1024
Mike Frysinger10a11e22005-09-27 02:23:02 +00001025static node *condition(void)
1026{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001027 next_token(TC_SEQSTART);
1028 return parse_expr(TC_SEQTERM);
1029}
1030
1031/* parse expression terminated by given argument, return ptr
1032 * to built subtree. Terminator is eaten by parse_expr */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001033static node *parse_expr(uint32_t iexp)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001034{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001035 node sn;
1036 node *cn = &sn;
1037 node *vn, *glptr;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001038 uint32_t tc, xtc;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001039 var *v;
1040
1041 sn.info = PRIMASK;
1042 sn.r.n = glptr = NULL;
1043 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1044
1045 while (! ((tc = next_token(xtc)) & iexp)) {
1046 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1047 /* input redirection (<) attached to glptr node */
1048 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
Glenn L McGrath4bded582004-02-22 11:55:09 +00001049 cn->a.n = glptr;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001050 xtc = TC_OPERAND | TC_UOPPRE;
1051 glptr = NULL;
1052
1053 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1054 /* for binary and postfix-unary operators, jump back over
1055 * previous operators with higher priority */
1056 vn = cn;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001057 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
Glenn L McGrath545106f2002-11-11 06:21:00 +00001058 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1059 vn = vn->a.n;
1060 if ((t.info & OPCLSMASK) == OC_TERNARY)
1061 t.info += P(6);
1062 cn = vn->a.n->r.n = new_node(t.info);
1063 cn->a.n = vn->a.n;
1064 if (tc & TC_BINOP) {
1065 cn->l.n = vn;
1066 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1067 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1068 /* it's a pipe */
1069 next_token(TC_GETLINE);
1070 /* give maximum priority to this pipe */
1071 cn->info &= ~PRIMASK;
1072 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1073 }
1074 } else {
1075 cn->r.n = vn;
1076 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1077 }
1078 vn->a.n = cn;
1079
1080 } else {
1081 /* for operands and prefix-unary operators, attach them
1082 * to last node */
1083 vn = cn;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001084 cn = vn->r.n = new_node(t.info);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001085 cn->a.n = vn;
1086 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1087 if (tc & (TC_OPERAND | TC_REGEXP)) {
Rob Landleyed830e82005-06-07 02:43:52 +00001088 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001089 /* one should be very careful with switch on tclass -
Glenn L McGrath545106f2002-11-11 06:21:00 +00001090 * only simple tclasses should be used! */
1091 switch (tc) {
1092 case TC_VARIABLE:
1093 case TC_ARRAY:
1094 cn->info = OC_VAR;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001095 if ((v = hash_search(ahash, t.string)) != NULL) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001096 cn->info = OC_FNARG;
1097 cn->l.i = v->x.aidx;
1098 } else {
Mike Frysingerde2b9382005-09-27 03:18:00 +00001099 cn->l.v = newvar(t.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001100 }
1101 if (tc & TC_ARRAY) {
1102 cn->info |= xS;
1103 cn->r.n = parse_expr(TC_ARRTERM);
1104 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00001105 break;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001106
Glenn L McGrath545106f2002-11-11 06:21:00 +00001107 case TC_NUMBER:
1108 case TC_STRING:
1109 cn->info = OC_VAR;
1110 v = cn->l.v = xcalloc(sizeof(var), 1);
1111 if (tc & TC_NUMBER)
1112 setvar_i(v, t.number);
1113 else
1114 setvar_s(v, t.string);
1115 break;
1116
1117 case TC_REGEXP:
1118 mk_re_node(t.string, cn,
1119 (regex_t *)xcalloc(sizeof(regex_t),2));
1120 break;
1121
1122 case TC_FUNCTION:
Mike Frysingerde2b9382005-09-27 03:18:00 +00001123 cn->info = OC_FUNC;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001124 cn->r.f = newfunc(t.string);
1125 cn->l.n = condition();
1126 break;
1127
1128 case TC_SEQSTART:
1129 cn = vn->r.n = parse_expr(TC_SEQTERM);
1130 cn->a.n = vn;
1131 break;
1132
1133 case TC_GETLINE:
1134 glptr = cn;
1135 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1136 break;
1137
1138 case TC_BUILTIN:
1139 cn->l.n = condition();
1140 break;
1141 }
1142 }
1143 }
1144 }
1145 return sn.r.n;
1146}
1147
1148/* add node to chain. Return ptr to alloc'd node */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001149static node *chain_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001150{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001151 register node *n;
1152
1153 if (! seq->first)
1154 seq->first = seq->last = new_node(0);
1155
1156 if (seq->programname != programname) {
1157 seq->programname = programname;
1158 n = chain_node(OC_NEWSOURCE);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001159 n->l.s = bb_xstrdup(programname);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001160 }
1161
1162 n = seq->last;
1163 n->info = info;
1164 seq->last = n->a.n = new_node(OC_DONE);
1165
1166 return n;
1167}
1168
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001169static void chain_expr(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001170{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001171 node *n;
1172
1173 n = chain_node(info);
1174 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1175 if (t.tclass & TC_GRPTERM)
1176 rollback_token();
1177}
1178
Mike Frysinger10a11e22005-09-27 02:23:02 +00001179static node *chain_loop(node *nn)
1180{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001181 node *n, *n2, *save_brk, *save_cont;
1182
1183 save_brk = break_ptr;
1184 save_cont = continue_ptr;
1185
1186 n = chain_node(OC_BR | Vx);
1187 continue_ptr = new_node(OC_EXEC);
1188 break_ptr = new_node(OC_EXEC);
1189 chain_group();
1190 n2 = chain_node(OC_EXEC | Vx);
1191 n2->l.n = nn;
1192 n2->a.n = n;
1193 continue_ptr->a.n = n2;
1194 break_ptr->a.n = n->r.n = seq->last;
1195
1196 continue_ptr = save_cont;
1197 break_ptr = save_brk;
1198
1199 return n;
1200}
1201
1202/* parse group and attach it to chain */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001203static void chain_group(void)
1204{
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001205 uint32_t c;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001206 node *n, *n2, *n3;
1207
1208 do {
1209 c = next_token(TC_GRPSEQ);
1210 } while (c & TC_NEWLINE);
1211
1212 if (c & TC_GRPSTART) {
1213 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
Mike Frysingerde2b9382005-09-27 03:18:00 +00001214 if (t.tclass & TC_NEWLINE) continue;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001215 rollback_token();
1216 chain_group();
1217 }
1218 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1219 rollback_token();
1220 chain_expr(OC_EXEC | Vx);
1221 } else { /* TC_STATEMNT */
1222 switch (t.info & OPCLSMASK) {
1223 case ST_IF:
1224 n = chain_node(OC_BR | Vx);
1225 n->l.n = condition();
1226 chain_group();
1227 n2 = chain_node(OC_EXEC);
1228 n->r.n = seq->last;
1229 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1230 chain_group();
1231 n2->a.n = seq->last;
1232 } else {
1233 rollback_token();
1234 }
1235 break;
1236
1237 case ST_WHILE:
1238 n2 = condition();
1239 n = chain_loop(NULL);
1240 n->l.n = n2;
1241 break;
1242
1243 case ST_DO:
1244 n2 = chain_node(OC_EXEC);
1245 n = chain_loop(NULL);
1246 n2->a.n = n->a.n;
1247 next_token(TC_WHILE);
1248 n->l.n = condition();
1249 break;
1250
1251 case ST_FOR:
1252 next_token(TC_SEQSTART);
1253 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1254 if (t.tclass & TC_SEQTERM) { /* for-in */
1255 if ((n2->info & OPCLSMASK) != OC_IN)
1256 syntax_error(EMSG_UNEXP_TOKEN);
1257 n = chain_node(OC_WALKINIT | VV);
1258 n->l.n = n2->l.n;
1259 n->r.n = n2->r.n;
1260 n = chain_loop(NULL);
1261 n->info = OC_WALKNEXT | Vx;
1262 n->l.n = n2->l.n;
1263 } else { /* for(;;) */
1264 n = chain_node(OC_EXEC | Vx);
1265 n->l.n = n2;
1266 n2 = parse_expr(TC_SEMICOL);
1267 n3 = parse_expr(TC_SEQTERM);
1268 n = chain_loop(n3);
1269 n->l.n = n2;
1270 if (! n2)
1271 n->info = OC_EXEC;
1272 }
1273 break;
1274
1275 case OC_PRINT:
1276 case OC_PRINTF:
1277 n = chain_node(t.info);
1278 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1279 if (t.tclass & TC_OUTRDR) {
1280 n->info |= t.info;
1281 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1282 }
1283 if (t.tclass & TC_GRPTERM)
1284 rollback_token();
1285 break;
1286
1287 case OC_BREAK:
1288 n = chain_node(OC_EXEC);
1289 n->a.n = break_ptr;
1290 break;
1291
1292 case OC_CONTINUE:
1293 n = chain_node(OC_EXEC);
1294 n->a.n = continue_ptr;
1295 break;
1296
1297 /* delete, next, nextfile, return, exit */
1298 default:
1299 chain_expr(t.info);
1300
1301 }
1302 }
1303}
1304
Mike Frysinger10a11e22005-09-27 02:23:02 +00001305static void parse_program(char *p)
1306{
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001307 uint32_t tclass;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001308 node *cn;
1309 func *f;
1310 var *v;
1311
1312 pos = p;
1313 t.lineno = 1;
1314 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1315 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1316
1317 if (tclass & TC_OPTERM)
1318 continue;
1319
1320 seq = &mainseq;
1321 if (tclass & TC_BEGIN) {
1322 seq = &beginseq;
1323 chain_group();
1324
1325 } else if (tclass & TC_END) {
1326 seq = &endseq;
1327 chain_group();
1328
1329 } else if (tclass & TC_FUNCDECL) {
1330 next_token(TC_FUNCTION);
1331 pos++;
1332 f = newfunc(t.string);
1333 f->body.first = NULL;
1334 f->nargs = 0;
1335 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1336 v = findvar(ahash, t.string);
1337 v->x.aidx = (f->nargs)++;
1338
1339 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1340 break;
1341 }
1342 seq = &(f->body);
1343 chain_group();
1344 clear_array(ahash);
1345
1346 } else if (tclass & TC_OPSEQ) {
1347 rollback_token();
1348 cn = chain_node(OC_TEST);
1349 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1350 if (t.tclass & TC_GRPSTART) {
1351 rollback_token();
1352 chain_group();
1353 } else {
1354 chain_node(OC_PRINT);
1355 }
1356 cn->r.n = mainseq.last;
1357
1358 } else /* if (tclass & TC_GRPSTART) */ {
1359 rollback_token();
1360 chain_group();
1361 }
1362 }
1363}
1364
1365
1366/* -------- program execution part -------- */
1367
Mike Frysinger10a11e22005-09-27 02:23:02 +00001368static node *mk_splitter(char *s, tsplitter *spl)
1369{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001370 register regex_t *re, *ire;
1371 node *n;
1372
1373 re = &spl->re[0];
1374 ire = &spl->re[1];
1375 n = &spl->n;
1376 if ((n->info && OPCLSMASK) == OC_REGEXP) {
1377 regfree(re);
1378 regfree(ire);
1379 }
Manuel Novoa III cad53642003-03-19 09:13:01 +00001380 if (bb_strlen(s) > 1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001381 mk_re_node(s, n, re);
1382 } else {
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001383 n->info = (uint32_t) *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001384 }
1385
1386 return n;
1387}
1388
1389/* use node as a regular expression. Supplied with node ptr and regex_t
Eric Andersenaff114c2004-04-14 17:51:38 +00001390 * storage space. Return ptr to regex (if result points to preg, it should
Glenn L McGrath545106f2002-11-11 06:21:00 +00001391 * be later regfree'd manually
1392 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001393static regex_t *as_regex(node *op, regex_t *preg)
1394{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001395 var *v;
1396 char *s;
1397
1398 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1399 return icase ? op->r.ire : op->l.re;
1400 } else {
1401 v = nvalloc(1);
1402 s = getvar_s(evaluate(op, v));
1403 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1404 nvfree(v);
1405 return preg;
1406 }
1407}
1408
/* Gradually growing buffer: make sure *b exists and can hold more than `n`
 * bytes. When *b is NULL or `n` has reached *size, the buffer is
 * reallocated to n + n/2 + 80 bytes and *size is updated. *size is not
 * read while *b is NULL, so it may be uninitialised on the first call. */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1415
1416/* resize field storage space */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001417static void fsrealloc(int size)
1418{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001419 static int maxfields = 0;
1420 int i;
1421
1422 if (size >= maxfields) {
1423 i = maxfields;
1424 maxfields = size + 16;
1425 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1426 for (; i<maxfields; i++) {
1427 Fields[i].type = VF_SPECIAL;
1428 Fields[i].string = NULL;
1429 }
1430 }
1431
1432 if (size < nfields) {
1433 for (i=size; i<nfields; i++) {
1434 clrvar(Fields+i);
1435 }
1436 }
1437 nfields = size;
1438}
1439
Mike Frysinger10a11e22005-09-27 02:23:02 +00001440static int awk_split(char *s, node *spl, char **slist)
1441{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001442 int l, n=0;
1443 char c[4];
1444 char *s1;
1445 regmatch_t pmatch[2];
1446
1447 /* in worst case, each char would be a separate field */
Manuel Novoa III cad53642003-03-19 09:13:01 +00001448 *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001449
1450 c[0] = c[1] = (char)spl->info;
1451 c[2] = c[3] = '\0';
1452 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1453
1454 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1455 while (*s) {
1456 l = strcspn(s, c+2);
1457 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1458 pmatch[0].rm_so <= l) {
1459 l = pmatch[0].rm_so;
1460 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1461 } else {
1462 pmatch[0].rm_eo = l;
1463 if (*(s+l)) pmatch[0].rm_eo++;
1464 }
1465
1466 memcpy(s1, s, l);
1467 *(s1+l) = '\0';
1468 nextword(&s1);
1469 s += pmatch[0].rm_eo;
1470 n++;
1471 }
1472 } else if (c[0] == '\0') { /* null split */
1473 while(*s) {
1474 *(s1++) = *(s++);
1475 *(s1++) = '\0';
1476 n++;
1477 }
1478 } else if (c[0] != ' ') { /* single-character split */
1479 if (icase) {
1480 c[0] = toupper(c[0]);
1481 c[1] = tolower(c[1]);
1482 }
1483 if (*s1) n++;
1484 while ((s1 = strpbrk(s1, c))) {
1485 *(s1++) = '\0';
1486 n++;
1487 }
1488 } else { /* space split */
1489 while (*s) {
1490 while (isspace(*s)) s++;
1491 if (! *s) break;
1492 n++;
1493 while (*s && !isspace(*s))
1494 *(s1++) = *(s++);
1495 *(s1++) = '\0';
1496 }
1497 }
1498 return n;
1499}
1500
Mike Frysinger10a11e22005-09-27 02:23:02 +00001501static void split_f0(void)
1502{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001503 static char *fstrings = NULL;
1504 int i, n;
1505 char *s;
1506
1507 if (is_f0_split)
1508 return;
1509
1510 is_f0_split = TRUE;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00001511 free(fstrings);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001512 fsrealloc(0);
1513 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1514 fsrealloc(n);
1515 s = fstrings;
1516 for (i=0; i<n; i++) {
1517 Fields[i].string = nextword(&s);
1518 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1519 }
1520
1521 /* set NF manually to avoid side effects */
1522 clrvar(V[NF]);
1523 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1524 V[NF]->number = nfields;
1525}
1526
1527/* perform additional actions when some internal variables changed */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001528static void handle_special(var *v)
1529{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001530 int n;
1531 char *b, *sep, *s;
1532 int sl, l, len, i, bsize;
1533
1534 if (! (v->type & VF_SPECIAL))
1535 return;
1536
1537 if (v == V[NF]) {
1538 n = (int)getvar_i(v);
1539 fsrealloc(n);
1540
1541 /* recalculate $0 */
1542 sep = getvar_s(V[OFS]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001543 sl = bb_strlen(sep);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001544 b = NULL;
1545 len = 0;
1546 for (i=0; i<n; i++) {
1547 s = getvar_s(&Fields[i]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001548 l = bb_strlen(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001549 if (b) {
1550 memcpy(b+len, sep, sl);
1551 len += sl;
1552 }
1553 qrealloc(&b, len+l+sl, &bsize);
1554 memcpy(b+len, s, l);
1555 len += l;
1556 }
Glenn L McGrathca29ffc2004-09-24 09:24:27 +00001557 if (b) b[len] = '\0';
Glenn L McGrath545106f2002-11-11 06:21:00 +00001558 setvar_p(V[F0], b);
1559 is_f0_split = TRUE;
1560
1561 } else if (v == V[F0]) {
1562 is_f0_split = FALSE;
1563
1564 } else if (v == V[FS]) {
1565 mk_splitter(getvar_s(v), &fsplitter);
1566
1567 } else if (v == V[RS]) {
1568 mk_splitter(getvar_s(v), &rsplitter);
1569
1570 } else if (v == V[IGNORECASE]) {
1571 icase = istrue(v);
1572
1573 } else { /* $n */
1574 n = getvar_i(V[NF]);
1575 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1576 /* right here v is invalid. Just to note... */
1577 }
1578}
1579
1580/* step through func/builtin/etc arguments */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001581static node *nextarg(node **pn)
1582{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001583 node *n;
1584
1585 n = *pn;
1586 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1587 *pn = n->r.n;
1588 n = n->l.n;
1589 } else {
1590 *pn = NULL;
1591 }
1592 return n;
1593}
1594
Mike Frysinger10a11e22005-09-27 02:23:02 +00001595static void hashwalk_init(var *v, xhash *array)
1596{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001597 char **w;
1598 hash_item *hi;
1599 int i;
1600
1601 if (v->type & VF_WALK)
1602 free(v->x.walker);
1603
1604 v->type |= VF_WALK;
1605 w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1606 *w = *(w+1) = (char *)(w + 2);
1607 for (i=0; i<array->csize; i++) {
1608 hi = array->items[i];
1609 while(hi) {
1610 strcpy(*w, hi->name);
1611 nextword(w);
1612 hi = hi->next;
1613 }
1614 }
1615}
1616
Mike Frysinger10a11e22005-09-27 02:23:02 +00001617static int hashwalk_next(var *v)
1618{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001619 char **w;
1620
1621 w = v->x.walker;
1622 if (*(w+1) == *w)
1623 return FALSE;
1624
1625 setvar_s(v, nextword(w+1));
1626 return TRUE;
1627}
1628
1629/* evaluate node, return 1 when result is true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001630static int ptest(node *pattern)
1631{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001632 static var v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001633 return istrue(evaluate(pattern, &v));
1634}
1635
1636/* read next record from stream rsm into a variable v */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001637static int awk_getline(rstream *rsm, var *v)
1638{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001639 char *b;
1640 regmatch_t pmatch[2];
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001641 int a, p, pp=0, size;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001642 int fd, so, eo, r, rp;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001643 char c, *m, *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001644
1645 /* we're using our own buffer since we need access to accumulating
1646 * characters
1647 */
1648 fd = fileno(rsm->F);
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001649 m = rsm->buffer;
1650 a = rsm->adv;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001651 p = rsm->pos;
1652 size = rsm->size;
1653 c = (char) rsplitter.n.info;
1654 rp = 0;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001655
1656 if (! m) qrealloc(&m, 256, &size);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001657 do {
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001658 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001659 so = eo = p;
1660 r = 1;
1661 if (p > 0) {
1662 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1663 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1664 b, 1, pmatch, 0) == 0) {
1665 so = pmatch[0].rm_so;
1666 eo = pmatch[0].rm_eo;
1667 if (b[eo] != '\0')
1668 break;
1669 }
1670 } else if (c != '\0') {
1671 s = strchr(b+pp, c);
1672 if (s) {
1673 so = eo = s-b;
1674 eo++;
1675 break;
1676 }
1677 } else {
1678 while (b[rp] == '\n')
1679 rp++;
1680 s = strstr(b+rp, "\n\n");
1681 if (s) {
1682 so = eo = s-b;
1683 while (b[eo] == '\n') eo++;
1684 if (b[eo] != '\0')
1685 break;
1686 }
1687 }
1688 }
1689
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001690 if (a > 0) {
1691 memmove(m, (const void *)(m+a), p+1);
1692 b = m;
1693 a = 0;
1694 }
1695
1696 qrealloc(&m, a+p+128, &size);
1697 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001698 pp = p;
1699 p += safe_read(fd, b+p, size-p-1);
1700 if (p < pp) {
1701 p = 0;
1702 r = 0;
1703 setvar_i(V[ERRNO], errno);
1704 }
1705 b[p] = '\0';
1706
1707 } while (p > pp);
1708
1709 if (p == 0) {
1710 r--;
1711 } else {
1712 c = b[so]; b[so] = '\0';
1713 setvar_s(v, b+rp);
1714 v->type |= VF_USER;
1715 b[so] = c;
1716 c = b[eo]; b[eo] = '\0';
1717 setvar_s(V[RT], b+so);
1718 b[eo] = c;
1719 }
1720
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00001721 rsm->buffer = m;
1722 rsm->adv = a + eo;
1723 rsm->pos = p - eo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001724 rsm->size = size;
1725
1726 return r;
1727}
1728
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00001729static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001730{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001731 int r=0;
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00001732 char c;
1733 const char *s=format;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001734
1735 if (int_as_int && n == (int)n) {
1736 r = snprintf(b, size, "%d", (int)n);
1737 } else {
1738 do { c = *s; } while (*s && *++s);
1739 if (strchr("diouxX", c)) {
1740 r = snprintf(b, size, format, (int)n);
1741 } else if (strchr("eEfgG", c)) {
1742 r = snprintf(b, size, format, n);
1743 } else {
1744 runtime_error(EMSG_INV_FMT);
1745 }
1746 }
1747 return r;
1748}
1749
1750
1751/* formatted output into an allocated buffer, return ptr to buffer */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001752static char *awk_printf(node *n)
1753{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001754 char *b = NULL;
1755 char *fmt, *s, *s1, *f;
1756 int i, j, incr, bsize;
1757 char c, c1;
1758 var *v, *arg;
1759
1760 v = nvalloc(1);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001761 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
Glenn L McGrath545106f2002-11-11 06:21:00 +00001762
1763 i = 0;
1764 while (*f) {
1765 s = f;
1766 while (*f && (*f != '%' || *(++f) == '%'))
1767 f++;
Eric Andersenc7bda1c2004-03-15 08:29:22 +00001768 while (*f && !isalpha(*f))
Glenn L McGrath545106f2002-11-11 06:21:00 +00001769 f++;
1770
1771 incr = (f - s) + MAXVARFMT;
1772 qrealloc(&b, incr+i, &bsize);
1773 c = *f; if (c != '\0') f++;
1774 c1 = *f ; *f = '\0';
1775 arg = evaluate(nextarg(&n), v);
1776
1777 j = i;
1778 if (c == 'c' || !c) {
1779 i += sprintf(b+i, s,
1780 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1781
1782 } else if (c == 's') {
1783 s1 = getvar_s(arg);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001784 qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001785 i += sprintf(b+i, s, s1);
1786
1787 } else {
1788 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1789 }
1790 *f = c1;
1791
1792 /* if there was an error while sprintf, return value is negative */
1793 if (i < j) i = j;
1794
1795 }
1796
1797 b = xrealloc(b, i+1);
1798 free(fmt);
1799 nvfree(v);
1800 b[i] = '\0';
1801 return b;
1802}
1803
1804/* common substitution routine
1805 * replace (nm) substring of (src) that match (n) with (repl), store
1806 * result into (dest), return number of substitutions. If nm=0, replace
1807 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1808 * subexpression matching (\1-\9)
1809 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001810static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1811{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001812 char *ds = NULL;
1813 char *sp, *s;
1814 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1815 regmatch_t pmatch[10];
1816 regex_t sreg, *re;
1817
1818 re = as_regex(rn, &sreg);
1819 if (! src) src = V[F0];
1820 if (! dest) dest = V[F0];
1821
1822 i = di = 0;
1823 sp = getvar_s(src);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001824 rl = bb_strlen(repl);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001825 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1826 so = pmatch[0].rm_so;
1827 eo = pmatch[0].rm_eo;
1828
1829 qrealloc(&ds, di + eo + rl, &dssize);
1830 memcpy(ds + di, sp, eo);
1831 di += eo;
1832 if (++i >= nm) {
1833 /* replace */
1834 di -= (eo - so);
1835 nbs = 0;
1836 for (s = repl; *s; s++) {
1837 ds[di++] = c = *s;
1838 if (c == '\\') {
1839 nbs++;
1840 continue;
1841 }
1842 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1843 di -= ((nbs + 3) >> 1);
1844 j = 0;
1845 if (c != '&') {
1846 j = c - '0';
1847 nbs++;
1848 }
1849 if (nbs % 2) {
1850 ds[di++] = c;
1851 } else {
1852 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1853 qrealloc(&ds, di + rl + n, &dssize);
1854 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1855 di += n;
1856 }
1857 }
1858 nbs = 0;
1859 }
1860 }
1861
1862 sp += eo;
1863 if (i == nm) break;
1864 if (eo == so) {
1865 if (! (ds[di++] = *sp++)) break;
1866 }
1867 }
1868
1869 qrealloc(&ds, di + strlen(sp), &dssize);
1870 strcpy(ds + di, sp);
1871 setvar_p(dest, ds);
1872 if (re == &sreg) regfree(re);
1873 return i;
1874}
1875
Mike Frysinger10a11e22005-09-27 02:23:02 +00001876static var *exec_builtin(node *op, var *res)
1877{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001878 int (*to_xxx)(int);
1879 var *tv;
1880 node *an[4];
1881 var *av[4];
1882 char *as[4];
1883 regmatch_t pmatch[2];
1884 regex_t sreg, *re;
1885 static tsplitter tspl;
1886 node *spl;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001887 uint32_t isr, info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001888 int nargs;
1889 time_t tt;
1890 char *s, *s1;
1891 int i, l, ll, n;
1892
1893 tv = nvalloc(4);
1894 isr = info = op->info;
1895 op = op->l.n;
1896
1897 av[2] = av[3] = NULL;
1898 for (i=0 ; i<4 && op ; i++) {
1899 an[i] = nextarg(&op);
1900 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1901 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1902 isr >>= 1;
1903 }
1904
1905 nargs = i;
1906 if (nargs < (info >> 30))
1907 runtime_error(EMSG_TOO_FEW_ARGS);
1908
1909 switch (info & OPNMASK) {
1910
1911 case B_a2:
1912#ifdef CONFIG_FEATURE_AWK_MATH
1913 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1914#else
1915 runtime_error(EMSG_NO_MATH);
1916#endif
1917 break;
1918
1919 case B_sp:
1920 if (nargs > 2) {
1921 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1922 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1923 } else {
1924 spl = &fsplitter.n;
1925 }
1926
1927 n = awk_split(as[0], spl, &s);
1928 s1 = s;
1929 clear_array(iamarray(av[1]));
1930 for (i=1; i<=n; i++)
1931 setari_u(av[1], i, nextword(&s1));
1932 free(s);
1933 setvar_i(res, n);
1934 break;
1935
1936 case B_ss:
Manuel Novoa III cad53642003-03-19 09:13:01 +00001937 l = bb_strlen(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001938 i = getvar_i(av[1]) - 1;
1939 if (i>l) i=l; if (i<0) i=0;
1940 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1941 if (n<0) n=0;
1942 s = xmalloc(n+1);
1943 strncpy(s, as[0]+i, n);
1944 s[n] = '\0';
1945 setvar_p(res, s);
1946 break;
1947
1948 case B_lo:
1949 to_xxx = tolower;
1950 goto lo_cont;
1951
1952 case B_up:
1953 to_xxx = toupper;
1954lo_cont:
Manuel Novoa III cad53642003-03-19 09:13:01 +00001955 s1 = s = bb_xstrdup(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001956 while (*s1) {
1957 *s1 = (*to_xxx)(*s1);
1958 s1++;
1959 }
1960 setvar_p(res, s);
1961 break;
1962
1963 case B_ix:
1964 n = 0;
Manuel Novoa III cad53642003-03-19 09:13:01 +00001965 ll = bb_strlen(as[1]);
1966 l = bb_strlen(as[0]) - ll;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001967 if (ll > 0 && l >= 0) {
1968 if (! icase) {
1969 s = strstr(as[0], as[1]);
1970 if (s) n = (s - as[0]) + 1;
1971 } else {
1972 /* this piece of code is terribly slow and
1973 * really should be rewritten
1974 */
1975 for (i=0; i<=l; i++) {
1976 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1977 n = i+1;
1978 break;
1979 }
1980 }
1981 }
1982 }
1983 setvar_i(res, n);
1984 break;
1985
1986 case B_ti:
1987 if (nargs > 1)
1988 tt = getvar_i(av[1]);
1989 else
1990 time(&tt);
1991 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1992 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1993 buf[i] = '\0';
1994 setvar_s(res, buf);
1995 break;
1996
1997 case B_ma:
1998 re = as_regex(an[1], &sreg);
1999 n = regexec(re, as[0], 1, pmatch, 0);
2000 if (n == 0) {
2001 pmatch[0].rm_so++;
2002 pmatch[0].rm_eo++;
2003 } else {
2004 pmatch[0].rm_so = 0;
2005 pmatch[0].rm_eo = -1;
2006 }
2007 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2008 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2009 setvar_i(res, pmatch[0].rm_so);
2010 if (re == &sreg) regfree(re);
2011 break;
2012
2013 case B_ge:
2014 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2015 break;
2016
2017 case B_gs:
2018 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2019 break;
2020
2021 case B_su:
2022 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2023 break;
2024 }
2025
2026 nvfree(tv);
2027 return res;
2028}
2029
2030/*
2031 * Evaluate node - the heart of the program. Supplied with subtree
2032 * and place where to store result. returns ptr to result.
2033 */
2034#define XC(n) ((n) >> 8)
2035
Mike Frysinger10a11e22005-09-27 02:23:02 +00002036static var *evaluate(node *op, var *res)
2037{
Mike Frysingerde2b9382005-09-27 03:18:00 +00002038 /* This procedure is recursive so we should count every byte */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002039 static var *fnargs = NULL;
2040 static unsigned int seed = 1;
2041 static regex_t sreg;
2042 node *op1;
2043 var *v1;
2044 union {
2045 var *v;
2046 char *s;
2047 double d;
2048 int i;
2049 } L, R;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00002050 uint32_t opinfo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002051 short opn;
2052 union {
2053 char *s;
2054 rstream *rsm;
2055 FILE *F;
2056 var *v;
2057 regex_t *re;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00002058 uint32_t info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002059 } X;
2060
2061 if (! op)
2062 return setvar_s(res, NULL);
2063
2064 v1 = nvalloc(2);
2065
2066 while (op) {
2067
2068 opinfo = op->info;
2069 opn = (short)(opinfo & OPNMASK);
2070 lineno = op->lineno;
2071
Mike Frysingerde2b9382005-09-27 03:18:00 +00002072 /* execute inevitable things */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002073 op1 = op->l.n;
2074 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2075 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2076 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2077 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2078 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2079
2080 switch (XC(opinfo & OPCLSMASK)) {
2081
2082 /* -- iterative node type -- */
2083
2084 /* test pattern */
2085 case XC( OC_TEST ):
2086 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2087 /* it's range pattern */
2088 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2089 op->info |= OF_CHECKED;
2090 if (ptest(op1->r.n))
2091 op->info &= ~OF_CHECKED;
2092
2093 op = op->a.n;
2094 } else {
2095 op = op->r.n;
2096 }
2097 } else {
2098 op = (ptest(op1)) ? op->a.n : op->r.n;
2099 }
2100 break;
2101
2102 /* just evaluate an expression, also used as unconditional jump */
2103 case XC( OC_EXEC ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002104 break;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002105
2106 /* branch, used in if-else and various loops */
2107 case XC( OC_BR ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002108 op = istrue(L.v) ? op->a.n : op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002109 break;
2110
2111 /* initialize for-in loop */
2112 case XC( OC_WALKINIT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002113 hashwalk_init(L.v, iamarray(R.v));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002114 break;
2115
2116 /* get next array item */
2117 case XC( OC_WALKNEXT ):
2118 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2119 break;
2120
2121 case XC( OC_PRINT ):
2122 case XC( OC_PRINTF ):
2123 X.F = stdout;
Mike Frysingerde2b9382005-09-27 03:18:00 +00002124 if (op->r.n) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002125 X.rsm = newfile(R.s);
2126 if (! X.rsm->F) {
2127 if (opn == '|') {
2128 if((X.rsm->F = popen(R.s, "w")) == NULL)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002129 bb_perror_msg_and_die("popen");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002130 X.rsm->is_pipe = 1;
2131 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002132 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002133 }
2134 }
2135 X.F = X.rsm->F;
2136 }
2137
2138 if ((opinfo & OPCLSMASK) == OC_PRINT) {
Mike Frysingerde2b9382005-09-27 03:18:00 +00002139 if (! op1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002140 fputs(getvar_s(V[F0]), X.F);
2141 } else {
2142 while (op1) {
2143 L.v = evaluate(nextarg(&op1), v1);
2144 if (L.v->type & VF_NUMBER) {
2145 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2146 getvar_i(L.v), TRUE);
2147 fputs(buf, X.F);
2148 } else {
2149 fputs(getvar_s(L.v), X.F);
2150 }
2151
2152 if (op1) fputs(getvar_s(V[OFS]), X.F);
2153 }
2154 }
2155 fputs(getvar_s(V[ORS]), X.F);
2156
2157 } else { /* OC_PRINTF */
2158 L.s = awk_printf(op1);
2159 fputs(L.s, X.F);
2160 free(L.s);
2161 }
2162 fflush(X.F);
2163 break;
2164
2165 case XC( OC_DELETE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002166 X.info = op1->info & OPCLSMASK;
2167 if (X.info == OC_VAR) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002168 R.v = op1->l.v;
2169 } else if (X.info == OC_FNARG) {
2170 R.v = &fnargs[op1->l.i];
2171 } else {
2172 runtime_error(EMSG_NOT_ARRAY);
2173 }
2174
Mike Frysingerde2b9382005-09-27 03:18:00 +00002175 if (op1->r.n) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002176 clrvar(L.v);
2177 L.s = getvar_s(evaluate(op1->r.n, v1));
2178 hash_remove(iamarray(R.v), L.s);
2179 } else {
2180 clear_array(iamarray(R.v));
2181 }
2182 break;
2183
2184 case XC( OC_NEWSOURCE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002185 programname = op->l.s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002186 break;
2187
2188 case XC( OC_RETURN ):
2189 copyvar(res, L.v);
2190 break;
2191
2192 case XC( OC_NEXTFILE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002193 nextfile = TRUE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002194 case XC( OC_NEXT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002195 nextrec = TRUE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002196 case XC( OC_DONE ):
2197 clrvar(res);
2198 break;
2199
2200 case XC( OC_EXIT ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002201 awk_exit(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002202
2203 /* -- recursive node type -- */
2204
2205 case XC( OC_VAR ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002206 L.v = op->l.v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002207 if (L.v == V[NF])
2208 split_f0();
2209 goto v_cont;
2210
2211 case XC( OC_FNARG ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002212 L.v = &fnargs[op->l.i];
Glenn L McGrath545106f2002-11-11 06:21:00 +00002213
2214v_cont:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002215 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002216 break;
2217
2218 case XC( OC_IN ):
2219 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2220 break;
2221
2222 case XC( OC_REGEXP ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002223 op1 = op;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002224 L.s = getvar_s(V[F0]);
2225 goto re_cont;
2226
2227 case XC( OC_MATCH ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002228 op1 = op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002229re_cont:
2230 X.re = as_regex(op1, &sreg);
2231 R.i = regexec(X.re, L.s, 0, NULL, 0);
2232 if (X.re == &sreg) regfree(X.re);
2233 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2234 break;
2235
2236 case XC( OC_MOVE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002237 /* if source is a temporary string, jusk relink it to dest */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002238 if (R.v == v1+1 && R.v->string) {
2239 res = setvar_p(L.v, R.v->string);
2240 R.v->string = NULL;
2241 } else {
Mike Frysingerde2b9382005-09-27 03:18:00 +00002242 res = copyvar(L.v, R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002243 }
2244 break;
2245
2246 case XC( OC_TERNARY ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002247 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002248 runtime_error(EMSG_POSSIBLE_ERROR);
2249 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2250 break;
2251
2252 case XC( OC_FUNC ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002253 if (! op->r.f->body.first)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002254 runtime_error(EMSG_UNDEF_FUNC);
2255
2256 X.v = R.v = nvalloc(op->r.f->nargs+1);
2257 while (op1) {
2258 L.v = evaluate(nextarg(&op1), v1);
2259 copyvar(R.v, L.v);
2260 R.v->type |= VF_CHILD;
2261 R.v->x.parent = L.v;
2262 if (++R.v - X.v >= op->r.f->nargs)
2263 break;
2264 }
2265
2266 R.v = fnargs;
2267 fnargs = X.v;
2268
2269 L.s = programname;
2270 res = evaluate(op->r.f->body.first, res);
2271 programname = L.s;
2272
2273 nvfree(fnargs);
2274 fnargs = R.v;
2275 break;
2276
2277 case XC( OC_GETLINE ):
2278 case XC( OC_PGETLINE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002279 if (op1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002280 X.rsm = newfile(L.s);
2281 if (! X.rsm->F) {
2282 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2283 X.rsm->F = popen(L.s, "r");
2284 X.rsm->is_pipe = TRUE;
2285 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002286 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002287 }
2288 }
2289 } else {
2290 if (! iF) iF = next_input_file();
2291 X.rsm = iF;
2292 }
2293
2294 if (! X.rsm->F) {
2295 setvar_i(V[ERRNO], errno);
2296 setvar_i(res, -1);
2297 break;
2298 }
2299
2300 if (! op->r.n)
2301 R.v = V[F0];
2302
2303 L.i = awk_getline(X.rsm, R.v);
2304 if (L.i > 0) {
2305 if (! op1) {
2306 incvar(V[FNR]);
2307 incvar(V[NR]);
2308 }
2309 }
2310 setvar_i(res, L.i);
2311 break;
2312
Mike Frysingerde2b9382005-09-27 03:18:00 +00002313 /* simple builtins */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002314 case XC( OC_FBLTIN ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002315 switch (opn) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002316
2317 case F_in:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002318 R.d = (int)L.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002319 break;
2320
2321 case F_rn:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002322 R.d = (double)rand() / (double)RAND_MAX;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002323 break;
2324
2325#ifdef CONFIG_FEATURE_AWK_MATH
2326 case F_co:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002327 R.d = cos(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002328 break;
2329
2330 case F_ex:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002331 R.d = exp(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002332 break;
2333
2334 case F_lg:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002335 R.d = log(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002336 break;
2337
2338 case F_si:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002339 R.d = sin(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002340 break;
2341
2342 case F_sq:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002343 R.d = sqrt(L.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002344 break;
2345#else
2346 case F_co:
2347 case F_ex:
2348 case F_lg:
2349 case F_si:
2350 case F_sq:
2351 runtime_error(EMSG_NO_MATH);
2352 break;
2353#endif
2354
2355 case F_sr:
2356 R.d = (double)seed;
2357 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2358 srand(seed);
2359 break;
2360
2361 case F_ti:
2362 R.d = time(NULL);
2363 break;
2364
2365 case F_le:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002366 if (! op1)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002367 L.s = getvar_s(V[F0]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00002368 R.d = bb_strlen(L.s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002369 break;
2370
2371 case F_sy:
2372 fflush(NULL);
Rob Landley51843362006-01-09 05:26:58 +00002373 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002374 break;
2375
2376 case F_ff:
2377 if (! op1)
2378 fflush(stdout);
2379 else {
2380 if (L.s && *L.s) {
2381 X.rsm = newfile(L.s);
2382 fflush(X.rsm->F);
2383 } else {
2384 fflush(NULL);
2385 }
2386 }
2387 break;
2388
2389 case F_cl:
2390 X.rsm = (rstream *)hash_search(fdhash, L.s);
2391 if (X.rsm) {
2392 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00002393 free(X.rsm->buffer);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002394 hash_remove(fdhash, L.s);
2395 }
2396 if (R.i != 0)
2397 setvar_i(V[ERRNO], errno);
2398 R.d = (double)R.i;
2399 break;
2400 }
2401 setvar_i(res, R.d);
2402 break;
2403
2404 case XC( OC_BUILTIN ):
2405 res = exec_builtin(op, res);
2406 break;
2407
2408 case XC( OC_SPRINTF ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002409 setvar_p(res, awk_printf(op1));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002410 break;
2411
2412 case XC( OC_UNARY ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002413 X.v = R.v;
2414 L.d = R.d = getvar_i(R.v);
2415 switch (opn) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002416 case 'P':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002417 L.d = ++R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002418 goto r_op_change;
2419 case 'p':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002420 R.d++;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002421 goto r_op_change;
2422 case 'M':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002423 L.d = --R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002424 goto r_op_change;
2425 case 'm':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002426 R.d--;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002427 goto r_op_change;
2428 case '!':
2429 L.d = istrue(X.v) ? 0 : 1;
2430 break;
2431 case '-':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002432 L.d = -R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002433 break;
2434 r_op_change:
2435 setvar_i(X.v, R.d);
2436 }
2437 setvar_i(res, L.d);
2438 break;
2439
2440 case XC( OC_FIELD ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002441 R.i = (int)getvar_i(R.v);
2442 if (R.i == 0) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002443 res = V[F0];
2444 } else {
2445 split_f0();
2446 if (R.i > nfields)
2447 fsrealloc(R.i);
2448
2449 res = &Fields[R.i-1];
2450 }
2451 break;
2452
2453 /* concatenation (" ") and index joining (",") */
2454 case XC( OC_CONCAT ):
2455 case XC( OC_COMMA ):
Manuel Novoa III cad53642003-03-19 09:13:01 +00002456 opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
Mike Frysingerde2b9382005-09-27 03:18:00 +00002457 X.s = (char *)xmalloc(opn);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002458 strcpy(X.s, L.s);
2459 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2460 L.s = getvar_s(V[SUBSEP]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00002461 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002462 strcat(X.s, L.s);
2463 }
2464 strcat(X.s, R.s);
2465 setvar_p(res, X.s);
2466 break;
2467
2468 case XC( OC_LAND ):
2469 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2470 break;
2471
2472 case XC( OC_LOR ):
2473 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2474 break;
2475
2476 case XC( OC_BINARY ):
2477 case XC( OC_REPLACE ):
Mike Frysingerde2b9382005-09-27 03:18:00 +00002478 R.d = getvar_i(R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002479 switch (opn) {
2480 case '+':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002481 L.d += R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002482 break;
2483 case '-':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002484 L.d -= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002485 break;
2486 case '*':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002487 L.d *= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002488 break;
2489 case '/':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002490 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2491 L.d /= R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002492 break;
2493 case '&':
2494#ifdef CONFIG_FEATURE_AWK_MATH
Mike Frysingerde2b9382005-09-27 03:18:00 +00002495 L.d = pow(L.d, R.d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002496#else
2497 runtime_error(EMSG_NO_MATH);
2498#endif
2499 break;
2500 case '%':
Mike Frysingerde2b9382005-09-27 03:18:00 +00002501 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2502 L.d -= (int)(L.d / R.d) * R.d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002503 break;
2504 }
2505 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2506 break;
2507
2508 case XC( OC_COMPARE ):
2509 if (is_numeric(L.v) && is_numeric(R.v)) {
2510 L.d = getvar_i(L.v) - getvar_i(R.v);
2511 } else {
2512 L.s = getvar_s(L.v);
2513 R.s = getvar_s(R.v);
2514 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2515 }
2516 switch (opn & 0xfe) {
2517 case 0:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002518 R.i = (L.d > 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002519 break;
2520 case 2:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002521 R.i = (L.d >= 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002522 break;
2523 case 4:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002524 R.i = (L.d == 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002525 break;
2526 }
2527 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2528 break;
2529
2530 default:
Mike Frysingerde2b9382005-09-27 03:18:00 +00002531 runtime_error(EMSG_POSSIBLE_ERROR);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002532 }
2533 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2534 op = op->a.n;
2535 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2536 break;
2537 if (nextrec)
2538 break;
2539 }
2540 nvfree(v1);
2541 return res;
2542}
2543
2544
2545/* -------- main & co. -------- */
2546
Mike Frysinger10a11e22005-09-27 02:23:02 +00002547static int awk_exit(int r)
2548{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002549 unsigned int i;
2550 hash_item *hi;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002551 static var tv;
2552
2553 if (! exiting) {
2554 exiting = TRUE;
Glenn L McGrathca29ffc2004-09-24 09:24:27 +00002555 nextrec = FALSE;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002556 evaluate(endseq.first, &tv);
2557 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002558
2559 /* waiting for children */
2560 for (i=0; i<fdhash->csize; i++) {
2561 hi = fdhash->items[i];
2562 while(hi) {
2563 if (hi->data.rs.F && hi->data.rs.is_pipe)
2564 pclose(hi->data.rs.F);
2565 hi = hi->next;
2566 }
2567 }
2568
2569 exit(r);
2570}
2571
2572/* if expr looks like "var=value", perform assignment and return 1,
2573 * otherwise return 0 */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00002574static int is_assignment(const char *expr)
Mike Frysinger10a11e22005-09-27 02:23:02 +00002575{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002576 char *exprc, *s, *s0, *s1;
2577
Manuel Novoa III cad53642003-03-19 09:13:01 +00002578 exprc = bb_xstrdup(expr);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002579 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2580 free(exprc);
2581 return FALSE;
2582 }
2583
2584 *(s++) = '\0';
2585 s0 = s1 = s;
2586 while (*s)
2587 *(s1++) = nextchar(&s);
2588
2589 *s1 = '\0';
2590 setvar_u(newvar(exprc), s0);
2591 free(exprc);
2592 return TRUE;
2593}
2594
2595/* switch to next input file */
Mike Frysinger10a11e22005-09-27 02:23:02 +00002596static rstream *next_input_file(void)
2597{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002598 static rstream rsm;
2599 FILE *F = NULL;
2600 char *fname, *ind;
2601 static int files_happen = FALSE;
2602
2603 if (rsm.F) fclose(rsm.F);
2604 rsm.F = NULL;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002605 rsm.pos = rsm.adv = 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002606
2607 do {
2608 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2609 if (files_happen)
2610 return NULL;
2611 fname = "-";
2612 F = stdin;
2613 } else {
2614 ind = getvar_s(incvar(V[ARGIND]));
2615 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2616 if (fname && *fname && !is_assignment(fname))
2617 F = afopen(fname, "r");
2618 }
2619 } while (!F);
2620
2621 files_happen = TRUE;
2622 setvar_s(V[FILENAME], fname);
2623 rsm.F = F;
2624 return &rsm;
2625}
2626
Mike Frysinger10a11e22005-09-27 02:23:02 +00002627extern int awk_main(int argc, char **argv)
2628{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002629 char *s, *s1;
2630 int i, j, c;
2631 var *v;
2632 static var tv;
2633 char **envp;
2634 static int from_file = FALSE;
2635 rstream *rsm;
2636 FILE *F, *stdfiles[3];
2637 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2638
2639 /* allocate global buffer */
2640 buf = xmalloc(MAXVARFMT+1);
2641
2642 vhash = hash_init();
2643 ahash = hash_init();
2644 fdhash = hash_init();
2645 fnhash = hash_init();
2646
2647 /* initialize variables */
2648 for (i=0; *vNames; i++) {
2649 V[i] = v = newvar(nextword(&vNames));
2650 if (*vValues != '\377')
2651 setvar_s(v, nextword(&vValues));
2652 else
2653 setvar_i(v, 0);
2654
2655 if (*vNames == '*') {
2656 v->type |= VF_SPECIAL;
2657 vNames++;
2658 }
2659 }
2660
2661 handle_special(V[FS]);
2662 handle_special(V[RS]);
2663
2664 stdfiles[0] = stdin;
2665 stdfiles[1] = stdout;
2666 stdfiles[2] = stderr;
2667 for (i=0; i<3; i++) {
2668 rsm = newfile(nextword(&stdnames));
2669 rsm->F = stdfiles[i];
2670 }
2671
2672 for (envp=environ; *envp; envp++) {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002673 s = bb_xstrdup(*envp);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002674 s1 = strchr(s, '=');
Eric Andersen67776be2004-07-30 23:52:08 +00002675 if (!s1) {
2676 goto keep_going;
2677 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002678 *(s1++) = '\0';
2679 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
Eric Andersen67776be2004-07-30 23:52:08 +00002680keep_going:
Glenn L McGrath545106f2002-11-11 06:21:00 +00002681 free(s);
2682 }
2683
2684 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2685 switch (c) {
2686 case 'F':
2687 setvar_s(V[FS], optarg);
2688 break;
2689 case 'v':
2690 if (! is_assignment(optarg))
Manuel Novoa III cad53642003-03-19 09:13:01 +00002691 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002692 break;
2693 case 'f':
2694 from_file = TRUE;
2695 F = afopen(programname = optarg, "r");
2696 s = NULL;
2697 /* one byte is reserved for some trick in next_token */
2698 for (i=j=1; j>0; i+=j) {
2699 s = (char *)xrealloc(s, i+4096);
2700 j = fread(s+i, 1, 4094, F);
2701 }
2702 s[i] = '\0';
2703 fclose(F);
2704 parse_program(s+1);
2705 free(s);
2706 break;
2707 case 'W':
Manuel Novoa III cad53642003-03-19 09:13:01 +00002708 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002709 break;
2710
2711 default:
Manuel Novoa III cad53642003-03-19 09:13:01 +00002712 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002713 }
2714 }
2715
2716 if (!from_file) {
2717 if (argc == optind)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002718 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002719 programname="cmd. line";
2720 parse_program(argv[optind++]);
2721
2722 }
2723
2724 /* fill in ARGV array */
2725 setvar_i(V[ARGC], argc - optind + 1);
2726 setari_u(V[ARGV], 0, "awk");
2727 for(i=optind; i < argc; i++)
2728 setari_u(V[ARGV], i+1-optind, argv[i]);
2729
2730 evaluate(beginseq.first, &tv);
2731 if (! mainseq.first && ! endseq.first)
2732 awk_exit(EXIT_SUCCESS);
2733
2734 /* input file could already be opened in BEGIN block */
2735 if (! iF) iF = next_input_file();
2736
2737 /* passing through input files */
2738 while (iF) {
2739
2740 nextfile = FALSE;
2741 setvar_i(V[FNR], 0);
2742
2743 while ((c = awk_getline(iF, V[F0])) > 0) {
2744
2745 nextrec = FALSE;
2746 incvar(V[NR]);
2747 incvar(V[FNR]);
2748 evaluate(mainseq.first, &tv);
2749
2750 if (nextfile)
2751 break;
2752 }
2753
2754 if (c < 0)
2755 runtime_error(strerror(errno));
2756
2757 iF = next_input_file();
2758
2759 }
2760
Glenn L McGrath545106f2002-11-11 06:21:00 +00002761 awk_exit(EXIT_SUCCESS);
2762
2763 return 0;
2764}
2765