blob: 079d0bde579fed62fef01192caeb694b9bd006c1 [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * awk implementation for busybox
 *
 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
 *
 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 */
Sven-Göran Berghf200f732013-11-12 14:18:25 +01009//config:config AWK
Denys Vlasenkob097a842018-12-28 03:20:17 +010010//config: bool "awk (23 kb)"
Sven-Göran Berghf200f732013-11-12 14:18:25 +010011//config: default y
12//config: help
Denys Vlasenko68b653b2017-07-27 10:53:09 +020013//config: Awk is used as a pattern scanning and processing language.
Sven-Göran Berghf200f732013-11-12 14:18:25 +010014//config:
15//config:config FEATURE_AWK_LIBM
16//config: bool "Enable math functions (requires libm)"
17//config: default y
18//config: depends on AWK
19//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020020//config: Enable math functions of the Awk programming language.
21//config: NOTE: This requires libm to be present for linking.
Sven-Göran Berghf200f732013-11-12 14:18:25 +010022//config:
23//config:config FEATURE_AWK_GNU_EXTENSIONS
24//config: bool "Enable a few GNU extensions"
25//config: default y
26//config: depends on AWK
27//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020028//config: Enable a few features from gawk:
29//config: * command line option -e AWK_PROGRAM
30//config: * simultaneous use of -f and -e on the command line.
31//config: This enables the use of awk library files.
32//config: Example: awk -f mylib.awk -e '{print myfunction($1);}' ...
Sven-Göran Berghf200f732013-11-12 14:18:25 +010033
34//applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
35
36//kbuild:lib-$(CONFIG_AWK) += awk.o
37
Pere Orga6a3e01d2011-04-01 22:56:30 +020038//usage:#define awk_trivial_usage
39//usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
40//usage:#define awk_full_usage "\n\n"
Denys Vlasenko66426762011-06-05 03:58:28 +020041//usage: " -v VAR=VAL Set variable"
Pere Orga6a3e01d2011-04-01 22:56:30 +020042//usage: "\n -F SEP Use SEP as field separator"
43//usage: "\n -f FILE Read program from FILE"
Sven-Göran Berghf200f732013-11-12 14:18:25 +010044//usage: IF_FEATURE_AWK_GNU_EXTENSIONS(
45//usage: "\n -e AWK_PROGRAM"
46//usage: )
Pere Orga6a3e01d2011-04-01 22:56:30 +020047
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000048#include "libbb.h"
Rob Landleyd921b2e2006-08-03 15:41:12 +000049#include "xregex.h"
50#include <math.h>
Glenn L McGrath545106f2002-11-11 06:21:00 +000051
Denis Vlasenko99912ca2007-04-10 15:43:37 +000052/* This is a NOEXEC applet. Be very careful! */
53
Glenn L McGrath545106f2002-11-11 06:21:00 +000054
/* Debug tracing hooks.
 * Each of the three macros below is defined as a no-op statement.
 * If you comment out one of these, it will be #defined later
 * (by the #ifndef fallbacks that follow) to perform debug printfs
 * to stderr.
 */
#define debug_printf_walker(...)  do {} while (0)
#define debug_printf_eval(...)    do {} while (0)
#define debug_printf_parse(...)   do {} while (0)

#ifndef debug_printf_walker
# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
#endif
#ifndef debug_printf_eval
# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
#endif
#ifndef debug_printf_parse
# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
#else
/* Parse tracing is disabled: turn the token class dumper into a no-op
 * macro as well (the real function is guarded by
 * "#ifndef debug_parse_print_tc").
 */
# define debug_parse_print_tc(...) ((void)0)
#endif
Denys Vlasenkoda62b092010-03-11 12:13:18 +010072
73
Denys Vlasenkoa1799db2017-08-16 19:07:53 +020074/* "+": stop on first non-option:
75 * $ awk 'BEGIN { for(i=1; i<ARGC; ++i) { print i ": " ARGV[i] }}' -argz
76 * 1: -argz
77 */
78#define OPTSTR_AWK "+" \
Denys Vlasenko237bedd2016-07-06 21:58:02 +020079 "F:v:*f:*" \
80 IF_FEATURE_AWK_GNU_EXTENSIONS("e:*") \
Sven-Göran Berghf200f732013-11-12 14:18:25 +010081 "W:"
Sven-Göran Berghf200f732013-11-12 14:18:25 +010082enum {
83 OPTBIT_F, /* define field separator */
84 OPTBIT_v, /* define variable */
85 OPTBIT_f, /* pull in awk program from file */
86 IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
87 OPTBIT_W, /* -W ignored */
88 OPT_F = 1 << OPTBIT_F,
89 OPT_v = 1 << OPTBIT_v,
90 OPT_f = 1 << OPTBIT_f,
91 OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
92 OPT_W = 1 << OPTBIT_W
93};
Denys Vlasenkoda62b092010-03-11 12:13:18 +010094
#define MAXVARFMT       240

/* variable flags (bits stored in var::type) */
#define VF_NUMBER       0x0001	/* 1 = primary type is number */
#define VF_ARRAY        0x0002	/* 1 = it's an array */

#define VF_CACHED       0x0100	/* 1 = num/str value has cached str/num eq */
#define VF_USER         0x0200	/* 1 = user input (may be numeric string) */
#define VF_SPECIAL      0x0400	/* 1 = requires extra handling when changed */
#define VF_WALK         0x0800	/* 1 = variable has alloc'd x.walker list */
#define VF_FSTR         0x1000	/* 1 = don't free() var::string (not malloced, or is owned by something else) */
#define VF_CHILD        0x2000	/* 1 = function arg; x.parent points to source */
#define VF_DIRTY        0x4000	/* 1 = variable was set explicitly */

/* these flags are static, don't change them when value is changed */
#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
Glenn L McGrath545106f2002-11-11 06:21:00 +0000111
/* One entry in a chain of walker lists (see var::x.walker,
 * "list of array elements (for..in)").
 */
typedef struct walker_list {
	char *end;
	char *cur;
	struct walker_list *prev;	/* link to the previous list in the chain */
	/* NOTE(review): presumably over-allocated so that wbuf holds more
	 * than one byte - confirm against the allocating code. The [1] size
	 * is kept because sizeof-based allocations may depend on it. */
	char wbuf[1];
} walker_list;
118
Glenn L McGrath545106f2002-11-11 06:21:00 +0000119/* Variable */
120typedef struct var_s {
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000121 unsigned type; /* flags */
Glenn L McGrath545106f2002-11-11 06:21:00 +0000122 char *string;
Denys Vlasenko6cf6f1e2021-06-30 02:12:27 +0200123 double number;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000124 union {
Denis Vlasenko629563b2007-02-24 17:05:52 +0000125 int aidx; /* func arg idx (for compilation stage) */
126 struct xhash_s *array; /* array ptr */
127 struct var_s *parent; /* for func args, ptr to actual parameter */
Denys Vlasenkoda62b092010-03-11 12:13:18 +0100128 walker_list *walker; /* list of array elements (for..in) */
Glenn L McGrath545106f2002-11-11 06:21:00 +0000129 } x;
130} var;
131
/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
	struct node_s *first;		/* first node of the chain */
	struct node_s *last;		/* last node of the chain */
	const char *programname;
} chain;
138
139/* Function */
140typedef struct func_s {
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000141 unsigned nargs;
Denys Vlasenkod1507102021-06-30 12:23:51 +0200142 smallint defined;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000143 struct chain_s body;
144} func;
145
146/* I/O stream */
147typedef struct rstream_s {
148 FILE *F;
149 char *buffer;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +0000150 int adv;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000151 int size;
152 int pos;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000153 smallint is_pipe;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000154} rstream;
155
156typedef struct hash_item_s {
157 union {
Denis Vlasenkoffba9412007-05-17 23:03:35 +0000158 struct var_s v; /* variable/array hash */
159 struct rstream_s rs; /* redirect streams hash */
160 struct func_s f; /* functions hash */
Glenn L McGrath545106f2002-11-11 06:21:00 +0000161 } data;
Denis Vlasenkoffba9412007-05-17 23:03:35 +0000162 struct hash_item_s *next; /* next in chain */
163 char name[1]; /* really it's longer */
Glenn L McGrath545106f2002-11-11 06:21:00 +0000164} hash_item;
165
/* Hash table header */
typedef struct xhash_s {
	unsigned nel;		/* num of elements */
	unsigned csize;		/* current hash size */
	unsigned nprime;	/* next hash size in PRIMES[] */
	unsigned glen;		/* summary length of item names */
	struct hash_item_s **items;
} xhash;
173
174/* Tree node */
175typedef struct node_s {
Mike Frysingerf87b3e32005-09-27 04:16:22 +0000176 uint32_t info;
Denis Vlasenkocd5c7862007-05-17 16:37:22 +0000177 unsigned lineno;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000178 union {
179 struct node_s *n;
180 var *v;
Denys Vlasenko7b81db12010-03-12 21:04:47 +0100181 int aidx;
Denys Vlasenko2211fa72021-07-03 11:54:01 +0200182 const char *new_progname;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000183 regex_t *re;
184 } l;
185 union {
186 struct node_s *n;
187 regex_t *ire;
188 func *f;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000189 } r;
190 union {
191 struct node_s *n;
192 } a;
193} node;
194
Glenn L McGrath545106f2002-11-11 06:21:00 +0000195typedef struct tsplitter_s {
196 node n;
197 regex_t re[2];
198} tsplitter;
199
/* simple token classes (one bit each) */
/* order and hex values are very important!!! See next_token() */
#define TC_LPAREN       (1 << 0)        /* ( */
#define TC_RPAREN       (1 << 1)        /* ) */
#define TC_REGEXP       (1 << 2)        /* /.../ */
#define TC_OUTRDR       (1 << 3)        /* | > >> */
#define TC_UOPPOST      (1 << 4)        /* unary postfix operator ++ -- */
#define TC_UOPPRE1      (1 << 5)        /* unary prefix operator ++ -- $ */
#define TC_BINOPX       (1 << 6)        /* two-opnd operator */
#define TC_IN           (1 << 7)        /* 'in' */
#define TC_COMMA        (1 << 8)        /* , */
#define TC_PIPE         (1 << 9)        /* input redirection pipe | */
#define TC_UOPPRE2      (1 << 10)       /* unary prefix operator + - ! */
#define TC_ARRTERM      (1 << 11)       /* ] */
#define TC_LBRACE       (1 << 12)       /* { */
#define TC_RBRACE       (1 << 13)       /* } */
#define TC_SEMICOL      (1 << 14)       /* ; */
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)       /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)       /* 'while' */
#define TC_ELSE         (1 << 18)       /* 'else' */
#define TC_BUILTIN      (1 << 19)
/* This costs ~50 bytes of code.
 * A separate class to support deprecated "length" form. If we don't need that
 * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
 * can be merged with TC_BUILTIN:
 */
#define TC_LENGTH       (1 << 20)       /* 'length' */
#define TC_GETLINE      (1 << 21)       /* 'getline' */
#define TC_FUNCDECL     (1 << 22)       /* 'function' 'func' */
#define TC_BEGIN        (1 << 23)       /* 'BEGIN' */
#define TC_END          (1 << 24)       /* 'END' */
#define TC_EOF          (1 << 25)
#define TC_VARIABLE     (1 << 26)       /* name */
#define TC_ARRAY        (1 << 27)       /* name[ */
#define TC_FUNCTION     (1 << 28)       /* name( */
#define TC_STRING       (1 << 29)       /* "..." */
#define TC_NUMBER       (1 << 30)
Glenn L McGrath545106f2002-11-11 06:21:00 +0000238
Denys Vlasenkofd217c12021-06-18 16:35:27 +0200239#ifndef debug_parse_print_tc
Denys Vlasenkodabbeeb2021-07-14 16:58:05 +0200240static void debug_parse_print_tc(uint32_t n)
241{
242 if (n & TC_LPAREN ) debug_printf_parse(" LPAREN" );
243 if (n & TC_RPAREN ) debug_printf_parse(" RPAREN" );
244 if (n & TC_REGEXP ) debug_printf_parse(" REGEXP" );
245 if (n & TC_OUTRDR ) debug_printf_parse(" OUTRDR" );
246 if (n & TC_UOPPOST ) debug_printf_parse(" UOPPOST" );
247 if (n & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" );
248 if (n & TC_BINOPX ) debug_printf_parse(" BINOPX" );
249 if (n & TC_IN ) debug_printf_parse(" IN" );
250 if (n & TC_COMMA ) debug_printf_parse(" COMMA" );
251 if (n & TC_PIPE ) debug_printf_parse(" PIPE" );
252 if (n & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" );
253 if (n & TC_ARRTERM ) debug_printf_parse(" ARRTERM" );
254 if (n & TC_LBRACE ) debug_printf_parse(" LBRACE" );
255 if (n & TC_RBRACE ) debug_printf_parse(" RBRACE" );
256 if (n & TC_SEMICOL ) debug_printf_parse(" SEMICOL" );
257 if (n & TC_NEWLINE ) debug_printf_parse(" NEWLINE" );
258 if (n & TC_STATX ) debug_printf_parse(" STATX" );
259 if (n & TC_WHILE ) debug_printf_parse(" WHILE" );
260 if (n & TC_ELSE ) debug_printf_parse(" ELSE" );
261 if (n & TC_BUILTIN ) debug_printf_parse(" BUILTIN" );
262 if (n & TC_LENGTH ) debug_printf_parse(" LENGTH" );
263 if (n & TC_GETLINE ) debug_printf_parse(" GETLINE" );
264 if (n & TC_FUNCDECL) debug_printf_parse(" FUNCDECL");
265 if (n & TC_BEGIN ) debug_printf_parse(" BEGIN" );
266 if (n & TC_END ) debug_printf_parse(" END" );
267 if (n & TC_EOF ) debug_printf_parse(" EOF" );
268 if (n & TC_VARIABLE) debug_printf_parse(" VARIABLE");
269 if (n & TC_ARRAY ) debug_printf_parse(" ARRAY" );
270 if (n & TC_FUNCTION) debug_printf_parse(" FUNCTION");
271 if (n & TC_STRING ) debug_printf_parse(" STRING" );
272 if (n & TC_NUMBER ) debug_printf_parse(" NUMBER" );
273}
Denys Vlasenkofd217c12021-06-18 16:35:27 +0200274#endif
Glenn L McGrath545106f2002-11-11 06:21:00 +0000275
/* combined token classes ("token [class] sets") */
#define TS_UOPPRE   (TC_UOPPRE1 | TC_UOPPRE2)

#define TS_BINOP    (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
//#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST)
#define TS_OPERAND  (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                    | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
                    | TC_LPAREN | TC_STRING | TC_NUMBER)

#define TS_LVALUE   (TC_VARIABLE | TC_ARRAY)
#define TS_STATEMNT (TC_STATX | TC_WHILE)

/* word tokens, cannot mean something else if not expected */
#define TS_WORD     (TC_IN | TS_STATEMNT | TC_ELSE \
                    | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
                    | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
/* (TC_COMMA is already part of TS_BINOP; it is also listed explicitly) */
#define TS_NOTERM   (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \
                    | TC_SEMICOL | TC_NEWLINE)

/* what can expression begin with */
#define TS_OPSEQ    (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TS_GRPSEQ   (TS_OPSEQ | TS_STATEMNT \
                    | TC_SEMICOL | TC_NEWLINE | TC_LBRACE)

/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
/* operator is inserted between them */
#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \
                    | TC_STRING | TC_NUMBER | TC_UOPPOST \
                    | TC_LENGTH)
#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE)
Glenn L McGrath545106f2002-11-11 06:21:00 +0000309
/* operator flags (bits 16..22 of a token info word) */
#define OF_RES1     0x010000
#define OF_RES2     0x020000
#define OF_STR1     0x040000
#define OF_STR2     0x080000
#define OF_NUM1     0x100000
#define OF_CHECKED  0x200000
#define OF_REQUIRED 0x400000

/* combined operator flags */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_RES2 | OF_STR2)
#define Vx      OF_RES1
#define Rx      OF_REQUIRED
#define VV      (OF_RES1 | OF_RES2)
#define Nx      (OF_RES1 | OF_NUM1)
#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx      (OF_RES1 | OF_STR1)
#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
330
#define OPCLSMASK       0xFF00
#define OPNMASK         0x007F

/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * (for builtins it has different meaning)
 */
#undef P
#undef PRIMASK
#undef PRIMASK2
/* Place x into the highest byte of a 32-bit info word.
 * The argument is parenthesized so that low-precedence expressions
 * such as P(a | b) shift as a whole, and the shift is done on an
 * unsigned 32-bit value: P(0x80) and P(0xc0) do not fit in a signed
 * int, and left-shifting a signed value out of range is undefined.
 */
#define P(x)            ((uint32_t)(x) << 24)
#define PRIMASK         0x7F000000
#define PRIMASK2        0x7E000000
Glenn L McGrath545106f2002-11-11 06:21:00 +0000343
/* Operation classes */
/* Two boundary values inside the OC_xxx numbering:
 * SHIFT_TIL_THIS equals OC_WALKINIT, RECUR_FROM_THIS equals OC_BINARY.
 */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000
enum {
	OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
	OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,

	OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
	OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
	OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,

	OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
	OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
	OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
	OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
	OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
	OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
	OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
	OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
	OC_DONE = 0x2800,

	ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
	ST_WHILE = 0x3300
};
368
/* simple builtins */
enum {
	F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
	F_ti, F_le, F_sy, F_ff, F_cl
};

/* builtins */
enum {
	B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
	B_ge, B_gs, B_su,
	B_an, B_co, B_ls, B_or, B_rs, B_xo,
};
381
382/* tokens and their corresponding info values */
383
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200384#define NTC "\377" /* switch to next token class (tc<<1) */
385#define NTCC '\377'
Glenn L McGrath545106f2002-11-11 06:21:00 +0000386
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000387static const char tokenlist[] ALIGN1 =
Denys Vlasenko9782cb72021-06-29 01:50:47 +0200388 "\1(" NTC /* TC_LPAREN */
389 "\1)" NTC /* TC_RPAREN */
Denys Vlasenko28b00ce2015-10-02 02:41:39 +0200390 "\1/" NTC /* TC_REGEXP */
391 "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */
392 "\2++" "\2--" NTC /* TC_UOPPOST */
393 "\2++" "\2--" "\1$" NTC /* TC_UOPPRE1 */
394 "\2==" "\1=" "\2+=" "\2-=" /* TC_BINOPX */
Denys Vlasenko6a0d7492010-10-23 21:02:15 +0200395 "\2*=" "\2/=" "\2%=" "\2^="
396 "\1+" "\1-" "\3**=" "\2**"
397 "\1/" "\1%" "\1^" "\1*"
398 "\2!=" "\2>=" "\2<=" "\1>"
399 "\1<" "\2!~" "\1~" "\2&&"
400 "\2||" "\1?" "\1:" NTC
Denys Vlasenko28b00ce2015-10-02 02:41:39 +0200401 "\2in" NTC /* TC_IN */
402 "\1," NTC /* TC_COMMA */
403 "\1|" NTC /* TC_PIPE */
404 "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */
405 "\1]" NTC /* TC_ARRTERM */
Denys Vlasenko717200e2021-07-03 00:39:55 +0200406 "\1{" NTC /* TC_LBRACE */
407 "\1}" NTC /* TC_RBRACE */
Denys Vlasenko28b00ce2015-10-02 02:41:39 +0200408 "\1;" NTC /* TC_SEMICOL */
409 "\1\n" NTC /* TC_NEWLINE */
410 "\2if" "\2do" "\3for" "\5break" /* TC_STATX */
Denys Vlasenko6a0d7492010-10-23 21:02:15 +0200411 "\10continue" "\6delete" "\5print"
412 "\6printf" "\4next" "\10nextfile"
413 "\6return" "\4exit" NTC
Denys Vlasenko28b00ce2015-10-02 02:41:39 +0200414 "\5while" NTC /* TC_WHILE */
415 "\4else" NTC /* TC_ELSE */
416 "\3and" "\5compl" "\6lshift" "\2or" /* TC_BUILTIN */
Denys Vlasenko6a0d7492010-10-23 21:02:15 +0200417 "\6rshift" "\3xor"
Denys Vlasenko28b00ce2015-10-02 02:41:39 +0200418 "\5close" "\6system" "\6fflush" "\5atan2"
Denys Vlasenko6a0d7492010-10-23 21:02:15 +0200419 "\3cos" "\3exp" "\3int" "\3log"
420 "\4rand" "\3sin" "\4sqrt" "\5srand"
Denys Vlasenkodabbeeb2021-07-14 16:58:05 +0200421 "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
Denys Vlasenko6a0d7492010-10-23 21:02:15 +0200422 "\5match" "\5split" "\7sprintf" "\3sub"
423 "\6substr" "\7systime" "\10strftime" "\6mktime"
424 "\7tolower" "\7toupper" NTC
Denys Vlasenko28b00ce2015-10-02 02:41:39 +0200425 "\6length" NTC /* TC_LENGTH */
426 "\7getline" NTC /* TC_GETLINE */
427 "\4func" "\10function" NTC /* TC_FUNCDECL */
428 "\5BEGIN" NTC /* TC_BEGIN */
429 "\3END" /* TC_END */
Denys Vlasenko6a0d7492010-10-23 21:02:15 +0200430 /* compiler adds trailing "\0" */
Glenn L McGrath545106f2002-11-11 06:21:00 +0000431 ;
432
Denys Vlasenko965b7952020-11-30 13:03:03 +0100433static const uint32_t tokeninfo[] ALIGN4 = {
Glenn L McGrath545106f2002-11-11 06:21:00 +0000434 0,
435 0,
Denys Vlasenko08ca3132021-07-03 13:57:47 +0200436#define TI_REGEXP OC_REGEXP
437 TI_REGEXP,
Denys Vlasenko6a0d7492010-10-23 21:02:15 +0200438 xS|'a', xS|'w', xS|'|',
439 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
Denys Vlasenkofd217c12021-06-18 16:35:27 +0200440#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
441#define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
442 TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5),
Denys Vlasenko6a0d7492010-10-23 21:02:15 +0200443 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
444 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
445 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
446 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
447 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
Denys Vlasenko08ca3132021-07-03 13:57:47 +0200448#define TI_LESS (OC_COMPARE|VV|P(39)|2)
Denys Vlasenkoac4786b2021-06-20 22:52:29 +0200449 TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
Denys Vlasenko08ca3132021-07-03 13:57:47 +0200450#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?')
451#define TI_COLON (OC_COLON|xx|P(67)|':')
452 OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON,
453#define TI_IN (OC_IN|SV|P(49))
454 TI_IN,
455#define TI_COMMA (OC_COMMA|SS|P(80))
456 TI_COMMA,
457#define TI_PGETLINE (OC_PGETLINE|SV|P(37))
458 TI_PGETLINE,
Denys Vlasenko6a0d7492010-10-23 21:02:15 +0200459 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
460 0, /* ] */
Glenn L McGrath545106f2002-11-11 06:21:00 +0000461 0,
462 0,
463 0,
Denys Vlasenko6a0d7492010-10-23 21:02:15 +0200464 0, /* \n */
465 ST_IF, ST_DO, ST_FOR, OC_BREAK,
Denys Vlasenko3d57a842021-07-11 12:00:31 +0200466 OC_CONTINUE, OC_DELETE|Rx, OC_PRINT,
Denys Vlasenko6a0d7492010-10-23 21:02:15 +0200467 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
468 OC_RETURN|Vx, OC_EXIT|Nx,
Glenn L McGrath545106f2002-11-11 06:21:00 +0000469 ST_WHILE,
Denys Vlasenko6a0d7492010-10-23 21:02:15 +0200470 0, /* else */
Denys Vlasenko786ca192021-07-02 17:32:08 +0200471// OC_B's are builtins with enforced minimum number of arguments (two upper bits).
472// Highest byte bit pattern: nn s3s2s1 v3v2v1
473// nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
Denys Vlasenko37ae8cd2021-07-02 18:55:00 +0200474// OC_F's are builtins with zero or one argument.
Denys Vlasenko08ca3132021-07-03 13:57:47 +0200475// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt
476// Check for no args is present in builtins' code (not in this table): rand, systime
Denys Vlasenko47d91332021-07-02 18:28:12 +0200477// Have one _optional_ arg: fflush, srand, length
478#define OC_B OC_BUILTIN
Denys Vlasenko37ae8cd2021-07-02 18:55:00 +0200479#define OC_F OC_FBLTIN
Denys Vlasenko786ca192021-07-02 17:32:08 +0200480#define A1 P(0x40) /*one arg*/
481#define A2 P(0x80) /*two args*/
482#define A3 P(0xc0) /*three args*/
483#define __v P(1)
484#define _vv P(3)
485#define __s__v P(9)
486#define __s_vv P(0x0b)
487#define __svvv P(0x0f)
488#define _ss_vv P(0x1b)
489#define _s_vv_ P(0x16)
490#define ss_vv_ P(0x36)
Denys Vlasenko37ae8cd2021-07-02 18:55:00 +0200491 OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
492 OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
493 OC_F|F_cl|Sx|Rx, OC_F|F_sy|Sx|Rx, OC_F|F_ff|Sx, OC_B|B_a2|_vv|A2, // close system fflush atan2
494 OC_F|F_co|Nx|Rx, OC_F|F_ex|Nx|Rx, OC_F|F_in|Nx|Rx, OC_F|F_lg|Nx|Rx, // cos exp int log
495 OC_F|F_rn, OC_F|F_si|Nx|Rx, OC_F|F_sq|Nx|Rx, OC_F|F_sr|Nx, // rand sin sqrt srand
496 OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
497 OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF, OC_B|B_su|ss_vv_|A2,// match split sprintf sub
498 OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
499 OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1, // tolower toupper
500 OC_F|F_le|Sx, // length
501 OC_GETLINE|SV, // getline
Denys Vlasenko786ca192021-07-02 17:32:08 +0200502 0, 0, // func function
503 0, // BEGIN
504 0 // END
505#undef A1
506#undef A2
507#undef A3
508#undef OC_B
Denys Vlasenko37ae8cd2021-07-02 18:55:00 +0200509#undef OC_F
Glenn L McGrath545106f2002-11-11 06:21:00 +0000510};
511
512/* internal variable names and their initial values */
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000513/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
Glenn L McGrath545106f2002-11-11 06:21:00 +0000514enum {
Denis Vlasenkoffba9412007-05-17 23:03:35 +0000515 CONVFMT, OFMT, FS, OFS,
Denis Vlasenkof782f522007-01-01 23:51:30 +0000516 ORS, RS, RT, FILENAME,
Denis Vlasenko41d5ebe2009-01-25 01:00:15 +0000517 SUBSEP, F0, ARGIND, ARGC,
518 ARGV, ERRNO, FNR, NR,
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200519 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
Glenn L McGrath545106f2002-11-11 06:21:00 +0000520};
521
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000522static const char vNames[] ALIGN1 =
Denis Vlasenkof782f522007-01-01 23:51:30 +0000523 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
524 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
Denis Vlasenko41d5ebe2009-01-25 01:00:15 +0000525 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
526 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
527 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
Glenn L McGrath545106f2002-11-11 06:21:00 +0000528
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000529static const char vValues[] ALIGN1 =
Denis Vlasenkof782f522007-01-01 23:51:30 +0000530 "%.6g\0" "%.6g\0" " \0" " \0"
531 "\n\0" "\n\0" "\0" "\0"
Denis Vlasenko41d5ebe2009-01-25 01:00:15 +0000532 "\034\0" "\0" "\377";
Glenn L McGrath545106f2002-11-11 06:21:00 +0000533
534/* hash size may grow to these values */
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000535#define FIRST_PRIME 61
536static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
Glenn L McGrath545106f2002-11-11 06:21:00 +0000537
Glenn L McGrath545106f2002-11-11 06:21:00 +0000538
Denis Vlasenkoe16e6e72007-06-07 13:14:53 +0000539/* Globals. Split in two parts so that first one is addressed
Denis Vlasenko9aa5c652009-02-26 11:21:04 +0000540 * with (mostly short) negative offsets.
541 * NB: it's unsafe to put members of type "double"
542 * into globals2 (gcc may fail to align them).
543 */
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000544struct globals {
Denis Vlasenko9aa5c652009-02-26 11:21:04 +0000545 double t_double;
Denis Vlasenko574f2f42008-02-27 18:41:59 +0000546 chain beginseq, mainseq, endseq;
547 chain *seq;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000548 node *break_ptr, *continue_ptr;
549 rstream *iF;
Denys Vlasenko21fbee22021-06-29 14:33:04 +0200550 xhash *ahash; /* argument names, used only while parsing function bodies */
551 xhash *fnhash; /* function names, used only in parsing stage */
552 xhash *vhash; /* variables and arrays */
Denys Vlasenkob3c91a12021-06-29 18:33:25 +0200553 //xhash *fdhash; /* file objects, used only in execution stage */
554 //we are reusing ahash as fdhash, via define (see later)
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000555 const char *g_progname;
556 int g_lineno;
557 int nfields;
558 int maxfields; /* used in fsrealloc() only */
559 var *Fields;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000560 char *g_pos;
Denys Vlasenko4f275032021-06-29 03:27:07 +0200561 char g_saved_ch;
Denis Vlasenkoe16e6e72007-06-07 13:14:53 +0000562 smallint icase;
563 smallint exiting;
564 smallint nextrec;
565 smallint nextfile;
566 smallint is_f0_split;
Denys Vlasenko7b46d112011-09-11 00:30:56 +0200567 smallint t_rollback;
Denys Vlasenkoef5463c2021-07-02 14:53:52 +0200568
569 /* former statics from various functions */
570 smallint next_token__concat_inserted;
571 uint32_t next_token__save_tclass;
572 uint32_t next_token__save_info;
Denis Vlasenkoe16e6e72007-06-07 13:14:53 +0000573};
574struct globals2 {
575 uint32_t t_info; /* often used */
576 uint32_t t_tclass;
577 char *t_string;
578 int t_lineno;
Denis Vlasenkoe16e6e72007-06-07 13:14:53 +0000579
580 var *intvar[NUM_INTERNAL_VARS]; /* often used */
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000581
582 /* former statics from various functions */
583 char *split_f0__fstrings;
584
Denis Vlasenkoe16e6e72007-06-07 13:14:53 +0000585 rstream next_input_file__rsm;
Denys Vlasenkoef5463c2021-07-02 14:53:52 +0200586 smallint next_input_file__files_happen;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000587
Denys Vlasenko4d902ea2021-07-02 22:28:51 +0200588 smalluint exitcode;
589
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000590 unsigned evaluate__seed;
Denys Vlasenkoef5463c2021-07-02 14:53:52 +0200591 var *evaluate__fnargs;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000592 regex_t evaluate__sreg;
593
Denys Vlasenko8b4c4292021-07-01 17:50:26 +0200594 var ptest__tmpvar;
595 var awk_printf__tmpvar;
596 var as_regex__tmpvar;
Denys Vlasenko966cafc2021-07-02 14:33:13 +0200597 var exit__tmpvar;
598 var main__tmpvar;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000599
600 tsplitter exec_builtin__tspl;
601
Denis Vlasenkoe16e6e72007-06-07 13:14:53 +0000602 /* biggest and least used members go last */
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000603 tsplitter fsplitter, rsplitter;
Denys Vlasenkocb6061a2021-06-29 02:43:02 +0200604
605 char g_buf[MAXVARFMT + 1];
Denys Vlasenko3dbc5a92010-02-05 14:54:22 +0100606};
/* Both global blocks live in one allocation (see INIT_G): G1 is the element
 * just below the address held in ptr_to_globals, G starts at that address.
 */
#define G1 (ptr_to_globals[-1])
#define G  (*(struct globals2 *)ptr_to_globals)
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
//char G1size[sizeof(G1)]; // 0x70
//char Gsize[sizeof(G)]; // 0x2f8
/* Trying to keep most of members accessible with short offsets: */
//char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; // 0x7c
/* Short names for members of the first global block (G1) */
#define t_double      (G1.t_double)
#define beginseq      (G1.beginseq)
#define mainseq       (G1.mainseq)
#define endseq        (G1.endseq)
#define seq           (G1.seq)
#define break_ptr     (G1.break_ptr)
#define continue_ptr  (G1.continue_ptr)
#define iF            (G1.iF)
#define ahash         (G1.ahash)
#define fnhash        (G1.fnhash)
#define vhash         (G1.vhash)
/* ahash is cleared after every function parsing, and ends up empty
 * after parsing phase. Thus, we can simply reuse it for fdhash
 * in execution stage.
 */
#define fdhash        ahash
#define g_progname    (G1.g_progname)
#define g_lineno      (G1.g_lineno)
#define nfields       (G1.nfields)
#define maxfields     (G1.maxfields)
#define Fields        (G1.Fields)
#define g_pos         (G1.g_pos)
#define g_saved_ch    (G1.g_saved_ch)
#define icase         (G1.icase)
#define exiting       (G1.exiting)
#define nextrec       (G1.nextrec)
#define nextfile      (G1.nextfile)
#define is_f0_split   (G1.is_f0_split)
#define t_rollback    (G1.t_rollback)
/* Short names for members of the second global block (G) */
#define t_info        (G.t_info)
#define t_tclass      (G.t_tclass)
#define t_string      (G.t_string)
#define t_lineno      (G.t_lineno)
#define intvar        (G.intvar)
#define fsplitter     (G.fsplitter)
#define rsplitter     (G.rsplitter)
#define g_buf         (G.g_buf)
/* Allocate both global blocks as a single zero-filled chunk, point
 * ptr_to_globals at the boundary between them (G1 below, G above),
 * then set the two members whose initial value is not zero.
 */
#define INIT_G() do { \
	SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
	t_tclass = TC_NEWLINE; \
	G.evaluate__seed = 1; \
} while (0)
655
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000656static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
657static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
658static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
659static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
Brian Foley08a514c2019-01-01 13:40:59 -0800660static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments";
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000661static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
662static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
663static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000664static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
Denys Vlasenko2454e672018-04-23 10:53:18 +0200665static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
Glenn L McGrath545106f2002-11-11 06:21:00 +0000666
Denys Vlasenko4d902ea2021-07-02 22:28:51 +0200667static int awk_exit(void) NORETURN;
Denis Vlasenkof782f522007-01-01 23:51:30 +0000668
Denis Vlasenkoc7cc5a92009-04-19 01:27:20 +0000669static void syntax_error(const char *message) NORETURN;
670static void syntax_error(const char *message)
Glenn L McGrathd4036f82002-11-28 09:30:40 +0000671{
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000672 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000673}
674
Glenn L McGrath545106f2002-11-11 06:21:00 +0000675/* ---- hash stuff ---- */
676
/* Compute the hash of a NUL-terminated name.
 * Every character is folded in as hash = hash * 63 + c.
 * The result is not reduced: callers take it modulo the table size.
 */
static unsigned hashidx(const char *name)
{
	unsigned hash = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		hash = hash * 63 + *p;
	return hash;
}
685
686/* create new hash */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000687static xhash *hash_init(void)
688{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000689 xhash *newhash;
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000690
Denys Vlasenkocdeda162009-11-30 01:14:16 +0100691 newhash = xzalloc(sizeof(*newhash));
Glenn L McGrath545106f2002-11-11 06:21:00 +0000692 newhash->csize = FIRST_PRIME;
Denys Vlasenkocdeda162009-11-30 01:14:16 +0100693 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
Glenn L McGrath545106f2002-11-11 06:21:00 +0000694
695 return newhash;
696}
697
Denys Vlasenkob3c91a12021-06-29 18:33:25 +0200698static void hash_clear(xhash *hash)
699{
700 unsigned i;
701 hash_item *hi, *thi;
702
703 for (i = 0; i < hash->csize; i++) {
704 hi = hash->items[i];
705 while (hi) {
706 thi = hi;
707 hi = hi->next;
Denys Vlasenko3aff3b92021-06-29 19:07:36 +0200708//FIXME: this assumes that it's a hash of *variables*:
Denys Vlasenkob3c91a12021-06-29 18:33:25 +0200709 free(thi->data.v.string);
710 free(thi);
711 }
712 hash->items[i] = NULL;
713 }
714 hash->glen = hash->nel = 0;
715}
716
#if 0 //UNUSED
/* Destroy a hash: free all of its items, the bucket array and the header */
static void hash_free(xhash *hash)
{
	hash_clear(hash);
	free(hash->items);
	free(hash);
}
#endif
725
Glenn L McGrath545106f2002-11-11 06:21:00 +0000726/* find item in hash, return ptr to data, NULL if not found */
Denys Vlasenko3aff3b92021-06-29 19:07:36 +0200727static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000728{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000729 hash_item *hi;
730
Denys Vlasenko3aff3b92021-06-29 19:07:36 +0200731 hi = hash->items[idx % hash->csize];
Glenn L McGrath545106f2002-11-11 06:21:00 +0000732 while (hi) {
733 if (strcmp(hi->name, name) == 0)
Denys Vlasenko7b81db12010-03-12 21:04:47 +0100734 return &hi->data;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000735 hi = hi->next;
736 }
737 return NULL;
738}
739
Denys Vlasenko3aff3b92021-06-29 19:07:36 +0200740static void *hash_search(xhash *hash, const char *name)
741{
742 return hash_search3(hash, name, hashidx(name));
743}
744
Glenn L McGrath545106f2002-11-11 06:21:00 +0000745/* grow hash if it becomes too big */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000746static void hash_rebuild(xhash *hash)
747{
Denis Vlasenkof782f522007-01-01 23:51:30 +0000748 unsigned newsize, i, idx;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000749 hash_item **newitems, *hi, *thi;
750
Denis Vlasenko80b8b392007-06-25 10:55:35 +0000751 if (hash->nprime == ARRAY_SIZE(PRIMES))
Glenn L McGrath545106f2002-11-11 06:21:00 +0000752 return;
753
754 newsize = PRIMES[hash->nprime++];
Denys Vlasenkocdeda162009-11-30 01:14:16 +0100755 newitems = xzalloc(newsize * sizeof(newitems[0]));
Glenn L McGrath545106f2002-11-11 06:21:00 +0000756
Denis Vlasenkoffba9412007-05-17 23:03:35 +0000757 for (i = 0; i < hash->csize; i++) {
Glenn L McGrath545106f2002-11-11 06:21:00 +0000758 hi = hash->items[i];
759 while (hi) {
760 thi = hi;
761 hi = thi->next;
762 idx = hashidx(thi->name) % newsize;
763 thi->next = newitems[idx];
764 newitems[idx] = thi;
765 }
766 }
767
768 free(hash->items);
769 hash->csize = newsize;
770 hash->items = newitems;
771}
772
773/* find item in hash, add it if necessary. Return ptr to data */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000774static void *hash_find(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000775{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000776 hash_item *hi;
Denis Vlasenkof782f522007-01-01 23:51:30 +0000777 unsigned idx;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000778 int l;
779
Denys Vlasenko3aff3b92021-06-29 19:07:36 +0200780 idx = hashidx(name);
781 hi = hash_search3(hash, name, idx);
Denis Vlasenkob78c7822007-07-18 18:31:11 +0000782 if (!hi) {
Denys Vlasenko3aff3b92021-06-29 19:07:36 +0200783 if (++hash->nel > hash->csize * 8)
Glenn L McGrath545106f2002-11-11 06:21:00 +0000784 hash_rebuild(hash);
785
Rob Landleya3896512006-05-07 20:20:34 +0000786 l = strlen(name) + 1;
Denis Vlasenko7a676642009-03-15 22:20:31 +0000787 hi = xzalloc(sizeof(*hi) + l);
788 strcpy(hi->name, name);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000789
Denys Vlasenko3aff3b92021-06-29 19:07:36 +0200790 idx = idx % hash->csize;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000791 hi->next = hash->items[idx];
792 hash->items[idx] = hi;
793 hash->glen += l;
794 }
Denys Vlasenko7b81db12010-03-12 21:04:47 +0100795 return &hi->data;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000796}
797
/* Typed find-or-create wrappers around hash_find() */
#define findvar(hash, name)  ((var*)     hash_find((hash), (name)))
#define newvar(name)         ((var*)     hash_find(vhash,  (name)))
#define newfile(name)        ((rstream*) hash_find(fdhash, (name)))
#define newfunc(name)        ((func*)    hash_find(fnhash, (name)))
Glenn L McGrath545106f2002-11-11 06:21:00 +0000802
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000803static void hash_remove(xhash *hash, const char *name)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000804{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000805 hash_item *hi, **phi;
806
Denys Vlasenko7b81db12010-03-12 21:04:47 +0100807 phi = &hash->items[hashidx(name) % hash->csize];
Glenn L McGrath545106f2002-11-11 06:21:00 +0000808 while (*phi) {
809 hi = *phi;
810 if (strcmp(hi->name, name) == 0) {
Rob Landleya3896512006-05-07 20:20:34 +0000811 hash->glen -= (strlen(name) + 1);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000812 hash->nel--;
813 *phi = hi->next;
814 free(hi);
815 break;
816 }
Denys Vlasenko7b81db12010-03-12 21:04:47 +0100817 phi = &hi->next;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000818 }
819}
820
821/* ------ some useful functions ------ */
822
Denys Vlasenkob0a57ab2010-03-11 12:44:25 +0100823static char *skip_spaces(char *p)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000824{
Denys Vlasenko92642072021-06-29 02:32:32 +0200825 for (;;) {
Denis Vlasenkocd5c7862007-05-17 16:37:22 +0000826 if (*p == '\\' && p[1] == '\n') {
827 p++;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +0000828 t_lineno++;
Denis Vlasenkocd5c7862007-05-17 16:37:22 +0000829 } else if (*p != ' ' && *p != '\t') {
830 break;
831 }
Mike Frysingerde2b9382005-09-27 03:18:00 +0000832 p++;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000833 }
Denys Vlasenkob0a57ab2010-03-11 12:44:25 +0100834 return p;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000835}
836
/* Returns the old value of *s and moves *s just past the word's
 * terminating NUL.
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
847
/* Fetch the next character from *s, advancing *s.
 * A backslash is handed to bb_process_escape_sequence(). If that returns
 * a backslash without consuming anything (unrecognized \z), the result is
 * z itself: e.g. in  s = "abc\"def"  the \" must be treated as ".
 * Backslash+newline is eaten and the character after it is fetched instead;
 * a backslash at the very end yields NUL without advancing past the NUL.
 */
static char nextchar(char **s)
{
	for (;;) {
		char ch = *(*s)++;
		char *after_first = *s;

		if (ch != '\\')
			return ch;

		ch = bb_process_escape_sequence((const char**)s);
		if (ch != '\\' || *s != after_first)
			return ch; /* escape was recognized */

		/* unrecognized \z: fetch z */
		ch = **s;
		if (ch == '\0')
			return ch; /* do not advance past NUL */
		(*s)++;
		if (ch != '\n')
			return ch;
		/* \<newline>: eat it, go around again */
	}
}
870
/* Rewrite the string at S1 with its escape sequences decoded by nextchar().
 * The result is stored over the input, including the terminating NUL.
 * TODO: merge with strcpy_and_process_escape_sequences()?
 */
static void unescape_string_in_place(char *s1)
{
	char *src = s1;
	char ch;

	do {
		ch = nextchar(&src);
		*s1++ = ch;
	} while (ch != '\0');
}
879
Denis Vlasenko77ad97f2008-05-13 02:27:31 +0000880static ALWAYS_INLINE int isalnum_(int c)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000881{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000882 return (isalnum(c) || c == '_');
883}
884
/* Convert the number at *pp with strtod() and leave *pp pointing at the
 * first character strtod() did not consume.
 */
static double my_strtod(char **pp)
{
	return strtod(*pp, pp);
}
#if ENABLE_DESKTOP
/* Like my_strtod(), but a leading "0x"/"0X" or "0<digit>" is first tried
 * as an integer with strtoull(..., 0), i.e. as hex or octal.
 */
static double my_strtod_or_hexoct(char **pp)
{
	char *start = *pp;

	if (start[0] == '0') {
		/* Might be hex or octal integer: 0x123abc or 07777 */
		int is_hex = ((start[1] | 0x20) == 'x');

		if (is_hex || isdigit(start[1])) {
			unsigned long long ull = strtoull(start, pp, 0);
			char stop;

			if (is_hex)
				return ull;
			stop = **pp;
			if (stop != '.' && !isdigit(stop))
				return ull;
			/* else: it may be a floating number. Examples:
			 * 009.123 (*pp points to '9')
			 * 000.123 (*pp points to '.')
			 * fall through to strtod.
			 */
		}
	}
	return strtod(start, pp);
}
#else
# define my_strtod_or_hexoct(p) my_strtod(p)
#endif
Denis Vlasenkoa2e1eea2008-09-02 09:00:23 +0000916
Glenn L McGrath545106f2002-11-11 06:21:00 +0000917/* -------- working with variables (set/get/copy/etc) -------- */
918
Denys Vlasenkoe2e38022021-07-04 01:25:34 +0200919static void fmt_num(const char *format, double n)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000920{
Denys Vlasenkoe2e38022021-07-04 01:25:34 +0200921 if (n == (long long)n) {
922 snprintf(g_buf, MAXVARFMT, "%lld", (long long)n);
Denys Vlasenko40573552021-07-02 14:27:40 +0200923 } else {
Denys Vlasenkoe2e38022021-07-04 01:25:34 +0200924 const char *s = format;
925 char c;
926
Denys Vlasenko40573552021-07-02 14:27:40 +0200927 do { c = *s; } while (c && *++s);
928 if (strchr("diouxX", c)) {
Denys Vlasenkoe2e38022021-07-04 01:25:34 +0200929 snprintf(g_buf, MAXVARFMT, format, (int)n);
Denys Vlasenkoe1e7ad62021-07-03 01:59:36 +0200930 } else if (strchr("eEfFgGaA", c)) {
Denys Vlasenkoe2e38022021-07-04 01:25:34 +0200931 snprintf(g_buf, MAXVARFMT, format, n);
Denys Vlasenko40573552021-07-02 14:27:40 +0200932 } else {
933 syntax_error(EMSG_INV_FMT);
934 }
935 }
Denys Vlasenko40573552021-07-02 14:27:40 +0200936}
937
938static xhash *iamarray(var *a)
939{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000940 while (a->type & VF_CHILD)
941 a = a->x.parent;
942
Denis Vlasenkocd5c7862007-05-17 16:37:22 +0000943 if (!(a->type & VF_ARRAY)) {
Glenn L McGrath545106f2002-11-11 06:21:00 +0000944 a->type |= VF_ARRAY;
945 a->x.array = hash_init();
946 }
947 return a->x.array;
948}
949
Denys Vlasenkob3c91a12021-06-29 18:33:25 +0200950#define clear_array(array) hash_clear(array)
Glenn L McGrath545106f2002-11-11 06:21:00 +0000951
952/* clear a variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000953static var *clrvar(var *v)
954{
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000955 if (!(v->type & VF_FSTR))
Glenn L McGrath545106f2002-11-11 06:21:00 +0000956 free(v->string);
957
958 v->type &= VF_DONTTOUCH;
959 v->type |= VF_DIRTY;
960 v->string = NULL;
961 return v;
962}
963
Denys Vlasenko40573552021-07-02 14:27:40 +0200964static void handle_special(var *);
965
Glenn L McGrath545106f2002-11-11 06:21:00 +0000966/* assign string value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000967static var *setvar_p(var *v, char *value)
968{
Glenn L McGrath545106f2002-11-11 06:21:00 +0000969 clrvar(v);
970 v->string = value;
971 handle_special(v);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000972 return v;
973}
974
975/* same as setvar_p but make a copy of string */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000976static var *setvar_s(var *v, const char *value)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000977{
Rob Landleyd921b2e2006-08-03 15:41:12 +0000978 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000979}
980
Denys Vlasenkocdeda162009-11-30 01:14:16 +0100981/* same as setvar_s but sets USER flag */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000982static var *setvar_u(var *v, const char *value)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000983{
Denys Vlasenkocdeda162009-11-30 01:14:16 +0100984 v = setvar_s(v, value);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000985 v->type |= VF_USER;
986 return v;
987}
988
989/* set array element to user string */
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +0000990static void setari_u(var *a, int idx, const char *s)
Mike Frysinger10a11e22005-09-27 02:23:02 +0000991{
"Robert P. J. Day"68229832006-07-01 13:08:46 +0000992 var *v;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000993
Denys Vlasenko7bb346f2009-10-06 22:09:50 +0200994 v = findvar(iamarray(a), itoa(idx));
Glenn L McGrath545106f2002-11-11 06:21:00 +0000995 setvar_u(v, s);
996}
997
998/* assign numeric value to variable */
Mike Frysinger10a11e22005-09-27 02:23:02 +0000999static var *setvar_i(var *v, double value)
1000{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001001 clrvar(v);
1002 v->type |= VF_NUMBER;
1003 v->number = value;
1004 handle_special(v);
1005 return v;
1006}
1007
Denis Vlasenkoa41fdf32007-01-29 22:51:00 +00001008static const char *getvar_s(var *v)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001009{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001010 /* if v is numeric and has no cached string, convert it to string */
1011 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02001012 fmt_num(getvar_s(intvar[CONVFMT]), v->number);
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001013 v->string = xstrdup(g_buf);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001014 v->type |= VF_CACHED;
1015 }
1016 return (v->string == NULL) ? "" : v->string;
1017}
1018
Mike Frysinger10a11e22005-09-27 02:23:02 +00001019static double getvar_i(var *v)
1020{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001021 char *s;
1022
1023 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
1024 v->number = 0;
1025 s = v->string;
1026 if (s && *s) {
Denys Vlasenkod527e0c2010-10-05 13:22:11 +02001027 debug_printf_eval("getvar_i: '%s'->", s);
Denis Vlasenkoa2e1eea2008-09-02 09:00:23 +00001028 v->number = my_strtod(&s);
Denys Vlasenkoe2952df2022-01-08 22:42:35 +01001029 /* ^^^ hex/oct NOT allowed here! */
Denys Vlasenkod527e0c2010-10-05 13:22:11 +02001030 debug_printf_eval("%f (s:'%s')\n", v->number, s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001031 if (v->type & VF_USER) {
Denys Vlasenkof414fb42021-06-29 03:02:21 +02001032//TODO: skip_spaces() also skips backslash+newline, is it intended here?
Denys Vlasenkob0a57ab2010-03-11 12:44:25 +01001033 s = skip_spaces(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001034 if (*s != '\0')
1035 v->type &= ~VF_USER;
1036 }
1037 } else {
Denys Vlasenkod527e0c2010-10-05 13:22:11 +02001038 debug_printf_eval("getvar_i: '%s'->zero\n", s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001039 v->type &= ~VF_USER;
1040 }
1041 v->type |= VF_CACHED;
1042 }
Denys Vlasenkod527e0c2010-10-05 13:22:11 +02001043 debug_printf_eval("getvar_i: %f\n", v->number);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001044 return v->number;
1045}
1046
Denis Vlasenkoa2e1eea2008-09-02 09:00:23 +00001047/* Used for operands of bitwise ops */
1048static unsigned long getvar_i_int(var *v)
1049{
1050 double d = getvar_i(v);
1051
1052 /* Casting doubles to longs is undefined for values outside
1053 * of target type range. Try to widen it as much as possible */
1054 if (d >= 0)
1055 return (unsigned long)d;
Denis Vlasenko665eaff2008-09-05 04:59:02 +00001056 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
Denis Vlasenkoa2e1eea2008-09-02 09:00:23 +00001057 return - (long) (unsigned long) (-d);
1058}
1059
"Vladimir N. Oleynik"5cf9a032005-10-19 09:21:51 +00001060static var *copyvar(var *dest, const var *src)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001061{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001062 if (dest != src) {
1063 clrvar(dest);
Denis Vlasenko629563b2007-02-24 17:05:52 +00001064 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
Denys Vlasenkod527e0c2010-10-05 13:22:11 +02001065 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001066 dest->number = src->number;
1067 if (src->string)
Rob Landleyd921b2e2006-08-03 15:41:12 +00001068 dest->string = xstrdup(src->string);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001069 }
1070 handle_special(dest);
1071 return dest;
1072}
1073
Mike Frysinger10a11e22005-09-27 02:23:02 +00001074static var *incvar(var *v)
1075{
Denys Vlasenkocdeda162009-11-30 01:14:16 +01001076 return setvar_i(v, getvar_i(v) + 1.0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001077}
1078
1079/* return true if v is number or numeric string */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001080static int is_numeric(var *v)
1081{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001082 getvar_i(v);
1083 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
1084}
1085
1086/* return 1 when value of v corresponds to true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001087static int istrue(var *v)
1088{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001089 if (is_numeric(v))
Denys Vlasenkocdeda162009-11-30 01:14:16 +01001090 return (v->number != 0);
1091 return (v->string && v->string[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001092}
1093
Glenn L McGrath545106f2002-11-11 06:21:00 +00001094/* ------- awk program text parsing ------- */
1095
Denys Vlasenkoaf017232021-06-29 01:03:42 +02001096/* Parse next token pointed by global pos, place results into global t_XYZ variables.
1097 * If token isn't expected, print error message and die.
1098 * Return token class (also store it in t_tclass).
Glenn L McGrath545106f2002-11-11 06:21:00 +00001099 */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001100static uint32_t next_token(uint32_t expected)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001101{
Denys Vlasenkoef5463c2021-07-02 14:53:52 +02001102#define concat_inserted (G1.next_token__concat_inserted)
1103#define save_tclass (G1.next_token__save_tclass)
1104#define save_info (G1.next_token__save_info)
Glenn L McGrath545106f2002-11-11 06:21:00 +00001105
Denys Vlasenkof414fb42021-06-29 03:02:21 +02001106 char *p;
Denis Vlasenkof782f522007-01-01 23:51:30 +00001107 const char *tl;
Denis Vlasenkof782f522007-01-01 23:51:30 +00001108 const uint32_t *ti;
Denys Vlasenko832cb4f2021-06-29 01:09:08 +02001109 uint32_t tc, last_token_class;
1110
Denys Vlasenko1f765702021-07-03 01:32:03 +02001111 last_token_class = t_tclass; /* t_tclass is initialized to TC_NEWLINE */
Denis Vlasenkof782f522007-01-01 23:51:30 +00001112
Denys Vlasenkofd217c12021-06-18 16:35:27 +02001113 debug_printf_parse("%s() expected(%x):", __func__, expected);
1114 debug_parse_print_tc(expected);
1115 debug_printf_parse("\n");
1116
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001117 if (t_rollback) {
Denys Vlasenkobd8b05b2020-02-02 23:28:55 +01001118 debug_printf_parse("%s: using rolled-back token\n", __func__);
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001119 t_rollback = FALSE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001120 } else if (concat_inserted) {
Denys Vlasenkobd8b05b2020-02-02 23:28:55 +01001121 debug_printf_parse("%s: using concat-inserted token\n", __func__);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001122 concat_inserted = FALSE;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001123 t_tclass = save_tclass;
1124 t_info = save_info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001125 } else {
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001126 p = g_pos;
Denys Vlasenko4f275032021-06-29 03:27:07 +02001127 if (g_saved_ch != '\0') {
1128 *p = g_saved_ch;
1129 g_saved_ch = '\0';
1130 }
Denis Vlasenkoe1d3e032007-01-01 23:53:52 +00001131 readnext:
Denys Vlasenkob0a57ab2010-03-11 12:44:25 +01001132 p = skip_spaces(p);
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001133 g_lineno = t_lineno;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001134 if (*p == '#')
Denis Vlasenkoffba9412007-05-17 23:03:35 +00001135 while (*p != '\n' && *p != '\0')
1136 p++;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001137
Glenn L McGrath545106f2002-11-11 06:21:00 +00001138 if (*p == '\0') {
1139 tc = TC_EOF;
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001140 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
Denys Vlasenkoe2952df2022-01-08 22:42:35 +01001141 } else if (*p == '"') {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001142 /* it's a string */
Denys Vlasenkof414fb42021-06-29 03:02:21 +02001143 char *s = t_string = ++p;
Denys Vlasenkoe2952df2022-01-08 22:42:35 +01001144 while (*p != '"') {
Denys Vlasenko2b299fe2010-10-24 01:58:04 +02001145 char *pp;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001146 if (*p == '\0' || *p == '\n')
1147 syntax_error(EMSG_UNEXP_EOS);
Denys Vlasenko2b299fe2010-10-24 01:58:04 +02001148 pp = p;
Denys Vlasenko6ebdf7a2010-03-11 12:41:55 +01001149 *s++ = nextchar(&pp);
1150 p = pp;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001151 }
1152 p++;
1153 *s = '\0';
1154 tc = TC_STRING;
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001155 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001156 } else if ((expected & TC_REGEXP) && *p == '/') {
1157 /* it's regexp */
Denys Vlasenkof414fb42021-06-29 03:02:21 +02001158 char *s = t_string = ++p;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001159 while (*p != '/') {
1160 if (*p == '\0' || *p == '\n')
1161 syntax_error(EMSG_UNEXP_EOS);
Denis Vlasenkod9b5ab82007-05-18 07:30:43 +00001162 *s = *p++;
Denis Vlasenkoffba9412007-05-17 23:03:35 +00001163 if (*s++ == '\\') {
Denys Vlasenko6ebdf7a2010-03-11 12:41:55 +01001164 char *pp = p;
1165 s[-1] = bb_process_escape_sequence((const char **)&pp);
1166 if (*p == '\\')
Denis Vlasenkoffba9412007-05-17 23:03:35 +00001167 *s++ = '\\';
Denys Vlasenko6ebdf7a2010-03-11 12:41:55 +01001168 if (pp == p)
Denis Vlasenkoffba9412007-05-17 23:03:35 +00001169 *s++ = *p++;
Denys Vlasenko6ebdf7a2010-03-11 12:41:55 +01001170 else
1171 p = pp;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001172 }
1173 }
1174 p++;
1175 *s = '\0';
1176 tc = TC_REGEXP;
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001177 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001178
1179 } else if (*p == '.' || isdigit(*p)) {
1180 /* it's a number */
Denys Vlasenko6ebdf7a2010-03-11 12:41:55 +01001181 char *pp = p;
Denys Vlasenkoe2952df2022-01-08 22:42:35 +01001182 t_double = my_strtod_or_hexoct(&pp);
1183 /* ^^^ awk only allows hex/oct consts in _program_, not in _input_ */
Denys Vlasenko6ebdf7a2010-03-11 12:41:55 +01001184 p = pp;
Denys Vlasenko28458c62010-10-05 16:49:03 +02001185 if (*p == '.')
Glenn L McGrath545106f2002-11-11 06:21:00 +00001186 syntax_error(EMSG_UNEXP_TOKEN);
1187 tc = TC_NUMBER;
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001188 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001189 } else {
Denys Vlasenko4f275032021-06-29 03:27:07 +02001190 char *end_of_name;
1191
Denys Vlasenkof414fb42021-06-29 03:02:21 +02001192 if (*p == '\n')
1193 t_lineno++;
1194
Glenn L McGrath545106f2002-11-11 06:21:00 +00001195 /* search for something known */
1196 tl = tokenlist;
1197 tc = 0x00000001;
1198 ti = tokeninfo;
1199 while (*tl) {
Denys Vlasenko28458c62010-10-05 16:49:03 +02001200 int l = (unsigned char) *tl++;
1201 if (l == (unsigned char) NTCC) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001202 tc <<= 1;
1203 continue;
1204 }
Denys Vlasenko28458c62010-10-05 16:49:03 +02001205 /* if token class is expected,
1206 * token matches,
1207 * and it's not a longer word,
Glenn L McGrath545106f2002-11-11 06:21:00 +00001208 */
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001209 if ((tc & (expected | TS_WORD | TC_NEWLINE))
Denys Vlasenko28458c62010-10-05 16:49:03 +02001210 && strncmp(p, tl, l) == 0
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001211 && !((tc & TS_WORD) && isalnum_(p[l]))
Denis Vlasenkoffba9412007-05-17 23:03:35 +00001212 ) {
Denys Vlasenko28458c62010-10-05 16:49:03 +02001213 /* then this is what we are looking for */
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001214 t_info = *ti;
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001215 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001216 p += l;
Denys Vlasenko28458c62010-10-05 16:49:03 +02001217 goto token_found;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001218 }
1219 ti++;
1220 tl += l;
1221 }
Denys Vlasenko28458c62010-10-05 16:49:03 +02001222 /* not a known token */
Glenn L McGrath545106f2002-11-11 06:21:00 +00001223
Denys Vlasenko28458c62010-10-05 16:49:03 +02001224 /* is it a name? (var/array/function) */
1225 if (!isalnum_(*p))
1226 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1227 /* yes */
Denys Vlasenko4f275032021-06-29 03:27:07 +02001228 t_string = p;
1229 while (isalnum_(*p))
1230 p++;
1231 end_of_name = p;
Denys Vlasenko6872c192021-06-29 12:16:36 +02001232
1233 if (last_token_class == TC_FUNCDECL)
1234 /* eat space in "function FUNC (...) {...}" declaration */
Denys Vlasenko28458c62010-10-05 16:49:03 +02001235 p = skip_spaces(p);
Denys Vlasenko6872c192021-06-29 12:16:36 +02001236 else if (expected & TC_ARRAY) {
1237 /* eat space between array name and [ */
1238 char *s = skip_spaces(p);
1239 if (*s == '[') /* array ref, not just a name? */
1240 p = s;
1241 }
1242 /* else: do NOT consume whitespace after variable name!
1243 * gawk allows definition "function FUNC (p) {...}" - note space,
1244 * but disallows the call "FUNC (p)" because it isn't one -
1245 * expression "v (a)" should NOT be parsed as TC_FUNCTION:
1246 * it is a valid concatenation if "v" is a variable,
1247 * not a function name (and type of name is not known at parse time).
1248 */
1249
Denys Vlasenko28458c62010-10-05 16:49:03 +02001250 if (*p == '(') {
Denys Vlasenkof414fb42021-06-29 03:02:21 +02001251 p++;
Denys Vlasenko28458c62010-10-05 16:49:03 +02001252 tc = TC_FUNCTION;
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001253 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
Denys Vlasenkof414fb42021-06-29 03:02:21 +02001254 } else if (*p == '[') {
1255 p++;
1256 tc = TC_ARRAY;
1257 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
Denys Vlasenko28458c62010-10-05 16:49:03 +02001258 } else {
Denys Vlasenko6872c192021-06-29 12:16:36 +02001259 tc = TC_VARIABLE;
Denys Vlasenkof414fb42021-06-29 03:02:21 +02001260 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
Denys Vlasenko4f275032021-06-29 03:27:07 +02001261 if (end_of_name == p) {
1262 /* there is no space for trailing NUL in t_string!
1263 * We need to save the char we are going to NUL.
1264 * (we'll use it in future call to next_token())
1265 */
1266 g_saved_ch = *end_of_name;
1267// especially pathological example is V="abc"; V.2 - it's V concatenated to .2
1268// (it evaluates to "abc0.2"). Because of this case, we can't simply cache
1269// '.' and analyze it later: we also have to *store it back* in next
1270// next_token(), in order to give my_strtod() the undamaged ".2" string.
1271 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00001272 }
Denys Vlasenko4f275032021-06-29 03:27:07 +02001273 *end_of_name = '\0'; /* terminate t_string */
Glenn L McGrath545106f2002-11-11 06:21:00 +00001274 }
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001275 token_found:
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001276 g_pos = p;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001277
1278 /* skipping newlines in some cases */
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001279 if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE))
Glenn L McGrath545106f2002-11-11 06:21:00 +00001280 goto readnext;
1281
1282 /* insert concatenation operator when needed */
Denys Vlasenkoaf017232021-06-29 01:03:42 +02001283 debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001284 (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP),
Denys Vlasenko9782cb72021-06-29 01:50:47 +02001285 !(last_token_class == TC_LENGTH && tc == TC_LPAREN));
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001286 if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP)
Denys Vlasenko9782cb72021-06-29 01:50:47 +02001287 && !(last_token_class == TC_LENGTH && tc == TC_LPAREN) /* but not for "length(..." */
Denys Vlasenkobd8b05b2020-02-02 23:28:55 +01001288 ) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001289 concat_inserted = TRUE;
1290 save_tclass = tc;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001291 save_info = t_info;
Denys Vlasenkoaf017232021-06-29 01:03:42 +02001292 tc = TC_BINOPX;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001293 t_info = OC_CONCAT | SS | P(35);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001294 }
1295
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001296 t_tclass = tc;
Denys Vlasenkoaf017232021-06-29 01:03:42 +02001297 debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001298 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00001299 /* Are we ready for this? */
Denys Vlasenkoaf017232021-06-29 01:03:42 +02001300 if (!(t_tclass & expected)) {
Denys Vlasenko832cb4f2021-06-29 01:09:08 +02001301 syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ?
Denis Vlasenkoffba9412007-05-17 23:03:35 +00001302 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
Denys Vlasenko28b00ce2015-10-02 02:41:39 +02001303 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00001304
Denys Vlasenkoaf017232021-06-29 01:03:42 +02001305 debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double);
1306 debug_parse_print_tc(t_tclass);
Denys Vlasenkofd217c12021-06-18 16:35:27 +02001307 debug_printf_parse("\n");
Denys Vlasenkoaf017232021-06-29 01:03:42 +02001308
Denys Vlasenkoaf017232021-06-29 01:03:42 +02001309 return t_tclass;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001310#undef concat_inserted
1311#undef save_tclass
1312#undef save_info
Glenn L McGrath545106f2002-11-11 06:21:00 +00001313}
1314
Denys Vlasenko216d3d82021-06-29 03:44:56 +02001315static ALWAYS_INLINE void rollback_token(void)
Denis Vlasenkocd5c7862007-05-17 16:37:22 +00001316{
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001317 t_rollback = TRUE;
Denis Vlasenkocd5c7862007-05-17 16:37:22 +00001318}
Glenn L McGrath545106f2002-11-11 06:21:00 +00001319
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001320static node *new_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001321{
"Robert P. J. Day"68229832006-07-01 13:08:46 +00001322 node *n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001323
Denis Vlasenko4cccc032006-12-22 18:37:07 +00001324 n = xzalloc(sizeof(node));
Glenn L McGrath545106f2002-11-11 06:21:00 +00001325 n->info = info;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001326 n->lineno = g_lineno;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001327 return n;
1328}
1329
Denys Vlasenkofab288c2010-04-04 01:17:30 +02001330static void mk_re_node(const char *s, node *n, regex_t *re)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001331{
Denys Vlasenko08ca3132021-07-03 13:57:47 +02001332 n->info = TI_REGEXP;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001333 n->l.re = re;
1334 n->r.ire = re + 1;
1335 xregcomp(re, s, REG_EXTENDED);
Denis Vlasenkoffba9412007-05-17 23:03:35 +00001336 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001337}
1338
Denys Vlasenko40573552021-07-02 14:27:40 +02001339static node *parse_expr(uint32_t);
1340
Denys Vlasenko9782cb72021-06-29 01:50:47 +02001341static node *parse_lrparen_list(void)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001342{
Denys Vlasenko9782cb72021-06-29 01:50:47 +02001343 next_token(TC_LPAREN);
1344 return parse_expr(TC_RPAREN);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001345}
1346
1347/* parse expression terminated by given argument, return ptr
1348 * to built subtree. Terminator is eaten by parse_expr */
/* term_tc is a mask of token classes: the loop stops as soon as next_token()
 * returns a class contained in it. "sn" is an on-stack root node (info = PRIMASK);
 * whatever ends up as its right child (sn.r.n) is the result, so an empty
 * expression yields NULL. "cn" is the node built most recently, "expected_tc"
 * is the mask of token classes that next_token() may accept next.
 */
Denys Vlasenkofd217c12021-06-18 16:35:27 +02001349static node *parse_expr(uint32_t term_tc)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001350{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001351 node sn;
1352 node *cn = &sn;
1353 node *vn, *glptr;
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001354 uint32_t tc, expected_tc;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001355 var *v;
1356
Denys Vlasenkofd217c12021-06-18 16:35:27 +02001357 debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
1358 debug_parse_print_tc(term_tc);
1359 debug_printf_parse("\n");
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001360
	/* set up the root: no children yet, no pending getline node */
Glenn L McGrath545106f2002-11-11 06:21:00 +00001361 sn.info = PRIMASK;
Brian Foleydac15a12019-01-01 13:40:58 -08001362 sn.r.n = sn.a.n = glptr = NULL;
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001363 expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001364
	/* one token per iteration, until a terminator class is seen */
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001365 while (!((tc = next_token(expected_tc)) & term_tc)) {
Denys Vlasenko28458c62010-10-05 16:49:03 +02001366
	/* glptr is non-NULL if a getline operand was seen earlier in this call
	 * (TC_GETLINE case below) and no '<' has consumed it yet */
Denys Vlasenkoac4786b2021-06-20 22:52:29 +02001367 if (glptr && (t_info == TI_LESS)) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001368 /* input redirection (<) attached to glptr node */
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001369 debug_printf_parse("%s: input redir\n", __func__);
Denis Vlasenkoffba9412007-05-17 23:03:35 +00001370 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
Glenn L McGrath4bded582004-02-22 11:55:09 +00001371 cn->a.n = glptr;
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001372 expected_tc = TS_OPERAND | TS_UOPPRE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001373 glptr = NULL;
Denys Vlasenkoa4934412021-06-29 01:30:49 +02001374 continue;
1375 }
1376 if (tc & (TS_BINOP | TC_UOPPOST)) {
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001377 debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001378 /* for binary and postfix-unary operators, jump back over
1379 * previous operators with higher priority */
1380 vn = cn;
Denys Vlasenkocdeda162009-11-30 01:14:16 +01001381 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
Denys Vlasenko08ca3132021-07-03 13:57:47 +02001382 || ((t_info == vn->info) && t_info == TI_COLON)
Denys Vlasenkocdeda162009-11-30 01:14:16 +01001383 ) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001384 vn = vn->a.n;
Brian Foleydac15a12019-01-01 13:40:58 -08001385 if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
Denys Vlasenkocdeda162009-11-30 01:14:16 +01001386 }
Denys Vlasenko08ca3132021-07-03 13:57:47 +02001387 if (t_info == TI_TERNARY)
1388//TODO: why?
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001389 t_info += P(6);
	/* splice the new operator node in where vn used to hang (as right child
	 * of the node above vn); vn itself becomes the operator's operand below */
1390 cn = vn->a.n->r.n = new_node(t_info);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001391 cn->a.n = vn->a.n;
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001392 if (tc & TS_BINOP) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001393 cn->l.n = vn;
Denys Vlasenkocb042b02021-07-03 13:29:32 +02001394//FIXME: this is the place to detect and reject assignments to non-lvalues.
1395//Currently we allow "assignments" to consts and temporaries, nonsense like this:
1396// awk 'BEGIN { "qwe" = 1 }'
1397// awk 'BEGIN { 7 *= 7 }'
1398// awk 'BEGIN { length("qwe") = 1 }'
1399// awk 'BEGIN { (1+1) += 3 }'
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001400 expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
Denys Vlasenko08ca3132021-07-03 13:57:47 +02001401 if (t_info == TI_PGETLINE) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001402 /* it's a pipe */
1403 next_token(TC_GETLINE);
1404 /* give maximum priority to this pipe */
1405 cn->info &= ~PRIMASK;
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001406 expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001407 }
1408 } else {
	/* postfix operator: its operand hangs on the right; the expression may
	 * continue with a binary operator or end here */
1409 cn->r.n = vn;
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001410 expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001411 }
1412 vn->a.n = cn;
Denys Vlasenkoa4934412021-06-29 01:30:49 +02001413 continue;
1414 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00001415
Denys Vlasenkoa4934412021-06-29 01:30:49 +02001416 debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
1417 /* for operands and prefix-unary operators, attach them
1418 * to last node */
1419 vn = cn;
1420 cn = vn->r.n = new_node(t_info);
1421 cn->a.n = vn;
Denys Vlasenkofd217c12021-06-18 16:35:27 +02001422
Denys Vlasenkoa4934412021-06-29 01:30:49 +02001423 expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
	/* after prefix ++ / -- only TS_LVALUE or TC_UOPPRE1 tokens are accepted */
1424 if (t_info == TI_PREINC || t_info == TI_PREDEC)
1425 expected_tc = TS_LVALUE | TC_UOPPRE1;
Mike Frysingerde2b9382005-09-27 03:18:00 +00001426
	/* prefix operators need no further setup: go fetch their operand */
Denys Vlasenkoa4934412021-06-29 01:30:49 +02001427 if (!(tc & (TS_OPERAND | TC_REGEXP)))
1428 continue;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001429
Denys Vlasenkoa4934412021-06-29 01:30:49 +02001430 debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
1431 expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
1432 /* one should be very careful with switch on tclass -
1433 * only simple tclasses should be used (TC_xyz, not TS_xyz) */
1434 switch (tc) {
1435 case TC_VARIABLE:
1436 case TC_ARRAY:
1437 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1438 cn->info = OC_VAR;
	/* a name found in ahash is referenced by its index (OC_FNARG);
	 * parse_program() fills ahash with the parameter names of the function
	 * being parsed. Any other name gets a variable via newvar() */
1439 v = hash_search(ahash, t_string);
1440 if (v != NULL) {
1441 cn->info = OC_FNARG;
1442 cn->l.aidx = v->x.aidx;
1443 } else {
1444 cn->l.v = newvar(t_string);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001445 }
Denys Vlasenkoa4934412021-06-29 01:30:49 +02001446 if (tc & TC_ARRAY) {
	/* "NAME[": the subscript expression is parsed recursively up to TC_ARRTERM */
1447 cn->info |= xS;
1448 cn->r.n = parse_expr(TC_ARRTERM);
1449 }
1450 break;
1451
1452 case TC_NUMBER:
1453 case TC_STRING:
1454 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
	/* literal: stored in a freshly allocated var attached to the node */
1455 cn->info = OC_VAR;
1456 v = cn->l.v = xzalloc(sizeof(var));
1457 if (tc & TC_NUMBER)
1458 setvar_i(v, t_double);
1459 else {
1460 setvar_s(v, t_string);
1461 expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
1462 }
1463 break;
1464
1465 case TC_REGEXP:
1466 debug_printf_parse("%s: TC_REGEXP\n", __func__);
	/* room for two regex_t: mk_re_node() compiles a case-sensitive and a REG_ICASE variant */
1467 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1468 break;
1469
1470 case TC_FUNCTION:
1471 debug_printf_parse("%s: TC_FUNCTION\n", __func__);
	/* the '(' was already consumed by next_token(): parse arguments up to ')' */
1472 cn->info = OC_FUNC;
1473 cn->r.f = newfunc(t_string);
Denys Vlasenkof414fb42021-06-29 03:02:21 +02001474 cn->l.n = parse_expr(TC_RPAREN);
Denys Vlasenkoa4934412021-06-29 01:30:49 +02001475 break;
1476
Denys Vlasenko9782cb72021-06-29 01:50:47 +02001477 case TC_LPAREN:
1478 debug_printf_parse("%s: TC_LPAREN\n", __func__);
	/* the parenthesized subtree takes the place of the node allocated above; "()" is rejected */
1479 cn = vn->r.n = parse_expr(TC_RPAREN);
Denys Vlasenkoa4934412021-06-29 01:30:49 +02001480 if (!cn)
1481 syntax_error("Empty sequence");
1482 cn->a.n = vn;
1483 break;
1484
1485 case TC_GETLINE:
1486 debug_printf_parse("%s: TC_GETLINE\n", __func__);
	/* remember the node so that a later '<' can be attached to it (top of the loop) */
1487 glptr = cn;
1488 expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
1489 break;
1490
1491 case TC_BUILTIN:
1492 debug_printf_parse("%s: TC_BUILTIN\n", __func__);
	/* builtin call: mandatory "(" ARGS ")" */
Denys Vlasenko9782cb72021-06-29 01:50:47 +02001493 cn->l.n = parse_lrparen_list();
Denys Vlasenkoa4934412021-06-29 01:30:49 +02001494 break;
1495
1496 case TC_LENGTH:
1497 debug_printf_parse("%s: TC_LENGTH\n", __func__);
	/* "length" may be used without parentheses: peek at what follows */
Denys Vlasenko216d3d82021-06-29 03:44:56 +02001498 tc = next_token(TC_LPAREN /* length(...) */
Denys Vlasenko1f765702021-07-03 01:32:03 +02001499 | TC_SEMICOL /* length; */
1500 | TC_NEWLINE /* length<newline> */
Denys Vlasenko717200e2021-07-03 00:39:55 +02001501 | TC_RBRACE /* length } */
Denys Vlasenkoa4934412021-06-29 01:30:49 +02001502 | TC_BINOPX /* length <op> NUM */
1503 | TC_COMMA /* print length, 1 */
1504 );
	/* anything but "(" is pushed back and handled by the next loop iteration */
Denys Vlasenko1193c682021-07-02 14:29:01 +02001505 if (tc != TC_LPAREN)
1506 rollback_token();
1507 else {
Denys Vlasenkoa4934412021-06-29 01:30:49 +02001508 /* It was a "(" token. Handle just like TC_BUILTIN */
Denys Vlasenko1193c682021-07-02 14:29:01 +02001509 cn->l.n = parse_expr(TC_RPAREN);
Denys Vlasenkoa4934412021-06-29 01:30:49 +02001510 }
1511 break;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001512 }
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001513 } /* while() */
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001514
1515 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001516 return sn.r.n;
1517}
1518
1519/* add node to chain. Return ptr to alloc'd node */
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001520static node *chain_node(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001521{
"Robert P. J. Day"68229832006-07-01 13:08:46 +00001522 node *n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001523
Denis Vlasenkocd5c7862007-05-17 16:37:22 +00001524 if (!seq->first)
Glenn L McGrath545106f2002-11-11 06:21:00 +00001525 seq->first = seq->last = new_node(0);
1526
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001527 if (seq->programname != g_progname) {
1528 seq->programname = g_progname;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001529 n = chain_node(OC_NEWSOURCE);
Denys Vlasenko2211fa72021-07-03 11:54:01 +02001530 n->l.new_progname = g_progname;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001531 }
1532
1533 n = seq->last;
1534 n->info = info;
1535 seq->last = n->a.n = new_node(OC_DONE);
1536
1537 return n;
1538}
1539
Mike Frysingerf87b3e32005-09-27 04:16:22 +00001540static void chain_expr(uint32_t info)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001541{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001542 node *n;
1543
1544 n = chain_node(info);
Brian Foley08a514c2019-01-01 13:40:59 -08001545
Denys Vlasenko1f765702021-07-03 01:32:03 +02001546 n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
Brian Foley08a514c2019-01-01 13:40:59 -08001547 if ((info & OF_REQUIRED) && !n->l.n)
1548 syntax_error(EMSG_TOO_FEW_ARGS);
1549
Denys Vlasenko717200e2021-07-03 00:39:55 +02001550 if (t_tclass & TC_RBRACE)
Glenn L McGrath545106f2002-11-11 06:21:00 +00001551 rollback_token();
1552}
1553
Denys Vlasenko40573552021-07-02 14:27:40 +02001554static void chain_group(void);
1555
Mike Frysinger10a11e22005-09-27 02:23:02 +00001556static node *chain_loop(node *nn)
1557{
Glenn L McGrath545106f2002-11-11 06:21:00 +00001558 node *n, *n2, *save_brk, *save_cont;
1559
1560 save_brk = break_ptr;
1561 save_cont = continue_ptr;
1562
1563 n = chain_node(OC_BR | Vx);
1564 continue_ptr = new_node(OC_EXEC);
1565 break_ptr = new_node(OC_EXEC);
1566 chain_group();
1567 n2 = chain_node(OC_EXEC | Vx);
1568 n2->l.n = nn;
1569 n2->a.n = n;
1570 continue_ptr->a.n = n2;
1571 break_ptr->a.n = n->r.n = seq->last;
1572
1573 continue_ptr = save_cont;
1574 break_ptr = save_brk;
1575
1576 return n;
1577}
1578
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001579static void chain_until_rbrace(void)
1580{
1581 uint32_t tc;
1582 while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
1583 debug_printf_parse("%s: !TC_RBRACE\n", __func__);
1584 if (tc == TC_NEWLINE)
1585 continue;
1586 rollback_token();
1587 chain_group();
1588 }
1589 debug_printf_parse("%s: TC_RBRACE\n", __func__);
1590}
1591
Glenn L McGrath545106f2002-11-11 06:21:00 +00001592/* parse group and attach it to chain */
/* A "group" is one of: a "{ ... }" block, an expression statement (possibly
 * empty, i.e. just ';'), or a statement keyword (if/while/do/for/print/
 * printf/break/continue/...). Nodes are appended to the current sequence
 * via chain_node()/chain_expr()/chain_loop().
 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001593static void chain_group(void)
1594{
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001595 uint32_t tc;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001596 node *n, *n2, *n3;
1597
	/* skip newlines in front of the group */
1598 do {
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001599 tc = next_token(TS_GRPSEQ);
1600 } while (tc == TC_NEWLINE);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001601
	/* "{": nested block, chained up to the matching "}" */
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001602 if (tc == TC_LBRACE) {
Denys Vlasenko717200e2021-07-03 00:39:55 +02001603 debug_printf_parse("%s: TC_LBRACE\n", __func__);
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001604 chain_until_rbrace();
Denys Vlasenko216d3d82021-06-29 03:44:56 +02001605 return;
1606 }
	/* expression statement or lone ';': push the token back so that
	 * chain_expr() -> parse_expr() sees it again */
Denys Vlasenkoab755e32021-07-12 13:30:30 +02001607 if (tc & (TS_OPSEQ | TC_SEMICOL)) {
1608 debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL\n", __func__);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001609 rollback_token();
1610 chain_expr(OC_EXEC | Vx);
Denys Vlasenko216d3d82021-06-29 03:44:56 +02001611 return;
1612 }
Denys Vlasenko686287b2021-06-29 03:47:46 +02001613
1614 /* TS_STATEMNT */
1615 debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
1616 switch (t_info & OPCLSMASK) {
1617 case ST_IF:
1618 debug_printf_parse("%s: ST_IF\n", __func__);
	/* branch node: condition in l.n, then the "then" part */
1619 n = chain_node(OC_BR | Vx);
1620 n->l.n = parse_lrparen_list();
1621 chain_group();
	/* n2 closes the "then" part; n->r.n points at the node following it */
1622 n2 = chain_node(OC_EXEC);
1623 n->r.n = seq->last;
	/* optional "else": after the else part is chained, n2 is linked to the
	 * node following it; any other token is pushed back */
Denys Vlasenko717200e2021-07-03 00:39:55 +02001624 if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) {
Denis Vlasenkoffba9412007-05-17 23:03:35 +00001625 chain_group();
Denys Vlasenko686287b2021-06-29 03:47:46 +02001626 n2->a.n = seq->last;
1627 } else {
1628 rollback_token();
Glenn L McGrath545106f2002-11-11 06:21:00 +00001629 }
Denys Vlasenko686287b2021-06-29 03:47:46 +02001630 break;
1631
1632 case ST_WHILE:
1633 debug_printf_parse("%s: ST_WHILE\n", __func__);
	/* condition is parsed first, then chain_loop() chains head + body;
	 * the condition is stored in the loop head */
1634 n2 = parse_lrparen_list();
1635 n = chain_loop(NULL);
1636 n->l.n = n2;
1637 break;
1638
1639 case ST_DO:
1640 debug_printf_parse("%s: ST_DO\n", __func__);
	/* n2 is chained in front of the loop and then linked to the node after
	 * the loop head - presumably so that the first pass enters the body
	 * without testing the condition. "while (COND)" must follow the body */
1641 n2 = chain_node(OC_EXEC);
1642 n = chain_loop(NULL);
1643 n2->a.n = n->a.n;
1644 next_token(TC_WHILE);
1645 n->l.n = parse_lrparen_list();
1646 break;
1647
1648 case ST_FOR:
1649 debug_printf_parse("%s: ST_FOR\n", __func__);
	/* the first expression ends either at ';' (three-part for)
	 * or at ')' (for-in) */
1650 next_token(TC_LPAREN);
1651 n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
Denys Vlasenkod6262742021-07-14 14:25:07 +02001652 if (t_tclass & TC_RPAREN) { /* for (I in ARRAY) */
	/* the only valid single expression is an "in" operator node */
Denys Vlasenko08ca3132021-07-03 13:57:47 +02001653 if (!n2 || n2->info != TI_IN)
Denys Vlasenko686287b2021-06-29 03:47:46 +02001654 syntax_error(EMSG_UNEXP_TOKEN);
	/* OC_WALKINIT receives both operands of "in"; the loop head is turned
	 * into OC_WALKNEXT operating on the left operand */
1655 n = chain_node(OC_WALKINIT | VV);
1656 n->l.n = n2->l.n;
1657 n->r.n = n2->r.n;
1658 n = chain_loop(NULL);
1659 n->info = OC_WALKNEXT | Vx;
1660 n->l.n = n2->l.n;
1661 } else { /* for (;;) */
	/* init expression is executed once, in front of the loop */
1662 n = chain_node(OC_EXEC | Vx);
1663 n->l.n = n2;
	/* n2 = condition, n3 = increment (evaluated by the loop's closing node) */
1664 n2 = parse_expr(TC_SEMICOL);
1665 n3 = parse_expr(TC_RPAREN);
1666 n = chain_loop(n3);
1667 n->l.n = n2;
	/* empty condition: the head is a plain OC_EXEC instead of a branch */
1668 if (!n2)
1669 n->info = OC_EXEC;
1670 }
1671 break;
1672
1673 case OC_PRINT:
1674 case OC_PRINTF:
1675 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1676 n = chain_node(t_info);
Denys Vlasenko1f765702021-07-03 01:32:03 +02001677 n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE);
	/* output redirection: merge the redirection token's info bits into the
	 * node and parse the redirection target into r.n */
Denys Vlasenko686287b2021-06-29 03:47:46 +02001678 if (t_tclass & TC_OUTRDR) {
1679 n->info |= t_info;
Denys Vlasenko1f765702021-07-03 01:32:03 +02001680 n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
Denys Vlasenko686287b2021-06-29 03:47:46 +02001681 }
	/* a terminating '}' belongs to the enclosing block: push it back */
Denys Vlasenko717200e2021-07-03 00:39:55 +02001682 if (t_tclass & TC_RBRACE)
Denys Vlasenko686287b2021-06-29 03:47:46 +02001683 rollback_token();
1684 break;
1685
1686 case OC_BREAK:
1687 debug_printf_parse("%s: OC_BREAK\n", __func__);
	/* OC_EXEC node linked to the enclosing loop's break target
	 * (break_ptr is set by chain_loop()) */
1688 n = chain_node(OC_EXEC);
Denys Vlasenkod3480dd2021-07-14 14:33:37 +02001689 if (!break_ptr)
1690 syntax_error("'break' not in a loop");
Denys Vlasenko686287b2021-06-29 03:47:46 +02001691 n->a.n = break_ptr;
1692 chain_expr(t_info);
1693 break;
1694
1695 case OC_CONTINUE:
1696 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
	/* same as "break", but linked to the loop's continue target */
1697 n = chain_node(OC_EXEC);
Denys Vlasenkod3480dd2021-07-14 14:33:37 +02001698 if (!continue_ptr)
1699 syntax_error("'continue' not in a loop");
Denys Vlasenko686287b2021-06-29 03:47:46 +02001700 n->a.n = continue_ptr;
1701 chain_expr(t_info);
1702 break;
1703
1704 /* delete, next, nextfile, return, exit */
1705 default:
1706 debug_printf_parse("%s: default\n", __func__);
1707 chain_expr(t_info);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001708 }
1709}
1710
/* Parse the awk program text at p and append the resulting nodes to the
 * proper sequence: beginseq for BEGIN blocks, endseq for END blocks,
 * the function's own body for "function" declarations, and mainseq for
 * pattern/action rules. Syntax errors are reported via syntax_error().
 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001711static void parse_program(char *p)
1712{
Denys Vlasenkofd217c12021-06-18 16:35:27 +02001713 debug_printf_parse("%s()\n", __func__);
1714
	/* point the tokenizer at the program text, line counting starts at 1 */
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001715 g_pos = p;
1716 t_lineno = 1;
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001717 for (;;) {
1718 uint32_t tclass;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001719
	/* first token of a top-level item */
Denys Vlasenkod6262742021-07-14 14:25:07 +02001720 tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1721 | TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
	/* entered from the bottom of the loop with a token already fetched there */
1722 got_tok:
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001723 if (tclass == TC_EOF) {
1724 debug_printf_parse("%s: TC_EOF\n", __func__);
1725 break;
1726 }
Denys Vlasenkod6262742021-07-14 14:25:07 +02001727 if (tclass == TC_NEWLINE) {
1728 debug_printf_parse("%s: TC_NEWLINE\n", __func__);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001729 continue;
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001730 }
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001731 if (tclass == TC_BEGIN) {
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001732 debug_printf_parse("%s: TC_BEGIN\n", __func__);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001733 seq = &beginseq;
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001734 /* ensure there is no newline between BEGIN and { */
1735 next_token(TC_LBRACE);
1736 chain_until_rbrace();
Denys Vlasenkod6262742021-07-14 14:25:07 +02001737 goto next_tok;
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001738 }
1739 if (tclass == TC_END) {
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001740 debug_printf_parse("%s: TC_END\n", __func__);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001741 seq = &endseq;
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001742 /* ensure there is no newline between END and { */
1743 next_token(TC_LBRACE);
1744 chain_until_rbrace();
Denys Vlasenkod6262742021-07-14 14:25:07 +02001745 goto next_tok;
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001746 }
1747 if (tclass == TC_FUNCDECL) {
1748 func *f;
1749
Denys Vlasenko7b46d112011-09-11 00:30:56 +02001750 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
	/* "NAME(" is tokenized as TC_FUNCTION, the '(' is consumed with it */
Glenn L McGrath545106f2002-11-11 06:21:00 +00001751 next_token(TC_FUNCTION);
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001752 f = newfunc(t_string);
Denys Vlasenkod1507102021-06-30 12:23:51 +02001753 if (f->defined)
1754 syntax_error("Duplicate function");
1755 f->defined = 1;
1756 //f->body.first = NULL; - already is
1757 //f->nargs = 0; - already is
Denys Vlasenko92642072021-06-29 02:32:32 +02001758 /* func arg list: comma sep list of args, and a close paren */
1759 for (;;) {
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001760 var *v;
Denys Vlasenko92642072021-06-29 02:32:32 +02001761 if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
1762 if (f->nargs == 0)
1763 break; /* func() is ok */
1764 /* func(a,) is not ok */
Brian Foley1c42c182019-01-06 18:32:59 -08001765 syntax_error(EMSG_UNEXP_TOKEN);
Denys Vlasenko92642072021-06-29 02:32:32 +02001766 }
	/* record the parameter name in ahash together with its position */
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00001767 v = findvar(ahash, t_string);
Denys Vlasenko7b81db12010-03-12 21:04:47 +01001768 v->x.aidx = f->nargs++;
Brian Foley1c42c182019-01-06 18:32:59 -08001769 /* Arg followed either by end of arg list or 1 comma */
Denys Vlasenko92642072021-06-29 02:32:32 +02001770 if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN)
Glenn L McGrath545106f2002-11-11 06:21:00 +00001771 break;
Denys Vlasenko92642072021-06-29 02:32:32 +02001772 /* it was a comma, we ate it */
Glenn L McGrath545106f2002-11-11 06:21:00 +00001773 }
	/* statements of the body are chained into the function's own sequence */
Denys Vlasenko7b81db12010-03-12 21:04:47 +01001774 seq = &f->body;
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001775 /* ensure there is { after "func F(...)" - but newlines are allowed */
1776 while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
1777 continue;
1778 chain_until_rbrace();
	/* body parsed: drop this function's parameter names from ahash */
Denys Vlasenkob3c91a12021-06-29 18:33:25 +02001779 hash_clear(ahash);
Denys Vlasenkod6262742021-07-14 14:25:07 +02001780 goto next_tok;
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001781 }
	/* everything else (pattern rule or bare action) goes to mainseq */
1782 seq = &mainseq;
1783 if (tclass & TS_OPSEQ) {
1784 node *cn;
1785
Denys Vlasenkoadcd9a62021-06-29 01:23:37 +02001786 debug_printf_parse("%s: TS_OPSEQ\n", __func__);
	/* pattern: push its first token back, then parse the whole pattern
	 * expression into an OC_TEST node */
Glenn L McGrath545106f2002-11-11 06:21:00 +00001787 rollback_token();
1788 cn = chain_node(OC_TEST);
Denys Vlasenko1f765702021-07-03 01:32:03 +02001789 cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE);
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001790 if (t_tclass == TC_LBRACE) {
Denys Vlasenko717200e2021-07-03 00:39:55 +02001791 debug_printf_parse("%s: TC_LBRACE\n", __func__);
Denys Vlasenko49c3ce62021-07-11 11:46:21 +02001792 chain_until_rbrace();
Glenn L McGrath545106f2002-11-11 06:21:00 +00001793 } else {
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001794 /* no action, assume default "{ print }" */
Denys Vlasenko717200e2021-07-03 00:39:55 +02001795 debug_printf_parse("%s: !TC_LBRACE\n", __func__);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001796 chain_node(OC_PRINT);
1797 }
	/* r.n of the test node: the node following this rule's action */
1798 cn->r.n = mainseq.last;
Denys Vlasenkod6262742021-07-14 14:25:07 +02001799 goto next_tok;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001800 }
Denys Vlasenko2b65e732021-07-03 01:16:48 +02001801 /* tclass == TC_LBRACE */
1802 debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
1803 chain_until_rbrace();
Denys Vlasenkod6262742021-07-14 14:25:07 +02001804 next_tok:
1805 /* Same as next_token() at the top of the loop, + TC_SEMICOL */
1806 tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1807 | TC_EOF | TC_NEWLINE | TC_SEMICOL);
1808 /* gawk allows many newlines, but does not allow more than one semicolon:
1809 * BEGIN {...}<newline>;<newline>;
1810 * would complain "each rule must have a pattern or an action part".
1811 * Same message for
1812 * ; BEGIN {...}
1813 */
1814 if (tclass != TC_SEMICOL)
1815 goto got_tok; /* use this token */
1816 /* else: loop back - ate the semicolon, get and use _next_ token */
1817 } /* for (;;) */
Glenn L McGrath545106f2002-11-11 06:21:00 +00001818}
1819
Glenn L McGrath545106f2002-11-11 06:21:00 +00001820/* -------- program execution part -------- */
1821
Denys Vlasenko6cf6f1e2021-06-30 02:12:27 +02001822/* temporary variables allocator */
1823static var *nvalloc(int sz)
1824{
1825 return xzalloc(sz * sizeof(var));
1826}
1827
1828static void nvfree(var *v, int sz)
1829{
1830 var *p = v;
1831
1832 while (--sz >= 0) {
1833 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1834 clear_array(iamarray(p));
1835 free(p->x.array->items);
1836 free(p->x.array);
1837 }
1838 if (p->type & VF_WALK) {
1839 walker_list *n;
1840 walker_list *w = p->x.walker;
1841 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1842 p->x.walker = NULL;
1843 while (w) {
1844 n = w->prev;
1845 debug_printf_walker(" free(%p)\n", w);
1846 free(w);
1847 w = n;
1848 }
1849 }
1850 clrvar(p);
1851 p++;
1852 }
1853
1854 free(v);
1855}
1856
Denis Vlasenkoa41fdf32007-01-29 22:51:00 +00001857static node *mk_splitter(const char *s, tsplitter *spl)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001858{
"Robert P. J. Day"68229832006-07-01 13:08:46 +00001859 regex_t *re, *ire;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001860 node *n;
1861
1862 re = &spl->re[0];
1863 ire = &spl->re[1];
1864 n = &spl->n;
Denys Vlasenko08ca3132021-07-03 13:57:47 +02001865 if (n->info == TI_REGEXP) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001866 regfree(re);
Denis Vlasenkob78c7822007-07-18 18:31:11 +00001867 regfree(ire); // TODO: nuke ire, use re+1?
Glenn L McGrath545106f2002-11-11 06:21:00 +00001868 }
Denys Vlasenko28458c62010-10-05 16:49:03 +02001869 if (s[0] && s[1]) { /* strlen(s) > 1 */
Glenn L McGrath545106f2002-11-11 06:21:00 +00001870 mk_re_node(s, n, re);
1871 } else {
Denys Vlasenko28458c62010-10-05 16:49:03 +02001872 n->info = (uint32_t) s[0];
Glenn L McGrath545106f2002-11-11 06:21:00 +00001873 }
1874
1875 return n;
1876}
1877
Denys Vlasenko40573552021-07-02 14:27:40 +02001878static var *evaluate(node *, var *);
1879
Denys Vlasenko6cf6f1e2021-06-30 02:12:27 +02001880/* Use node as a regular expression. Supplied with node ptr and regex_t
Eric Andersenaff114c2004-04-14 17:51:38 +00001881 * storage space. Return ptr to regex (if result points to preg, it should
Denys Vlasenko6cf6f1e2021-06-30 02:12:27 +02001882 * be later regfree'd manually).
Glenn L McGrath545106f2002-11-11 06:21:00 +00001883 */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001884static regex_t *as_regex(node *op, regex_t *preg)
1885{
Denis Vlasenko7a676642009-03-15 22:20:31 +00001886 int cflags;
Denis Vlasenkoa41fdf32007-01-29 22:51:00 +00001887 const char *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001888
Denys Vlasenko08ca3132021-07-03 13:57:47 +02001889 if (op->info == TI_REGEXP) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001890 return icase ? op->r.ire : op->l.re;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001891 }
Denys Vlasenko8b4c4292021-07-01 17:50:26 +02001892
Denys Vlasenko8b4c4292021-07-01 17:50:26 +02001893 //tmpvar = nvalloc(1);
Denys Vlasenko786ca192021-07-02 17:32:08 +02001894#define TMPVAR (&G.as_regex__tmpvar)
Denys Vlasenko8b4c4292021-07-01 17:50:26 +02001895 // We use a single "static" tmpvar (instead of on-stack or malloced one)
1896 // to decrease memory consumption in deeply-recursive awk programs.
1897 // The rule to work safely is to never call evaluate() while our static
1898 // TMPVAR's value is still needed.
1899 s = getvar_s(evaluate(op, TMPVAR));
Denis Vlasenko7a676642009-03-15 22:20:31 +00001900
1901 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1902 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1903 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1904 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1905 * (maybe gsub is not supposed to use REG_EXTENDED?).
1906 */
1907 if (regcomp(preg, s, cflags)) {
1908 cflags &= ~REG_EXTENDED;
1909 xregcomp(preg, s, cflags);
1910 }
Denys Vlasenko8b4c4292021-07-01 17:50:26 +02001911 //nvfree(tmpvar, 1);
1912#undef TMPVAR
Denis Vlasenkoffba9412007-05-17 23:03:35 +00001913 return preg;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001914}
1915
/* Gradually growing buffer.
 * b:    current buffer (may be NULL)
 * n:    number of bytes the caller needs
 * size: in/out, currently allocated size
 * The buffer is reallocated whenever b is NULL or n >= *size - that is,
 * even if n == old size - so there is always at least one extra
 * allocated byte. Returns the (possibly moved) buffer.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1928
1929/* resize field storage space */
Mike Frysinger10a11e22005-09-27 02:23:02 +00001930static void fsrealloc(int size)
1931{
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02001932 int i, newsize;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001933
1934 if (size >= maxfields) {
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02001935 /* Sanity cap, easier than catering for overflows */
1936 if (size > 0xffffff)
1937 bb_die_memory_exhausted();
1938
Glenn L McGrath545106f2002-11-11 06:21:00 +00001939 i = maxfields;
1940 maxfields = size + 16;
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02001941
1942 newsize = maxfields * sizeof(Fields[0]);
1943 debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize);
1944 Fields = xrealloc(Fields, newsize);
1945 debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1);
1946 /* ^^^ did Fields[] move? debug aid for L.v getting "upstaged" by R.v in evaluate() */
1947
Denis Vlasenkof782f522007-01-01 23:51:30 +00001948 for (; i < maxfields; i++) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001949 Fields[i].type = VF_SPECIAL;
1950 Fields[i].string = NULL;
1951 }
1952 }
Denys Vlasenko28458c62010-10-05 16:49:03 +02001953 /* if size < nfields, clear extra field variables */
1954 for (i = size; i < nfields; i++) {
1955 clrvar(Fields + i);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001956 }
1957 nfields = size;
1958}
1959
/* regexec() wrapper which refuses a match ending at offset 0
 * (that is, an empty match at the very start of s).
 *
 * preg:   compiled regex
 * s:      NUL-terminated string to search
 * pmatch: receives the match offsets (relative to s) in pmatch[0]
 *
 * Returns 0 if a match was found, else nonzero (regexec's own error code,
 * or REG_NOMATCH if only empty-at-start matches exist up to end of s).
 * On nonzero return pmatch[0] contents should not be relied upon.
 */
static int regexec1_nonempty(const regex_t *preg, const char *s, regmatch_t pmatch[])
{
	int r = regexec(preg, s, 1, pmatch, 0);
	if (r == 0 && pmatch[0].rm_eo == 0) {
		/* For example, happens when FS can match
		 * an empty string (awk -F ' *'). Logically,
		 * this should split into one-char fields.
		 * However, gawk 5.0.1 searches for first
		 * _non-empty_ separator string match:
		 */
		size_t ofs = 0;

		/* Empty input: there is nothing after the empty match,
		 * and s[1] lies beyond the terminator - do not read it.
		 */
		if (s[0] == '\0')
			return REG_NOMATCH;
		do {
			ofs++;
			if (!s[ofs])
				return REG_NOMATCH;
			/* Retry one char further if regexec fails at this offset
			 * (pmatch[] is not guaranteed to be filled then) or if it
			 * again matches an empty string at the start of s+ofs.
			 */
		} while (regexec(preg, s + ofs, 1, pmatch, 0) != 0
		      || pmatch[0].rm_eo == 0
		);
		/* make offsets relative to s, not to s+ofs */
		pmatch[0].rm_so += ofs;
		pmatch[0].rm_eo += ofs;
	}
	return r;
}
1982
Denis Vlasenkoa41fdf32007-01-29 22:51:00 +00001983static int awk_split(const char *s, node *spl, char **slist)
Mike Frysinger10a11e22005-09-27 02:23:02 +00001984{
Denys Vlasenko5323af72020-11-16 10:40:32 +01001985 int n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001986 char c[4];
1987 char *s1;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001988
1989 /* in worst case, each char would be a separate field */
Denis Vlasenkoa41fdf32007-01-29 22:51:00 +00001990 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1991 strcpy(s1, s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001992
1993 c[0] = c[1] = (char)spl->info;
1994 c[2] = c[3] = '\0';
Denis Vlasenkoffba9412007-05-17 23:03:35 +00001995 if (*getvar_s(intvar[RS]) == '\0')
1996 c[2] = '\n';
Glenn L McGrath545106f2002-11-11 06:21:00 +00001997
Denys Vlasenko28458c62010-10-05 16:49:03 +02001998 n = 0;
Denys Vlasenko08ca3132021-07-03 13:57:47 +02001999 if (spl->info == TI_REGEXP) { /* regex split */
Denis Vlasenkoaf1bd092007-07-18 18:32:25 +00002000 if (!*s)
2001 return n; /* "": zero fields */
2002 n++; /* at least one field will be there */
2003 do {
Denys Vlasenko5323af72020-11-16 10:40:32 +01002004 int l;
Denys Vlasenko646429e2021-07-02 23:24:52 +02002005 regmatch_t pmatch[1];
Denys Vlasenko5323af72020-11-16 10:40:32 +01002006
Denis Vlasenkoaf1bd092007-07-18 18:32:25 +00002007 l = strcspn(s, c+2); /* len till next NUL or \n */
Denys Vlasenko665a6592020-12-02 19:07:31 +01002008 if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0
Denis Vlasenkoa41fdf32007-01-29 22:51:00 +00002009 && pmatch[0].rm_so <= l
2010 ) {
Denys Vlasenko665a6592020-12-02 19:07:31 +01002011 /* if (pmatch[0].rm_eo == 0) ... - impossible */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002012 l = pmatch[0].rm_so;
Denis Vlasenkoaf1bd092007-07-18 18:32:25 +00002013 n++; /* we saw yet another delimiter */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002014 } else {
2015 pmatch[0].rm_eo = l;
Denys Vlasenkoe4244232009-05-18 23:50:03 +02002016 if (s[l])
2017 pmatch[0].rm_eo++;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002018 }
Denys Vlasenko5323af72020-11-16 10:40:32 +01002019 s1 = mempcpy(s1, s, l);
2020 *s1++ = '\0';
Glenn L McGrath545106f2002-11-11 06:21:00 +00002021 s += pmatch[0].rm_eo;
Denis Vlasenkoaf1bd092007-07-18 18:32:25 +00002022 } while (*s);
Denys Vlasenko5323af72020-11-16 10:40:32 +01002023
2024 /* echo a-- | awk -F-- '{ print NF, length($NF), $NF }'
2025 * should print "2 0 ":
2026 */
2027 *s1 = '\0';
2028
Denis Vlasenkoaf1bd092007-07-18 18:32:25 +00002029 return n;
2030 }
2031 if (c[0] == '\0') { /* null split */
Denis Vlasenkobf0a2012006-12-26 10:42:51 +00002032 while (*s) {
Denis Vlasenkoe1d3e032007-01-01 23:53:52 +00002033 *s1++ = *s++;
2034 *s1++ = '\0';
Glenn L McGrath545106f2002-11-11 06:21:00 +00002035 n++;
2036 }
Denis Vlasenkoaf1bd092007-07-18 18:32:25 +00002037 return n;
2038 }
2039 if (c[0] != ' ') { /* single-character split */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002040 if (icase) {
2041 c[0] = toupper(c[0]);
2042 c[1] = tolower(c[1]);
2043 }
Denys Vlasenko6ebdf7a2010-03-11 12:41:55 +01002044 if (*s1)
2045 n++;
Denys Vlasenko28458c62010-10-05 16:49:03 +02002046 while ((s1 = strpbrk(s1, c)) != NULL) {
Denis Vlasenkoe1d3e032007-01-01 23:53:52 +00002047 *s1++ = '\0';
Glenn L McGrath545106f2002-11-11 06:21:00 +00002048 n++;
2049 }
Denis Vlasenkoaf1bd092007-07-18 18:32:25 +00002050 return n;
2051 }
2052 /* space split */
2053 while (*s) {
2054 s = skip_whitespace(s);
Denys Vlasenko6ebdf7a2010-03-11 12:41:55 +01002055 if (!*s)
2056 break;
Denis Vlasenkoaf1bd092007-07-18 18:32:25 +00002057 n++;
2058 while (*s && !isspace(*s))
2059 *s1++ = *s++;
2060 *s1++ = '\0';
Glenn L McGrath545106f2002-11-11 06:21:00 +00002061 }
2062 return n;
2063}
2064
Mike Frysinger10a11e22005-09-27 02:23:02 +00002065static void split_f0(void)
2066{
Denis Vlasenkoaf1bd092007-07-18 18:32:25 +00002067/* static char *fstrings; */
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00002068#define fstrings (G.split_f0__fstrings)
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002069
Glenn L McGrath545106f2002-11-11 06:21:00 +00002070 int i, n;
2071 char *s;
2072
2073 if (is_f0_split)
2074 return;
2075
2076 is_f0_split = TRUE;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00002077 free(fstrings);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002078 fsrealloc(0);
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002079 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002080 fsrealloc(n);
2081 s = fstrings;
Denis Vlasenkof782f522007-01-01 23:51:30 +00002082 for (i = 0; i < n; i++) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002083 Fields[i].string = nextword(&s);
2084 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
2085 }
2086
2087 /* set NF manually to avoid side effects */
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002088 clrvar(intvar[NF]);
2089 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
2090 intvar[NF]->number = nfields;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00002091#undef fstrings
Glenn L McGrath545106f2002-11-11 06:21:00 +00002092}
2093
2094/* perform additional actions when some internal variables changed */
Mike Frysinger10a11e22005-09-27 02:23:02 +00002095static void handle_special(var *v)
2096{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002097 int n;
Denis Vlasenkoa41fdf32007-01-29 22:51:00 +00002098 char *b;
2099 const char *sep, *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002100 int sl, l, len, i, bsize;
2101
Denis Vlasenkoe1d3e032007-01-01 23:53:52 +00002102 if (!(v->type & VF_SPECIAL))
Glenn L McGrath545106f2002-11-11 06:21:00 +00002103 return;
2104
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002105 if (v == intvar[NF]) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002106 n = (int)getvar_i(v);
Denys Vlasenko75a1c872018-11-28 12:16:29 +01002107 if (n < 0)
2108 syntax_error("NF set to negative value");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002109 fsrealloc(n);
2110
2111 /* recalculate $0 */
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002112 sep = getvar_s(intvar[OFS]);
Rob Landleya3896512006-05-07 20:20:34 +00002113 sl = strlen(sep);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002114 b = NULL;
2115 len = 0;
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002116 for (i = 0; i < n; i++) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002117 s = getvar_s(&Fields[i]);
Rob Landleya3896512006-05-07 20:20:34 +00002118 l = strlen(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002119 if (b) {
2120 memcpy(b+len, sep, sl);
2121 len += sl;
2122 }
Denys Vlasenkoc9955f22010-03-10 19:21:54 +01002123 b = qrealloc(b, len+l+sl, &bsize);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002124 memcpy(b+len, s, l);
2125 len += l;
2126 }
Denis Vlasenkoe1d3e032007-01-01 23:53:52 +00002127 if (b)
2128 b[len] = '\0';
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002129 setvar_p(intvar[F0], b);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002130 is_f0_split = TRUE;
2131
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002132 } else if (v == intvar[F0]) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002133 is_f0_split = FALSE;
2134
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002135 } else if (v == intvar[FS]) {
Denys Vlasenkodf8066a2012-07-11 01:27:15 +02002136 /*
2137 * The POSIX-2008 standard says that changing FS should have no effect on the
2138 * current input line, but only on the next one. The language is:
2139 *
2140 * > Before the first reference to a field in the record is evaluated, the record
2141 * > shall be split into fields, according to the rules in Regular Expressions,
2142 * > using the value of FS that was current at the time the record was read.
2143 *
2144 * So, split up current line before assignment to FS:
2145 */
2146 split_f0();
2147
Glenn L McGrath545106f2002-11-11 06:21:00 +00002148 mk_splitter(getvar_s(v), &fsplitter);
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002149 } else if (v == intvar[RS]) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002150 mk_splitter(getvar_s(v), &rsplitter);
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002151 } else if (v == intvar[IGNORECASE]) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002152 icase = istrue(v);
Denis Vlasenkoe1d3e032007-01-01 23:53:52 +00002153 } else { /* $n */
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002154 n = getvar_i(intvar[NF]);
2155 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002156 /* right here v is invalid. Just to note... */
2157 }
2158}
2159
2160/* step through func/builtin/etc arguments */
Mike Frysinger10a11e22005-09-27 02:23:02 +00002161static node *nextarg(node **pn)
2162{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002163 node *n;
2164
2165 n = *pn;
Denys Vlasenko08ca3132021-07-03 13:57:47 +02002166 if (n && n->info == TI_COMMA) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002167 *pn = n->r.n;
2168 n = n->l.n;
2169 } else {
2170 *pn = NULL;
2171 }
2172 return n;
2173}
2174
Mike Frysinger10a11e22005-09-27 02:23:02 +00002175static void hashwalk_init(var *v, xhash *array)
2176{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002177 hash_item *hi;
Denis Vlasenko77ad97f2008-05-13 02:27:31 +00002178 unsigned i;
Denys Vlasenkoda62b092010-03-11 12:13:18 +01002179 walker_list *w;
2180 walker_list *prev_walker;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002181
Denys Vlasenkoda62b092010-03-11 12:13:18 +01002182 if (v->type & VF_WALK) {
2183 prev_walker = v->x.walker;
2184 } else {
2185 v->type |= VF_WALK;
2186 prev_walker = NULL;
2187 }
2188 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
Denys Vlasenko3cb60c32010-03-10 19:20:32 +01002189
Denys Vlasenkoda62b092010-03-11 12:13:18 +01002190 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
2191 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
2192 w->cur = w->end = w->wbuf;
2193 w->prev = prev_walker;
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002194 for (i = 0; i < array->csize; i++) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002195 hi = array->items[i];
Denis Vlasenkobf0a2012006-12-26 10:42:51 +00002196 while (hi) {
Denys Vlasenko3aff3b92021-06-29 19:07:36 +02002197 w->end = stpcpy(w->end, hi->name) + 1;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002198 hi = hi->next;
2199 }
2200 }
2201}
2202
Mike Frysinger10a11e22005-09-27 02:23:02 +00002203static int hashwalk_next(var *v)
2204{
Denys Vlasenkoda62b092010-03-11 12:13:18 +01002205 walker_list *w = v->x.walker;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002206
Denys Vlasenkoda62b092010-03-11 12:13:18 +01002207 if (w->cur >= w->end) {
2208 walker_list *prev_walker = w->prev;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002209
Denys Vlasenkoda62b092010-03-11 12:13:18 +01002210 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
2211 free(w);
Denys Vlasenko3cb60c32010-03-10 19:20:32 +01002212 v->x.walker = prev_walker;
2213 return FALSE;
2214 }
2215
Denys Vlasenkoda62b092010-03-11 12:13:18 +01002216 setvar_s(v, nextword(&w->cur));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002217 return TRUE;
2218}
2219
2220/* evaluate node, return 1 when result is true, 0 otherwise */
Mike Frysinger10a11e22005-09-27 02:23:02 +00002221static int ptest(node *pattern)
2222{
Denys Vlasenko8b4c4292021-07-01 17:50:26 +02002223 // We use a single "static" tmpvar (instead of on-stack or malloced one)
2224 // to decrease memory consumption in deeply-recursive awk programs.
2225 // The rule to work safely is to never call evaluate() while our static
2226 // TMPVAR's value is still needed.
2227 return istrue(evaluate(pattern, &G.ptest__tmpvar));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002228}
2229
2230/* read next record from stream rsm into a variable v */
Mike Frysinger10a11e22005-09-27 02:23:02 +00002231static int awk_getline(rstream *rsm, var *v)
2232{
Glenn L McGrath545106f2002-11-11 06:21:00 +00002233 char *b;
Denys Vlasenko646429e2021-07-02 23:24:52 +02002234 regmatch_t pmatch[1];
Denys Vlasenko7b81db12010-03-12 21:04:47 +01002235 int size, a, p, pp = 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002236 int fd, so, eo, r, rp;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002237 char c, *m, *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002238
Denys Vlasenko6a0d7492010-10-23 21:02:15 +02002239 debug_printf_eval("entered %s()\n", __func__);
2240
Glenn L McGrath545106f2002-11-11 06:21:00 +00002241 /* we're using our own buffer since we need access to accumulating
2242 * characters
2243 */
2244 fd = fileno(rsm->F);
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002245 m = rsm->buffer;
2246 a = rsm->adv;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002247 p = rsm->pos;
2248 size = rsm->size;
2249 c = (char) rsplitter.n.info;
2250 rp = 0;
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002251
Denys Vlasenkoc9955f22010-03-10 19:21:54 +01002252 if (!m)
2253 m = qrealloc(m, 256, &size);
Denys Vlasenko7b81db12010-03-12 21:04:47 +01002254
Glenn L McGrath545106f2002-11-11 06:21:00 +00002255 do {
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002256 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002257 so = eo = p;
2258 r = 1;
2259 if (p > 0) {
Denys Vlasenko08ca3132021-07-03 13:57:47 +02002260 if (rsplitter.n.info == TI_REGEXP) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002261 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002262 b, 1, pmatch, 0) == 0) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002263 so = pmatch[0].rm_so;
2264 eo = pmatch[0].rm_eo;
2265 if (b[eo] != '\0')
2266 break;
2267 }
2268 } else if (c != '\0') {
2269 s = strchr(b+pp, c);
Denys Vlasenko6ebdf7a2010-03-11 12:41:55 +01002270 if (!s)
2271 s = memchr(b+pp, '\0', p - pp);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002272 if (s) {
2273 so = eo = s-b;
2274 eo++;
2275 break;
2276 }
2277 } else {
2278 while (b[rp] == '\n')
2279 rp++;
2280 s = strstr(b+rp, "\n\n");
2281 if (s) {
2282 so = eo = s-b;
Denys Vlasenko7b81db12010-03-12 21:04:47 +01002283 while (b[eo] == '\n')
2284 eo++;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002285 if (b[eo] != '\0')
2286 break;
2287 }
2288 }
2289 }
2290
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002291 if (a > 0) {
Denys Vlasenko7b81db12010-03-12 21:04:47 +01002292 memmove(m, m+a, p+1);
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002293 b = m;
2294 a = 0;
2295 }
2296
Denys Vlasenkoc9955f22010-03-10 19:21:54 +01002297 m = qrealloc(m, a+p+128, &size);
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002298 b = m + a;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002299 pp = p;
2300 p += safe_read(fd, b+p, size-p-1);
2301 if (p < pp) {
2302 p = 0;
2303 r = 0;
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002304 setvar_i(intvar[ERRNO], errno);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002305 }
2306 b[p] = '\0';
2307
2308 } while (p > pp);
2309
2310 if (p == 0) {
2311 r--;
2312 } else {
2313 c = b[so]; b[so] = '\0';
2314 setvar_s(v, b+rp);
2315 v->type |= VF_USER;
2316 b[so] = c;
2317 c = b[eo]; b[eo] = '\0';
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002318 setvar_s(intvar[RT], b+so);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002319 b[eo] = c;
2320 }
2321
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00002322 rsm->buffer = m;
2323 rsm->adv = a + eo;
2324 rsm->pos = p - eo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002325 rsm->size = size;
2326
Denys Vlasenko6a0d7492010-10-23 21:02:15 +02002327 debug_printf_eval("returning from %s(): %d\n", __func__, r);
2328
Glenn L McGrath545106f2002-11-11 06:21:00 +00002329 return r;
2330}
2331
Glenn L McGrath545106f2002-11-11 06:21:00 +00002332/* formatted output into an allocated buffer, return ptr to buffer */
Ron Yorstone8fe9f92021-01-27 11:19:14 +00002333#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2334# define awk_printf(a, b) awk_printf(a)
2335#endif
Denys Vlasenko4ef88412021-07-11 12:25:33 +02002336static char *awk_printf(node *n, size_t *len)
Mike Frysinger10a11e22005-09-27 02:23:02 +00002337{
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02002338 char *b;
2339 char *fmt, *f;
Denys Vlasenko4ef88412021-07-11 12:25:33 +02002340 size_t i;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002341
Denys Vlasenko8b4c4292021-07-01 17:50:26 +02002342 //tmpvar = nvalloc(1);
2343#define TMPVAR (&G.awk_printf__tmpvar)
2344 // We use a single "static" tmpvar (instead of on-stack or malloced one)
2345 // to decrease memory consumption in deeply-recursive awk programs.
2346 // The rule to work safely is to never call evaluate() while our static
2347 // TMPVAR's value is still needed.
2348 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR)));
2349 // ^^^^^^^^^ here we immediately strdup() the value, so the later call
2350 // to evaluate() potentially recursing into another awk_printf() can't
2351 // mangle the value.
Glenn L McGrath545106f2002-11-11 06:21:00 +00002352
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02002353 b = NULL;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002354 i = 0;
Denys Vlasenko857800c2021-09-09 19:26:39 +02002355 while (1) { /* "print one format spec" loop */
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02002356 char *s;
2357 char c;
2358 char sv;
2359 var *arg;
Denys Vlasenko4ef88412021-07-11 12:25:33 +02002360 size_t slen;
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02002361
Denys Vlasenko8a0adba2021-09-09 18:57:07 +02002362 /* Find end of the next format spec, or end of line */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002363 s = f;
Denys Vlasenko8a0adba2021-09-09 18:57:07 +02002364 while (1) {
2365 c = *f;
2366 if (!c) /* no percent chars found at all */
2367 goto nul;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002368 f++;
Denys Vlasenkoe60c5692021-09-09 19:13:32 +02002369 if (c == '%')
2370 break;
Denys Vlasenko8a0adba2021-09-09 18:57:07 +02002371 }
Denys Vlasenkoe60c5692021-09-09 19:13:32 +02002372 /* we are past % in "....%..." */
2373 c = *f;
2374 if (!c) /* "....%" */
2375 goto nul;
2376 if (c == '%') { /* "....%%...." */
Denys Vlasenko8a0adba2021-09-09 18:57:07 +02002377 slen = f - s;
2378 s = xstrndup(s, slen);
2379 f++;
Denys Vlasenko857800c2021-09-09 19:26:39 +02002380 goto append; /* print "....%" part verbatim */
Denys Vlasenko8a0adba2021-09-09 18:57:07 +02002381 }
2382 while (1) {
2383 if (isalpha(c))
2384 break;
2385 if (c == '*')
2386 syntax_error("%*x formats are not supported");
Ron Yorston305a30d2021-09-09 08:15:31 +01002387 c = *++f;
Denys Vlasenko8a0adba2021-09-09 18:57:07 +02002388 if (!c) { /* "....%...." and no letter found after % */
2389 /* Example: awk 'BEGIN { printf "^^^%^^^\n"; }' */
2390 nul:
Ron Yorston305a30d2021-09-09 08:15:31 +01002391 slen = f - s;
Denys Vlasenko8a0adba2021-09-09 18:57:07 +02002392 goto tail; /* print remaining string, exit loop */
Ron Yorston305a30d2021-09-09 08:15:31 +01002393 }
Denis Vlasenko389f9d52007-05-09 21:57:23 +00002394 }
Denys Vlasenko8a0adba2021-09-09 18:57:07 +02002395 /* we are at A in "....%...A..." */
2396
Denys Vlasenko8b4c4292021-07-01 17:50:26 +02002397 arg = evaluate(nextarg(&n), TMPVAR);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002398
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02002399 /* Result can be arbitrarily long. Example:
2400 * printf "%99999s", "BOOM"
2401 */
Denys Vlasenko8a0adba2021-09-09 18:57:07 +02002402 sv = *++f;
2403 *f = '\0';
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02002404 if (c == 'c') {
Denys Vlasenkocaa93ec2021-07-11 18:16:10 +02002405 char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
Denys Vlasenko8d269ef2021-07-12 11:27:11 +02002406 char *r = xasprintf(s, cc ? cc : '^' /* else strlen will be wrong */);
2407 slen = strlen(r);
2408 if (cc == '\0') /* if cc is NUL, re-format the string with it */
2409 sprintf(r, s, cc);
2410 s = r;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002411 } else {
Denys Vlasenko4ef88412021-07-11 12:25:33 +02002412 if (c == 's') {
2413 s = xasprintf(s, getvar_s(arg));
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02002414 } else {
Denys Vlasenko4ef88412021-07-11 12:25:33 +02002415 double d = getvar_i(arg);
2416 if (strchr("diouxX", c)) {
2417//TODO: make it wider here (%x -> %llx etc)?
2418 s = xasprintf(s, (int)d);
2419 } else if (strchr("eEfFgGaA", c)) {
2420 s = xasprintf(s, d);
2421 } else {
Denys Vlasenko8a0adba2021-09-09 18:57:07 +02002422//TODO: GNU Awk 5.0.1: printf "%W" prints "%W", does not error out
Denys Vlasenko4ef88412021-07-11 12:25:33 +02002423 syntax_error(EMSG_INV_FMT);
2424 }
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02002425 }
Denys Vlasenko4ef88412021-07-11 12:25:33 +02002426 slen = strlen(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002427 }
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02002428 *f = sv;
Denys Vlasenko857800c2021-09-09 19:26:39 +02002429 append:
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02002430 if (i == 0) {
2431 b = s;
Denys Vlasenko4ef88412021-07-11 12:25:33 +02002432 i = slen;
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02002433 continue;
2434 }
2435 tail:
Denys Vlasenko4ef88412021-07-11 12:25:33 +02002436 b = xrealloc(b, i + slen + 1);
2437 strcpy(b + i, s);
2438 i += slen;
Denys Vlasenko857800c2021-09-09 19:26:39 +02002439 if (!c) /* s is NOT allocated and this is the last part of string? */
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02002440 break;
2441 free(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002442 }
2443
Glenn L McGrath545106f2002-11-11 06:21:00 +00002444 free(fmt);
Denys Vlasenko40573552021-07-02 14:27:40 +02002445 //nvfree(tmpvar, 1);
Denys Vlasenko8b4c4292021-07-01 17:50:26 +02002446#undef TMPVAR
2447
Ron Yorstone8fe9f92021-01-27 11:19:14 +00002448#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2449 if (len)
2450 *len = i;
2451#endif
Glenn L McGrath545106f2002-11-11 06:21:00 +00002452 return b;
2453}
2454
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002455/* Common substitution routine.
2456 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2457 * store result into (dest), return number of substitutions.
2458 * If nm = 0, replace all matches.
2459 * If src or dst is NULL, use $0.
2460 * If subexp != 0, enable subexpression matching (\1-\9).
Glenn L McGrath545106f2002-11-11 06:21:00 +00002461 */
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002462static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
Mike Frysinger10a11e22005-09-27 02:23:02 +00002463{
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002464 char *resbuf;
Denis Vlasenkoa41fdf32007-01-29 22:51:00 +00002465 const char *sp;
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002466 int match_no, residx, replen, resbufsize;
2467 int regexec_flags;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002468 regmatch_t pmatch[10];
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002469 regex_t sreg, *regex;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002470
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002471 resbuf = NULL;
2472 residx = 0;
2473 match_no = 0;
2474 regexec_flags = 0;
2475 regex = as_regex(rn, &sreg);
2476 sp = getvar_s(src ? src : intvar[F0]);
2477 replen = strlen(repl);
2478 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2479 int so = pmatch[0].rm_so;
2480 int eo = pmatch[0].rm_eo;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002481
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002482 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2483 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2484 memcpy(resbuf + residx, sp, eo);
2485 residx += eo;
2486 if (++match_no >= nm) {
2487 const char *s;
2488 int nbs;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002489
Glenn L McGrath545106f2002-11-11 06:21:00 +00002490 /* replace */
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002491 residx -= (eo - so);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002492 nbs = 0;
2493 for (s = repl; *s; s++) {
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002494 char c = resbuf[residx++] = *s;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002495 if (c == '\\') {
2496 nbs++;
2497 continue;
2498 }
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002499 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2500 int j;
2501 residx -= ((nbs + 3) >> 1);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002502 j = 0;
2503 if (c != '&') {
2504 j = c - '0';
2505 nbs++;
2506 }
2507 if (nbs % 2) {
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002508 resbuf[residx++] = c;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002509 } else {
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002510 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2511 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2512 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2513 residx += n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002514 }
2515 }
2516 nbs = 0;
2517 }
2518 }
2519
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002520 regexec_flags = REG_NOTBOL;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002521 sp += eo;
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002522 if (match_no == nm)
Denys Vlasenkocdeda162009-11-30 01:14:16 +01002523 break;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002524 if (eo == so) {
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002525 /* Empty match (e.g. "b*" will match anywhere).
2526 * Advance by one char. */
2527//BUG (bug 1333):
2528//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2529//... and will erroneously match "b" even though it is NOT at the word start.
2530//we need REG_NOTBOW but it does not exist...
Denys Vlasenko7379cd12010-04-04 01:48:12 +02002531//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2532//it should be able to do it correctly.
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002533 /* Subtle: this is safe only because
2534 * qrealloc allocated at least one extra byte */
2535 resbuf[residx] = *sp;
2536 if (*sp == '\0')
2537 goto ret;
2538 sp++;
2539 residx++;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002540 }
2541 }
2542
Denys Vlasenkofab288c2010-04-04 01:17:30 +02002543 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2544 strcpy(resbuf + residx, sp);
2545 ret:
2546 //bb_error_msg("end sp:'%s'%p", sp,sp);
2547 setvar_p(dest ? dest : intvar[F0], resbuf);
2548 if (regex == &sreg)
2549 regfree(regex);
2550 return match_no;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002551}
2552
Leonid Lisovskiy46a0be52009-09-21 04:08:08 +02002553static NOINLINE int do_mktime(const char *ds)
2554{
2555 struct tm then;
2556 int count;
2557
2558 /*memset(&then, 0, sizeof(then)); - not needed */
2559 then.tm_isdst = -1; /* default is unknown */
2560
2561 /* manpage of mktime says these fields are ints,
2562 * so we can sscanf stuff directly into them */
2563 count = sscanf(ds, "%u %u %u %u %u %u %d",
2564 &then.tm_year, &then.tm_mon, &then.tm_mday,
2565 &then.tm_hour, &then.tm_min, &then.tm_sec,
2566 &then.tm_isdst);
2567
2568 if (count < 6
2569 || (unsigned)then.tm_mon < 1
2570 || (unsigned)then.tm_year < 1900
2571 ) {
2572 return -1;
2573 }
2574
2575 then.tm_mon -= 1;
Denys Vlasenkobc3e9472009-09-21 04:16:00 +02002576 then.tm_year -= 1900;
Leonid Lisovskiy46a0be52009-09-21 04:08:08 +02002577
2578 return mktime(&then);
2579}
2580
Denys Vlasenkob705bf52021-07-02 23:38:50 +02002581/* Reduce stack usage in exec_builtin() by keeping match() code separate */
Denys Vlasenko90404ed2021-07-03 12:20:36 +02002582static NOINLINE var *do_match(node *an1, const char *as0)
Denys Vlasenkob705bf52021-07-02 23:38:50 +02002583{
2584 regmatch_t pmatch[1];
2585 regex_t sreg, *re;
Denys Vlasenko90404ed2021-07-03 12:20:36 +02002586 int n, start, len;
Denys Vlasenkob705bf52021-07-02 23:38:50 +02002587
2588 re = as_regex(an1, &sreg);
2589 n = regexec(re, as0, 1, pmatch, 0);
Denys Vlasenkob705bf52021-07-02 23:38:50 +02002590 if (re == &sreg)
2591 regfree(re);
Denys Vlasenko90404ed2021-07-03 12:20:36 +02002592 start = 0;
2593 len = -1;
2594 if (n == 0) {
2595 start = pmatch[0].rm_so + 1;
2596 len = pmatch[0].rm_eo - pmatch[0].rm_so;
2597 }
2598 setvar_i(newvar("RLENGTH"), len);
2599 return setvar_i(newvar("RSTART"), start);
Denys Vlasenkob705bf52021-07-02 23:38:50 +02002600}
2601
2602/* Reduce stack usage in evaluate() by keeping builtins' code separate */
Leonid Lisovskiy46a0be52009-09-21 04:08:08 +02002603static NOINLINE var *exec_builtin(node *op, var *res)
Mike Frysinger10a11e22005-09-27 02:23:02 +00002604{
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00002605#define tspl (G.exec_builtin__tspl)
2606
Denys Vlasenko15734872021-07-01 16:02:16 +02002607 var *tmpvars;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002608 node *an[4];
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002609 var *av[4];
Denis Vlasenkoa41fdf32007-01-29 22:51:00 +00002610 const char *as[4];
Glenn L McGrath545106f2002-11-11 06:21:00 +00002611 node *spl;
Mike Frysingerf87b3e32005-09-27 04:16:22 +00002612 uint32_t isr, info;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002613 int nargs;
2614 time_t tt;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002615 int i, l, ll, n;
2616
Denys Vlasenko15734872021-07-01 16:02:16 +02002617 tmpvars = nvalloc(4);
2618#define TMPVAR0 (tmpvars)
2619#define TMPVAR1 (tmpvars + 1)
2620#define TMPVAR2 (tmpvars + 2)
2621#define TMPVAR3 (tmpvars + 3)
2622#define TMPVAR(i) (tmpvars + (i))
Glenn L McGrath545106f2002-11-11 06:21:00 +00002623 isr = info = op->info;
2624 op = op->l.n;
2625
2626 av[2] = av[3] = NULL;
Denis Vlasenkoffba9412007-05-17 23:03:35 +00002627 for (i = 0; i < 4 && op; i++) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002628 an[i] = nextarg(&op);
Denys Vlasenkoab755e32021-07-12 13:30:30 +02002629 if (isr & 0x09000000) {
Denys Vlasenko15734872021-07-01 16:02:16 +02002630 av[i] = evaluate(an[i], TMPVAR(i));
Denys Vlasenkoab755e32021-07-12 13:30:30 +02002631 if (isr & 0x08000000)
2632 as[i] = getvar_s(av[i]);
2633 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002634 isr >>= 1;
2635 }
2636
2637 nargs = i;
Denis Vlasenko77ad97f2008-05-13 02:27:31 +00002638 if ((uint32_t)nargs < (info >> 30))
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00002639 syntax_error(EMSG_TOO_FEW_ARGS);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002640
Denys Vlasenko56b3eec2009-10-23 13:03:59 +02002641 info &= OPNMASK;
2642 switch (info) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002643
Denis Vlasenkof782f522007-01-01 23:51:30 +00002644 case B_a2:
Rob Landleyd8205b32010-10-24 03:27:22 +02002645 if (ENABLE_FEATURE_AWK_LIBM)
2646 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2647 else
2648 syntax_error(EMSG_NO_MATH);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002649 break;
2650
Denys Vlasenko39fe4d12010-03-12 16:57:06 +01002651 case B_sp: {
2652 char *s, *s1;
2653
Glenn L McGrath545106f2002-11-11 06:21:00 +00002654 if (nargs > 2) {
Denys Vlasenko08ca3132021-07-03 13:57:47 +02002655 spl = (an[2]->info == TI_REGEXP) ? an[2]
2656 : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002657 } else {
2658 spl = &fsplitter.n;
2659 }
2660
2661 n = awk_split(as[0], spl, &s);
2662 s1 = s;
2663 clear_array(iamarray(av[1]));
Denis Vlasenkoa2e1eea2008-09-02 09:00:23 +00002664 for (i = 1; i <= n; i++)
Denys Vlasenko39fe4d12010-03-12 16:57:06 +01002665 setari_u(av[1], i, nextword(&s));
2666 free(s1);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002667 setvar_i(res, n);
2668 break;
Denys Vlasenko39fe4d12010-03-12 16:57:06 +01002669 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002670
Denys Vlasenko39fe4d12010-03-12 16:57:06 +01002671 case B_ss: {
2672 char *s;
2673
Rob Landleya3896512006-05-07 20:20:34 +00002674 l = strlen(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002675 i = getvar_i(av[1]) - 1;
Denys Vlasenko6ebdf7a2010-03-11 12:41:55 +01002676 if (i > l)
2677 i = l;
2678 if (i < 0)
2679 i = 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002680 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
Denys Vlasenko6ebdf7a2010-03-11 12:41:55 +01002681 if (n < 0)
2682 n = 0;
Denis Vlasenko8ae5b282008-07-02 22:47:49 +00002683 s = xstrndup(as[0]+i, n);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002684 setvar_p(res, s);
2685 break;
Denys Vlasenko39fe4d12010-03-12 16:57:06 +01002686 }
Denis Vlasenkof7996f32007-01-11 17:20:00 +00002687
Denis Vlasenko7cbcd1c2008-08-28 23:16:58 +00002688 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2689 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
Denis Vlasenkof782f522007-01-01 23:51:30 +00002690 case B_an:
Denis Vlasenkoa2e1eea2008-09-02 09:00:23 +00002691 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
Denis Vlasenkoe175ff22006-09-26 17:41:00 +00002692 break;
Denis Vlasenkof7996f32007-01-11 17:20:00 +00002693
Denis Vlasenkof782f522007-01-01 23:51:30 +00002694 case B_co:
Denis Vlasenkoa2e1eea2008-09-02 09:00:23 +00002695 setvar_i(res, ~getvar_i_int(av[0]));
Denis Vlasenkoe175ff22006-09-26 17:41:00 +00002696 break;
2697
Denis Vlasenkof782f522007-01-01 23:51:30 +00002698 case B_ls:
Denis Vlasenkoa2e1eea2008-09-02 09:00:23 +00002699 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
Denis Vlasenkoe175ff22006-09-26 17:41:00 +00002700 break;
2701
Denis Vlasenkof782f522007-01-01 23:51:30 +00002702 case B_or:
Denis Vlasenkoa2e1eea2008-09-02 09:00:23 +00002703 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
Denis Vlasenkoe175ff22006-09-26 17:41:00 +00002704 break;
2705
Denis Vlasenkof782f522007-01-01 23:51:30 +00002706 case B_rs:
Denis Vlasenkoa2e1eea2008-09-02 09:00:23 +00002707 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
Denis Vlasenkoe175ff22006-09-26 17:41:00 +00002708 break;
2709
Denis Vlasenkof782f522007-01-01 23:51:30 +00002710 case B_xo:
Denis Vlasenkoa2e1eea2008-09-02 09:00:23 +00002711 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
Denis Vlasenkoe175ff22006-09-26 17:41:00 +00002712 break;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002713
Denis Vlasenkof782f522007-01-01 23:51:30 +00002714 case B_lo:
Denys Vlasenko39fe4d12010-03-12 16:57:06 +01002715 case B_up: {
2716 char *s, *s1;
Rob Landleyd921b2e2006-08-03 15:41:12 +00002717 s1 = s = xstrdup(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002718 while (*s1) {
Denys Vlasenko56b3eec2009-10-23 13:03:59 +02002719 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2720 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2721 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002722 s1++;
2723 }
2724 setvar_p(res, s);
2725 break;
Denys Vlasenko39fe4d12010-03-12 16:57:06 +01002726 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002727
Denis Vlasenkof782f522007-01-01 23:51:30 +00002728 case B_ix:
Glenn L McGrath545106f2002-11-11 06:21:00 +00002729 n = 0;
Rob Landleya3896512006-05-07 20:20:34 +00002730 ll = strlen(as[1]);
2731 l = strlen(as[0]) - ll;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002732 if (ll > 0 && l >= 0) {
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00002733 if (!icase) {
Denys Vlasenko39fe4d12010-03-12 16:57:06 +01002734 char *s = strstr(as[0], as[1]);
Denys Vlasenko6ebdf7a2010-03-11 12:41:55 +01002735 if (s)
2736 n = (s - as[0]) + 1;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002737 } else {
2738 /* this piece of code is terribly slow and
2739 * really should be rewritten
2740 */
Denys Vlasenko39fe4d12010-03-12 16:57:06 +01002741 for (i = 0; i <= l; i++) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002742 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2743 n = i+1;
2744 break;
2745 }
2746 }
2747 }
2748 }
2749 setvar_i(res, n);
2750 break;
2751
Denis Vlasenkof782f522007-01-01 23:51:30 +00002752 case B_ti:
Glenn L McGrath545106f2002-11-11 06:21:00 +00002753 if (nargs > 1)
2754 tt = getvar_i(av[1]);
2755 else
2756 time(&tt);
Denis Vlasenkoa41fdf32007-01-29 22:51:00 +00002757 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00002758 i = strftime(g_buf, MAXVARFMT,
Denis Vlasenkoa41fdf32007-01-29 22:51:00 +00002759 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2760 localtime(&tt));
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00002761 g_buf[i] = '\0';
2762 setvar_s(res, g_buf);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002763 break;
2764
Leonid Lisovskiy46a0be52009-09-21 04:08:08 +02002765 case B_mt:
2766 setvar_i(res, do_mktime(as[0]));
2767 break;
2768
Denis Vlasenkof782f522007-01-01 23:51:30 +00002769 case B_ma:
Denys Vlasenko90404ed2021-07-03 12:20:36 +02002770 res = do_match(an[1], as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002771 break;
2772
Denis Vlasenkof782f522007-01-01 23:51:30 +00002773 case B_ge:
Glenn L McGrath545106f2002-11-11 06:21:00 +00002774 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2775 break;
2776
Denis Vlasenkof782f522007-01-01 23:51:30 +00002777 case B_gs:
Glenn L McGrath545106f2002-11-11 06:21:00 +00002778 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2779 break;
2780
Denis Vlasenkof782f522007-01-01 23:51:30 +00002781 case B_su:
Glenn L McGrath545106f2002-11-11 06:21:00 +00002782 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2783 break;
2784 }
2785
Denys Vlasenko15734872021-07-01 16:02:16 +02002786 nvfree(tmpvars, 4);
2787#undef TMPVAR0
2788#undef TMPVAR1
2789#undef TMPVAR2
2790#undef TMPVAR3
2791#undef TMPVAR
2792
Glenn L McGrath545106f2002-11-11 06:21:00 +00002793 return res;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00002794#undef tspl
Glenn L McGrath545106f2002-11-11 06:21:00 +00002795}
2796
Denys Vlasenko40573552021-07-02 14:27:40 +02002797/* if expr looks like "var=value", perform assignment and return 1,
2798 * otherwise return 0 */
2799static int is_assignment(const char *expr)
2800{
2801 char *exprc, *val;
2802
Denys Vlasenkoa5d7b0f2021-07-02 23:07:21 +02002803 val = (char*)endofname(expr);
2804 if (val == (char*)expr || *val != '=') {
Denys Vlasenko40573552021-07-02 14:27:40 +02002805 return FALSE;
2806 }
2807
2808 exprc = xstrdup(expr);
2809 val = exprc + (val - expr);
2810 *val++ = '\0';
2811
2812 unescape_string_in_place(val);
2813 setvar_u(newvar(exprc), val);
2814 free(exprc);
2815 return TRUE;
2816}
2817
2818/* switch to next input file */
2819static rstream *next_input_file(void)
2820{
2821#define rsm (G.next_input_file__rsm)
2822#define files_happen (G.next_input_file__files_happen)
2823
Denys Vlasenko40573552021-07-02 14:27:40 +02002824 const char *fname, *ind;
2825
2826 if (rsm.F)
2827 fclose(rsm.F);
2828 rsm.F = NULL;
2829 rsm.pos = rsm.adv = 0;
2830
2831 for (;;) {
2832 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2833 if (files_happen)
2834 return NULL;
2835 fname = "-";
Denys Vlasenkoa5d7b0f2021-07-02 23:07:21 +02002836 rsm.F = stdin;
Denys Vlasenko40573552021-07-02 14:27:40 +02002837 break;
2838 }
2839 ind = getvar_s(incvar(intvar[ARGIND]));
2840 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2841 if (fname && *fname && !is_assignment(fname)) {
Denys Vlasenkoa5d7b0f2021-07-02 23:07:21 +02002842 rsm.F = xfopen_stdin(fname);
Denys Vlasenko40573552021-07-02 14:27:40 +02002843 break;
2844 }
2845 }
2846
2847 files_happen = TRUE;
2848 setvar_s(intvar[FILENAME], fname);
Denys Vlasenko40573552021-07-02 14:27:40 +02002849 return &rsm;
2850#undef rsm
2851#undef files_happen
2852}
2853
Glenn L McGrath545106f2002-11-11 06:21:00 +00002854/*
2855 * Evaluate node - the heart of the program. Supplied with subtree
Denys Vlasenkob705bf52021-07-02 23:38:50 +02002856 * and "res" variable to assign the result to if we evaluate an expression.
2857 * If node refers to e.g. a variable or a field, no assignment happens.
2858 * Return ptr to the result (which may or may not be the "res" variable!)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002859 */
2860#define XC(n) ((n) >> 8)
2861
Mike Frysinger10a11e22005-09-27 02:23:02 +00002862static var *evaluate(node *op, var *res)
2863{
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00002864/* This procedure is recursive so we should count every byte */
2865#define fnargs (G.evaluate__fnargs)
2866/* seed is initialized to 1 */
2867#define seed (G.evaluate__seed)
Denys Vlasenkofb132e42010-10-29 11:46:52 +02002868#define sreg (G.evaluate__sreg)
Denis Vlasenkoe1d3e032007-01-01 23:53:52 +00002869
Denys Vlasenko15734872021-07-01 16:02:16 +02002870 var *tmpvars;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002871
Denis Vlasenkoe1d3e032007-01-01 23:53:52 +00002872 if (!op)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002873 return setvar_s(res, NULL);
2874
Denys Vlasenko6a0d7492010-10-23 21:02:15 +02002875 debug_printf_eval("entered %s()\n", __func__);
2876
Denys Vlasenko15734872021-07-01 16:02:16 +02002877 tmpvars = nvalloc(2);
Denys Vlasenko786ca192021-07-02 17:32:08 +02002878#define TMPVAR0 (tmpvars)
2879#define TMPVAR1 (tmpvars + 1)
Glenn L McGrath545106f2002-11-11 06:21:00 +00002880
2881 while (op) {
Denys Vlasenkof9782ff2010-03-12 21:32:13 +01002882 struct {
2883 var *v;
2884 const char *s;
2885 } L = L; /* for compiler */
2886 struct {
2887 var *v;
2888 const char *s;
2889 } R = R;
2890 double L_d = L_d;
2891 uint32_t opinfo;
2892 int opn;
2893 node *op1;
2894
Glenn L McGrath545106f2002-11-11 06:21:00 +00002895 opinfo = op->info;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00002896 opn = (opinfo & OPNMASK);
2897 g_lineno = op->lineno;
Denys Vlasenkof9782ff2010-03-12 21:32:13 +01002898 op1 = op->l.n;
Denys Vlasenko6a0d7492010-10-23 21:02:15 +02002899 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002900
Mike Frysingerde2b9382005-09-27 03:18:00 +00002901 /* execute inevitable things */
Denys Vlasenko47d91332021-07-02 18:28:12 +02002902 if (opinfo & OF_RES1) {
2903 if ((opinfo & OF_REQUIRED) && !op1)
2904 syntax_error(EMSG_TOO_FEW_ARGS);
Denys Vlasenko15734872021-07-01 16:02:16 +02002905 L.v = evaluate(op1, TMPVAR0);
Denys Vlasenko4d902ea2021-07-02 22:28:51 +02002906 if (opinfo & OF_STR1) {
2907 L.s = getvar_s(L.v);
2908 debug_printf_eval("L.s:'%s'\n", L.s);
2909 }
2910 if (opinfo & OF_NUM1) {
2911 L_d = getvar_i(L.v);
2912 debug_printf_eval("L_d:%f\n", L_d);
2913 }
Denys Vlasenkod527e0c2010-10-05 13:22:11 +02002914 }
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02002915 /* NB: Must get string/numeric values of L (done above)
2916 * _before_ evaluate()'ing R.v: if both L and R are $NNNs,
2917 * and right one is large, then L.v points to Fields[NNN1],
2918 * second evaluate() reallocates and moves (!) Fields[],
2919 * R.v points to Fields[NNN2] but L.v now points to freed mem!
2920 * (Seen trying to evaluate "$444 $44444")
2921 */
2922 if (opinfo & OF_RES2) {
Denys Vlasenko15734872021-07-01 16:02:16 +02002923 R.v = evaluate(op->r.n, TMPVAR1);
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02002924 //TODO: L.v may be invalid now, set L.v to NULL to catch bugs?
2925 //L.v = NULL;
Denys Vlasenko4d902ea2021-07-02 22:28:51 +02002926 if (opinfo & OF_STR2) {
2927 R.s = getvar_s(R.v);
2928 debug_printf_eval("R.s:'%s'\n", R.s);
2929 }
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02002930 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00002931
Denys Vlasenko6a0d7492010-10-23 21:02:15 +02002932 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002933 switch (XC(opinfo & OPCLSMASK)) {
2934
Denis Vlasenkocd5c7862007-05-17 16:37:22 +00002935 /* -- iterative node type -- */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002936
Denis Vlasenkocd5c7862007-05-17 16:37:22 +00002937 /* test pattern */
Denis Vlasenkof782f522007-01-01 23:51:30 +00002938 case XC( OC_TEST ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02002939 debug_printf_eval("TEST\n");
Denys Vlasenko08ca3132021-07-03 13:57:47 +02002940 if (op1->info == TI_COMMA) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002941 /* it's range pattern */
2942 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2943 op->info |= OF_CHECKED;
2944 if (ptest(op1->r.n))
2945 op->info &= ~OF_CHECKED;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002946 op = op->a.n;
2947 } else {
2948 op = op->r.n;
2949 }
2950 } else {
Denys Vlasenko7b81db12010-03-12 21:04:47 +01002951 op = ptest(op1) ? op->a.n : op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002952 }
2953 break;
2954
Denis Vlasenkocd5c7862007-05-17 16:37:22 +00002955 /* just evaluate an expression, also used as unconditional jump */
Denis Vlasenkof782f522007-01-01 23:51:30 +00002956 case XC( OC_EXEC ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02002957 debug_printf_eval("EXEC\n");
Mike Frysingerde2b9382005-09-27 03:18:00 +00002958 break;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002959
Denis Vlasenkocd5c7862007-05-17 16:37:22 +00002960 /* branch, used in if-else and various loops */
Denis Vlasenkof782f522007-01-01 23:51:30 +00002961 case XC( OC_BR ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02002962 debug_printf_eval("BR\n");
Mike Frysingerde2b9382005-09-27 03:18:00 +00002963 op = istrue(L.v) ? op->a.n : op->r.n;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002964 break;
2965
Denis Vlasenkocd5c7862007-05-17 16:37:22 +00002966 /* initialize for-in loop */
Denis Vlasenkof782f522007-01-01 23:51:30 +00002967 case XC( OC_WALKINIT ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02002968 debug_printf_eval("WALKINIT\n");
Mike Frysingerde2b9382005-09-27 03:18:00 +00002969 hashwalk_init(L.v, iamarray(R.v));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002970 break;
2971
Denis Vlasenkocd5c7862007-05-17 16:37:22 +00002972 /* get next array item */
Denis Vlasenkof782f522007-01-01 23:51:30 +00002973 case XC( OC_WALKNEXT ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02002974 debug_printf_eval("WALKNEXT\n");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002975 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2976 break;
2977
Denis Vlasenkof782f522007-01-01 23:51:30 +00002978 case XC( OC_PRINT ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02002979 debug_printf_eval("PRINT /\n");
2980 case XC( OC_PRINTF ):
2981 debug_printf_eval("PRINTF\n");
2982 {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01002983 FILE *F = stdout;
2984
Mike Frysingerde2b9382005-09-27 03:18:00 +00002985 if (op->r.n) {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01002986 rstream *rsm = newfile(R.s);
2987 if (!rsm->F) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00002988 if (opn == '|') {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01002989 rsm->F = popen(R.s, "w");
2990 if (rsm->F == NULL)
James Byrne69374872019-07-02 11:35:03 +02002991 bb_simple_perror_msg_and_die("popen");
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01002992 rsm->is_pipe = 1;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002993 } else {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01002994 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002995 }
2996 }
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01002997 F = rsm->F;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002998 }
2999
Denys Vlasenko3d57a842021-07-11 12:00:31 +02003000 /* Can't just check 'opinfo == OC_PRINT' here, parser ORs
3001 * additional bits to opinfos of print/printf with redirects
3002 */
3003 if ((opinfo & OPCLSMASK) == OC_PRINT) {
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00003004 if (!op1) {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003005 fputs(getvar_s(intvar[F0]), F);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003006 } else {
Denys Vlasenkoca9278e2021-06-30 12:42:39 +02003007 for (;;) {
Denys Vlasenko15734872021-07-01 16:02:16 +02003008 var *v = evaluate(nextarg(&op1), TMPVAR0);
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003009 if (v->type & VF_NUMBER) {
Denys Vlasenkoe2e38022021-07-04 01:25:34 +02003010 fmt_num(getvar_s(intvar[OFMT]),
3011 getvar_i(v));
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003012 fputs(g_buf, F);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003013 } else {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003014 fputs(getvar_s(v), F);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003015 }
Denys Vlasenkoca9278e2021-06-30 12:42:39 +02003016 if (!op1)
3017 break;
3018 fputs(getvar_s(intvar[OFS]), F);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003019 }
3020 }
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003021 fputs(getvar_s(intvar[ORS]), F);
Denys Vlasenko08ca3132021-07-03 13:57:47 +02003022 } else { /* PRINTF */
Denys Vlasenko4ef88412021-07-11 12:25:33 +02003023 IF_FEATURE_AWK_GNU_EXTENSIONS(size_t len;)
Ron Yorstone8fe9f92021-01-27 11:19:14 +00003024 char *s = awk_printf(op1, &len);
3025#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3026 fwrite(s, len, 1, F);
3027#else
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003028 fputs(s, F);
Ron Yorstone8fe9f92021-01-27 11:19:14 +00003029#endif
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003030 free(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003031 }
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003032 fflush(F);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003033 break;
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003034 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003035
Denys Vlasenko640212a2021-07-02 15:19:14 +02003036 case XC( OC_DELETE ):
3037 debug_printf_eval("DELETE\n");
3038 {
3039 /* "delete" is special:
3040 * "delete array[var--]" must evaluate index expr only once.
3041 */
3042 uint32_t info = op1->info & OPCLSMASK;
3043 var *v;
3044
3045 if (info == OC_VAR) {
3046 v = op1->l.v;
3047 } else if (info == OC_FNARG) {
3048 v = &fnargs[op1->l.aidx];
3049 } else {
3050 syntax_error(EMSG_NOT_ARRAY);
3051 }
3052 if (op1->r.n) { /* array ref? */
3053 const char *s;
3054 s = getvar_s(evaluate(op1->r.n, TMPVAR0));
3055 hash_remove(iamarray(v), s);
3056 } else {
3057 clear_array(iamarray(v));
3058 }
3059 break;
3060 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003061
Denis Vlasenkof782f522007-01-01 23:51:30 +00003062 case XC( OC_NEWSOURCE ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003063 debug_printf_eval("NEWSOURCE\n");
Denys Vlasenko7b81db12010-03-12 21:04:47 +01003064 g_progname = op->l.new_progname;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003065 break;
3066
Denis Vlasenkof782f522007-01-01 23:51:30 +00003067 case XC( OC_RETURN ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003068 debug_printf_eval("RETURN\n");
Glenn L McGrath545106f2002-11-11 06:21:00 +00003069 copyvar(res, L.v);
3070 break;
3071
Denis Vlasenkof782f522007-01-01 23:51:30 +00003072 case XC( OC_NEXTFILE ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003073 debug_printf_eval("NEXTFILE\n");
Mike Frysingerde2b9382005-09-27 03:18:00 +00003074 nextfile = TRUE;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003075 case XC( OC_NEXT ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003076 debug_printf_eval("NEXT\n");
Mike Frysingerde2b9382005-09-27 03:18:00 +00003077 nextrec = TRUE;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003078 case XC( OC_DONE ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003079 debug_printf_eval("DONE\n");
Glenn L McGrath545106f2002-11-11 06:21:00 +00003080 clrvar(res);
3081 break;
3082
Denis Vlasenkof782f522007-01-01 23:51:30 +00003083 case XC( OC_EXIT ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003084 debug_printf_eval("EXIT\n");
Denys Vlasenko4d902ea2021-07-02 22:28:51 +02003085 if (op1)
3086 G.exitcode = (int)L_d;
3087 awk_exit();
Glenn L McGrath545106f2002-11-11 06:21:00 +00003088
Denis Vlasenkocd5c7862007-05-17 16:37:22 +00003089 /* -- recursive node type -- */
Glenn L McGrath545106f2002-11-11 06:21:00 +00003090
Denis Vlasenkof782f522007-01-01 23:51:30 +00003091 case XC( OC_VAR ):
Denys Vlasenko6f4a7852018-01-07 01:19:08 +01003092 debug_printf_eval("VAR\n");
Mike Frysingerde2b9382005-09-27 03:18:00 +00003093 L.v = op->l.v;
Denis Vlasenkoffba9412007-05-17 23:03:35 +00003094 if (L.v == intvar[NF])
Glenn L McGrath545106f2002-11-11 06:21:00 +00003095 split_f0();
3096 goto v_cont;
3097
Denis Vlasenkof782f522007-01-01 23:51:30 +00003098 case XC( OC_FNARG ):
Denys Vlasenko6f4a7852018-01-07 01:19:08 +01003099 debug_printf_eval("FNARG[%d]\n", op->l.aidx);
Denys Vlasenko7b81db12010-03-12 21:04:47 +01003100 L.v = &fnargs[op->l.aidx];
Denis Vlasenkoe1d3e032007-01-01 23:53:52 +00003101 v_cont:
3102 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003103 break;
3104
Denis Vlasenkof782f522007-01-01 23:51:30 +00003105 case XC( OC_IN ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003106 debug_printf_eval("IN\n");
Glenn L McGrath545106f2002-11-11 06:21:00 +00003107 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
3108 break;
3109
Denis Vlasenkof782f522007-01-01 23:51:30 +00003110 case XC( OC_REGEXP ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003111 debug_printf_eval("REGEXP\n");
Mike Frysingerde2b9382005-09-27 03:18:00 +00003112 op1 = op;
Denis Vlasenkoffba9412007-05-17 23:03:35 +00003113 L.s = getvar_s(intvar[F0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003114 goto re_cont;
3115
Denis Vlasenkof782f522007-01-01 23:51:30 +00003116 case XC( OC_MATCH ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003117 debug_printf_eval("MATCH\n");
Mike Frysingerde2b9382005-09-27 03:18:00 +00003118 op1 = op->r.n;
Denis Vlasenkoe1d3e032007-01-01 23:53:52 +00003119 re_cont:
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003120 {
3121 regex_t *re = as_regex(op1, &sreg);
3122 int i = regexec(re, L.s, 0, NULL, 0);
3123 if (re == &sreg)
3124 regfree(re);
3125 setvar_i(res, (i == 0) ^ (opn == '!'));
3126 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003127 break;
3128
Denis Vlasenkof782f522007-01-01 23:51:30 +00003129 case XC( OC_MOVE ):
Denys Vlasenkod527e0c2010-10-05 13:22:11 +02003130 debug_printf_eval("MOVE\n");
			/* if source is a temporary string, just relink it to dest */
Denys Vlasenkocb042b02021-07-03 13:29:32 +02003132 if (R.v == TMPVAR1
3133 && !(R.v->type & VF_NUMBER)
3134 /* Why check !NUMBER? if R.v is a number but has cached R.v->string,
3135 * L.v ends up a string, which is wrong */
3136 /*&& R.v->string - always not NULL (right?) */
3137 ) {
3138 res = setvar_p(L.v, R.v->string); /* avoids strdup */
3139 R.v->string = NULL;
3140 } else {
Mike Frysingerde2b9382005-09-27 03:18:00 +00003141 res = copyvar(L.v, R.v);
Denys Vlasenkocb042b02021-07-03 13:29:32 +02003142 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003143 break;
3144
Denis Vlasenkof782f522007-01-01 23:51:30 +00003145 case XC( OC_TERNARY ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003146 debug_printf_eval("TERNARY\n");
Denys Vlasenko08ca3132021-07-03 13:57:47 +02003147 if (op->r.n->info != TI_COLON)
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00003148 syntax_error(EMSG_POSSIBLE_ERROR);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003149 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
3150 break;
3151
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003152 case XC( OC_FUNC ): {
Denys Vlasenko15734872021-07-01 16:02:16 +02003153 var *argvars, *sv_fnargs;
Denys Vlasenkof9782ff2010-03-12 21:32:13 +01003154 const char *sv_progname;
Denys Vlasenkod7354df2021-06-30 12:52:51 +02003155 int nargs, i;
Denys Vlasenko6cf6f1e2021-06-30 02:12:27 +02003156
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003157 debug_printf_eval("FUNC\n");
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003158
Denys Vlasenkod1507102021-06-30 12:23:51 +02003159 if (!op->r.f->defined)
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00003160 syntax_error(EMSG_UNDEF_FUNC);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003161
Denys Vlasenko6cf6f1e2021-06-30 02:12:27 +02003162 /* The body might be empty, still has to eval the args */
Denys Vlasenkod7354df2021-06-30 12:52:51 +02003163 nargs = op->r.f->nargs;
Denys Vlasenko15734872021-07-01 16:02:16 +02003164 argvars = nvalloc(nargs);
Denys Vlasenko6cf6f1e2021-06-30 02:12:27 +02003165 i = 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003166 while (op1) {
Denys Vlasenko15734872021-07-01 16:02:16 +02003167 var *arg = evaluate(nextarg(&op1), TMPVAR0);
Denys Vlasenkod7354df2021-06-30 12:52:51 +02003168 if (i == nargs) {
3169 /* call with more arguments than function takes.
3170 * (gawk warns: "warning: function 'f' called with more arguments than declared").
3171 * They are still evaluated, but discarded: */
3172 clrvar(arg);
3173 continue;
3174 }
Denys Vlasenko15734872021-07-01 16:02:16 +02003175 copyvar(&argvars[i], arg);
3176 argvars[i].type |= VF_CHILD;
3177 argvars[i].x.parent = arg;
Denys Vlasenkod7354df2021-06-30 12:52:51 +02003178 i++;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003179 }
3180
Denys Vlasenko6cf6f1e2021-06-30 02:12:27 +02003181 sv_fnargs = fnargs;
Denys Vlasenkof9782ff2010-03-12 21:32:13 +01003182 sv_progname = g_progname;
3183
Denys Vlasenko15734872021-07-01 16:02:16 +02003184 fnargs = argvars;
Denys Vlasenkof9782ff2010-03-12 21:32:13 +01003185 res = evaluate(op->r.f->body.first, res);
Denys Vlasenko15734872021-07-01 16:02:16 +02003186 nvfree(argvars, nargs);
Denys Vlasenkof9782ff2010-03-12 21:32:13 +01003187
3188 g_progname = sv_progname;
Denys Vlasenko6cf6f1e2021-06-30 02:12:27 +02003189 fnargs = sv_fnargs;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003190
Glenn L McGrath545106f2002-11-11 06:21:00 +00003191 break;
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003192 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003193
Denis Vlasenkof782f522007-01-01 23:51:30 +00003194 case XC( OC_GETLINE ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003195 debug_printf_eval("GETLINE /\n");
3196 case XC( OC_PGETLINE ):
3197 debug_printf_eval("PGETLINE\n");
3198 {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003199 rstream *rsm;
3200 int i;
3201
Mike Frysingerde2b9382005-09-27 03:18:00 +00003202 if (op1) {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003203 rsm = newfile(L.s);
3204 if (!rsm->F) {
Denys Vlasenko39aabfe2021-07-11 12:51:43 +02003205 /* NB: can't use "opinfo == TI_PGETLINE", would break "cmd" | getline */
3206 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003207 rsm->F = popen(L.s, "r");
3208 rsm->is_pipe = TRUE;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003209 } else {
Denys Vlasenkofb132e42010-10-29 11:46:52 +02003210 rsm->F = fopen_for_read(L.s); /* not xfopen! */
Glenn L McGrath545106f2002-11-11 06:21:00 +00003211 }
3212 }
3213 } else {
Denys Vlasenko6ebdf7a2010-03-11 12:41:55 +01003214 if (!iF)
3215 iF = next_input_file();
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003216 rsm = iF;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003217 }
3218
Denys Vlasenkof65c5f52011-09-07 20:01:39 +02003219 if (!rsm || !rsm->F) {
Denis Vlasenkoffba9412007-05-17 23:03:35 +00003220 setvar_i(intvar[ERRNO], errno);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003221 setvar_i(res, -1);
3222 break;
3223 }
3224
Denis Vlasenkoffba9412007-05-17 23:03:35 +00003225 if (!op->r.n)
3226 R.v = intvar[F0];
Glenn L McGrath545106f2002-11-11 06:21:00 +00003227
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003228 i = awk_getline(rsm, R.v);
3229 if (i > 0 && !op1) {
3230 incvar(intvar[FNR]);
3231 incvar(intvar[NR]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003232 }
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003233 setvar_i(res, i);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003234 break;
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003235 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003236
Denis Vlasenkocd5c7862007-05-17 16:37:22 +00003237 /* simple builtins */
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003238 case XC( OC_FBLTIN ): {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003239 double R_d = R_d; /* for compiler */
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003240 debug_printf_eval("FBLTIN\n");
Glenn L McGrath545106f2002-11-11 06:21:00 +00003241
Denys Vlasenko08ca3132021-07-03 13:57:47 +02003242 if (op1 && op1->info == TI_COMMA)
Denys Vlasenko47d91332021-07-02 18:28:12 +02003243 /* Simple builtins take one arg maximum */
3244 syntax_error("Too many arguments");
3245
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003246 switch (opn) {
Denis Vlasenkof782f522007-01-01 23:51:30 +00003247 case F_in:
Denys Vlasenko1390a012013-07-20 21:23:01 +02003248 R_d = (long long)L_d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003249 break;
3250
Denys Vlasenko47d91332021-07-02 18:28:12 +02003251 case F_rn: /*rand*/
3252 if (op1)
3253 syntax_error("Too many arguments");
Denys Vlasenko8bb03da2021-07-02 19:38:03 +02003254 {
3255#if RAND_MAX >= 0x7fffffff
3256 uint32_t u = ((uint32_t)rand() << 16) ^ rand();
3257 uint64_t v = ((uint64_t)rand() << 32) | u;
3258 /* the above shift+or is optimized out on 32-bit arches */
3259# if RAND_MAX > 0x7fffffff
Denys Vlasenko0e3ef4e2021-07-03 11:57:59 +02003260 v &= 0x7fffffffffffffffULL;
Denys Vlasenko8bb03da2021-07-02 19:38:03 +02003261# endif
Denys Vlasenko0e3ef4e2021-07-03 11:57:59 +02003262 R_d = (double)v / 0x8000000000000000ULL;
Denys Vlasenko8bb03da2021-07-02 19:38:03 +02003263#else
3264# error Not implemented for this value of RAND_MAX
3265#endif
Glenn L McGrath545106f2002-11-11 06:21:00 +00003266 break;
Denys Vlasenko8bb03da2021-07-02 19:38:03 +02003267 }
Denis Vlasenkof782f522007-01-01 23:51:30 +00003268 case F_co:
Rob Landleyd8205b32010-10-24 03:27:22 +02003269 if (ENABLE_FEATURE_AWK_LIBM) {
3270 R_d = cos(L_d);
3271 break;
3272 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003273
Denis Vlasenkof782f522007-01-01 23:51:30 +00003274 case F_ex:
Rob Landleyd8205b32010-10-24 03:27:22 +02003275 if (ENABLE_FEATURE_AWK_LIBM) {
3276 R_d = exp(L_d);
3277 break;
3278 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003279
Denis Vlasenkof782f522007-01-01 23:51:30 +00003280 case F_lg:
Rob Landleyd8205b32010-10-24 03:27:22 +02003281 if (ENABLE_FEATURE_AWK_LIBM) {
3282 R_d = log(L_d);
3283 break;
3284 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003285
Denis Vlasenkof782f522007-01-01 23:51:30 +00003286 case F_si:
Rob Landleyd8205b32010-10-24 03:27:22 +02003287 if (ENABLE_FEATURE_AWK_LIBM) {
3288 R_d = sin(L_d);
3289 break;
3290 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003291
Denis Vlasenkof782f522007-01-01 23:51:30 +00003292 case F_sq:
Rob Landleyd8205b32010-10-24 03:27:22 +02003293 if (ENABLE_FEATURE_AWK_LIBM) {
3294 R_d = sqrt(L_d);
3295 break;
3296 }
3297
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00003298 syntax_error(EMSG_NO_MATH);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003299 break;
Rob Landleyd8205b32010-10-24 03:27:22 +02003300
Denis Vlasenkof782f522007-01-01 23:51:30 +00003301 case F_sr:
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003302 R_d = (double)seed;
3303 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003304 srand(seed);
3305 break;
3306
Denys Vlasenko47d91332021-07-02 18:28:12 +02003307 case F_ti: /*systime*/
3308 if (op1)
3309 syntax_error("Too many arguments");
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003310 R_d = time(NULL);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003311 break;
3312
Denis Vlasenkof782f522007-01-01 23:51:30 +00003313 case F_le:
Denys Vlasenko7985bc12013-10-12 04:51:54 +02003314 debug_printf_eval("length: L.s:'%s'\n", L.s);
3315 if (!op1) {
Denis Vlasenkoffba9412007-05-17 23:03:35 +00003316 L.s = getvar_s(intvar[F0]);
Denys Vlasenko7985bc12013-10-12 04:51:54 +02003317 debug_printf_eval("length: L.s='%s'\n", L.s);
3318 }
3319 else if (L.v->type & VF_ARRAY) {
3320 R_d = L.v->x.array->nel;
3321 debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
3322 break;
3323 }
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003324 R_d = strlen(L.s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003325 break;
3326
Denis Vlasenkof782f522007-01-01 23:51:30 +00003327 case F_sy:
Denys Vlasenko8131eea2009-11-02 14:19:51 +01003328 fflush_all();
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003329 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
Denis Vlasenko249fabf2006-12-19 00:29:22 +00003330 ? (system(L.s) >> 8) : 0;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003331 break;
3332
Denis Vlasenkof782f522007-01-01 23:51:30 +00003333 case F_ff:
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003334 if (!op1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00003335 fflush(stdout);
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003336 } else if (L.s && *L.s) {
Denys Vlasenko6a0d7492010-10-23 21:02:15 +02003337 rstream *rsm = newfile(L.s);
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003338 fflush(rsm->F);
3339 } else {
3340 fflush_all();
Glenn L McGrath545106f2002-11-11 06:21:00 +00003341 }
3342 break;
3343
Denys Vlasenko6a0d7492010-10-23 21:02:15 +02003344 case F_cl: {
3345 rstream *rsm;
3346 int err = 0;
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003347 rsm = (rstream *)hash_search(fdhash, L.s);
Denys Vlasenko786ca192021-07-02 17:32:08 +02003348 debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm);
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003349 if (rsm) {
Denys Vlasenko6a0d7492010-10-23 21:02:15 +02003350 debug_printf_eval("OC_FBLTIN F_cl "
3351 "rsm->is_pipe:%d, ->F:%p\n",
3352 rsm->is_pipe, rsm->F);
3353 /* Can be NULL if open failed. Example:
3354 * getline line <"doesnt_exist";
3355 * close("doesnt_exist"); <--- here rsm->F is NULL
3356 */
3357 if (rsm->F)
3358 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
Denys Vlasenko786ca192021-07-02 17:32:08 +02003359//TODO: fix this case:
3360// $ awk 'BEGIN { print close(""); print ERRNO }'
3361// -1
3362// close of redirection that was never opened
3363// (we print 0, 0)
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003364 free(rsm->buffer);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003365 hash_remove(fdhash, L.s);
3366 }
Denys Vlasenko6a0d7492010-10-23 21:02:15 +02003367 if (err)
Denis Vlasenkoffba9412007-05-17 23:03:35 +00003368 setvar_i(intvar[ERRNO], errno);
Denys Vlasenko6a0d7492010-10-23 21:02:15 +02003369 R_d = (double)err;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003370 break;
3371 }
Denys Vlasenko6a0d7492010-10-23 21:02:15 +02003372 } /* switch */
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003373 setvar_i(res, R_d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003374 break;
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003375 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003376
Denis Vlasenkof782f522007-01-01 23:51:30 +00003377 case XC( OC_BUILTIN ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003378 debug_printf_eval("BUILTIN\n");
Glenn L McGrath545106f2002-11-11 06:21:00 +00003379 res = exec_builtin(op, res);
3380 break;
3381
Denis Vlasenkof782f522007-01-01 23:51:30 +00003382 case XC( OC_SPRINTF ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003383 debug_printf_eval("SPRINTF\n");
Ron Yorstone8fe9f92021-01-27 11:19:14 +00003384 setvar_p(res, awk_printf(op1, NULL));
Glenn L McGrath545106f2002-11-11 06:21:00 +00003385 break;
3386
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003387 case XC( OC_UNARY ):
3388 debug_printf_eval("UNARY\n");
3389 {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003390 double Ld, R_d;
3391
3392 Ld = R_d = getvar_i(R.v);
Mike Frysingerde2b9382005-09-27 03:18:00 +00003393 switch (opn) {
Denis Vlasenkof782f522007-01-01 23:51:30 +00003394 case 'P':
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003395 Ld = ++R_d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003396 goto r_op_change;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003397 case 'p':
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003398 R_d++;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003399 goto r_op_change;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003400 case 'M':
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003401 Ld = --R_d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003402 goto r_op_change;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003403 case 'm':
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003404 R_d--;
3405 r_op_change:
3406 setvar_i(R.v, R_d);
3407 break;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003408 case '!':
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003409 Ld = !istrue(R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003410 break;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003411 case '-':
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003412 Ld = -R_d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003413 break;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003414 }
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003415 setvar_i(res, Ld);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003416 break;
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003417 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003418
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003419 case XC( OC_FIELD ):
3420 debug_printf_eval("FIELD\n");
3421 {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003422 int i = (int)getvar_i(R.v);
Denys Vlasenko2454e672018-04-23 10:53:18 +02003423 if (i < 0)
3424 syntax_error(EMSG_NEGATIVE_FIELD);
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003425 if (i == 0) {
Denis Vlasenkoffba9412007-05-17 23:03:35 +00003426 res = intvar[F0];
Glenn L McGrath545106f2002-11-11 06:21:00 +00003427 } else {
3428 split_f0();
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003429 if (i > nfields)
3430 fsrealloc(i);
3431 res = &Fields[i - 1];
Glenn L McGrath545106f2002-11-11 06:21:00 +00003432 }
3433 break;
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003434 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003435
Denis Vlasenkocd5c7862007-05-17 16:37:22 +00003436 /* concatenation (" ") and index joining (",") */
Denis Vlasenkof782f522007-01-01 23:51:30 +00003437 case XC( OC_CONCAT ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003438 debug_printf_eval("CONCAT /\n");
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003439 case XC( OC_COMMA ): {
3440 const char *sep = "";
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003441 debug_printf_eval("COMMA\n");
Denys Vlasenko08ca3132021-07-03 13:57:47 +02003442 if (opinfo == TI_COMMA)
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003443 sep = getvar_s(intvar[SUBSEP]);
3444 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
Glenn L McGrath545106f2002-11-11 06:21:00 +00003445 break;
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003446 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003447
Denis Vlasenkof782f522007-01-01 23:51:30 +00003448 case XC( OC_LAND ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003449 debug_printf_eval("LAND\n");
Glenn L McGrath545106f2002-11-11 06:21:00 +00003450 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
3451 break;
3452
Denis Vlasenkof782f522007-01-01 23:51:30 +00003453 case XC( OC_LOR ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003454 debug_printf_eval("LOR\n");
Glenn L McGrath545106f2002-11-11 06:21:00 +00003455 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
3456 break;
3457
Denis Vlasenkof782f522007-01-01 23:51:30 +00003458 case XC( OC_BINARY ):
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003459 debug_printf_eval("BINARY /\n");
3460 case XC( OC_REPLACE ):
3461 debug_printf_eval("REPLACE\n");
3462 {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003463 double R_d = getvar_i(R.v);
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003464 debug_printf_eval("R_d:%f opn:%c\n", R_d, opn);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003465 switch (opn) {
Denis Vlasenkof782f522007-01-01 23:51:30 +00003466 case '+':
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003467 L_d += R_d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003468 break;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003469 case '-':
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003470 L_d -= R_d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003471 break;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003472 case '*':
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003473 L_d *= R_d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003474 break;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003475 case '/':
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003476 if (R_d == 0)
Denys Vlasenkocdeda162009-11-30 01:14:16 +01003477 syntax_error(EMSG_DIV_BY_ZERO);
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003478 L_d /= R_d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003479 break;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003480 case '&':
Rob Landleyd8205b32010-10-24 03:27:22 +02003481 if (ENABLE_FEATURE_AWK_LIBM)
3482 L_d = pow(L_d, R_d);
3483 else
3484 syntax_error(EMSG_NO_MATH);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003485 break;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003486 case '%':
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003487 if (R_d == 0)
Denys Vlasenkocdeda162009-11-30 01:14:16 +01003488 syntax_error(EMSG_DIV_BY_ZERO);
Denys Vlasenko1390a012013-07-20 21:23:01 +02003489 L_d -= (long long)(L_d / R_d) * R_d;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003490 break;
3491 }
Denys Vlasenkod527e0c2010-10-05 13:22:11 +02003492 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003493 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003494 break;
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003495 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003496
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003497 case XC( OC_COMPARE ): {
3498 int i = i; /* for compiler */
3499 double Ld;
Denys Vlasenkoa885ce12021-06-16 09:18:08 +02003500 debug_printf_eval("COMPARE\n");
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003501
Glenn L McGrath545106f2002-11-11 06:21:00 +00003502 if (is_numeric(L.v) && is_numeric(R.v)) {
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003503 Ld = getvar_i(L.v) - getvar_i(R.v);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003504 } else {
Denys Vlasenkof9782ff2010-03-12 21:32:13 +01003505 const char *l = getvar_s(L.v);
3506 const char *r = getvar_s(R.v);
3507 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003508 }
3509 switch (opn & 0xfe) {
Denis Vlasenkof782f522007-01-01 23:51:30 +00003510 case 0:
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003511 i = (Ld > 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003512 break;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003513 case 2:
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003514 i = (Ld >= 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003515 break;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003516 case 4:
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003517 i = (Ld == 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003518 break;
3519 }
Denys Vlasenkoe2952df2022-01-08 22:42:35 +01003520 debug_printf_eval("COMPARE result: %d\n", (i == 0) ^ (opn & 1));
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003521 setvar_i(res, (i == 0) ^ (opn & 1));
Glenn L McGrath545106f2002-11-11 06:21:00 +00003522 break;
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003523 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003524
Denis Vlasenkof782f522007-01-01 23:51:30 +00003525 default:
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00003526 syntax_error(EMSG_POSSIBLE_ERROR);
Denys Vlasenko6f4a7852018-01-07 01:19:08 +01003527 } /* switch */
Denys Vlasenko640212a2021-07-02 15:19:14 +02003528
Glenn L McGrath545106f2002-11-11 06:21:00 +00003529 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3530 op = op->a.n;
3531 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3532 break;
3533 if (nextrec)
3534 break;
Denys Vlasenkoc6ba9972010-03-12 21:05:09 +01003535 } /* while (op) */
3536
Denys Vlasenko15734872021-07-01 16:02:16 +02003537 nvfree(tmpvars, 2);
3538#undef TMPVAR0
3539#undef TMPVAR1
3540
Denys Vlasenko6a0d7492010-10-23 21:02:15 +02003541 debug_printf_eval("returning from %s(): %p\n", __func__, res);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003542 return res;
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00003543#undef fnargs
3544#undef seed
3545#undef sreg
Glenn L McGrath545106f2002-11-11 06:21:00 +00003546}
3547
Glenn L McGrath545106f2002-11-11 06:21:00 +00003548/* -------- main & co. -------- */
3549
Denys Vlasenko4d902ea2021-07-02 22:28:51 +02003550static int awk_exit(void)
Mike Frysinger10a11e22005-09-27 02:23:02 +00003551{
Denis Vlasenkof782f522007-01-01 23:51:30 +00003552 unsigned i;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003553
3554 if (!exiting) {
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00003555 exiting = TRUE;
Glenn L McGrathca29ffc2004-09-24 09:24:27 +00003556 nextrec = FALSE;
Denys Vlasenko966cafc2021-07-02 14:33:13 +02003557 evaluate(endseq.first, &G.exit__tmpvar);
Glenn L McGrath00ed36f2003-10-30 13:36:39 +00003558 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003559
3560 /* waiting for children */
Denis Vlasenkof782f522007-01-01 23:51:30 +00003561 for (i = 0; i < fdhash->csize; i++) {
Denys Vlasenko78645d82021-06-25 19:41:05 +02003562 hash_item *hi;
Glenn L McGrath545106f2002-11-11 06:21:00 +00003563 hi = fdhash->items[i];
Denis Vlasenkobf0a2012006-12-26 10:42:51 +00003564 while (hi) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00003565 if (hi->data.rs.F && hi->data.rs.is_pipe)
3566 pclose(hi->data.rs.F);
3567 hi = hi->next;
3568 }
3569 }
3570
Denys Vlasenko4d902ea2021-07-02 22:28:51 +02003571 exit(G.exitcode);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003572}
3573
Denis Vlasenko9b49a5e2007-10-11 10:05:36 +00003574int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
Denys Vlasenko844a6c52018-01-07 14:39:34 +01003575int awk_main(int argc UNUSED_PARAM, char **argv)
Mike Frysinger10a11e22005-09-27 02:23:02 +00003576{
Denis Vlasenko67b23e62006-10-03 21:00:06 +00003577 unsigned opt;
Denys Vlasenko7b46d112011-09-11 00:30:56 +02003578 char *opt_F;
Denis Vlasenko3bb2bbd2008-07-01 01:57:36 +00003579 llist_t *list_v = NULL;
3580 llist_t *list_f = NULL;
Sven-Göran Berghf200f732013-11-12 14:18:25 +01003581#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3582 llist_t *list_e = NULL;
3583#endif
Denys Vlasenko8c5da032021-06-25 19:38:27 +02003584 int i;
Denis Vlasenkof782f522007-01-01 23:51:30 +00003585
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00003586 INIT_G();
3587
Denis Vlasenko150f4022007-01-13 21:06:21 +00003588 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
Denis Vlasenko6dc6ebb2007-01-01 23:53:12 +00003589 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3590 if (ENABLE_LOCALE_SUPPORT)
3591 setlocale(LC_NUMERIC, "C");
3592
Glenn L McGrath545106f2002-11-11 06:21:00 +00003593 /* initialize variables */
Denys Vlasenko21fbee22021-06-29 14:33:04 +02003594 vhash = hash_init();
Denys Vlasenko78645d82021-06-25 19:41:05 +02003595 {
3596 char *vnames = (char *)vNames; /* cheat */
3597 char *vvalues = (char *)vValues;
3598 for (i = 0; *vnames; i++) {
3599 var *v;
3600 intvar[i] = v = newvar(nextword(&vnames));
3601 if (*vvalues != '\377')
3602 setvar_s(v, nextword(&vvalues));
3603 else
3604 setvar_i(v, 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003605
Denys Vlasenko78645d82021-06-25 19:41:05 +02003606 if (*vnames == '*') {
3607 v->type |= VF_SPECIAL;
3608 vnames++;
3609 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003610 }
3611 }
3612
Denis Vlasenkoffba9412007-05-17 23:03:35 +00003613 handle_special(intvar[FS]);
3614 handle_special(intvar[RS]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003615
Denis Vlasenkof71d9162007-05-03 22:57:56 +00003616 /* Huh, people report that sometimes environ is NULL. Oh well. */
Denys Vlasenko78645d82021-06-25 19:41:05 +02003617 if (environ) {
3618 char **envp;
3619 for (envp = environ; *envp; envp++) {
3620 /* environ is writable, thus we don't strdup it needlessly */
3621 char *s = *envp;
3622 char *s1 = strchr(s, '=');
3623 if (s1) {
3624 *s1 = '\0';
3625 /* Both findvar and setvar_u take const char*
3626 * as 2nd arg -> environment is not trashed */
3627 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3628 *s1 = '=';
3629 }
Eric Andersen67776be2004-07-30 23:52:08 +00003630 }
Glenn L McGrath545106f2002-11-11 06:21:00 +00003631 }
Sven-Göran Berghf200f732013-11-12 14:18:25 +01003632 opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
Denis Vlasenkof782f522007-01-01 23:51:30 +00003633 argv += optind;
Denys Vlasenko844a6c52018-01-07 14:39:34 +01003634 //argc -= optind;
Denys Vlasenkobd0e2212013-11-21 15:09:55 +01003635 if (opt & OPT_W)
James Byrne69374872019-07-02 11:35:03 +02003636 bb_simple_error_msg("warning: option -W is ignored");
Denys Vlasenkobd0e2212013-11-21 15:09:55 +01003637 if (opt & OPT_F) {
Denys Vlasenkoea664dd2012-06-22 18:41:01 +02003638 unescape_string_in_place(opt_F);
3639 setvar_s(intvar[FS], opt_F);
3640 }
Denys Vlasenkobd0e2212013-11-21 15:09:55 +01003641 while (list_v) {
Denis Vlasenko3bb2bbd2008-07-01 01:57:36 +00003642 if (!is_assignment(llist_pop(&list_v)))
Denis Vlasenkobe644a82007-03-10 17:22:14 +00003643 bb_show_usage();
3644 }
Denys Vlasenko21fbee22021-06-29 14:33:04 +02003645
3646 /* Parse all supplied programs */
3647 fnhash = hash_init();
3648 ahash = hash_init();
Denys Vlasenkobd0e2212013-11-21 15:09:55 +01003649 while (list_f) {
Denys Vlasenko8c5da032021-06-25 19:38:27 +02003650 int fd;
3651 char *s;
Denis Vlasenko3bb2bbd2008-07-01 01:57:36 +00003652
Sven-Göran Berghf200f732013-11-12 14:18:25 +01003653 g_progname = llist_pop(&list_f);
Denys Vlasenko8c5da032021-06-25 19:38:27 +02003654 fd = xopen_stdin(g_progname);
Denys Vlasenko4f275032021-06-29 03:27:07 +02003655 s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
Denys Vlasenko8c5da032021-06-25 19:38:27 +02003656 close(fd);
Denys Vlasenko4f275032021-06-29 03:27:07 +02003657 parse_program(s);
Sven-Göran Berghf200f732013-11-12 14:18:25 +01003658 free(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003659 }
Sven-Göran Berghf200f732013-11-12 14:18:25 +01003660 g_progname = "cmd. line";
3661#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
Denys Vlasenkobd0e2212013-11-21 15:09:55 +01003662 while (list_e) {
Sven-Göran Berghf200f732013-11-12 14:18:25 +01003663 parse_program(llist_pop(&list_e));
3664 }
3665#endif
Denys Vlasenko646429e2021-07-02 23:24:52 +02003666//FIXME: preserve order of -e and -f
3667//TODO: implement -i LIBRARY and -E FILE too, they are easy-ish
Sven-Göran Berghf200f732013-11-12 14:18:25 +01003668 if (!(opt & (OPT_f | OPT_e))) {
3669 if (!*argv)
3670 bb_show_usage();
3671 parse_program(*argv++);
Sven-Göran Berghf200f732013-11-12 14:18:25 +01003672 }
Denys Vlasenkob3c91a12021-06-29 18:33:25 +02003673 /* Free unused parse structures */
3674 //hash_free(fnhash); // ~250 bytes when empty, used only for function names
3675 //^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs
3676 // (IOW: hash_clear() assumes it's a hash of variables. fnhash is not).
3677 free(fnhash->items);
3678 free(fnhash);
3679 fnhash = NULL; // debug
3680 //hash_free(ahash); // empty after parsing, will reuse as fdhash instead of freeing
3681
3682 /* Parsing done, on to executing */
Glenn L McGrath545106f2002-11-11 06:21:00 +00003683
Glenn L McGrath545106f2002-11-11 06:21:00 +00003684 /* fill in ARGV array */
Denis Vlasenkoffba9412007-05-17 23:03:35 +00003685 setari_u(intvar[ARGV], 0, "awk");
Denis Vlasenkof782f522007-01-01 23:51:30 +00003686 i = 0;
3687 while (*argv)
Denis Vlasenkoffba9412007-05-17 23:03:35 +00003688 setari_u(intvar[ARGV], ++i, *argv++);
Denys Vlasenko844a6c52018-01-07 14:39:34 +01003689 setvar_i(intvar[ARGC], i + 1);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003690
Denys Vlasenko3aff3b92021-06-29 19:07:36 +02003691 //fdhash = ahash; // done via define
Denys Vlasenko21fbee22021-06-29 14:33:04 +02003692 newfile("/dev/stdin")->F = stdin;
3693 newfile("/dev/stdout")->F = stdout;
3694 newfile("/dev/stderr")->F = stderr;
3695
Denys Vlasenko966cafc2021-07-02 14:33:13 +02003696 evaluate(beginseq.first, &G.main__tmpvar);
Denis Vlasenkof782f522007-01-01 23:51:30 +00003697 if (!mainseq.first && !endseq.first)
Denys Vlasenko4d902ea2021-07-02 22:28:51 +02003698 awk_exit();
Glenn L McGrath545106f2002-11-11 06:21:00 +00003699
3700 /* input file could already be opened in BEGIN block */
Denys Vlasenkocdeda162009-11-30 01:14:16 +01003701 if (!iF)
3702 iF = next_input_file();
Glenn L McGrath545106f2002-11-11 06:21:00 +00003703
3704 /* passing through input files */
3705 while (iF) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00003706 nextfile = FALSE;
Denis Vlasenkoffba9412007-05-17 23:03:35 +00003707 setvar_i(intvar[FNR], 0);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003708
Denis Vlasenkoffba9412007-05-17 23:03:35 +00003709 while ((i = awk_getline(iF, intvar[F0])) > 0) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00003710 nextrec = FALSE;
Denis Vlasenkoffba9412007-05-17 23:03:35 +00003711 incvar(intvar[NR]);
3712 incvar(intvar[FNR]);
Denys Vlasenko966cafc2021-07-02 14:33:13 +02003713 evaluate(mainseq.first, &G.main__tmpvar);
Glenn L McGrath545106f2002-11-11 06:21:00 +00003714
3715 if (nextfile)
3716 break;
3717 }
3718
Denis Vlasenkof782f522007-01-01 23:51:30 +00003719 if (i < 0)
Denis Vlasenkoae5a8aa2007-06-06 17:01:00 +00003720 syntax_error(strerror(errno));
Glenn L McGrath545106f2002-11-11 06:21:00 +00003721
3722 iF = next_input_file();
Glenn L McGrath545106f2002-11-11 06:21:00 +00003723 }
3724
Denys Vlasenko4d902ea2021-07-02 22:28:51 +02003725 awk_exit();
Denis Vlasenkof782f522007-01-01 23:51:30 +00003726 /*return 0;*/
Glenn L McGrath545106f2002-11-11 06:21:00 +00003727}