blob: d79501cc46cf71dd10484426c59117fcbeec3ea2 [file] [log] [blame]
Eric Andersen74bcd162001-07-30 21:41:37 +00001/* Copyright (c) 2001 Aaron Lehmann <aaronl@vitelus.com>
2
3 Permission is hereby granted, free of charge, to any person obtaining
4 a copy of this software and associated documentation files (the
5 "Software"), to deal in the Software without restriction, including
6 without limitation the rights to use, copy, modify, merge, publish,
7 distribute, sublicense, and/or sell copies of the Software, and to
8 permit persons to whom the Software is furnished to do so, subject to
9 the following conditions:
10
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21*/
22
23/* This is my infix parser/evaluator. It is optimized for size, intended
24 * as a replacement for yacc-based parsers. However, it may well be faster
25 * than a comparable parser writen in yacc. The supported operators are
26 * listed in #defines below. Parens, order of operations, and error handling
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +000027 * are supported. This code is threadsafe. The exact expression format should
28 * be that which POSIX specifies for shells. */
Glenn L McGrathde9e8032002-08-23 12:04:23 +000029
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +000030/* The code uses a simple two-stack algorithm. See
31 * http://www.onthenet.com.au/~grahamis/int2008/week02/lect02.html
32 * for a detailed explaination of the infix-to-postfix algorithm on which
33 * this is based (this code differs in that it applies operators immediately
34 * to the stack instead of adding them to a queue to end up with an
35 * expression). */
Glenn L McGrathde9e8032002-08-23 12:04:23 +000036
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +000037/* To use the routine, call it with an expression string and error return
38 * pointer */
Eric Andersen74bcd162001-07-30 21:41:37 +000039
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +000040/*
41 * Aug 24, 2001 Manuel Novoa III
42 *
43 * Reduced the generated code size by about 30% (i386) and fixed several bugs.
44 *
45 * 1) In arith_apply():
46 * a) Cached values of *numptr and &(numptr[-1]).
47 * b) Removed redundant test for zero denominator.
48 *
49 * 2) In arith():
50 * a) Eliminated redundant code for processing operator tokens by moving
51 * to a table-based implementation. Also folded handling of parens
52 * into the table.
53 * b) Combined all 3 loops which called arith_apply to reduce generated
54 * code size at the cost of speed.
55 *
56 * 3) The following expressions were treated as valid by the original code:
57 * 1() , 0! , 1 ( *3 ) .
58 * These bugs have been fixed by internally enclosing the expression in
59 * parens and then checking that all binary ops and right parens are
60 * preceded by a valid expression (NUM_TOKEN).
61 *
62 * Note: It may be desireable to replace Aaron's test for whitespace with
63 * ctype's isspace() if it is used by another busybox applet or if additional
64 * whitespace chars should be considered. Look below the "#include"s for a
65 * precompiler test.
66 */
67
68/*
69 * Aug 26, 2001 Manuel Novoa III
70 *
71 * Return 0 for null expressions. Pointed out by vodz.
72 *
73 * Merge in Aaron's comments previously posted to the busybox list,
74 * modified slightly to take account of my changes to the code.
75 *
76 * TODO: May want to allow access to variables in the arith code.
77 * This would:
78 * 1) allow us to evaluate $A as 0 if A isn't set (although this
79 * would require changes to ash.c too).
80 * 2) allow us to write expressions as $(( A + 2 )).
81 * This could be done using a callback function passed to the
82 * arith() function of by requiring such a function with fixed
83 * name as an extern.
84 */
Eric Andersen74bcd162001-07-30 21:41:37 +000085
86#include <stdlib.h>
87#include <string.h>
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +000088#include <ctype.h>
Eric Andersen74bcd162001-07-30 21:41:37 +000089#include "libbb.h"
90
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +000091/*
92 * Use "#if 1" below for Aaron's original test for whitespace.
93 * Use "#if 0" for ctype's isspace().
94 * */
95#if 1
96#undef isspace
97#define isspace(arithval) \
98 (arithval == ' ' || arithval == '\n' || arithval == '\t')
99#endif
100
Eric Andersen74bcd162001-07-30 21:41:37 +0000101typedef char operator;
102
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000103/* An operator's token id is a bit of a bitfield. The lower 5 bits are the
104 * precedence, and high 3 are an ID unique accross operators of that
105 * precedence. The ID portion is so that multiple operators can have the
106 * same precedence, ensuring that the leftmost one is evaluated first.
107 * Consider * and /. */
108
Eric Andersen74bcd162001-07-30 21:41:37 +0000109#define tok_decl(prec,id) (((id)<<5)|(prec))
110#define PREC(op) ((op)&0x1F)
111
112#define TOK_LPAREN tok_decl(0,0)
113
114#define TOK_OR tok_decl(1,0)
115
116#define TOK_AND tok_decl(2,0)
117
118#define TOK_BOR tok_decl(3,0)
119
120#define TOK_BXOR tok_decl(4,0)
121
122#define TOK_BAND tok_decl(5,0)
123
124#define TOK_EQ tok_decl(6,0)
125#define TOK_NE tok_decl(6,1)
126
127#define TOK_LT tok_decl(7,0)
128#define TOK_GT tok_decl(7,1)
129#define TOK_GE tok_decl(7,2)
130#define TOK_LE tok_decl(7,3)
131
132#define TOK_LSHIFT tok_decl(8,0)
133#define TOK_RSHIFT tok_decl(8,1)
134
135#define TOK_ADD tok_decl(9,0)
136#define TOK_SUB tok_decl(9,1)
137
138#define TOK_MUL tok_decl(10,0)
139#define TOK_DIV tok_decl(10,1)
140#define TOK_REM tok_decl(10,2)
141
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000142/* For now all unary operators have the same precedence, and that's used to
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000143 * identify them as unary operators
144 */
Eric Andersen74bcd162001-07-30 21:41:37 +0000145#define UNARYPREC 14
146#define TOK_BNOT tok_decl(UNARYPREC,0)
147#define TOK_NOT tok_decl(UNARYPREC,1)
148#define TOK_UMINUS tok_decl(UNARYPREC,2)
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000149#define TOK_UPLUS tok_decl(UNARYPREC,3)
Eric Andersen74bcd162001-07-30 21:41:37 +0000150
151#define TOK_NUM tok_decl(15,0)
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000152#define TOK_RPAREN tok_decl(15,1)
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000153#define TOK_ERROR tok_decl(15,2) /* just a place-holder really */
Eric Andersen74bcd162001-07-30 21:41:37 +0000154
155#define ARITH_APPLY(op) arith_apply(op, numstack, &numstackptr)
156#define NUMPTR (*numstackptr)
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000157
158/* "applying" a token means performing it on the top elements on the integer
159 * stack. For a unary operator it will only change the top element, but a
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000160 * binary operator will pop two arguments and push a result
161 */
162static short arith_apply(operator op, long *numstack, long **numstackptr)
Eric Andersen74bcd162001-07-30 21:41:37 +0000163{
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000164 long numptr_val;
165 long *NUMPTR_M1;
Eric Andersen74bcd162001-07-30 21:41:37 +0000166
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000167 /* There is no operator that can work without arguments */
168 if (NUMPTR == numstack) {
169 goto err;
170 }
171
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000172 NUMPTR_M1 = NUMPTR - 1;
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000173 if (op == TOK_UMINUS) {
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000174 *NUMPTR_M1 *= -1;
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000175 } else if (op == TOK_NOT) {
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000176 *NUMPTR_M1 = !(*NUMPTR_M1);
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000177 } else if (op == TOK_BNOT) {
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000178 *NUMPTR_M1 = ~(*NUMPTR_M1);
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000179 } else if (op != TOK_UPLUS) {
180 /* Binary operators */
181
182 /* ... and binary operators need two arguments */
183 if (NUMPTR_M1 == numstack) {
184 goto err;
185 }
186 /* ... and they pop one */
187 numptr_val = *--NUMPTR;
188
189 NUMPTR_M1 = NUMPTR - 1;
190
191 if (op == TOK_BOR) {
192 *NUMPTR_M1 |= numptr_val;
193 } else if (op == TOK_OR) {
194 *NUMPTR_M1 = numptr_val || *NUMPTR_M1;
195 } else if (op == TOK_BAND) {
196 *NUMPTR_M1 &= numptr_val;
197 } else if (op == TOK_AND) {
198 *NUMPTR_M1 = *NUMPTR_M1 && numptr_val;
199 } else if (op == TOK_EQ) {
200 *NUMPTR_M1 = (*NUMPTR_M1 == numptr_val);
201 } else if (op == TOK_NE) {
202 *NUMPTR_M1 = (*NUMPTR_M1 != numptr_val);
203 } else if (op == TOK_GE) {
204 *NUMPTR_M1 = (*NUMPTR_M1 >= numptr_val);
205 } else if (op == TOK_RSHIFT) {
206 *NUMPTR_M1 >>= numptr_val;
207 } else if (op == TOK_LSHIFT) {
208 *NUMPTR_M1 <<= numptr_val;
209 } else if (op == TOK_GT) {
210 *NUMPTR_M1 = (*NUMPTR_M1 > numptr_val);
211 } else if (op == TOK_LT) {
212 *NUMPTR_M1 = (*NUMPTR_M1 < numptr_val);
213 } else if (op == TOK_LE) {
214 *NUMPTR_M1 = (*NUMPTR_M1 <= numptr_val);
215 } else if (op == TOK_MUL) {
216 *NUMPTR_M1 *= numptr_val;
217 } else if (op == TOK_ADD) {
218 *NUMPTR_M1 += numptr_val;
219 } else if (op == TOK_SUB) {
220 *NUMPTR_M1 -= numptr_val;
221 }
222 /* zero divisor check */
223 else if (numptr_val == 0) {
224 return -2;
225 } else if (op == TOK_DIV) {
226 *NUMPTR_M1 /= numptr_val;
227 } else if (op == TOK_REM) {
228 *NUMPTR_M1 %= numptr_val;
229 }
230 /* WARNING!!! WARNING!!! WARNING!!! */
231 /* Any new operators should be added BEFORE the zero divisor check! */
Eric Andersen74bcd162001-07-30 21:41:37 +0000232 }
233 return 0;
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000234 err:
235 return (-1);
Eric Andersen74bcd162001-07-30 21:41:37 +0000236}
237
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000238static const char endexpression[] = ")";
Eric Andersen74bcd162001-07-30 21:41:37 +0000239
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000240/* + and - (in that order) must be last */
241static const char op_char[] = "!<>=|&*/%~()+-";
242static const char op_token[] = {
243 /* paired with equal */
244 TOK_NE, TOK_LE, TOK_GE,
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000245 /* paired with self -- note: ! is special-cased below */
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000246 TOK_ERROR, TOK_LSHIFT, TOK_RSHIFT, TOK_EQ, TOK_OR, TOK_AND,
247 /* singles */
248 TOK_NOT, TOK_LT, TOK_GT, TOK_ERROR, TOK_BOR, TOK_BAND,
249 TOK_MUL, TOK_DIV, TOK_REM, TOK_BNOT, TOK_LPAREN, TOK_RPAREN,
250 TOK_ADD, TOK_SUB, TOK_UPLUS, TOK_UMINUS
251};
252
253#define NUM_PAIR_EQUAL 3
254#define NUM_PAIR_SAME 6
255
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000256extern long arith(const char *expr, int *errcode)
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000257{
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000258 register char arithval; /* Current character under analysis */
259 operator lasttok, op;
Eric Andersen74bcd162001-07-30 21:41:37 +0000260 unsigned char prec;
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000261 const char *p = endexpression;
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000262 size_t datasizes = strlen(expr);
Eric Andersen74bcd162001-07-30 21:41:37 +0000263
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000264 /* Stack of integers */
265 /* The proof that there can be no more than strlen(startbuf)/2+1 integers
266 * in any given correct or incorrect expression is left as an excersize to
267 * the reader. */
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000268 long *numstack = alloca(((datasizes + 1) / 2) * sizeof(long));
269 long *numstackptr = numstack;
270
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000271 /* Stack of operator tokens */
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000272 operator * stack = alloca((datasizes + 1) * sizeof(operator));
273 operator * stackptr = stack;
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000274
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000275 /* start off with a left paren */
276 *stackptr++ = lasttok = TOK_LPAREN;
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000277
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000278 loop:
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000279 if ((arithval = *expr) == 0) {
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000280 /* Null expression. */
281 if (p == endexpression) {
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000282 return (*errcode = 0);
283 }
284
285 /* This is only reached after all tokens have been extracted from the
286 * input stream. If there are still tokens on the operator stack, they
287 * are to be applied in order. At the end, there should be a final
288 * result on the integer stack */
289
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000290 if (expr != endexpression + 1) { /* If we haven't done so already, */
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000291 expr = endexpression; /* append a closing right paren */
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000292 goto loop; /* and let the loop process it. */
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000293 }
294 /* At this point, we're done with the expression. */
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000295 if (numstackptr != numstack + 1) { /* ... but if there isn't, it's bad */
296 err:
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000297 return (*errcode = -1);
298 /* NOTREACHED */
299 }
300 return *numstack;
301 } else {
302 /* Continue processing the expression. */
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000303 /* Skip whitespace */
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000304 if (isspace(arithval)) {
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000305 goto prologue;
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000306 }
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000307 /* isdigit ? */
308 if ((unsigned) arithval - '0' <= 9) {
Eric Andersen74bcd162001-07-30 21:41:37 +0000309 *numstackptr++ = strtol(expr, (char **) &expr, 10);
310 lasttok = TOK_NUM;
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000311 goto loop;
312 }
313#if 1
314 if ((p = strchr(op_char, arithval)) == NULL) {
315 goto err;
316 }
317#else
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000318 for (p = op_char; *p != arithval; p++) {
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000319 if (!*p) {
320 goto err;
Eric Andersen74bcd162001-07-30 21:41:37 +0000321 }
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000322 }
323#endif
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000324 p = op_token + (int) (p - op_char);
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000325 ++expr;
326 if ((p >= op_token + NUM_PAIR_EQUAL) || (*expr != '=')) {
327 p += NUM_PAIR_EQUAL;
328 if ((p >= op_token + NUM_PAIR_SAME + NUM_PAIR_EQUAL)
329 || (*expr != arithval) || (arithval == '!')) {
Eric Andersen74bcd162001-07-30 21:41:37 +0000330 --expr;
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000331 /* single = */
332 if (arithval == '=') {
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000333 goto err;
334 }
335 p += NUM_PAIR_SAME;
336 /* Plus and minus are binary (not unary) _only_ if the last
337 * token was as number, or a right paren (which pretends to be
338 * a number, since it evaluates to one). Think about it.
339 * It makes sense. */
340 if ((lasttok != TOK_NUM)
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000341 && (p >=
342 (op_token + NUM_PAIR_SAME + NUM_PAIR_EQUAL +
343 sizeof(op_char) - 2))) {
344 /* Unary plus or minus */
345 p += 2;
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000346 }
Eric Andersen74bcd162001-07-30 21:41:37 +0000347 }
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000348 }
349 op = *p;
Eric Andersen74bcd162001-07-30 21:41:37 +0000350
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000351 /* We don't want a unary operator to cause recursive descent on the
352 * stack, because there can be many in a row and it could cause an
353 * operator to be evaluated before its argument is pushed onto the
354 * integer stack. */
355 /* But for binary operators, "apply" everything on the operator
356 * stack until we find an operator with a lesser priority than the
357 * one we have just extracted. */
358 /* Left paren is given the lowest priority so it will never be
359 * "applied" in this way */
Eric Andersen74bcd162001-07-30 21:41:37 +0000360 prec = PREC(op);
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000361
362 /* not left paren or unary */
363 if ((prec > 0) && (prec != UNARYPREC)) {
364 /* binary op must be preceded by a num */
365 if (lasttok != TOK_NUM) {
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000366 goto err;
367 }
368 while (stackptr != stack) {
369 if (op == TOK_RPAREN) {
370 /* The algorithm employed here is simple: while we don't
371 * hit an open paren nor the bottom of the stack, pop
372 * tokens and apply them */
373 if (stackptr[-1] == TOK_LPAREN) {
374 --stackptr;
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000375
376 /* Any operator directly after a */
377 lasttok = TOK_NUM;
378
379 /* close paren should consider itself binary */
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000380 goto prologue;
381 }
382 } else if (PREC(stackptr[-1]) < prec) {
383 break;
384 }
Eric Andersen34506362001-08-02 05:02:46 +0000385 *errcode = ARITH_APPLY(*--stackptr);
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000386 if (*errcode) {
387 return *errcode;
388 }
Eric Andersen34506362001-08-02 05:02:46 +0000389 }
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000390 if (op == TOK_RPAREN) {
391 goto err;
392 }
393 }
Eric Andersen74bcd162001-07-30 21:41:37 +0000394
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000395 /* Push this operator to the stack and remember it. */
396 *stackptr++ = lasttok = op;
Eric Andersen74bcd162001-07-30 21:41:37 +0000397
Glenn L McGrathde9e8032002-08-23 12:04:23 +0000398 prologue:
Manuel Novoa III 6a9d1f62001-09-11 01:11:31 +0000399 ++expr;
400 goto loop;
401 }
Eric Andersen74bcd162001-07-30 21:41:37 +0000402}