bc: fix handling of "digits" above 9

function                                             old     new   delta
zxc_lex_next                                        1573    1608     +35
xc_parse_pushIndex                                    58      56      -2
xc_program_index                                      71      63      -8
zxc_program_num                                     1022     990     -32
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/3 up/down: 35/-42)             Total: -7 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>

commit: e16a5223d20e5715b98e1fd21fa8d59e75e4e793 [log] [tgz]
author: Denys Vlasenko <vda.linux@googlemail.com> Sat Dec 29 02:24:19 2018 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> Sat Dec 29 02:24:19 2018 +0100
tree: 6f5546a00e30296c557c26036d0363ce9e9369e6
parent: f9b4cc114cb4eb2a997f08daa43af30ad598322c [diff]
diff --git a/miscutils/bc.c b/miscutils/bc.c
index c10cd73..07327af 100644
--- a/miscutils/bc.c
+++ b/miscutils/bc.c

@@ -2555,13 +2555,16 @@
 // Parsing routines
 //
 
-static bool xc_num_strValid(const char *val, size_t base)
+// "Input numbers may contain the characters 0-9 and A-Z.
+// (Note: They must be capitals.  Lower case letters are variable names.)
+// Single digit numbers always have the value of the digit regardless of
+// the value of ibase. (i.e. A = 10.) For multi-digit numbers, bc changes
+// all input digits greater or equal to ibase to the value of ibase-1.
+// This makes the number ZZZ always be the largest 3 digit number of the
+// input base."
+static bool xc_num_strValid(const char *val)
 {
-	BcDig b;
-	bool radix;
-
-	b = (BcDig)(base <= 10 ? base + '0' : base - 10 + 'A');
-	radix = false;
+	bool radix = false;
 	for (;;) {
 		BcDig c = *val++;
 		if (c == '\0')
@@ -2571,7 +2574,7 @@
 			radix = true;
 			continue;
 		}
-		if (c < '0' || c >= b || (c > '9' && c < 'A'))
+		if ((c < '0' || c > '9') && (c < 'A' || c > 'Z'))
 			return false;
 	}
 	return true;
@@ -2599,10 +2602,21 @@
 	for (i = 0; val[i]; ++i) {
 		if (val[i] != '0' && val[i] != '.') {
 			// Not entirely zero value - convert it, and exit
+			if (len == 1) {
+				char c = val[0] - '0';
+				if (c > 9) // A-Z => 10-35
+					c -= ('A' - '9' - 1);
+				n->num[0] = c;
+				n->len = 1;
+				break;
+			}
 			i = len - 1;
 			for (;;) {
-				n->num[n->len] = val[i] - '0';
-				++n->len;
+				char c = val[i] - '0';
+				if (c > 9) // A-Z => 9
+					c = 9;
+				n->num[n->len] = c;
+				n->len++;
  skip_dot:
 				if (i == 0) break;
 				if (val[--i] == '.') goto skip_dot;
@@ -2692,7 +2706,7 @@
 
 static BC_STATUS zxc_num_parse(BcNum *n, const char *val, unsigned base_t)
 {
-	if (!xc_num_strValid(val, base_t))
+	if (!xc_num_strValid(val))
 		RETURN_STATUS(bc_error("bad number string"));
 
 	bc_num_zero(n);
@@ -2807,6 +2821,13 @@
 	bc_vec_pop_all(&p->lex_strnumbuf);
 	bc_vec_pushByte(&p->lex_strnumbuf, last);
 
+// "Input numbers may contain the characters 0-9 and A-Z.
+// (Note: They must be capitals.  Lower case letters are variable names.)
+// Single digit numbers always have the value of the digit regardless of
+// the value of ibase. (i.e. A = 10.) For multi-digit numbers, bc changes
+// all input digits greater or equal to ibase to the value of ibase-1.
+// This makes the number ZZZ always be the largest 3 digit number of the
+// input base."
 	pt = (last == '.');
 	p->lex = XC_LEX_NUMBER;
 	for (;;) {
@@ -2822,13 +2843,13 @@
 			c = peek_inbuf(); // force next line to be read
 			goto check_c;
 		}
-		if (!isdigit(c) && (c < 'A' || c > 'F')) {
+		if (!isdigit(c) && (c < 'A' || c > 'Z')) {
 			if (c != '.') break;
 			// if '.' was already seen, stop on second one:
 			if (pt) break;
 			pt = true;
 		}
-		// c is one of "0-9A-F."
+		// c is one of "0-9A-Z."
 		last = c;
 		bc_vec_push(&p->lex_strnumbuf, p->lex_inbuf);
 		p->lex_inbuf++;
@@ -3167,6 +3188,26 @@
 	case 'D':
 	case 'E':
 	case 'F':
+	case 'G':
+	case 'H':
+	case 'I':
+	case 'J':
+	case 'K':
+	case 'L':
+	case 'M':
+	case 'N':
+	case 'O':
+	case 'P':
+	case 'Q':
+	case 'R':
+	case 'S':
+	case 'T':
+	case 'U':
+	case 'V':
+	case 'W':
+	case 'X':
+	case 'Y':
+	case 'Z':
 		s = zxc_lex_number(c);
 		break;
 	case ';':
@@ -3450,13 +3491,14 @@
 
 	mask = ((size_t)0xff) << (sizeof(idx) * 8 - 8);
 	amt = sizeof(idx);
-	do {
+	for (;;) {
 		if (idx & mask) break;
 		mask >>= 8;
 		amt--;
-	} while (amt != 0);
+	}
+	// amt is at least 1 here - "one byte of length data follows"
 
-	xc_parse_push(SMALL_INDEX_LIMIT + amt);
+	xc_parse_push((SMALL_INDEX_LIMIT - 1) + amt);
 
 	while (idx != 0) {
  push_idx:
@@ -5260,13 +5302,15 @@
 		*bgn += 1;
 		return amt;
 	}
-	amt -= SMALL_INDEX_LIMIT;
+	amt -= (SMALL_INDEX_LIMIT - 1); // amt is 1 or more here
 	*bgn += amt + 1;
 
-	amt *= 8;
 	res = 0;
-	for (i = 0; i < amt; i += 8)
+	i = 0;
+	do {
 		res |= (size_t)(*bytes++) << i;
+		i += 8;
+	} while (--amt != 0);
 
 	return res;
 }

diff --git a/testsuite/bc_numbers1.bc b/testsuite/bc_numbers1.bc
new file mode 100644
index 0000000..fd4e225
--- /dev/null
+++ b/testsuite/bc_numbers1.bc

@@ -0,0 +1,23 @@
+ibase=G
+define f() {
+a00=00;a01=01;a02=02;a03=03;a04=04;a05=05;a06=06;a07=07;a08=08;a09=09;a0a=0A;a0b=0B;a0c=0C;a0d=0D;a0e=0E;a0f=0F
+a10=10;a11=11;a12=12;a13=13;a14=14;a15=15;a16=16;a17=17;a18=18;a19=19;a1a=1A;a1b=1B;a1c=1C;a1d=1D;a1e=1E;a1f=1F
+a20=20;a21=21;a22=22;a23=23;a24=24;a25=25;a26=26;a27=27;a28=28;a29=29;a2a=2A;a2b=2B;a2c=2C;a2d=2D;a2e=2E;a2f=2F
+a30=30;a31=31;a32=32;a33=33;a34=34;a35=35;a36=36;a37=37;a38=38;a39=39;a3a=3A;a3b=3B;a3c=3C;a3d=3D;a3e=3E;a3f=3F
+a40=40;a41=41;a42=42;a43=43;a44=44;a45=45;a46=46;a47=47;a48=48;a49=49;a4a=4A;a4b=4B;a4c=4C;a4d=4D;a4e=4E;a4f=4F
+a50=50;a51=51;a52=52;a53=53;a54=54;a55=55;a56=56;a57=57;a58=58;a59=59;a5a=5A;a5b=5B;a5c=5C;a5d=5D;a5e=5E;a5f=5F
+a60=60;a61=61;a62=62;a63=63;a64=64;a65=65;a66=66;a67=67;a68=68;a69=69;a6a=6A;a6b=6B;a6c=6C;a6d=6D;a6e=6E;a6f=6F
+a70=70;a71=71;a72=72;a73=73;a74=74;a75=75;a76=76;a77=77;a78=78;a79=79;a7a=7A;a7b=7B;a7c=7C;a7d=7D;a7e=7E;a7f=7F
+a80=80;a81=81;a82=82;a83=83;a84=84;a85=85;a86=86;a87=87;a88=88;a89=89;a8a=8A;a8b=8B;a8c=8C;a8d=8D;a8e=8E;a8f=8F
+a90=90;a91=91;a92=92;a93=93;a94=94;a95=95;a96=96;a97=97;a98=98;a99=99;a9a=9A;a9b=9B;a9c=9C;a9d=9D;a9e=9E;a9f=9F
+aa0=A0;aa1=A1;aa2=A2;aa3=A3;aa4=A4;aa5=A5;aa6=A6;aa7=A7;aa8=A8;aa9=A9;aaa=AA;aab=AB;aac=AC;aad=AD;aae=AE;aaf=AF
+ab0=B0;ab1=B1;ab2=B2;ab3=B3;ab4=B4;ab5=B5;ab6=B6;ab7=B7;ab8=B8;ab9=B9;aba=BA;abb=BB;abc=BC;abd=BD;abe=BE;abf=BF
+ac0=C0;ac1=C1;ac2=C2;ac3=C3;ac4=C4;ac5=C5;ac6=C6;ac7=C7;ac8=C8;ac9=C9;aca=CA;acb=CB;acc=CC;acd=CD;ace=CE;acf=CF
+ad0=D0;ad1=D1;ad2=D2;ad3=D3;ad4=D4;ad5=D5;ad6=D6;ad7=D7;ad8=D8;ad9=D9;ada=DA;adb=DB;adc=DC;add=DD;ade=DE;adf=DF
+ae0=E0;ae1=E1;ae2=E2;ae3=E3;ae4=E4;ae5=E5;ae6=E6;ae7=E7;ae8=E8;ae9=E9;aea=EA;aeb=EB;aec=EC;aed=ED;aee=EE;aef=EF
+af0=F0;af1=F1;af2=F2;af3=F3;af4=F4;af5=F5;af6=F6;af7=F7;af8=F8;af9=F9;afa=FA;afb=FB;afc=FC;afd=FD;afe=FE;aff=FF
+# this tests "index encoding" in bc.c: are numbers after 0xfc encoded correctly?
+af0;af1;af2;af3;af4;af5;af6;af7;af8;af9;afa;afb;afc;afd;afe;aff
+}
+f()
+halt

diff --git a/testsuite/bc_numbers1_results.txt b/testsuite/bc_numbers1_results.txt
new file mode 100644
index 0000000..e0ace10
--- /dev/null
+++ b/testsuite/bc_numbers1_results.txt

@@ -0,0 +1,17 @@
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+0
commit	e16a5223d20e5715b98e1fd21fa8d59e75e4e793	[log] [tgz]
author	Denys Vlasenko <vda.linux@googlemail.com>	Sat Dec 29 02:24:19 2018 +0100
committer	Denys Vlasenko <vda.linux@googlemail.com>	Sat Dec 29 02:24:19 2018 +0100
tree	6f5546a00e30296c557c26036d0363ce9e9369e6
parent	f9b4cc114cb4eb2a997f08daa43af30ad598322c [diff]