ash: optional bash-like pattern subst and substring opts
(by James Simmons <jsimmons AT infradead.org>)
TODO: write testsuite!
BASH_COMPAT off:
scanleft 101 262 +161
subevalvar 346 335 -11
BASH_COMPAT on:
subevalvar 346 1397 +1051
scanleft 101 262 +161
readtoken1 2739 2807 +68
cmdputs 397 399 +2
static.vstype 64 48 -16
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/1 up/down: 1282/-16) Total: 1266 bytes
diff --git a/shell/Config.in b/shell/Config.in
index 9328c91..5ea071c 100644
--- a/shell/Config.in
+++ b/shell/Config.in
@@ -47,6 +47,13 @@
comment "Ash Shell Options"
depends on ASH
+config ASH_BASH_COMPAT
+ bool "bash-compatible extensions"
+ default y
+ depends on ASH
+ help
+ Enable bash-compatible extensions.
+
config ASH_JOB_CONTROL
bool "Job control"
default y
diff --git a/shell/ash.c b/shell/ash.c
index 2b6133d..62380b3 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -466,16 +466,21 @@
#define VSQUOTE 0x80 /* inside double quotes--suppress splitting */
/* values of VSTYPE field */
-#define VSNORMAL 0x1 /* normal variable: $var or ${var} */
-#define VSMINUS 0x2 /* ${var-text} */
-#define VSPLUS 0x3 /* ${var+text} */
-#define VSQUESTION 0x4 /* ${var?message} */
-#define VSASSIGN 0x5 /* ${var=text} */
-#define VSTRIMRIGHT 0x6 /* ${var%pattern} */
-#define VSTRIMRIGHTMAX 0x7 /* ${var%%pattern} */
-#define VSTRIMLEFT 0x8 /* ${var#pattern} */
-#define VSTRIMLEFTMAX 0x9 /* ${var##pattern} */
-#define VSLENGTH 0xa /* ${#var} */
+#define VSNORMAL 0x1 /* normal variable: $var or ${var} */
+#define VSMINUS 0x2 /* ${var-text} */
+#define VSPLUS 0x3 /* ${var+text} */
+#define VSQUESTION 0x4 /* ${var?message} */
+#define VSASSIGN 0x5 /* ${var=text} */
+#define VSTRIMRIGHT 0x6 /* ${var%pattern} */
+#define VSTRIMRIGHTMAX 0x7 /* ${var%%pattern} */
+#define VSTRIMLEFT 0x8 /* ${var#pattern} */
+#define VSTRIMLEFTMAX 0x9 /* ${var##pattern} */
+#define VSLENGTH 0xa /* ${#var} */
+#if ENABLE_ASH_BASH_COMPAT
+#define VSSUBSTR 0xc /* ${var:position:length} */
+#define VSREPLACE 0xd /* ${var/pattern/replacement} */
+#define VSREPLACEALL 0xe /* ${var//pattern/replacement} */
+#endif
static const char dolatstr[] ALIGN1 = {
CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
@@ -3471,6 +3476,7 @@
}
if (is_number(p)) {
+// TODO: number() instead? It does error checking...
num = atoi(p);
if (num < njobs) {
jp = jobtab + num - 1;
@@ -4178,15 +4184,17 @@
static void
cmdputs(const char *s)
{
+ static const char vstype[VSTYPE + 1][3] = {
+ "", "}", "-", "+", "?", "=",
+ "%", "%%", "#", "##"
+ USE_ASH_BASH_COMPAT(, ":", "/", "//")
+ };
+
const char *p, *str;
char c, cc[2] = " ";
char *nextc;
int subtype = 0;
int quoted = 0;
- static const char vstype[VSTYPE + 1][4] = {
- "", "}", "-", "+", "?", "=",
- "%", "%%", "#", "##"
- };
nextc = makestrspace((strlen(s) + 1) * 8, cmdnextc);
p = s;
@@ -5681,23 +5689,37 @@
scanleft(char *startp, char *rmesc, char *rmescend ATTRIBUTE_UNUSED, char *str, int quotes,
int zero)
{
- char *loc;
- char *loc2;
+ char *loc, *loc2, *full;
char c;
loc = startp;
loc2 = rmesc;
do {
- int match;
+ int match = strlen(str);
const char *s = loc2;
+
c = *loc2;
if (zero) {
*loc2 = '\0';
s = rmesc;
}
- match = pmatch(str, s);
+
+ // chop off end if it's '*'
+ full = strrchr(str, '*');
+ if (full && full != str)
+ match--;
+
+ // If str starts with '*' replace with s.
+ if ((*str == '*') && strlen(s) >= match) {
+ full = xstrdup(s);
+ strncpy(full+strlen(s)-match+1, str+1, match-1);
+ } else
+ full = xstrndup(str, match);
+ match = strncmp(s, full, strlen(full));
+ free(full);
+
*loc2 = c;
- if (match)
+ if (!match)
return loc;
if (quotes && *loc == CTLESC)
loc++;
@@ -5760,18 +5782,98 @@
ash_msg_and_raise_error("%.*s: %s%s", end - var - 1, var, msg, tail);
}
+#if ENABLE_ASH_BASH_COMPAT
+static char *
+parse_sub_pattern(char *arg, int inquotes) /* rewrites arg in place: splits at the first '/' and decodes backslash escapes */
+{
+ char *idx, *repl = NULL; /* idx: write cursor into arg; repl: start of the replacement text, NULL until a '/' is seen */
+ unsigned char c;
+
+ for (idx = arg; *arg; arg++) {
+ if (*arg == '/') {
+ /* Only the first '/' seen is our separator: it is overwritten with NUL and repl points just past it */
+ if (!repl) {
+ *idx++ = '\0';
+ repl = idx;
+ } else
+ *idx++ = *arg;
+ } else if (*arg != '\\') {
+ *idx++ = *arg;
+ } else { /* *arg is a backslash: decode an escape sequence */
+ if (inquotes)
+ arg++;
+ else {
+ if (*(arg + 1) != '\\') /* when !inquotes the escape must start with two backslashes */
+ goto single_backslash;
+ arg += 2;
+ }
+
+ switch (*arg) {
+ case 'n': c = '\n'; break;
+ case 'r': c = '\r'; break;
+ case 't': c = '\t'; break;
+ case 'v': c = '\v'; break;
+ case 'f': c = '\f'; break;
+ case 'b': c = '\b'; break;
+ case 'a': c = '\a'; break;
+ case '\\':
+ if (*(arg + 1) != '\\' && !inquotes)
+ goto single_backslash;
+ arg++;
+ /* FALLTHROUGH */
+ case '\0':
+ /* Trailing backslash, just stuff one in the buffer
+ * and back up arg so the loop will exit.
+ */
+ c = '\\';
+ if (!*arg)
+ arg--;
+ break;
+ default:
+ c = *arg;
+ if (isdigit(c)) {
+ /* It's an octal number, parse it (at most three characters are consumed). */
+ int i;
+ c = 0;
+
+ for (i = 0; *arg && i < 3; arg++, i++) {
+ if (*arg >= '8' || *arg < '0')
+ ash_msg_and_raise_error("Invalid octal char in pattern");
+// TODO: number() instead? It does error checking... NOTE(review): atoi(arg) converts the whole remaining digit run, not only *arg -- confirm this is intended
+ c = (c << 3) + atoi(arg);
+ }
+ /* back off one (so outer loop can do it) */
+ arg--;
+ }
+ }
+ *idx++ = c;
+ }
+ }
+ *idx = *arg; /* loop ended on the terminating NUL; copy it to close the rewritten string */
+
+ return repl; /* NULL when no separator '/' was found */
+
+ single_backslash:
+ ash_msg_and_raise_error("single backslash unexpected");
+ /* NOTREACHED */
+}
+#endif /* ENABLE_ASH_BASH_COMPAT */
+
static const char *
subevalvar(char *p, char *str, int strloc, int subtype,
int startloc, int varflags, int quotes, struct strlist *var_str_list)
{
+ struct nodelist *saveargbackq = argbackq;
char *startp;
char *loc;
- int saveherefd = herefd;
- struct nodelist *saveargbackq = argbackq;
- int amount;
char *rmesc, *rmescend;
+ USE_ASH_BASH_COMPAT(char *repl = NULL;)
+ USE_ASH_BASH_COMPAT(char null = '\0';)
+ USE_ASH_BASH_COMPAT(int pos, len, orig_len;)
+ int saveherefd = herefd;
+ int amount, workloc, resetloc;
int zero;
- char *(*scan)(char *, char *, char *, char *, int , int);
+ char *(*scan)(char*, char*, char*, char*, int, int);
herefd = -1;
argstr(p, (subtype != VSASSIGN && subtype != VSQUESTION) ? EXP_CASE : 0,
@@ -5788,16 +5890,76 @@
STADJUST(amount, expdest);
return startp;
+#if ENABLE_ASH_BASH_COMPAT
+ case VSSUBSTR:
+ loc = str = stackblock() + strloc;
+// TODO: number() instead? It does error checking...
+ pos = atoi(loc);
+ len = str - startp - 1;
+
+ /* *loc != '\0', guaranteed by parser */
+ if (quotes) {
+ char *ptr;
+
+ /* We must adjust the length by the number of escapes we find. */
+ for (ptr = startp; ptr < (str - 1); ptr++) {
+ if(*ptr == CTLESC) {
+ len--;
+ ptr++;
+ }
+ }
+ }
+ orig_len = len;
+
+ if (*loc++ == ':') {
+// TODO: number() instead? It does error checking...
+ len = atoi(loc);
+ } else {
+ len = orig_len;
+ while (*loc && *loc != ':')
+ loc++;
+ if (*loc++ == ':')
+// TODO: number() instead? It does error checking...
+ len = atoi(loc);
+ }
+ if (pos >= orig_len) {
+ pos = 0;
+ len = 0;
+ }
+ if (len > (orig_len - pos))
+ len = orig_len - pos;
+
+ for (str = startp; pos; str++, pos--) {
+ if (quotes && *str == CTLESC)
+ str++;
+ }
+ for (loc = startp; len; len--) {
+ if (quotes && *str == CTLESC)
+ *loc++ = *str++;
+ *loc++ = *str++;
+ }
+ *loc = '\0';
+ amount = loc - expdest;
+ STADJUST(amount, expdest);
+ return loc;
+#endif
+
case VSQUESTION:
varunset(p, str, startp, varflags);
/* NOTREACHED */
}
+ resetloc = expdest - (char *)stackblock();
- subtype -= VSTRIMRIGHT;
-#if DEBUG
- if (subtype < 0 || subtype > 3)
- abort();
-#endif
+ /* We'll come back here if we grow the stack while handling
+ * a VSREPLACE or VSREPLACEALL, since our pointers into the
+ * stack will need rebasing, and we'll need to remove our work
+ * areas each time
+ */
+ USE_ASH_BASH_COMPAT(restart:)
+
+ amount = expdest - ((char *)stackblock() + resetloc);
+ STADJUST(-amount, expdest);
+ startp = stackblock() + startloc;
rmesc = startp;
rmescend = stackblock() + strloc;
@@ -5811,7 +5973,93 @@
rmescend--;
str = stackblock() + strloc;
preglob(str, varflags & VSQUOTE, 0);
+ workloc = expdest - (char *)stackblock();
+#if ENABLE_ASH_BASH_COMPAT
+ if (subtype == VSREPLACE || subtype == VSREPLACEALL) {
+ char *idx, *end, *restart_detect;
+
+ if(!repl) {
+ repl = parse_sub_pattern(str, varflags & VSQUOTE);
+ if (!repl)
+ repl = &null;
+ }
+
+ /* If there's no pattern to match, return the expansion unmolested */
+ if (*str == '\0')
+ return 0;
+
+ len = 0;
+ idx = startp;
+ end = str - 1;
+ while (idx < end) {
+ loc = scanright(idx, rmesc, rmescend, str, quotes, 1);
+ if (!loc) {
+ /* No match, advance */
+ restart_detect = stackblock();
+ STPUTC(*idx, expdest);
+ if (quotes && *idx == CTLESC) {
+ idx++;
+ len++;
+ STPUTC(*idx, expdest);
+ }
+ if (stackblock() != restart_detect)
+ goto restart;
+ idx++;
+ len++;
+ rmesc++;
+ continue;
+ }
+
+ if (subtype == VSREPLACEALL) {
+ while (idx < loc) {
+ if (quotes && *idx == CTLESC)
+ idx++;
+ idx++;
+ rmesc++;
+ }
+ } else
+ idx = loc;
+
+ for (loc = repl; *loc; loc++) {
+ restart_detect = stackblock();
+ STPUTC(*loc, expdest);
+ if (stackblock() != restart_detect)
+ goto restart;
+ len++;
+ }
+
+ if (subtype == VSREPLACE) {
+ while (*idx) {
+ restart_detect = stackblock();
+ STPUTC(*idx, expdest);
+ if (stackblock() != restart_detect)
+ goto restart;
+ len++;
+ idx++;
+ }
+ break;
+ }
+ }
+
+ /* We've put the replaced text into a buffer at workloc, now
+ * move it to the right place and adjust the stack.
+ */
+ startp = stackblock() + startloc;
+ STPUTC('\0', expdest);
+ memmove(startp, stackblock() + workloc, len);
+ startp[len++] = '\0';
+ amount = expdest - ((char *)stackblock() + startloc + len - 1);
+ STADJUST(-amount, expdest);
+ return startp;
+ }
+#endif /* ENABLE_ASH_BASH_COMPAT */
+
+ subtype -= VSTRIMRIGHT;
+#if DEBUG
+ if (subtype < 0 || subtype > 7)
+ abort();
+#endif
/* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */
zero = subtype >> 1;
/* VSTRIMLEFT/VSTRIMRIGHTMAX -> scanleft */
@@ -5925,6 +6173,7 @@
case '7':
case '8':
case '9':
+// TODO: number() instead? It does error checking...
num = atoi(name);
if (num < 0 || num > shellparam.nparam)
return -1;
@@ -6063,6 +6312,11 @@
case VSTRIMLEFTMAX:
case VSTRIMRIGHT:
case VSTRIMRIGHTMAX:
+#if ENABLE_ASH_BASH_COMPAT
+ case VSSUBSTR:
+ case VSREPLACE:
+ case VSREPLACEALL:
+#endif
break;
default:
abort();
@@ -10459,8 +10713,15 @@
if (subtype == 0) {
switch (c) {
case ':':
- flags = VSNUL;
c = pgetc();
+#if ENABLE_ASH_BASH_COMPAT
+ if (c == ':' || c == '$' || isdigit(c)) {
+ pungetc();
+ subtype = VSSUBSTR;
+ break;
+ }
+#endif
+ flags = VSNUL;
/*FALLTHROUGH*/
default:
p = strchr(types, c);
@@ -10469,18 +10730,26 @@
subtype = p - types + VSNORMAL;
break;
case '%':
- case '#':
- {
- int cc = c;
- subtype = c == '#' ? VSTRIMLEFT :
- VSTRIMRIGHT;
- c = pgetc();
- if (c == cc)
- subtype++;
- else
- pungetc();
- break;
- }
+ case '#': {
+ int cc = c;
+ subtype = c == '#' ? VSTRIMLEFT : VSTRIMRIGHT;
+ c = pgetc();
+ if (c == cc)
+ subtype++;
+ else
+ pungetc();
+ break;
+ }
+#if ENABLE_ASH_BASH_COMPAT
+ case '/':
+ subtype = VSREPLACE;
+ c = pgetc();
+ if (c == '/')
+ subtype++; /* VSREPLACEALL */
+ else
+ pungetc();
+ break;
+#endif
}
} else {
pungetc();
@@ -12621,7 +12890,7 @@
0
};
/* ptr to ")" */
-#define endexpression &op_tokens[sizeof(op_tokens)-7]
+#define endexpression (&op_tokens[sizeof(op_tokens)-7])
static arith_t
arith(const char *expr, int *perrcode)