ash: expand: Do not reprocess data when expanding words

Upstream patch:

    Date: Wed, 30 May 2018 02:06:03 +0800
    expand: Do not reprocess data when expanding words

    Currently various paths will reprocess data when performing word
    expansion.  For example, expari will skip backwards looking for
    the start of the arithmetic expansion, while evalvar will skip
    unexpanded words manually.

    This is cumbersome and error-prone.  This patch fixes this by
    making word expansions proceed in a linear fashion.  This means
    changing argstr and the various expansion functions such as expari
    and subevalvar to return the next character to be expanded.

    This is inspired by similar code from FreeBSD.  However, we take
    things one step further and completely remove the manual word
    skipping in evalvar.  This is accomplished by introducing a new
    EXP_DISCARD flag that tells argstr to only parse and not produce
    any actual expansions.

    Incidentally, argstr will now always NUL-terminate the expansion
    unless the EXP_WORD flag is set.  This is because all but one
    caller of argstr wants the result to be NUL-terminated.

    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Also includes two one-line follow-up fixes:

    expand: Eat closing brace for length parameter
            if (subtype == VSLENGTH) {
    +               p++;
                    if (flag & EXP_DISCARD)
    expand: Fix double-decrement in argstr
    -               newloc = expdest - (char *)stackblock() - end;
    +               newloc = q - (char *)stackblock() - end;

and changes in code for bash substring extensions.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/shell/ash.c b/shell/ash.c
index 03864c6..425da6b 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -6035,6 +6035,7 @@
 #define EXP_WORD        0x40    /* expand word in parameter expansion */
 #define EXP_QUOTED      0x100   /* expand word in double quotes */
 #define EXP_KEEPNUL     0x200   /* do not skip NUL characters */
+#define EXP_DISCARD     0x400   /* discard result of expansion */
 
 /*
  * rmescape() flags
@@ -6452,13 +6453,15 @@
 }
 
 static char *
-exptilde(char *startp, char *p, int flag)
+exptilde(char *startp, int flag)
 {
 	unsigned char c;
 	char *name;
 	struct passwd *pw;
 	const char *home;
+	char *p;
 
+	p = startp;
 	name = p + 1;
 
 	while ((c = *++p) != '\0') {
@@ -6477,6 +6480,8 @@
 		}
 	}
  done:
+	if (flag & EXP_DISCARD)
+		goto out;
 	*p = '\0';
 	if (*name == '\0') {
 		home = lookupvar("HOME");
@@ -6486,13 +6491,13 @@
 			goto lose;
 		home = pw->pw_dir;
 	}
+	*p = c;
 	if (!home)
 		goto lose;
-	*p = c;
 	strtodest(home, flag | EXP_QUOTED);
+ out:
 	return p;
  lose:
-	*p = c;
 	return startp;
 }
 
@@ -6591,6 +6596,9 @@
 	int startloc;
 	struct stackmark smark;
 
+	if (flag & EXP_DISCARD)
+		goto out;
+
 	INT_OFF;
 	startloc = expdest - (char *)stackblock();
 	pushstackmark(&smark, startloc);
@@ -6632,64 +6640,57 @@
 		(int)((dest - (char *)stackblock()) - startloc),
 		(int)((dest - (char *)stackblock()) - startloc),
 		stackblock() + startloc));
+
+ out:
+	argbackq = argbackq->next;
 }
 
+/* expari needs it */
+static char *argstr(char *p, int flag);
+
 #if ENABLE_FEATURE_SH_MATH
 /*
  * Expand arithmetic expression.  Backup to start of expression,
  * evaluate, place result in (backed up) result, adjust string position.
  */
-static void
-expari(int flag)
+static char *
+expari(char *start, int flag)
 {
-	char *p, *start;
+	struct stackmark sm;
 	int begoff;
+	int endoff;
 	int len;
+	arith_t result;
+	char *p;
 
-	/* ifsfree(); */
+	p = stackblock();
+	begoff = expdest - p;
+	p = argstr(start, flag & EXP_DISCARD);
 
-	/*
-	 * This routine is slightly over-complicated for
-	 * efficiency.  Next we scan backwards looking for the
-	 * start of arithmetic.
-	 */
+	if (flag & EXP_DISCARD)
+		goto out;
+
 	start = stackblock();
-	p = expdest - 1;
-	*p = '\0';
-	p--;
-	while (1) {
-		int esc;
-
-		while ((unsigned char)*p != CTLARI) {
-			p--;
-#if DEBUG
-			if (p < start) {
-				ash_msg_and_raise_error("missing CTLARI (shouldn't happen)");
-			}
-#endif
-		}
-
-		esc = esclen(start, p);
-		if (!(esc % 2)) {
-			break;
-		}
-
-		p -= esc + 1;
-	}
-
-	begoff = p - start;
+	endoff = expdest - start;
+	start += begoff;
+	STADJUST(start - expdest, expdest);
 
 	removerecordregions(begoff);
 
-	expdest = p;
-
 	if (flag & QUOTES_ESC)
-		rmescapes(p + 1, 0, NULL);
+		rmescapes(start, 0, NULL);
 
-	len = cvtnum(ash_arith(p + 1), flag);
+	pushstackmark(&sm, endoff);
+	result = ash_arith(start);
+	popstackmark(&sm);
+
+	len = cvtnum(result, flag);
 
 	if (!(flag & EXP_QUOTED))
 		recordregion(begoff, begoff + len, 0);
+
+ out:
+	return p;
 }
 #endif
 
@@ -6701,7 +6702,7 @@
  * characters to allow for further processing.  Otherwise treat
  * $@ like $* since no splitting will be performed.
  */
-static void
+static char *
 argstr(char *p, int flag)
 {
 	static const char spclchars[] ALIGN1 = {
@@ -6713,6 +6714,7 @@
 		CTLVAR,
 		CTLBACKQ,
 #if ENABLE_FEATURE_SH_MATH
+		CTLARI,
 		CTLENDARI,
 #endif
 		'\0'
@@ -6723,41 +6725,45 @@
 	size_t length;
 	int startloc;
 
-	if (!(flag & EXP_VARTILDE)) {
-		reject += 2;
-	} else if (flag & EXP_VARTILDE2) {
-		reject++;
-	}
+	reject += !!(flag & EXP_VARTILDE2);
+	reject += flag & EXP_VARTILDE ? 0 : 2;
 	inquotes = 0;
 	length = 0;
 	if (flag & EXP_TILDE) {
-		char *q;
-
 		flag &= ~EXP_TILDE;
  tilde:
-		q = p;
-		if (*q == '~')
-			p = exptilde(p, q, flag);
+		if (*p == '~')
+			p = exptilde(p, flag);
 	}
  start:
 	startloc = expdest - (char *)stackblock();
 	for (;;) {
+		int end;
 		unsigned char c;
 
 		length += strcspn(p + length, reject);
+		end = 0;
 		c = p[length];
-		if (c) {
-			if (!(c & 0x80)
-			IF_FEATURE_SH_MATH(|| c == CTLENDARI)
-			) {
-				/* c == '=' || c == ':' || c == CTLENDARI */
-				length++;
-			}
+		if (!(c & 0x80)
+		 IF_FEATURE_SH_MATH(|| c == CTLENDARI)
+		 || c == CTLENDVAR
+		) {
+			/*
+			 * c == '=' || c == ':' || c == '\0' ||
+			 * c == CTLENDARI || c == CTLENDVAR
+			 */
+			length++;
+			/* c == '\0' || c == CTLENDARI || c == CTLENDVAR */
+			end = !!((c - 1) & 0x80);
 		}
-		if (length > 0) {
+		if (length > 0 && !(flag & EXP_DISCARD)) {
 			int newloc;
-			expdest = stnputs(p, length, expdest);
-			newloc = expdest - (char *)stackblock();
+			char *q;
+
+			q = stnputs(p, length, expdest);
+			q[-1] &= end - 1;
+			expdest = q - (flag & EXP_WORD ? end : 0);
+			newloc = q - (char *)stackblock() - end;
 			if (breakall && !inquotes && newloc > startloc) {
 				recordregion(startloc, newloc, 0);
 			}
@@ -6766,14 +6772,11 @@
 		p += length + 1;
 		length = 0;
 
+		if (end)
+			break;
+
 		switch (c) {
-		case '\0':
-			goto breakloop;
 		case '=':
-			if (flag & EXP_VARTILDE2) {
-				p--;
-				continue;
-			}
 			flag |= EXP_VARTILDE2;
 			reject++;
 			/* fall through */
@@ -6786,11 +6789,6 @@
 				goto tilde;
 			}
 			continue;
-		}
-
-		switch (c) {
-		case CTLENDVAR: /* ??? */
-			goto breakloop;
 		case CTLQUOTEMARK:
 			/* "$@" syntax adherence hack */
 			if (!inquotes && !memcmp(p, dolatstr + 1, DOLATSTRLEN - 1)) {
@@ -6816,17 +6814,15 @@
 			goto start;
 		case CTLBACKQ:
 			expbackq(argbackq->n, flag | inquotes);
-			argbackq = argbackq->next;
 			goto start;
 #if ENABLE_FEATURE_SH_MATH
-		case CTLENDARI:
-			p--;
-			expari(flag | inquotes);
+		case CTLARI:
+			p = expari(p, flag | inquotes);
 			goto start;
 #endif
 		}
 	}
- breakloop: ;
+	return p - 1;
 }
 
 static char *
@@ -6951,25 +6947,27 @@
 	ash_msg_and_raise_error("%.*s: %s%s", (int)(end - var - 1), var, msg, tail);
 }
 
-static const char *
-subevalvar(char *p, char *str, int strloc, int subtype,
+static char *
+subevalvar(char *start, char *str, int strloc,
 		int startloc, int varflags, int flag)
 {
-	struct nodelist *saveargbackq = argbackq;
+	int subtype = varflags & VSTYPE;
 	int quotes = flag & QUOTES_ESC;
 	char *startp;
 	char *loc;
 	char *rmesc, *rmescend;
-	int amount, resetloc;
+	long amount;
+	int resetloc;
 	int argstr_flags;
 	IF_BASH_PATTERN_SUBST(int workloc;)
 	IF_BASH_PATTERN_SUBST(int slash_pos;)
 	IF_BASH_PATTERN_SUBST(char *repl;)
 	int zero;
 	char *(*scan)(char*, char*, char*, char*, int, int);
+	char *p;
 
-	//bb_error_msg("subevalvar(p:'%s',str:'%s',strloc:%d,subtype:%d,startloc:%d,varflags:%x,quotes:%d)",
-	//		p, str, strloc, subtype, startloc, varflags, quotes);
+	//bb_error_msg("subevalvar(start:'%s',str:'%s',strloc:%d,startloc:%d,varflags:%x,quotes:%d)",
+	//		start, str, strloc, startloc, varflags, quotes);
 
 #if BASH_PATTERN_SUBST
 	/* For "${v/pattern/repl}", we must find the delimiter _before_
@@ -6979,7 +6977,7 @@
 	repl = NULL;
 	if (subtype == VSREPLACE || subtype == VSREPLACEALL) {
 		/* Find '/' and replace with NUL */
-		repl = p;
+		repl = start;
 		/* The pattern can't be empty.
 		 * IOW: if the first char after "${v//" is a slash,
 		 * it does not terminate the pattern - it's the first char of the pattern:
@@ -7004,17 +7002,17 @@
 		}
 	}
 #endif
-	argstr_flags = EXP_TILDE;
-	if (subtype != VSASSIGN
-	 && subtype != VSQUESTION
+	argstr_flags = (flag & EXP_DISCARD) | EXP_TILDE;
+	if (!str
 #if BASH_SUBSTR
 	 && subtype != VSSUBSTR
 #endif
 	) {
 		/* EXP_CASE keeps CTLESC's */
-		argstr_flags = EXP_TILDE | EXP_CASE;
+		argstr_flags |= EXP_CASE;
 	}
-	argstr(p, argstr_flags);
+	p = argstr(start, argstr_flags);
+
 	//bb_error_msg("str0:'%s'", (char *)stackblock() + strloc);
 #if BASH_PATTERN_SUBST
 	slash_pos = -1;
@@ -7022,24 +7020,25 @@
 		slash_pos = expdest - ((char *)stackblock() + strloc);
 		STPUTC('/', expdest);
 		//bb_error_msg("repl+1:'%s'", repl + 1);
-		argstr(repl + 1, EXP_TILDE); /* EXP_TILDE: echo "${v/x/~}" expands ~ ! */
+		p = argstr(repl + 1, (flag & EXP_DISCARD) | EXP_TILDE); /* EXP_TILDE: echo "${v/x/~}" expands ~ ! */
 		*repl = '/';
 	}
 #endif
-	STPUTC('\0', expdest);
-	argbackq = saveargbackq;
+	if (flag & EXP_DISCARD)
+		return p;
+
 	startp = (char *)stackblock() + startloc;
 	//bb_error_msg("str1:'%s'", (char *)stackblock() + strloc);
 
 	switch (subtype) {
 	case VSASSIGN:
 		setvar0(str, startp);
-		amount = startp - expdest;
-		STADJUST(amount, expdest);
-		return startp;
+
+		loc = startp;
+		goto out;
 
 	case VSQUESTION:
-		varunset(p, str, startp, varflags);
+		varunset(start, str, startp, varflags);
 		/* NOTREACHED */
 
 #if BASH_SUBSTR
@@ -7110,9 +7109,7 @@
 			*loc++ = *vstr++;
 		}
 		*loc = '\0';
-		amount = loc - expdest;
-		STADJUST(amount, expdest);
-		return loc;
+		goto out;
 	}
 #endif /* BASH_SUBSTR */
 	}
@@ -7178,7 +7175,7 @@
 
 		/* If there's no pattern to match, return the expansion unmolested */
 		if (str[0] == '\0')
-			return NULL;
+			goto out1;
 
 		len = 0;
 		idx = startp;
@@ -7259,9 +7256,8 @@
 		startp = (char *)stackblock() + startloc;
 		memmove(startp, (char *)stackblock() + workloc, len + 1);
 		//bb_error_msg("startp:'%s'", startp);
-		amount = expdest - (startp + len);
-		STADJUST(-amount, expdest);
-		return startp;
+		loc = startp + len;
+		goto out;
 	}
 #endif /* BASH_PATTERN_SUBST */
 
@@ -7282,10 +7278,17 @@
 			loc = startp + (str - loc) - 1;
 		}
 		*loc = '\0';
-		amount = loc - expdest;
-		STADJUST(amount, expdest);
-	}
-	return loc;
+	} else
+		loc = str - 1;
+
+ out:
+	amount = loc - expdest;
+	STADJUST(amount, expdest);
+ out1:
+	/* Remove any recorded regions beyond start of variable */
+	removerecordregions(startloc);
+
+	return p;
 }
 
 /*
@@ -7310,7 +7313,14 @@
 	ssize_t len = 0;
 	int sep;
 	int subtype = varflags & VSTYPE;
-	int discard = subtype == VSPLUS || subtype == VSLENGTH;
+	int discard = (subtype == VSPLUS || subtype == VSLENGTH) | (flags & EXP_DISCARD);
+
+	if (!subtype) {
+		if (discard)
+			return -1;
+
+		raise_error_syntax("bad substitution");
+	}
 
 	flags |= EXP_KEEPNUL;
 	flags &= discard ? ~QUOTES_ESC : ~0;
@@ -7427,6 +7437,7 @@
 
 	if (discard)
 		STADJUST(-len, expdest);
+
 	return len;
 }
 
@@ -7439,18 +7450,15 @@
 {
 	char varflags;
 	char subtype;
-	int quoted;
 	char *var;
 	int patloc;
 	int startloc;
 	ssize_t varlen;
+	int quoted;
 
 	varflags = (unsigned char) *p++;
 	subtype = varflags & VSTYPE;
 
-	if (!subtype)
-		raise_error_syntax("bad substitution");
-
 	quoted = flag & EXP_QUOTED;
 	var = p;
 	startloc = expdest - (char *)stackblock();
@@ -7461,35 +7469,29 @@
 	if (varflags & VSNUL)
 		varlen--;
 
-	if (subtype == VSPLUS) {
+	switch (subtype) {
+	case VSPLUS:
 		varlen = -1 - varlen;
-		goto vsplus;
-	}
-
-	if (subtype == VSMINUS) {
- vsplus:
-		if (varlen < 0) {
-			argstr(
-				p,
-				flag | EXP_TILDE | EXP_WORD
-			);
-			goto end;
-		}
+		/* fall through */
+	case 0:
+	case VSMINUS:
+		p = argstr(p, flag | EXP_TILDE | EXP_WORD);
+		if (varlen < 0)
+			return p;
 		goto record;
-	}
 
-	if (subtype == VSASSIGN || subtype == VSQUESTION) {
+	case VSASSIGN:
+	case VSQUESTION:
 		if (varlen >= 0)
 			goto record;
 
-		subevalvar(p, var, 0, subtype, startloc, varflags,
+		p = subevalvar(p, var, 0, startloc, varflags,
 			   flag & ~QUOTES_ESC);
+
+		if (flag & EXP_DISCARD)
+			return p;
+
 		varflags &= ~VSNUL;
-		/*
-		 * Remove any recorded regions beyond
-		 * start of variable
-		 */
-		removerecordregions(startloc);
 		goto again;
 	}
 
@@ -7497,20 +7499,15 @@
 		varunset(p, var, 0, 0);
 
 	if (subtype == VSLENGTH) {
+		p++;
+		if (flag & EXP_DISCARD)
+			return p;
 		cvtnum(varlen > 0 ? varlen : 0, flag);
 		goto record;
 	}
 
-	if (subtype == VSNORMAL) {
- record:
-		if (quoted) {
-			quoted = *var == '@' && shellparam.nparam;
-			if (!quoted)
-				goto end;
-		}
-		recordregion(startloc, expdest - (char *)stackblock(), quoted);
-		goto end;
-	}
+	if (subtype == VSNORMAL)
+		goto record;
 
 #if DEBUG
 	switch (subtype) {
@@ -7531,46 +7528,28 @@
 	}
 #endif
 
-	if (varlen >= 0) {
+	flag |= varlen < 0 ? EXP_DISCARD : 0;
+	if (!(flag & EXP_DISCARD)) {
 		/*
 		 * Terminate the string and start recording the pattern
 		 * right after it
 		 */
 		STPUTC('\0', expdest);
-		patloc = expdest - (char *)stackblock();
-		if (NULL == subevalvar(p, /* varname: */ NULL, patloc, subtype,
-				startloc, varflags, flag)) {
-			int amount = expdest - (
-				(char *)stackblock() + patloc - 1
-			);
-			STADJUST(-amount, expdest);
-		}
-		/* Remove any recorded regions beyond start of variable */
-		removerecordregions(startloc);
-		goto record;
 	}
 
-	varlen = 0;
+	patloc = expdest - (char *)stackblock();
+	p = subevalvar(p, NULL, patloc, startloc, varflags, flag);
 
- end:
-	if (subtype != VSNORMAL) {      /* skip to end of alternative */
-		int nesting = 1;
-		for (;;) {
-			unsigned char c = *p++;
-			if (c == CTLESC)
-				p++;
-			else if (c == CTLBACKQ) {
-				if (varlen >= 0)
-					argbackq = argbackq->next;
-			} else if (c == CTLVAR) {
-				if ((*p++ & VSTYPE) != VSNORMAL)
-					nesting++;
-			} else if (c == CTLENDVAR) {
-				if (--nesting == 0)
-					break;
-			}
-		}
+ record:
+	if (flag & EXP_DISCARD)
+		return p;
+
+	if (quoted) {
+		quoted = *var == '@' && shellparam.nparam;
+		if (!quoted)
+			return p;
 	}
+	recordregion(startloc, expdest - (char *)stackblock(), quoted);
 	return p;
 }
 
@@ -7983,13 +7962,11 @@
 	STARTSTACKSTR(expdest);
 	TRACE(("expandarg: argstr('%s',flags:%x)\n", arg->narg.text, flag));
 	argstr(arg->narg.text, flag);
-	p = _STPUTC('\0', expdest);
-	expdest = p - 1;
 	if (arglist == NULL) {
 		/* here document expanded */
 		goto out;
 	}
-	p = grabstackstr(p);
+	p = grabstackstr(expdest);
 	TRACE(("expandarg: p:'%s'\n", p));
 	exparg.lastp = &exparg.list;
 	/*
@@ -8050,7 +8027,6 @@
 	argbackq = pattern->narg.backquote;
 	STARTSTACKSTR(expdest);
 	argstr(pattern->narg.text, EXP_TILDE | EXP_CASE);
-	STACKSTRNUL(expdest);
 	ifsfree();
 	result = patmatch(stackblock(), val);
 	popstackmark(&smark);