[PATCH] ash: improve speed of variable pattern substitution
Ron Yorston
rmy at pobox.com
Wed Jul 21 12:53:58 UTC 2021
Bash pattern substitution of variables (${var/find/replace}) was
found to take time O(N^2) (or worse). Performance can be improved
considerably if the 'find' string doesn't contain any wildcard
characters.
Detect if this is the case, remove the backslashes added to
the pattern to protect literal references to wildcards and call
a simpler replacement function to search for the pattern.
Like a similar optimisation for patterns that end with '*' this
implementation is controlled by the ASH_OPTIMIZE_FOR_SIZE option.
Since this is enabled in the default configuration there's no
effect on the size of the binary.
When ASH_OPTIMIZE_FOR_SIZE is disabled:
function old new delta
subevalvar 1541 1708 +167
hasmeta - 128 +128
expandarg 1099 987 -112
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 1/1 up/down: 295/-112) Total: 183 bytes
Signed-off-by: Ron Yorston <rmy at pobox.com>
---
shell/ash.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 52 insertions(+), 2 deletions(-)
diff --git a/shell/ash.c b/shell/ash.c
index 2eac6e113..18d60baf2 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -7024,6 +7024,27 @@ scanright(char *startp, char *rmesc, char *rmescend,
return NULL;
}
+#if BASH_PATTERN_SUBST && !ENABLE_ASH_OPTIMIZE_FOR_SIZE
+static int hasmeta(const char *p);
+
+/*
+ * Like scanright() but for patterns that don't require the use of fnmatch().
+ */
+static char *
+scannoglob(char *startp, char *rmesc, char *pattern, size_t len)
+{
+ int match = strncmp(rmesc, pattern, len) == 0;
+ //bb_error_msg("strncmp(s:'%s',pattern:'%s',len:%lu):%d", rmesc, pattern, len, match);
+ if (match) {
+ while (len--)
+ if (*startp++ == CTLESC)
+ startp++;
+ return startp;
+ }
+ return NULL;
+}
+#endif
+
static void varunset(const char *, const char *, const char *, int) NORETURN;
static void
varunset(const char *end, const char *var, const char *umsg, int varflags)
@@ -7260,6 +7281,12 @@ subevalvar(char *start, char *str, int strloc,
if (subtype == VSREPLACE || subtype == VSREPLACEALL) {
int len;
char *idx, *end;
+# if !ENABLE_ASH_OPTIMIZE_FOR_SIZE
+ int isglob;
+ size_t patlen;
+# else
+ const int isglob = 1;
+# endif
if (!repl) {
//bb_error_msg("str9:'%s' slash_pos:%d", str, slash_pos);
@@ -7275,12 +7302,35 @@ subevalvar(char *start, char *str, int strloc,
if (str[0] == '\0')
goto out1;
+# if !ENABLE_ASH_OPTIMIZE_FOR_SIZE
+ isglob = hasmeta(str);
+ if (!isglob) {
+ /* If the pattern doesn't have any special characters remove
+ * the backslash escapes that were added by rmescapes(). */
+ char *s, *t;
+ s = t = str;
+ while (*t) {
+ if (*t == '\\')
+ if (*++t == '\0')
+ break;
+ *s++ = *t++;
+ }
+ *s = '\0';
+ patlen = strlen(str);
+ }
+# endif
+
len = 0;
idx = startp;
end = str - 1;
while (idx <= end) {
try_to_match:
- loc = scanright(idx, rmesc, rmescend, str, quotes, 1);
+ if (isglob)
+ loc = scanright(idx, rmesc, rmescend, str, quotes, 1);
+# if !ENABLE_ASH_OPTIMIZE_FOR_SIZE
+ else
+ loc = scannoglob(idx, rmesc, str, patlen);
+# endif
//bb_error_msg("scanright('%s'):'%s'", str, loc);
if (!loc) {
/* No match, advance */
@@ -7300,7 +7350,7 @@ subevalvar(char *start, char *str, int strloc,
len++;
rmesc++;
/* continue; - prone to quadratic behavior, smarter code: */
- if (str[0] == '*') {
+ if (isglob && str[0] == '*') {
/* Pattern is "*foo". If "*foo" does not match "long_string",
* it would never match "ong_string" etc, no point in trying.
*/
--
2.31.1
More information about the busybox
mailing list