No subject
Easymail
andrey.dobrovolsky.odessa at gmail.com
Mon Jun 28 12:17:23 UTC 2021
From a8649c2724f6fc2bd921836803b63e5b0c3fc77b Mon Sep 17 00:00:00 2001
From: AndreyDobrovolskyOdessa <andrey.dobrovolsky.odessa at gmail.com>
Date: Mon, 28 Jun 2021 02:43:02 +0300
Subject: [PATCH 2/2] vi: allow regular expressions in ':s' commands
BusyBox vi has never supported the use of regular expressions in
search/replace (':s') commands. Implement this using GNU regex
when VI_REGEX_SEARCH is enabled.
The implementation:
- uses basic regular expressions, to match those used in the search
  command;
- only supports substitution of back references ('\0' - '\9') in the
  replacement string. Any other character following a backslash is
  treated as that literal character.
VI_REGEX_SEARCH isn't enabled in the default build. In that case:
function                                             old     new   delta
colon                                               4024    4021      -3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3)               Total: -3 bytes
When VI_REGEX_SEARCH is enabled:
function                                             old     new   delta
colon                                               4024    4306    +282
do_substitution                                        -     122    +122
.rodata                                           108207  108229     +22
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 2/0 up/down: 426/0)             Total: 426 bytes
Signed-off-by: Andrey Dobrovolsky <andrey.dobrovolsky.odessa at gmail.com>
Signed-off-by: Ron Yorston <rmy at pobox.com>
---
editors/vi.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 97 insertions(+), 8 deletions(-)
diff --git a/editors/vi.c b/editors/vi.c
index 959362b25..eed51fbad 100644
--- a/editors/vi.c
+++ b/editors/vi.c
@@ -2675,6 +2675,49 @@ static char *expand_args(char *args)
# endif
#endif /* FEATURE_VI_COLON */
+#if ENABLE_FEATURE_VI_REGEX_SEARCH
+# define MAX_SUBPATTERN 10 // subpatterns \0 .. \9
+
+// orig - input string
+// regm - array of subpatterns bounds
+// s - replace pattern
+// result - buffer to place the substitution result
+static size_t do_substitution(const char *orig, regmatch_t *regm,
+ const char *s, char *result)
+{
+ const char *from; // memcpy source pointer
+ size_t len, total_len = 0;
+ regmatch_t *cur_match;
+
+ while (*s) {
+ len = 1; // default is to copy one char from replace pattern
+ from = s;
+ if (*s == '\\') {
+ from = ++s; // skip backslash
+ if (*s >= '0' && *s < '0' + MAX_SUBPATTERN) {
+ cur_match = regm + (*s - '0');
+ if (cur_match->rm_so >= 0) {
+ len = cur_match->rm_eo - cur_match->rm_so;
+ from = orig + cur_match->rm_so;
+ }
+ }
+ }
+ total_len += len;
+ if (result) {
+ memcpy(result, from, len);
+ result += len;
+ *result = '\0';
+ }
+ s++;
+ }
+
+ return total_len;
+}
+
+// do_substitution dry run
+# define get_substituted_size(x, y) do_substitution(NULL, x, y, NULL)
+#endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
// buf must be no longer than MAX_INPUT_LEN!
static void colon(char *buf)
{
@@ -3082,6 +3125,12 @@ static void colon(char *buf)
# if ENABLE_FEATURE_VI_VERBOSE_STATUS
int last_line = 0, lines = 0;
# endif
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ regex_t preg;
+ int cflags;
+ regmatch_t regmatch[MAX_SUBPATTERN];
+ char *Rorig;
+# endif
// F points to the "find" pattern
// R points to the "replace" pattern
@@ -3098,7 +3147,6 @@ static void colon(char *buf)
*flags++ = '\0'; // terminate "replace"
gflag = *flags;
}
- len_R = strlen(R);
if (len_F) { // save "find" as last search pattern
free(last_search_pattern);
@@ -3120,18 +3168,61 @@ static void colon(char *buf)
b = e;
}
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ Rorig = R;
+ cflags = 0;
+ if (ignorecase)
+ cflags = REG_ICASE;
+ memset(&preg, 0, sizeof(preg));
+ if (regcomp(&preg, F, cflags) != 0) {
+ status_line(":s bad search pattern");
+ goto regex_search_end;
+ }
+# else
+ len_R = strlen(R);
+# endif
+
for (i = b; i <= e; i++) { // so, :20,23 s \0 find \0 replace \0
char *ls = q; // orig line start
char *found;
vc4:
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ found = NULL;
+ regmatch[0].rm_so = 0;
+ regmatch[0].rm_eo = end_line(q) - q;
+ if (!regexec(&preg, q, MAX_SUBPATTERN, regmatch, REG_STARTEND)) {
+ found = q + regmatch[0].rm_so;
+ len_F = regmatch[0].rm_eo - regmatch[0].rm_so;
+ len_R = get_substituted_size(regmatch, Rorig);
+ R = xmalloc(len_R + 1);
+ do_substitution(q, regmatch, Rorig, R);
+ }
+# else
found = char_search(q, F, (FORWARD << 1) | LIMITED); // search cur line only for "find"
+# endif
if (found) {
uintptr_t bias;
// we found the "find" pattern - delete it
// For undo support, the first item should not be chained
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ if (len_F) { // match can be empty, no delete needed
+ text_hole_delete(found, found + len_F - 1,
+ subs ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
+ }
+ // insert the "replace" pattern
+ bias = string_insert(found, R,
+ subs || len_F ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
+ free(R);
+# else
text_hole_delete(found, found + len_F - 1,
subs ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
- // can't do this above, no undo => no third argument
+ // insert the "replace" pattern
+ bias = string_insert(found, R, ALLOW_UNDO_CHAIN);
+# endif
+ found += bias;
+ ls += bias;
+ dot = ls;
+ //q += bias; - recalculated anyway
subs++;
# if ENABLE_FEATURE_VI_VERBOSE_STATUS
if (last_line != i) {
@@ -3139,12 +3230,6 @@ static void colon(char *buf)
++lines;
}
# endif
- // insert the "replace" patern
- bias = string_insert(found, R, ALLOW_UNDO_CHAIN);
- found += bias;
- ls += bias;
- dot = ls;
- //q += bias; - recalculated anyway
// check for "global" :s/foo/bar/g
if (gflag == 'g') {
if ((found + len_R) < end_line(ls)) {
@@ -3164,6 +3249,10 @@ static void colon(char *buf)
status_line("%d substitutions on %d lines", subs, lines);
# endif
}
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ regex_search_end:
+ regfree(&preg);
+# endif
# endif /* FEATURE_VI_SEARCH */
} else if (strncmp(cmd, "version", i) == 0) { // show software version
status_line(BB_VER);
--
2.31.1
More information about the busybox
mailing list