No subject

Easymail andrey.dobrovolsky.odessa at gmail.com
Mon Jun 28 12:17:23 UTC 2021


From a8649c2724f6fc2bd921836803b63e5b0c3fc77b Mon Sep 17 00:00:00 2001
From: AndreyDobrovolskyOdessa <andrey.dobrovolsky.odessa at gmail.com>
Date: Mon, 28 Jun 2021 02:43:02 +0300
Subject: [PATCH 2/2] vi: allow regular expressions in ':s' commands

BusyBox vi has never supported the use of regular expressions in
search/replace (':s') commands.  Implement this using GNU regex
when VI_REGEX_SEARCH is enabled.

The implementation:

- uses basic regular expressions, to match those used in the search
  command;

- only supports substitution of back references ('\0' - '\9') in the
  replacement string, where '\0' stands for the whole match.  Any other
  character following a backslash is treated as that literal character.

VI_REGEX_SEARCH isn't enabled in the default build.  In that case:

function                                             old     new   delta
colon                                               4024    4021      -3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3)               Total: -3 bytes

When VI_REGEX_SEARCH is enabled:

function                                             old     new   delta
colon                                               4024    4306    +282
do_substitution                                        -     122    +122
.rodata                                           108207  108229     +22
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 2/0 up/down: 426/0)             Total: 426 bytes

Signed-off-by: Andrey Dobrovolsky <andrey.dobrovolsky.odessa at gmail.com>
Signed-off-by: Ron Yorston <rmy at pobox.com>
---
 editors/vi.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 97 insertions(+), 8 deletions(-)

diff --git a/editors/vi.c b/editors/vi.c
index 959362b25..eed51fbad 100644
--- a/editors/vi.c
+++ b/editors/vi.c
@@ -2675,6 +2675,49 @@ static char *expand_args(char *args)
 # endif
 #endif /* FEATURE_VI_COLON */
 
+#if ENABLE_FEATURE_VI_REGEX_SEARCH
+# define MAX_SUBPATTERN 10	// subpatterns \0 .. \9
+
+// orig   - input string
+// regm   - array of subpattern bounds
+// s      - replace pattern
+// result - buffer to place the substitution result
+static size_t do_substitution(const char *orig, regmatch_t *regm,
+					const char *s, char *result)
+{
+	const char *from;	// memcpy source pointer
+	size_t len, total_len = 0;
+	regmatch_t *cur_match;
+
+	while (*s) {
+		len = 1;	// default is to copy one char from replace pattern
+		from = s;
+		if (*s == '\\') {
+			from = ++s;	// skip backslash
+			if (*s >= '0' && *s < '0' + MAX_SUBPATTERN) {
+				cur_match = regm + (*s - '0');
+				if (cur_match->rm_so >= 0) {
+					len = cur_match->rm_eo - cur_match->rm_so;
+					from = orig + cur_match->rm_so;
+				}
+			}
+		}
+		total_len += len;
+		if (result) {
+			memcpy(result, from, len);
+			result += len;
+			*result = '\0';
+		}
+		s++;
+	}
+
+	return total_len;
+}
+
+// do_substitution dry run
+# define get_substituted_size(x, y) do_substitution(NULL, x, y, NULL)
+#endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
 // buf must be no longer than MAX_INPUT_LEN!
 static void colon(char *buf)
 {
@@ -3082,6 +3125,12 @@ static void colon(char *buf)
 #  if ENABLE_FEATURE_VI_VERBOSE_STATUS
 		int last_line = 0, lines = 0;
 #  endif
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+		regex_t preg;
+		int cflags;
+		regmatch_t regmatch[MAX_SUBPATTERN];
+		char *Rorig;
+#  endif
 
 		// F points to the "find" pattern
 		// R points to the "replace" pattern
@@ -3098,7 +3147,6 @@ static void colon(char *buf)
 			*flags++ = '\0';	// terminate "replace"
 			gflag = *flags;
 		}
-		len_R = strlen(R);
 
 		if (len_F) {	// save "find" as last search pattern
 			free(last_search_pattern);
@@ -3120,18 +3168,61 @@ static void colon(char *buf)
 			b = e;
 		}
 
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+		Rorig = R;
+		cflags = 0;
+		if (ignorecase)
+			cflags = REG_ICASE;
+		memset(&preg, 0, sizeof(preg));
+		if (regcomp(&preg, F, cflags) != 0) {
+			status_line(":s bad search pattern");
+			goto regex_search_end;
+		}
+#  else
+		len_R = strlen(R);
+#  endif
+
 		for (i = b; i <= e; i++) {	// so, :20,23 s \0 find \0 replace \0
 			char *ls = q;		// orig line start
 			char *found;
  vc4:
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+			found = NULL;
+			regmatch[0].rm_so = 0;
+			regmatch[0].rm_eo = end_line(q) - q;
+			if (!regexec(&preg, q, MAX_SUBPATTERN, regmatch, REG_STARTEND)) {
+				found = q + regmatch[0].rm_so;
+				len_F = regmatch[0].rm_eo - regmatch[0].rm_so;
+				len_R = get_substituted_size(regmatch, Rorig);
+				R = xmalloc(len_R + 1);
+				do_substitution(q, regmatch, Rorig, R);
+			}
+#  else
 			found = char_search(q, F, (FORWARD << 1) | LIMITED);	// search cur line only for "find"
+#  endif
 			if (found) {
 				uintptr_t bias;
 				// we found the "find" pattern - delete it
 				// For undo support, the first item should not be chained
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+				if (len_F) {	// match can be empty, no delete needed
+					text_hole_delete(found, found + len_F - 1,
+							subs ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
+				}
+				// insert the "replace" pattern
+				bias = string_insert(found, R,
+							subs || len_F ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
+				free(R);
+#  else
 				text_hole_delete(found, found + len_F - 1,
 							subs ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
-				// can't do this above, no undo => no third argument
+				// insert the "replace" pattern
+				bias = string_insert(found, R, ALLOW_UNDO_CHAIN);
+#  endif
+				found += bias;
+				ls += bias;
+				dot = ls;
+				//q += bias; - recalculated anyway
 				subs++;
 #  if ENABLE_FEATURE_VI_VERBOSE_STATUS
 				if (last_line != i) {
@@ -3139,12 +3230,6 @@ static void colon(char *buf)
 					++lines;
 				}
 #  endif
-				// insert the "replace" patern
-				bias = string_insert(found, R, ALLOW_UNDO_CHAIN);
-				found += bias;
-				ls += bias;
-				dot = ls;
-				//q += bias; - recalculated anyway
 				// check for "global"  :s/foo/bar/g
 				if (gflag == 'g') {
 					if ((found + len_R) < end_line(ls)) {
@@ -3164,6 +3249,10 @@ static void colon(char *buf)
 				status_line("%d substitutions on %d lines", subs, lines);
 #  endif
 		}
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+ regex_search_end:
+		regfree(&preg);
+#  endif
 # endif /* FEATURE_VI_SEARCH */
 	} else if (strncmp(cmd, "version", i) == 0) {  // show software version
 		status_line(BB_VER);
-- 
2.31.1



More information about the busybox mailing list