[git commit] sed: fix handling of 1d;1,3p and 1d;1,REGEXp

Denys Vlasenko vda.linux at googlemail.com
Tue Jun 30 17:19:37 UTC 2009


commit: http://git.busybox.net/busybox/commit/?id=8bca3e20b9c057e9144af27870ca3905f1e5d316
branch: http://git.busybox.net/busybox/commit/?id=refs/heads/master


function                                             old     new   delta
process_files                                       2084    2173     +89

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 editors/sed.c       |  476 ++++++++++++++++++++++++++-------------------------
 testsuite/sed.tests |   13 ++
 2 files changed, 260 insertions(+), 229 deletions(-)

diff --git a/editors/sed.c b/editors/sed.c
index eb31f7d..2127301 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -865,63 +865,80 @@ static void process_files(void)
 	/* Prime the pump */
 	next_line = get_next_line(&next_gets_char);
 
-	/* go through every line in each file */
+	/* Go through every line in each file */
  again:
 	substituted = 0;
 
 	/* Advance to next line.  Stop if out of lines. */
 	pattern_space = next_line;
-	if (!pattern_space) return;
+	if (!pattern_space)
+		return;
 	last_gets_char = next_gets_char;
 
 	/* Read one line in advance so we can act on the last line,
 	 * the '$' address */
 	next_line = get_next_line(&next_gets_char);
 	linenum++;
+
+	/* For every line, go through all the commands */
  restart:
-	/* for every line, go through all the commands */
 	for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
 		int old_matched, matched;
 
 		old_matched = sed_cmd->in_match;
 
 		/* Determine if this command matches this line: */
-
-		/* Are we continuing a previous multi-line match? */
+			/* Are we continuing a previous multi-line match? */
 		sed_cmd->in_match = sed_cmd->in_match
 			/* Or is no range necessary? */
 			|| (!sed_cmd->beg_line && !sed_cmd->end_line
 				&& !sed_cmd->beg_match && !sed_cmd->end_match)
 			/* Or did we match the start of a numerical range? */
-			|| (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum))
+			|| (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum
+							/* "Shadowed beginning" case: in "1d;1,ENDp", p must still match at line 2
+							 * even though 1d discarded line 1, which is the start line of p's range */
+							|| (sed_cmd->end_line && sed_cmd->beg_line < linenum && sed_cmd->end_line >= linenum)
+							|| (sed_cmd->end_match && sed_cmd->beg_line < linenum)
+						)
+			)
 			/* Or does this line match our begin address regex? */
 			|| (beg_match(sed_cmd, pattern_space))
 			/* Or did we match last line of input? */
 			|| (sed_cmd->beg_line == -1 && next_line == NULL);
 
-		/* Snapshot the value */
-
 		matched = sed_cmd->in_match;
 
-		/* Is this line the end of the current match? */
+		//bb_error_msg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d",
+		//sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum);
 
+		/* Is this line the end of the current match? */
 		if (matched) {
-			sed_cmd->in_match = !(
+			int n = (
 				/* has the ending line come, or is this a single address command? */
-				(sed_cmd->end_line ?
+				sed_cmd->end_line ?
 					sed_cmd->end_line == -1 ?
 						!next_line
 						: (sed_cmd->end_line <= linenum)
 					: !sed_cmd->end_match
-				)
+				);
+			if (!n) {
 				/* or does this line matches our last address regex */
-				|| (sed_cmd->end_match && old_matched
+				n = (sed_cmd->end_match
+				     && old_matched
 				     && (regexec(sed_cmd->end_match,
-				                 pattern_space, 0, NULL, 0) == 0))
-			);
+				                 pattern_space, 0, NULL, 0) == 0)
+				);
+				if (n && sed_cmd->beg_line > 0) {
+					/* Once its end regex has matched, an "n,regex" range is dead: disable it */
+					regfree(sed_cmd->end_match);
+					free(sed_cmd->end_match);
+					sed_cmd->end_match = NULL;
+				}
+			}
+			sed_cmd->in_match = !n;
 		}
 
-		/* Skip blocks of commands we didn't match. */
+		/* Skip blocks of commands we didn't match */
 		if (sed_cmd->cmd == '{') {
 			if (sed_cmd->invert ? matched : !matched) {
 				while (sed_cmd->cmd != '}') {
@@ -934,253 +951,254 @@ static void process_files(void)
 		}
 
 		/* Okay, so did this line match? */
-		if (sed_cmd->invert ? !matched : matched) {
-			/* Update last used regex in case a blank substitute BRE is found */
-			if (sed_cmd->beg_match) {
-				G.previous_regex_ptr = sed_cmd->beg_match;
-			}
+		if (sed_cmd->invert ? matched : !matched)
+			continue; /* no */
 
-			/* actual sedding */
-			switch (sed_cmd->cmd) {
+		/* Update last used regex in case a blank substitute BRE is found */
+		if (sed_cmd->beg_match) {
+			G.previous_regex_ptr = sed_cmd->beg_match;
+		}
 
-			/* Print line number */
-			case '=':
-				fprintf(G.nonstdout, "%d\n", linenum);
-				break;
+		/* actual sedding */
+		switch (sed_cmd->cmd) {
 
-			/* Write the current pattern space up to the first newline */
-			case 'P':
-			{
-				char *tmp = strchr(pattern_space, '\n');
-
-				if (tmp) {
-					*tmp = '\0';
-					/* TODO: explain why '\n' below */
-					sed_puts(pattern_space, '\n');
-					*tmp = '\n';
-					break;
-				}
-				/* Fall Through */
-			}
+		/* Print line number */
+		case '=':
+			fprintf(G.nonstdout, "%d\n", linenum);
+			break;
 
-			/* Write the current pattern space to output */
-			case 'p':
-				/* NB: we print this _before_ the last line
-				 * (of current file) is printed. Even if
-				 * that line is nonterminated, we print
-				 * '\n' here (gnu sed does the same) */
+		/* Write the current pattern space up to the first newline */
+		case 'P':
+		{
+			char *tmp = strchr(pattern_space, '\n');
+
+			if (tmp) {
+				*tmp = '\0';
+				/* TODO: explain why '\n' below */
 				sed_puts(pattern_space, '\n');
+				*tmp = '\n';
 				break;
-			/* Delete up through first newline */
-			case 'D':
-			{
-				char *tmp = strchr(pattern_space, '\n');
-
-				if (tmp) {
-					tmp = xstrdup(tmp+1);
-					free(pattern_space);
-					pattern_space = tmp;
-					goto restart;
-				}
 			}
-			/* discard this line. */
-			case 'd':
-				goto discard_line;
+			/* Fall Through */
+		}
 
-			/* Substitute with regex */
-			case 's':
-				if (!do_subst_command(sed_cmd, &pattern_space))
-					break;
-				substituted |= 1;
-
-				/* handle p option */
-				if (sed_cmd->sub_p)
-					sed_puts(pattern_space, last_gets_char);
-				/* handle w option */
-				if (sed_cmd->sw_file)
-					puts_maybe_newline(
-						pattern_space, sed_cmd->sw_file,
-						&sed_cmd->sw_last_char, last_gets_char);
-				break;
+		/* Write the current pattern space to output */
+		case 'p':
+			/* NB: we print this _before_ the last line
+			 * (of current file) is printed. Even if
+			 * that line is nonterminated, we print
+			 * '\n' here (gnu sed does the same) */
+			sed_puts(pattern_space, '\n');
+			break;
+		/* Delete up through first newline */
+		case 'D':
+		{
+			char *tmp = strchr(pattern_space, '\n');
 
-			/* Append line to linked list to be printed later */
-			case 'a':
-				append(sed_cmd->string);
-				break;
+			if (tmp) {
+				tmp = xstrdup(tmp+1);
+				free(pattern_space);
+				pattern_space = tmp;
+				goto restart;
+			}
+		}
+		/* discard this line. */
+		case 'd':
+			goto discard_line;
 
-			/* Insert text before this line */
-			case 'i':
-				sed_puts(sed_cmd->string, '\n');
+		/* Substitute with regex */
+		case 's':
+			if (!do_subst_command(sed_cmd, &pattern_space))
 				break;
+			substituted |= 1;
 
-			/* Cut and paste text (replace) */
-			case 'c':
-				/* Only triggers on last line of a matching range. */
-				if (!sed_cmd->in_match)
-					sed_puts(sed_cmd->string, NO_EOL_CHAR);
-				goto discard_line;
+			/* handle p option */
+			if (sed_cmd->sub_p)
+				sed_puts(pattern_space, last_gets_char);
+			/* handle w option */
+			if (sed_cmd->sw_file)
+				puts_maybe_newline(
+					pattern_space, sed_cmd->sw_file,
+					&sed_cmd->sw_last_char, last_gets_char);
+			break;
 
-			/* Read file, append contents to output */
-			case 'r':
-			{
-				FILE *rfile;
+		/* Append line to linked list to be printed later */
+		case 'a':
+			append(sed_cmd->string);
+			break;
 
-				rfile = fopen_for_read(sed_cmd->string);
-				if (rfile) {
-					char *line;
+		/* Insert text before this line */
+		case 'i':
+			sed_puts(sed_cmd->string, '\n');
+			break;
 
-					while ((line = xmalloc_fgetline(rfile))
-							!= NULL)
-						append(line);
-					xprint_and_close_file(rfile);
-				}
+		/* Cut and paste text (replace) */
+		case 'c':
+			/* Only triggers on last line of a matching range. */
+			if (!sed_cmd->in_match)
+				sed_puts(sed_cmd->string, NO_EOL_CHAR);
+			goto discard_line;
 
-				break;
+		/* Read file, append contents to output */
+		case 'r':
+		{
+			FILE *rfile;
+
+			rfile = fopen_for_read(sed_cmd->string);
+			if (rfile) {
+				char *line;
+
+				while ((line = xmalloc_fgetline(rfile))
+						!= NULL)
+					append(line);
+				xprint_and_close_file(rfile);
 			}
 
-			/* Write pattern space to file. */
-			case 'w':
-				puts_maybe_newline(
-					pattern_space, sed_cmd->sw_file,
-					&sed_cmd->sw_last_char, last_gets_char);
-				break;
+			break;
+		}
 
-			/* Read next line from input */
-			case 'n':
-				if (!G.be_quiet)
-					sed_puts(pattern_space, last_gets_char);
-				if (next_line) {
-					free(pattern_space);
-					pattern_space = next_line;
-					last_gets_char = next_gets_char;
-					next_line = get_next_line(&next_gets_char);
-					substituted = 0;
-					linenum++;
-					break;
-				}
-				/* fall through */
+		/* Write pattern space to file. */
+		case 'w':
+			puts_maybe_newline(
+				pattern_space, sed_cmd->sw_file,
+				&sed_cmd->sw_last_char, last_gets_char);
+			break;
 
-			/* Quit.  End of script, end of input. */
-			case 'q':
-				/* Exit the outer while loop */
-				free(next_line);
-				next_line = NULL;
-				goto discard_commands;
-
-			/* Append the next line to the current line */
-			case 'N':
-			{
-				int len;
-				/* If no next line, jump to end of script and exit. */
-				if (next_line == NULL) {
-					/* Jump to end of script and exit */
-					free(next_line);
-					next_line = NULL;
-					goto discard_line;
-				/* append next_line, read new next_line. */
-				}
-				len = strlen(pattern_space);
-				pattern_space = realloc(pattern_space, len + strlen(next_line) + 2);
-				pattern_space[len] = '\n';
-				strcpy(pattern_space + len+1, next_line);
+		/* Read next line from input */
+		case 'n':
+			if (!G.be_quiet)
+				sed_puts(pattern_space, last_gets_char);
+			if (next_line) {
+				free(pattern_space);
+				pattern_space = next_line;
 				last_gets_char = next_gets_char;
 				next_line = get_next_line(&next_gets_char);
+				substituted = 0;
 				linenum++;
 				break;
 			}
+			/* fall through */
 
-			/* Test/branch if substitution occurred */
-			case 't':
-				if (!substituted) break;
-				substituted = 0;
-				/* Fall through */
-			/* Test/branch if substitution didn't occur */
-			case 'T':
-				if (substituted) break;
-				/* Fall through */
-			/* Branch to label */
-			case 'b':
-				if (!sed_cmd->string) goto discard_commands;
-				else sed_cmd = branch_to(sed_cmd->string);
-				break;
-			/* Transliterate characters */
-			case 'y':
-			{
-				int i, j;
-
-				for (i = 0; pattern_space[i]; i++) {
-					for (j = 0; sed_cmd->string[j]; j += 2) {
-						if (pattern_space[i] == sed_cmd->string[j]) {
-							pattern_space[i] = sed_cmd->string[j + 1];
-							break;
-						}
-					}
-				}
+		/* Quit.  End of script, end of input. */
+		case 'q':
+			/* Exit the outer while loop */
+			free(next_line);
+			next_line = NULL;
+			goto discard_commands;
 
-				break;
+		/* Append the next line to the current line */
+		case 'N':
+		{
+			int len;
+			/* If no next line, jump to end of script and exit. */
+			if (next_line == NULL) {
+				/* Jump to end of script and exit */
+				free(next_line);
+				next_line = NULL;
+				goto discard_line;
+			/* append next_line, read new next_line. */
 			}
-			case 'g':	/* Replace pattern space with hold space */
-				free(pattern_space);
-				pattern_space = xstrdup(G.hold_space ? G.hold_space : "");
-				break;
-			case 'G':	/* Append newline and hold space to pattern space */
-			{
-				int pattern_space_size = 2;
-				int hold_space_size = 0;
-
-				if (pattern_space)
-					pattern_space_size += strlen(pattern_space);
-				if (G.hold_space)
-					hold_space_size = strlen(G.hold_space);
-				pattern_space = xrealloc(pattern_space,
-						pattern_space_size + hold_space_size);
-				if (pattern_space_size == 2)
-					pattern_space[0] = 0;
-				strcat(pattern_space, "\n");
-				if (G.hold_space)
-					strcat(pattern_space, G.hold_space);
-				last_gets_char = '\n';
+			len = strlen(pattern_space);
+			pattern_space = realloc(pattern_space, len + strlen(next_line) + 2);
+			pattern_space[len] = '\n';
+			strcpy(pattern_space + len+1, next_line);
+			last_gets_char = next_gets_char;
+			next_line = get_next_line(&next_gets_char);
+			linenum++;
+			break;
+		}
 
-				break;
-			}
-			case 'h':	/* Replace hold space with pattern space */
-				free(G.hold_space);
-				G.hold_space = xstrdup(pattern_space);
-				break;
-			case 'H':	/* Append newline and pattern space to hold space */
-			{
-				int hold_space_size = 2;
-				int pattern_space_size = 0;
-
-				if (G.hold_space)
-					hold_space_size += strlen(G.hold_space);
-				if (pattern_space)
-					pattern_space_size = strlen(pattern_space);
-				G.hold_space = xrealloc(G.hold_space,
-						hold_space_size + pattern_space_size);
-
-				if (hold_space_size == 2)
-					*G.hold_space = 0;
-				strcat(G.hold_space, "\n");
-				if (pattern_space)
-					strcat(G.hold_space, pattern_space);
+		/* Test/branch if substitution occurred */
+		case 't':
+			if (!substituted) break;
+			substituted = 0;
+			/* Fall through */
+		/* Test/branch if substitution didn't occur */
+		case 'T':
+			if (substituted) break;
+			/* Fall through */
+		/* Branch to label */
+		case 'b':
+			if (!sed_cmd->string) goto discard_commands;
+			else sed_cmd = branch_to(sed_cmd->string);
+			break;
+		/* Transliterate characters */
+		case 'y':
+		{
+			int i, j;
 
-				break;
-			}
-			case 'x': /* Exchange hold and pattern space */
-			{
-				char *tmp = pattern_space;
-				pattern_space = G.hold_space ? : xzalloc(1);
-				last_gets_char = '\n';
-				G.hold_space = tmp;
-				break;
-			}
+			for (i = 0; pattern_space[i]; i++) {
+				for (j = 0; sed_cmd->string[j]; j += 2) {
+					if (pattern_space[i] == sed_cmd->string[j]) {
+						pattern_space[i] = sed_cmd->string[j + 1];
+						break;
+					}
+				}
 			}
+
+			break;
 		}
-	}
+		case 'g':	/* Replace pattern space with hold space */
+			free(pattern_space);
+			pattern_space = xstrdup(G.hold_space ? G.hold_space : "");
+			break;
+		case 'G':	/* Append newline and hold space to pattern space */
+		{
+			int pattern_space_size = 2;
+			int hold_space_size = 0;
+
+			if (pattern_space)
+				pattern_space_size += strlen(pattern_space);
+			if (G.hold_space)
+				hold_space_size = strlen(G.hold_space);
+			pattern_space = xrealloc(pattern_space,
+					pattern_space_size + hold_space_size);
+			if (pattern_space_size == 2)
+				pattern_space[0] = 0;
+			strcat(pattern_space, "\n");
+			if (G.hold_space)
+				strcat(pattern_space, G.hold_space);
+			last_gets_char = '\n';
+
+			break;
+		}
+		case 'h':	/* Replace hold space with pattern space */
+			free(G.hold_space);
+			G.hold_space = xstrdup(pattern_space);
+			break;
+		case 'H':	/* Append newline and pattern space to hold space */
+		{
+			int hold_space_size = 2;
+			int pattern_space_size = 0;
+
+			if (G.hold_space)
+				hold_space_size += strlen(G.hold_space);
+			if (pattern_space)
+				pattern_space_size = strlen(pattern_space);
+			G.hold_space = xrealloc(G.hold_space,
+					hold_space_size + pattern_space_size);
+
+			if (hold_space_size == 2)
+				*G.hold_space = 0;
+			strcat(G.hold_space, "\n");
+			if (pattern_space)
+				strcat(G.hold_space, pattern_space);
+
+			break;
+		}
+		case 'x': /* Exchange hold and pattern space */
+		{
+			char *tmp = pattern_space;
+			pattern_space = G.hold_space ? : xzalloc(1);
+			last_gets_char = '\n';
+			G.hold_space = tmp;
+			break;
+		}
+		} /* switch */
+	} /* for each cmd */
 
 	/*
-	 * exit point from sedding...
+	 * Exit point from sedding...
 	 */
  discard_commands:
 	/* we will print the line unless we were told to be quiet ('-n')
diff --git a/testsuite/sed.tests b/testsuite/sed.tests
index 9a7f886..8af156a 100755
--- a/testsuite/sed.tests
+++ b/testsuite/sed.tests
@@ -207,4 +207,17 @@ testing "sed n command must reset 'substituted' bit" \
 	"sed 's/1/x/;T;n;: next;s/3/y/;t quit;n;b next;: quit;q'" \
 	"0\nx\n2\ny\n" "" "0\n1\n2\n3\n"
 
+testing "sed d does not break n,m matching" \
+	"sed -n '1d;1,3p'" \
+	"second\nthird\n" "" "first\nsecond\nthird\nfourth\n"
+
+testing "sed d does not break n,regex matching" \
+	"sed -n '1d;1,/hir/p'" \
+	"second\nthird\n" "" "first\nsecond\nthird\nfourth\n"
+
+testing "sed d does not break n,regex matching #2" \
+	"sed -n '1,5d;1,/hir/p'" \
+	"second2\nthird2\n" "" \
+	"first\nsecond\nthird\nfourth\n""first2\nsecond2\nthird2\nfourth2\n"
+
 exit $FAILCOUNT
-- 
1.6.0.6


More information about the busybox-cvs mailing list