[PATCH] sed: open input files sequentially to avoid EMFILE

Daniel Borca dborca at yahoo.com
Tue Nov 26 20:54:31 UTC 2013


Currently, sed opens all of its input files up front, which may cause
EMFILE errors on systems with a low "ulimit -n".  Change sed to store the
file names instead and open one file at a time, as each is about to be read.

OK, the "make bloatcheck" output (attached below) seems rather odd...

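With this patch, status = EXIT_FAILURE is never set: the fopen_or_warn()
call moves out of sed_main() into get_next_line(), which only warns and
skips to the next file when the open fails, so the failure is no longer
recorded in the exit status.  I welcome any comments on this.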

-dborca
-------------- next part --------------
function                                             old     new   delta
get_next_line                                        169     257     +88
__stat                                                 -      16     +16
__fstat                                               15       -     -15
sed_main                                             744     683     -61
------------------------------------------------------------------------------
(add/remove: 1/1 grow/shrink: 1/1 up/down: 104/-76)            Total: 28 bytes
   text	   data	    bss	    dec	    hex	filename
  16698	    906	   8304	  25908	   6534	busybox_old
  16647	    898	   8304	  25849	   64f9	busybox_unstripped
-------------- next part --------------
Signed-off-by: Daniel Borca <dborca at yahoo.com>
---
 editors/sed.c |   40 +++++++++++++++++++++-------------------
 1 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/editors/sed.c b/editors/sed.c
index 777f383..570c6a2 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -23,7 +23,7 @@
  * resulting sed_cmd_t structures are appended to a linked list
  * (G.sed_cmd_head/G.sed_cmd_tail).
  *
- * add_input_file() adds a FILE* to the list of input files.  We need to
+ * add_input_file() adds a char* to the list of input files.  We need to
  * know all input sources ahead of time to find the last line for the $ match.
  *
  * process_files() does actual sedding, reading data lines from each input FILE*
@@ -140,7 +140,8 @@ struct globals {
 
 	/* List of input files */
 	int input_file_count, current_input_file;
-	FILE **input_file_list;
+	char **input_file_list;
+	FILE *current_fp;
 
 	regmatch_t regmatch[10];
 	regex_t *previous_regex_ptr;
@@ -200,8 +201,8 @@ static void sed_free_and_close_stuff(void)
 
 	free(G.hold_space);
 
-	while (G.current_input_file < G.input_file_count)
-		fclose(G.input_file_list[G.current_input_file++]);
+	if (G.current_fp)
+		fclose(G.current_fp);
 }
 #else
 void sed_free_and_close_stuff(void);
@@ -939,8 +940,14 @@ static char *get_next_line(char *gets_char, char *last_puts_char, char last_gets
 	/* will be returned if last line in the file
 	 * doesn't end with either '\n' or '\0' */
 	gc = NO_EOL_CHAR;
-	while (G.current_input_file < G.input_file_count) {
-		FILE *fp = G.input_file_list[G.current_input_file];
+	for (; G.current_input_file < G.input_file_count; G.current_input_file++) {
+		FILE *fp = G.current_fp;
+		if (!fp) {
+			const char *path = G.input_file_list[G.current_input_file];
+			fp = G.current_fp = path ? fopen_or_warn(path, "r") : stdin;
+			if (!fp)
+				continue;
+		}
 		/* Read line up to a newline or NUL byte, inclusive,
 		 * return malloc'ed char[]. length of the chunk read
 		 * is stored in len. NULL if EOF/error */
@@ -971,8 +978,9 @@ static char *get_next_line(char *gets_char, char *last_puts_char, char last_gets
 		 * (note: *no* newline after "b bang"!) */
 		}
 		/* Close this file and advance to next one */
-		fclose(fp);
-		G.current_input_file++;
+		if (G.input_file_list[G.current_input_file])
+			fclose(fp);
+		G.current_fp = NULL;
 	}
 	*gets_char = gc;
 	return temp;
@@ -1399,7 +1407,7 @@ static void add_cmd_block(char *cmdstr)
 	free(sv);
 }
 
-static void add_input_file(FILE *file)
+static void add_input_file(char *file)
 {
 	G.input_file_list = xrealloc_vector(G.input_file_list, 2, G.input_file_count);
 	G.input_file_list[G.input_file_count++] = file;
@@ -1491,27 +1499,21 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
 	if (argv[0] == NULL) {
 		if (opt & OPT_in_place)
 			bb_error_msg_and_die(bb_msg_requires_arg, "-i");
-		add_input_file(stdin);
+		add_input_file(NULL);
 	} else {
 		int i;
 
 		for (i = 0; argv[i]; i++) {
 			struct stat statbuf;
 			int nonstdoutfd;
-			FILE *file;
 			sed_cmd_t *sed_cmd;
 
 			if (LONE_DASH(argv[i]) && !(opt & OPT_in_place)) {
-				add_input_file(stdin);
+				add_input_file(NULL);
 				process_files();
 				continue;
 			}
-			file = fopen_or_warn(argv[i], "r");
-			if (!file) {
-				status = EXIT_FAILURE;
-				continue;
-			}
-			add_input_file(file);
+			add_input_file(argv[i]);
 			if (!(opt & OPT_in_place)) {
 				continue;
 			}
@@ -1523,7 +1525,7 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
 			G.nonstdout = xfdopen_for_write(nonstdoutfd);
 
 			/* Set permissions/owner of output file */
-			fstat(fileno(file), &statbuf);
+			stat(argv[i], &statbuf);
 			/* chmod'ing AFTER chown would preserve suid/sgid bits,
 			 * but GNU sed 4.2.1 does not preserve them either */
 			fchmod(nonstdoutfd, statbuf.st_mode);
-- 
1.7.4.4



More information about the busybox mailing list