[PATCH] sed: open input files sequentially to avoid EMFILE
Daniel Borca
dborca at yahoo.com
Tue Nov 26 20:54:31 UTC 2013
Currently, sed pre-opens all files, which may cause EMFILE errors
on systems with low ulimit -n. Change sed to open one file at a time.
OK, the output of "make bloatcheck" (below) seems rather odd...
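With this patch, status = EXIT_FAILURE is never set: the failed-open check
that set it in sed_main() is removed, and get_next_line() simply skips a file
that fopen_or_warn() could not open. I welcome any comments on this.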
-dborca
-------------- next part --------------
function old new delta
get_next_line 169 257 +88
__stat - 16 +16
__fstat 15 - -15
sed_main 744 683 -61
------------------------------------------------------------------------------
(add/remove: 1/1 grow/shrink: 1/1 up/down: 104/-76) Total: 28 bytes
text data bss dec hex filename
16698 906 8304 25908 6534 busybox_old
16647 898 8304 25849 64f9 busybox_unstripped
-------------- next part --------------
Signed-off-by: Daniel Borca <dborca at yahoo.com>
---
editors/sed.c | 40 +++++++++++++++++++++-------------------
1 files changed, 21 insertions(+), 19 deletions(-)
diff --git a/editors/sed.c b/editors/sed.c
index 777f383..570c6a2 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -23,7 +23,7 @@
* resulting sed_cmd_t structures are appended to a linked list
* (G.sed_cmd_head/G.sed_cmd_tail).
*
- * add_input_file() adds a FILE* to the list of input files. We need to
+ * add_input_file() adds a char* to the list of input files. We need to
* know all input sources ahead of time to find the last line for the $ match.
*
* process_files() does actual sedding, reading data lines from each input FILE*
@@ -140,7 +140,8 @@ struct globals {
/* List of input files */
int input_file_count, current_input_file;
- FILE **input_file_list;
+ char **input_file_list;
+ FILE *current_fp;
regmatch_t regmatch[10];
regex_t *previous_regex_ptr;
@@ -200,8 +201,8 @@ static void sed_free_and_close_stuff(void)
free(G.hold_space);
- while (G.current_input_file < G.input_file_count)
- fclose(G.input_file_list[G.current_input_file++]);
+ if (G.current_fp)
+ fclose(G.current_fp);
}
#else
void sed_free_and_close_stuff(void);
@@ -939,8 +940,14 @@ static char *get_next_line(char *gets_char, char *last_puts_char, char last_gets
/* will be returned if last line in the file
* doesn't end with either '\n' or '\0' */
gc = NO_EOL_CHAR;
- while (G.current_input_file < G.input_file_count) {
- FILE *fp = G.input_file_list[G.current_input_file];
+ for (; G.current_input_file < G.input_file_count; G.current_input_file++) {
+ FILE *fp = G.current_fp;
+ if (!fp) {
+ const char *path = G.input_file_list[G.current_input_file];
+ fp = G.current_fp = path ? fopen_or_warn(path, "r") : stdin;
+ if (!fp)
+ continue;
+ }
/* Read line up to a newline or NUL byte, inclusive,
* return malloc'ed char[]. length of the chunk read
* is stored in len. NULL if EOF/error */
@@ -971,8 +978,9 @@ static char *get_next_line(char *gets_char, char *last_puts_char, char last_gets
* (note: *no* newline after "b bang"!) */
}
/* Close this file and advance to next one */
- fclose(fp);
- G.current_input_file++;
+ if (G.input_file_list[G.current_input_file])
+ fclose(fp);
+ G.current_fp = NULL;
}
*gets_char = gc;
return temp;
@@ -1399,7 +1407,7 @@ static void add_cmd_block(char *cmdstr)
free(sv);
}
-static void add_input_file(FILE *file)
+static void add_input_file(char *file)
{
G.input_file_list = xrealloc_vector(G.input_file_list, 2, G.input_file_count);
G.input_file_list[G.input_file_count++] = file;
@@ -1491,27 +1499,21 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
if (argv[0] == NULL) {
if (opt & OPT_in_place)
bb_error_msg_and_die(bb_msg_requires_arg, "-i");
- add_input_file(stdin);
+ add_input_file(NULL);
} else {
int i;
for (i = 0; argv[i]; i++) {
struct stat statbuf;
int nonstdoutfd;
- FILE *file;
sed_cmd_t *sed_cmd;
if (LONE_DASH(argv[i]) && !(opt & OPT_in_place)) {
- add_input_file(stdin);
+ add_input_file(NULL);
process_files();
continue;
}
- file = fopen_or_warn(argv[i], "r");
- if (!file) {
- status = EXIT_FAILURE;
- continue;
- }
- add_input_file(file);
+ add_input_file(argv[i]);
if (!(opt & OPT_in_place)) {
continue;
}
@@ -1523,7 +1525,7 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
G.nonstdout = xfdopen_for_write(nonstdoutfd);
/* Set permissions/owner of output file */
- fstat(fileno(file), &statbuf);
+ stat(argv[i], &statbuf);
/* chmod'ing AFTER chown would preserve suid/sgid bits,
* but GNU sed 4.2.1 does not preserve them either */
fchmod(nonstdoutfd, statbuf.st_mode);
--
1.7.4.4
More information about the busybox
mailing list