sed: open input files sequentially to avoid EMFILE
Currently, sed pre-opens all files, which may cause EMFILE errors
on systems with low ulimit -n. Change sed to open one file at a time.
function old new delta
get_next_line 177 235 +58
sed_main 682 652 -30
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/1 up/down: 58/-30) Total: 28 bytes
Based on the patch by Daniel Borca <dborca@yahoo.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/editors/sed.c b/editors/sed.c
index 777f383..87fa002 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -23,7 +23,7 @@
* resulting sed_cmd_t structures are appended to a linked list
* (G.sed_cmd_head/G.sed_cmd_tail).
*
- * add_input_file() adds a FILE* to the list of input files. We need to
+ * add_input_file() adds a char* to the list of input files. We need to
* know all input sources ahead of time to find the last line for the $ match.
*
* process_files() does actual sedding, reading data lines from each input FILE*
@@ -135,12 +135,15 @@
struct globals {
/* options */
int be_quiet, regex_type;
+
FILE *nonstdout;
char *outname, *hold_space;
+ smallint exitcode;
- /* List of input files */
+ /* list of input files */
int input_file_count, current_input_file;
- FILE **input_file_list;
+ const char **input_file_list;
+ FILE *current_fp;
regmatch_t regmatch[10];
regex_t *previous_regex_ptr;
@@ -148,7 +151,7 @@
/* linked list of sed commands */
sed_cmd_t *sed_cmd_head, **sed_cmd_tail;
- /* Linked list of append lines */
+ /* linked list of append lines */
llist_t *append_head;
char *add_cmd_line;
@@ -200,8 +203,8 @@
free(G.hold_space);
- while (G.current_input_file < G.input_file_count)
- fclose(G.input_file_list[G.current_input_file++]);
+ if (G.current_fp)
+ fclose(G.current_fp);
}
#else
void sed_free_and_close_stuff(void);
@@ -939,8 +942,20 @@
/* will be returned if last line in the file
* doesn't end with either '\n' or '\0' */
gc = NO_EOL_CHAR;
- while (G.current_input_file < G.input_file_count) {
- FILE *fp = G.input_file_list[G.current_input_file];
+ for (; G.input_file_list[G.current_input_file]; G.current_input_file++) {
+ FILE *fp = G.current_fp;
+ if (!fp) {
+ const char *path = G.input_file_list[G.current_input_file];
+ fp = stdin;
+ if (path != bb_msg_standard_input) {
+ fp = fopen_or_warn(path, "r");
+ if (!fp) {
+ G.exitcode = EXIT_FAILURE;
+ continue;
+ }
+ }
+ G.current_fp = fp;
+ }
/* Read line up to a newline or NUL byte, inclusive,
* return malloc'ed char[]. length of the chunk read
* is stored in len. NULL if EOF/error */
@@ -971,8 +986,8 @@
* (note: *no* newline after "b bang"!) */
}
/* Close this file and advance to next one */
- fclose(fp);
- G.current_input_file++;
+ fclose_if_not_stdin(fp);
+ G.current_fp = NULL;
}
*gets_char = gc;
return temp;
@@ -1399,7 +1414,7 @@
free(sv);
}
-static void add_input_file(FILE *file)
+static void add_input_file(const char *file)
{
G.input_file_list = xrealloc_vector(G.input_file_list, 2, G.input_file_count);
G.input_file_list[G.input_file_count++] = file;
@@ -1423,8 +1438,6 @@
"file\0" Required_argument "f";
#endif
- int status = EXIT_SUCCESS;
-
INIT_G();
/* destroy command strings on exit */
@@ -1491,27 +1504,21 @@
if (argv[0] == NULL) {
if (opt & OPT_in_place)
bb_error_msg_and_die(bb_msg_requires_arg, "-i");
- add_input_file(stdin);
+ add_input_file(bb_msg_standard_input);
} else {
int i;
for (i = 0; argv[i]; i++) {
struct stat statbuf;
int nonstdoutfd;
- FILE *file;
sed_cmd_t *sed_cmd;
if (LONE_DASH(argv[i]) && !(opt & OPT_in_place)) {
- add_input_file(stdin);
+ add_input_file(bb_msg_standard_input);
process_files();
continue;
}
- file = fopen_or_warn(argv[i], "r");
- if (!file) {
- status = EXIT_FAILURE;
- continue;
- }
- add_input_file(file);
+ add_input_file(argv[i]);
if (!(opt & OPT_in_place)) {
continue;
}
@@ -1523,7 +1530,7 @@
G.nonstdout = xfdopen_for_write(nonstdoutfd);
/* Set permissions/owner of output file */
- fstat(fileno(file), &statbuf);
+ stat(argv[i], &statbuf);
/* chmod'ing AFTER chown would preserve suid/sgid bits,
* but GNU sed 4.2.1 does not preserve them either */
fchmod(nonstdoutfd, statbuf.st_mode);
@@ -1549,12 +1556,13 @@
}
}
/* Here, to handle "sed 'cmds' nonexistent_file" case we did:
- * if (G.current_input_file >= G.input_file_count)
- * return status;
+ * if (G.current_input_file[G.current_input_file] == NULL)
+ * return G.exitcode;
* but it's not needed since process_files() works correctly
* in this case too. */
}
+
process_files();
- return status;
+ return G.exitcode;
}
diff --git a/libbb/fclose_nonstdin.c b/libbb/fclose_nonstdin.c
index 5ce9d5b..1b14413 100644
--- a/libbb/fclose_nonstdin.c
+++ b/libbb/fclose_nonstdin.c
@@ -18,7 +18,8 @@
{
/* Some more paranoid applets want ferror() check too */
int r = ferror(f); /* NB: does NOT set errno! */
- if (r) errno = EIO; /* so we'll help it */
+ if (r)
+ errno = EIO; /* so we'll help it */
if (f != stdin)
return (r | fclose(f)); /* fclose does set errno on error */
return r;