[PATCH] libbb: in xmalloc_fgetline, use getline if enabled

tito farmatito at tiscali.it
Sun May 28 19:53:39 UTC 2023


On Sun, 28 May 2023 19:01:52 +0200
Elvira Khabirova <lineprinter0 at gmail.com> wrote:

> When xmalloc_fgetline was introduced, getline(3) was a GNU extension
> and was not necessarily implemented in libcs. Since then,
> it's become a part of POSIX.1-2008 and is now implemented in
> glibc, uClibc-ng, and musl.
> 
> Previously, xmalloc_fgetline directly called bb_get_chunk_from_file.
> The issue with that approach is that bb_get_chunk_from_file stops
> both at \n and at \0. This introduces unexpected behavior when tools
> are presented with inputs containing \0. For example, in a comparison
> of GNU core utils cut with busybox cut:
> 
> % echo -ne '\x00\x01\x00\x03\x04\x05\x06' | cut -b1-3 | hexdump -C
> 00000000  00 01 00 0a                                       |....|
> 00000004
> % echo -ne '\x00\x01\x00\x03\x04\x05\x06' | ./busybox cut -b1-3 | hexdump -C
> 00000000  0a 01 0a 03 04 05 0a                              |.......|
> 00000007
> 
> Here, due to bb_get_chunk_from_file splitting lines by \n and \0,
> busybox cut interprets the input as three lines instead of one.
> 
> Also, since xmalloc_fgetline never returned strings with embedded \0,
> cut_file expects strlen of the returned string to match the string's
> total length.
> 
> To fix the behavior of the cut utility, introduce xmalloc_fgetline_n,
> that fully matches the behavior of xmalloc_fgetline,
> but also returns the amount of bytes read.
> 
> Add a configuration option, FEATURE_USE_GETLINE, and enable it
> by default. Use getline in xmalloc_fgetline_n if the feature is enabled.
> 
> Change the behavior of cut_file to use the values returned
> by the new function instead of calling strlen.
> 
> Call xmalloc_fgetline_n from xmalloc_fgetline.
> 
> Add a test for the previously mentioned case.
> 
> With FEATURE_USE_GETLINE:
> 
> function                                             old     new   delta
> xmalloc_fgetline_n                                     -     173    +173
> xmalloc_fgetline                                      85      58     -27
> cut_main                                            1406    1367     -39
> ------------------------------------------------------------------------------
> (add/remove: 1/0 grow/shrink: 0/2 up/down: 173/-66)           Total: 107 bytes
> 
> Without FEATURE_USE_GETLINE:
> 
> function                                             old     new   delta
> xmalloc_fgetline_n                                     -      41     +41
> xmalloc_fgetline                                      85      58     -27
> cut_main                                            1406    1367     -39
> ------------------------------------------------------------------------------
> (add/remove: 1/0 grow/shrink: 0/2 up/down: 41/-66)            Total: -25 bytes
> 
> Fixes: https://bugs.busybox.net/show_bug.cgi?id=15276
> Signed-off-by: Elvira Khabirova <lineprinter0 at gmail.com>
> ---
>  coreutils/cut.c            |  4 ++--
>  include/libbb.h            |  2 ++
>  libbb/Config.src           | 11 +++++++++++
>  libbb/get_line_from_file.c | 37 ++++++++++++++++++++++++++++++++-----
>  testsuite/cut.tests        |  2 ++
>  5 files changed, 49 insertions(+), 7 deletions(-)
> 
> diff --git a/coreutils/cut.c b/coreutils/cut.c
> index 55bdd9386..7c87467ca 100644
> --- a/coreutils/cut.c
> +++ b/coreutils/cut.c
> @@ -89,6 +89,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
>  		const struct cut_list *cut_lists, unsigned nlists)
>  {
>  	char *line;
> +	size_t linelen = 0;
>  	unsigned linenum = 0;	/* keep these zero-based to be consistent */
>  	regex_t reg;
>  	int spos, shoe = option_mask32 & CUT_OPT_REGEX_FLGS;
> @@ -96,10 +97,9 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
>  	if (shoe) xregcomp(&reg, delim, REG_EXTENDED);
>  
>  	/* go through every line in the file */
> -	while ((line = xmalloc_fgetline(file)) != NULL) {
> +	while ((line = xmalloc_fgetline_n(file, &linelen)) != NULL) {
>  
>  		/* set up a list so we can keep track of what's been printed */
> -		int linelen = strlen(line);
>  		char *printed = xzalloc(linelen + 1);
>  		char *orig_line = line;
>  		unsigned cl_pos = 0;
> diff --git a/include/libbb.h b/include/libbb.h
> index 6191debb1..73d16647a 100644
> --- a/include/libbb.h
> +++ b/include/libbb.h
> @@ -1048,6 +1048,8 @@ extern char *xmalloc_fgetline_str(FILE *file, const char *terminating_string) FA
>  extern char *xmalloc_fgets(FILE *file) FAST_FUNC RETURNS_MALLOC;
>  /* Chops off '\n' from the end, unlike fgets: */
>  extern char *xmalloc_fgetline(FILE *file) FAST_FUNC RETURNS_MALLOC;
> +/* Same as xmalloc_fgetline but returns number of bytes read: */
> +extern char* xmalloc_fgetline_n(FILE *file, size_t *n) FAST_FUNC RETURNS_MALLOC;
>  /* Same, but doesn't try to conserve space (may have some slack after the end) */
>  /* extern char *xmalloc_fgetline_fast(FILE *file) FAST_FUNC RETURNS_MALLOC; */
>  
> diff --git a/libbb/Config.src b/libbb/Config.src
> index b980f19a9..7a37929d6 100644
> --- a/libbb/Config.src
> +++ b/libbb/Config.src
> @@ -147,6 +147,17 @@ config MONOTONIC_SYSCALL
>  	will be used instead (which gives wrong results if date/time
>  	is reset).
>  
> +config FEATURE_USE_GETLINE
> +	bool "Use getline library function"
> +	default y
> +	help
> +	When enabled, busybox will use the libc getline() function
> +	instead of getc loops to read data from files.
> +	Using getline provides better behavior when utilities
> +	encounter NUL bytes in their inputs.
> +	getline() was originally a GNU extension, but now is a part
> +	of POSIX.1-2008 and is implemented in contemporary libcs.
> +
>  config IOCTL_HEX2STR_ERROR
>  	bool "Use ioctl names rather than hex values in error messages"
>  	default y
> diff --git a/libbb/get_line_from_file.c b/libbb/get_line_from_file.c
> index 903ff1fb6..096bbd66f 100644
> --- a/libbb/get_line_from_file.c
> +++ b/libbb/get_line_from_file.c
> @@ -12,9 +12,9 @@
>  
>  char* FAST_FUNC bb_get_chunk_from_file(FILE *file, size_t *end)
>  {
> -	int ch;
> -	size_t idx = 0;
>  	char *linebuf = NULL;
> +	size_t idx = 0;
> +	int ch;
>  
>  	while ((ch = getc(file)) != EOF) {
>  		/* grow the line buffer as necessary */
> @@ -55,10 +55,37 @@ char* FAST_FUNC xmalloc_fgets(FILE *file)
>  char* FAST_FUNC xmalloc_fgetline(FILE *file)
>  {
>  	size_t i;
> -	char *c = bb_get_chunk_from_file(file, &i);
>  
> -	if (i && c[--i] == '\n')
> -		c[i] = '\0';
> +	return xmalloc_fgetline_n(file, &i);
> +}
> +
> +/* Get line.  Remove trailing \n.  Return the number of bytes read. */
> +char* FAST_FUNC xmalloc_fgetline_n(FILE *file, size_t *n)
> +{
> +	char *c = NULL;
> +#if ENABLE_FEATURE_USE_GETLINE
> +	size_t idx = 0;
> +	ssize_t nread;
> +
> +	nread = getline(&c, &idx, file);
> +	if (nread < 0) {
> +		if (errno == ENOMEM)
> +			bb_die_memory_exhausted();
> +		free(c);
> +		c = NULL;
> +		*n = 0;
> +	} else {
> +		*n = nread;
> +	}
> +#else
> +	c = bb_get_chunk_from_file(file, n);
> +#endif
> +
> +	if (*n) {
> +		if (c[*n - 1] == '\n') {
> +			c[--*n] = '\0';
> +		}
> +	}
>  
>  	return c;
>  }
> diff --git a/testsuite/cut.tests b/testsuite/cut.tests
> index 2458c019c..0fd731688 100755
> --- a/testsuite/cut.tests
> +++ b/testsuite/cut.tests
> @@ -81,4 +81,6 @@ SKIP=
>  testing "cut empty field" "cut -d ':' -f 1-3" "a::b\n" "" "a::b\n"
>  testing "cut empty field 2" "cut -d ':' -f 3-5" "b::c\n" "" "a::b::c:d\n"
>  
> +testing "cut with embedded NUL" "cut -c 1-3" "\x00\x01\x00\n" "" "\x00\x01\x00\x03\x04\x05\x06"
> +
>  exit $FAILCOUNT

Hi,
what is this change for?

-	int ch;
-	size_t idx = 0;
 	char *linebuf = NULL;
+	size_t idx = 0;
+	int ch;

Ciao,
Tito


More information about the busybox mailing list