diff options
| author | Pádraig Brady <P@draigBrady.com> | 2025-10-17 19:14:21 +0100 |
|---|---|---|
| committer | Pádraig Brady <P@draigBrady.com> | 2025-10-18 18:37:10 +0100 |
| commit | 770078e315232b49c0e113152a469df4df1e5f4d (patch) | |
| tree | 0ffe185e150fa6c535be468d614b5c6448b9c44b | |
| parent | numfmt: add --unit-separator (diff) | |
| download | coreutils-770078e315232b49c0e113152a469df4df1e5f4d.tar.gz coreutils-770078e315232b49c0e113152a469df4df1e5f4d.zip | |
numfmt: fix issues with multi-byte blanks
* src/numfmt.c (process_line): Restore byte overwritten with NUL,
as it may be part of a multi-byte blank.
(process_suffixed_number): Skip multi-byte blanks,
and correctly determine width with mbswidth().
(parse_format_string): Use c_isblank() to explicitly
indicate that's all the format spec supports.
* tests/misc/numfmt.pl: Add test cases.
* NEWS: Mention the bug fix.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | NEWS | 3 |
| -rw-r--r-- | src/numfmt.c | 28 |
| -rwxr-xr-x | tests/misc/numfmt.pl | 14 |
3 files changed, 37 insertions, 8 deletions
@@ -21,6 +21,9 @@ GNU coreutils NEWS -*- outline -*- 'numfmt' no longer reads out-of-bounds memory with trailing blanks in input. [bug introduced with numfmt in coreutils-8.21] + 'numfmt' no longer outputs invalid characters with multi-byte blanks in input. + [bug introduced in coreutils-9.5] + 'rm -d DIR' no longer fails on Ceph snapshot directories. Although these directories are nonempty, 'rmdir DIR' succeeds on them. [bug introduced in coreutils-8.16] diff --git a/src/numfmt.c b/src/numfmt.c index 26f918054..67458558a 100644 --- a/src/numfmt.c +++ b/src/numfmt.c @@ -1150,7 +1150,7 @@ parse_format_string (char const *fmt) errno = 0; user_precision = strtol (fmt + i, &endptr, 10); if (errno == ERANGE || user_precision < 0 || SIZE_MAX < user_precision - || isblank (fmt[i]) || fmt[i] == '+') + || c_isblank (fmt[i]) || fmt[i] == '+') { /* Note we disallow negative user_precision to be consistent with printf(1). POSIX states that @@ -1340,15 +1340,18 @@ process_suffixed_number (char *text, long double *result, devmsg ("no valid suffix found\n"); } - /* Skip white space - always. */ - char *p = text; - while (*p && isblank (to_uchar (*p))) - ++p; + /* Skip blanks - always. */ + char *p = skip_str_matching (text, newline_or_blank, true); /* setup auto-padding. */ if (auto_padding) { - padding_width = text < p || 1 < field ? strlen (text) : 0; + padding_width = text < p || 1 < field + ? mbswidth (text, + MBSW_REJECT_INVALID | MBSW_REJECT_UNPRINTABLE) + : 0; + if (padding_width < 0) + padding_width = strlen (text); devmsg ("setting Auto-Padding to %jd characters\n", padding_width); } @@ -1455,7 +1458,8 @@ process_line (char *line, bool newline) if (*line != '\0') { - /* nul terminate the current field string and process */ + /* NUL terminate the current field string and process */ + char end_field = *line; *line = '\0'; if (! process_field (next, field)) @@ -1463,7 +1467,15 @@ process_line (char *line, bool newline) fputc ((delimiter == DELIMITER_DEFAULT) ? 
' ' : delimiter, stdout); - ++line; + + if (delimiter != DELIMITER_DEFAULT) + line++; + else + { + *line = end_field; + mcel_t g = mcel_scanz (line); + line += g.len; + } } else { diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl index ff22c7303..2f03efd1c 100755 --- a/tests/misc/numfmt.pl +++ b/tests/misc/numfmt.pl @@ -1172,6 +1172,20 @@ my @Locale_Tests = {ENV=>"LC_ALL=$locale"}], ['lcl-suf-6', "--from=auto '2\xe2\x81\xa0Ki'", {OUT => "2048"}, {ENV=>"LC_ALL=$locale"}], + # multi-byte blank char (em space, \u2003) + # Ensure trailing multi-byte blanks skipped + ['lcl-suf-7', "'2\xe2\x80\x83 '", {OUT => "2 "}, + {ENV=>"LC_ALL=$locale"}], + ['lcl-suf-8', "-d '' --from=auto '2Ki\xe2\x80\x83 '", {OUT => "2048"}, + {ENV=>"LC_ALL=$locale"}], + # Ensure multi-byte blank field separators not corrupted + ['lcl-suf-9', "--field=1 '1\xe2\x80\x832'", {OUT => "1 2"}, + {ENV=>"LC_ALL=$locale"}], + ['lcl-suf-10', "--field=2 '1\xe2\x80\x832'", {OUT => "1 2"}, + {ENV=>"LC_ALL=$locale"}], + # Ensure multi-byte blank field separators width determined correctly + ['lcl-suf-11', "--field=2 '1 \xe2\x80\x832'", + {OUT => "1 2"}, {ENV=>"LC_ALL=$locale"}], ); if ($locale ne 'C') |
