aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Pádraig Brady <P@draigBrady.com> 2025-10-17 19:14:21 +0100
committer: Pádraig Brady <P@draigBrady.com> 2025-10-18 18:37:10 +0100
commit: 770078e315232b49c0e113152a469df4df1e5f4d (patch)
tree: 0ffe185e150fa6c535be468d614b5c6448b9c44b
parent: numfmt: add --unit-separator (diff)
download: coreutils-770078e315232b49c0e113152a469df4df1e5f4d.tar.gz
coreutils-770078e315232b49c0e113152a469df4df1e5f4d.zip
numfmt: fix issues with multi-byte blanks
* src/numfmt.c (process_line): Restore byte overwritten with NUL,
as it may be part of a multi-byte blank.
(process_suffixed_number): Skip multi-byte blanks,
and correctly determine width with mbswidth().
(parse_format_string): Use c_isblank() to explicitly indicate
that's all the format spec supports.
* tests/misc/numfmt.pl: Add test cases.
* NEWS: Mention the bug fix.
-rw-r--r-- NEWS 3
-rw-r--r-- src/numfmt.c 28
-rwxr-xr-x tests/misc/numfmt.pl 14
3 files changed, 37 insertions, 8 deletions
diff --git a/NEWS b/NEWS
index b34513271..f80363f87 100644
--- a/NEWS
+++ b/NEWS
@@ -21,6 +21,9 @@ GNU coreutils NEWS -*- outline -*-
'numfmt' no longer reads out-of-bounds memory with trailing blanks in input.
[bug introduced with numfmt in coreutils-8.21]
+ 'numfmt' no longer outputs invalid characters with multi-byte blanks in input.
+ [bug introduced in coreutils-9.5]
+
'rm -d DIR' no longer fails on Ceph snapshot directories.
Although these directories are nonempty, 'rmdir DIR' succeeds on them.
[bug introduced in coreutils-8.16]
diff --git a/src/numfmt.c b/src/numfmt.c
index 26f918054..67458558a 100644
--- a/src/numfmt.c
+++ b/src/numfmt.c
@@ -1150,7 +1150,7 @@ parse_format_string (char const *fmt)
errno = 0;
user_precision = strtol (fmt + i, &endptr, 10);
if (errno == ERANGE || user_precision < 0 || SIZE_MAX < user_precision
- || isblank (fmt[i]) || fmt[i] == '+')
+ || c_isblank (fmt[i]) || fmt[i] == '+')
{
/* Note we disallow negative user_precision to be
consistent with printf(1). POSIX states that
@@ -1340,15 +1340,18 @@ process_suffixed_number (char *text, long double *result,
devmsg ("no valid suffix found\n");
}
- /* Skip white space - always. */
- char *p = text;
- while (*p && isblank (to_uchar (*p)))
- ++p;
+ /* Skip blanks - always. */
+ char *p = skip_str_matching (text, newline_or_blank, true);
/* setup auto-padding. */
if (auto_padding)
{
- padding_width = text < p || 1 < field ? strlen (text) : 0;
+ padding_width = text < p || 1 < field
+ ? mbswidth (text,
+ MBSW_REJECT_INVALID | MBSW_REJECT_UNPRINTABLE)
+ : 0;
+ if (padding_width < 0)
+ padding_width = strlen (text);
devmsg ("setting Auto-Padding to %jd characters\n", padding_width);
}
@@ -1455,7 +1458,8 @@ process_line (char *line, bool newline)
if (*line != '\0')
{
- /* nul terminate the current field string and process */
+ /* NUL terminate the current field string and process */
+ char end_field = *line;
*line = '\0';
if (! process_field (next, field))
@@ -1463,7 +1467,15 @@ process_line (char *line, bool newline)
fputc ((delimiter == DELIMITER_DEFAULT) ?
' ' : delimiter, stdout);
- ++line;
+
+ if (delimiter != DELIMITER_DEFAULT)
+ line++;
+ else
+ {
+ *line = end_field;
+ mcel_t g = mcel_scanz (line);
+ line += g.len;
+ }
}
else
{
diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl
index ff22c7303..2f03efd1c 100755
--- a/tests/misc/numfmt.pl
+++ b/tests/misc/numfmt.pl
@@ -1172,6 +1172,20 @@ my @Locale_Tests =
{ENV=>"LC_ALL=$locale"}],
['lcl-suf-6', "--from=auto '2\xe2\x81\xa0Ki'", {OUT => "2048"},
{ENV=>"LC_ALL=$locale"}],
+ # multi-byte blank char (em space, \u2003)
+ # Ensure trailing multi-byte blanks skipped
+ ['lcl-suf-7', "'2\xe2\x80\x83 '", {OUT => "2 "},
+ {ENV=>"LC_ALL=$locale"}],
+ ['lcl-suf-8', "-d '' --from=auto '2Ki\xe2\x80\x83 '", {OUT => "2048"},
+ {ENV=>"LC_ALL=$locale"}],
+ # Ensure multi-byte blank field separators not corrupted
+ ['lcl-suf-9', "--field=1 '1\xe2\x80\x832'", {OUT => "1 2"},
+ {ENV=>"LC_ALL=$locale"}],
+ ['lcl-suf-10', "--field=2 '1\xe2\x80\x832'", {OUT => "1 2"},
+ {ENV=>"LC_ALL=$locale"}],
+ # Ensure multi-byte blank field separators width determined correctly
+ ['lcl-suf-11', "--field=2 '1 \xe2\x80\x832'",
+ {OUT => "1 2"}, {ENV=>"LC_ALL=$locale"}],
);
if ($locale ne 'C')