diff options
| author | Collin Funk <collin.funk1@gmail.com> | 2025-08-25 23:15:21 -0700 |
|---|---|---|
| committer | Collin Funk <collin.funk1@gmail.com> | 2025-08-26 16:59:32 -0700 |
| commit | ae89cd646a7dfd0cf655e8c0d0d56b93288abb71 (patch) | |
| tree | 43229cad147335fe1e38709de25a4f61d65bada0 /src | |
| parent | tests: fold: consolidate all fold tests in tests/fold (diff) | |
| download | coreutils-ae89cd646a7dfd0cf655e8c0d0d56b93288abb71.tar.gz coreutils-ae89cd646a7dfd0cf655e8c0d0d56b93288abb71.zip | |
fold: don't truncate multibyte characters at the end of the buffer
* src/fold.c (fold_file): Replace invalid characters with the original
byte read. Copy multibyte sequences that may not yet be read to the
start of the buffer before reading more bytes.
* tests/fold/fold-characters.sh: Add a test case.
Diffstat (limited to 'src')
| -rw-r--r-- | src/fold.c | 24 |
1 file changed, 22 insertions, 2 deletions
```diff
diff --git a/src/fold.c b/src/fold.c
index 7bf30cd0b..208b004d6 100644
--- a/src/fold.c
+++ b/src/fold.c
@@ -139,6 +139,7 @@ fold_file (char const *filename, size_t width)
   idx_t offset_out = 0; /* Index in 'line_out' for next char. */
   static char line_out[IO_BUFSIZE];
   static char line_in[IO_BUFSIZE];
+  static size_t offset_in = 0;
   static size_t length_in = 0;
   int saved_errno;
 
@@ -158,14 +159,30 @@ fold_file (char const *filename, size_t width)
 
   fadvise (istream, FADVISE_SEQUENTIAL);
 
-  while (0 < (length_in = fread (line_in, 1, sizeof line_in, istream)))
+  while (0 < (length_in = fread (line_in + offset_in, 1,
+                                 sizeof line_in - offset_in, istream)))
     {
       char *p = line_in;
-      char *lim = p + length_in;
+      char *lim = p + length_in + offset_in;
       mcel_t g;
       for (; p < lim; p += g.len)
         {
           g = mcel_scan (p, lim);
+          if (g.err)
+            {
+              /* Replace the character with the byte if it cannot be a
+                 truncated multibyte sequence. */
+              if (!(lim - p <= MCEL_LEN_MAX))
+                g.ch = p[0];
+              else
+                {
+                  /* It may be a truncated multibyte sequence. Move it to the
+                     front of the input buffer. */
+                  memmove (line_in, p, lim - p);
+                  offset_in = lim - p;
+                  goto next_line;
+                }
+            }
           if (g.ch == '\n')
             {
               memcpy (line_out + offset_out, p, g.len);
@@ -241,6 +258,9 @@ fold_file (char const *filename, size_t width)
         }
       if (feof (istream))
         break;
+      /* We read a full buffer of complete characters. */
+      offset_in = 0;
+    next_line:
     }
 
   saved_errno = errno;
```
