/* Buffering for multi-byte characters. Copyright (C) 2025 Free Software Foundation, Inc. This file is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program. If not, see . */ /* Written by Collin Funk. */ #ifndef _MBBUF_H #define _MBBUF_H 1 #ifndef _GL_INLINE_HEADER_BEGIN # error "Please include config.h first." #endif #include #include #include "mcel.h" #include "idx.h" _GL_INLINE_HEADER_BEGIN #ifndef MBBUF_INLINE # define MBBUF_INLINE _GL_INLINE #endif /* End of file. */ #define MBBUF_EOF UINT32_MAX /* MBBUF_EOF should not be a valid character. */ static_assert (MCEL_CHAR_MAX < MBBUF_EOF); typedef struct { char *buffer; /* Input buffer. */ FILE *fp; /* Input file stream. */ idx_t size; /* Number of bytes allocated for BUFFER. */ idx_t length; /* Number of bytes with data in BUFFER. */ idx_t offset; /* Current position in BUFFER. */ } mbbuf_t; /* Initialize MBBUF with an allocated BUFFER of SIZE bytes and a file stream FP open for reading. SIZE must be greater than or equal to MCEL_LEN_MAX. */ MBBUF_INLINE void mbbuf_init (mbbuf_t *mbbuf, char *buffer, idx_t size, FILE *fp) { if (size < MCEL_LEN_MAX) unreachable (); mbbuf->buffer = buffer; mbbuf->fp = fp; mbbuf->size = size; mbbuf->length = 0; mbbuf->offset = 0; } /* Get the next character in the buffer, filling it from FP if necessary. If an invalid multi-byte character is seen, we assume the program wants to fall back to the read byte. */ MBBUF_INLINE mcel_t mbbuf_get_char (mbbuf_t *mbbuf) { idx_t available = mbbuf->length - mbbuf->offset; /* Check if we need to fill the input buffer. */ if (available < MCEL_LEN_MAX && ! feof (mbbuf->fp)) { idx_t start; if (!(0 < available)) start = 0; else { memmove (mbbuf->buffer, mbbuf->buffer + mbbuf->offset, available); start = available; } mbbuf->length = fread (mbbuf->buffer + start, 1, mbbuf->size - start, mbbuf->fp) + start; mbbuf->offset = 0; available = mbbuf->length - mbbuf->offset; } if (available <= 0) return (mcel_t) { .ch = MBBUF_EOF }; mcel_t g = mcel_scan (mbbuf->buffer + mbbuf->offset, mbbuf->buffer + mbbuf->length); if (! g.err) mbbuf->offset += g.len; else { /* Assume the program will emit the byte, but keep the error flag. */ g.ch = mbbuf->buffer[mbbuf->offset++]; } return g; } /* Returns a pointer to the first byte in the previously read character from mbbuf_get_char. */ MBBUF_INLINE char * mbbuf_char_offset (mbbuf_t *mbbuf, mcel_t g) { if (mbbuf->offset < g.len) unreachable (); return mbbuf->buffer + (mbbuf->offset - g.len); } _GL_INLINE_HEADER_END #endif