aboutsummaryrefslogtreecommitdiffstats
path: root/gl/lib/mbbuf.h
blob: 135b82f806c9aaa90ac7152e31f6b6306ec8366e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/* Buffering for multi-byte characters.
   Copyright (C) 2025 Free Software Foundation, Inc.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Collin Funk.  */

#ifndef _MBBUF_H
#define _MBBUF_H 1

#ifndef _GL_INLINE_HEADER_BEGIN
# error "Please include config.h first."
#endif

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "mcel.h"
#include "idx.h"

_GL_INLINE_HEADER_BEGIN
/* The functions below are declared with MBBUF_INLINE.  A file may define
   MBBUF_INLINE before including this header to override it; otherwise it
   defaults to _GL_INLINE.  */
#ifndef MBBUF_INLINE
# define MBBUF_INLINE _GL_INLINE
#endif

/* End-of-input marker.  mbbuf_get_char returns an mcel_t whose CH member
   holds this value when no unread bytes remain.  */
#define MBBUF_EOF UINT32_MAX

/* MBBUF_EOF must be distinguishable from every valid character, so it has
   to be greater than MCEL_CHAR_MAX.  */
static_assert (MCEL_CHAR_MAX < MBBUF_EOF);

/* State for reading characters from a stream through a buffer.  Set up
   with mbbuf_init and read from with mbbuf_get_char.  */
typedef struct
{
  char *buffer;    /* Input buffer; the pointer passed to mbbuf_init.  */
  FILE *fp;        /* Input file stream that BUFFER is filled from.  */
  idx_t size;      /* Number of bytes allocated for BUFFER.  */
  idx_t length;    /* Number of bytes with data in BUFFER.  */
  idx_t offset;    /* Current position in BUFFER: index of the next byte
                      that mbbuf_get_char will examine.  */
} mbbuf_t;

/* Set up MBBUF to read from the stream FP, which must be open for reading,
   using the already allocated BUFFER of SIZE bytes as its storage.  The
   buffer starts out holding no data.  SIZE must be at least MCEL_LEN_MAX.
 */
MBBUF_INLINE void
mbbuf_init (mbbuf_t *mbbuf, char *buffer, idx_t size, FILE *fp)
{
  /* A smaller buffer violates the documented precondition.  */
  if (! (MCEL_LEN_MAX <= size))
    unreachable ();
  *mbbuf = (mbbuf_t) {
    .buffer = buffer,
    .fp = fp,
    .size = size,
    .length = 0,
    .offset = 0,
  };
}

/* Get the next character in the buffer, filling it from MBBUF->fp first if
   fewer than MCEL_LEN_MAX unread bytes remain and the stream is not at end
   of file.

   If no unread bytes remain after any refill, the returned mcel_t has its
   CH member set to MBBUF_EOF.

   If an invalid multi-byte character is seen, we assume the program wants to
   fall back to the read byte: exactly one byte is consumed, CH is set to
   that byte's value as an unsigned char, and the other members (including
   the error flag) are left as mcel_scan set them.  Converting through
   unsigned char matters where plain char is signed: otherwise a byte with
   the high bit set would be sign-extended, and a 0xFF byte could become
   indistinguishable from MBBUF_EOF.

   A refill moves the unread bytes to the start of the buffer, so a pointer
   obtained earlier from mbbuf_char_offset may no longer refer to the same
   character afterwards.  */
MBBUF_INLINE mcel_t
mbbuf_get_char (mbbuf_t *mbbuf)
{
  idx_t available = mbbuf->length - mbbuf->offset;
  /* Check if we need to fill the input buffer.  */
  if (available < MCEL_LEN_MAX && ! feof (mbbuf->fp))
    {
      idx_t start;
      if (!(0 < available))
        start = 0;
      else
        {
          /* Keep the unread tail by moving it to the front of the buffer.  */
          memmove (mbbuf->buffer, mbbuf->buffer + mbbuf->offset, available);
          start = available;
        }
      /* Fill the rest of the buffer after the preserved bytes.  */
      mbbuf->length = fread (mbbuf->buffer + start, 1, mbbuf->size - start,
                             mbbuf->fp) + start;
      mbbuf->offset = 0;
      available = mbbuf->length - mbbuf->offset;
    }
  if (available <= 0)
    return (mcel_t) { .ch = MBBUF_EOF };
  mcel_t g = mcel_scan (mbbuf->buffer + mbbuf->offset,
                        mbbuf->buffer + mbbuf->length);
  if (! g.err)
    mbbuf->offset += g.len;
  else
    {
      /* Assume the program will emit the byte, but keep the error flag.
         Go through unsigned char so the byte is never sign-extended.  */
      g.ch = (unsigned char) mbbuf->buffer[mbbuf->offset++];
    }
  return g;
}

/* Return the address in MBBUF's buffer of the first byte of G, where G is
   the character most recently returned by mbbuf_get_char.  */
MBBUF_INLINE char *
mbbuf_char_offset (mbbuf_t *mbbuf, mcel_t g)
{
  /* G cannot be longer than the number of bytes consumed so far.  */
  if (! (g.len <= mbbuf->offset))
    unreachable ();
  /* The current position is just past G; step back over its bytes.  */
  char *current = mbbuf->buffer + mbbuf->offset;
  return current - g.len;
}

_GL_INLINE_HEADER_END

#endif