aboutsummaryrefslogtreecommitdiffstats
path: root/reftable/block.c
diff options
context:
space:
mode:
Diffstat (limited to 'reftable/block.c')
-rw-r--r--reftable/block.c310
1 files changed, 192 insertions, 118 deletions
diff --git a/reftable/block.c b/reftable/block.c
index b14a8f1259..920b3f4486 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -1,15 +1,16 @@
/*
-Copyright 2020 Google LLC
-
-Use of this source code is governed by a BSD-style
-license that can be found in the LICENSE file or at
-https://developers.google.com/open-source/licenses/bsd
-*/
+ * Copyright 2020 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file or at
+ * https://developers.google.com/open-source/licenses/bsd
+ */
#include "block.h"
#include "blocksource.h"
#include "constants.h"
+#include "iter.h"
#include "record.h"
#include "reftable-error.h"
#include "system.h"
@@ -49,7 +50,7 @@ static int block_writer_register_restart(struct block_writer *w, int n,
if (is_restart)
rlen++;
if (2 + 3 * rlen + n > w->block_size - w->next)
- return -1;
+ return REFTABLE_ENTRY_TOO_BIG_ERROR;
if (is_restart) {
REFTABLE_ALLOC_GROW_OR_NULL(w->restarts, w->restart_len + 1,
w->restart_cap);
@@ -97,9 +98,10 @@ uint8_t block_writer_type(struct block_writer *bw)
return bw->block[bw->header_off];
}
-/* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
- success. Returns REFTABLE_API_ERROR if attempting to write a record with
- empty key. */
+/*
+ * Adds the reftable_record to the block. Returns 0 on success and
+ * appropriate error codes on failure.
+ */
int block_writer_add(struct block_writer *w, struct reftable_record *rec)
{
struct reftable_buf empty = REFTABLE_BUF_INIT;
@@ -126,14 +128,14 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec)
n = reftable_encode_key(&is_restart, out, last, w->scratch,
reftable_record_val_type(rec));
if (n < 0) {
- err = -1;
+ err = n;
goto done;
}
string_view_consume(&out, n);
n = reftable_record_encode(rec, out, w->hash_size);
if (n < 0) {
- err = -1;
+ err = n;
goto done;
}
string_view_consume(&out, n);
@@ -147,19 +149,19 @@ done:
int block_writer_finish(struct block_writer *w)
{
for (uint32_t i = 0; i < w->restart_len; i++) {
- put_be24(w->block + w->next, w->restarts[i]);
+ reftable_put_be24(w->block + w->next, w->restarts[i]);
w->next += 3;
}
- put_be16(w->block + w->next, w->restart_len);
+ reftable_put_be16(w->block + w->next, w->restart_len);
w->next += 2;
- put_be24(w->block + 1 + w->header_off, w->next);
+ reftable_put_be24(w->block + 1 + w->header_off, w->next);
/*
* Log records are stored zlib-compressed. Note that the compression
* also spans over the restart points we have just written.
*/
- if (block_writer_type(w) == BLOCK_TYPE_LOG) {
+ if (block_writer_type(w) == REFTABLE_BLOCK_TYPE_LOG) {
int block_header_skip = 4 + w->header_off;
uLongf src_len = w->next - block_header_skip, compressed_len;
int ret;
@@ -209,61 +211,91 @@ int block_writer_finish(struct block_writer *w)
return w->next;
}
-int block_reader_init(struct block_reader *br, struct reftable_block *block,
- uint32_t header_off, uint32_t table_block_size,
- uint32_t hash_size)
+static int read_block(struct reftable_block_source *source,
+ struct reftable_block_data *dest, uint64_t off,
+ uint32_t sz)
{
+ size_t size = block_source_size(source);
+ block_source_release_data(dest);
+ if (off >= size)
+ return 0;
+ if (off + sz > size)
+ sz = size - off;
+ return block_source_read_data(source, dest, off, sz);
+}
+
+int reftable_block_init(struct reftable_block *block,
+ struct reftable_block_source *source,
+ uint32_t offset, uint32_t header_size,
+ uint32_t table_block_size, uint32_t hash_size,
+ uint8_t want_type)
+{
+ uint32_t guess_block_size = table_block_size ?
+ table_block_size : DEFAULT_BLOCK_SIZE;
uint32_t full_block_size = table_block_size;
- uint8_t typ = block->data[header_off];
- uint32_t sz = get_be24(block->data + header_off + 1);
- int err = 0;
- uint16_t restart_count = 0;
- uint32_t restart_start = 0;
- uint8_t *restart_bytes = NULL;
+ uint16_t restart_count;
+ uint32_t restart_off;
+ uint32_t block_size;
+ uint8_t block_type;
+ int err;
- reftable_block_done(&br->block);
+ err = read_block(source, &block->block_data, offset, guess_block_size);
+ if (err < 0)
+ goto done;
- if (!reftable_is_block_type(typ)) {
- err = REFTABLE_FORMAT_ERROR;
+ block_type = block->block_data.data[header_size];
+ if (!reftable_is_block_type(block_type)) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+ if (want_type != REFTABLE_BLOCK_TYPE_ANY && block_type != want_type) {
+ err = 1;
goto done;
}
- if (typ == BLOCK_TYPE_LOG) {
- uint32_t block_header_skip = 4 + header_off;
- uLong dst_len = sz - block_header_skip;
- uLong src_len = block->len - block_header_skip;
+ block_size = reftable_get_be24(block->block_data.data + header_size + 1);
+ if (block_size > guess_block_size) {
+ err = read_block(source, &block->block_data, offset, block_size);
+ if (err < 0)
+ goto done;
+ }
+
+ if (block_type == REFTABLE_BLOCK_TYPE_LOG) {
+ uint32_t block_header_skip = 4 + header_size;
+ uLong dst_len = block_size - block_header_skip;
+ uLong src_len = block->block_data.len - block_header_skip;
/* Log blocks specify the *uncompressed* size in their header. */
- REFTABLE_ALLOC_GROW_OR_NULL(br->uncompressed_data, sz,
- br->uncompressed_cap);
- if (!br->uncompressed_data) {
+ REFTABLE_ALLOC_GROW_OR_NULL(block->uncompressed_data, block_size,
+ block->uncompressed_cap);
+ if (!block->uncompressed_data) {
err = REFTABLE_OUT_OF_MEMORY_ERROR;
goto done;
}
/* Copy over the block header verbatim. It's not compressed. */
- memcpy(br->uncompressed_data, block->data, block_header_skip);
+ memcpy(block->uncompressed_data, block->block_data.data, block_header_skip);
- if (!br->zstream) {
- REFTABLE_CALLOC_ARRAY(br->zstream, 1);
- if (!br->zstream) {
+ if (!block->zstream) {
+ REFTABLE_CALLOC_ARRAY(block->zstream, 1);
+ if (!block->zstream) {
err = REFTABLE_OUT_OF_MEMORY_ERROR;
goto done;
}
- err = inflateInit(br->zstream);
+ err = inflateInit(block->zstream);
} else {
- err = inflateReset(br->zstream);
+ err = inflateReset(block->zstream);
}
if (err != Z_OK) {
err = REFTABLE_ZLIB_ERROR;
goto done;
}
- br->zstream->next_in = block->data + block_header_skip;
- br->zstream->avail_in = src_len;
- br->zstream->next_out = br->uncompressed_data + block_header_skip;
- br->zstream->avail_out = dst_len;
+ block->zstream->next_in = block->block_data.data + block_header_skip;
+ block->zstream->avail_in = src_len;
+ block->zstream->next_out = block->uncompressed_data + block_header_skip;
+ block->zstream->avail_out = dst_len;
/*
* We know both input as well as output size, and we know that
@@ -272,72 +304,71 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
* here to instruct zlib to inflate the data in one go, which
* is more efficient than using `Z_NO_FLUSH`.
*/
- err = inflate(br->zstream, Z_FINISH);
+ err = inflate(block->zstream, Z_FINISH);
if (err != Z_STREAM_END) {
err = REFTABLE_ZLIB_ERROR;
goto done;
}
err = 0;
- if (br->zstream->total_out + block_header_skip != sz) {
+ if (block->zstream->total_out + block_header_skip != block_size) {
err = REFTABLE_FORMAT_ERROR;
goto done;
}
/* We're done with the input data. */
- reftable_block_done(block);
- block->data = br->uncompressed_data;
- block->len = sz;
- full_block_size = src_len + block_header_skip - br->zstream->avail_in;
+ block_source_release_data(&block->block_data);
+ block->block_data.data = block->uncompressed_data;
+ block->block_data.len = block_size;
+ full_block_size = src_len + block_header_skip - block->zstream->avail_in;
} else if (full_block_size == 0) {
- full_block_size = sz;
- } else if (sz < full_block_size && sz < block->len &&
- block->data[sz] != 0) {
+ full_block_size = block_size;
+ } else if (block_size < full_block_size && block_size < block->block_data.len &&
+ block->block_data.data[block_size] != 0) {
/* If the block is smaller than the full block size, it is
padded (data followed by '\0') or the next block is
unaligned. */
- full_block_size = sz;
+ full_block_size = block_size;
}
- restart_count = get_be16(block->data + sz - 2);
- restart_start = sz - 2 - 3 * restart_count;
- restart_bytes = block->data + restart_start;
+ restart_count = reftable_get_be16(block->block_data.data + block_size - 2);
+ restart_off = block_size - 2 - 3 * restart_count;
- /* transfer ownership. */
- br->block = *block;
- block->data = NULL;
- block->len = 0;
+ block->block_type = block_type;
+ block->hash_size = hash_size;
+ block->restart_off = restart_off;
+ block->full_block_size = full_block_size;
+ block->header_off = header_size;
+ block->restart_count = restart_count;
- br->hash_size = hash_size;
- br->block_len = restart_start;
- br->full_block_size = full_block_size;
- br->header_off = header_off;
- br->restart_count = restart_count;
- br->restart_bytes = restart_bytes;
+ err = 0;
done:
+ if (err < 0)
+ reftable_block_release(block);
return err;
}
-void block_reader_release(struct block_reader *br)
+void reftable_block_release(struct reftable_block *block)
{
- inflateEnd(br->zstream);
- reftable_free(br->zstream);
- reftable_free(br->uncompressed_data);
- reftable_block_done(&br->block);
+ inflateEnd(block->zstream);
+ reftable_free(block->zstream);
+ reftable_free(block->uncompressed_data);
+ block_source_release_data(&block->block_data);
+ memset(block, 0, sizeof(*block));
}
-uint8_t block_reader_type(const struct block_reader *r)
+uint8_t reftable_block_type(const struct reftable_block *b)
{
- return r->block.data[r->header_off];
+ return b->block_data.data[b->header_off];
}
-int block_reader_first_key(const struct block_reader *br, struct reftable_buf *key)
+int reftable_block_first_key(const struct reftable_block *block, struct reftable_buf *key)
{
- int off = br->header_off + 4, n;
+ int off = block->header_off + 4, n;
struct string_view in = {
- .buf = br->block.data + off,
- .len = br->block_len - off,
+ .buf = block->block_data.data + off,
+ .len = block->restart_off - off,
};
uint8_t extra = 0;
@@ -352,33 +383,36 @@ int block_reader_first_key(const struct block_reader *br, struct reftable_buf *k
return 0;
}
-static uint32_t block_reader_restart_offset(const struct block_reader *br, size_t idx)
+static uint32_t block_restart_offset(const struct reftable_block *b, size_t idx)
+{
+ return reftable_get_be24(b->block_data.data + b->restart_off + 3 * idx);
+}
+
+void block_iter_init(struct block_iter *it, const struct reftable_block *block)
{
- return get_be24(br->restart_bytes + 3 * idx);
+ it->block = block;
+ block_iter_seek_start(it);
}
-void block_iter_seek_start(struct block_iter *it, const struct block_reader *br)
+void block_iter_seek_start(struct block_iter *it)
{
- it->block = br->block.data;
- it->block_len = br->block_len;
- it->hash_size = br->hash_size;
reftable_buf_reset(&it->last_key);
- it->next_off = br->header_off + 4;
+ it->next_off = it->block->header_off + 4;
}
struct restart_needle_less_args {
int error;
struct reftable_buf needle;
- const struct block_reader *reader;
+ const struct reftable_block *block;
};
static int restart_needle_less(size_t idx, void *_args)
{
struct restart_needle_less_args *args = _args;
- uint32_t off = block_reader_restart_offset(args->reader, idx);
+ uint32_t off = block_restart_offset(args->block, idx);
struct string_view in = {
- .buf = args->reader->block.data + off,
- .len = args->reader->block_len - off,
+ .buf = args->block->block_data.data + off,
+ .len = args->block->restart_off - off,
};
uint64_t prefix_len, suffix_len;
uint8_t extra;
@@ -411,14 +445,14 @@ static int restart_needle_less(size_t idx, void *_args)
int block_iter_next(struct block_iter *it, struct reftable_record *rec)
{
struct string_view in = {
- .buf = (unsigned char *) it->block + it->next_off,
- .len = it->block_len - it->next_off,
+ .buf = (unsigned char *) it->block->block_data.data + it->next_off,
+ .len = it->block->restart_off - it->next_off,
};
struct string_view start = in;
uint8_t extra = 0;
int n = 0;
- if (it->next_off >= it->block_len)
+ if (it->next_off >= it->block->restart_off)
return 1;
n = reftable_decode_key(&it->last_key, &extra, in);
@@ -428,7 +462,7 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec)
return REFTABLE_FORMAT_ERROR;
string_view_consume(&in, n);
- n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size,
+ n = reftable_record_decode(rec, it->last_key, extra, in, it->block->hash_size,
&it->scratch);
if (n < 0)
return -1;
@@ -443,8 +477,6 @@ void block_iter_reset(struct block_iter *it)
reftable_buf_reset(&it->last_key);
it->next_off = 0;
it->block = NULL;
- it->block_len = 0;
- it->hash_size = 0;
}
void block_iter_close(struct block_iter *it)
@@ -453,12 +485,11 @@ void block_iter_close(struct block_iter *it)
reftable_buf_release(&it->scratch);
}
-int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
- struct reftable_buf *want)
+int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want)
{
struct restart_needle_less_args args = {
.needle = *want,
- .reader = br,
+ .block = it->block,
};
struct reftable_record rec;
int err = 0;
@@ -476,7 +507,7 @@ int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
* restart point. While that works alright, we would end up scanning
* too many record.
*/
- i = binsearch(br->restart_count, &restart_needle_less, &args);
+ i = binsearch(it->block->restart_count, &restart_needle_less, &args);
if (args.error) {
err = REFTABLE_FORMAT_ERROR;
goto done;
@@ -501,19 +532,18 @@ int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
* starting from the preceding restart point.
*/
if (i > 0)
- it->next_off = block_reader_restart_offset(br, i - 1);
+ it->next_off = block_restart_offset(it->block, i - 1);
else
- it->next_off = br->header_off + 4;
- it->block = br->block.data;
- it->block_len = br->block_len;
- it->hash_size = br->hash_size;
+ it->next_off = it->block->header_off + 4;
- reftable_record_init(&rec, block_reader_type(br));
+ err = reftable_record_init(&rec, reftable_block_type(it->block));
+ if (err < 0)
+ goto done;
/*
* We're looking for the last entry less than the wanted key so that
* the next call to `block_reader_next()` would yield the wanted
- * record. We thus don't want to position our reader at the sought
+ * record. We thus don't want to position our iterator at the sought
* after record, but one before. To do so, we have to go one entry too
* far and then back up.
*/
@@ -558,6 +588,61 @@ done:
return err;
}
+static int block_iter_seek_void(void *it, struct reftable_record *want)
+{
+ struct reftable_buf buf = REFTABLE_BUF_INIT;
+ struct block_iter *bi = it;
+ int err;
+
+ if (bi->block->block_type != want->type)
+ return REFTABLE_API_ERROR;
+
+ err = reftable_record_key(want, &buf);
+ if (err < 0)
+ goto out;
+
+ err = block_iter_seek_key(it, &buf);
+ if (err < 0)
+ goto out;
+
+ err = 0;
+
+out:
+ reftable_buf_release(&buf);
+ return err;
+}
+
+static int block_iter_next_void(void *it, struct reftable_record *rec)
+{
+ return block_iter_next(it, rec);
+}
+
+static void block_iter_close_void(void *it)
+{
+ block_iter_close(it);
+}
+
+static struct reftable_iterator_vtable block_iter_vtable = {
+ .seek = &block_iter_seek_void,
+ .next = &block_iter_next_void,
+ .close = &block_iter_close_void,
+};
+
+int reftable_block_init_iterator(const struct reftable_block *b,
+ struct reftable_iterator *it)
+{
+ struct block_iter *bi;
+
+ REFTABLE_CALLOC_ARRAY(bi, 1);
+ block_iter_init(bi, b);
+
+ assert(!it->ops);
+ it->iter_arg = bi;
+ it->ops = &block_iter_vtable;
+
+ return 0;
+}
+
void block_writer_release(struct block_writer *bw)
{
deflateEnd(bw->zstream);
@@ -568,14 +653,3 @@ void block_writer_release(struct block_writer *bw)
reftable_buf_release(&bw->last_key);
/* the block is not owned. */
}
-
-void reftable_block_done(struct reftable_block *blockp)
-{
- struct reftable_block_source source = blockp->source;
- if (blockp && source.ops)
- source.ops->return_block(source.arg, blockp);
- blockp->data = NULL;
- blockp->len = 0;
- blockp->source.ops = NULL;
- blockp->source.arg = NULL;
-}