diff options
Diffstat (limited to 'reftable')
39 files changed, 1547 insertions, 1242 deletions
diff --git a/reftable/basics.c b/reftable/basics.c index fe2b83ff83..9988ebd635 100644 --- a/reftable/basics.c +++ b/reftable/basics.c @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #define REFTABLE_ALLOW_BANNED_ALLOCATORS #include "basics.h" @@ -147,25 +147,6 @@ char *reftable_buf_detach(struct reftable_buf *buf) return result; } -void put_be24(uint8_t *out, uint32_t i) -{ - out[0] = (uint8_t)((i >> 16) & 0xff); - out[1] = (uint8_t)((i >> 8) & 0xff); - out[2] = (uint8_t)(i & 0xff); -} - -uint32_t get_be24(uint8_t *in) -{ - return (uint32_t)(in[0]) << 16 | (uint32_t)(in[1]) << 8 | - (uint32_t)(in[2]); -} - -void put_be16(uint8_t *out, uint16_t i) -{ - out[0] = (uint8_t)((i >> 8) & 0xff); - out[1] = (uint8_t)(i & 0xff); -} - size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) { size_t lo = 0; @@ -263,18 +244,16 @@ int names_equal(const char **a, const char **b) return a[i] == b[i]; } -int common_prefix_size(struct reftable_buf *a, struct reftable_buf *b) +size_t common_prefix_size(struct reftable_buf *a, struct reftable_buf *b) { - int p = 0; - for (; p < a->len && p < b->len; p++) { + size_t p = 0; + for (; p < a->len && p < b->len; p++) if (a->buf[p] != b->buf[p]) break; - } - return p; } -int hash_size(enum reftable_hash id) +uint32_t hash_size(enum reftable_hash id) { if (!id) return REFTABLE_HASH_SIZE_SHA1; diff --git a/reftable/basics.h b/reftable/basics.h index 4bf71b0954..7d22f96261 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in 
the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef BASICS_H #define BASICS_H @@ -16,12 +16,11 @@ https://developers.google.com/open-source/licenses/bsd #include "system.h" #include "reftable-basics.h" -struct reftable_buf { - size_t alloc; - size_t len; - char *buf; -}; -#define REFTABLE_BUF_INIT { 0 } +#ifdef __GNUC__ +#define REFTABLE_UNUSED __attribute__((__unused__)) +#else +#define REFTABLE_UNUSED +#endif /* * Initialize the buffer such that it is ready for use. This is equivalent to @@ -76,9 +75,79 @@ char *reftable_buf_detach(struct reftable_buf *buf); /* Bigendian en/decoding of integers */ -void put_be24(uint8_t *out, uint32_t i); -uint32_t get_be24(uint8_t *in); -void put_be16(uint8_t *out, uint16_t i); +static inline void reftable_put_be16(void *out, uint16_t i) +{ + unsigned char *p = out; + p[0] = (uint8_t)((i >> 8) & 0xff); + p[1] = (uint8_t)((i >> 0) & 0xff); +} + +static inline void reftable_put_be24(void *out, uint32_t i) +{ + unsigned char *p = out; + p[0] = (uint8_t)((i >> 16) & 0xff); + p[1] = (uint8_t)((i >> 8) & 0xff); + p[2] = (uint8_t)((i >> 0) & 0xff); +} + +static inline void reftable_put_be32(void *out, uint32_t i) +{ + unsigned char *p = out; + p[0] = (uint8_t)((i >> 24) & 0xff); + p[1] = (uint8_t)((i >> 16) & 0xff); + p[2] = (uint8_t)((i >> 8) & 0xff); + p[3] = (uint8_t)((i >> 0) & 0xff); +} + +static inline void reftable_put_be64(void *out, uint64_t i) +{ + unsigned char *p = out; + p[0] = (uint8_t)((i >> 56) & 0xff); + p[1] = (uint8_t)((i >> 48) & 0xff); + p[2] = (uint8_t)((i >> 40) & 0xff); + p[3] = (uint8_t)((i >> 32) & 0xff); + p[4] = (uint8_t)((i >> 24) & 0xff); + p[5] = (uint8_t)((i >> 16) & 0xff); + p[6] = (uint8_t)((i >> 8) & 0xff); + p[7] = (uint8_t)((i >> 0) & 0xff); +} + +static 
inline uint16_t reftable_get_be16(const void *in) +{ + const unsigned char *p = in; + return (uint16_t)(p[0]) << 8 | + (uint16_t)(p[1]) << 0; +} + +static inline uint32_t reftable_get_be24(const void *in) +{ + const unsigned char *p = in; + return (uint32_t)(p[0]) << 16 | + (uint32_t)(p[1]) << 8 | + (uint32_t)(p[2]) << 0; +} + +static inline uint32_t reftable_get_be32(const void *in) +{ + const unsigned char *p = in; + return (uint32_t)(p[0]) << 24 | + (uint32_t)(p[1]) << 16 | + (uint32_t)(p[2]) << 8| + (uint32_t)(p[3]) << 0; +} + +static inline uint64_t reftable_get_be64(const void *in) +{ + const unsigned char *p = in; + return (uint64_t)(p[0]) << 56 | + (uint64_t)(p[1]) << 48 | + (uint64_t)(p[2]) << 40 | + (uint64_t)(p[3]) << 32 | + (uint64_t)(p[4]) << 24 | + (uint64_t)(p[5]) << 16 | + (uint64_t)(p[6]) << 8 | + (uint64_t)(p[7]) << 0; +} /* * find smallest index i in [0, sz) at which `f(i) > 0`, assuming that f is @@ -117,18 +186,46 @@ void reftable_free(void *p); void *reftable_calloc(size_t nelem, size_t elsize); char *reftable_strdup(const char *str); -#define REFTABLE_ALLOC_ARRAY(x, alloc) (x) = reftable_malloc(st_mult(sizeof(*(x)), (alloc))) +static inline int reftable_alloc_size(size_t nelem, size_t elsize, size_t *out) +{ + if (nelem && elsize > SIZE_MAX / nelem) + return -1; + *out = nelem * elsize; + return 0; +} + +#define REFTABLE_ALLOC_ARRAY(x, alloc) do { \ + size_t alloc_size; \ + if (reftable_alloc_size(sizeof(*(x)), (alloc), &alloc_size) < 0) { \ + errno = ENOMEM; \ + (x) = NULL; \ + } else { \ + (x) = reftable_malloc(alloc_size); \ + } \ + } while (0) #define REFTABLE_CALLOC_ARRAY(x, alloc) (x) = reftable_calloc((alloc), sizeof(*(x))) -#define REFTABLE_REALLOC_ARRAY(x, alloc) (x) = reftable_realloc((x), st_mult(sizeof(*(x)), (alloc))) +#define REFTABLE_REALLOC_ARRAY(x, alloc) do { \ + size_t alloc_size; \ + if (reftable_alloc_size(sizeof(*(x)), (alloc), &alloc_size) < 0) { \ + errno = ENOMEM; \ + (x) = NULL; \ + } else { \ + (x) = 
reftable_realloc((x), alloc_size); \ + } \ + } while (0) static inline void *reftable_alloc_grow(void *p, size_t nelem, size_t elsize, size_t *allocp) { void *new_p; - size_t alloc = *allocp * 2 + 1; + size_t alloc = *allocp * 2 + 1, alloc_bytes; if (alloc < nelem) alloc = nelem; - new_p = reftable_realloc(p, st_mult(elsize, alloc)); + if (reftable_alloc_size(elsize, alloc, &alloc_bytes) < 0) { + errno = ENOMEM; + return p; + } + new_p = reftable_realloc(p, alloc_bytes); if (!new_p) return p; *allocp = alloc; @@ -168,10 +265,19 @@ static inline void *reftable_alloc_grow(void *p, size_t nelem, size_t elsize, # define strdup(str) REFTABLE_BANNED(strdup) #endif +#define REFTABLE_SWAP(a, b) do { \ + void *_swap_a_ptr = &(a); \ + void *_swap_b_ptr = &(b); \ + unsigned char _swap_buffer[sizeof(a) - 2 * sizeof(a) * (sizeof(a) != sizeof(b))]; \ + memcpy(_swap_buffer, _swap_a_ptr, sizeof(a)); \ + memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a)); \ + memcpy(_swap_b_ptr, _swap_buffer, sizeof(a)); \ +} while (0) + /* Find the longest shared prefix size of `a` and `b` */ -int common_prefix_size(struct reftable_buf *a, struct reftable_buf *b); +size_t common_prefix_size(struct reftable_buf *a, struct reftable_buf *b); -int hash_size(enum reftable_hash id); +uint32_t hash_size(enum reftable_hash id); /* * Format IDs that identify the hash function used by a reftable. 
Note that diff --git a/reftable/block.c b/reftable/block.c index 9858bbc7c5..920b3f4486 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -1,21 +1,21 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #include "block.h" #include "blocksource.h" #include "constants.h" +#include "iter.h" #include "record.h" #include "reftable-error.h" #include "system.h" -#include <zlib.h> -int header_size(int version) +size_t header_size(int version) { switch (version) { case 1: @@ -26,7 +26,7 @@ int header_size(int version) abort(); } -int footer_size(int version) +size_t footer_size(int version) { switch (version) { case 1: @@ -40,18 +40,17 @@ int footer_size(int version) static int block_writer_register_restart(struct block_writer *w, int n, int is_restart, struct reftable_buf *key) { - int rlen, err; + uint32_t rlen; + int err; rlen = w->restart_len; - if (rlen >= MAX_RESTARTS) { + if (rlen >= MAX_RESTARTS) is_restart = 0; - } - if (is_restart) { + if (is_restart) rlen++; - } if (2 + 3 * rlen + n > w->block_size - w->next) - return -1; + return REFTABLE_ENTRY_TOO_BIG_ERROR; if (is_restart) { REFTABLE_ALLOC_GROW_OR_NULL(w->restarts, w->restart_len + 1, w->restart_cap); @@ -72,7 +71,7 @@ static int block_writer_register_restart(struct block_writer *w, int n, } int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block, - uint32_t block_size, uint32_t header_off, int hash_size) + uint32_t block_size, uint32_t header_off, uint32_t hash_size) { bw->block = block; bw->hash_size = hash_size; @@ -99,9 +98,10 @@ uint8_t block_writer_type(struct block_writer *bw) return bw->block[bw->header_off]; } -/* Adds the 
reftable_record to the block. Returns -1 if it does not fit, 0 on - success. Returns REFTABLE_API_ERROR if attempting to write a record with - empty key. */ +/* + * Adds the reftable_record to the block. Returns 0 on success and + * appropriate error codes on failure. + */ int block_writer_add(struct block_writer *w, struct reftable_record *rec) { struct reftable_buf empty = REFTABLE_BUF_INIT; @@ -128,14 +128,14 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec) n = reftable_encode_key(&is_restart, out, last, w->scratch, reftable_record_val_type(rec)); if (n < 0) { - err = -1; + err = n; goto done; } string_view_consume(&out, n); n = reftable_record_encode(rec, out, w->hash_size); if (n < 0) { - err = -1; + err = n; goto done; } string_view_consume(&out, n); @@ -148,21 +148,20 @@ done: int block_writer_finish(struct block_writer *w) { - int i; - for (i = 0; i < w->restart_len; i++) { - put_be24(w->block + w->next, w->restarts[i]); + for (uint32_t i = 0; i < w->restart_len; i++) { + reftable_put_be24(w->block + w->next, w->restarts[i]); w->next += 3; } - put_be16(w->block + w->next, w->restart_len); + reftable_put_be16(w->block + w->next, w->restart_len); w->next += 2; - put_be24(w->block + 1 + w->header_off, w->next); + reftable_put_be24(w->block + 1 + w->header_off, w->next); /* * Log records are stored zlib-compressed. Note that the compression * also spans over the restart points we have just written. 
*/ - if (block_writer_type(w) == BLOCK_TYPE_LOG) { + if (block_writer_type(w) == REFTABLE_BLOCK_TYPE_LOG) { int block_header_skip = 4 + w->header_off; uLongf src_len = w->next - block_header_skip, compressed_len; int ret; @@ -212,61 +211,91 @@ int block_writer_finish(struct block_writer *w) return w->next; } -int block_reader_init(struct block_reader *br, struct reftable_block *block, - uint32_t header_off, uint32_t table_block_size, - int hash_size) +static int read_block(struct reftable_block_source *source, + struct reftable_block_data *dest, uint64_t off, + uint32_t sz) { + size_t size = block_source_size(source); + block_source_release_data(dest); + if (off >= size) + return 0; + if (off + sz > size) + sz = size - off; + return block_source_read_data(source, dest, off, sz); +} + +int reftable_block_init(struct reftable_block *block, + struct reftable_block_source *source, + uint32_t offset, uint32_t header_size, + uint32_t table_block_size, uint32_t hash_size, + uint8_t want_type) +{ + uint32_t guess_block_size = table_block_size ? 
+ table_block_size : DEFAULT_BLOCK_SIZE; uint32_t full_block_size = table_block_size; - uint8_t typ = block->data[header_off]; - uint32_t sz = get_be24(block->data + header_off + 1); - int err = 0; - uint16_t restart_count = 0; - uint32_t restart_start = 0; - uint8_t *restart_bytes = NULL; + uint16_t restart_count; + uint32_t restart_off; + uint32_t block_size; + uint8_t block_type; + int err; - reftable_block_done(&br->block); + err = read_block(source, &block->block_data, offset, guess_block_size); + if (err < 0) + goto done; - if (!reftable_is_block_type(typ)) { - err = REFTABLE_FORMAT_ERROR; + block_type = block->block_data.data[header_size]; + if (!reftable_is_block_type(block_type)) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + if (want_type != REFTABLE_BLOCK_TYPE_ANY && block_type != want_type) { + err = 1; goto done; } - if (typ == BLOCK_TYPE_LOG) { - uint32_t block_header_skip = 4 + header_off; - uLong dst_len = sz - block_header_skip; - uLong src_len = block->len - block_header_skip; + block_size = reftable_get_be24(block->block_data.data + header_size + 1); + if (block_size > guess_block_size) { + err = read_block(source, &block->block_data, offset, block_size); + if (err < 0) + goto done; + } + + if (block_type == REFTABLE_BLOCK_TYPE_LOG) { + uint32_t block_header_skip = 4 + header_size; + uLong dst_len = block_size - block_header_skip; + uLong src_len = block->block_data.len - block_header_skip; /* Log blocks specify the *uncompressed* size in their header. */ - REFTABLE_ALLOC_GROW_OR_NULL(br->uncompressed_data, sz, - br->uncompressed_cap); - if (!br->uncompressed_data) { + REFTABLE_ALLOC_GROW_OR_NULL(block->uncompressed_data, block_size, + block->uncompressed_cap); + if (!block->uncompressed_data) { err = REFTABLE_OUT_OF_MEMORY_ERROR; goto done; } /* Copy over the block header verbatim. It's not compressed. 
*/ - memcpy(br->uncompressed_data, block->data, block_header_skip); + memcpy(block->uncompressed_data, block->block_data.data, block_header_skip); - if (!br->zstream) { - REFTABLE_CALLOC_ARRAY(br->zstream, 1); - if (!br->zstream) { + if (!block->zstream) { + REFTABLE_CALLOC_ARRAY(block->zstream, 1); + if (!block->zstream) { err = REFTABLE_OUT_OF_MEMORY_ERROR; goto done; } - err = inflateInit(br->zstream); + err = inflateInit(block->zstream); } else { - err = inflateReset(br->zstream); + err = inflateReset(block->zstream); } if (err != Z_OK) { err = REFTABLE_ZLIB_ERROR; goto done; } - br->zstream->next_in = block->data + block_header_skip; - br->zstream->avail_in = src_len; - br->zstream->next_out = br->uncompressed_data + block_header_skip; - br->zstream->avail_out = dst_len; + block->zstream->next_in = block->block_data.data + block_header_skip; + block->zstream->avail_in = src_len; + block->zstream->next_out = block->uncompressed_data + block_header_skip; + block->zstream->avail_out = dst_len; /* * We know both input as well as output size, and we know that @@ -275,72 +304,71 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block, * here to instruct zlib to inflate the data in one go, which * is more efficient than using `Z_NO_FLUSH`. */ - err = inflate(br->zstream, Z_FINISH); + err = inflate(block->zstream, Z_FINISH); if (err != Z_STREAM_END) { err = REFTABLE_ZLIB_ERROR; goto done; } err = 0; - if (br->zstream->total_out + block_header_skip != sz) { + if (block->zstream->total_out + block_header_skip != block_size) { err = REFTABLE_FORMAT_ERROR; goto done; } /* We're done with the input data. 
*/ - reftable_block_done(block); - block->data = br->uncompressed_data; - block->len = sz; - full_block_size = src_len + block_header_skip - br->zstream->avail_in; + block_source_release_data(&block->block_data); + block->block_data.data = block->uncompressed_data; + block->block_data.len = block_size; + full_block_size = src_len + block_header_skip - block->zstream->avail_in; } else if (full_block_size == 0) { - full_block_size = sz; - } else if (sz < full_block_size && sz < block->len && - block->data[sz] != 0) { + full_block_size = block_size; + } else if (block_size < full_block_size && block_size < block->block_data.len && + block->block_data.data[block_size] != 0) { /* If the block is smaller than the full block size, it is padded (data followed by '\0') or the next block is unaligned. */ - full_block_size = sz; + full_block_size = block_size; } - restart_count = get_be16(block->data + sz - 2); - restart_start = sz - 2 - 3 * restart_count; - restart_bytes = block->data + restart_start; + restart_count = reftable_get_be16(block->block_data.data + block_size - 2); + restart_off = block_size - 2 - 3 * restart_count; - /* transfer ownership. 
*/ - br->block = *block; - block->data = NULL; - block->len = 0; + block->block_type = block_type; + block->hash_size = hash_size; + block->restart_off = restart_off; + block->full_block_size = full_block_size; + block->header_off = header_size; + block->restart_count = restart_count; - br->hash_size = hash_size; - br->block_len = restart_start; - br->full_block_size = full_block_size; - br->header_off = header_off; - br->restart_count = restart_count; - br->restart_bytes = restart_bytes; + err = 0; done: + if (err < 0) + reftable_block_release(block); return err; } -void block_reader_release(struct block_reader *br) +void reftable_block_release(struct reftable_block *block) { - inflateEnd(br->zstream); - reftable_free(br->zstream); - reftable_free(br->uncompressed_data); - reftable_block_done(&br->block); + inflateEnd(block->zstream); + reftable_free(block->zstream); + reftable_free(block->uncompressed_data); + block_source_release_data(&block->block_data); + memset(block, 0, sizeof(*block)); } -uint8_t block_reader_type(const struct block_reader *r) +uint8_t reftable_block_type(const struct reftable_block *b) { - return r->block.data[r->header_off]; + return b->block_data.data[b->header_off]; } -int block_reader_first_key(const struct block_reader *br, struct reftable_buf *key) +int reftable_block_first_key(const struct reftable_block *block, struct reftable_buf *key) { - int off = br->header_off + 4, n; + int off = block->header_off + 4, n; struct string_view in = { - .buf = br->block.data + off, - .len = br->block_len - off, + .buf = block->block_data.data + off, + .len = block->restart_off - off, }; uint8_t extra = 0; @@ -355,33 +383,36 @@ int block_reader_first_key(const struct block_reader *br, struct reftable_buf *k return 0; } -static uint32_t block_reader_restart_offset(const struct block_reader *br, size_t idx) +static uint32_t block_restart_offset(const struct reftable_block *b, size_t idx) { - return get_be24(br->restart_bytes + 3 * idx); + return 
reftable_get_be24(b->block_data.data + b->restart_off + 3 * idx); } -void block_iter_seek_start(struct block_iter *it, const struct block_reader *br) +void block_iter_init(struct block_iter *it, const struct reftable_block *block) +{ + it->block = block; + block_iter_seek_start(it); +} + +void block_iter_seek_start(struct block_iter *it) { - it->block = br->block.data; - it->block_len = br->block_len; - it->hash_size = br->hash_size; reftable_buf_reset(&it->last_key); - it->next_off = br->header_off + 4; + it->next_off = it->block->header_off + 4; } struct restart_needle_less_args { int error; struct reftable_buf needle; - const struct block_reader *reader; + const struct reftable_block *block; }; static int restart_needle_less(size_t idx, void *_args) { struct restart_needle_less_args *args = _args; - uint32_t off = block_reader_restart_offset(args->reader, idx); + uint32_t off = block_restart_offset(args->block, idx); struct string_view in = { - .buf = args->reader->block.data + off, - .len = args->reader->block_len - off, + .buf = args->block->block_data.data + off, + .len = args->block->restart_off - off, }; uint64_t prefix_len, suffix_len; uint8_t extra; @@ -414,14 +445,14 @@ static int restart_needle_less(size_t idx, void *_args) int block_iter_next(struct block_iter *it, struct reftable_record *rec) { struct string_view in = { - .buf = (unsigned char *) it->block + it->next_off, - .len = it->block_len - it->next_off, + .buf = (unsigned char *) it->block->block_data.data + it->next_off, + .len = it->block->restart_off - it->next_off, }; struct string_view start = in; uint8_t extra = 0; int n = 0; - if (it->next_off >= it->block_len) + if (it->next_off >= it->block->restart_off) return 1; n = reftable_decode_key(&it->last_key, &extra, in); @@ -431,7 +462,7 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec) return REFTABLE_FORMAT_ERROR; string_view_consume(&in, n); - n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size, 
+ n = reftable_record_decode(rec, it->last_key, extra, in, it->block->hash_size, &it->scratch); if (n < 0) return -1; @@ -446,8 +477,6 @@ void block_iter_reset(struct block_iter *it) reftable_buf_reset(&it->last_key); it->next_off = 0; it->block = NULL; - it->block_len = 0; - it->hash_size = 0; } void block_iter_close(struct block_iter *it) @@ -456,12 +485,11 @@ void block_iter_close(struct block_iter *it) reftable_buf_release(&it->scratch); } -int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, - struct reftable_buf *want) +int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want) { struct restart_needle_less_args args = { .needle = *want, - .reader = br, + .block = it->block, }; struct reftable_record rec; int err = 0; @@ -479,7 +507,7 @@ int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, * restart point. While that works alright, we would end up scanning * too many record. */ - i = binsearch(br->restart_count, &restart_needle_less, &args); + i = binsearch(it->block->restart_count, &restart_needle_less, &args); if (args.error) { err = REFTABLE_FORMAT_ERROR; goto done; @@ -504,19 +532,18 @@ int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, * starting from the preceding restart point. */ if (i > 0) - it->next_off = block_reader_restart_offset(br, i - 1); + it->next_off = block_restart_offset(it->block, i - 1); else - it->next_off = br->header_off + 4; - it->block = br->block.data; - it->block_len = br->block_len; - it->hash_size = br->hash_size; + it->next_off = it->block->header_off + 4; - reftable_record_init(&rec, block_reader_type(br)); + err = reftable_record_init(&rec, reftable_block_type(it->block)); + if (err < 0) + goto done; /* * We're looking for the last entry less than the wanted key so that * the next call to `block_reader_next()` would yield the wanted - * record. We thus don't want to position our reader at the sought + * record. 
We thus don't want to position our iterator at the sought * after record, but one before. To do so, we have to go one entry too * far and then back up. */ @@ -561,6 +588,61 @@ done: return err; } +static int block_iter_seek_void(void *it, struct reftable_record *want) +{ + struct reftable_buf buf = REFTABLE_BUF_INIT; + struct block_iter *bi = it; + int err; + + if (bi->block->block_type != want->type) + return REFTABLE_API_ERROR; + + err = reftable_record_key(want, &buf); + if (err < 0) + goto out; + + err = block_iter_seek_key(it, &buf); + if (err < 0) + goto out; + + err = 0; + +out: + reftable_buf_release(&buf); + return err; +} + +static int block_iter_next_void(void *it, struct reftable_record *rec) +{ + return block_iter_next(it, rec); +} + +static void block_iter_close_void(void *it) +{ + block_iter_close(it); +} + +static struct reftable_iterator_vtable block_iter_vtable = { + .seek = &block_iter_seek_void, + .next = &block_iter_next_void, + .close = &block_iter_close_void, +}; + +int reftable_block_init_iterator(const struct reftable_block *b, + struct reftable_iterator *it) +{ + struct block_iter *bi; + + REFTABLE_CALLOC_ARRAY(bi, 1); + block_iter_init(bi, b); + + assert(!it->ops); + it->iter_arg = bi; + it->ops = &block_iter_vtable; + + return 0; +} + void block_writer_release(struct block_writer *bw) { deflateEnd(bw->zstream); @@ -571,14 +653,3 @@ void block_writer_release(struct block_writer *bw) reftable_buf_release(&bw->last_key); /* the block is not owned. 
*/ } - -void reftable_block_done(struct reftable_block *blockp) -{ - struct reftable_block_source source = blockp->source; - if (blockp && source.ops) - source.ops->return_block(source.arg, blockp); - blockp->data = NULL; - blockp->len = 0; - blockp->source.ops = NULL; - blockp->source.arg = NULL; -} diff --git a/reftable/block.h b/reftable/block.h index 0431e8591f..d6dfaae33e 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -1,16 +1,17 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef BLOCK_H #define BLOCK_H #include "basics.h" #include "record.h" +#include "reftable-block.h" #include "reftable-blocksource.h" /* @@ -18,7 +19,7 @@ https://developers.google.com/open-source/licenses/bsd * allocation overhead. */ struct block_writer { - z_stream *zstream; + struct z_stream_s *zstream; unsigned char *compressed; size_t compressed_cap; @@ -30,7 +31,7 @@ struct block_writer { /* How often to restart keys. */ uint16_t restart_interval; - int hash_size; + uint32_t hash_size; /* Offset of next uint8_t to write. */ uint32_t next; @@ -48,12 +49,12 @@ struct block_writer { * initializes the blockwriter to write `typ` entries, using `block` as temporary * storage. `block` is not owned by the block_writer. */ int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block, - uint32_t block_size, uint32_t header_off, int hash_size); + uint32_t block_size, uint32_t header_off, uint32_t hash_size); /* returns the block type (eg. 'r' for ref records. */ uint8_t block_writer_type(struct block_writer *bw); -/* appends the record, or -1 if it doesn't fit. */ +/* Attempts to append the record. 
Returns 0 on success or error code on failure. */ int block_writer_add(struct block_writer *w, struct reftable_record *rec); /* appends the key restarts, and compress the block if necessary. */ @@ -62,53 +63,11 @@ int block_writer_finish(struct block_writer *w); /* clears out internally allocated block_writer members. */ void block_writer_release(struct block_writer *bw); -struct z_stream; - -/* Read a block. */ -struct block_reader { - /* offset of the block header; nonzero for the first block in a - * reftable. */ - uint32_t header_off; - - /* the memory block */ - struct reftable_block block; - int hash_size; - - /* Uncompressed data for log entries. */ - z_stream *zstream; - unsigned char *uncompressed_data; - size_t uncompressed_cap; - - /* size of the data, excluding restart data. */ - uint32_t block_len; - uint8_t *restart_bytes; - uint16_t restart_count; - - /* size of the data in the file. For log blocks, this is the compressed - * size. */ - uint32_t full_block_size; -}; - -/* initializes a block reader. */ -int block_reader_init(struct block_reader *br, struct reftable_block *bl, - uint32_t header_off, uint32_t table_block_size, - int hash_size); - -void block_reader_release(struct block_reader *br); - -/* Returns the block type (eg. 'r' for refs) */ -uint8_t block_reader_type(const struct block_reader *r); - -/* Decodes the first key in the block */ -int block_reader_first_key(const struct block_reader *br, struct reftable_buf *key); - -/* Iterate over entries in a block */ +/* Iterator for records contained in a single block. */ struct block_iter { /* offset within the block of the next entry to read. */ uint32_t next_off; - const unsigned char *block; - size_t block_len; - int hash_size; + const struct reftable_block *block; /* key for last entry we read. 
*/ struct reftable_buf last_key; @@ -120,12 +79,23 @@ struct block_iter { .scratch = REFTABLE_BUF_INIT, \ } -/* Position `it` at start of the block */ -void block_iter_seek_start(struct block_iter *it, const struct block_reader *br); +/* + * Initialize the block iterator with the given block. The iterator will be + * positioned at the first record contained in the block. The block must remain + * valid until the end of the iterator's lifetime. It is valid to re-initialize + * iterators multiple times. + */ +void block_iter_init(struct block_iter *it, const struct reftable_block *block); + +/* Position the initialized iterator at the first record of its block. */ +void block_iter_seek_start(struct block_iter *it); -/* Position `it` to the `want` key in the block */ -int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, - struct reftable_buf *want); +/* + * Position the initialized iterator at the desired record key. It is not an + * error in case the record cannot be found. If so, a subsequent call to + * `block_iter_next()` will indicate that the iterator is exhausted. + */ +int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want); /* return < 0 for error, 0 for OK, > 0 for EOF. */ int block_iter_next(struct block_iter *it, struct reftable_record *rec); @@ -137,12 +107,9 @@ void block_iter_reset(struct block_iter *it); void block_iter_close(struct block_iter *it); /* size of file header, depending on format version */ -int header_size(int version); +size_t header_size(int version); /* size of file footer, depending on format version */ -int footer_size(int version); - -/* returns a block to its source. 
*/ -void reftable_block_done(struct reftable_block *ret); +size_t footer_size(int version); #endif diff --git a/reftable/blocksource.c b/reftable/blocksource.c index 52e0915a67..573c81287f 100644 --- a/reftable/blocksource.c +++ b/reftable/blocksource.c @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #include "system.h" @@ -13,19 +13,54 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-blocksource.h" #include "reftable-error.h" -static void reftable_buf_return_block(void *b UNUSED, struct reftable_block *dest) +void block_source_release_data(struct reftable_block_data *data) +{ + struct reftable_block_source source = data->source; + if (data && source.ops) + source.ops->release_data(source.arg, data); + data->data = NULL; + data->len = 0; + data->source.ops = NULL; + data->source.arg = NULL; +} + +void block_source_close(struct reftable_block_source *source) +{ + if (!source->ops) { + return; + } + + source->ops->close(source->arg); + source->ops = NULL; +} + +ssize_t block_source_read_data(struct reftable_block_source *source, + struct reftable_block_data *dest, uint64_t off, + uint32_t size) +{ + ssize_t result = source->ops->read_data(source->arg, dest, off, size); + dest->source = *source; + return result; +} + +uint64_t block_source_size(struct reftable_block_source *source) +{ + return source->ops->size(source->arg); +} + +static void reftable_buf_release_data(void *b REFTABLE_UNUSED, struct reftable_block_data *dest) { if (dest->len) memset(dest->data, 0xff, dest->len); reftable_free(dest->data); } -static void reftable_buf_close(void *b UNUSED) +static void 
reftable_buf_close(void *b REFTABLE_UNUSED) { } -static int reftable_buf_read_block(void *v, struct reftable_block *dest, - uint64_t off, uint32_t size) +static ssize_t reftable_buf_read_data(void *v, struct reftable_block_data *dest, + uint64_t off, uint32_t size) { struct reftable_buf *b = v; assert(off + size <= b->len); @@ -44,8 +79,8 @@ static uint64_t reftable_buf_size(void *b) static struct reftable_block_source_vtable reftable_buf_vtable = { .size = &reftable_buf_size, - .read_block = &reftable_buf_read_block, - .return_block = &reftable_buf_return_block, + .read_data = &reftable_buf_read_data, + .release_data = &reftable_buf_release_data, .close = &reftable_buf_close, }; @@ -67,7 +102,7 @@ static uint64_t file_size(void *b) return ((struct file_block_source *)b)->size; } -static void file_return_block(void *b UNUSED, struct reftable_block *dest UNUSED) +static void file_release_data(void *b REFTABLE_UNUSED, struct reftable_block_data *dest REFTABLE_UNUSED) { } @@ -78,8 +113,8 @@ static void file_close(void *v) reftable_free(b); } -static int file_read_block(void *v, struct reftable_block *dest, uint64_t off, - uint32_t size) +static ssize_t file_read_data(void *v, struct reftable_block_data *dest, uint64_t off, + uint32_t size) { struct file_block_source *b = v; assert(off + size <= b->size); @@ -90,15 +125,15 @@ static int file_read_block(void *v, struct reftable_block *dest, uint64_t off, static struct reftable_block_source_vtable file_vtable = { .size = &file_size, - .read_block = &file_read_block, - .return_block = &file_return_block, + .read_data = &file_read_data, + .release_data = &file_release_data, .close = &file_close, }; int reftable_block_source_from_file(struct reftable_block_source *bs, const char *name) { - struct file_block_source *p; + struct file_block_source *p = NULL; struct stat st; int fd, err; @@ -122,7 +157,12 @@ int reftable_block_source_from_file(struct reftable_block_source *bs, } p->size = st.st_size; - p->data = xmmap(NULL, 
st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + p->data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (p->data == MAP_FAILED) { + err = REFTABLE_IO_ERROR; + p->data = NULL; + goto out; + } assert(!bs->ops); bs->ops = &file_vtable; @@ -135,5 +175,5 @@ out: close(fd); if (err < 0) reftable_free(p); - return 0; + return err; } diff --git a/reftable/blocksource.h b/reftable/blocksource.h index a84a3ccd89..a110e05958 100644 --- a/reftable/blocksource.h +++ b/reftable/blocksource.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef BLOCKSOURCE_H #define BLOCKSOURCE_H @@ -12,9 +12,34 @@ https://developers.google.com/open-source/licenses/bsd #include "system.h" struct reftable_block_source; +struct reftable_block_data; struct reftable_buf; -/* Create an in-memory block source for reading reftables */ +/* + * Close the block source and the underlying resource. This is a no-op in case + * the block source is zero-initialized. + */ +void block_source_close(struct reftable_block_source *source); + +/* + * Read a block of length `size` from the source at the given `off`. + */ +ssize_t block_source_read_data(struct reftable_block_source *source, + struct reftable_block_data *dest, uint64_t off, + uint32_t size); + +/* + * Return the total length of the underlying resource. + */ +uint64_t block_source_size(struct reftable_block_source *source); + +/* + * Return a block to its original source, releasing any resources associated + * with it. + */ +void block_source_release_data(struct reftable_block_data *data); + +/* Create an in-memory block source for reading reftables. 
*/ void block_source_from_buf(struct reftable_block_source *bs, struct reftable_buf *buf); diff --git a/reftable/constants.h b/reftable/constants.h index f6beb843eb..e3b1aaa516 100644 --- a/reftable/constants.h +++ b/reftable/constants.h @@ -1,19 +1,15 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef CONSTANTS_H #define CONSTANTS_H -#define BLOCK_TYPE_LOG 'g' -#define BLOCK_TYPE_INDEX 'i' -#define BLOCK_TYPE_REF 'r' -#define BLOCK_TYPE_OBJ 'o' -#define BLOCK_TYPE_ANY 0 +#include "reftable-constants.h" #define MAX_RESTARTS ((1 << 16) - 1) #define DEFAULT_BLOCK_SIZE 4096 diff --git a/reftable/error.c b/reftable/error.c index 660d029617..c7cab2dbc4 100644 --- a/reftable/error.c +++ b/reftable/error.c @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #include "system.h" #include "reftable-error.h" diff --git a/reftable/iter.c b/reftable/iter.c index 86e801ca9f..2ecc52b336 100644 --- a/reftable/iter.c +++ b/reftable/iter.c @@ -1,19 +1,20 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can 
be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #include "iter.h" #include "system.h" #include "block.h" +#include "blocksource.h" #include "constants.h" -#include "reader.h" #include "reftable-error.h" +#include "table.h" int iterator_seek(struct reftable_iterator *it, struct reftable_record *want) { @@ -25,17 +26,17 @@ int iterator_next(struct reftable_iterator *it, struct reftable_record *rec) return it->ops->next(it->iter_arg, rec); } -static int empty_iterator_seek(void *arg UNUSED, struct reftable_record *want UNUSED) +static int empty_iterator_seek(void *arg REFTABLE_UNUSED, struct reftable_record *want REFTABLE_UNUSED) { return 0; } -static int empty_iterator_next(void *arg UNUSED, struct reftable_record *rec UNUSED) +static int empty_iterator_next(void *arg REFTABLE_UNUSED, struct reftable_record *rec REFTABLE_UNUSED) { return 1; } -static void empty_iterator_close(void *arg UNUSED) +static void empty_iterator_close(void *arg REFTABLE_UNUSED) { } @@ -113,7 +114,7 @@ static void indexed_table_ref_iter_close(void *p) { struct indexed_table_ref_iter *it = p; block_iter_close(&it->cur); - reftable_block_done(&it->block_reader.block); + block_source_release_data(&it->block.block_data); reftable_free(it->offsets); reftable_buf_release(&it->oid); } @@ -127,11 +128,10 @@ static int indexed_table_ref_iter_next_block(struct indexed_table_ref_iter *it) return 1; } - reftable_block_done(&it->block_reader.block); + block_source_release_data(&it->block.block_data); off = it->offsets[it->offset_idx++]; - err = reader_init_block_reader(it->r, &it->block_reader, off, - BLOCK_TYPE_REF); + err = table_init_block(it->table, &it->block, off, REFTABLE_BLOCK_TYPE_REF); if (err < 0) { return err; } @@ -139,15 +139,14 @@ static int indexed_table_ref_iter_next_block(struct indexed_table_ref_iter *it) /* indexed block does not exist. 
*/ return REFTABLE_FORMAT_ERROR; } - block_iter_seek_start(&it->cur, &it->block_reader); + block_iter_init(&it->cur, &it->block); return 0; } -static int indexed_table_ref_iter_seek(void *p UNUSED, - struct reftable_record *want UNUSED) +static int indexed_table_ref_iter_seek(void *p REFTABLE_UNUSED, + struct reftable_record *want REFTABLE_UNUSED) { - BUG("seeking indexed table is not supported"); - return -1; + return REFTABLE_API_ERROR; } static int indexed_table_ref_iter_next(void *p, struct reftable_record *rec) @@ -182,7 +181,7 @@ static int indexed_table_ref_iter_next(void *p, struct reftable_record *rec) } int indexed_table_ref_iter_new(struct indexed_table_ref_iter **dest, - struct reftable_reader *r, uint8_t *oid, + struct reftable_table *t, uint8_t *oid, int oid_len, uint64_t *offsets, int offset_len) { struct indexed_table_ref_iter empty = INDEXED_TABLE_REF_ITER_INIT; @@ -196,7 +195,7 @@ int indexed_table_ref_iter_new(struct indexed_table_ref_iter **dest, } *itr = empty; - itr->r = r; + itr->table = t; err = reftable_buf_add(&itr->oid, oid, oid_len); if (err < 0) @@ -247,7 +246,7 @@ int reftable_iterator_seek_ref(struct reftable_iterator *it, const char *name) { struct reftable_record want = { - .type = BLOCK_TYPE_REF, + .type = REFTABLE_BLOCK_TYPE_REF, .u.ref = { .refname = (char *)name, }, @@ -259,7 +258,7 @@ int reftable_iterator_next_ref(struct reftable_iterator *it, struct reftable_ref_record *ref) { struct reftable_record rec = { - .type = BLOCK_TYPE_REF, + .type = REFTABLE_BLOCK_TYPE_REF, .u = { .ref = *ref }, @@ -273,7 +272,7 @@ int reftable_iterator_seek_log_at(struct reftable_iterator *it, const char *name, uint64_t update_index) { struct reftable_record want = { - .type = BLOCK_TYPE_LOG, + .type = REFTABLE_BLOCK_TYPE_LOG, .u.log = { .refname = (char *)name, .update_index = update_index, @@ -292,7 +291,7 @@ int reftable_iterator_next_log(struct reftable_iterator *it, struct reftable_log_record *log) { struct reftable_record rec = { - .type = 
BLOCK_TYPE_LOG, + .type = REFTABLE_BLOCK_TYPE_LOG, .u = { .log = *log, }, diff --git a/reftable/iter.h b/reftable/iter.h index 40f98893b8..cc920970a5 100644 --- a/reftable/iter.h +++ b/reftable/iter.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef ITER_H #define ITER_H @@ -59,7 +59,7 @@ void iterator_from_filtering_ref_iterator(struct reftable_iterator *, * but using the object index. */ struct indexed_table_ref_iter { - struct reftable_reader *r; + struct reftable_table *table; struct reftable_buf oid; /* mutable */ @@ -68,7 +68,7 @@ struct indexed_table_ref_iter { /* Points to the next offset to read. */ int offset_idx; int offset_len; - struct block_reader block_reader; + struct reftable_block block; struct block_iter cur; int is_finished; }; @@ -83,7 +83,7 @@ void iterator_from_indexed_table_ref_iter(struct reftable_iterator *it, /* Takes ownership of `offsets` */ int indexed_table_ref_iter_new(struct indexed_table_ref_iter **dest, - struct reftable_reader *r, uint8_t *oid, + struct reftable_table *t, uint8_t *oid, int oid_len, uint64_t *offsets, int offset_len); #endif diff --git a/reftable/merged.c b/reftable/merged.c index e72b39e178..733de07454 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -1,21 +1,21 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * 
https://developers.google.com/open-source/licenses/bsd + */ #include "merged.h" #include "constants.h" #include "iter.h" #include "pq.h" -#include "reader.h" #include "record.h" #include "reftable-merged.h" #include "reftable-error.h" #include "system.h" +#include "table.h" struct merged_subiter { struct reftable_iterator iter; @@ -66,8 +66,11 @@ static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want int err; mi->advance_index = -1; - while (!merged_iter_pqueue_is_empty(mi->pq)) - merged_iter_pqueue_remove(&mi->pq); + while (!merged_iter_pqueue_is_empty(mi->pq)) { + err = merged_iter_pqueue_remove(&mi->pq, NULL); + if (err < 0) + return err; + } for (size_t i = 0; i < mi->subiters_len; i++) { err = iterator_seek(&mi->subiters[i].iter, want); @@ -120,7 +123,9 @@ static int merged_iter_next_entry(struct merged_iter *mi, if (empty) return 1; - entry = merged_iter_pqueue_remove(&mi->pq); + err = merged_iter_pqueue_remove(&mi->pq, &entry); + if (err < 0) + return err; /* One can also use reftable as datacenter-local storage, where the ref @@ -134,18 +139,23 @@ static int merged_iter_next_entry(struct merged_iter *mi, struct pq_entry top = merged_iter_pqueue_top(mi->pq); int cmp; - cmp = reftable_record_cmp(top.rec, entry.rec); + err = reftable_record_cmp(top.rec, entry.rec, &cmp); + if (err < 0) + return err; if (cmp > 0) break; - merged_iter_pqueue_remove(&mi->pq); + err = merged_iter_pqueue_remove(&mi->pq, NULL); + if (err < 0) + return err; + err = merged_iter_advance_subiter(mi, top.index); if (err < 0) return err; } mi->advance_index = entry.index; - SWAP(*rec, *entry.rec); + REFTABLE_SWAP(*rec, *entry.rec); return 0; } @@ -182,7 +192,7 @@ static void iterator_from_merged_iter(struct reftable_iterator *it, } int reftable_merged_table_new(struct reftable_merged_table **dest, - struct reftable_reader **readers, size_t n, + struct reftable_table **tables, size_t n, enum reftable_hash hash_id) { struct reftable_merged_table *m = NULL; @@ 
-190,10 +200,10 @@ int reftable_merged_table_new(struct reftable_merged_table **dest, uint64_t first_min = 0; for (size_t i = 0; i < n; i++) { - uint64_t min = reftable_reader_min_update_index(readers[i]); - uint64_t max = reftable_reader_max_update_index(readers[i]); + uint64_t min = reftable_table_min_update_index(tables[i]); + uint64_t max = reftable_table_max_update_index(tables[i]); - if (reftable_reader_hash_id(readers[i]) != hash_id) { + if (reftable_table_hash_id(tables[i]) != hash_id) { return REFTABLE_FORMAT_ERROR; } if (i == 0 || min < first_min) { @@ -208,8 +218,8 @@ int reftable_merged_table_new(struct reftable_merged_table **dest, if (!m) return REFTABLE_OUT_OF_MEMORY_ERROR; - m->readers = readers; - m->readers_len = n; + m->tables = tables; + m->tables_len = n; m->min = first_min; m->max = last_max; m->hash_id = hash_id; @@ -244,17 +254,20 @@ int merged_table_init_iter(struct reftable_merged_table *mt, struct merged_iter *mi = NULL; int ret; - if (mt->readers_len) { - REFTABLE_CALLOC_ARRAY(subiters, mt->readers_len); + if (mt->tables_len) { + REFTABLE_CALLOC_ARRAY(subiters, mt->tables_len); if (!subiters) { ret = REFTABLE_OUT_OF_MEMORY_ERROR; goto out; } } - for (size_t i = 0; i < mt->readers_len; i++) { - reftable_record_init(&subiters[i].rec, typ); - ret = reader_init_iter(mt->readers[i], &subiters[i].iter, typ); + for (size_t i = 0; i < mt->tables_len; i++) { + ret = reftable_record_init(&subiters[i].rec, typ); + if (ret < 0) + goto out; + + ret = table_init_iter(mt->tables[i], &subiters[i].iter, typ); if (ret < 0) goto out; } @@ -267,14 +280,14 @@ int merged_table_init_iter(struct reftable_merged_table *mt, mi->advance_index = -1; mi->suppress_deletions = mt->suppress_deletions; mi->subiters = subiters; - mi->subiters_len = mt->readers_len; + mi->subiters_len = mt->tables_len; iterator_from_merged_iter(it, mi); ret = 0; out: if (ret < 0) { - for (size_t i = 0; subiters && i < mt->readers_len; i++) { + for (size_t i = 0; subiters && i < 
mt->tables_len; i++) { reftable_iterator_destroy(&subiters[i].iter); reftable_record_release(&subiters[i].rec); } @@ -288,13 +301,13 @@ out: int reftable_merged_table_init_ref_iterator(struct reftable_merged_table *mt, struct reftable_iterator *it) { - return merged_table_init_iter(mt, it, BLOCK_TYPE_REF); + return merged_table_init_iter(mt, it, REFTABLE_BLOCK_TYPE_REF); } int reftable_merged_table_init_log_iterator(struct reftable_merged_table *mt, struct reftable_iterator *it) { - return merged_table_init_iter(mt, it, BLOCK_TYPE_LOG); + return merged_table_init_iter(mt, it, REFTABLE_BLOCK_TYPE_LOG); } enum reftable_hash reftable_merged_table_hash_id(struct reftable_merged_table *mt) diff --git a/reftable/merged.h b/reftable/merged.h index 0b7d939e92..4317e5f5f6 100644 --- a/reftable/merged.h +++ b/reftable/merged.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef MERGED_H #define MERGED_H @@ -13,8 +13,8 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-basics.h" struct reftable_merged_table { - struct reftable_reader **readers; - size_t readers_len; + struct reftable_table **tables; + size_t tables_len; enum reftable_hash hash_id; /* If unset, produce deletions. This is useful for compaction. 
For the diff --git a/reftable/pq.c b/reftable/pq.c index 5591e875e1..9a79f5c5ee 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #include "pq.h" @@ -15,13 +15,18 @@ https://developers.google.com/open-source/licenses/bsd int pq_less(struct pq_entry *a, struct pq_entry *b) { - int cmp = reftable_record_cmp(a->rec, b->rec); + int cmp, err; + + err = reftable_record_cmp(a->rec, b->rec, &cmp); + if (err < 0) + return err; + if (cmp == 0) return a->index > b->index; return cmp < 0; } -struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq) +int merged_iter_pqueue_remove(struct merged_iter_pqueue *pq, struct pq_entry *out) { size_t i = 0; struct pq_entry e = pq->heap[0]; @@ -32,17 +37,34 @@ struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq) size_t min = i; size_t j = 2 * i + 1; size_t k = 2 * i + 2; - if (j < pq->len && pq_less(&pq->heap[j], &pq->heap[i])) - min = j; - if (k < pq->len && pq_less(&pq->heap[k], &pq->heap[min])) - min = k; + int cmp; + + if (j < pq->len) { + cmp = pq_less(&pq->heap[j], &pq->heap[i]); + if (cmp < 0) + return -1; + else if (cmp) + min = j; + } + + if (k < pq->len) { + cmp = pq_less(&pq->heap[k], &pq->heap[min]); + if (cmp < 0) + return -1; + else if (cmp) + min = k; + } + if (min == i) break; - SWAP(pq->heap[i], pq->heap[min]); + REFTABLE_SWAP(pq->heap[i], pq->heap[min]); i = min; } - return e; + if (out) + *out = e; + + return 0; } int merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e) @@ -59,7 +81,7 @@ int merged_iter_pqueue_add(struct merged_iter_pqueue *pq, 
const struct pq_entry size_t j = (i - 1) / 2; if (pq_less(&pq->heap[j], &pq->heap[i])) break; - SWAP(pq->heap[j], pq->heap[i]); + REFTABLE_SWAP(pq->heap[j], pq->heap[i]); i = j; } diff --git a/reftable/pq.h b/reftable/pq.h index 83c062eeca..42310670b0 100644 --- a/reftable/pq.h +++ b/reftable/pq.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef PQ_H #define PQ_H @@ -22,7 +22,7 @@ struct merged_iter_pqueue { size_t cap; }; -struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq); +int merged_iter_pqueue_remove(struct merged_iter_pqueue *pq, struct pq_entry *out); int merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e); void merged_iter_pqueue_release(struct merged_iter_pqueue *pq); int pq_less(struct pq_entry *a, struct pq_entry *b); diff --git a/reftable/reader.h b/reftable/reader.h deleted file mode 100644 index d2b48a4849..0000000000 --- a/reftable/reader.h +++ /dev/null @@ -1,67 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#ifndef READER_H -#define READER_H - -#include "block.h" -#include "record.h" -#include "reftable-iterator.h" -#include "reftable-reader.h" - -uint64_t block_source_size(struct reftable_block_source *source); - -int block_source_read_block(struct reftable_block_source *source, - struct reftable_block *dest, uint64_t off, - uint32_t size); -void block_source_close(struct reftable_block_source *source); - -/* metadata for a block type */ -struct 
reftable_reader_offsets { - int is_present; - uint64_t offset; - uint64_t index_offset; -}; - -/* The state for reading a reftable file. */ -struct reftable_reader { - /* for convenience, associate a name with the instance. */ - char *name; - struct reftable_block_source source; - - /* Size of the file, excluding the footer. */ - uint64_t size; - - /* The hash function used for ref records. */ - enum reftable_hash hash_id; - - uint32_t block_size; - uint64_t min_update_index; - uint64_t max_update_index; - /* Length of the OID keys in the 'o' section */ - int object_id_len; - int version; - - struct reftable_reader_offsets ref_offsets; - struct reftable_reader_offsets obj_offsets; - struct reftable_reader_offsets log_offsets; - - uint64_t refcount; -}; - -const char *reader_name(struct reftable_reader *r); - -int reader_init_iter(struct reftable_reader *r, - struct reftable_iterator *it, - uint8_t typ); - -/* initialize a block reader to read from `r` */ -int reader_init_block_reader(struct reftable_reader *r, struct block_reader *br, - uint64_t next_off, uint8_t want_typ); - -#endif diff --git a/reftable/record.c b/reftable/record.c index 04429d23fe..fcd387ba5d 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ /* record.c - methods for different types of records. 
*/ @@ -21,56 +21,58 @@ static void *reftable_record_data(struct reftable_record *rec); int get_var_int(uint64_t *dest, struct string_view *in) { - int ptr = 0; + const unsigned char *buf = in->buf; + unsigned char c; uint64_t val; - if (in->len == 0) + if (!in->len) return -1; - val = in->buf[ptr] & 0x7f; - - while (in->buf[ptr] & 0x80) { - ptr++; - if (ptr > in->len) { + c = *buf++; + val = c & 0x7f; + + while (c & 0x80) { + /* + * We use a micro-optimization here: whenever we see that the + * 0x80 bit is set, we know that the remainder of the value + * cannot be 0. The zero-values thus doesn't need to be encoded + * at all, which is why we subtract 1 when encoding and add 1 + * when decoding. + * + * This allows us to save a byte in some edge cases. + */ + val += 1; + if (!val || (val & (uint64_t)(~0ULL << (64 - 7)))) + return -1; /* overflow */ + if (buf >= in->buf + in->len) return -1; - } - val = (val + 1) << 7 | (uint64_t)(in->buf[ptr] & 0x7f); + c = *buf++; + val = (val << 7) + (c & 0x7f); } *dest = val; - return ptr + 1; + return buf - in->buf; } -int put_var_int(struct string_view *dest, uint64_t val) +int put_var_int(struct string_view *dest, uint64_t value) { - uint8_t buf[10] = { 0 }; - int i = 9; - int n = 0; - buf[i] = (uint8_t)(val & 0x7f); - i--; - while (1) { - val >>= 7; - if (!val) { - break; - } - val--; - buf[i] = 0x80 | (uint8_t)(val & 0x7f); - i--; - } - - n = sizeof(buf) - i - 1; - if (dest->len < n) - return -1; - memcpy(dest->buf, &buf[i + 1], n); - return n; + unsigned char varint[10]; + unsigned pos = sizeof(varint) - 1; + varint[pos] = value & 0x7f; + while (value >>= 7) + varint[--pos] = 0x80 | (--value & 0x7f); + if (dest->len < sizeof(varint) - pos) + return REFTABLE_ENTRY_TOO_BIG_ERROR; + memcpy(dest->buf, varint + pos, sizeof(varint) - pos); + return sizeof(varint) - pos; } int reftable_is_block_type(uint8_t typ) { switch (typ) { - case BLOCK_TYPE_REF: - case BLOCK_TYPE_LOG: - case BLOCK_TYPE_OBJ: - case BLOCK_TYPE_INDEX: + case 
REFTABLE_BLOCK_TYPE_REF: + case REFTABLE_BLOCK_TYPE_LOG: + case REFTABLE_BLOCK_TYPE_OBJ: + case REFTABLE_BLOCK_TYPE_INDEX: return 1; } return 0; @@ -124,13 +126,13 @@ static int decode_string(struct reftable_buf *dest, struct string_view in) static int encode_string(const char *str, struct string_view s) { struct string_view start = s; - int l = strlen(str); + size_t l = strlen(str); int n = put_var_int(&s, l); if (n < 0) - return -1; + return n; string_view_consume(&s, n); if (s.len < l) - return -1; + return REFTABLE_ENTRY_TOO_BIG_ERROR; memcpy(s.buf, str, l); string_view_consume(&s, l); @@ -142,22 +144,22 @@ int reftable_encode_key(int *restart, struct string_view dest, uint8_t extra) { struct string_view start = dest; - int prefix_len = common_prefix_size(&prev_key, &key); + size_t prefix_len = common_prefix_size(&prev_key, &key); uint64_t suffix_len = key.len - prefix_len; - int n = put_var_int(&dest, (uint64_t)prefix_len); + int n = put_var_int(&dest, prefix_len); if (n < 0) - return -1; + return n; string_view_consume(&dest, n); *restart = (prefix_len == 0); n = put_var_int(&dest, suffix_len << 3 | (uint64_t)extra); if (n < 0) - return -1; + return n; string_view_consume(&dest, n); if (dest.len < suffix_len) - return -1; + return REFTABLE_ENTRY_TOO_BIG_ERROR; memcpy(dest.buf, key.buf + prefix_len, suffix_len); string_view_consume(&dest, suffix_len); @@ -227,7 +229,7 @@ static int reftable_ref_record_key(const void *r, struct reftable_buf *dest) } static int reftable_ref_record_copy_from(void *rec, const void *src_rec, - int hash_size) + uint32_t hash_size) { struct reftable_ref_record *ref = rec; const struct reftable_ref_record *src = src_rec; @@ -235,13 +237,11 @@ static int reftable_ref_record_copy_from(void *rec, const void *src_rec, size_t refname_cap = 0; int err; - assert(hash_size > 0); - - SWAP(refname, ref->refname); - SWAP(refname_cap, ref->refname_cap); + REFTABLE_SWAP(refname, ref->refname); + REFTABLE_SWAP(refname_cap, ref->refname_cap); 
reftable_ref_record_release(ref); - SWAP(ref->refname, refname); - SWAP(ref->refname_cap, refname_cap); + REFTABLE_SWAP(ref->refname, refname); + REFTABLE_SWAP(ref->refname_cap, refname_cap); if (src->refname) { size_t refname_len = strlen(src->refname); @@ -317,38 +317,34 @@ static uint8_t reftable_ref_record_val_type(const void *rec) } static int reftable_ref_record_encode(const void *rec, struct string_view s, - int hash_size) + uint32_t hash_size) { const struct reftable_ref_record *r = (const struct reftable_ref_record *)rec; struct string_view start = s; int n = put_var_int(&s, r->update_index); - assert(hash_size > 0); if (n < 0) - return -1; + return n; string_view_consume(&s, n); switch (r->value_type) { case REFTABLE_REF_SYMREF: n = encode_string(r->value.symref, s); - if (n < 0) { - return -1; - } + if (n < 0) + return n; string_view_consume(&s, n); break; case REFTABLE_REF_VAL2: - if (s.len < 2 * hash_size) { - return -1; - } + if (s.len < 2 * hash_size) + return REFTABLE_ENTRY_TOO_BIG_ERROR; memcpy(s.buf, r->value.val2.value, hash_size); string_view_consume(&s, hash_size); memcpy(s.buf, r->value.val2.target_value, hash_size); string_view_consume(&s, hash_size); break; case REFTABLE_REF_VAL1: - if (s.len < hash_size) { - return -1; - } + if (s.len < hash_size) + return REFTABLE_ENTRY_TOO_BIG_ERROR; memcpy(s.buf, r->value.val1, hash_size); string_view_consume(&s, hash_size); break; @@ -363,7 +359,7 @@ static int reftable_ref_record_encode(const void *rec, struct string_view s, static int reftable_ref_record_decode(void *rec, struct reftable_buf key, uint8_t val_type, struct string_view in, - int hash_size, struct reftable_buf *scratch) + uint32_t hash_size, struct reftable_buf *scratch) { struct reftable_ref_record *r = rec; struct string_view start = in; @@ -372,18 +368,16 @@ static int reftable_ref_record_decode(void *rec, struct reftable_buf key, size_t refname_cap = 0; int n, err; - assert(hash_size > 0); - n = get_var_int(&update_index, &in); if (n 
< 0) return n; string_view_consume(&in, n); - SWAP(refname, r->refname); - SWAP(refname_cap, r->refname_cap); + REFTABLE_SWAP(refname, r->refname); + REFTABLE_SWAP(refname_cap, r->refname_cap); reftable_ref_record_release(r); - SWAP(r->refname, refname); - SWAP(r->refname_cap, refname_cap); + REFTABLE_SWAP(r->refname, refname); + REFTABLE_SWAP(r->refname_cap, refname_cap); REFTABLE_ALLOC_GROW_OR_NULL(r->refname, key.len + 1, r->refname_cap); if (!r->refname) { @@ -449,7 +443,7 @@ static int reftable_ref_record_is_deletion_void(const void *p) } static int reftable_ref_record_equal_void(const void *a, - const void *b, int hash_size) + const void *b, uint32_t hash_size) { struct reftable_ref_record *ra = (struct reftable_ref_record *) a; struct reftable_ref_record *rb = (struct reftable_ref_record *) b; @@ -465,7 +459,7 @@ static int reftable_ref_record_cmp_void(const void *_a, const void *_b) static struct reftable_record_vtable reftable_ref_record_vtable = { .key = &reftable_ref_record_key, - .type = BLOCK_TYPE_REF, + .type = REFTABLE_BLOCK_TYPE_REF, .copy_from = &reftable_ref_record_copy_from, .val_type = &reftable_ref_record_val_type, .encode = &reftable_ref_record_encode, @@ -493,7 +487,7 @@ static void reftable_obj_record_release(void *rec) } static int reftable_obj_record_copy_from(void *rec, const void *src_rec, - int hash_size UNUSED) + uint32_t hash_size REFTABLE_UNUSED) { struct reftable_obj_record *obj = rec; const struct reftable_obj_record *src = src_rec; @@ -507,11 +501,17 @@ static int reftable_obj_record_copy_from(void *rec, const void *src_rec, if (src->hash_prefix_len) memcpy(obj->hash_prefix, src->hash_prefix, obj->hash_prefix_len); - REFTABLE_ALLOC_ARRAY(obj->offsets, src->offset_len); - if (!obj->offsets) - return REFTABLE_OUT_OF_MEMORY_ERROR; - obj->offset_len = src->offset_len; - COPY_ARRAY(obj->offsets, src->offsets, src->offset_len); + if (src->offset_len) { + if (sizeof(*src->offsets) > SIZE_MAX / src->offset_len) + return 
REFTABLE_OUT_OF_MEMORY_ERROR; + + REFTABLE_ALLOC_ARRAY(obj->offsets, src->offset_len); + if (!obj->offsets) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + memcpy(obj->offsets, src->offsets, sizeof(*src->offsets) * src->offset_len); + obj->offset_len = src->offset_len; + } return 0; } @@ -525,7 +525,7 @@ static uint8_t reftable_obj_record_val_type(const void *rec) } static int reftable_obj_record_encode(const void *rec, struct string_view s, - int hash_size UNUSED) + uint32_t hash_size REFTABLE_UNUSED) { const struct reftable_obj_record *r = rec; struct string_view start = s; @@ -534,24 +534,22 @@ static int reftable_obj_record_encode(const void *rec, struct string_view s, uint64_t last = 0; if (r->offset_len == 0 || r->offset_len >= 8) { n = put_var_int(&s, r->offset_len); - if (n < 0) { - return -1; - } + if (n < 0) + return n; string_view_consume(&s, n); } if (r->offset_len == 0) return start.len - s.len; n = put_var_int(&s, r->offsets[0]); if (n < 0) - return -1; + return n; string_view_consume(&s, n); last = r->offsets[0]; for (i = 1; i < r->offset_len; i++) { int n = put_var_int(&s, r->offsets[i] - last); - if (n < 0) { - return -1; - } + if (n < 0) + return n; string_view_consume(&s, n); last = r->offsets[i]; } @@ -560,15 +558,14 @@ static int reftable_obj_record_encode(const void *rec, struct string_view s, static int reftable_obj_record_decode(void *rec, struct reftable_buf key, uint8_t val_type, struct string_view in, - int hash_size UNUSED, - struct reftable_buf *scratch UNUSED) + uint32_t hash_size REFTABLE_UNUSED, + struct reftable_buf *scratch REFTABLE_UNUSED) { struct string_view start = in; struct reftable_obj_record *r = rec; uint64_t count = val_type; int n = 0; uint64_t last; - int j; reftable_obj_record_release(r); @@ -603,8 +600,7 @@ static int reftable_obj_record_decode(void *rec, struct reftable_buf key, string_view_consume(&in, n); last = r->offsets[0]; - j = 1; - while (j < count) { + for (uint64_t j = 1; j < count; j++) { uint64_t delta = 0; 
int n = get_var_int(&delta, &in); if (n < 0) { @@ -613,18 +609,17 @@ static int reftable_obj_record_decode(void *rec, struct reftable_buf key, string_view_consume(&in, n); last = r->offsets[j] = (delta + last); - j++; } return start.len - in.len; } -static int not_a_deletion(const void *p UNUSED) +static int not_a_deletion(const void *p REFTABLE_UNUSED) { return 0; } static int reftable_obj_record_equal_void(const void *a, const void *b, - int hash_size UNUSED) + uint32_t hash_size REFTABLE_UNUSED) { struct reftable_obj_record *ra = (struct reftable_obj_record *) a; struct reftable_obj_record *rb = (struct reftable_obj_record *) b; @@ -664,7 +659,7 @@ static int reftable_obj_record_cmp_void(const void *_a, const void *_b) static struct reftable_record_vtable reftable_obj_record_vtable = { .key = &reftable_obj_record_key, - .type = BLOCK_TYPE_OBJ, + .type = REFTABLE_BLOCK_TYPE_OBJ, .copy_from = &reftable_obj_record_copy_from, .val_type = &reftable_obj_record_val_type, .encode = &reftable_obj_record_encode, @@ -689,7 +684,7 @@ static int reftable_log_record_key(const void *r, struct reftable_buf *dest) return err; ts = (~ts) - rec->update_index; - put_be64(&i64[0], ts); + reftable_put_be64(&i64[0], ts); err = reftable_buf_add(dest, i64, sizeof(i64)); if (err < 0) @@ -699,7 +694,7 @@ static int reftable_log_record_key(const void *r, struct reftable_buf *dest) } static int reftable_log_record_copy_from(void *rec, const void *src_rec, - int hash_size) + uint32_t hash_size) { struct reftable_log_record *dst = rec; const struct reftable_log_record *src = @@ -780,7 +775,7 @@ static uint8_t reftable_log_record_val_type(const void *rec) } static int reftable_log_record_encode(const void *rec, struct string_view s, - int hash_size) + uint32_t hash_size) { const struct reftable_log_record *r = rec; struct string_view start = s; @@ -789,7 +784,7 @@ static int reftable_log_record_encode(const void *rec, struct string_view s, return 0; if (s.len < 2 * hash_size) - return -1; + 
return REFTABLE_ENTRY_TOO_BIG_ERROR; memcpy(s.buf, r->value.update.old_hash, hash_size); memcpy(s.buf + hash_size, r->value.update.new_hash, hash_size); @@ -797,30 +792,30 @@ static int reftable_log_record_encode(const void *rec, struct string_view s, n = encode_string(r->value.update.name ? r->value.update.name : "", s); if (n < 0) - return -1; + return n; string_view_consume(&s, n); n = encode_string(r->value.update.email ? r->value.update.email : "", s); if (n < 0) - return -1; + return n; string_view_consume(&s, n); n = put_var_int(&s, r->value.update.time); if (n < 0) - return -1; + return n; string_view_consume(&s, n); if (s.len < 2) - return -1; + return REFTABLE_ENTRY_TOO_BIG_ERROR; - put_be16(s.buf, r->value.update.tz_offset); + reftable_put_be16(s.buf, r->value.update.tz_offset); string_view_consume(&s, 2); n = encode_string( r->value.update.message ? r->value.update.message : "", s); if (n < 0) - return -1; + return n; string_view_consume(&s, n); return start.len - s.len; @@ -828,7 +823,7 @@ static int reftable_log_record_encode(const void *rec, struct string_view s, static int reftable_log_record_decode(void *rec, struct reftable_buf key, uint8_t val_type, struct string_view in, - int hash_size, struct reftable_buf *scratch) + uint32_t hash_size, struct reftable_buf *scratch) { struct string_view start = in; struct reftable_log_record *r = rec; @@ -846,7 +841,7 @@ static int reftable_log_record_decode(void *rec, struct reftable_buf key, } memcpy(r->refname, key.buf, key.len - 8); - ts = get_be64(key.buf + key.len - 8); + ts = reftable_get_be64((unsigned char *)key.buf + key.len - 8); r->update_index = (~max) - ts; @@ -937,7 +932,7 @@ static int reftable_log_record_decode(void *rec, struct reftable_buf key, goto done; } - r->value.update.tz_offset = get_be16(in.buf); + r->value.update.tz_offset = reftable_get_be16(in.buf); string_view_consume(&in, 2); n = decode_string(scratch, in); @@ -976,7 +971,7 @@ static int null_streq(const char *a, const char *b) 
} static int reftable_log_record_equal_void(const void *a, - const void *b, int hash_size) + const void *b, uint32_t hash_size) { return reftable_log_record_equal((struct reftable_log_record *) a, (struct reftable_log_record *) b, @@ -1000,7 +995,7 @@ static int reftable_log_record_cmp_void(const void *_a, const void *_b) } int reftable_log_record_equal(const struct reftable_log_record *a, - const struct reftable_log_record *b, int hash_size) + const struct reftable_log_record *b, uint32_t hash_size) { if (!(null_streq(a->refname, b->refname) && a->update_index == b->update_index && @@ -1035,7 +1030,7 @@ static int reftable_log_record_is_deletion_void(const void *p) static struct reftable_record_vtable reftable_log_record_vtable = { .key = &reftable_log_record_key, - .type = BLOCK_TYPE_LOG, + .type = REFTABLE_BLOCK_TYPE_LOG, .copy_from = &reftable_log_record_copy_from, .val_type = &reftable_log_record_val_type, .encode = &reftable_log_record_encode, @@ -1054,7 +1049,7 @@ static int reftable_index_record_key(const void *r, struct reftable_buf *dest) } static int reftable_index_record_copy_from(void *rec, const void *src_rec, - int hash_size UNUSED) + uint32_t hash_size REFTABLE_UNUSED) { struct reftable_index_record *dst = rec; const struct reftable_index_record *src = src_rec; @@ -1075,13 +1070,13 @@ static void reftable_index_record_release(void *rec) reftable_buf_release(&idx->last_key); } -static uint8_t reftable_index_record_val_type(const void *rec UNUSED) +static uint8_t reftable_index_record_val_type(const void *rec REFTABLE_UNUSED) { return 0; } static int reftable_index_record_encode(const void *rec, struct string_view out, - int hash_size UNUSED) + uint32_t hash_size REFTABLE_UNUSED) { const struct reftable_index_record *r = (const struct reftable_index_record *)rec; @@ -1097,10 +1092,10 @@ static int reftable_index_record_encode(const void *rec, struct string_view out, } static int reftable_index_record_decode(void *rec, struct reftable_buf key, - 
uint8_t val_type UNUSED, + uint8_t val_type REFTABLE_UNUSED, struct string_view in, - int hash_size UNUSED, - struct reftable_buf *scratch UNUSED) + uint32_t hash_size REFTABLE_UNUSED, + struct reftable_buf *scratch REFTABLE_UNUSED) { struct string_view start = in; struct reftable_index_record *r = rec; @@ -1120,7 +1115,7 @@ static int reftable_index_record_decode(void *rec, struct reftable_buf key, } static int reftable_index_record_equal(const void *a, const void *b, - int hash_size UNUSED) + uint32_t hash_size REFTABLE_UNUSED) { struct reftable_index_record *ia = (struct reftable_index_record *) a; struct reftable_index_record *ib = (struct reftable_index_record *) b; @@ -1137,7 +1132,7 @@ static int reftable_index_record_cmp(const void *_a, const void *_b) static struct reftable_record_vtable reftable_index_record_vtable = { .key = &reftable_index_record_key, - .type = BLOCK_TYPE_INDEX, + .type = REFTABLE_BLOCK_TYPE_INDEX, .copy_from = &reftable_index_record_copy_from, .val_type = &reftable_index_record_val_type, .encode = &reftable_index_record_encode, @@ -1154,14 +1149,14 @@ int reftable_record_key(struct reftable_record *rec, struct reftable_buf *dest) } int reftable_record_encode(struct reftable_record *rec, struct string_view dest, - int hash_size) + uint32_t hash_size) { return reftable_record_vtable(rec)->encode(reftable_record_data(rec), dest, hash_size); } int reftable_record_copy_from(struct reftable_record *rec, - struct reftable_record *src, int hash_size) + struct reftable_record *src, uint32_t hash_size) { assert(src->type == rec->type); @@ -1176,7 +1171,7 @@ uint8_t reftable_record_val_type(struct reftable_record *rec) } int reftable_record_decode(struct reftable_record *rec, struct reftable_buf key, - uint8_t extra, struct string_view src, int hash_size, + uint8_t extra, struct string_view src, uint32_t hash_size, struct reftable_buf *scratch) { return reftable_record_vtable(rec)->decode(reftable_record_data(rec), @@ -1195,15 +1190,17 @@ int 
reftable_record_is_deletion(struct reftable_record *rec) reftable_record_data(rec)); } -int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b) +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b, + int *cmp) { if (a->type != b->type) - BUG("cannot compare reftable records of different type"); - return reftable_record_vtable(a)->cmp( - reftable_record_data(a), reftable_record_data(b)); + return -1; + *cmp = reftable_record_vtable(a)->cmp(reftable_record_data(a), + reftable_record_data(b)); + return 0; } -int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size) +int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, uint32_t hash_size) { if (a->type != b->type) return 0; @@ -1211,7 +1208,7 @@ int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, reftable_record_data(a), reftable_record_data(b), hash_size); } -static int hash_equal(const unsigned char *a, const unsigned char *b, int hash_size) +static int hash_equal(const unsigned char *a, const unsigned char *b, uint32_t hash_size) { if (a && b) return !memcmp(a, b, hash_size); @@ -1220,9 +1217,8 @@ static int hash_equal(const unsigned char *a, const unsigned char *b, int hash_s } int reftable_ref_record_equal(const struct reftable_ref_record *a, - const struct reftable_ref_record *b, int hash_size) + const struct reftable_ref_record *b, uint32_t hash_size) { - assert(hash_size > 0); if (!null_streq(a->refname, b->refname)) return 0; @@ -1279,13 +1275,13 @@ int reftable_log_record_is_deletion(const struct reftable_log_record *log) static void *reftable_record_data(struct reftable_record *rec) { switch (rec->type) { - case BLOCK_TYPE_REF: + case REFTABLE_BLOCK_TYPE_REF: return &rec->u.ref; - case BLOCK_TYPE_LOG: + case REFTABLE_BLOCK_TYPE_LOG: return &rec->u.log; - case BLOCK_TYPE_INDEX: + case REFTABLE_BLOCK_TYPE_INDEX: return &rec->u.idx; - case BLOCK_TYPE_OBJ: + case 
REFTABLE_BLOCK_TYPE_OBJ: return &rec->u.obj; } abort(); @@ -1295,32 +1291,32 @@ static struct reftable_record_vtable * reftable_record_vtable(struct reftable_record *rec) { switch (rec->type) { - case BLOCK_TYPE_REF: + case REFTABLE_BLOCK_TYPE_REF: return &reftable_ref_record_vtable; - case BLOCK_TYPE_LOG: + case REFTABLE_BLOCK_TYPE_LOG: return &reftable_log_record_vtable; - case BLOCK_TYPE_INDEX: + case REFTABLE_BLOCK_TYPE_INDEX: return &reftable_index_record_vtable; - case BLOCK_TYPE_OBJ: + case REFTABLE_BLOCK_TYPE_OBJ: return &reftable_obj_record_vtable; } abort(); } -void reftable_record_init(struct reftable_record *rec, uint8_t typ) +int reftable_record_init(struct reftable_record *rec, uint8_t typ) { memset(rec, 0, sizeof(*rec)); rec->type = typ; switch (typ) { - case BLOCK_TYPE_REF: - case BLOCK_TYPE_LOG: - case BLOCK_TYPE_OBJ: - return; - case BLOCK_TYPE_INDEX: + case REFTABLE_BLOCK_TYPE_REF: + case REFTABLE_BLOCK_TYPE_LOG: + case REFTABLE_BLOCK_TYPE_OBJ: + return 0; + case REFTABLE_BLOCK_TYPE_INDEX: reftable_buf_init(&rec->u.idx.last_key); - return; + return 0; default: - BUG("unhandled record type"); + return REFTABLE_API_ERROR; } } diff --git a/reftable/record.h b/reftable/record.h index 25aa908c85..7953f352a3 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef RECORD_H #define RECORD_H @@ -32,8 +32,10 @@ static inline void string_view_consume(struct string_view *s, int n) s->len -= n; } -/* utilities for de/encoding varints */ - +/* + * Decode and encode a varint. 
Returns the number of bytes read/written, or a + * negative value in case encoding/decoding the varint has failed. + */ int get_var_int(uint64_t *dest, struct string_view *in); int put_var_int(struct string_view *dest, uint64_t val); @@ -45,18 +47,18 @@ struct reftable_record_vtable { /* The record type of ('r' for ref). */ uint8_t type; - int (*copy_from)(void *dest, const void *src, int hash_size); + int (*copy_from)(void *dest, const void *src, uint32_t hash_size); /* a value of [0..7], indicating record subvariants (eg. ref vs. symref * vs ref deletion) */ uint8_t (*val_type)(const void *rec); /* encodes rec into dest, returning how much space was used. */ - int (*encode)(const void *rec, struct string_view dest, int hash_size); + int (*encode)(const void *rec, struct string_view dest, uint32_t hash_size); /* decode data from `src` into the record. */ int (*decode)(void *rec, struct reftable_buf key, uint8_t extra, - struct string_view src, int hash_size, + struct string_view src, uint32_t hash_size, struct reftable_buf *scratch); /* deallocate and null the record. */ @@ -66,16 +68,13 @@ struct reftable_record_vtable { int (*is_deletion)(const void *rec); /* Are two records equal? This assumes they have the same type. Returns 0 for non-equal. */ - int (*equal)(const void *a, const void *b, int hash_size); + int (*equal)(const void *a, const void *b, uint32_t hash_size); /* * Compare keys of two records with each other. The records must have * the same type. */ int (*cmp)(const void *a, const void *b); - - /* Print on stdout, for debugging. */ - void (*print)(const void *rec, int hash_size); }; /* returns true for recognized block types. Block start with the block type. */ @@ -131,21 +130,21 @@ struct reftable_record { } u; }; -/* Initialize the reftable record for the given type */ -void reftable_record_init(struct reftable_record *rec, uint8_t typ); +/* Initialize the reftable record for the given type. 
*/ +int reftable_record_init(struct reftable_record *rec, uint8_t typ); /* see struct record_vtable */ -int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b); -int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size); +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b, int *cmp); +int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, uint32_t hash_size); int reftable_record_key(struct reftable_record *rec, struct reftable_buf *dest); int reftable_record_copy_from(struct reftable_record *rec, - struct reftable_record *src, int hash_size); + struct reftable_record *src, uint32_t hash_size); uint8_t reftable_record_val_type(struct reftable_record *rec); int reftable_record_encode(struct reftable_record *rec, struct string_view dest, - int hash_size); + uint32_t hash_size); int reftable_record_decode(struct reftable_record *rec, struct reftable_buf key, uint8_t extra, struct string_view src, - int hash_size, struct reftable_buf *scratch); + uint32_t hash_size, struct reftable_buf *scratch); int reftable_record_is_deletion(struct reftable_record *rec); static inline uint8_t reftable_record_type(struct reftable_record *rec) diff --git a/reftable/reftable-basics.h b/reftable/reftable-basics.h index e0397ed583..6d73f19c85 100644 --- a/reftable/reftable-basics.h +++ b/reftable/reftable-basics.h @@ -4,13 +4,21 @@ * Use of this source code is governed by a BSD-style * license that can be found in the LICENSE file or at * https://developers.google.com/open-source/licenses/bsd -*/ + */ #ifndef REFTABLE_BASICS_H #define REFTABLE_BASICS_H #include <stddef.h> +/* A buffer that contains arbitrary byte slices. */ +struct reftable_buf { + size_t alloc; + size_t len; + char *buf; +}; +#define REFTABLE_BUF_INIT { 0 } + /* * Hash functions understood by the reftable library. 
Note that the values are * arbitrary and somewhat random such that we can easily detect cases where the diff --git a/reftable/reftable-block.h b/reftable/reftable-block.h new file mode 100644 index 0000000000..0b05a8f7e3 --- /dev/null +++ b/reftable/reftable-block.h @@ -0,0 +1,75 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_BLOCK_H +#define REFTABLE_BLOCK_H + +#include <stdint.h> + +#include "reftable-basics.h" +#include "reftable-blocksource.h" +#include "reftable-iterator.h" + +struct z_stream_s; + +/* + * A block part of a reftable. Contains records as well as some metadata + * describing them. + */ +struct reftable_block { + /* + * Offset of the block header; nonzero for the first block in a + * reftable. + */ + uint32_t header_off; + + /* The memory block. */ + struct reftable_block_data block_data; + uint32_t hash_size; + + /* Uncompressed data for log entries. */ + struct z_stream_s *zstream; + unsigned char *uncompressed_data; + size_t uncompressed_cap; + + /* + * Restart point data. Restart points are located after the block's + * record data. + */ + uint16_t restart_count; + uint32_t restart_off; + + /* + * Size of the data in the file. For log blocks, this is the compressed + * size. + */ + uint32_t full_block_size; + uint8_t block_type; +}; + +/* Initialize a reftable block from the given block source. */ +int reftable_block_init(struct reftable_block *b, + struct reftable_block_source *source, + uint32_t offset, uint32_t header_size, + uint32_t table_block_size, uint32_t hash_size, + uint8_t want_type); + +/* Release resources allocated by the block. */ +void reftable_block_release(struct reftable_block *b); + +/* Initialize a generic record iterator from the given block. 
*/ +int reftable_block_init_iterator(const struct reftable_block *b, + struct reftable_iterator *it); + +/* Returns the block type (eg. 'r' for refs). */ +uint8_t reftable_block_type(const struct reftable_block *b); + +/* Decodes the first key in the block. */ +int reftable_block_first_key(const struct reftable_block *b, struct reftable_buf *key); + +#endif /* REFTABLE_BLOCK_H */ diff --git a/reftable/reftable-blocksource.h b/reftable/reftable-blocksource.h index 5aa3990a57..f5ba867bd6 100644 --- a/reftable/reftable-blocksource.h +++ b/reftable/reftable-blocksource.h @@ -1,17 +1,18 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef REFTABLE_BLOCKSOURCE_H #define REFTABLE_BLOCKSOURCE_H #include <stdint.h> -/* block_source is a generic wrapper for a seekable readable file. +/* + * Generic wrapper for a seekable readable file. */ struct reftable_block_source { struct reftable_block_source_vtable *ops; @@ -20,25 +21,28 @@ struct reftable_block_source { /* a contiguous segment of bytes. It keeps track of its generating block_source * so it can return itself into the pool. */ -struct reftable_block { +struct reftable_block_data { uint8_t *data; - int len; + size_t len; struct reftable_block_source source; }; /* block_source_vtable are the operations that make up block_source */ struct reftable_block_source_vtable { - /* returns the size of a block source */ + /* Returns the size of a block source. */ uint64_t (*size)(void *source); - /* reads a segment from the block source. 
It is an error to read - beyond the end of the block */ - int (*read_block)(void *source, struct reftable_block *dest, - uint64_t off, uint32_t size); - /* mark the block as read; may return the data back to malloc */ - void (*return_block)(void *source, struct reftable_block *blockp); + /* + * Reads a segment from the block source. It is an error to read beyond + * the end of the block. + */ + ssize_t (*read_data)(void *source, struct reftable_block_data *dest, + uint64_t off, uint32_t size); + + /* Mark the block as read; may release the data. */ + void (*release_data)(void *source, struct reftable_block_data *data); - /* release all resources associated with the block source */ + /* Release all resources associated with the block source. */ void (*close)(void *source); }; diff --git a/reftable/reftable-constants.h b/reftable/reftable-constants.h new file mode 100644 index 0000000000..4ae9ba4bac --- /dev/null +++ b/reftable/reftable-constants.h @@ -0,0 +1,18 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_CONSTANTS_H +#define REFTABLE_CONSTANTS_H + +#define REFTABLE_BLOCK_TYPE_LOG 'g' +#define REFTABLE_BLOCK_TYPE_INDEX 'i' +#define REFTABLE_BLOCK_TYPE_REF 'r' +#define REFTABLE_BLOCK_TYPE_OBJ 'o' +#define REFTABLE_BLOCK_TYPE_ANY 0 + +#endif /* REFTABLE_CONSTANTS_H */ diff --git a/reftable/reftable-error.h b/reftable/reftable-error.h index f404826562..d100e0df92 100644 --- a/reftable/reftable-error.h +++ b/reftable/reftable-error.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file 
or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef REFTABLE_ERROR_H #define REFTABLE_ERROR_H @@ -30,6 +30,7 @@ enum reftable_error { /* Misuse of the API: * - on writing a record with NULL refname. + * - on writing a record before setting the writer limits. * - on writing a reftable_ref_record outside the table limits * - on writing a ref or log record before the stack's * next_update_inde*x diff --git a/reftable/reftable-iterator.h b/reftable/reftable-iterator.h index e3bf688d53..af582028c2 100644 --- a/reftable/reftable-iterator.h +++ b/reftable/reftable-iterator.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef REFTABLE_ITERATOR_H #define REFTABLE_ITERATOR_H diff --git a/reftable/reftable-merged.h b/reftable/reftable-merged.h index f2d01c3ef8..e5af846b32 100644 --- a/reftable/reftable-merged.h +++ b/reftable/reftable-merged.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef REFTABLE_MERGED_H #define REFTABLE_MERGED_H @@ -26,14 +26,14 @@ https://developers.google.com/open-source/licenses/bsd /* A merged table is implements seeking/iterating over a stack of tables. 
*/ struct reftable_merged_table; -struct reftable_reader; +struct reftable_table; /* - * reftable_merged_table_new creates a new merged table. The readers must be + * reftable_merged_table_new creates a new merged table. The tables must be * kept alive as long as the merged table is still in use. */ int reftable_merged_table_new(struct reftable_merged_table **dest, - struct reftable_reader **readers, size_t n, + struct reftable_table **tables, size_t n, enum reftable_hash hash_id); /* Initialize a merged table iterator for reading refs. */ diff --git a/reftable/reftable-reader.h b/reftable/reftable-reader.h deleted file mode 100644 index 0085fbb903..0000000000 --- a/reftable/reftable-reader.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - Copyright 2020 Google LLC - - Use of this source code is governed by a BSD-style - license that can be found in the LICENSE file or at - https://developers.google.com/open-source/licenses/bsd -*/ - -#ifndef REFTABLE_READER_H -#define REFTABLE_READER_H - -#include "reftable-iterator.h" -#include "reftable-blocksource.h" - -/* - * Reading single tables - * - * The follow routines are for reading single files. For an - * application-level interface, skip ahead to struct - * reftable_merged_table and struct reftable_stack. - */ - -/* The reader struct is a handle to an open reftable file. */ -struct reftable_reader; - -/* reftable_reader_new opens a reftable for reading. If successful, - * returns 0 code and sets pp. The name is used for creating a - * stack. Typically, it is the basename of the file. The block source - * `src` is owned by the reader, and is closed on calling - * reftable_reader_destroy(). On error, the block source `src` is - * closed as well. - */ -int reftable_reader_new(struct reftable_reader **pp, - struct reftable_block_source *src, const char *name); - -/* - * Manage the reference count of the reftable reader. 
A newly initialized - * reader starts with a refcount of 1 and will be deleted once the refcount has - * reached 0. - * - * This is required because readers may have longer lifetimes than the stack - * they belong to. The stack may for example be reloaded while the old tables - * are still being accessed by an iterator. - */ -void reftable_reader_incref(struct reftable_reader *reader); -void reftable_reader_decref(struct reftable_reader *reader); - -/* Initialize a reftable iterator for reading refs. */ -int reftable_reader_init_ref_iterator(struct reftable_reader *r, - struct reftable_iterator *it); - -/* Initialize a reftable iterator for reading logs. */ -int reftable_reader_init_log_iterator(struct reftable_reader *r, - struct reftable_iterator *it); - -/* returns the hash ID used in this table. */ -enum reftable_hash reftable_reader_hash_id(struct reftable_reader *r); - -/* return an iterator for the refs pointing to `oid`. */ -int reftable_reader_refs_for(struct reftable_reader *r, - struct reftable_iterator *it, uint8_t *oid); - -/* return the max_update_index for a table */ -uint64_t reftable_reader_max_update_index(struct reftable_reader *r); - -/* return the min_update_index for a table */ -uint64_t reftable_reader_min_update_index(struct reftable_reader *r); - -/* print blocks onto stdout for debugging. 
*/ -int reftable_reader_print_blocks(const char *tablename); - -#endif diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h index ddd48eb579..385a74cc86 100644 --- a/reftable/reftable-record.h +++ b/reftable/reftable-record.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef REFTABLE_RECORD_H #define REFTABLE_RECORD_H @@ -65,7 +65,7 @@ void reftable_ref_record_release(struct reftable_ref_record *ref); /* returns whether two reftable_ref_records are the same. Useful for testing. */ int reftable_ref_record_equal(const struct reftable_ref_record *a, - const struct reftable_ref_record *b, int hash_size); + const struct reftable_ref_record *b, uint32_t hash_size); /* reftable_log_record holds a reflog entry */ struct reftable_log_record { @@ -105,6 +105,6 @@ void reftable_log_record_release(struct reftable_log_record *log); /* returns whether two records are equal. Useful for testing. 
*/ int reftable_log_record_equal(const struct reftable_log_record *a, - const struct reftable_log_record *b, int hash_size); + const struct reftable_log_record *b, uint32_t hash_size); #endif diff --git a/reftable/reftable-stack.h b/reftable/reftable-stack.h index ae14270ea7..910ec6ef3a 100644 --- a/reftable/reftable-stack.h +++ b/reftable/reftable-stack.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef REFTABLE_STACK_H #define REFTABLE_STACK_H diff --git a/reftable/reftable-table.h b/reftable/reftable-table.h new file mode 100644 index 0000000000..5f935d02e3 --- /dev/null +++ b/reftable/reftable-table.h @@ -0,0 +1,115 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_TABLE_H +#define REFTABLE_TABLE_H + +#include "reftable-iterator.h" +#include "reftable-block.h" +#include "reftable-blocksource.h" + +/* + * Reading single tables + * + * The following routines are for reading single files. For an + * application-level interface, skip ahead to struct + * reftable_merged_table and struct reftable_stack. + */ + +/* Metadata for a block type. */ +struct reftable_table_offsets { + int is_present; + uint64_t offset; + uint64_t index_offset; +}; + +/* The table struct is a handle to an open reftable file. */ +struct reftable_table { + /* for convenience, associate a name with the instance. */ + char *name; + struct reftable_block_source source; + + /* Size of the file, excluding the footer.
*/ + uint64_t size; + + /* The hash function used for ref records. */ + enum reftable_hash hash_id; + + uint32_t block_size; + uint64_t min_update_index; + uint64_t max_update_index; + /* Length of the OID keys in the 'o' section */ + int object_id_len; + int version; + + struct reftable_table_offsets ref_offsets; + struct reftable_table_offsets obj_offsets; + struct reftable_table_offsets log_offsets; + + uint64_t refcount; +}; + +/* reftable_table_new opens a reftable for reading. If successful, + * returns 0 code and sets pp. The name is used for creating a + * stack. Typically, it is the basename of the file. The block source + * `src` is owned by the table, and is closed on calling + * reftable_table_destroy(). On error, the block source `src` is + * closed as well. + */ +int reftable_table_new(struct reftable_table **out, + struct reftable_block_source *src, const char *name); + +/* + * Manage the reference count of the reftable table. A newly initialized + * table starts with a refcount of 1 and will be deleted once the refcount has + * reached 0. + * + * This is required because tables may have longer lifetimes than the stack + * they belong to. The stack may for example be reloaded while the old tables + * are still being accessed by an iterator. + */ +void reftable_table_incref(struct reftable_table *table); +void reftable_table_decref(struct reftable_table *table); + +/* Initialize a reftable iterator for reading refs. */ +int reftable_table_init_ref_iterator(struct reftable_table *t, + struct reftable_iterator *it); + +/* Initialize a reftable iterator for reading logs. */ +int reftable_table_init_log_iterator(struct reftable_table *t, + struct reftable_iterator *it); + +/* returns the hash ID used in this table. */ +enum reftable_hash reftable_table_hash_id(struct reftable_table *t); + +/* return an iterator for the refs pointing to `oid`. 
*/ +int reftable_table_refs_for(struct reftable_table *t, + struct reftable_iterator *it, uint8_t *oid); + +/* return the max_update_index for a table */ +uint64_t reftable_table_max_update_index(struct reftable_table *t); + +/* return the min_update_index for a table */ +uint64_t reftable_table_min_update_index(struct reftable_table *t); + +/* + * An iterator that iterates through the blocks contained in a given table. + */ +struct reftable_table_iterator { + void *iter_arg; +}; + +int reftable_table_iterator_init(struct reftable_table_iterator *it, + struct reftable_table *t); + +void reftable_table_iterator_release(struct reftable_table_iterator *it); + +int reftable_table_iterator_next(struct reftable_table_iterator *it, + const struct reftable_block **out); + +#endif diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h index 5f9afa620b..0fbeff17f4 100644 --- a/reftable/reftable-writer.h +++ b/reftable/reftable-writer.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef REFTABLE_WRITER_H #define REFTABLE_WRITER_H @@ -84,7 +84,7 @@ struct reftable_block_stats { /* total number of entries written */ int entries; /* total number of key restarts */ - int restarts; + uint32_t restarts; /* total number of blocks */ int blocks; /* total number of index blocks */ @@ -124,17 +124,21 @@ int reftable_writer_new(struct reftable_writer **out, int (*flush_func)(void *), void *writer_arg, const struct reftable_write_options *opts); -/* Set the range of update indices for the records we will add. 
When writing a - table into a stack, the min should be at least - reftable_stack_next_update_index(), or REFTABLE_API_ERROR is returned. - - For transactional updates to a stack, typically min==max, and the - update_index can be obtained by inspeciting the stack. When converting an - existing ref database into a single reftable, this would be a range of - update-index timestamps. +/* + * Set the range of update indices for the records we will add. When writing a + * table into a stack, the min should be at least + * reftable_stack_next_update_index(), or REFTABLE_API_ERROR is returned. + * + * For transactional updates to a stack, typically min==max, and the + * update_index can be obtained by inspeciting the stack. When converting an + * existing ref database into a single reftable, this would be a range of + * update-index timestamps. + * + * The function should be called before adding any records to the writer. If not + * it will fail with REFTABLE_API_ERROR. */ -void reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, - uint64_t max); +int reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, + uint64_t max); /* Add a reftable_ref_record. 
The record should have names that come after diff --git a/reftable/stack.c b/reftable/stack.c index 531660a49f..4caf96aa1d 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -1,20 +1,20 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #include "stack.h" #include "system.h" #include "constants.h" #include "merged.h" -#include "reader.h" #include "reftable-error.h" #include "reftable-record.h" #include "reftable-merged.h" +#include "table.h" #include "writer.h" static int stack_try_add(struct reftable_stack *st, @@ -48,6 +48,25 @@ static int stack_fsync(const struct reftable_write_options *opts, int fd) return fsync(fd); } +static ssize_t reftable_write_data(int fd, const void *data, size_t size) +{ + size_t total_written = 0; + const char *p = data; + + while (total_written < size) { + ssize_t bytes_written = write(fd, p, size - total_written); + if (bytes_written < 0 && (errno == EAGAIN || errno == EINTR)) + continue; + if (bytes_written < 0) + return REFTABLE_IO_ERROR; + + total_written += bytes_written; + p += bytes_written; + } + + return total_written; +} + struct fd_writer { const struct reftable_write_options *opts; int fd; @@ -56,7 +75,7 @@ struct fd_writer { static ssize_t fd_writer_write(void *arg, const void *data, size_t sz) { struct fd_writer *writer = arg; - return write_in_full(writer->fd, data, sz); + return reftable_write_data(writer->fd, data, sz); } static int fd_writer_flush(void *arg) @@ -115,13 +134,16 @@ out: static int fd_read_lines(int fd, char ***namesp) { - off_t size = lseek(fd, 0, SEEK_END); char *buf = NULL; int err = 0; + off_t size; + + size = lseek(fd, 0, SEEK_END); if (size 
< 0) { err = REFTABLE_IO_ERROR; goto done; } + err = lseek(fd, 0, SEEK_SET); if (err < 0) { err = REFTABLE_IO_ERROR; @@ -134,9 +156,16 @@ static int fd_read_lines(int fd, char ***namesp) goto done; } - if (read_in_full(fd, buf, size) != size) { - err = REFTABLE_IO_ERROR; - goto done; + for (off_t total_read = 0; total_read < size; ) { + ssize_t bytes_read = read(fd, buf + total_read, size - total_read); + if (bytes_read < 0 && (errno == EAGAIN || errno == EINTR)) + continue; + if (bytes_read < 0 || !bytes_read) { + err = REFTABLE_IO_ERROR; + goto done; + } + + total_read += bytes_read; } buf[size] = 0; @@ -174,14 +203,14 @@ int reftable_stack_init_ref_iterator(struct reftable_stack *st, struct reftable_iterator *it) { return merged_table_init_iter(reftable_stack_merged_table(st), - it, BLOCK_TYPE_REF); + it, REFTABLE_BLOCK_TYPE_REF); } int reftable_stack_init_log_iterator(struct reftable_stack *st, struct reftable_iterator *it) { return merged_table_init_iter(reftable_stack_merged_table(st), - it, BLOCK_TYPE_LOG); + it, REFTABLE_BLOCK_TYPE_LOG); } struct reftable_merged_table * @@ -219,11 +248,11 @@ void reftable_stack_destroy(struct reftable_stack *st) REFTABLE_FREE_AND_NULL(names); } - if (st->readers) { - int i = 0; + if (st->tables) { struct reftable_buf filename = REFTABLE_BUF_INIT; - for (i = 0; i < st->readers_len; i++) { - const char *name = reader_name(st->readers[i]); + + for (size_t i = 0; i < st->tables_len; i++) { + const char *name = reftable_table_name(st->tables[i]); int try_unlinking = 1; reftable_buf_reset(&filename); @@ -231,16 +260,17 @@ void reftable_stack_destroy(struct reftable_stack *st) if (stack_filename(&filename, st, name) < 0) try_unlinking = 0; } - reftable_reader_decref(st->readers[i]); + reftable_table_decref(st->tables[i]); if (try_unlinking && filename.len) { /* On Windows, can only unlink after closing. 
*/ unlink(filename.buf); } } + reftable_buf_release(&filename); - st->readers_len = 0; - REFTABLE_FREE_AND_NULL(st->readers); + st->tables_len = 0; + REFTABLE_FREE_AND_NULL(st->tables); } if (st->list_fd >= 0) { @@ -254,14 +284,14 @@ void reftable_stack_destroy(struct reftable_stack *st) free_names(names); } -static struct reftable_reader **stack_copy_readers(struct reftable_stack *st, - size_t cur_len) +static struct reftable_table **stack_copy_tables(struct reftable_stack *st, + size_t cur_len) { - struct reftable_reader **cur = reftable_calloc(cur_len, sizeof(*cur)); + struct reftable_table **cur = reftable_calloc(cur_len, sizeof(*cur)); if (!cur) return NULL; for (size_t i = 0; i < cur_len; i++) - cur[i] = st->readers[i]; + cur[i] = st->tables[i]; return cur; } @@ -269,19 +299,19 @@ static int reftable_stack_reload_once(struct reftable_stack *st, const char **names, int reuse_open) { - size_t cur_len = !st->merged ? 0 : st->merged->readers_len; - struct reftable_reader **cur = NULL; - struct reftable_reader **reused = NULL; - struct reftable_reader **new_readers = NULL; + size_t cur_len = !st->merged ? 
0 : st->merged->tables_len; + struct reftable_table **cur = NULL; + struct reftable_table **reused = NULL; + struct reftable_table **new_tables = NULL; size_t reused_len = 0, reused_alloc = 0, names_len; - size_t new_readers_len = 0; + size_t new_tables_len = 0; struct reftable_merged_table *new_merged = NULL; struct reftable_buf table_path = REFTABLE_BUF_INIT; int err = 0; size_t i; if (cur_len) { - cur = stack_copy_readers(st, cur_len); + cur = stack_copy_tables(st, cur_len); if (!cur) { err = REFTABLE_OUT_OF_MEMORY_ERROR; goto done; @@ -291,28 +321,28 @@ static int reftable_stack_reload_once(struct reftable_stack *st, names_len = names_length(names); if (names_len) { - new_readers = reftable_calloc(names_len, sizeof(*new_readers)); - if (!new_readers) { + new_tables = reftable_calloc(names_len, sizeof(*new_tables)); + if (!new_tables) { err = REFTABLE_OUT_OF_MEMORY_ERROR; goto done; } } while (*names) { - struct reftable_reader *rd = NULL; + struct reftable_table *table = NULL; const char *name = *names++; /* this is linear; we assume compaction keeps the number of tables under control so this is not quadratic. */ for (i = 0; reuse_open && i < cur_len; i++) { if (cur[i] && 0 == strcmp(cur[i]->name, name)) { - rd = cur[i]; + table = cur[i]; cur[i] = NULL; /* * When reloading the stack fails, we end up - * releasing all new readers. This also - * includes the reused readers, even though + * releasing all new tables. This also + * includes the reused tables, even though * they are still in used by the old stack. We * thus need to keep them alive here, which we * do by bumping their refcount. 
@@ -324,13 +354,13 @@ static int reftable_stack_reload_once(struct reftable_stack *st, err = REFTABLE_OUT_OF_MEMORY_ERROR; goto done; } - reused[reused_len++] = rd; - reftable_reader_incref(rd); + reused[reused_len++] = table; + reftable_table_incref(table); break; } } - if (!rd) { + if (!table) { struct reftable_block_source src = { NULL }; err = stack_filename(&table_path, st, name); @@ -342,36 +372,36 @@ static int reftable_stack_reload_once(struct reftable_stack *st, if (err < 0) goto done; - err = reftable_reader_new(&rd, &src, name); + err = reftable_table_new(&table, &src, name); if (err < 0) goto done; } - new_readers[new_readers_len] = rd; - new_readers_len++; + new_tables[new_tables_len] = table; + new_tables_len++; } /* success! */ - err = reftable_merged_table_new(&new_merged, new_readers, - new_readers_len, st->opts.hash_id); + err = reftable_merged_table_new(&new_merged, new_tables, + new_tables_len, st->opts.hash_id); if (err < 0) goto done; /* - * Close the old, non-reused readers and proactively try to unlink + * Close the old, non-reused tables and proactively try to unlink * them. This is done for systems like Windows, where the underlying - * file of such an open reader wouldn't have been possible to be + * file of such an open table wouldn't have been possible to be * unlinked by the compacting process. 
*/ for (i = 0; i < cur_len; i++) { if (cur[i]) { - const char *name = reader_name(cur[i]); + const char *name = reftable_table_name(cur[i]); err = stack_filename(&table_path, st, name); if (err < 0) goto done; - reftable_reader_decref(cur[i]); + reftable_table_decref(cur[i]); unlink(table_path.buf); } } @@ -382,25 +412,25 @@ static int reftable_stack_reload_once(struct reftable_stack *st, new_merged->suppress_deletions = 1; st->merged = new_merged; - if (st->readers) - reftable_free(st->readers); - st->readers = new_readers; - st->readers_len = new_readers_len; - new_readers = NULL; - new_readers_len = 0; + if (st->tables) + reftable_free(st->tables); + st->tables = new_tables; + st->tables_len = new_tables_len; + new_tables = NULL; + new_tables_len = 0; /* - * Decrement the refcount of reused readers again. This only needs to + * Decrement the refcount of reused tables again. This only needs to * happen on the successful case, because on the unsuccessful one we - * decrement their refcount via `new_readers`. + * decrement their refcount via `new_tables`. 
*/ for (i = 0; i < reused_len; i++) - reftable_reader_decref(reused[i]); + reftable_table_decref(reused[i]); done: - for (i = 0; i < new_readers_len; i++) - reftable_reader_decref(new_readers[i]); - reftable_free(new_readers); + for (i = 0; i < new_tables_len; i++) + reftable_table_decref(new_tables[i]); + reftable_free(new_tables); reftable_free(reused); reftable_free(cur); reftable_buf_release(&table_path); @@ -493,8 +523,8 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, close(fd); fd = -1; - delay = delay + (delay * rand()) / RAND_MAX + 1; - sleep_millisec(delay); + delay = delay + (delay * reftable_rand()) / UINT32_MAX + 1; + poll(NULL, 0, delay); } out: @@ -568,7 +598,6 @@ static int stack_uptodate(struct reftable_stack *st) { char **names = NULL; int err; - int i = 0; /* * When we have cached stat information available then we use it to @@ -586,10 +615,10 @@ static int stack_uptodate(struct reftable_stack *st) /* * It's fine for "tables.list" to not exist. In that * case, we have to refresh when the loaded stack has - * any readers. + * any tables. 
*/ if (errno == ENOENT) - return !!st->readers_len; + return !!st->tables_len; return REFTABLE_IO_ERROR; } @@ -608,19 +637,19 @@ static int stack_uptodate(struct reftable_stack *st) if (err < 0) return err; - for (i = 0; i < st->readers_len; i++) { + for (size_t i = 0; i < st->tables_len; i++) { if (!names[i]) { err = 1; goto done; } - if (strcmp(st->readers[i]->name, names[i])) { + if (strcmp(st->tables[i]->name, names[i])) { err = 1; goto done; } } - if (names[st->merged->readers_len]) { + if (names[st->merged->tables_len]) { err = 1; goto done; } @@ -659,7 +688,7 @@ int reftable_stack_add(struct reftable_stack *st, static int format_name(struct reftable_buf *dest, uint64_t min, uint64_t max) { char buf[100]; - uint32_t rnd = (uint32_t)git_rand(); + uint32_t rnd = reftable_rand(); snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x", min, max, rnd); reftable_buf_reset(dest); @@ -763,8 +792,8 @@ int reftable_addition_commit(struct reftable_addition *add) if (add->new_tables_len == 0) goto done; - for (i = 0; i < add->stack->merged->readers_len; i++) { - if ((err = reftable_buf_addstr(&table_list, add->stack->readers[i]->name)) < 0 || + for (i = 0; i < add->stack->merged->tables_len; i++) { + if ((err = reftable_buf_addstr(&table_list, add->stack->tables[i]->name)) < 0 || (err = reftable_buf_addstr(&table_list, "\n")) < 0) goto done; } @@ -774,7 +803,8 @@ int reftable_addition_commit(struct reftable_addition *add) goto done; } - err = write_in_full(add->tables_list_lock.fd, table_list.buf, table_list.len); + err = reftable_write_data(add->tables_list_lock.fd, + table_list.buf, table_list.len); reftable_buf_release(&table_list); if (err < 0) { err = REFTABLE_IO_ERROR; @@ -970,9 +1000,9 @@ done: uint64_t reftable_stack_next_update_index(struct reftable_stack *st) { - int sz = st->merged->readers_len; + int sz = st->merged->tables_len; if (sz > 0) - return reftable_reader_max_update_index(st->readers[sz - 1]) + + return 
reftable_table_max_update_index(st->tables[sz - 1]) + 1; return 1; } @@ -991,8 +1021,8 @@ static int stack_compact_locked(struct reftable_stack *st, struct reftable_tmpfile tab_file = REFTABLE_TMPFILE_INIT; int err = 0; - err = format_name(&next_name, reftable_reader_min_update_index(st->readers[first]), - reftable_reader_max_update_index(st->readers[last])); + err = format_name(&next_name, reftable_table_min_update_index(st->tables[first]), + reftable_table_max_update_index(st->tables[last])); if (err < 0) goto done; @@ -1057,16 +1087,18 @@ static int stack_write_compact(struct reftable_stack *st, int err = 0; for (size_t i = first; i <= last; i++) - st->stats.bytes += st->readers[i]->size; - reftable_writer_set_limits(wr, st->readers[first]->min_update_index, - st->readers[last]->max_update_index); + st->stats.bytes += st->tables[i]->size; + err = reftable_writer_set_limits(wr, st->tables[first]->min_update_index, + st->tables[last]->max_update_index); + if (err < 0) + goto done; - err = reftable_merged_table_new(&mt, st->readers + first, subtabs_len, + err = reftable_merged_table_new(&mt, st->tables + first, subtabs_len, st->opts.hash_id); if (err < 0) goto done; - err = merged_table_init_iter(mt, &it, BLOCK_TYPE_REF); + err = merged_table_init_iter(mt, &it, REFTABLE_BLOCK_TYPE_REF); if (err < 0) goto done; @@ -1094,7 +1126,7 @@ static int stack_write_compact(struct reftable_stack *st, } reftable_iterator_destroy(&it); - err = merged_table_init_iter(mt, &it, BLOCK_TYPE_LOG); + err = merged_table_init_iter(mt, &it, REFTABLE_BLOCK_TYPE_LOG); if (err < 0) goto done; @@ -1218,7 +1250,7 @@ static int stack_compact_range(struct reftable_stack *st, table_locks[i] = REFTABLE_FLOCK_INIT; for (i = last + 1; i > first; i--) { - err = stack_filename(&table_name, st, reader_name(st->readers[i - 1])); + err = stack_filename(&table_name, st, reftable_table_name(st->tables[i - 1])); if (err < 0) goto done; @@ -1344,7 +1376,7 @@ static int stack_compact_range(struct 
reftable_stack *st, * compacted in the updated "tables.list" file. */ for (size_t i = 0; names[i]; i++) { - if (strcmp(names[i], st->readers[first]->name)) + if (strcmp(names[i], st->tables[first]->name)) continue; /* @@ -1354,8 +1386,8 @@ static int stack_compact_range(struct reftable_stack *st, * have compacted them. */ for (size_t j = 1; j < last - first + 1; j++) { - const char *old = first + j < st->merged->readers_len ? - st->readers[first + j]->name : NULL; + const char *old = first + j < st->merged->tables_len ? + st->tables[first + j]->name : NULL; const char *new = names[i + j]; /* @@ -1395,16 +1427,16 @@ static int stack_compact_range(struct reftable_stack *st, * `fd_read_lines()` uses a `NULL` sentinel to indicate that * the array is at its end. As we use `free_names()` to free * the array, we need to include this sentinel value here and - * thus have to allocate `readers_len + 1` many entries. + * thus have to allocate `tables_len + 1` many entries. */ - REFTABLE_CALLOC_ARRAY(names, st->merged->readers_len + 1); + REFTABLE_CALLOC_ARRAY(names, st->merged->tables_len + 1); if (!names) { err = REFTABLE_OUT_OF_MEMORY_ERROR; goto done; } - for (size_t i = 0; i < st->merged->readers_len; i++) { - names[i] = reftable_strdup(st->readers[i]->name); + for (size_t i = 0; i < st->merged->tables_len; i++) { + names[i] = reftable_strdup(st->tables[i]->name); if (!names[i]) { err = REFTABLE_OUT_OF_MEMORY_ERROR; goto done; @@ -1419,8 +1451,8 @@ static int stack_compact_range(struct reftable_stack *st, * it into place now. 
*/ if (!is_empty_table) { - err = format_name(&new_table_name, st->readers[first]->min_update_index, - st->readers[last]->max_update_index); + err = format_name(&new_table_name, st->tables[first]->min_update_index, + st->tables[last]->max_update_index); if (err < 0) goto done; @@ -1458,8 +1490,8 @@ static int stack_compact_range(struct reftable_stack *st, goto done; } - err = write_in_full(tables_list_lock.fd, - tables_list_buf.buf, tables_list_buf.len); + err = reftable_write_data(tables_list_lock.fd, + tables_list_buf.buf, tables_list_buf.len); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); @@ -1527,7 +1559,7 @@ done: int reftable_stack_compact_all(struct reftable_stack *st, struct reftable_log_expiry_config *config) { - size_t last = st->merged->readers_len ? st->merged->readers_len - 1 : 0; + size_t last = st->merged->tables_len ? st->merged->tables_len - 1 : 0; return stack_compact_range(st, 0, last, config, 0); } @@ -1618,12 +1650,12 @@ static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st) int overhead = header_size(version) - 1; uint64_t *sizes; - REFTABLE_CALLOC_ARRAY(sizes, st->merged->readers_len); + REFTABLE_CALLOC_ARRAY(sizes, st->merged->tables_len); if (!sizes) return NULL; - for (size_t i = 0; i < st->merged->readers_len; i++) - sizes[i] = st->readers[i]->size - overhead; + for (size_t i = 0; i < st->merged->tables_len; i++) + sizes[i] = st->tables[i]->size - overhead; return sizes; } @@ -1633,14 +1665,14 @@ int reftable_stack_auto_compact(struct reftable_stack *st) struct segment seg; uint64_t *sizes; - if (st->merged->readers_len < 2) + if (st->merged->tables_len < 2) return 0; sizes = stack_table_sizes_for_compaction(st); if (!sizes) return REFTABLE_OUT_OF_MEMORY_ERROR; - seg = suggest_compaction_segment(sizes, st->merged->readers_len, + seg = suggest_compaction_segment(sizes, st->merged->tables_len, st->opts.auto_compaction_factor); reftable_free(sizes); @@ -1731,7 +1763,7 @@ static void 
remove_maybe_stale_table(struct reftable_stack *st, uint64_t max, int err = 0; uint64_t update_idx = 0; struct reftable_block_source src = { NULL }; - struct reftable_reader *rd = NULL; + struct reftable_table *table = NULL; struct reftable_buf table_path = REFTABLE_BUF_INIT; err = stack_filename(&table_path, st, name); @@ -1742,12 +1774,12 @@ static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max, if (err < 0) goto done; - err = reftable_reader_new(&rd, &src, name); + err = reftable_table_new(&table, &src, name); if (err < 0) goto done; - update_idx = reftable_reader_max_update_index(rd); - reftable_reader_decref(rd); + update_idx = reftable_table_max_update_index(table); + reftable_table_decref(table); if (update_idx <= max) { unlink(table_path.buf); @@ -1767,14 +1799,12 @@ static int reftable_stack_clean_locked(struct reftable_stack *st) } while ((d = readdir(dir))) { - int i = 0; int found = 0; if (!is_table_name(d->d_name)) continue; - for (i = 0; !found && i < st->readers_len; i++) { - found = !strcmp(reader_name(st->readers[i]), d->d_name); - } + for (size_t i = 0; !found && i < st->tables_len; i++) + found = !strcmp(reftable_table_name(st->tables[i]), d->d_name); if (found) continue; diff --git a/reftable/stack.h b/reftable/stack.h index 5b45cff4f7..bc28f2998a 100644 --- a/reftable/stack.h +++ b/reftable/stack.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef STACK_H #define STACK_H @@ -22,8 +22,8 @@ struct reftable_stack { struct reftable_write_options opts; - struct reftable_reader **readers; - size_t readers_len; + struct reftable_table **tables; + size_t 
tables_len; struct reftable_merged_table *merged; struct reftable_compaction_stats stats; }; diff --git a/reftable/system.c b/reftable/system.c index adf8e4d30b..1ee268b125 100644 --- a/reftable/system.c +++ b/reftable/system.c @@ -1,9 +1,16 @@ +#include "../git-compat-util.h" + #include "system.h" #include "basics.h" #include "reftable-error.h" #include "../lockfile.h" #include "../tempfile.h" +uint32_t reftable_rand(void) +{ + return git_rand(CSPRNG_BYTES_INSECURE); +} + int tmpfile_from_pattern(struct reftable_tmpfile *out, const char *pattern) { struct tempfile *tempfile; diff --git a/reftable/system.h b/reftable/system.h index 5274eca1d0..beb9d2431f 100644 --- a/reftable/system.h +++ b/reftable/system.h @@ -1,19 +1,25 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef SYSTEM_H #define SYSTEM_H /* This header glues the reftable library to the rest of Git */ -#define DISABLE_SIGN_COMPARE_WARNINGS +#define MINGW_DONT_HANDLE_IN_USE_ERROR +#include "compat/posix.h" +#include "compat/zlib-compat.h" -#include "git-compat-util.h" +/* + * Return a random 32 bit integer. This function is expected to return + * pre-seeded data. + */ +uint32_t reftable_rand(void); /* * An implementation-specific temporary file. 
By making this specific to the diff --git a/reftable/reader.c b/reftable/table.c index ea82955c9b..56362df0ed 100644 --- a/reftable/reader.c +++ b/reftable/table.c @@ -1,83 +1,46 @@ /* -Copyright 2020 Google LLC + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "reader.h" +#include "table.h" #include "system.h" #include "block.h" +#include "blocksource.h" #include "constants.h" #include "iter.h" #include "record.h" #include "reftable-error.h" -uint64_t block_source_size(struct reftable_block_source *source) -{ - return source->ops->size(source->arg); -} - -int block_source_read_block(struct reftable_block_source *source, - struct reftable_block *dest, uint64_t off, - uint32_t size) -{ - int result = source->ops->read_block(source->arg, dest, off, size); - dest->source = *source; - return result; -} - -void block_source_close(struct reftable_block_source *source) -{ - if (!source->ops) { - return; - } - - source->ops->close(source->arg); - source->ops = NULL; -} - -static struct reftable_reader_offsets * -reader_offsets_for(struct reftable_reader *r, uint8_t typ) +static struct reftable_table_offsets * +table_offsets_for(struct reftable_table *t, uint8_t typ) { switch (typ) { - case BLOCK_TYPE_REF: - return &r->ref_offsets; - case BLOCK_TYPE_LOG: - return &r->log_offsets; - case BLOCK_TYPE_OBJ: - return &r->obj_offsets; + case REFTABLE_BLOCK_TYPE_REF: + return &t->ref_offsets; + case REFTABLE_BLOCK_TYPE_LOG: + return &t->log_offsets; + case REFTABLE_BLOCK_TYPE_OBJ: + return &t->obj_offsets; } abort(); } -static int reader_get_block(struct reftable_reader *r, - struct reftable_block *dest, uint64_t off, - uint32_t sz) -{ - if (off >= 
r->size) - return 0; - - if (off + sz > r->size) { - sz = r->size - off; - } - - return block_source_read_block(&r->source, dest, off, sz); -} - -enum reftable_hash reftable_reader_hash_id(struct reftable_reader *r) +enum reftable_hash reftable_table_hash_id(struct reftable_table *t) { - return r->hash_id; + return t->hash_id; } -const char *reader_name(struct reftable_reader *r) +const char *reftable_table_name(struct reftable_table *t) { - return r->name; + return t->name; } -static int parse_footer(struct reftable_reader *r, uint8_t *footer, +static int parse_footer(struct reftable_table *t, uint8_t *footer, uint8_t *header) { uint8_t *f = footer; @@ -92,29 +55,29 @@ static int parse_footer(struct reftable_reader *r, uint8_t *footer, } f += 4; - if (memcmp(footer, header, header_size(r->version))) { + if (memcmp(footer, header, header_size(t->version))) { err = REFTABLE_FORMAT_ERROR; goto done; } f++; - r->block_size = get_be24(f); + t->block_size = reftable_get_be24(f); f += 3; - r->min_update_index = get_be64(f); + t->min_update_index = reftable_get_be64(f); f += 8; - r->max_update_index = get_be64(f); + t->max_update_index = reftable_get_be64(f); f += 8; - if (r->version == 1) { - r->hash_id = REFTABLE_HASH_SHA1; + if (t->version == 1) { + t->hash_id = REFTABLE_HASH_SHA1; } else { - switch (get_be32(f)) { + switch (reftable_get_be32(f)) { case REFTABLE_FORMAT_ID_SHA1: - r->hash_id = REFTABLE_HASH_SHA1; + t->hash_id = REFTABLE_HASH_SHA1; break; case REFTABLE_FORMAT_ID_SHA256: - r->hash_id = REFTABLE_HASH_SHA256; + t->hash_id = REFTABLE_HASH_SHA256; break; default: err = REFTABLE_FORMAT_ERROR; @@ -124,37 +87,37 @@ static int parse_footer(struct reftable_reader *r, uint8_t *footer, f += 4; } - r->ref_offsets.index_offset = get_be64(f); + t->ref_offsets.index_offset = reftable_get_be64(f); f += 8; - r->obj_offsets.offset = get_be64(f); + t->obj_offsets.offset = reftable_get_be64(f); f += 8; - r->object_id_len = r->obj_offsets.offset & ((1 << 5) - 1); - 
r->obj_offsets.offset >>= 5; + t->object_id_len = t->obj_offsets.offset & ((1 << 5) - 1); + t->obj_offsets.offset >>= 5; - r->obj_offsets.index_offset = get_be64(f); + t->obj_offsets.index_offset = reftable_get_be64(f); f += 8; - r->log_offsets.offset = get_be64(f); + t->log_offsets.offset = reftable_get_be64(f); f += 8; - r->log_offsets.index_offset = get_be64(f); + t->log_offsets.index_offset = reftable_get_be64(f); f += 8; computed_crc = crc32(0, footer, f - footer); - file_crc = get_be32(f); + file_crc = reftable_get_be32(f); f += 4; if (computed_crc != file_crc) { err = REFTABLE_FORMAT_ERROR; goto done; } - first_block_typ = header[header_size(r->version)]; - r->ref_offsets.is_present = (first_block_typ == BLOCK_TYPE_REF); - r->ref_offsets.offset = 0; - r->log_offsets.is_present = (first_block_typ == BLOCK_TYPE_LOG || - r->log_offsets.offset > 0); - r->obj_offsets.is_present = r->obj_offsets.offset > 0; - if (r->obj_offsets.is_present && !r->object_id_len) { + first_block_typ = header[header_size(t->version)]; + t->ref_offsets.is_present = (first_block_typ == REFTABLE_BLOCK_TYPE_REF); + t->ref_offsets.offset = 0; + t->log_offsets.is_present = (first_block_typ == REFTABLE_BLOCK_TYPE_LOG || + t->log_offsets.offset > 0); + t->obj_offsets.is_present = t->obj_offsets.offset > 0; + if (t->obj_offsets.is_present && !t->object_id_len) { err = REFTABLE_FORMAT_ERROR; goto done; } @@ -165,20 +128,20 @@ done: } struct table_iter { - struct reftable_reader *r; + struct reftable_table *table; uint8_t typ; uint64_t block_off; - struct block_reader br; + struct reftable_block block; struct block_iter bi; int is_finished; }; -static int table_iter_init(struct table_iter *ti, struct reftable_reader *r) +static int table_iter_init(struct table_iter *ti, struct reftable_table *t) { struct block_iter bi = BLOCK_ITER_INIT; memset(ti, 0, sizeof(*ti)); - reftable_reader_incref(r); - ti->r = r; + reftable_table_incref(t); + ti->table = t; ti->bi = bi; return 0; } @@ -187,8 +150,8 @@ 
static int table_iter_next_in_block(struct table_iter *ti, struct reftable_record *rec) { int res = block_iter_next(&ti->bi, rec); - if (res == 0 && reftable_record_type(rec) == BLOCK_TYPE_REF) { - rec->u.ref.update_index += ti->r->min_update_index; + if (res == 0 && reftable_record_type(rec) == REFTABLE_BLOCK_TYPE_REF) { + rec->u.ref.update_index += ti->table->min_update_index; } return res; @@ -196,68 +159,23 @@ static int table_iter_next_in_block(struct table_iter *ti, static void table_iter_block_done(struct table_iter *ti) { - block_reader_release(&ti->br); + reftable_block_release(&ti->block); block_iter_reset(&ti->bi); } -static int32_t extract_block_size(uint8_t *data, uint8_t *typ, uint64_t off, - int version) -{ - int32_t result = 0; - - if (off == 0) { - data += header_size(version); - } - - *typ = data[0]; - if (reftable_is_block_type(*typ)) { - result = get_be24(data + 1); - } - return result; -} - -int reader_init_block_reader(struct reftable_reader *r, struct block_reader *br, - uint64_t next_off, uint8_t want_typ) +int table_init_block(struct reftable_table *t, struct reftable_block *block, + uint64_t next_off, uint8_t want_typ) { - int32_t guess_block_size = r->block_size ? r->block_size : - DEFAULT_BLOCK_SIZE; - struct reftable_block block = { NULL }; - uint8_t block_typ = 0; - int err = 0; - uint32_t header_off = next_off ? 0 : header_size(r->version); - int32_t block_size = 0; + uint32_t header_off = next_off ? 
0 : header_size(t->version); + int err; - if (next_off >= r->size) + if (next_off >= t->size) return 1; - err = reader_get_block(r, &block, next_off, guess_block_size); - if (err < 0) - goto done; - - block_size = extract_block_size(block.data, &block_typ, next_off, - r->version); - if (block_size < 0) { - err = block_size; - goto done; - } - if (want_typ != BLOCK_TYPE_ANY && block_typ != want_typ) { - err = 1; - goto done; - } - - if (block_size > guess_block_size) { - reftable_block_done(&block); - err = reader_get_block(r, &block, next_off, block_size); - if (err < 0) { - goto done; - } - } - - err = block_reader_init(br, &block, header_off, r->block_size, - hash_size(r->hash_id)); -done: - reftable_block_done(&block); - + err = reftable_block_init(block, &t->source, next_off, header_off, + t->block_size, hash_size(t->hash_id), want_typ); + if (err) + reftable_block_release(block); return err; } @@ -265,15 +183,15 @@ static void table_iter_close(struct table_iter *ti) { table_iter_block_done(ti); block_iter_close(&ti->bi); - reftable_reader_decref(ti->r); + reftable_table_decref(ti->table); } static int table_iter_next_block(struct table_iter *ti) { - uint64_t next_block_off = ti->block_off + ti->br.full_block_size; + uint64_t next_block_off = ti->block_off + ti->block.full_block_size; int err; - err = reader_init_block_reader(ti->r, &ti->br, next_block_off, ti->typ); + err = table_init_block(ti->table, &ti->block, next_block_off, ti->typ); if (err > 0) ti->is_finished = 1; if (err) @@ -281,7 +199,7 @@ static int table_iter_next_block(struct table_iter *ti) ti->block_off = next_block_off; ti->is_finished = 0; - block_iter_seek_start(&ti->bi, &ti->br); + block_iter_init(&ti->bi, &ti->block); return 0; } @@ -323,27 +241,27 @@ static int table_iter_seek_to(struct table_iter *ti, uint64_t off, uint8_t typ) { int err; - err = reader_init_block_reader(ti->r, &ti->br, off, typ); + err = table_init_block(ti->table, &ti->block, off, typ); if (err != 0) return err; - 
ti->typ = block_reader_type(&ti->br); + ti->typ = reftable_block_type(&ti->block); ti->block_off = off; - block_iter_seek_start(&ti->bi, &ti->br); + block_iter_init(&ti->bi, &ti->block); ti->is_finished = 0; return 0; } static int table_iter_seek_start(struct table_iter *ti, uint8_t typ, int index) { - struct reftable_reader_offsets *offs = reader_offsets_for(ti->r, typ); + struct reftable_table_offsets *offs = table_offsets_for(ti->table, typ); uint64_t off = offs->offset; if (index) { off = offs->index_offset; if (off == 0) { return 1; } - typ = BLOCK_TYPE_INDEX; + typ = REFTABLE_BLOCK_TYPE_INDEX; } return table_iter_seek_to(ti, off, typ); @@ -357,7 +275,10 @@ static int table_iter_seek_linear(struct table_iter *ti, struct reftable_record rec; int err; - reftable_record_init(&rec, reftable_record_type(want)); + err = reftable_record_init(&rec, reftable_record_type(want)); + if (err < 0) + goto done; + err = reftable_record_key(want, &want_key); if (err < 0) goto done; @@ -390,10 +311,10 @@ static int table_iter_seek_linear(struct table_iter *ti, * as we have more than three blocks we would have an index, so * we would not do a linear search there anymore. */ - memset(&next.br.block, 0, sizeof(next.br.block)); - next.br.zstream = NULL; - next.br.uncompressed_data = NULL; - next.br.uncompressed_cap = 0; + memset(&next.block.block_data, 0, sizeof(next.block.block_data)); + next.block.zstream = NULL; + next.block.uncompressed_data = NULL; + next.block.uncompressed_cap = 0; err = table_iter_next_block(&next); if (err < 0) @@ -401,7 +322,7 @@ static int table_iter_seek_linear(struct table_iter *ti, if (err > 0) break; - err = block_reader_first_key(&next.br, &got_key); + err = reftable_block_first_key(&next.block, &got_key); if (err < 0) goto done; @@ -419,7 +340,8 @@ static int table_iter_seek_linear(struct table_iter *ti, * the wanted key inside of it. If the block does not contain our key * we know that the corresponding record does not exist. 
*/ - err = block_iter_seek_key(&ti->bi, &ti->br, &want_key); + block_iter_init(&ti->bi, &ti->block); + err = block_iter_seek_key(&ti->bi, &want_key); if (err < 0) goto done; err = 0; @@ -435,10 +357,10 @@ static int table_iter_seek_indexed(struct table_iter *ti, struct reftable_record *rec) { struct reftable_record want_index = { - .type = BLOCK_TYPE_INDEX, .u.idx = { .last_key = REFTABLE_BUF_INIT } + .type = REFTABLE_BLOCK_TYPE_INDEX, .u.idx = { .last_key = REFTABLE_BUF_INIT } }; struct reftable_record index_result = { - .type = BLOCK_TYPE_INDEX, + .type = REFTABLE_BLOCK_TYPE_INDEX, .u.idx = { .last_key = REFTABLE_BUF_INIT }, }; int err; @@ -487,7 +409,9 @@ static int table_iter_seek_indexed(struct table_iter *ti, if (err != 0) goto done; - err = block_iter_seek_key(&ti->bi, &ti->br, &want_index.u.idx.last_key); + block_iter_init(&ti->bi, &ti->block); + + err = block_iter_seek_key(&ti->bi, &want_index.u.idx.last_key); if (err < 0) goto done; @@ -496,7 +420,7 @@ static int table_iter_seek_indexed(struct table_iter *ti, break; } - if (ti->typ != BLOCK_TYPE_INDEX) { + if (ti->typ != REFTABLE_BLOCK_TYPE_INDEX) { err = REFTABLE_FORMAT_ERROR; goto done; } @@ -512,7 +436,7 @@ static int table_iter_seek(struct table_iter *ti, struct reftable_record *want) { uint8_t typ = reftable_record_type(want); - struct reftable_reader_offsets *offs = reader_offsets_for(ti->r, typ); + struct reftable_table_offsets *offs = table_offsets_for(ti->table, typ); int err; err = table_iter_seek_start(ti, reftable_record_type(want), @@ -560,11 +484,11 @@ static void iterator_from_table_iter(struct reftable_iterator *it, it->ops = &table_iter_vtable; } -int reader_init_iter(struct reftable_reader *r, - struct reftable_iterator *it, - uint8_t typ) +int table_init_iter(struct reftable_table *t, + struct reftable_iterator *it, + uint8_t typ) { - struct reftable_reader_offsets *offs = reader_offsets_for(r, typ); + struct reftable_table_offsets *offs = table_offsets_for(t, typ); if 
(offs->is_present) { struct table_iter *ti; @@ -572,7 +496,7 @@ int reader_init_iter(struct reftable_reader *r, if (!ti) return REFTABLE_OUT_OF_MEMORY_ERROR; - table_iter_init(ti, r); + table_iter_init(ti, t); iterator_from_table_iter(it, ti); } else { iterator_set_empty(it); @@ -581,30 +505,31 @@ int reader_init_iter(struct reftable_reader *r, return 0; } -int reftable_reader_init_ref_iterator(struct reftable_reader *r, - struct reftable_iterator *it) +int reftable_table_init_ref_iterator(struct reftable_table *t, + struct reftable_iterator *it) { - return reader_init_iter(r, it, BLOCK_TYPE_REF); + return table_init_iter(t, it, REFTABLE_BLOCK_TYPE_REF); } -int reftable_reader_init_log_iterator(struct reftable_reader *r, - struct reftable_iterator *it) +int reftable_table_init_log_iterator(struct reftable_table *t, + struct reftable_iterator *it) { - return reader_init_iter(r, it, BLOCK_TYPE_LOG); + return table_init_iter(t, it, REFTABLE_BLOCK_TYPE_LOG); } -int reftable_reader_new(struct reftable_reader **out, - struct reftable_block_source *source, char const *name) +int reftable_table_new(struct reftable_table **out, + struct reftable_block_source *source, char const *name) { - struct reftable_block footer = { 0 }; - struct reftable_block header = { 0 }; - struct reftable_reader *r; + struct reftable_block_data footer = { 0 }; + struct reftable_block_data header = { 0 }; + struct reftable_table *t; uint64_t file_size = block_source_size(source); uint32_t read_size; + ssize_t bytes_read; int err; - REFTABLE_CALLOC_ARRAY(r, 1); - if (!r) { + REFTABLE_CALLOC_ARRAY(t, 1); + if (!t) { err = REFTABLE_OUT_OF_MEMORY_ERROR; goto done; } @@ -619,8 +544,8 @@ int reftable_reader_new(struct reftable_reader **out, goto done; } - err = block_source_read_block(source, &header, 0, read_size); - if (err != read_size) { + bytes_read = block_source_read_data(source, &header, 0, read_size); + if (bytes_read < 0 || (size_t)bytes_read != read_size) { err = REFTABLE_IO_ERROR; goto done; 
} @@ -629,86 +554,84 @@ int reftable_reader_new(struct reftable_reader **out, err = REFTABLE_FORMAT_ERROR; goto done; } - r->version = header.data[4]; - if (r->version != 1 && r->version != 2) { + t->version = header.data[4]; + if (t->version != 1 && t->version != 2) { err = REFTABLE_FORMAT_ERROR; goto done; } - r->size = file_size - footer_size(r->version); - r->source = *source; - r->name = reftable_strdup(name); - if (!r->name) { + t->size = file_size - footer_size(t->version); + t->source = *source; + t->name = reftable_strdup(name); + if (!t->name) { err = REFTABLE_OUT_OF_MEMORY_ERROR; goto done; } - r->hash_id = 0; - r->refcount = 1; + t->hash_id = 0; + t->refcount = 1; - err = block_source_read_block(source, &footer, r->size, - footer_size(r->version)); - if (err != footer_size(r->version)) { + bytes_read = block_source_read_data(source, &footer, t->size, + footer_size(t->version)); + if (bytes_read < 0 || (size_t)bytes_read != footer_size(t->version)) { err = REFTABLE_IO_ERROR; goto done; } - err = parse_footer(r, footer.data, header.data); + err = parse_footer(t, footer.data, header.data); if (err) goto done; - *out = r; + *out = t; done: - reftable_block_done(&footer); - reftable_block_done(&header); + block_source_release_data(&footer); + block_source_release_data(&header); if (err) { - reftable_free(r); + if (t) + reftable_free(t->name); + reftable_free(t); block_source_close(source); } return err; } -void reftable_reader_incref(struct reftable_reader *r) +void reftable_table_incref(struct reftable_table *t) { - if (!r->refcount) - BUG("cannot increment ref counter of dead reader"); - r->refcount++; + t->refcount++; } -void reftable_reader_decref(struct reftable_reader *r) +void reftable_table_decref(struct reftable_table *t) { - if (!r) + if (!t) return; - if (!r->refcount) - BUG("cannot decrement ref counter of dead reader"); - if (--r->refcount) + if (--t->refcount) return; - block_source_close(&r->source); - REFTABLE_FREE_AND_NULL(r->name); - 
reftable_free(r); + block_source_close(&t->source); + REFTABLE_FREE_AND_NULL(t->name); + reftable_free(t); } -static int reftable_reader_refs_for_indexed(struct reftable_reader *r, - struct reftable_iterator *it, - uint8_t *oid) +static int reftable_table_refs_for_indexed(struct reftable_table *t, + struct reftable_iterator *it, + uint8_t *oid) { struct reftable_record want = { - .type = BLOCK_TYPE_OBJ, + .type = REFTABLE_BLOCK_TYPE_OBJ, .u.obj = { .hash_prefix = oid, - .hash_prefix_len = r->object_id_len, + .hash_prefix_len = t->object_id_len, }, }; struct reftable_iterator oit = { NULL }; struct reftable_record got = { - .type = BLOCK_TYPE_OBJ, + .type = REFTABLE_BLOCK_TYPE_OBJ, .u.obj = { 0 }, }; int err = 0; struct indexed_table_ref_iter *itr = NULL; /* Look through the reverse index. */ - err = reader_init_iter(r, &oit, BLOCK_TYPE_OBJ); + err = table_init_iter(t, &oit, REFTABLE_BLOCK_TYPE_OBJ); if (err < 0) goto done; @@ -722,14 +645,14 @@ static int reftable_reader_refs_for_indexed(struct reftable_reader *r, goto done; if (err > 0 || memcmp(want.u.obj.hash_prefix, got.u.obj.hash_prefix, - r->object_id_len)) { + t->object_id_len)) { /* didn't find it; return empty iterator */ iterator_set_empty(it); err = 0; goto done; } - err = indexed_table_ref_iter_new(&itr, r, oid, hash_size(r->hash_id), + err = indexed_table_ref_iter_new(&itr, t, oid, hash_size(t->hash_id), got.u.obj.offsets, got.u.obj.offset_len); if (err < 0) @@ -743,14 +666,14 @@ done: return err; } -static int reftable_reader_refs_for_unindexed(struct reftable_reader *r, - struct reftable_iterator *it, - uint8_t *oid) +static int reftable_table_refs_for_unindexed(struct reftable_table *t, + struct reftable_iterator *it, + uint8_t *oid) { struct table_iter *ti; struct filtering_ref_iterator *filter = NULL; struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT; - int oid_len = hash_size(r->hash_id); + uint32_t oid_len = hash_size(t->hash_id); int err; REFTABLE_ALLOC_ARRAY(ti, 1); @@ -759,8 
+682,8 @@ static int reftable_reader_refs_for_unindexed(struct reftable_reader *r, goto out; } - table_iter_init(ti, r); - err = table_iter_seek_start(ti, BLOCK_TYPE_REF, 0); + table_iter_init(ti, t); + err = table_iter_seek_start(ti, REFTABLE_BLOCK_TYPE_REF, 0); if (err < 0) goto out; @@ -790,85 +713,67 @@ out: return err; } -int reftable_reader_refs_for(struct reftable_reader *r, - struct reftable_iterator *it, uint8_t *oid) +int reftable_table_refs_for(struct reftable_table *t, + struct reftable_iterator *it, uint8_t *oid) { - if (r->obj_offsets.is_present) - return reftable_reader_refs_for_indexed(r, it, oid); - return reftable_reader_refs_for_unindexed(r, it, oid); + if (t->obj_offsets.is_present) + return reftable_table_refs_for_indexed(t, it, oid); + return reftable_table_refs_for_unindexed(t, it, oid); } -uint64_t reftable_reader_max_update_index(struct reftable_reader *r) +uint64_t reftable_table_max_update_index(struct reftable_table *t) { - return r->max_update_index; + return t->max_update_index; } -uint64_t reftable_reader_min_update_index(struct reftable_reader *r) +uint64_t reftable_table_min_update_index(struct reftable_table *t) { - return r->min_update_index; + return t->min_update_index; } -int reftable_reader_print_blocks(const char *tablename) +int reftable_table_iterator_init(struct reftable_table_iterator *it, + struct reftable_table *t) { - struct { - const char *name; - int type; - } sections[] = { - { - .name = "ref", - .type = BLOCK_TYPE_REF, - }, - { - .name = "obj", - .type = BLOCK_TYPE_OBJ, - }, - { - .name = "log", - .type = BLOCK_TYPE_LOG, - }, - }; - struct reftable_block_source src = { 0 }; - struct reftable_reader *r = NULL; - struct table_iter ti = { 0 }; - size_t i; + struct table_iter *ti; int err; - err = reftable_block_source_from_file(&src, tablename); - if (err < 0) - goto done; + REFTABLE_ALLOC_ARRAY(ti, 1); + if (!ti) + return REFTABLE_OUT_OF_MEMORY_ERROR; - err = reftable_reader_new(&r, &src, tablename); + err = 
table_iter_init(ti, t); if (err < 0) - goto done; + goto out; - table_iter_init(&ti, r); + it->iter_arg = ti; + err = 0; - printf("header:\n"); - printf(" block_size: %d\n", r->block_size); +out: + if (err < 0) + reftable_free(ti); + return err; +} - for (i = 0; i < ARRAY_SIZE(sections); i++) { - err = table_iter_seek_start(&ti, sections[i].type, 0); - if (err < 0) - goto done; - if (err > 0) - continue; +void reftable_table_iterator_release(struct reftable_table_iterator *it) +{ + if (!it->iter_arg) + return; + table_iter_close(it->iter_arg); + reftable_free(it->iter_arg); + it->iter_arg = NULL; +} - printf("%s:\n", sections[i].name); +int reftable_table_iterator_next(struct reftable_table_iterator *it, + const struct reftable_block **out) +{ + struct table_iter *ti = it->iter_arg; + int err; - while (1) { - printf(" - length: %u\n", ti.br.block_len); - printf(" restarts: %u\n", ti.br.restart_count); + err = table_iter_next_block(ti); + if (err) + return err; - err = table_iter_next_block(&ti); - if (err < 0) - goto done; - if (err > 0) - break; - } - } + *out = &ti->block; -done: - reftable_reader_decref(r); - table_iter_close(&ti); - return err; + return 0; } diff --git a/reftable/table.h b/reftable/table.h new file mode 100644 index 0000000000..c54703e621 --- /dev/null +++ b/reftable/table.h @@ -0,0 +1,29 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef TABLE_H +#define TABLE_H + +#include "block.h" +#include "record.h" +#include "reftable-iterator.h" +#include "reftable-table.h" + +const char *reftable_table_name(struct reftable_table *t); + +int table_init_iter(struct reftable_table *t, + struct reftable_iterator *it, + uint8_t typ); + +/* + * Initialize a block by reading from the given table and offset. 
+ */ +int table_init_block(struct reftable_table *t, struct reftable_block *block, + uint64_t next_off, uint8_t want_typ); + +#endif diff --git a/reftable/tree.c b/reftable/tree.c index f4dbe72090..a52f7c0c7d 100644 --- a/reftable/tree.c +++ b/reftable/tree.c @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #include "system.h" #include "tree.h" diff --git a/reftable/tree.h b/reftable/tree.h index 9604453b6d..2c9c465299 100644 --- a/reftable/tree.h +++ b/reftable/tree.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef TREE_H #define TREE_H diff --git a/reftable/writer.c b/reftable/writer.c index 740c98038e..3b4ebdd6dc 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #include "writer.h" @@ -57,8 +57,10 @@ static int padded_write(struct reftable_writer *w, uint8_t *data, size_t len, return -1; n = 
w->write(w->write_arg, zeroed, w->pending_padding); - if (n < 0) + if (n < 0) { + reftable_free(zeroed); return n; + } w->pending_padding = 0; reftable_free(zeroed); @@ -99,9 +101,9 @@ static int writer_write_header(struct reftable_writer *w, uint8_t *dest) dest[4] = writer_version(w); - put_be24(dest + 5, w->opts.block_size); - put_be64(dest + 8, w->min_update_index); - put_be64(dest + 16, w->max_update_index); + reftable_put_be24(dest + 5, w->opts.block_size); + reftable_put_be64(dest + 8, w->min_update_index); + reftable_put_be64(dest + 16, w->max_update_index); if (writer_version(w) == 2) { uint32_t hash_id; @@ -116,7 +118,7 @@ static int writer_write_header(struct reftable_writer *w, uint8_t *dest) return -1; } - put_be32(dest + 24, hash_id); + reftable_put_be32(dest + 24, hash_id); } return header_size(writer_version(w)); @@ -158,7 +160,7 @@ int reftable_writer_new(struct reftable_writer **out, opts = *_opts; options_set_defaults(&opts); if (opts.block_size >= (1 << 24)) - BUG("configured block size exceeds 16MB"); + return REFTABLE_API_ERROR; reftable_buf_init(&wp->block_writer_data.last_key); reftable_buf_init(&wp->last_key); @@ -172,18 +174,31 @@ int reftable_writer_new(struct reftable_writer **out, wp->write_arg = writer_arg; wp->opts = opts; wp->flush = flush_func; - writer_reinit_block_writer(wp, BLOCK_TYPE_REF); + writer_reinit_block_writer(wp, REFTABLE_BLOCK_TYPE_REF); *out = wp; return 0; } -void reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, +int reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, uint64_t max) { + /* + * Set the min/max update index limits for the reftable writer. + * This must be called before adding any records, since: + * - The 'next' field gets set after writing the first block. + * - The 'last_key' field updates with each new record (but resets + * after sections). + * Returns REFTABLE_API_ERROR if called after writing has begun. 
+ */ + if (w->next || w->last_key.len) + return REFTABLE_API_ERROR; + w->min_update_index = min; w->max_update_index = max; + + return 0; } static void writer_release(struct reftable_writer *w) @@ -243,8 +258,10 @@ static int writer_index_hash(struct reftable_writer *w, struct reftable_buf *has reftable_buf_reset(&key->hash); err = reftable_buf_add(&key->hash, hash->buf, hash->len); - if (err < 0) + if (err < 0) { + reftable_free(key); return err; + } tree_insert(&w->obj_index_tree, key, &obj_index_tree_node_compare); } else { @@ -289,19 +306,19 @@ static int writer_add_record(struct reftable_writer *w, } if (block_writer_type(w->block_writer) != reftable_record_type(rec)) - BUG("record of type %d added to writer of type %d", - reftable_record_type(rec), block_writer_type(w->block_writer)); + return REFTABLE_API_ERROR; /* * Try to add the record to the writer. If this succeeds then we're * done. Otherwise the block writer may have hit the block size limit * and needs to be flushed. */ - if (!block_writer_add(w->block_writer, rec)) { - err = 0; + err = block_writer_add(w->block_writer, rec); + if (err == 0) goto done; - } + if (err != REFTABLE_ENTRY_TOO_BIG_ERROR) + goto done; /* * The current block is full, so we need to flush and reinitialize the * writer to start writing the next block. @@ -316,16 +333,10 @@ static int writer_add_record(struct reftable_writer *w, /* * Try to add the record to the writer again. If this still fails then * the record does not fit into the block size. - * - * TODO: it would be great to have `block_writer_add()` return proper - * error codes so that we don't have to second-guess the failure - * mode here. 
*/ err = block_writer_add(w->block_writer, rec); - if (err) { - err = REFTABLE_ENTRY_TOO_BIG_ERROR; + if (err) goto done; - } done: return err; @@ -335,7 +346,7 @@ int reftable_writer_add_ref(struct reftable_writer *w, struct reftable_ref_record *ref) { struct reftable_record rec = { - .type = BLOCK_TYPE_REF, + .type = REFTABLE_BLOCK_TYPE_REF, .u = { .ref = *ref }, @@ -399,13 +410,13 @@ static int reftable_writer_add_log_verbatim(struct reftable_writer *w, struct reftable_log_record *log) { struct reftable_record rec = { - .type = BLOCK_TYPE_LOG, + .type = REFTABLE_BLOCK_TYPE_LOG, .u = { .log = *log, }, }; if (w->block_writer && - block_writer_type(w->block_writer) == BLOCK_TYPE_REF) { + block_writer_type(w->block_writer) == REFTABLE_BLOCK_TYPE_REF) { int err = writer_finish_public_section(w); if (err < 0) return err; @@ -525,7 +536,7 @@ static int writer_finish_section(struct reftable_writer *w) max_level++; index_start = w->next; - err = writer_reinit_block_writer(w, BLOCK_TYPE_INDEX); + err = writer_reinit_block_writer(w, REFTABLE_BLOCK_TYPE_INDEX); if (err < 0) return err; @@ -537,7 +548,7 @@ static int writer_finish_section(struct reftable_writer *w) w->index_cap = 0; for (i = 0; i < idx_len; i++) { struct reftable_record rec = { - .type = BLOCK_TYPE_INDEX, + .type = REFTABLE_BLOCK_TYPE_INDEX, .u = { .idx = idx[i], }, @@ -577,7 +588,7 @@ static int writer_finish_section(struct reftable_writer *w) struct common_prefix_arg { struct reftable_buf *last; - int max; + size_t max; }; static void update_common(void *void_arg, void *key) @@ -585,10 +596,9 @@ static void update_common(void *void_arg, void *key) struct common_prefix_arg *arg = void_arg; struct obj_index_tree_node *entry = key; if (arg->last) { - int n = common_prefix_size(&entry->hash, arg->last); - if (n > arg->max) { + size_t n = common_prefix_size(&entry->hash, arg->last); + if (n > arg->max) arg->max = n; - } } arg->last = &entry->hash; } @@ -603,7 +613,7 @@ static void write_object_record(void 
*void_arg, void *key) struct write_record_arg *arg = void_arg; struct obj_index_tree_node *entry = key; struct reftable_record - rec = { .type = BLOCK_TYPE_OBJ, + rec = { .type = REFTABLE_BLOCK_TYPE_OBJ, .u.obj = { .hash_prefix = (uint8_t *)entry->hash.buf, .hash_prefix_len = arg->w->stats.object_id_len, @@ -613,22 +623,41 @@ static void write_object_record(void *void_arg, void *key) if (arg->err < 0) goto done; + /* + * Try to add the record to the writer. If this succeeds then we're + * done. Otherwise the block writer may have hit the block size limit + * and needs to be flushed. + */ arg->err = block_writer_add(arg->w->block_writer, &rec); if (arg->err == 0) goto done; + if (arg->err != REFTABLE_ENTRY_TOO_BIG_ERROR) + goto done; + + /* + * The current block is full, so we need to flush and reinitialize the + * writer to start writing the next block. + */ arg->err = writer_flush_block(arg->w); if (arg->err < 0) goto done; - arg->err = writer_reinit_block_writer(arg->w, BLOCK_TYPE_OBJ); + arg->err = writer_reinit_block_writer(arg->w, REFTABLE_BLOCK_TYPE_OBJ); if (arg->err < 0) goto done; + /* + * If this still fails then we may need to reset record's offset + * length to reduce the data size to be written. 
+ */ arg->err = block_writer_add(arg->w->block_writer, &rec); if (arg->err == 0) goto done; + if (arg->err != REFTABLE_ENTRY_TOO_BIG_ERROR) + goto done; + rec.u.obj.offset_len = 0; arg->err = block_writer_add(arg->w->block_writer, &rec); @@ -638,7 +667,7 @@ static void write_object_record(void *void_arg, void *key) done:; } -static void object_record_free(void *void_arg UNUSED, void *key) +static void object_record_free(void *void_arg REFTABLE_UNUSED, void *key) { struct obj_index_tree_node *entry = key; @@ -659,7 +688,7 @@ static int writer_dump_object_index(struct reftable_writer *w) infix_walk(w->obj_index_tree, &update_common, &common); w->stats.object_id_len = common.max + 1; - err = writer_reinit_block_writer(w, BLOCK_TYPE_OBJ); + err = writer_reinit_block_writer(w, REFTABLE_BLOCK_TYPE_OBJ); if (err < 0) return err; @@ -683,7 +712,7 @@ static int writer_finish_public_section(struct reftable_writer *w) err = writer_finish_section(w); if (err < 0) return err; - if (typ == BLOCK_TYPE_REF && !w->opts.skip_index_objects && + if (typ == REFTABLE_BLOCK_TYPE_REF && !w->opts.skip_index_objects && w->stats.ref_stats.index_blocks > 0) { err = writer_dump_object_index(w); if (err < 0) @@ -719,19 +748,19 @@ int reftable_writer_close(struct reftable_writer *w) } p += writer_write_header(w, footer); - put_be64(p, w->stats.ref_stats.index_offset); + reftable_put_be64(p, w->stats.ref_stats.index_offset); p += 8; - put_be64(p, (w->stats.obj_stats.offset) << 5 | w->stats.object_id_len); + reftable_put_be64(p, (w->stats.obj_stats.offset) << 5 | w->stats.object_id_len); p += 8; - put_be64(p, w->stats.obj_stats.index_offset); + reftable_put_be64(p, w->stats.obj_stats.index_offset); p += 8; - put_be64(p, w->stats.log_stats.offset); + reftable_put_be64(p, w->stats.log_stats.offset); p += 8; - put_be64(p, w->stats.log_stats.index_offset); + reftable_put_be64(p, w->stats.log_stats.index_offset); p += 8; - put_be32(p, crc32(0, footer, p - footer)); + reftable_put_be32(p, crc32(0, 
footer, p - footer)); p += 4; err = w->flush(w->write_arg); @@ -788,7 +817,7 @@ static int writer_flush_nonempty_block(struct reftable_writer *w) * By default, all records except for log records are padded to the * block size. */ - if (!w->opts.unpadded && typ != BLOCK_TYPE_LOG) + if (!w->opts.unpadded && typ != REFTABLE_BLOCK_TYPE_LOG) padding = w->opts.block_size - raw_bytes; bstats = writer_reftable_block_stats(w, typ); diff --git a/reftable/writer.h b/reftable/writer.h index 1f4788a430..9f53610b27 100644 --- a/reftable/writer.h +++ b/reftable/writer.h @@ -1,10 +1,10 @@ /* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ #ifndef WRITER_H #define WRITER_H |
