diff options
Diffstat (limited to 'reftable')
48 files changed, 1863 insertions, 5795 deletions
diff --git a/reftable/basics.c b/reftable/basics.c index 0785aff941..0058619ca6 100644 --- a/reftable/basics.c +++ b/reftable/basics.c @@ -27,7 +27,7 @@ void put_be16(uint8_t *out, uint16_t i) out[1] = (uint8_t)(i & 0xff); } -int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) +size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) { size_t lo = 0; size_t hi = sz; @@ -39,8 +39,11 @@ int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) */ while (hi - lo > 1) { size_t mid = lo + (hi - lo) / 2; + int ret = f(mid, args); + if (ret < 0) + return sz; - if (f(mid, args)) + if (ret > 0) hi = mid; else lo = mid; @@ -64,9 +67,9 @@ void free_names(char **a) reftable_free(a); } -size_t names_length(char **names) +size_t names_length(const char **names) { - char **p = names; + const char **p = names; while (*p) p++; return p - names; @@ -99,15 +102,12 @@ void parse_names(char *buf, int size, char ***namesp) *namesp = names; } -int names_equal(char **a, char **b) +int names_equal(const char **a, const char **b) { - int i = 0; - for (; a[i] && b[i]; i++) { - if (strcmp(a[i], b[i])) { + size_t i = 0; + for (; a[i] && b[i]; i++) + if (strcmp(a[i], b[i])) return 0; - } - } - return a[i] == b[i]; } diff --git a/reftable/basics.h b/reftable/basics.h index 91f3533efe..c8fec68d4e 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -22,13 +22,14 @@ uint32_t get_be24(uint8_t *in); void put_be16(uint8_t *out, uint16_t i); /* - * find smallest index i in [0, sz) at which f(i) is true, assuming - * that f is ascending. Return sz if f(i) is false for all indices. + * find smallest index i in [0, sz) at which `f(i) > 0`, assuming that f is + * ascending. Return sz if `f(i) == 0` for all indices. The search is aborted + * and `sz` is returned in case `f(i) < 0`. * * Contrary to bsearch(3), this returns something useful if the argument is not * found. */ -int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args); +size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args); /* * Frees a NULL terminated array of malloced strings. The array itself is also @@ -41,10 +42,10 @@ void free_names(char **a); void parse_names(char *buf, int size, char ***namesp); /* compares two NULL-terminated arrays of strings. */ -int names_equal(char **a, char **b); +int names_equal(const char **a, const char **b); /* returns the array size of a NULL-terminated array of strings. */ -size_t names_length(char **names); +size_t names_length(const char **names); /* Allocation routines; they invoke the functions set through * reftable_set_alloc() */ diff --git a/reftable/basics_test.c b/reftable/basics_test.c deleted file mode 100644 index 1fcd229725..0000000000 --- a/reftable/basics_test.c +++ /dev/null @@ -1,98 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "system.h" - -#include "basics.h" -#include "test_framework.h" -#include "reftable-tests.h" - -struct binsearch_args { - int key; - int *arr; -}; - -static int binsearch_func(size_t i, void *void_args) -{ - struct binsearch_args *args = void_args; - - return args->key < args->arr[i]; -} - -static void test_binsearch(void) -{ - int arr[] = { 2, 4, 6, 8, 10 }; - size_t sz = ARRAY_SIZE(arr); - struct binsearch_args args = { - .arr = arr, - }; - - int i = 0; - for (i = 1; i < 11; i++) { - int res; - args.key = i; - res = binsearch(sz, &binsearch_func, &args); - - if (res < sz) { - EXPECT(args.key < arr[res]); - if (res > 0) { - EXPECT(args.key >= arr[res - 1]); - } - } else { - EXPECT(args.key == 10 || args.key == 11); - } - } -} - -static void test_names_length(void) -{ - char *a[] = { "a", "b", NULL }; - EXPECT(names_length(a) == 2); -} - -static void test_parse_names_normal(void) -{ - char in[] = "a\nb\n"; - char **out = NULL; - parse_names(in, strlen(in), &out); - EXPECT(!strcmp(out[0], "a")); - EXPECT(!strcmp(out[1], "b")); - EXPECT(!out[2]); - free_names(out); -} - -static void test_parse_names_drop_empty(void) -{ - char in[] = "a\n\n"; - char **out = NULL; - parse_names(in, strlen(in), &out); - EXPECT(!strcmp(out[0], "a")); - EXPECT(!out[1]); - free_names(out); -} - -static void test_common_prefix(void) -{ - struct strbuf s1 = STRBUF_INIT; - struct strbuf s2 = STRBUF_INIT; - strbuf_addstr(&s1, "abcdef"); - strbuf_addstr(&s2, "abc"); - EXPECT(common_prefix_size(&s1, &s2) == 3); - strbuf_release(&s1); - strbuf_release(&s2); -} - -int basics_test_main(int argc, const char *argv[]) -{ - RUN_TEST(test_common_prefix); - RUN_TEST(test_parse_names_normal); - RUN_TEST(test_parse_names_drop_empty); - RUN_TEST(test_binsearch); - RUN_TEST(test_names_length); - return 0; -} diff --git a/reftable/block.c b/reftable/block.c index ce8a7d24f3..00030eee06 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -76,6 +76,10 @@ void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf, bw->entries = 0; bw->restart_len = 0; bw->last_key.len = 0; + if (!bw->zstream) { + REFTABLE_CALLOC_ARRAY(bw->zstream, 1); + deflateInit(bw->zstream, 9); + } } uint8_t block_writer_type(struct block_writer *bw) @@ -139,45 +143,53 @@ int block_writer_finish(struct block_writer *w) w->next += 2; put_be24(w->buf + 1 + w->header_off, w->next); + /* + * Log records are stored zlib-compressed. Note that the compression + * also spans over the restart points we have just written. + */ if (block_writer_type(w) == BLOCK_TYPE_LOG) { int block_header_skip = 4 + w->header_off; - uLongf src_len = w->next - block_header_skip; - uLongf dest_cap = src_len * 1.001 + 12; - uint8_t *compressed; - - REFTABLE_ALLOC_ARRAY(compressed, dest_cap); - - while (1) { - uLongf out_dest_len = dest_cap; - int zresult = compress2(compressed, &out_dest_len, - w->buf + block_header_skip, - src_len, 9); - if (zresult == Z_BUF_ERROR && dest_cap < LONG_MAX) { - dest_cap *= 2; - compressed = - reftable_realloc(compressed, dest_cap); - if (compressed) - continue; - } - - if (Z_OK != zresult) { - reftable_free(compressed); - return REFTABLE_ZLIB_ERROR; - } - - memcpy(w->buf + block_header_skip, compressed, - out_dest_len); - w->next = out_dest_len + block_header_skip; - reftable_free(compressed); - break; - } + uLongf src_len = w->next - block_header_skip, compressed_len; + int ret; + + ret = deflateReset(w->zstream); + if (ret != Z_OK) + return REFTABLE_ZLIB_ERROR; + + /* + * Precompute the upper bound of how many bytes the compressed + * data may end up with. Combined with `Z_FINISH`, `deflate()` + * is guaranteed to return `Z_STREAM_END`. + */ + compressed_len = deflateBound(w->zstream, src_len); + REFTABLE_ALLOC_GROW(w->compressed, compressed_len, w->compressed_cap); + + w->zstream->next_out = w->compressed; + w->zstream->avail_out = compressed_len; + w->zstream->next_in = w->buf + block_header_skip; + w->zstream->avail_in = src_len; + + /* + * We want to perform all decompression in a single step, which + * is why we can pass Z_FINISH here. As we have precomputed the + * deflated buffer's size via `deflateBound()` this function is + * guaranteed to succeed according to the zlib documentation. + */ + ret = deflate(w->zstream, Z_FINISH); + if (ret != Z_STREAM_END) + return REFTABLE_ZLIB_ERROR; + + /* + * Overwrite the uncompressed data we have already written and + * adjust the `next` pointer to point right after the + * compressed data. + */ + memcpy(w->buf + block_header_skip, w->compressed, + w->zstream->total_out); + w->next = w->zstream->total_out + block_header_skip; } - return w->next; -} -uint8_t block_reader_type(struct block_reader *r) -{ - return r->block.data[r->header_off]; + return w->next; } int block_reader_init(struct block_reader *br, struct reftable_block *block, @@ -191,7 +203,8 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block, uint16_t restart_count = 0; uint32_t restart_start = 0; uint8_t *restart_bytes = NULL; - uint8_t *uncompressed = NULL; + + reftable_block_done(&br->block); if (!reftable_is_block_type(typ)) { err = REFTABLE_FORMAT_ERROR; @@ -199,37 +212,57 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block, } if (typ == BLOCK_TYPE_LOG) { - int block_header_skip = 4 + header_off; - uLongf dst_len = sz - block_header_skip; /* total size of dest - buffer. */ - uLongf src_len = block->len - block_header_skip; + uint32_t block_header_skip = 4 + header_off; + uLong dst_len = sz - block_header_skip; + uLong src_len = block->len - block_header_skip; /* Log blocks specify the *uncompressed* size in their header. */ - REFTABLE_ALLOC_ARRAY(uncompressed, sz); + REFTABLE_ALLOC_GROW(br->uncompressed_data, sz, + br->uncompressed_cap); /* Copy over the block header verbatim. It's not compressed. */ - memcpy(uncompressed, block->data, block_header_skip); + memcpy(br->uncompressed_data, block->data, block_header_skip); - /* Uncompress */ - if (Z_OK != - uncompress2(uncompressed + block_header_skip, &dst_len, - block->data + block_header_skip, &src_len)) { + if (!br->zstream) { + REFTABLE_CALLOC_ARRAY(br->zstream, 1); + err = inflateInit(br->zstream); + } else { + err = inflateReset(br->zstream); + } + if (err != Z_OK) { err = REFTABLE_ZLIB_ERROR; goto done; } - if (dst_len + block_header_skip != sz) { + br->zstream->next_in = block->data + block_header_skip; + br->zstream->avail_in = src_len; + br->zstream->next_out = br->uncompressed_data + block_header_skip; + br->zstream->avail_out = dst_len; + + /* + * We know both input as well as output size, and we know that + * the sizes should never be bigger than `uInt_MAX` because + * blocks can at most be 16MB large. We can thus use `Z_FINISH` + * here to instruct zlib to inflate the data in one go, which + * is more efficient than using `Z_NO_FLUSH`. + */ + err = inflate(br->zstream, Z_FINISH); + if (err != Z_STREAM_END) { + err = REFTABLE_ZLIB_ERROR; + goto done; + } + err = 0; + + if (br->zstream->total_out + block_header_skip != sz) { err = REFTABLE_FORMAT_ERROR; goto done; } /* We're done with the input data. */ reftable_block_done(block); - block->data = uncompressed; - uncompressed = NULL; + block->data = br->uncompressed_data; block->len = sz; - block->source = malloc_block_source(); - full_block_size = src_len + block_header_skip; + full_block_size = src_len + block_header_skip - br->zstream->avail_in; } else if (full_block_size == 0) { full_block_size = sz; } else if (sz < full_block_size && sz < block->len && @@ -257,177 +290,251 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block, br->restart_bytes = restart_bytes; done: - reftable_free(uncompressed); return err; } -static uint32_t block_reader_restart_offset(struct block_reader *br, int i) +void block_reader_release(struct block_reader *br) +{ + inflateEnd(br->zstream); + reftable_free(br->zstream); + reftable_free(br->uncompressed_data); + reftable_block_done(&br->block); +} + +uint8_t block_reader_type(const struct block_reader *r) +{ + return r->block.data[r->header_off]; +} + +int block_reader_first_key(const struct block_reader *br, struct strbuf *key) +{ + int off = br->header_off + 4, n; + struct string_view in = { + .buf = br->block.data + off, + .len = br->block_len - off, + }; + uint8_t extra = 0; + + strbuf_reset(key); + + n = reftable_decode_key(key, &extra, in); + if (n < 0) + return n; + if (!key->len) + return REFTABLE_FORMAT_ERROR; + + return 0; +} + +static uint32_t block_reader_restart_offset(const struct block_reader *br, size_t idx) { - return get_be24(br->restart_bytes + 3 * i); + return get_be24(br->restart_bytes + 3 * idx); } -void block_reader_start(struct block_reader *br, struct block_iter *it) +void block_iter_seek_start(struct block_iter *it, const struct block_reader *br) { - it->br = br; + it->block = br->block.data; + it->block_len = br->block_len; + it->hash_size = br->hash_size; strbuf_reset(&it->last_key); it->next_off = br->header_off + 4; } -struct restart_find_args { +struct restart_needle_less_args { int error; - struct strbuf key; - struct block_reader *r; + struct strbuf needle; + const struct block_reader *reader; }; -static int restart_key_less(size_t idx, void *args) +static int restart_needle_less(size_t idx, void *_args) { - struct restart_find_args *a = args; - uint32_t off = block_reader_restart_offset(a->r, idx); + struct restart_needle_less_args *args = _args; + uint32_t off = block_reader_restart_offset(args->reader, idx); struct string_view in = { - .buf = a->r->block.data + off, - .len = a->r->block_len - off, + .buf = args->reader->block.data + off, + .len = args->reader->block_len - off, }; - - /* the restart key is verbatim in the block, so this could avoid the - alloc for decoding the key */ - struct strbuf rkey = STRBUF_INIT; - struct strbuf last_key = STRBUF_INIT; - uint8_t unused_extra; - int n = reftable_decode_key(&rkey, &unused_extra, last_key, in); - int result; - if (n < 0) { - a->error = 1; + uint64_t prefix_len, suffix_len; + uint8_t extra; + int n; + + /* + * Records at restart points are stored without prefix compression, so + * there is no need to fully decode the record key here. This removes + * the need for allocating memory. + */ + n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra); + if (n < 0 || prefix_len) { + args->error = 1; return -1; } - result = strbuf_cmp(&a->key, &rkey); - strbuf_release(&rkey); - return result; -} + string_view_consume(&in, n); + if (suffix_len > in.len) { + args->error = 1; + return -1; + } -void block_iter_copy_from(struct block_iter *dest, struct block_iter *src) -{ - dest->br = src->br; - dest->next_off = src->next_off; - strbuf_reset(&dest->last_key); - strbuf_addbuf(&dest->last_key, &src->last_key); + n = memcmp(args->needle.buf, in.buf, + args->needle.len < suffix_len ? args->needle.len : suffix_len); + if (n) + return n < 0; + return args->needle.len < suffix_len; } int block_iter_next(struct block_iter *it, struct reftable_record *rec) { struct string_view in = { - .buf = it->br->block.data + it->next_off, - .len = it->br->block_len - it->next_off, + .buf = (unsigned char *) it->block + it->next_off, + .len = it->block_len - it->next_off, }; struct string_view start = in; uint8_t extra = 0; int n = 0; - if (it->next_off >= it->br->block_len) + if (it->next_off >= it->block_len) return 1; - n = reftable_decode_key(&it->key, &extra, it->last_key, in); + n = reftable_decode_key(&it->last_key, &extra, in); if (n < 0) return -1; - - if (!it->key.len) + if (!it->last_key.len) return REFTABLE_FORMAT_ERROR; string_view_consume(&in, n); - n = reftable_record_decode(rec, it->key, extra, in, it->br->hash_size); + n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size, + &it->scratch); if (n < 0) return -1; string_view_consume(&in, n); - strbuf_reset(&it->last_key); - strbuf_addbuf(&it->last_key, &it->key); it->next_off += start.len - in.len; return 0; } -int block_reader_first_key(struct block_reader *br, struct strbuf *key) -{ - struct strbuf empty = STRBUF_INIT; - int off = br->header_off + 4; - struct string_view in = { - .buf = br->block.data + off, - .len = br->block_len - off, - }; - - uint8_t extra = 0; - int n = reftable_decode_key(key, &extra, empty, in); - if (n < 0) - return n; - if (!key->len) - return REFTABLE_FORMAT_ERROR; - - return 0; -} - -int block_iter_seek(struct block_iter *it, struct strbuf *want) +void block_iter_reset(struct block_iter *it) { - return block_reader_seek(it->br, it, want); + strbuf_reset(&it->last_key); + it->next_off = 0; + it->block = NULL; + it->block_len = 0; + it->hash_size = 0; } void block_iter_close(struct block_iter *it) { strbuf_release(&it->last_key); - strbuf_release(&it->key); + strbuf_release(&it->scratch); } -int block_reader_seek(struct block_reader *br, struct block_iter *it, - struct strbuf *want) +int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, + struct strbuf *want) { - struct restart_find_args args = { - .key = *want, - .r = br, + struct restart_needle_less_args args = { + .needle = *want, + .reader = br, }; - struct block_iter next = BLOCK_ITER_INIT; struct reftable_record rec; - int err = 0, i; - + int err = 0; + size_t i; + + /* + * Perform a binary search over the block's restart points, which + * avoids doing a linear scan over the whole block. Like this, we + * identify the section of the block that should contain our key. + * + * Note that we explicitly search for the first restart point _greater_ + * than the sought-after record, not _greater or equal_ to it. In case + * the sought-after record is located directly at the restart point we + * would otherwise start doing the linear search at the preceding + * restart point. While that works alright, we would end up scanning + * too many record. + */ + i = binsearch(br->restart_count, &restart_needle_less, &args); if (args.error) { err = REFTABLE_FORMAT_ERROR; goto done; } - i = binsearch(br->restart_count, &restart_key_less, &args); + /* + * Now there are multiple cases: + * + * - `i == 0`: The wanted record is smaller than the record found at + * the first restart point. As the first restart point is the first + * record in the block, our wanted record cannot be located in this + * block at all. We still need to position the iterator so that the + * next call to `block_iter_next()` will yield an end-of-iterator + * signal. + * + * - `i == restart_count`: The wanted record was not found at any of + * the restart points. As there is no restart point at the end of + * the section the record may thus be contained in the last block. + * + * - `i > 0`: The wanted record must be contained in the section + * before the found restart point. We thus do a linear search + * starting from the preceding restart point. + */ if (i > 0) it->next_off = block_reader_restart_offset(br, i - 1); else it->next_off = br->header_off + 4; - it->br = br; + it->block = br->block.data; + it->block_len = br->block_len; + it->hash_size = br->hash_size; reftable_record_init(&rec, block_reader_type(br)); - /* We're looking for the last entry less/equal than the wanted key, so - we have to go one entry too far and then back up. - */ + /* + * We're looking for the last entry less than the wanted key so that + * the next call to `block_reader_next()` would yield the wanted + * record. We thus don't want to position our reader at the sought + * after record, but one before. To do so, we have to go one entry too + * far and then back up. + */ while (1) { - block_iter_copy_from(&next, it); - err = block_iter_next(&next, &rec); + size_t prev_off = it->next_off; + + err = block_iter_next(it, &rec); if (err < 0) goto done; - - reftable_record_key(&rec, &it->key); - if (err > 0 || strbuf_cmp(&it->key, want) >= 0) { + if (err > 0) { + it->next_off = prev_off; err = 0; goto done; } - block_iter_copy_from(it, &next); + /* + * Check whether the current key is greater or equal to the + * sought-after key. In case it is greater we know that the + * record does not exist in the block and can thus abort early. + * In case it is equal to the sought-after key we have found + * the desired record. + * + * Note that we store the next record's key record directly in + * `last_key` without restoring the key of the preceding record + * in case we need to go one record back. This is safe to do as + * `block_iter_next()` would return the ref whose key is equal + * to `last_key` now, and naturally all keys share a prefix + * with themselves. + */ + reftable_record_key(&rec, &it->last_key); + if (strbuf_cmp(&it->last_key, want) >= 0) { + it->next_off = prev_off; + goto done; + } } done: - block_iter_close(&next); reftable_record_release(&rec); - return err; } void block_writer_release(struct block_writer *bw) { + deflateEnd(bw->zstream); + FREE_AND_NULL(bw->zstream); FREE_AND_NULL(bw->restarts); + FREE_AND_NULL(bw->compressed); strbuf_release(&bw->last_key); /* the block is not owned. */ } diff --git a/reftable/block.h b/reftable/block.h index 17481e6331..1c8f25ee6e 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -18,6 +18,10 @@ https://developers.google.com/open-source/licenses/bsd * allocation overhead. */ struct block_writer { + z_stream *zstream; + unsigned char *compressed; + size_t compressed_cap; + uint8_t *buf; uint32_t block_size; @@ -25,7 +29,7 @@ struct block_writer { uint32_t header_off; /* How often to restart keys. */ - int restart_interval; + uint16_t restart_interval; int hash_size; /* Offset of next uint8_t to write. */ @@ -56,6 +60,8 @@ int block_writer_finish(struct block_writer *w); /* clears out internally allocated block_writer members. */ void block_writer_release(struct block_writer *bw); +struct z_stream; + /* Read a block. */ struct block_reader { /* offset of the block header; nonzero for the first block in a @@ -66,6 +72,11 @@ struct block_reader { struct reftable_block block; int hash_size; + /* Uncompressed data for log entries. */ + z_stream *zstream; + unsigned char *uncompressed_data; + size_t uncompressed_cap; + /* size of the data, excluding restart data. */ uint32_t block_len; uint8_t *restart_bytes; @@ -76,47 +87,49 @@ struct block_reader { uint32_t full_block_size; }; +/* initializes a block reader. */ +int block_reader_init(struct block_reader *br, struct reftable_block *bl, + uint32_t header_off, uint32_t table_block_size, + int hash_size); + +void block_reader_release(struct block_reader *br); + +/* Returns the block type (eg. 'r' for refs) */ +uint8_t block_reader_type(const struct block_reader *r); + +/* Decodes the first key in the block */ +int block_reader_first_key(const struct block_reader *br, struct strbuf *key); + /* Iterate over entries in a block */ struct block_iter { /* offset within the block of the next entry to read. */ uint32_t next_off; - struct block_reader *br; + const unsigned char *block; + size_t block_len; + int hash_size; /* key for last entry we read. */ struct strbuf last_key; - struct strbuf key; + struct strbuf scratch; }; #define BLOCK_ITER_INIT { \ .last_key = STRBUF_INIT, \ - .key = STRBUF_INIT, \ + .scratch = STRBUF_INIT, \ } -/* initializes a block reader. */ -int block_reader_init(struct block_reader *br, struct reftable_block *bl, - uint32_t header_off, uint32_t table_block_size, - int hash_size); - /* Position `it` at start of the block */ -void block_reader_start(struct block_reader *br, struct block_iter *it); +void block_iter_seek_start(struct block_iter *it, const struct block_reader *br); /* Position `it` to the `want` key in the block */ -int block_reader_seek(struct block_reader *br, struct block_iter *it, - struct strbuf *want); - -/* Returns the block type (eg. 'r' for refs) */ -uint8_t block_reader_type(struct block_reader *r); - -/* Decodes the first key in the block */ -int block_reader_first_key(struct block_reader *br, struct strbuf *key); - -void block_iter_copy_from(struct block_iter *dest, struct block_iter *src); +int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, + struct strbuf *want); /* return < 0 for error, 0 for OK, > 0 for EOF. */ int block_iter_next(struct block_iter *it, struct reftable_record *rec); -/* Seek to `want` with in the block pointed to by `it` */ -int block_iter_seek(struct block_iter *it, struct strbuf *want); +/* Reset the block iterator to pristine state without releasing its memory. */ +void block_iter_reset(struct block_iter *it); /* deallocate memory for `it`. The block reader and its block is left intact. */ void block_iter_close(struct block_iter *it); diff --git a/reftable/block_test.c b/reftable/block_test.c deleted file mode 100644 index e162c6e33f..0000000000 --- a/reftable/block_test.c +++ /dev/null @@ -1,123 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "block.h" - -#include "system.h" -#include "blocksource.h" -#include "basics.h" -#include "constants.h" -#include "record.h" -#include "test_framework.h" -#include "reftable-tests.h" - -static void test_block_read_write(void) -{ - const int header_off = 21; /* random */ - char *names[30]; - const int N = ARRAY_SIZE(names); - const int block_size = 1024; - struct reftable_block block = { NULL }; - struct block_writer bw = { - .last_key = STRBUF_INIT, - }; - struct reftable_record rec = { - .type = BLOCK_TYPE_REF, - }; - int i = 0; - int n; - struct block_reader br = { 0 }; - struct block_iter it = BLOCK_ITER_INIT; - int j = 0; - struct strbuf want = STRBUF_INIT; - - REFTABLE_CALLOC_ARRAY(block.data, block_size); - block.len = block_size; - block.source = malloc_block_source(); - block_writer_init(&bw, BLOCK_TYPE_REF, block.data, block_size, - header_off, hash_size(GIT_SHA1_FORMAT_ID)); - - rec.u.ref.refname = ""; - rec.u.ref.value_type = REFTABLE_REF_DELETION; - n = block_writer_add(&bw, &rec); - EXPECT(n == REFTABLE_API_ERROR); - - for (i = 0; i < N; i++) { - char name[100]; - snprintf(name, sizeof(name), "branch%02d", i); - - rec.u.ref.refname = name; - rec.u.ref.value_type = REFTABLE_REF_VAL1; - memset(rec.u.ref.value.val1, i, GIT_SHA1_RAWSZ); - - names[i] = xstrdup(name); - n = block_writer_add(&bw, &rec); - rec.u.ref.refname = NULL; - rec.u.ref.value_type = REFTABLE_REF_DELETION; - EXPECT(n == 0); - } - - n = block_writer_finish(&bw); - EXPECT(n > 0); - - block_writer_release(&bw); - - block_reader_init(&br, &block, header_off, block_size, GIT_SHA1_RAWSZ); - - block_reader_start(&br, &it); - - while (1) { - int r = block_iter_next(&it, &rec); - EXPECT(r >= 0); - if (r > 0) { - break; - } - EXPECT_STREQ(names[j], rec.u.ref.refname); - j++; - } - - reftable_record_release(&rec); - block_iter_close(&it); - - for (i = 0; i < N; i++) { - struct block_iter it = BLOCK_ITER_INIT; - strbuf_reset(&want); - strbuf_addstr(&want, names[i]); - - n = block_reader_seek(&br, &it, &want); - EXPECT(n == 0); - - n = block_iter_next(&it, &rec); - EXPECT(n == 0); - - EXPECT_STREQ(names[i], rec.u.ref.refname); - - want.len--; - n = block_reader_seek(&br, &it, &want); - EXPECT(n == 0); - - n = block_iter_next(&it, &rec); - EXPECT(n == 0); - EXPECT_STREQ(names[10 * (i / 10)], rec.u.ref.refname); - - block_iter_close(&it); - } - - reftable_record_release(&rec); - reftable_block_done(&br.block); - strbuf_release(&want); - for (i = 0; i < N; i++) { - reftable_free(names[i]); - } -} - -int block_test_main(int argc, const char *argv[]) -{ - RUN_TEST(test_block_read_write); - return 0; -} diff --git a/reftable/blocksource.c b/reftable/blocksource.c index eeed254ba9..e93cac9bb6 100644 --- a/reftable/blocksource.c +++ b/reftable/blocksource.c @@ -13,14 +13,14 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-blocksource.h" #include "reftable-error.h" -static void strbuf_return_block(void *b, struct reftable_block *dest) +static void strbuf_return_block(void *b UNUSED, struct reftable_block *dest) { if (dest->len) memset(dest->data, 0xff, dest->len); reftable_free(dest->data); } -static void strbuf_close(void *b) +static void strbuf_close(void *b UNUSED) { } @@ -55,26 +55,6 @@ void block_source_from_strbuf(struct reftable_block_source *bs, bs->arg = buf; } -static void malloc_return_block(void *b, struct reftable_block *dest) -{ - if (dest->len) - memset(dest->data, 0xff, dest->len); - reftable_free(dest->data); -} - -static struct reftable_block_source_vtable malloc_vtable = { - .return_block = &malloc_return_block, -}; - -static struct reftable_block_source malloc_block_source_instance = { - .ops = &malloc_vtable, -}; - -struct reftable_block_source malloc_block_source(void) -{ - return malloc_block_source_instance; -} - struct file_block_source { uint64_t size; unsigned char *data; @@ -85,7 +65,7 @@ static uint64_t file_size(void *b) return ((struct file_block_source *)b)->size; } -static void file_return_block(void *b, struct reftable_block *dest) +static void file_return_block(void *b UNUSED, struct reftable_block *dest UNUSED) { } diff --git a/reftable/blocksource.h b/reftable/blocksource.h index 072e2727ad..659a27b406 100644 --- a/reftable/blocksource.h +++ b/reftable/blocksource.h @@ -17,6 +17,4 @@ struct reftable_block_source; void block_source_from_strbuf(struct reftable_block_source *bs, struct strbuf *buf); -struct reftable_block_source malloc_block_source(void); - #endif diff --git a/reftable/constants.h b/reftable/constants.h index 5eee72c4c1..f6beb843eb 100644 --- a/reftable/constants.h +++ b/reftable/constants.h @@ -17,5 +17,6 @@ https://developers.google.com/open-source/licenses/bsd #define MAX_RESTARTS ((1 << 16) - 1) #define DEFAULT_BLOCK_SIZE 4096 +#define DEFAULT_GEOMETRIC_FACTOR 2 #endif diff --git a/reftable/dump.c b/reftable/dump.c deleted file mode 100644 index 26e0393c7d..0000000000 --- a/reftable/dump.c +++ /dev/null @@ -1,105 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "git-compat-util.h" -#include "hash-ll.h" - -#include "reftable-blocksource.h" -#include "reftable-error.h" -#include "reftable-record.h" -#include "reftable-tests.h" -#include "reftable-writer.h" -#include "reftable-iterator.h" -#include "reftable-reader.h" -#include "reftable-stack.h" - -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <string.h> - -static int compact_stack(const char *stackdir) -{ - struct reftable_stack *stack = NULL; - struct reftable_write_options cfg = { 0 }; - - int err = reftable_new_stack(&stack, stackdir, cfg); - if (err < 0) - goto done; - - err = reftable_stack_compact_all(stack, NULL); - if (err < 0) - goto done; -done: - if (stack) { - reftable_stack_destroy(stack); - } - return err; -} - -static void print_help(void) -{ - printf("usage: dump [-cst] arg\n\n" - "options: \n" - " -c compact\n" - " -t dump table\n" - " -s dump stack\n" - " -6 sha256 hash format\n" - " -h this help\n" - "\n"); -} - -int reftable_dump_main(int argc, char *const *argv) -{ - int err = 0; - int opt_dump_table = 0; - int opt_dump_stack = 0; - int opt_compact = 0; - uint32_t opt_hash_id = GIT_SHA1_FORMAT_ID; - const char *arg = NULL, *argv0 = argv[0]; - - for (; argc > 1; argv++, argc--) - if (*argv[1] != '-') - break; - else if (!strcmp("-t", argv[1])) - opt_dump_table = 1; - else if (!strcmp("-6", argv[1])) - opt_hash_id = GIT_SHA256_FORMAT_ID; - else if (!strcmp("-s", argv[1])) - opt_dump_stack = 1; - else if (!strcmp("-c", argv[1])) - opt_compact = 1; - else if (!strcmp("-?", argv[1]) || !strcmp("-h", argv[1])) { - print_help(); - return 2; - } - - if (argc != 2) { - fprintf(stderr, "need argument\n"); - print_help(); - return 2; - } - - arg = argv[1]; - - if (opt_dump_table) { - err = reftable_reader_print_file(arg); - } else if (opt_dump_stack) { - err = reftable_stack_print_directory(arg, opt_hash_id); - } else if (opt_compact) { - err = compact_stack(arg); - } - - if (err < 0) { - fprintf(stderr, "%s: %s: %s\n", argv0, arg, - reftable_error_str(err)); - return 1; - } - return 0; -} diff --git a/reftable/error.c b/reftable/error.c index 0d1766735e..a25f28a43e 100644 --- a/reftable/error.c +++ b/reftable/error.c @@ -22,19 +22,19 @@ const char *reftable_error_str(int err) case REFTABLE_NOT_EXIST_ERROR: return "file does not exist"; case REFTABLE_LOCK_ERROR: - return "data is outdated"; + return "data is locked"; case REFTABLE_API_ERROR: return "misuse of the reftable API"; case REFTABLE_ZLIB_ERROR: return "zlib failure"; - case REFTABLE_NAME_CONFLICT: - return "file/directory conflict"; case REFTABLE_EMPTY_TABLE_ERROR: return "wrote empty table"; case REFTABLE_REFNAME_ERROR: return "invalid refname"; case REFTABLE_ENTRY_TOO_BIG_ERROR: return "entry too large"; + case REFTABLE_OUTDATED_ERROR: + return "data concurrently modified"; case -1: return "general error"; default: diff --git a/reftable/generic.c b/reftable/generic.c deleted file mode 100644 index b9f1c7c18a..0000000000 --- a/reftable/generic.c +++ /dev/null @@ -1,179 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "constants.h" -#include "record.h" -#include "generic.h" -#include "reftable-iterator.h" -#include "reftable-generic.h" - -int reftable_table_seek_ref(struct reftable_table *tab, - struct reftable_iterator *it, const char *name) -{ - struct reftable_record rec = { .type = BLOCK_TYPE_REF, - .u.ref = { - .refname = (char *)name, - } }; - return tab->ops->seek_record(tab->table_arg, it, &rec); -} - -int reftable_table_seek_log(struct reftable_table *tab, - struct reftable_iterator *it, const char *name) -{ - struct reftable_record rec = { .type = BLOCK_TYPE_LOG, - .u.log = { - .refname = (char *)name, - .update_index = ~((uint64_t)0), - } }; - return tab->ops->seek_record(tab->table_arg, it, &rec); -} - -int reftable_table_read_ref(struct reftable_table *tab, const char *name, - struct reftable_ref_record *ref) -{ - struct reftable_iterator it = { NULL }; - int err = reftable_table_seek_ref(tab, &it, name); - if (err) - goto done; - - err = reftable_iterator_next_ref(&it, ref); - if (err) - goto done; - - if (strcmp(ref->refname, name) || - reftable_ref_record_is_deletion(ref)) { - reftable_ref_record_release(ref); - err = 1; - goto done; - } - -done: - reftable_iterator_destroy(&it); - return err; -} - -int reftable_table_print(struct reftable_table *tab) { - struct reftable_iterator it = { NULL }; - struct reftable_ref_record ref = { NULL }; - struct reftable_log_record log = { NULL }; - uint32_t hash_id = reftable_table_hash_id(tab); - int err = reftable_table_seek_ref(tab, &it, ""); - if (err < 0) { - return err; - } - - while (1) { - err = reftable_iterator_next_ref(&it, &ref); - if (err > 0) { - break; - } - if (err < 0) { - return err; - } - reftable_ref_record_print(&ref, hash_id); - } - reftable_iterator_destroy(&it); - reftable_ref_record_release(&ref); - - err = reftable_table_seek_log(tab, &it, ""); - if (err < 0) { - return err; - } - while (1) { - err = reftable_iterator_next_log(&it, &log); - if (err > 0) { - break; - } - if (err < 0) { - return err; - } - reftable_log_record_print(&log, hash_id); - } - reftable_iterator_destroy(&it); - reftable_log_record_release(&log); - return 0; -} - -uint64_t reftable_table_max_update_index(struct reftable_table *tab) -{ - return tab->ops->max_update_index(tab->table_arg); -} - -uint64_t reftable_table_min_update_index(struct reftable_table *tab) -{ - return tab->ops->min_update_index(tab->table_arg); -} - -uint32_t reftable_table_hash_id(struct reftable_table *tab) -{ - return tab->ops->hash_id(tab->table_arg); -} - -void reftable_iterator_destroy(struct reftable_iterator *it) -{ - if (!it->ops) { - return; - } - it->ops->close(it->iter_arg); - it->ops = NULL; - FREE_AND_NULL(it->iter_arg); -} - -int reftable_iterator_next_ref(struct reftable_iterator *it, - struct reftable_ref_record *ref) -{ - struct reftable_record rec = { - .type = BLOCK_TYPE_REF, - .u = { - .ref = *ref - }, - }; - int err = iterator_next(it, &rec); - *ref = rec.u.ref; - return err; -} - -int reftable_iterator_next_log(struct reftable_iterator *it, - struct reftable_log_record *log) -{ - struct reftable_record rec = { - .type = BLOCK_TYPE_LOG, - .u = { - .log = *log, - }, - }; - int err = iterator_next(it, &rec); - *log = rec.u.log; - return err; -} - -int iterator_next(struct reftable_iterator *it, struct reftable_record *rec) -{ - return it->ops->next(it->iter_arg, rec); -} - -static int empty_iterator_next(void *arg, struct reftable_record *rec) -{ - return 1; -} - -static void empty_iterator_close(void *arg) -{ -} - -static struct reftable_iterator_vtable empty_vtable = { - .next = &empty_iterator_next, - .close = &empty_iterator_close, -}; - -void iterator_set_empty(struct reftable_iterator *it) -{ - assert(!it->ops); - it->iter_arg = NULL; - it->ops = &empty_vtable; -} diff --git a/reftable/generic.h b/reftable/generic.h deleted file mode 100644 index 98886a0640..0000000000 --- a/reftable/generic.h +++ /dev/null @@ -1,32 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#ifndef GENERIC_H -#define GENERIC_H - -#include "record.h" -#include "reftable-generic.h" - -/* generic interface to reftables */ -struct reftable_table_vtable { - int (*seek_record)(void *tab, struct reftable_iterator *it, - struct reftable_record *); - uint32_t (*hash_id)(void *tab); - uint64_t (*min_update_index)(void *tab); - uint64_t (*max_update_index)(void *tab); -}; - -struct reftable_iterator_vtable { - int (*next)(void *iter_arg, struct reftable_record *rec); - void (*close)(void *iter_arg); -}; - -void iterator_set_empty(struct reftable_iterator *it); -int iterator_next(struct reftable_iterator *it, struct reftable_record *rec); - -#endif diff --git a/reftable/iter.c b/reftable/iter.c index 8b5ebf6183..416a9f6996 100644 --- a/reftable/iter.c +++ b/reftable/iter.c @@ -11,14 +11,45 @@ https://developers.google.com/open-source/licenses/bsd #include "system.h" #include "block.h" -#include "generic.h" #include "constants.h" #include "reader.h" #include "reftable-error.h" -int iterator_is_null(struct reftable_iterator *it) +int iterator_seek(struct reftable_iterator *it, struct reftable_record *want) { - return !it->ops; + return it->ops->seek(it->iter_arg, want); +} + +int iterator_next(struct reftable_iterator *it, struct reftable_record *rec) +{ + return it->ops->next(it->iter_arg, rec); +} + +static int empty_iterator_seek(void *arg UNUSED, struct reftable_record *want UNUSED) +{ + return 0; +} + +static int empty_iterator_next(void *arg UNUSED, struct reftable_record *rec UNUSED) +{ + return 1; +} + +static void empty_iterator_close(void *arg UNUSED) +{ +} + +static struct reftable_iterator_vtable empty_vtable = { + .seek = &empty_iterator_seek, + .next = &empty_iterator_next, + .close = &empty_iterator_close, +}; + +void iterator_set_empty(struct reftable_iterator *it) +{ + assert(!it->ops); + it->iter_arg = NULL; + it->ops = &empty_vtable; } static void filtering_ref_iterator_close(void *iter_arg) @@ -28,6 +59,13 @@ static void filtering_ref_iterator_close(void *iter_arg) reftable_iterator_destroy(&fri->it); } +static int filtering_ref_iterator_seek(void *iter_arg, + struct reftable_record *want) +{ + struct filtering_ref_iterator *fri = iter_arg; + return iterator_seek(&fri->it, want); +} + static int filtering_ref_iterator_next(void *iter_arg, struct reftable_record *rec) { @@ -40,26 +78,6 @@ static int filtering_ref_iterator_next(void *iter_arg, break; } - if (fri->double_check) { - struct reftable_iterator it = { NULL }; - - err = reftable_table_seek_ref(&fri->tab, &it, - ref->refname); - if (err == 0) { - err = reftable_iterator_next_ref(&it, ref); - } - - reftable_iterator_destroy(&it); - - if (err < 0) { - break; - } - - if (err > 0) { - continue; - } - } - if (ref->value_type == REFTABLE_REF_VAL2 && (!memcmp(fri->oid.buf, ref->value.val2.target_value, fri->oid.len) || @@ -78,6 +96,7 @@ static int filtering_ref_iterator_next(void *iter_arg, } static struct reftable_iterator_vtable filtering_ref_iterator_vtable = { + .seek = &filtering_ref_iterator_seek, .next = &filtering_ref_iterator_next, .close = &filtering_ref_iterator_close, }; @@ -120,10 +139,17 @@ static int indexed_table_ref_iter_next_block(struct indexed_table_ref_iter *it) /* indexed block does not exist. */ return REFTABLE_FORMAT_ERROR; } - block_reader_start(&it->block_reader, &it->cur); + block_iter_seek_start(&it->cur, &it->block_reader); return 0; } +static int indexed_table_ref_iter_seek(void *p UNUSED, + struct reftable_record *want UNUSED) +{ + BUG("seeking indexed table is not supported"); + return -1; +} + static int indexed_table_ref_iter_next(void *p, struct reftable_record *rec) { struct indexed_table_ref_iter *it = p; @@ -180,6 +206,7 @@ int new_indexed_table_ref_iter(struct indexed_table_ref_iter **dest, } static struct reftable_iterator_vtable indexed_table_ref_iter_vtable = { + .seek = &indexed_table_ref_iter_seek, .next = &indexed_table_ref_iter_next, .close = &indexed_table_ref_iter_close, }; @@ -191,3 +218,71 @@ void iterator_from_indexed_table_ref_iter(struct reftable_iterator *it, it->iter_arg = itr; it->ops = &indexed_table_ref_iter_vtable; } + +void reftable_iterator_destroy(struct reftable_iterator *it) +{ + if (!it->ops) + return; + it->ops->close(it->iter_arg); + it->ops = NULL; + FREE_AND_NULL(it->iter_arg); +} + +int reftable_iterator_seek_ref(struct reftable_iterator *it, + const char *name) +{ + struct reftable_record want = { + .type = BLOCK_TYPE_REF, + .u.ref = { + .refname = (char *)name, + }, + }; + return it->ops->seek(it->iter_arg, &want); +} + +int reftable_iterator_next_ref(struct reftable_iterator *it, + struct reftable_ref_record *ref) +{ + struct reftable_record rec = { + .type = BLOCK_TYPE_REF, + .u = { + .ref = *ref + }, + }; + int err = iterator_next(it, &rec); + *ref = rec.u.ref; + return err; +} + +int reftable_iterator_seek_log_at(struct reftable_iterator *it, + const char *name, uint64_t update_index) +{ + struct reftable_record want = { + .type = BLOCK_TYPE_LOG, + .u.log = { + .refname = (char *)name, + .update_index = update_index, + }, + }; + return it->ops->seek(it->iter_arg, &want); +} + +int reftable_iterator_seek_log(struct reftable_iterator *it, + const char *name) +{ + return reftable_iterator_seek_log_at(it, name, ~((uint64_t) 0)); +} + +int reftable_iterator_next_log(struct reftable_iterator *it, + struct reftable_log_record *log) +{ + struct reftable_record rec = { + .type = BLOCK_TYPE_LOG, + .u = { + .log = *log, + }, + }; + int err = iterator_next(it, &rec); + *log = rec.u.log; + return err; +} diff --git a/reftable/iter.h b/reftable/iter.h index 47d67d84df..befc4597df 100644 --- a/reftable/iter.h +++ b/reftable/iter.h @@ -14,16 +14,36 @@ https://developers.google.com/open-source/licenses/bsd #include "record.h" #include "reftable-iterator.h" -#include "reftable-generic.h" -/* Returns true for a zeroed out iterator, such as the one returned from - * iterator_destroy. */ -int iterator_is_null(struct reftable_iterator *it); +/* + * The virtual function table for implementing generic reftable iterators. + */ +struct reftable_iterator_vtable { + int (*seek)(void *iter_arg, struct reftable_record *want); + int (*next)(void *iter_arg, struct reftable_record *rec); + void (*close)(void *iter_arg); +}; + +/* + * Position the iterator at the wanted record such that a call to + * `iterator_next()` would return that record, if it exists. + */ +int iterator_seek(struct reftable_iterator *it, struct reftable_record *want); + +/* + * Yield the next record and advance the iterator. Returns <0 on error, 0 when + * a record was yielded, and >0 when the iterator hit an error. + */ +int iterator_next(struct reftable_iterator *it, struct reftable_record *rec); + +/* + * Set up the iterator such that it behaves the same as an iterator with no + * entries. + */ +void iterator_set_empty(struct reftable_iterator *it); /* iterator that produces only ref records that point to `oid` */ struct filtering_ref_iterator { - int double_check; - struct reftable_table tab; struct strbuf oid; struct reftable_iterator it; }; diff --git a/reftable/merged.c b/reftable/merged.c index a0f222e07b..128a810c55 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -11,34 +11,39 @@ https://developers.google.com/open-source/licenses/bsd #include "constants.h" #include "iter.h" #include "pq.h" +#include "reader.h" #include "record.h" -#include "generic.h" #include "reftable-merged.h" #include "reftable-error.h" #include "system.h" -static int merged_iter_init(struct merged_iter *mi) -{ - for (size_t i = 0; i < mi->stack_len; i++) { - struct pq_entry e = { - .index = i, - }; - int err; - - reftable_record_init(&e.rec, mi->typ); - err = iterator_next(&mi->stack[i], &e.rec); - if (err < 0) - return err; - if (err > 0) { - reftable_iterator_destroy(&mi->stack[i]); - reftable_record_release(&e.rec); - continue; - } +struct merged_subiter { + struct reftable_iterator iter; + struct reftable_record rec; +}; - merged_iter_pqueue_add(&mi->pq, &e); - } +struct merged_iter { + struct merged_subiter *subiters; + struct merged_iter_pqueue pq; + size_t subiters_len; + int suppress_deletions; + ssize_t advance_index; +}; - return 0; +static void merged_iter_init(struct merged_iter *mi, + struct reftable_merged_table *mt, + uint8_t typ) +{ + memset(mi, 0, sizeof(*mi)); + mi->advance_index = -1; + mi->suppress_deletions = mt->suppress_deletions; + + REFTABLE_CALLOC_ARRAY(mi->subiters, mt->readers_len); + for (size_t i = 0; i < mt->readers_len; i++) { + reftable_record_init(&mi->subiters[i].rec, typ); + reader_init_iter(mt->readers[i], &mi->subiters[i].iter, typ); + } + mi->subiters_len = mt->readers_len; } static void merged_iter_close(void *p) @@ -46,56 +51,87 @@ static void merged_iter_close(void *p) struct merged_iter *mi = p; merged_iter_pqueue_release(&mi->pq); - for (size_t i = 0; i < mi->stack_len; i++) - reftable_iterator_destroy(&mi->stack[i]); - reftable_free(mi->stack); - strbuf_release(&mi->key); - strbuf_release(&mi->entry_key); + for (size_t i = 0; i < mi->subiters_len; i++) { + reftable_iterator_destroy(&mi->subiters[i].iter); + reftable_record_release(&mi->subiters[i].rec); + } + reftable_free(mi->subiters); } -static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, - size_t idx) +static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) { struct pq_entry e = { .index = idx, + .rec = &mi->subiters[idx].rec, }; int err; - reftable_record_init(&e.rec, mi->typ); - err = iterator_next(&mi->stack[idx], &e.rec); - if (err < 0) + err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec); + if (err) return err; - if (err > 0) { - reftable_iterator_destroy(&mi->stack[idx]); - reftable_record_release(&e.rec); - return 0; - } - merged_iter_pqueue_add(&mi->pq, &e); return 0; } -static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) +static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want) { - if (iterator_is_null(&mi->stack[idx])) - return 0; - return merged_iter_advance_nonnull_subiter(mi, idx); + int err; + + mi->advance_index = -1; + + for (size_t i = 0; i < mi->subiters_len; i++) { + err = iterator_seek(&mi->subiters[i].iter, want); + if (err < 0) + return err; + if (err > 0) + continue; + + err = merged_iter_advance_subiter(mi, i); + if (err < 0) + return err; + } + + return 0; } static int merged_iter_next_entry(struct merged_iter *mi, struct reftable_record *rec) { struct pq_entry entry = { 0 }; - int err = 0; + int err = 0, empty; + + empty = merged_iter_pqueue_is_empty(mi->pq); + + if (mi->advance_index >= 0) { + /* + * When there are no pqueue entries then we only have a single + * subiter left. There is no need to use the pqueue in that + * case anymore as we know that the subiter will return entries + * in the correct order already. + * + * While this may sound like a very specific edge case, it may + * happen more frequently than you think. Most repositories + * will end up having a single large base table that contains + * most of the refs. It's thus likely that we exhaust all + * subiters but the one from that base ref. + */ + if (empty) + return iterator_next(&mi->subiters[mi->advance_index].iter, + rec); + + err = merged_iter_advance_subiter(mi, mi->advance_index); + if (err < 0) + return err; + if (!err) + empty = 0; + mi->advance_index = -1; + } - if (merged_iter_pqueue_is_empty(mi->pq)) + if (empty) return 1; entry = merged_iter_pqueue_remove(&mi->pq); - err = merged_iter_advance_subiter(mi, entry.index); - if (err < 0) - return err; /* One can also use reftable as datacenter-local storage, where the ref @@ -105,56 +141,45 @@ static int merged_iter_next_entry(struct merged_iter *mi, such a deployment, the loop below must be changed to collect all entries for the same key, and return new the newest one. */ - reftable_record_key(&entry.rec, &mi->entry_key); while (!merged_iter_pqueue_is_empty(mi->pq)) { struct pq_entry top = merged_iter_pqueue_top(mi->pq); - int cmp = 0; + int cmp; - reftable_record_key(&top.rec, &mi->key); - - cmp = strbuf_cmp(&mi->key, &mi->entry_key); + cmp = reftable_record_cmp(top.rec, entry.rec); if (cmp > 0) break; merged_iter_pqueue_remove(&mi->pq); err = merged_iter_advance_subiter(mi, top.index); if (err < 0) - goto done; - reftable_record_release(&top.rec); + return err; } - reftable_record_release(rec); - *rec = entry.rec; - -done: - if (err) - reftable_record_release(&entry.rec); - return err; + mi->advance_index = entry.index; + SWAP(*rec, *entry.rec); + return 0; } -static int merged_iter_next(struct merged_iter *mi, struct reftable_record *rec) +static int merged_iter_seek_void(void *it, struct reftable_record *want) { - while (1) { - int err = merged_iter_next_entry(mi, rec); - if (err == 0 && mi->suppress_deletions && - reftable_record_is_deletion(rec)) { - continue; - } - - return err; - } + return merged_iter_seek(it, want); } static int merged_iter_next_void(void *p, struct reftable_record *rec) { struct merged_iter *mi = p; - if (merged_iter_pqueue_is_empty(mi->pq)) - return 1; - - return merged_iter_next(mi, rec); + while (1) { + int err = merged_iter_next_entry(mi, rec); + if (err) + return err; + if (mi->suppress_deletions && reftable_record_is_deletion(rec)) + continue; + return 0; + } } static struct reftable_iterator_vtable merged_iter_vtable = { + .seek = merged_iter_seek_void, .next = &merged_iter_next_void, .close = &merged_iter_close, }; @@ -167,8 +192,8 @@ static void iterator_from_merged_iter(struct reftable_iterator *it, it->ops = &merged_iter_vtable; } -int reftable_new_merged_table(struct reftable_merged_table **dest, - struct reftable_table *stack, size_t n, +int reftable_merged_table_new(struct reftable_merged_table **dest, + struct reftable_reader **readers, size_t n, uint32_t hash_id) { struct reftable_merged_table *m = NULL; @@ -176,10 +201,10 @@ int reftable_new_merged_table(struct reftable_merged_table **dest, uint64_t first_min = 0; for (size_t i = 0; i < n; i++) { - uint64_t min = reftable_table_min_update_index(&stack[i]); - uint64_t max = reftable_table_max_update_index(&stack[i]); + uint64_t min = reftable_reader_min_update_index(readers[i]); + uint64_t max = reftable_reader_max_update_index(readers[i]); - if (reftable_table_hash_id(&stack[i]) != hash_id) { + if (reftable_reader_hash_id(readers[i]) != hash_id) { return REFTABLE_FORMAT_ERROR; } if (i == 0 || min < first_min) { @@ -191,8 +216,8 @@ int reftable_new_merged_table(struct reftable_merged_table **dest, } REFTABLE_CALLOC_ARRAY(m, 1); - m->stack = stack; - m->stack_len = n; + m->readers = readers; + m->readers_len = n; m->min = first_min; m->max = last_max; m->hash_id = hash_id; @@ -200,19 +225,10 @@ int reftable_new_merged_table(struct reftable_merged_table **dest, return 0; } -/* clears the list of subtable, without affecting the readers themselves. */ -void merged_table_release(struct reftable_merged_table *mt) -{ - FREE_AND_NULL(mt->stack); - mt->stack_len = 0; -} - void reftable_merged_table_free(struct reftable_merged_table *mt) { - if (!mt) { + if (!mt) return; - } - merged_table_release(mt); reftable_free(mt); } @@ -228,122 +244,28 @@ reftable_merged_table_min_update_index(struct reftable_merged_table *mt) return mt->min; } -static int reftable_table_seek_record(struct reftable_table *tab, - struct reftable_iterator *it, - struct reftable_record *rec) -{ - return tab->ops->seek_record(tab->table_arg, it, rec); -} - -static int merged_table_seek_record(struct reftable_merged_table *mt, - struct reftable_iterator *it, - struct reftable_record *rec) -{ - struct merged_iter merged = { - .typ = reftable_record_type(rec), - .hash_id = mt->hash_id, - .suppress_deletions = mt->suppress_deletions, - .key = STRBUF_INIT, - .entry_key = STRBUF_INIT, - }; - struct merged_iter *p; - int err; - - REFTABLE_CALLOC_ARRAY(merged.stack, mt->stack_len); - for (size_t i = 0; i < mt->stack_len; i++) { - err = reftable_table_seek_record(&mt->stack[i], - &merged.stack[merged.stack_len], rec); - if (err < 0) - goto out; - if (!err) - merged.stack_len++; - } - - err = merged_iter_init(&merged); - if (err < 0) - goto out; - - p = reftable_malloc(sizeof(struct merged_iter)); - *p = merged; - iterator_from_merged_iter(it, p); - -out: - if (err < 0) - merged_iter_close(&merged); - return err; -} - -int reftable_merged_table_seek_ref(struct reftable_merged_table *mt, - struct reftable_iterator *it, - const char *name) +void merged_table_init_iter(struct reftable_merged_table *mt, + struct reftable_iterator *it, + uint8_t typ) { - struct reftable_record rec = { - .type = BLOCK_TYPE_REF, - .u.ref = { - .refname = (char *)name, - }, - }; - return merged_table_seek_record(mt, it, &rec); + struct merged_iter *mi = reftable_malloc(sizeof(*mi)); + merged_iter_init(mi, mt, typ); + iterator_from_merged_iter(it, mi); } -int reftable_merged_table_seek_log_at(struct reftable_merged_table *mt, - struct reftable_iterator *it, - const char *name, uint64_t update_index) +void reftable_merged_table_init_ref_iterator(struct reftable_merged_table *mt, + struct reftable_iterator *it) { - struct reftable_record rec = { .type = BLOCK_TYPE_LOG, - .u.log = { - .refname = (char *)name, - .update_index = update_index, - } }; - return merged_table_seek_record(mt, it, &rec); + merged_table_init_iter(mt, it, BLOCK_TYPE_REF); } -int reftable_merged_table_seek_log(struct reftable_merged_table *mt, - struct reftable_iterator *it, - const char *name) +void reftable_merged_table_init_log_iterator(struct reftable_merged_table *mt, + struct reftable_iterator *it) { - uint64_t max = ~((uint64_t)0); - return reftable_merged_table_seek_log_at(mt, it, name, max); + merged_table_init_iter(mt, it, BLOCK_TYPE_LOG); } uint32_t reftable_merged_table_hash_id(struct reftable_merged_table *mt) { return mt->hash_id; } - -static int reftable_merged_table_seek_void(void *tab, - struct reftable_iterator *it, - struct reftable_record *rec) -{ - return merged_table_seek_record(tab, it, rec); -} - -static uint32_t reftable_merged_table_hash_id_void(void *tab) -{ - return reftable_merged_table_hash_id(tab); -} - -static uint64_t reftable_merged_table_min_update_index_void(void *tab) -{ - return reftable_merged_table_min_update_index(tab); -} - -static uint64_t reftable_merged_table_max_update_index_void(void *tab) -{ - return reftable_merged_table_max_update_index(tab); -} - -static struct reftable_table_vtable merged_table_vtable = { - .seek_record = reftable_merged_table_seek_void, - .hash_id = reftable_merged_table_hash_id_void, - .min_update_index = reftable_merged_table_min_update_index_void, - .max_update_index = reftable_merged_table_max_update_index_void, -}; - -void reftable_table_from_merged_table(struct reftable_table *tab, - struct reftable_merged_table *merged) -{ - assert(!tab->ops); - tab->ops = &merged_table_vtable; - tab->table_arg = merged; -} diff --git a/reftable/merged.h b/reftable/merged.h index d5b39dfe7f..de5fd33f01 100644 --- a/reftable/merged.h +++ b/reftable/merged.h @@ -9,11 +9,11 @@ https://developers.google.com/open-source/licenses/bsd #ifndef MERGED_H #define MERGED_H -#include "pq.h" +#include "system.h" struct reftable_merged_table { - struct reftable_table *stack; - size_t stack_len; + struct reftable_reader **readers; + size_t readers_len; uint32_t hash_id; /* If unset, produce deletions. This is useful for compaction. For the @@ -24,17 +24,10 @@ struct reftable_merged_table { uint64_t max; }; -struct merged_iter { - struct reftable_iterator *stack; - uint32_t hash_id; - size_t stack_len; - uint8_t typ; - int suppress_deletions; - struct merged_iter_pqueue pq; - struct strbuf key; - struct strbuf entry_key; -}; +struct reftable_iterator; -void merged_table_release(struct reftable_merged_table *mt); +void merged_table_init_iter(struct reftable_merged_table *mt, + struct reftable_iterator *it, + uint8_t typ); #endif diff --git a/reftable/merged_test.c b/reftable/merged_test.c deleted file mode 100644 index d0f77a3b8f..0000000000 --- a/reftable/merged_test.c +++ /dev/null @@ -1,457 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "merged.h" - -#include "system.h" - -#include "basics.h" -#include "blocksource.h" -#include "reader.h" -#include "record.h" -#include "test_framework.h" -#include "reftable-merged.h" -#include "reftable-tests.h" -#include "reftable-generic.h" -#include "reftable-writer.h" - -static void write_test_table(struct strbuf *buf, - struct reftable_ref_record refs[], int n) -{ - uint64_t min = 0xffffffff; - uint64_t max = 0; - int i = 0; - int err; - - struct reftable_write_options opts = { - .block_size = 256, - }; - struct reftable_writer *w = NULL; - for (i = 0; i < n; i++) { - uint64_t ui = refs[i].update_index; - if (ui > max) { - max = ui; - } - if (ui < min) { - min = ui; - } - } - - w = reftable_new_writer(&strbuf_add_void, &noop_flush, buf, &opts); - reftable_writer_set_limits(w, min, max); - - for (i = 0; i < n; i++) { - uint64_t before = refs[i].update_index; - int n = reftable_writer_add_ref(w, &refs[i]); - EXPECT(n == 0); - EXPECT(before == refs[i].update_index); - } - - err = reftable_writer_close(w); - EXPECT_ERR(err); - - reftable_writer_free(w); -} - -static void write_test_log_table(struct strbuf *buf, - struct reftable_log_record logs[], int n, - uint64_t update_index) -{ - int i = 0; - int err; - - struct reftable_write_options opts = { - .block_size = 256, - .exact_log_message = 1, - }; - struct reftable_writer *w = NULL; - w = reftable_new_writer(&strbuf_add_void, &noop_flush, buf, &opts); - reftable_writer_set_limits(w, update_index, update_index); - - for (i = 0; i < n; i++) { - int err = reftable_writer_add_log(w, &logs[i]); - EXPECT_ERR(err); - } - - err = reftable_writer_close(w); - EXPECT_ERR(err); - - reftable_writer_free(w); -} - -static struct reftable_merged_table * -merged_table_from_records(struct reftable_ref_record **refs, - struct reftable_block_source **source, - struct reftable_reader ***readers, int *sizes, - struct strbuf *buf, size_t n) -{ - struct reftable_merged_table *mt = NULL; - struct reftable_table *tabs; - int err; - - REFTABLE_CALLOC_ARRAY(tabs, n); - REFTABLE_CALLOC_ARRAY(*readers, n); - REFTABLE_CALLOC_ARRAY(*source, n); - - for (size_t i = 0; i < n; i++) { - write_test_table(&buf[i], refs[i], sizes[i]); - block_source_from_strbuf(&(*source)[i], &buf[i]); - - err = reftable_new_reader(&(*readers)[i], &(*source)[i], - "name"); - EXPECT_ERR(err); - reftable_table_from_reader(&tabs[i], (*readers)[i]); - } - - err = reftable_new_merged_table(&mt, tabs, n, GIT_SHA1_FORMAT_ID); - EXPECT_ERR(err); - return mt; -} - -static void readers_destroy(struct reftable_reader **readers, size_t n) -{ - int i = 0; - for (; i < n; i++) - reftable_reader_free(readers[i]); - reftable_free(readers); -} - -static void test_merged_between(void) -{ - struct reftable_ref_record r1[] = { { - .refname = "b", - .update_index = 1, - .value_type = REFTABLE_REF_VAL1, - .value.val1 = { 1, 2, 3, 0 }, - } }; - struct reftable_ref_record r2[] = { { - .refname = "a", - .update_index = 2, - .value_type = REFTABLE_REF_DELETION, - } }; - - struct reftable_ref_record *refs[] = { r1, r2 }; - int sizes[] = { 1, 1 }; - struct strbuf bufs[2] = { STRBUF_INIT, STRBUF_INIT }; - struct reftable_block_source *bs = NULL; - struct reftable_reader **readers = NULL; - struct reftable_merged_table *mt = - merged_table_from_records(refs, &bs, &readers, sizes, bufs, 2); - int i; - struct reftable_ref_record ref = { NULL }; - struct reftable_iterator it = { NULL }; - int err = reftable_merged_table_seek_ref(mt, &it, "a"); - EXPECT_ERR(err); - - err = reftable_iterator_next_ref(&it, &ref); - EXPECT_ERR(err); - EXPECT(ref.update_index == 2); - reftable_ref_record_release(&ref); - reftable_iterator_destroy(&it); - readers_destroy(readers, 2); - reftable_merged_table_free(mt); - for (i = 0; i < ARRAY_SIZE(bufs); i++) { - strbuf_release(&bufs[i]); - } - reftable_free(bs); -} - -static void test_merged(void) -{ - struct reftable_ref_record r1[] = { - { - .refname = "a", - .update_index = 1, - .value_type = REFTABLE_REF_VAL1, - .value.val1 = { 1 }, - }, - { - .refname = "b", - .update_index = 1, - .value_type = REFTABLE_REF_VAL1, - .value.val1 = { 1 }, - }, - { - .refname = "c", - .update_index = 1, - .value_type = REFTABLE_REF_VAL1, - .value.val1 = { 1 }, - } - }; - struct reftable_ref_record r2[] = { { - .refname = "a", - .update_index = 2, - .value_type = REFTABLE_REF_DELETION, - } }; - struct reftable_ref_record r3[] = { - { - .refname = "c", - .update_index = 3, - .value_type = REFTABLE_REF_VAL1, - .value.val1 = { 2 }, - }, - { - .refname = "d", - .update_index = 3, - .value_type = REFTABLE_REF_VAL1, - .value.val1 = { 1 }, - }, - }; - - struct reftable_ref_record *want[] = { - &r2[0], - &r1[1], - &r3[0], - &r3[1], - }; - - struct reftable_ref_record *refs[] = { r1, r2, r3 }; - int sizes[3] = { 3, 1, 2 }; - struct strbuf bufs[3] = { STRBUF_INIT, STRBUF_INIT, STRBUF_INIT }; - struct reftable_block_source *bs = NULL; - struct reftable_reader **readers = NULL; - struct reftable_merged_table *mt = - merged_table_from_records(refs, &bs, &readers, sizes, bufs, 3); - - struct reftable_iterator it = { NULL }; - int err = reftable_merged_table_seek_ref(mt, &it, "a"); - struct reftable_ref_record *out = NULL; - size_t len = 0; - size_t cap = 0; - int i = 0; - - EXPECT_ERR(err); - EXPECT(reftable_merged_table_hash_id(mt) == GIT_SHA1_FORMAT_ID); - EXPECT(reftable_merged_table_min_update_index(mt) == 1); - - while (len < 100) { /* cap loops/recursion. */ - struct reftable_ref_record ref = { NULL }; - int err = reftable_iterator_next_ref(&it, &ref); - if (err > 0) - break; - - REFTABLE_ALLOC_GROW(out, len + 1, cap); - out[len++] = ref; - } - reftable_iterator_destroy(&it); - - EXPECT(ARRAY_SIZE(want) == len); - for (i = 0; i < len; i++) { - EXPECT(reftable_ref_record_equal(want[i], &out[i], - GIT_SHA1_RAWSZ)); - } - for (i = 0; i < len; i++) { - reftable_ref_record_release(&out[i]); - } - reftable_free(out); - - for (i = 0; i < 3; i++) { - strbuf_release(&bufs[i]); - } - readers_destroy(readers, 3); - reftable_merged_table_free(mt); - reftable_free(bs); -} - -static struct reftable_merged_table * -merged_table_from_log_records(struct reftable_log_record **logs, - struct reftable_block_source **source, - struct reftable_reader ***readers, int *sizes, - struct strbuf *buf, size_t n) -{ - struct reftable_merged_table *mt = NULL; - struct reftable_table *tabs; - int err; - - REFTABLE_CALLOC_ARRAY(tabs, n); - REFTABLE_CALLOC_ARRAY(*readers, n); - REFTABLE_CALLOC_ARRAY(*source, n); - - for (size_t i = 0; i < n; i++) { - write_test_log_table(&buf[i], logs[i], sizes[i], i + 1); - block_source_from_strbuf(&(*source)[i], &buf[i]); - - err = reftable_new_reader(&(*readers)[i], &(*source)[i], - "name"); - EXPECT_ERR(err); - reftable_table_from_reader(&tabs[i], (*readers)[i]); - } - - err = reftable_new_merged_table(&mt, tabs, n, GIT_SHA1_FORMAT_ID); - EXPECT_ERR(err); - return mt; -} - -static void test_merged_logs(void) -{ - uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 }; - uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 }; - uint8_t hash3[GIT_SHA1_RAWSZ] = { 3 }; - struct reftable_log_record r1[] = { - { - .refname = "a", - .update_index = 2, - .value_type = REFTABLE_LOG_UPDATE, - .value.update = { - .old_hash = hash2, - /* deletion */ - .name = "jane doe", - .email = "jane@invalid", - .message = "message2", - } - }, - { - .refname = "a", - .update_index = 1, - .value_type = REFTABLE_LOG_UPDATE, - .value.update = { - .old_hash = hash1, - .new_hash = hash2, - .name = "jane doe", - .email = "jane@invalid", - .message = "message1", - } - }, - }; - struct reftable_log_record r2[] = { - { - .refname = "a", - .update_index = 3, - .value_type = REFTABLE_LOG_UPDATE, - .value.update = { - .new_hash = hash3, - .name = "jane doe", - .email = "jane@invalid", - .message = "message3", - } - }, - }; - struct reftable_log_record r3[] = { - { - .refname = "a", - .update_index = 2, - .value_type = REFTABLE_LOG_DELETION, - }, - }; - struct reftable_log_record *want[] = { - &r2[0], - &r3[0], - &r1[1], - }; - - struct reftable_log_record *logs[] = { r1, r2, r3 }; - int sizes[3] = { 2, 1, 1 }; - struct strbuf bufs[3] = { STRBUF_INIT, STRBUF_INIT, STRBUF_INIT }; - struct reftable_block_source *bs = NULL; - struct reftable_reader **readers = NULL; - struct reftable_merged_table *mt = merged_table_from_log_records( - logs, &bs, &readers, sizes, bufs, 3); - - struct reftable_iterator it = { NULL }; - int err = reftable_merged_table_seek_log(mt, &it, "a"); - struct reftable_log_record *out = NULL; - size_t len = 0; - size_t cap = 0; - int i = 0; - - EXPECT_ERR(err); - EXPECT(reftable_merged_table_hash_id(mt) == GIT_SHA1_FORMAT_ID); - EXPECT(reftable_merged_table_min_update_index(mt) == 1); - - while (len < 100) { /* cap loops/recursion. */ - struct reftable_log_record log = { NULL }; - int err = reftable_iterator_next_log(&it, &log); - if (err > 0) - break; - - REFTABLE_ALLOC_GROW(out, len + 1, cap); - out[len++] = log; - } - reftable_iterator_destroy(&it); - - EXPECT(ARRAY_SIZE(want) == len); - for (i = 0; i < len; i++) { - EXPECT(reftable_log_record_equal(want[i], &out[i], - GIT_SHA1_RAWSZ)); - } - - err = reftable_merged_table_seek_log_at(mt, &it, "a", 2); - EXPECT_ERR(err); - reftable_log_record_release(&out[0]); - err = reftable_iterator_next_log(&it, &out[0]); - EXPECT_ERR(err); - EXPECT(reftable_log_record_equal(&out[0], &r3[0], GIT_SHA1_RAWSZ)); - reftable_iterator_destroy(&it); - - for (i = 0; i < len; i++) { - reftable_log_record_release(&out[i]); - } - reftable_free(out); - - for (i = 0; i < 3; i++) { - strbuf_release(&bufs[i]); - } - readers_destroy(readers, 3); - reftable_merged_table_free(mt); - reftable_free(bs); -} - -static void test_default_write_opts(void) -{ - struct reftable_write_options opts = { 0 }; - struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - - struct reftable_ref_record rec = { - .refname = "master", - .update_index = 1, - }; - int err; - struct reftable_block_source source = { NULL }; - struct reftable_table *tab = reftable_calloc(1, sizeof(*tab)); - uint32_t hash_id; - struct reftable_reader *rd = NULL; - struct reftable_merged_table *merged = NULL; - - reftable_writer_set_limits(w, 1, 1); - - err = reftable_writer_add_ref(w, &rec); - EXPECT_ERR(err); - - err = reftable_writer_close(w); - EXPECT_ERR(err); - reftable_writer_free(w); - - block_source_from_strbuf(&source, &buf); - - err = reftable_new_reader(&rd, &source, "filename"); - EXPECT_ERR(err); - - hash_id = reftable_reader_hash_id(rd); - EXPECT(hash_id == GIT_SHA1_FORMAT_ID); - - reftable_table_from_reader(&tab[0], rd); - err = reftable_new_merged_table(&merged, tab, 1, GIT_SHA1_FORMAT_ID); - EXPECT_ERR(err); - - reftable_reader_free(rd); - reftable_merged_table_free(merged); - strbuf_release(&buf); -} - -/* XXX test refs_for(oid) */ - -int merged_test_main(int argc, const char *argv[]) -{ - RUN_TEST(test_merged_logs); - RUN_TEST(test_merged_between); - RUN_TEST(test_merged); - RUN_TEST(test_default_write_opts); - return 0; -} diff --git a/reftable/pq.c b/reftable/pq.c index 2461daf5ff..2b5b7d1c0e 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -14,56 +14,29 @@ https://developers.google.com/open-source/licenses/bsd int pq_less(struct pq_entry *a, struct pq_entry *b) { - struct strbuf ak = STRBUF_INIT; - struct strbuf bk = STRBUF_INIT; - int cmp = 0; - reftable_record_key(&a->rec, &ak); - reftable_record_key(&b->rec, &bk); - - cmp = strbuf_cmp(&ak, &bk); - - strbuf_release(&ak); - strbuf_release(&bk); - + int cmp = reftable_record_cmp(a->rec, b->rec); if (cmp == 0) return a->index > b->index; - return cmp < 0; } -struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq) -{ - return pq.heap[0]; -} - -int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq) -{ - return pq.len == 0; -} - struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq) { - int i = 0; + size_t i = 0; struct pq_entry e = pq->heap[0]; pq->heap[0] = pq->heap[pq->len - 1]; pq->len--; - i = 0; while (i < pq->len) { - int min = i; - int j = 2 * i + 1; - int k = 2 * i + 2; - if (j < pq->len && pq_less(&pq->heap[j], &pq->heap[i])) { + size_t min = i; + size_t j = 2 * i + 1; + size_t k = 2 * i + 2; + if (j < pq->len && pq_less(&pq->heap[j], &pq->heap[i])) min = j; - } - if (k < pq->len && pq_less(&pq->heap[k], &pq->heap[min])) { + if (k < pq->len && pq_less(&pq->heap[k], &pq->heap[min])) min = k; - } - - if (min == i) { + if (min == i) break; - } - SWAP(pq->heap[i], pq->heap[min]); i = min; } @@ -73,30 +46,23 @@ struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq) void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e) { - int i = 0; + size_t i = 0; REFTABLE_ALLOC_GROW(pq->heap, pq->len + 1, pq->cap); pq->heap[pq->len++] = *e; i = pq->len - 1; while (i > 0) { - int j = (i - 1) / 2; - if (pq_less(&pq->heap[j], &pq->heap[i])) { + size_t j = (i - 1) / 2; + if (pq_less(&pq->heap[j], &pq->heap[i])) break; - } - SWAP(pq->heap[j], pq->heap[i]); - i = j; } } void merged_iter_pqueue_release(struct merged_iter_pqueue *pq) { - int i = 0; - for (i = 0; i < pq->len; i++) { - reftable_record_release(&pq->heap[i].rec); - } FREE_AND_NULL(pq->heap); - pq->len = pq->cap = 0; + memset(pq, 0, sizeof(*pq)); } diff --git a/reftable/pq.h b/reftable/pq.h index e85bac9b52..707bd26767 100644 --- a/reftable/pq.h +++ b/reftable/pq.h @@ -12,8 +12,8 @@ https://developers.google.com/open-source/licenses/bsd #include "record.h" struct pq_entry { - int index; - struct reftable_record rec; + size_t index; + struct reftable_record *rec; }; struct merged_iter_pqueue { @@ -22,12 +22,19 @@ struct merged_iter_pqueue { size_t cap; }; -struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq); -int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq); -void merged_iter_pqueue_check(struct merged_iter_pqueue pq); struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq); void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e); void merged_iter_pqueue_release(struct merged_iter_pqueue *pq); int pq_less(struct pq_entry *a, struct pq_entry *b); +static inline struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq) +{ + return pq.heap[0]; +} + +static inline int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq) +{ + return pq.len == 0; +} + #endif diff --git a/reftable/pq_test.c b/reftable/pq_test.c deleted file mode 100644 index c202eff848..0000000000 --- a/reftable/pq_test.c +++ /dev/null @@ -1,79 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "system.h" - -#include "basics.h" -#include "constants.h" -#include "pq.h" -#include "record.h" -#include "reftable-tests.h" -#include "test_framework.h" - -void merged_iter_pqueue_check(struct merged_iter_pqueue pq) -{ - int i; - for (i = 1; i < pq.len; i++) { - int parent = (i - 1) / 2; - - EXPECT(pq_less(&pq.heap[parent], &pq.heap[i])); - } -} - -static void test_pq(void) -{ - char *names[54] = { NULL }; - int N = ARRAY_SIZE(names) - 1; - - struct merged_iter_pqueue pq = { NULL }; - char *last = NULL; - - int i = 0; - for (i = 0; i < N; i++) { - char name[100]; - snprintf(name, sizeof(name), "%02d", i); - names[i] = xstrdup(name); - } - - i = 1; - do { - struct pq_entry e = { .rec = { .type = BLOCK_TYPE_REF, - .u.ref = { - .refname = names[i], - } } }; - merged_iter_pqueue_add(&pq, &e); - merged_iter_pqueue_check(pq); - i = (i * 7) % N; - } while (i != 1); - - while (!merged_iter_pqueue_is_empty(pq)) { - struct pq_entry e = merged_iter_pqueue_remove(&pq); - struct reftable_record *rec = &e.rec; - merged_iter_pqueue_check(pq); - - EXPECT(reftable_record_type(rec) == BLOCK_TYPE_REF); - if (last) { - EXPECT(strcmp(last, rec->u.ref.refname) < 0); - } - /* this is names[i], so don't dealloc. */ - last = rec->u.ref.refname; - rec->u.ref.refname = NULL; - reftable_record_release(rec); - } - for (i = 0; i < N; i++) { - reftable_free(names[i]); - } - - merged_iter_pqueue_release(&pq); -} - -int pq_test_main(int argc, const char *argv[]) -{ - RUN_TEST(test_pq); - return 0; -} diff --git a/reftable/reader.c b/reftable/reader.c index 2663b03938..f877099087 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -11,11 +11,9 @@ https://developers.google.com/open-source/licenses/bsd #include "system.h" #include "block.h" #include "constants.h" -#include "generic.h" #include "iter.h" #include "record.h" #include "reftable-error.h" -#include "reftable-generic.h" uint64_t block_source_size(struct reftable_block_source *source) { @@ -164,77 +162,23 @@ done: return err; } -int init_reader(struct reftable_reader *r, struct reftable_block_source *source, - const char *name) -{ - struct reftable_block footer = { NULL }; - struct reftable_block header = { NULL }; - int err = 0; - uint64_t file_size = block_source_size(source); - - /* Need +1 to read type of first block. */ - uint32_t read_size = header_size(2) + 1; /* read v2 because it's larger. */ - memset(r, 0, sizeof(struct reftable_reader)); - - if (read_size > file_size) { - err = REFTABLE_FORMAT_ERROR; - goto done; - } - - err = block_source_read_block(source, &header, 0, read_size); - if (err != read_size) { - err = REFTABLE_IO_ERROR; - goto done; - } - - if (memcmp(header.data, "REFT", 4)) { - err = REFTABLE_FORMAT_ERROR; - goto done; - } - r->version = header.data[4]; - if (r->version != 1 && r->version != 2) { - err = REFTABLE_FORMAT_ERROR; - goto done; - } - - r->size = file_size - footer_size(r->version); - r->source = *source; - r->name = xstrdup(name); - r->hash_id = 0; - - err = block_source_read_block(source, &footer, r->size, - footer_size(r->version)); - if (err != footer_size(r->version)) { - err = REFTABLE_IO_ERROR; - goto done; - } - - err = parse_footer(r, footer.data, header.data); -done: - reftable_block_done(&footer); - reftable_block_done(&header); - return err; -} - struct table_iter { struct reftable_reader *r; uint8_t typ; uint64_t block_off; + struct block_reader br; struct block_iter bi; int is_finished; }; -#define TABLE_ITER_INIT { \ - .bi = BLOCK_ITER_INIT \ -} -static void table_iter_copy_from(struct table_iter *dest, - struct table_iter *src) +static int table_iter_init(struct table_iter *ti, struct reftable_reader *r) { - dest->r = src->r; - dest->typ = src->typ; - dest->block_off = src->block_off; - dest->is_finished = src->is_finished; - block_iter_copy_from(&dest->bi, &src->bi); + struct block_iter bi = BLOCK_ITER_INIT; + memset(ti, 0, sizeof(*ti)); + reftable_reader_incref(r); + ti->r = r; + ti->bi = bi; + return 0; } static int table_iter_next_in_block(struct table_iter *ti, @@ -250,14 +194,8 @@ static int table_iter_next_in_block(struct table_iter *ti, static void table_iter_block_done(struct table_iter *ti) { - if (!ti->bi.br) { - return; - } - reftable_block_done(&ti->bi.br->block); - FREE_AND_NULL(ti->bi.br); - - ti->bi.last_key.len = 0; - ti->bi.next_off = 0; + block_reader_release(&ti->br); + block_iter_reset(&ti->bi); } static int32_t extract_block_size(uint8_t *data, uint8_t *typ, uint64_t off, @@ -321,32 +259,28 @@ done: return err; } -static int table_iter_next_block(struct table_iter *dest, - struct table_iter *src) +static void table_iter_close(struct table_iter *ti) { - uint64_t next_block_off = src->block_off + src->bi.br->full_block_size; - struct block_reader br = { 0 }; - int err = 0; + table_iter_block_done(ti); + block_iter_close(&ti->bi); + reftable_reader_decref(ti->r); +} - dest->r = src->r; - dest->typ = src->typ; - dest->block_off = next_block_off; +static int table_iter_next_block(struct table_iter *ti) +{ + uint64_t next_block_off = ti->block_off + ti->br.full_block_size; + int err; - err = reader_init_block_reader(src->r, &br, next_block_off, src->typ); - if (err > 0) { - dest->is_finished = 1; - return 1; - } - if (err != 0) + err = reader_init_block_reader(ti->r, &ti->br, next_block_off, ti->typ); + if (err > 0) + ti->is_finished = 1; + if (err) return err; - else { - struct block_reader *brp = - reftable_malloc(sizeof(struct block_reader)); - *brp = br; - dest->is_finished = 0; - block_reader_start(brp, &dest->bi); - } + ti->block_off = next_block_off; + ti->is_finished = 0; + block_iter_seek_start(&ti->bi, &ti->br); + return 0; } @@ -356,79 +290,50 @@ static int table_iter_next(struct table_iter *ti, struct reftable_record *rec) return REFTABLE_API_ERROR; while (1) { - struct table_iter next = TABLE_ITER_INIT; - int err = 0; - if (ti->is_finished) { + int err; + + if (ti->is_finished) return 1; - } + /* + * Check whether the current block still has more records. If + * so, return it. If the iterator returns positive then the + * current block has been exhausted. + */ err = table_iter_next_in_block(ti, rec); - if (err <= 0) { + if (err <= 0) return err; - } - err = table_iter_next_block(&next, ti); - if (err != 0) { + /* + * Otherwise, we need to continue to the next block in the + * table and retry. If there are no more blocks then the + * iterator is drained. + */ + err = table_iter_next_block(ti); + if (err) { ti->is_finished = 1; - } - table_iter_block_done(ti); - if (err != 0) { return err; } - table_iter_copy_from(ti, &next); - block_iter_close(&next.bi); } } -static int table_iter_next_void(void *ti, struct reftable_record *rec) -{ - return table_iter_next(ti, rec); -} - -static void table_iter_close(void *p) +static int table_iter_seek_to(struct table_iter *ti, uint64_t off, uint8_t typ) { - struct table_iter *ti = p; - table_iter_block_done(ti); - block_iter_close(&ti->bi); -} - -static struct reftable_iterator_vtable table_iter_vtable = { - .next = &table_iter_next_void, - .close = &table_iter_close, -}; - -static void iterator_from_table_iter(struct reftable_iterator *it, - struct table_iter *ti) -{ - assert(!it->ops); - it->iter_arg = ti; - it->ops = &table_iter_vtable; -} - -static int reader_table_iter_at(struct reftable_reader *r, - struct table_iter *ti, uint64_t off, - uint8_t typ) -{ - struct block_reader br = { 0 }; - struct block_reader *brp = NULL; + int err; - int err = reader_init_block_reader(r, &br, off, typ); + err = reader_init_block_reader(ti->r, &ti->br, off, typ); if (err != 0) return err; - brp = reftable_malloc(sizeof(struct block_reader)); - *brp = br; - ti->r = r; - ti->typ = block_reader_type(brp); + ti->typ = block_reader_type(&ti->br); ti->block_off = off; - block_reader_start(brp, &ti->bi); + block_iter_seek_start(&ti->bi, &ti->br); return 0; } -static int reader_start(struct reftable_reader *r, struct table_iter *ti, - uint8_t typ, int index) +static int table_iter_seek_start(struct table_iter *ti, uint8_t typ, int index) { - struct reftable_reader_offsets *offs = reader_offsets_for(r, typ); + struct reftable_reader_offsets *offs = reader_offsets_for(ti->r, typ); uint64_t off = offs->offset; if (index) { off = offs->index_offset; @@ -438,31 +343,60 @@ static int reader_start(struct reftable_reader *r, struct table_iter *ti, typ = BLOCK_TYPE_INDEX; } - return reader_table_iter_at(r, ti, off, typ); + return table_iter_seek_to(ti, off, typ); } -static int reader_seek_linear(struct table_iter *ti, - struct reftable_record *want) +static int table_iter_seek_linear(struct table_iter *ti, + struct reftable_record *want) { struct strbuf want_key = STRBUF_INIT; struct strbuf got_key = STRBUF_INIT; - struct table_iter next = TABLE_ITER_INIT; struct reftable_record rec; - int err = -1; + int err; reftable_record_init(&rec, reftable_record_type(want)); reftable_record_key(want, &want_key); + /* + * First we need to locate the block that must contain our record. To + * do so we scan through blocks linearly until we find the first block + * whose first key is bigger than our wanted key. Once we have found + * that block we know that the key must be contained in the preceding + * block. + * + * This algorithm is somewhat unfortunate because it means that we + * always have to seek one block too far and then back up. But as we + * can only decode the _first_ key of a block but not its _last_ key we + * have no other way to do this. + */ while (1) { - err = table_iter_next_block(&next, ti); + struct table_iter next = *ti; + + /* + * We must be careful to not modify underlying data of `ti` + * because we may find that `next` does not contain our desired + * block, but that `ti` does. In that case, we would discard + * `next` and continue with `ti`. + * + * This also means that we cannot reuse allocated memory for + * `next` here. While it would be great if we could, it should + * in practice not be too bad given that we should only ever + * end up doing linear seeks with at most three blocks. As soon + * as we have more than three blocks we would have an index, so + * we would not do a linear search there anymore. + */ + memset(&next.br.block, 0, sizeof(next.br.block)); + next.br.zstream = NULL; + next.br.uncompressed_data = NULL; + next.br.uncompressed_cap = 0; + + err = table_iter_next_block(&next); if (err < 0) goto done; - - if (err > 0) { + if (err > 0) break; - } - err = block_reader_first_key(next.bi.br, &got_key); + err = block_reader_first_key(&next.br, &got_key); if (err < 0) goto done; @@ -472,25 +406,28 @@ static int reader_seek_linear(struct table_iter *ti, } table_iter_block_done(ti); - table_iter_copy_from(ti, &next); + *ti = next; } - err = block_iter_seek(&ti->bi, &want_key); + /* + * We have located the block that must contain our record, so we seek + * the wanted key inside of it. If the block does not contain our key + * we know that the corresponding record does not exist. + */ + err = block_iter_seek_key(&ti->bi, &ti->br, &want_key); if (err < 0) goto done; err = 0; done: - block_iter_close(&next.bi); reftable_record_release(&rec); strbuf_release(&want_key); strbuf_release(&got_key); return err; } -static int reader_seek_indexed(struct reftable_reader *r, - struct reftable_iterator *it, - struct reftable_record *rec) +static int table_iter_seek_indexed(struct table_iter *ti, + struct reftable_record *rec) { struct reftable_record want_index = { .type = BLOCK_TYPE_INDEX, .u.idx = { .last_key = STRBUF_INIT } @@ -499,14 +436,9 @@ static int reader_seek_indexed(struct reftable_reader *r, .type = BLOCK_TYPE_INDEX, .u.idx = { .last_key = STRBUF_INIT }, }; - struct table_iter index_iter = TABLE_ITER_INIT; - struct table_iter next = TABLE_ITER_INIT; - int err = 0; + int err; reftable_record_key(rec, &want_index.u.idx.last_key); - err = reader_start(r, &index_iter, reftable_record_type(rec), 1); - if (err < 0) - goto done; /* * The index may consist of multiple levels, where each level may have @@ -514,7 +446,7 @@ static int reader_seek_indexed(struct reftable_reader *r, * highest layer that identifies the relevant index block as well as * the record inside that block that corresponds to our wanted key. */ - err = reader_seek_linear(&index_iter, &want_index); + err = table_iter_seek_linear(ti, &want_index); if (err < 0) goto done; @@ -540,146 +472,199 @@ static int reader_seek_indexed(struct reftable_reader *r, * all levels of the index only to find out that the key does * not exist. */ - err = table_iter_next(&index_iter, &index_result); - table_iter_block_done(&index_iter); + err = table_iter_next(ti, &index_result); if (err != 0) goto done; - err = reader_table_iter_at(r, &next, index_result.u.idx.offset, - 0); + err = table_iter_seek_to(ti, index_result.u.idx.offset, 0); if (err != 0) goto done; - err = block_iter_seek(&next.bi, &want_index.u.idx.last_key); + err = block_iter_seek_key(&ti->bi, &ti->br, &want_index.u.idx.last_key); if (err < 0) goto done; - if (next.typ == reftable_record_type(rec)) { + if (ti->typ == reftable_record_type(rec)) { err = 0; break; } - if (next.typ != BLOCK_TYPE_INDEX) { + if (ti->typ != BLOCK_TYPE_INDEX) { err = REFTABLE_FORMAT_ERROR; - break; + goto done; } - - table_iter_copy_from(&index_iter, &next); } - if (err == 0) { - struct table_iter empty = TABLE_ITER_INIT; - struct table_iter *malloced = reftable_calloc(1, sizeof(*malloced)); - *malloced = empty; - table_iter_copy_from(malloced, &next); - iterator_from_table_iter(it, malloced); - } done: - block_iter_close(&next.bi); - table_iter_close(&index_iter); reftable_record_release(&want_index); reftable_record_release(&index_result); return err; } -static int reader_seek_internal(struct reftable_reader *r, - struct reftable_iterator *it, - struct reftable_record *rec) +static int table_iter_seek(struct table_iter *ti, + struct reftable_record *want) { - struct reftable_reader_offsets *offs = - reader_offsets_for(r, reftable_record_type(rec)); - uint64_t idx = offs->index_offset; - struct table_iter ti = TABLE_ITER_INIT; - int err = 0; - if (idx > 0) - return reader_seek_indexed(r, it, rec); + uint8_t typ = reftable_record_type(want); + struct reftable_reader_offsets *offs = reader_offsets_for(ti->r, typ); + int err; - err = reader_start(r, &ti, reftable_record_type(rec), 0); - if (err < 0) - return err; - err = reader_seek_linear(&ti, rec); + err = table_iter_seek_start(ti, reftable_record_type(want), + !!offs->index_offset); if (err < 0) - return err; - else { - struct table_iter *p = - reftable_malloc(sizeof(struct table_iter)); - *p = ti; - iterator_from_table_iter(it, p); - } + goto out; - return 0; + if (offs->index_offset) + err = table_iter_seek_indexed(ti, want); + else + err = table_iter_seek_linear(ti, want); + if (err) + goto out; + +out: + return err; } -static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it, - struct reftable_record *rec) +static int table_iter_seek_void(void *ti, struct reftable_record *want) { - uint8_t typ = reftable_record_type(rec); + return table_iter_seek(ti, want); +} - struct reftable_reader_offsets *offs = reader_offsets_for(r, typ); - if (!offs->is_present) { - iterator_set_empty(it); - return 0; - } +static int table_iter_next_void(void *ti, struct reftable_record *rec) +{ + return table_iter_next(ti, rec); +} - return reader_seek_internal(r, it, rec); +static void table_iter_close_void(void *ti) +{ + table_iter_close(ti); } -int reftable_reader_seek_ref(struct reftable_reader *r, - struct reftable_iterator *it, const char *name) +static struct reftable_iterator_vtable table_iter_vtable = { + .seek = &table_iter_seek_void, + .next = &table_iter_next_void, + .close = &table_iter_close_void, +}; + +static void iterator_from_table_iter(struct reftable_iterator *it, + struct table_iter *ti) { - struct reftable_record rec = { - .type = BLOCK_TYPE_REF, - .u.ref = { - .refname = (char *)name, - }, - }; - return reader_seek(r, it, &rec); + assert(!it->ops); + it->iter_arg = ti; + it->ops = &table_iter_vtable; } -int reftable_reader_seek_log_at(struct reftable_reader *r, - struct reftable_iterator *it, const char *name, - uint64_t update_index) +void reader_init_iter(struct reftable_reader *r, + struct reftable_iterator *it, + uint8_t typ) { - struct reftable_record rec = { .type = BLOCK_TYPE_LOG, - .u.log = { - .refname = (char *)name, - .update_index = update_index, - } }; - return reader_seek(r, it, &rec); + struct reftable_reader_offsets *offs = reader_offsets_for(r, typ); + + if (offs->is_present) { + struct table_iter *ti; + REFTABLE_ALLOC_ARRAY(ti, 1); + table_iter_init(ti, r); + iterator_from_table_iter(it, ti); + } else { + iterator_set_empty(it); + } } -int reftable_reader_seek_log(struct reftable_reader *r, - struct reftable_iterator *it, const char *name) +void reftable_reader_init_ref_iterator(struct reftable_reader *r, + struct reftable_iterator *it) { - uint64_t max = ~((uint64_t)0); - return reftable_reader_seek_log_at(r, it, name, max); + reader_init_iter(r, it, BLOCK_TYPE_REF); } -void reader_close(struct reftable_reader *r) +void reftable_reader_init_log_iterator(struct reftable_reader *r, + struct reftable_iterator *it) { - block_source_close(&r->source); - FREE_AND_NULL(r->name); + reader_init_iter(r, it, BLOCK_TYPE_LOG); } -int reftable_new_reader(struct reftable_reader **p, - struct reftable_block_source *src, char const *name) +int reftable_reader_new(struct reftable_reader **out, + struct reftable_block_source *source, char const *name) { - struct reftable_reader *rd = reftable_calloc(1, sizeof(*rd)); - int err = init_reader(rd, src, name); - if (err == 0) { - *p = rd; - } else { - block_source_close(src); - reftable_free(rd); + struct reftable_block footer = { 0 }; + struct reftable_block header = { 0 }; + struct reftable_reader *r; + uint64_t file_size = block_source_size(source); + uint32_t read_size; + int err; + + REFTABLE_CALLOC_ARRAY(r, 1); + + /* + * We need one extra byte to read the type of first block. We also + * pretend to always be reading v2 of the format because it is larger. + */ + read_size = header_size(2) + 1; + if (read_size > file_size) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + err = block_source_read_block(source, &header, 0, read_size); + if (err != read_size) { + err = REFTABLE_IO_ERROR; + goto done; + } + + if (memcmp(header.data, "REFT", 4)) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + r->version = header.data[4]; + if (r->version != 1 && r->version != 2) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + r->size = file_size - footer_size(r->version); + r->source = *source; + r->name = xstrdup(name); + r->hash_id = 0; + r->refcount = 1; + + err = block_source_read_block(source, &footer, r->size, + footer_size(r->version)); + if (err != footer_size(r->version)) { + err = REFTABLE_IO_ERROR; + goto done; + } + + err = parse_footer(r, footer.data, header.data); + if (err) + goto done; + + *out = r; + +done: + reftable_block_done(&footer); + reftable_block_done(&header); + if (err) { + reftable_free(r); + block_source_close(source); } return err; } -void reftable_reader_free(struct reftable_reader *r) +void reftable_reader_incref(struct reftable_reader *r) +{ + if (!r->refcount) + BUG("cannot increment ref counter of dead reader"); + r->refcount++; +} + +void reftable_reader_decref(struct reftable_reader *r) { if (!r) return; - reader_close(r); + if (!r->refcount) + BUG("cannot decrement ref counter of dead reader"); + if (--r->refcount) + return; + block_source_close(&r->source); + FREE_AND_NULL(r->name); reftable_free(r); } @@ -703,7 +688,8 @@ static int reftable_reader_refs_for_indexed(struct reftable_reader *r, struct indexed_table_ref_iter *itr = NULL; /* Look through the reverse index. */ - err = reader_seek(r, &oit, &want); + reader_init_iter(r, &oit, BLOCK_TYPE_OBJ); + err = iterator_seek(&oit, &want); if (err != 0) goto done; @@ -738,15 +724,15 @@ static int reftable_reader_refs_for_unindexed(struct reftable_reader *r, struct reftable_iterator *it, uint8_t *oid) { - struct table_iter ti_empty = TABLE_ITER_INIT; - struct table_iter *ti = reftable_calloc(1, sizeof(*ti)); + struct table_iter *ti; struct filtering_ref_iterator *filter = NULL; struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT; int oid_len = hash_size(r->hash_id); int err; - *ti = ti_empty; - err = reader_start(r, ti, BLOCK_TYPE_REF, 0); + REFTABLE_ALLOC_ARRAY(ti, 1); + table_iter_init(ti, r); + err = table_iter_seek_start(ti, BLOCK_TYPE_REF, 0); if (err < 0) { reftable_free(ti); return err; @@ -756,8 +742,6 @@ static int reftable_reader_refs_for_unindexed(struct reftable_reader *r, *filter = empty; strbuf_add(&filter->oid, oid, oid_len); - reftable_table_from_reader(&filter->tab, r); - filter->double_check = 0; iterator_from_table_iter(&filter->it, ti); iterator_from_filtering_ref_iterator(it, filter); @@ -782,61 +766,67 @@ uint64_t reftable_reader_min_update_index(struct reftable_reader *r) return r->min_update_index; } -/* generic table interface. */ - -static int reftable_reader_seek_void(void *tab, struct reftable_iterator *it, - struct reftable_record *rec) +int reftable_reader_print_blocks(const char *tablename) { - return reader_seek(tab, it, rec); -} + struct { + const char *name; + int type; + } sections[] = { + { + .name = "ref", + .type = BLOCK_TYPE_REF, + }, + { + .name = "obj", + .type = BLOCK_TYPE_OBJ, + }, + { + .name = "log", + .type = BLOCK_TYPE_LOG, + }, + }; + struct reftable_block_source src = { 0 }; + struct reftable_reader *r = NULL; + struct table_iter ti = { 0 }; + size_t i; + int err; -static uint32_t reftable_reader_hash_id_void(void *tab) -{ - return reftable_reader_hash_id(tab); -} + err = reftable_block_source_from_file(&src, tablename); + if (err < 0) + goto done; -static uint64_t reftable_reader_min_update_index_void(void *tab) -{ - return reftable_reader_min_update_index(tab); -} + err = reftable_reader_new(&r, &src, tablename); + if (err < 0) + goto done; -static uint64_t reftable_reader_max_update_index_void(void *tab) -{ - return reftable_reader_max_update_index(tab); -} + table_iter_init(&ti, r); -static struct reftable_table_vtable reader_vtable = { - .seek_record = reftable_reader_seek_void, - .hash_id = reftable_reader_hash_id_void, - .min_update_index = reftable_reader_min_update_index_void, - .max_update_index = reftable_reader_max_update_index_void, -}; + printf("header:\n"); + printf(" block_size: %d\n", r->block_size); -void reftable_table_from_reader(struct reftable_table *tab, - struct reftable_reader *reader) -{ - assert(!tab->ops); - tab->ops = &reader_vtable; - tab->table_arg = reader; -} + for (i = 0; i < ARRAY_SIZE(sections); i++) { + err = table_iter_seek_start(&ti, sections[i].type, 0); + if (err < 0) + goto done; + if (err > 0) + continue; + printf("%s:\n", sections[i].name); -int reftable_reader_print_file(const char *tablename) -{ - struct reftable_block_source src = { NULL }; - int err = reftable_block_source_from_file(&src, tablename); - struct reftable_reader *r = NULL; - struct reftable_table tab = { NULL }; - if (err < 0) - goto done; + while (1) { + printf(" - length: %u\n", ti.br.block_len); + printf(" restarts: %u\n", ti.br.restart_count); - err = reftable_new_reader(&r, &src, tablename); - if (err < 0) - goto done; + err = table_iter_next_block(&ti); + if (err < 0) + goto done; + if (err > 0) + break; + } + } - reftable_table_from_reader(&tab, r); - err = reftable_table_print(&tab); done: - reftable_reader_free(r); + reftable_reader_decref(r); + table_iter_close(&ti); return err; } diff --git a/reftable/reader.h b/reftable/reader.h index e869165f23..3710ee09b4 100644 --- a/reftable/reader.h +++ b/reftable/reader.h @@ -50,13 +50,16 @@ struct reftable_reader { struct reftable_reader_offsets ref_offsets; struct reftable_reader_offsets obj_offsets; struct reftable_reader_offsets log_offsets; + + uint64_t refcount; }; -int init_reader(struct reftable_reader *r, struct reftable_block_source *source, - const char *name); -void reader_close(struct reftable_reader *r); const char *reader_name(struct reftable_reader *r); +void reader_init_iter(struct reftable_reader *r, + struct reftable_iterator *it, + uint8_t typ); + /* initialize a block reader to read from `r` */ int reader_init_block_reader(struct reftable_reader *r, struct block_reader *br, uint64_t next_off, uint8_t want_typ); diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c deleted file mode 100644 index 363fe0f998..0000000000 --- a/reftable/readwrite_test.c +++ /dev/null @@ -1,978 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "system.h" - -#include "basics.h" -#include "block.h" -#include "blocksource.h" -#include "reader.h" -#include "record.h" -#include "test_framework.h" -#include "reftable-tests.h" -#include "reftable-writer.h" - -static const int update_index = 5; - -static void test_buffer(void) -{ - struct strbuf buf = STRBUF_INIT; - struct reftable_block_source source = { NULL }; - struct reftable_block out = { NULL }; - int n; - uint8_t in[] = "hello"; - strbuf_add(&buf, in, sizeof(in)); - block_source_from_strbuf(&source, &buf); - EXPECT(block_source_size(&source) == 6); - n = block_source_read_block(&source, &out, 0, sizeof(in)); - EXPECT(n == sizeof(in)); - EXPECT(!memcmp(in, out.data, n)); - reftable_block_done(&out); - - n = block_source_read_block(&source, &out, 1, 2); - EXPECT(n == 2); - EXPECT(!memcmp(out.data, "el", 2)); - - reftable_block_done(&out); - block_source_close(&source); - strbuf_release(&buf); -} - -static void write_table(char ***names, struct strbuf *buf, int N, - int block_size, uint32_t hash_id) -{ - struct reftable_write_options opts = { - .block_size = block_size, - .hash_id = hash_id, - }; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, buf, &opts); - struct reftable_ref_record ref = { NULL }; - int i = 0, n; - struct reftable_log_record log = { NULL }; - const struct reftable_stats *stats = NULL; - - REFTABLE_CALLOC_ARRAY(*names, N + 1); - - reftable_writer_set_limits(w, update_index, update_index); - for (i = 0; i < N; i++) { - char name[100]; - int n; - - snprintf(name, sizeof(name), "refs/heads/branch%02d", i); - - ref.refname = name; - ref.update_index = update_index; - ref.value_type = REFTABLE_REF_VAL1; - set_test_hash(ref.value.val1, i); - (*names)[i] = xstrdup(name); - - n = reftable_writer_add_ref(w, &ref); - EXPECT(n == 0); - } - - for (i = 0; i < N; i++) { - uint8_t hash[GIT_SHA256_RAWSZ] = { 0 }; - char name[100]; - int n; - - set_test_hash(hash, i); - - snprintf(name, sizeof(name), "refs/heads/branch%02d", i); - - log.refname = name; - log.update_index = update_index; - log.value_type = REFTABLE_LOG_UPDATE; - log.value.update.new_hash = hash; - log.value.update.message = "message"; - - n = reftable_writer_add_log(w, &log); - EXPECT(n == 0); - } - - n = reftable_writer_close(w); - EXPECT(n == 0); - - stats = reftable_writer_stats(w); - for (i = 0; i < stats->ref_stats.blocks; i++) { - int off = i * opts.block_size; - if (off == 0) { - off = header_size( - (hash_id == GIT_SHA256_FORMAT_ID) ? 2 : 1); - } - EXPECT(buf->buf[off] == 'r'); - } - - EXPECT(stats->log_stats.blocks > 0); - reftable_writer_free(w); -} - -static void test_log_buffer_size(void) -{ - struct strbuf buf = STRBUF_INIT; - struct reftable_write_options opts = { - .block_size = 4096, - }; - int err; - int i; - struct reftable_log_record - log = { .refname = "refs/heads/master", - .update_index = 0xa, - .value_type = REFTABLE_LOG_UPDATE, - .value = { .update = { - .name = "Han-Wen Nienhuys", - .email = "hanwen@google.com", - .tz_offset = 100, - .time = 0x5e430672, - .message = "commit: 9\n", - } } }; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - - /* This tests buffer extension for log compression. Must use a random - hash, to ensure that the compressed part is larger than the original. - */ - uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ]; - for (i = 0; i < GIT_SHA1_RAWSZ; i++) { - hash1[i] = (uint8_t)(git_rand() % 256); - hash2[i] = (uint8_t)(git_rand() % 256); - } - log.value.update.old_hash = hash1; - log.value.update.new_hash = hash2; - reftable_writer_set_limits(w, update_index, update_index); - err = reftable_writer_add_log(w, &log); - EXPECT_ERR(err); - err = reftable_writer_close(w); - EXPECT_ERR(err); - reftable_writer_free(w); - strbuf_release(&buf); -} - -static void test_log_overflow(void) -{ - struct strbuf buf = STRBUF_INIT; - char msg[256] = { 0 }; - struct reftable_write_options opts = { - .block_size = ARRAY_SIZE(msg), - }; - int err; - struct reftable_log_record - log = { .refname = "refs/heads/master", - .update_index = 0xa, - .value_type = REFTABLE_LOG_UPDATE, - .value = { .update = { - .name = "Han-Wen Nienhuys", - .email = "hanwen@google.com", - .tz_offset = 100, - .time = 0x5e430672, - .message = msg, - } } }; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - - uint8_t hash1[GIT_SHA1_RAWSZ] = {1}, hash2[GIT_SHA1_RAWSZ] = { 2 }; - - memset(msg, 'x', sizeof(msg) - 1); - log.value.update.old_hash = hash1; - log.value.update.new_hash = hash2; - reftable_writer_set_limits(w, update_index, update_index); - err = reftable_writer_add_log(w, &log); - EXPECT(err == REFTABLE_ENTRY_TOO_BIG_ERROR); - reftable_writer_free(w); - strbuf_release(&buf); -} - -static void test_log_write_read(void) -{ - int N = 2; - char **names = reftable_calloc(N + 1, sizeof(*names)); - int err; - struct reftable_write_options opts = { - .block_size = 256, - }; - struct reftable_ref_record ref = { NULL }; - int i = 0; - struct reftable_log_record log = { NULL }; - int n; - struct reftable_iterator it = { NULL }; - struct reftable_reader rd = { NULL }; - struct reftable_block_source source = { NULL }; - struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - const struct reftable_stats *stats = NULL; - reftable_writer_set_limits(w, 0, N); - for (i = 0; i < N; i++) { - char name[256]; - struct reftable_ref_record ref = { NULL }; - snprintf(name, sizeof(name), "b%02d%0*d", i, 130, 7); - names[i] = xstrdup(name); - ref.refname = name; - ref.update_index = i; - - err = reftable_writer_add_ref(w, &ref); - EXPECT_ERR(err); - } - for (i = 0; i < N; i++) { - uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ]; - struct reftable_log_record log = { NULL }; - set_test_hash(hash1, i); - set_test_hash(hash2, i + 1); - - log.refname = names[i]; - log.update_index = i; - log.value_type = REFTABLE_LOG_UPDATE; - log.value.update.old_hash = hash1; - log.value.update.new_hash = hash2; - - err = reftable_writer_add_log(w, &log); - EXPECT_ERR(err); - } - - n = reftable_writer_close(w); - EXPECT(n == 0); - - stats = reftable_writer_stats(w); - EXPECT(stats->log_stats.blocks > 0); - reftable_writer_free(w); - w = NULL; - - block_source_from_strbuf(&source, &buf); - - err = init_reader(&rd, &source, "file.log"); - EXPECT_ERR(err); - - err = reftable_reader_seek_ref(&rd, &it, names[N - 1]); - EXPECT_ERR(err); - - err = reftable_iterator_next_ref(&it, &ref); - EXPECT_ERR(err); - - /* end of iteration. */ - err = reftable_iterator_next_ref(&it, &ref); - EXPECT(0 < err); - - reftable_iterator_destroy(&it); - reftable_ref_record_release(&ref); - - err = reftable_reader_seek_log(&rd, &it, ""); - EXPECT_ERR(err); - - i = 0; - while (1) { - int err = reftable_iterator_next_log(&it, &log); - if (err > 0) { - break; - } - - EXPECT_ERR(err); - EXPECT_STREQ(names[i], log.refname); - EXPECT(i == log.update_index); - i++; - reftable_log_record_release(&log); - } - - EXPECT(i == N); - reftable_iterator_destroy(&it); - - /* cleanup. */ - strbuf_release(&buf); - free_names(names); - reader_close(&rd); -} - -static void test_log_zlib_corruption(void) -{ - struct reftable_write_options opts = { - .block_size = 256, - }; - struct reftable_iterator it = { 0 }; - struct reftable_reader rd = { 0 }; - struct reftable_block_source source = { 0 }; - struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - const struct reftable_stats *stats = NULL; - uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 }; - uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 }; - char message[100] = { 0 }; - int err, i, n; - - struct reftable_log_record log = { - .refname = "refname", - .value_type = REFTABLE_LOG_UPDATE, - .value = { - .update = { - .new_hash = hash1, - .old_hash = hash2, - .name = "My Name", - .email = "myname@invalid", - .message = message, - }, - }, - }; - - for (i = 0; i < sizeof(message) - 1; i++) - message[i] = (uint8_t)(git_rand() % 64 + ' '); - - reftable_writer_set_limits(w, 1, 1); - - err = reftable_writer_add_log(w, &log); - EXPECT_ERR(err); - - n = reftable_writer_close(w); - EXPECT(n == 0); - - stats = reftable_writer_stats(w); - EXPECT(stats->log_stats.blocks > 0); - reftable_writer_free(w); - w = NULL; - - /* corrupt the data. */ - buf.buf[50] ^= 0x99; - - block_source_from_strbuf(&source, &buf); - - err = init_reader(&rd, &source, "file.log"); - EXPECT_ERR(err); - - err = reftable_reader_seek_log(&rd, &it, "refname"); - EXPECT(err == REFTABLE_ZLIB_ERROR); - - reftable_iterator_destroy(&it); - - /* cleanup. */ - strbuf_release(&buf); - reader_close(&rd); -} - -static void test_table_read_write_sequential(void) -{ - char **names; - struct strbuf buf = STRBUF_INIT; - int N = 50; - struct reftable_iterator it = { NULL }; - struct reftable_block_source source = { NULL }; - struct reftable_reader rd = { NULL }; - int err = 0; - int j = 0; - - write_table(&names, &buf, N, 256, GIT_SHA1_FORMAT_ID); - - block_source_from_strbuf(&source, &buf); - - err = init_reader(&rd, &source, "file.ref"); - EXPECT_ERR(err); - - err = reftable_reader_seek_ref(&rd, &it, ""); - EXPECT_ERR(err); - - while (1) { - struct reftable_ref_record ref = { NULL }; - int r = reftable_iterator_next_ref(&it, &ref); - EXPECT(r >= 0); - if (r > 0) { - break; - } - EXPECT(0 == strcmp(names[j], ref.refname)); - EXPECT(update_index == ref.update_index); - - j++; - reftable_ref_record_release(&ref); - } - EXPECT(j == N); - reftable_iterator_destroy(&it); - strbuf_release(&buf); - free_names(names); - - reader_close(&rd); -} - -static void test_table_write_small_table(void) -{ - char **names; - struct strbuf buf = STRBUF_INIT; - int N = 1; - write_table(&names, &buf, N, 4096, GIT_SHA1_FORMAT_ID); - EXPECT(buf.len < 200); - strbuf_release(&buf); - free_names(names); -} - -static void test_table_read_api(void) -{ - char **names; - struct strbuf buf = STRBUF_INIT; - int N = 50; - struct reftable_reader rd = { NULL }; - struct reftable_block_source source = { NULL }; - int err; - int i; - struct reftable_log_record log = { NULL }; - struct reftable_iterator it = { NULL }; - - write_table(&names, &buf, N, 256, GIT_SHA1_FORMAT_ID); - - block_source_from_strbuf(&source, &buf); - - err = init_reader(&rd, &source, "file.ref"); - EXPECT_ERR(err); - - err = reftable_reader_seek_ref(&rd, &it, names[0]); - EXPECT_ERR(err); - - err = reftable_iterator_next_log(&it, &log); - EXPECT(err == REFTABLE_API_ERROR); - - strbuf_release(&buf); - for (i = 0; i < N; i++) { - reftable_free(names[i]); - } - reftable_iterator_destroy(&it); - reftable_free(names); - reader_close(&rd); - strbuf_release(&buf); -} - -static void test_table_read_write_seek(int index, int hash_id) -{ - char **names; - struct strbuf buf = STRBUF_INIT; - int N = 50; - struct reftable_reader rd = { NULL }; - struct reftable_block_source source = { NULL }; - int err; - int i = 0; - - struct reftable_iterator it = { NULL }; - struct strbuf pastLast = STRBUF_INIT; - struct reftable_ref_record ref = { NULL }; - - write_table(&names, &buf, N, 256, hash_id); - - block_source_from_strbuf(&source, &buf); - - err = init_reader(&rd, &source, "file.ref"); - EXPECT_ERR(err); - EXPECT(hash_id == reftable_reader_hash_id(&rd)); - - if (!index) { - rd.ref_offsets.index_offset = 0; - } else { - EXPECT(rd.ref_offsets.index_offset > 0); - } - - for (i = 1; i < N; i++) { - int err = reftable_reader_seek_ref(&rd, &it, names[i]); - EXPECT_ERR(err); - err = reftable_iterator_next_ref(&it, &ref); - EXPECT_ERR(err); - EXPECT(0 == strcmp(names[i], ref.refname)); - EXPECT(REFTABLE_REF_VAL1 == ref.value_type); - EXPECT(i == ref.value.val1[0]); - - reftable_ref_record_release(&ref); - reftable_iterator_destroy(&it); - } - - strbuf_addstr(&pastLast, names[N - 1]); - strbuf_addstr(&pastLast, "/"); - - err = reftable_reader_seek_ref(&rd, &it, pastLast.buf); - if (err == 0) { - struct reftable_ref_record ref = { NULL }; - int err = reftable_iterator_next_ref(&it, &ref); - EXPECT(err > 0); - } else { - EXPECT(err > 0); - } - - strbuf_release(&pastLast); - reftable_iterator_destroy(&it); - - strbuf_release(&buf); - for (i = 0; i < N; i++) { - reftable_free(names[i]); - } - reftable_free(names); - reader_close(&rd); -} - -static void test_table_read_write_seek_linear(void) -{ - test_table_read_write_seek(0, GIT_SHA1_FORMAT_ID); -} - -static void test_table_read_write_seek_linear_sha256(void) -{ - test_table_read_write_seek(0, GIT_SHA256_FORMAT_ID); -} - -static void test_table_read_write_seek_index(void) -{ - test_table_read_write_seek(1, GIT_SHA1_FORMAT_ID); -} - -static void test_table_refs_for(int indexed) -{ - int N = 50; - char **want_names = reftable_calloc(N + 1, sizeof(*want_names)); - int want_names_len = 0; - uint8_t want_hash[GIT_SHA1_RAWSZ]; - - struct reftable_write_options opts = { - .block_size = 256, - }; - struct reftable_ref_record ref = { NULL }; - int i = 0; - int n; - int err; - struct reftable_reader rd; - struct reftable_block_source source = { NULL }; - - struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - - struct reftable_iterator it = { NULL }; - int j; - - set_test_hash(want_hash, 4); - - for (i = 0; i < N; i++) { - uint8_t hash[GIT_SHA1_RAWSZ]; - char fill[51] = { 0 }; - char name[100]; - struct reftable_ref_record ref = { NULL }; - - memset(hash, i, sizeof(hash)); - memset(fill, 'x', 50); - /* Put the variable part in the start */ - snprintf(name, sizeof(name), "br%02d%s", i, fill); - name[40] = 0; - ref.refname = name; - - ref.value_type = REFTABLE_REF_VAL2; - set_test_hash(ref.value.val2.value, i / 4); - set_test_hash(ref.value.val2.target_value, 3 + i / 4); - - /* 80 bytes / entry, so 3 entries per block. Yields 17 - */ - /* blocks. */ - n = reftable_writer_add_ref(w, &ref); - EXPECT(n == 0); - - if (!memcmp(ref.value.val2.value, want_hash, GIT_SHA1_RAWSZ) || - !memcmp(ref.value.val2.target_value, want_hash, GIT_SHA1_RAWSZ)) { - want_names[want_names_len++] = xstrdup(name); - } - } - - n = reftable_writer_close(w); - EXPECT(n == 0); - - reftable_writer_free(w); - w = NULL; - - block_source_from_strbuf(&source, &buf); - - err = init_reader(&rd, &source, "file.ref"); - EXPECT_ERR(err); - if (!indexed) { - rd.obj_offsets.is_present = 0; - } - - err = reftable_reader_seek_ref(&rd, &it, ""); - EXPECT_ERR(err); - reftable_iterator_destroy(&it); - - err = reftable_reader_refs_for(&rd, &it, want_hash); - EXPECT_ERR(err); - - j = 0; - while (1) { - int err = reftable_iterator_next_ref(&it, &ref); - EXPECT(err >= 0); - if (err > 0) { - break; - } - - EXPECT(j < want_names_len); - EXPECT(0 == strcmp(ref.refname, want_names[j])); - j++; - reftable_ref_record_release(&ref); - } - EXPECT(j == want_names_len); - - strbuf_release(&buf); - free_names(want_names); - reftable_iterator_destroy(&it); - reader_close(&rd); -} - -static void test_table_refs_for_no_index(void) -{ - test_table_refs_for(0); -} - -static void test_table_refs_for_obj_index(void) -{ - test_table_refs_for(1); -} - -static void test_write_empty_table(void) -{ - struct reftable_write_options opts = { 0 }; - struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - struct reftable_block_source source = { NULL }; - struct reftable_reader *rd = NULL; - struct reftable_ref_record rec = { NULL }; - struct reftable_iterator it = { NULL }; - int err; - - reftable_writer_set_limits(w, 1, 1); - - err = reftable_writer_close(w); - EXPECT(err == REFTABLE_EMPTY_TABLE_ERROR); - reftable_writer_free(w); - - EXPECT(buf.len == header_size(1) + footer_size(1)); - - block_source_from_strbuf(&source, &buf); - - err = reftable_new_reader(&rd, &source, "filename"); - EXPECT_ERR(err); - - err = reftable_reader_seek_ref(rd, &it, ""); - EXPECT_ERR(err); - - err = reftable_iterator_next_ref(&it, &rec); - EXPECT(err > 0); - - reftable_iterator_destroy(&it); - reftable_reader_free(rd); - strbuf_release(&buf); -} - -static void test_write_object_id_min_length(void) -{ - struct reftable_write_options opts = { - .block_size = 75, - }; - struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - struct reftable_ref_record ref = { - .update_index = 1, - .value_type = REFTABLE_REF_VAL1, - .value.val1 = {42}, - }; - int err; - int i; - - reftable_writer_set_limits(w, 1, 1); - - /* Write the same hash in many refs. If there is only 1 hash, the - * disambiguating prefix is length 0 */ - for (i = 0; i < 256; i++) { - char name[256]; - snprintf(name, sizeof(name), "ref%05d", i); - ref.refname = name; - err = reftable_writer_add_ref(w, &ref); - EXPECT_ERR(err); - } - - err = reftable_writer_close(w); - EXPECT_ERR(err); - EXPECT(reftable_writer_stats(w)->object_id_len == 2); - reftable_writer_free(w); - strbuf_release(&buf); -} - -static void test_write_object_id_length(void) -{ - struct reftable_write_options opts = { - .block_size = 75, - }; - struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - struct reftable_ref_record ref = { - .update_index = 1, - .value_type = REFTABLE_REF_VAL1, - .value.val1 = {42}, - }; - int err; - int i; - - reftable_writer_set_limits(w, 1, 1); - - /* Write the same hash in many refs. If there is only 1 hash, the - * disambiguating prefix is length 0 */ - for (i = 0; i < 256; i++) { - char name[256]; - snprintf(name, sizeof(name), "ref%05d", i); - ref.refname = name; - ref.value.val1[15] = i; - err = reftable_writer_add_ref(w, &ref); - EXPECT_ERR(err); - } - - err = reftable_writer_close(w); - EXPECT_ERR(err); - EXPECT(reftable_writer_stats(w)->object_id_len == 16); - reftable_writer_free(w); - strbuf_release(&buf); -} - -static void test_write_empty_key(void) -{ - struct reftable_write_options opts = { 0 }; - struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - struct reftable_ref_record ref = { - .refname = "", - .update_index = 1, - .value_type = REFTABLE_REF_DELETION, - }; - int err; - - reftable_writer_set_limits(w, 1, 1); - err = reftable_writer_add_ref(w, &ref); - EXPECT(err == REFTABLE_API_ERROR); - - err = reftable_writer_close(w); - EXPECT(err == REFTABLE_EMPTY_TABLE_ERROR); - reftable_writer_free(w); - strbuf_release(&buf); -} - -static void test_write_key_order(void) -{ - struct reftable_write_options opts = { 0 }; - struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - struct reftable_ref_record refs[2] = { - { - .refname = "b", - .update_index = 1, - .value_type = REFTABLE_REF_SYMREF, - .value = { - .symref = "target", - }, - }, { - .refname = "a", - .update_index = 1, - .value_type = REFTABLE_REF_SYMREF, - .value = { - .symref = "target", - }, - } - }; - int err; - - reftable_writer_set_limits(w, 1, 1); - err = reftable_writer_add_ref(w, &refs[0]); - EXPECT_ERR(err); - err = reftable_writer_add_ref(w, &refs[1]); - EXPECT(err == REFTABLE_API_ERROR); - reftable_writer_close(w); - reftable_writer_free(w); - strbuf_release(&buf); -} - -static void test_write_multiple_indices(void) -{ - struct reftable_write_options opts = { - .block_size = 100, - }; - struct strbuf writer_buf = STRBUF_INIT, buf = STRBUF_INIT; - struct reftable_block_source source = { 0 }; - struct reftable_iterator it = { 0 }; - const struct reftable_stats *stats; - struct reftable_writer *writer; - struct reftable_reader *reader; - int err, i; - - writer = reftable_new_writer(&strbuf_add_void, &noop_flush, &writer_buf, &opts); - reftable_writer_set_limits(writer, 1, 1); - for (i = 0; i < 100; i++) { - struct reftable_ref_record ref = { - .update_index = 1, - .value_type = REFTABLE_REF_VAL1, - .value.val1 = {i}, - }; - - strbuf_reset(&buf); - strbuf_addf(&buf, "refs/heads/%04d", i); - ref.refname = buf.buf, - - err = reftable_writer_add_ref(writer, &ref); - EXPECT_ERR(err); - } - - for (i = 0; i < 100; i++) { - unsigned char hash[GIT_SHA1_RAWSZ] = {i}; - struct reftable_log_record log = { - .update_index = 1, - .value_type = REFTABLE_LOG_UPDATE, - .value.update = { - .old_hash = hash, - .new_hash = hash, - }, - }; - - strbuf_reset(&buf); - strbuf_addf(&buf, "refs/heads/%04d", i); - log.refname = buf.buf, - - err = reftable_writer_add_log(writer, &log); - EXPECT_ERR(err); - } - - reftable_writer_close(writer); - - /* - * The written data should be sufficiently large to result in indices - * for each of the block types. - */ - stats = reftable_writer_stats(writer); - EXPECT(stats->ref_stats.index_offset > 0); - EXPECT(stats->obj_stats.index_offset > 0); - EXPECT(stats->log_stats.index_offset > 0); - - block_source_from_strbuf(&source, &writer_buf); - err = reftable_new_reader(&reader, &source, "filename"); - EXPECT_ERR(err); - - /* - * Seeking the log uses the log index now. In case there is any - * confusion regarding indices we would notice here. - */ - err = reftable_reader_seek_log(reader, &it, ""); - EXPECT_ERR(err); - - reftable_iterator_destroy(&it); - reftable_writer_free(writer); - reftable_reader_free(reader); - strbuf_release(&writer_buf); - strbuf_release(&buf); -} - -static void test_write_multi_level_index(void) -{ - struct reftable_write_options opts = { - .block_size = 100, - }; - struct strbuf writer_buf = STRBUF_INIT, buf = STRBUF_INIT; - struct reftable_block_source source = { 0 }; - struct reftable_iterator it = { 0 }; - const struct reftable_stats *stats; - struct reftable_writer *writer; - struct reftable_reader *reader; - int err; - - writer = reftable_new_writer(&strbuf_add_void, &noop_flush, &writer_buf, &opts); - reftable_writer_set_limits(writer, 1, 1); - for (size_t i = 0; i < 200; i++) { - struct reftable_ref_record ref = { - .update_index = 1, - .value_type = REFTABLE_REF_VAL1, - .value.val1 = {i}, - }; - - strbuf_reset(&buf); - strbuf_addf(&buf, "refs/heads/%03" PRIuMAX, (uintmax_t)i); - ref.refname = buf.buf, - - err = reftable_writer_add_ref(writer, &ref); - EXPECT_ERR(err); - } - reftable_writer_close(writer); - - /* - * The written refs should be sufficiently large to result in a - * multi-level index. - */ - stats = reftable_writer_stats(writer); - EXPECT(stats->ref_stats.max_index_level == 2); - - block_source_from_strbuf(&source, &writer_buf); - err = reftable_new_reader(&reader, &source, "filename"); - EXPECT_ERR(err); - - /* - * Seeking the last ref should work as expected. - */ - err = reftable_reader_seek_ref(reader, &it, "refs/heads/199"); - EXPECT_ERR(err); - - reftable_iterator_destroy(&it); - reftable_writer_free(writer); - reftable_reader_free(reader); - strbuf_release(&writer_buf); - strbuf_release(&buf); -} - -static void test_corrupt_table_empty(void) -{ - struct strbuf buf = STRBUF_INIT; - struct reftable_block_source source = { NULL }; - struct reftable_reader rd = { NULL }; - int err; - - block_source_from_strbuf(&source, &buf); - err = init_reader(&rd, &source, "file.log"); - EXPECT(err == REFTABLE_FORMAT_ERROR); -} - -static void test_corrupt_table(void) -{ - uint8_t zeros[1024] = { 0 }; - struct strbuf buf = STRBUF_INIT; - struct reftable_block_source source = { NULL }; - struct reftable_reader rd = { NULL }; - int err; - strbuf_add(&buf, zeros, sizeof(zeros)); - - block_source_from_strbuf(&source, &buf); - err = init_reader(&rd, &source, "file.log"); - EXPECT(err == REFTABLE_FORMAT_ERROR); - strbuf_release(&buf); -} - -int readwrite_test_main(int argc, const char *argv[]) -{ - RUN_TEST(test_log_zlib_corruption); - RUN_TEST(test_corrupt_table); - RUN_TEST(test_corrupt_table_empty); - RUN_TEST(test_log_write_read); - RUN_TEST(test_write_key_order); - RUN_TEST(test_table_read_write_seek_linear_sha256); - RUN_TEST(test_log_buffer_size); - RUN_TEST(test_table_write_small_table); - RUN_TEST(test_buffer); - RUN_TEST(test_table_read_api); - RUN_TEST(test_table_read_write_sequential); - RUN_TEST(test_table_read_write_seek_linear); - RUN_TEST(test_table_read_write_seek_index); - RUN_TEST(test_table_refs_for_no_index); - RUN_TEST(test_table_refs_for_obj_index); - RUN_TEST(test_write_empty_key); - RUN_TEST(test_write_empty_table); - RUN_TEST(test_log_overflow); - RUN_TEST(test_write_object_id_length); - RUN_TEST(test_write_object_id_min_length); - RUN_TEST(test_write_multiple_indices); - RUN_TEST(test_write_multi_level_index); - return 0; -} diff --git a/reftable/record.c b/reftable/record.c index 26c5e43f9b..6b5a075b92 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -116,7 +116,7 @@ static int decode_string(struct strbuf *dest, struct string_view in) return start_len - in.len; } -static int encode_string(char *str, struct string_view s) +static int encode_string(const char *str, struct string_view s) { struct string_view start = s; int l = strlen(str); @@ -159,34 +159,49 @@ int reftable_encode_key(int *restart, struct string_view dest, return start.len - dest.len; } -int reftable_decode_key(struct strbuf *key, uint8_t *extra, - struct strbuf last_key, struct string_view in) +int reftable_decode_keylen(struct string_view in, + uint64_t *prefix_len, + uint64_t *suffix_len, + uint8_t *extra) { - int start_len = in.len; - uint64_t prefix_len = 0; - uint64_t suffix_len = 0; - int n = get_var_int(&prefix_len, &in); + size_t start_len = in.len; + int n; + + n = get_var_int(prefix_len, &in); if (n < 0) return -1; string_view_consume(&in, n); - if (prefix_len > last_key.len) - return -1; - - n = get_var_int(&suffix_len, &in); + n = get_var_int(suffix_len, &in); if (n <= 0) return -1; string_view_consume(&in, n); - *extra = (uint8_t)(suffix_len & 0x7); - suffix_len >>= 3; + *extra = (uint8_t)(*suffix_len & 0x7); + *suffix_len >>= 3; + + return start_len - in.len; +} + +int reftable_decode_key(struct strbuf *last_key, uint8_t *extra, + struct string_view in) +{ + int start_len = in.len; + uint64_t prefix_len = 0; + uint64_t suffix_len = 0; + int n; + + n = reftable_decode_keylen(in, &prefix_len, &suffix_len, extra); + if (n < 0) + return -1; + string_view_consume(&in, n); - if (in.len < suffix_len) + if (in.len < suffix_len || + prefix_len > last_key->len) return -1; - strbuf_reset(key); - strbuf_add(key, last_key.buf, prefix_len); - strbuf_add(key, in.buf, suffix_len); + strbuf_setlen(last_key, prefix_len); + strbuf_add(last_key, in.buf, suffix_len); string_view_consume(&in, suffix_len); return start_len - in.len; @@ -205,14 +220,26 @@ static void reftable_ref_record_copy_from(void *rec, const void *src_rec, { struct reftable_ref_record *ref = rec; const struct reftable_ref_record *src = src_rec; + char *refname = NULL; + size_t refname_cap = 0; + assert(hash_size > 0); - /* This is simple and correct, but we could probably reuse the hash - * fields. */ + SWAP(refname, ref->refname); + SWAP(refname_cap, ref->refname_cap); reftable_ref_record_release(ref); + SWAP(ref->refname, refname); + SWAP(ref->refname_cap, refname_cap); + if (src->refname) { - ref->refname = xstrdup(src->refname); + size_t refname_len = strlen(src->refname); + + REFTABLE_ALLOC_GROW(ref->refname, refname_len + 1, + ref->refname_cap); + memcpy(ref->refname, src->refname, refname_len); + ref->refname[refname_len] = 0; } + ref->update_index = src->update_index; ref->value_type = src->value_type; switch (src->value_type) { @@ -232,58 +259,6 @@ static void reftable_ref_record_copy_from(void *rec, const void *src_rec, } } -static char hexdigit(int c) -{ - if (c <= 9) - return '0' + c; - return 'a' + (c - 10); -} - -static void hex_format(char *dest, const unsigned char *src, int hash_size) -{ - assert(hash_size > 0); - if (src) { - int i = 0; - for (i = 0; i < hash_size; i++) { - dest[2 * i] = hexdigit(src[i] >> 4); - dest[2 * i + 1] = hexdigit(src[i] & 0xf); - } - dest[2 * hash_size] = 0; - } -} - -static void reftable_ref_record_print_sz(const struct reftable_ref_record *ref, - int hash_size) -{ - char hex[GIT_MAX_HEXSZ + 1] = { 0 }; /* BUG */ - printf("ref{%s(%" PRIu64 ") ", ref->refname, ref->update_index); - switch (ref->value_type) { - case REFTABLE_REF_SYMREF: - printf("=> %s", ref->value.symref); - break; - case REFTABLE_REF_VAL2: - hex_format(hex, ref->value.val2.value, hash_size); - printf("val 2 %s", hex); - hex_format(hex, ref->value.val2.target_value, - hash_size); - printf("(T %s)", hex); - break; - case REFTABLE_REF_VAL1: - hex_format(hex, ref->value.val1, hash_size); - printf("val 1 %s", hex); - break; - case REFTABLE_REF_DELETION: - printf("delete"); - break; - } - printf("}\n"); -} - -void reftable_ref_record_print(const struct reftable_ref_record *ref, - uint32_t hash_id) { - reftable_ref_record_print_sz(ref, hash_size(hash_id)); -} - static void reftable_ref_record_release_void(void *rec) { reftable_ref_record_release(rec); @@ -363,24 +338,33 @@ static int reftable_ref_record_encode(const void *rec, struct string_view s, static int reftable_ref_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch) { struct reftable_ref_record *r = rec; struct string_view start = in; uint64_t update_index = 0; - int n = get_var_int(&update_index, &in); + const char *refname = NULL; + size_t refname_cap = 0; + int n; + + assert(hash_size > 0); + + n = get_var_int(&update_index, &in); if (n < 0) return n; string_view_consume(&in, n); + SWAP(refname, r->refname); + SWAP(refname_cap, r->refname_cap); reftable_ref_record_release(r); + SWAP(r->refname, refname); + SWAP(r->refname_cap, refname_cap); - assert(hash_size > 0); - - r->refname = reftable_realloc(r->refname, key.len + 1); + REFTABLE_ALLOC_GROW(r->refname, key.len + 1, r->refname_cap); memcpy(r->refname, key.buf, key.len); - r->update_index = update_index; r->refname[key.len] = 0; + + r->update_index = update_index; r->value_type = val_type; switch (val_type) { case REFTABLE_REF_VAL1: @@ -405,13 +389,12 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key, break; case REFTABLE_REF_SYMREF: { - struct strbuf dest = STRBUF_INIT; - int n = decode_string(&dest, in); + int n = decode_string(scratch, in); if (n < 0) { return -1; } string_view_consume(&in, n); - r->value.symref = dest.buf; + r->value.symref = strbuf_detach(scratch, NULL); } break; case REFTABLE_REF_DELETION: @@ -430,7 +413,6 @@ static int reftable_ref_record_is_deletion_void(const void *p) (const struct reftable_ref_record *)p); } - static int reftable_ref_record_equal_void(const void *a, const void *b, int hash_size) { @@ -439,10 +421,11 @@ static int reftable_ref_record_equal_void(const void *a, return reftable_ref_record_equal(ra, rb, hash_size); } -static void reftable_ref_record_print_void(const void *rec, - int hash_size) +static int reftable_ref_record_cmp_void(const void *_a, const void *_b) { - reftable_ref_record_print_sz((struct reftable_ref_record *) rec, hash_size); + const struct reftable_ref_record *a = _a; + const struct reftable_ref_record *b = _b; + return strcmp(a->refname, b->refname); } static struct reftable_record_vtable reftable_ref_record_vtable = { @@ -455,7 +438,7 @@ static struct reftable_record_vtable reftable_ref_record_vtable = { .release = &reftable_ref_record_release_void, .is_deletion = &reftable_ref_record_is_deletion_void, .equal = &reftable_ref_record_equal_void, - .print = &reftable_ref_record_print_void, + .cmp = &reftable_ref_record_cmp_void, }; static void reftable_obj_record_key(const void *r, struct strbuf *dest) @@ -474,23 +457,8 @@ static void reftable_obj_record_release(void *rec) memset(obj, 0, sizeof(struct reftable_obj_record)); } -static void reftable_obj_record_print(const void *rec, int hash_size) -{ - const struct reftable_obj_record *obj = rec; - char hex[GIT_MAX_HEXSZ + 1] = { 0 }; - struct strbuf offset_str = STRBUF_INIT; - int i; - - for (i = 0; i < obj->offset_len; i++) - strbuf_addf(&offset_str, "%" PRIu64 " ", obj->offsets[i]); - hex_format(hex, obj->hash_prefix, obj->hash_prefix_len); - printf("prefix %s (len %d), offsets [%s]\n", - hex, obj->hash_prefix_len, offset_str.buf); - strbuf_release(&offset_str); -} - static void reftable_obj_record_copy_from(void *rec, const void *src_rec, - int hash_size) + int hash_size UNUSED) { struct reftable_obj_record *obj = rec; const struct reftable_obj_record *src = @@ -517,7 +485,7 @@ static uint8_t reftable_obj_record_val_type(const void *rec) } static int reftable_obj_record_encode(const void *rec, struct string_view s, - int hash_size) + int hash_size UNUSED) { const struct reftable_obj_record *r = rec; struct string_view start = s; @@ -552,7 +520,8 @@ static int reftable_obj_record_encode(const void *rec, struct string_view s, static int reftable_obj_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size UNUSED, + struct strbuf *scratch UNUSED) { struct string_view start = in; struct reftable_obj_record *r = rec; @@ -561,6 +530,8 @@ static int reftable_obj_record_decode(void *rec, struct strbuf key, uint64_t last; int j; + reftable_obj_record_release(r); + REFTABLE_ALLOC_ARRAY(r->hash_prefix, key.len); memcpy(r->hash_prefix, key.buf, key.len); r->hash_prefix_len = key.len; @@ -603,12 +574,13 @@ static int reftable_obj_record_decode(void *rec, struct strbuf key, return start.len - in.len; } -static int not_a_deletion(const void *p) +static int not_a_deletion(const void *p UNUSED) { return 0; } -static int reftable_obj_record_equal_void(const void *a, const void *b, int hash_size) +static int reftable_obj_record_equal_void(const void *a, const void *b, + int hash_size UNUSED) { struct reftable_obj_record *ra = (struct reftable_obj_record *) a; struct reftable_obj_record *rb = (struct reftable_obj_record *) b; @@ -627,6 +599,25 @@ static int reftable_obj_record_equal_void(const void *a, const void *b, int hash return 1; } +static int reftable_obj_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_obj_record *a = _a; + const struct reftable_obj_record *b = _b; + int cmp; + + cmp = memcmp(a->hash_prefix, b->hash_prefix, + a->hash_prefix_len > b->hash_prefix_len ? + a->hash_prefix_len : b->hash_prefix_len); + if (cmp) + return cmp; + + /* + * When the prefix is the same then the object record that is longer is + * considered to be bigger. + */ + return a->hash_prefix_len - b->hash_prefix_len; +} + static struct reftable_record_vtable reftable_obj_record_vtable = { .key = &reftable_obj_record_key, .type = BLOCK_TYPE_OBJ, @@ -637,41 +628,9 @@ static struct reftable_record_vtable reftable_obj_record_vtable = { .release = &reftable_obj_record_release, .is_deletion = ¬_a_deletion, .equal = &reftable_obj_record_equal_void, - .print = &reftable_obj_record_print, + .cmp = &reftable_obj_record_cmp_void, }; -static void reftable_log_record_print_sz(struct reftable_log_record *log, - int hash_size) -{ - char hex[GIT_MAX_HEXSZ + 1] = { 0 }; - - switch (log->value_type) { - case REFTABLE_LOG_DELETION: - printf("log{%s(%" PRIu64 ") delete\n", log->refname, - log->update_index); - break; - case REFTABLE_LOG_UPDATE: - printf("log{%s(%" PRIu64 ") %s <%s> %" PRIu64 " %04d\n", - log->refname, log->update_index, - log->value.update.name ? log->value.update.name : "", - log->value.update.email ? log->value.update.email : "", - log->value.update.time, - log->value.update.tz_offset); - hex_format(hex, log->value.update.old_hash, hash_size); - printf("%s => ", hex); - hex_format(hex, log->value.update.new_hash, hash_size); - printf("%s\n\n%s\n}\n", hex, - log->value.update.message ? log->value.update.message : ""); - break; - } -} - -void reftable_log_record_print(struct reftable_log_record *log, - uint32_t hash_id) -{ - reftable_log_record_print_sz(log, hash_size(hash_id)); -} - static void reftable_log_record_key(const void *r, struct strbuf *dest) { const struct reftable_log_record *rec = @@ -716,16 +675,10 @@ static void reftable_log_record_copy_from(void *rec, const void *src_rec, xstrdup(dst->value.update.message); } - if (dst->value.update.new_hash) { - REFTABLE_ALLOC_ARRAY(dst->value.update.new_hash, hash_size); - memcpy(dst->value.update.new_hash, - src->value.update.new_hash, hash_size); - } - if (dst->value.update.old_hash) { - REFTABLE_ALLOC_ARRAY(dst->value.update.old_hash, hash_size); - memcpy(dst->value.update.old_hash, - src->value.update.old_hash, hash_size); - } + memcpy(dst->value.update.new_hash, + src->value.update.new_hash, hash_size); + memcpy(dst->value.update.old_hash, + src->value.update.old_hash, hash_size); break; } } @@ -743,8 +696,6 @@ void reftable_log_record_release(struct reftable_log_record *r) case REFTABLE_LOG_DELETION: break; case REFTABLE_LOG_UPDATE: - reftable_free(r->value.update.new_hash); - reftable_free(r->value.update.old_hash); reftable_free(r->value.update.name); reftable_free(r->value.update.email); reftable_free(r->value.update.message); @@ -761,33 +712,20 @@ static uint8_t reftable_log_record_val_type(const void *rec) return reftable_log_record_is_deletion(log) ? 0 : 1; } -static uint8_t zero[GIT_SHA256_RAWSZ] = { 0 }; - static int reftable_log_record_encode(const void *rec, struct string_view s, int hash_size) { const struct reftable_log_record *r = rec; struct string_view start = s; int n = 0; - uint8_t *oldh = NULL; - uint8_t *newh = NULL; if (reftable_log_record_is_deletion(r)) return 0; - oldh = r->value.update.old_hash; - newh = r->value.update.new_hash; - if (!oldh) { - oldh = zero; - } - if (!newh) { - newh = zero; - } - if (s.len < 2 * hash_size) return -1; - memcpy(s.buf, oldh, hash_size); - memcpy(s.buf + hash_size, newh, hash_size); + memcpy(s.buf, r->value.update.old_hash, hash_size); + memcpy(s.buf + hash_size, r->value.update.new_hash, hash_size); string_view_consume(&s, 2 * hash_size); n = encode_string(r->value.update.name ? r->value.update.name : "", s); @@ -823,19 +761,18 @@ static int reftable_log_record_encode(const void *rec, struct string_view s, static int reftable_log_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch) { struct string_view start = in; struct reftable_log_record *r = rec; uint64_t max = 0; uint64_t ts = 0; - struct strbuf dest = STRBUF_INIT; int n; if (key.len <= 9 || key.buf[key.len - 9] != 0) return REFTABLE_FORMAT_ERROR; - r->refname = reftable_realloc(r->refname, key.len - 8); + REFTABLE_ALLOC_GROW(r->refname, key.len - 8, r->refname_cap); memcpy(r->refname, key.buf, key.len - 8); ts = get_be64(key.buf + key.len - 8); @@ -844,9 +781,8 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, if (val_type != r->value_type) { switch (r->value_type) { case REFTABLE_LOG_UPDATE: - FREE_AND_NULL(r->value.update.old_hash); - FREE_AND_NULL(r->value.update.new_hash); FREE_AND_NULL(r->value.update.message); + r->value.update.message_cap = 0; FREE_AND_NULL(r->value.update.email); FREE_AND_NULL(r->value.update.name); break; @@ -862,36 +798,43 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, if (in.len < 2 * hash_size) return REFTABLE_FORMAT_ERROR; - r->value.update.old_hash = - reftable_realloc(r->value.update.old_hash, hash_size); - r->value.update.new_hash = - reftable_realloc(r->value.update.new_hash, hash_size); - memcpy(r->value.update.old_hash, in.buf, hash_size); memcpy(r->value.update.new_hash, in.buf + hash_size, hash_size); string_view_consume(&in, 2 * hash_size); - n = decode_string(&dest, in); + n = decode_string(scratch, in); if (n < 0) goto done; string_view_consume(&in, n); - r->value.update.name = - reftable_realloc(r->value.update.name, dest.len + 1); - memcpy(r->value.update.name, dest.buf, dest.len); - r->value.update.name[dest.len] = 0; + /* + * In almost all cases we can expect the reflog name to not change for + * reflog entries as they are tied to the local identity, not to the + * target commits. As an optimization for this common case we can thus + * skip copying over the name in case it's accurate already. + */ + if (!r->value.update.name || + strcmp(r->value.update.name, scratch->buf)) { + r->value.update.name = + reftable_realloc(r->value.update.name, scratch->len + 1); + memcpy(r->value.update.name, scratch->buf, scratch->len); + r->value.update.name[scratch->len] = 0; + } - strbuf_reset(&dest); - n = decode_string(&dest, in); + n = decode_string(scratch, in); if (n < 0) goto done; string_view_consume(&in, n); - r->value.update.email = - reftable_realloc(r->value.update.email, dest.len + 1); - memcpy(r->value.update.email, dest.buf, dest.len); - r->value.update.email[dest.len] = 0; + /* Same as above, but for the reflog email. */ + if (!r->value.update.email || + strcmp(r->value.update.email, scratch->buf)) { + r->value.update.email = + reftable_realloc(r->value.update.email, scratch->len + 1); + memcpy(r->value.update.email, scratch->buf, scratch->len); + r->value.update.email[scratch->len] = 0; + } ts = 0; n = get_var_int(&ts, &in); @@ -905,28 +848,25 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, r->value.update.tz_offset = get_be16(in.buf); string_view_consume(&in, 2); - strbuf_reset(&dest); - n = decode_string(&dest, in); + n = decode_string(scratch, in); if (n < 0) goto done; string_view_consume(&in, n); - r->value.update.message = - reftable_realloc(r->value.update.message, dest.len + 1); - memcpy(r->value.update.message, dest.buf, dest.len); - r->value.update.message[dest.len] = 0; + REFTABLE_ALLOC_GROW(r->value.update.message, scratch->len + 1, + r->value.update.message_cap); + memcpy(r->value.update.message, scratch->buf, scratch->len); + r->value.update.message[scratch->len] = 0; - strbuf_release(&dest); return start.len - in.len; done: - strbuf_release(&dest); return REFTABLE_FORMAT_ERROR; } -static int null_streq(char *a, char *b) +static int null_streq(const char *a, const char *b) { - char *empty = ""; + const char *empty = ""; if (!a) a = empty; @@ -936,17 +876,6 @@ static int null_streq(char *a, char *b) return 0 == strcmp(a, b); } -static int zero_hash_eq(uint8_t *a, uint8_t *b, int sz) -{ - if (!a) - a = zero; - - if (!b) - b = zero; - - return !memcmp(a, b, sz); -} - static int reftable_log_record_equal_void(const void *a, const void *b, int hash_size) { @@ -955,6 +884,22 @@ static int reftable_log_record_equal_void(const void *a, hash_size); } +static int reftable_log_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_log_record *a = _a; + const struct reftable_log_record *b = _b; + int cmp = strcmp(a->refname, b->refname); + if (cmp) + return cmp; + + /* + * Note that the comparison here is reversed. This is because the + * update index is reversed when comparing keys. For reference, see how + * we handle this in reftable_log_record_key()`. + */ + return b->update_index - a->update_index; +} + int reftable_log_record_equal(const struct reftable_log_record *a, const struct reftable_log_record *b, int hash_size) { @@ -974,10 +919,10 @@ int reftable_log_record_equal(const struct reftable_log_record *a, b->value.update.email) && null_streq(a->value.update.message, b->value.update.message) && - zero_hash_eq(a->value.update.old_hash, - b->value.update.old_hash, hash_size) && - zero_hash_eq(a->value.update.new_hash, - b->value.update.new_hash, hash_size); + !memcmp(a->value.update.old_hash, + b->value.update.old_hash, hash_size) && + !memcmp(a->value.update.new_hash, + b->value.update.new_hash, hash_size); } abort(); @@ -989,11 +934,6 @@ static int reftable_log_record_is_deletion_void(const void *p) (const struct reftable_log_record *)p); } -static void reftable_log_record_print_void(const void *rec, int hash_size) -{ - reftable_log_record_print_sz((struct reftable_log_record*)rec, hash_size); -} - static struct reftable_record_vtable reftable_log_record_vtable = { .key = &reftable_log_record_key, .type = BLOCK_TYPE_LOG, @@ -1004,7 +944,7 @@ static struct reftable_record_vtable reftable_log_record_vtable = { .release = &reftable_log_record_release_void, .is_deletion = &reftable_log_record_is_deletion_void, .equal = &reftable_log_record_equal_void, - .print = &reftable_log_record_print_void, + .cmp = &reftable_log_record_cmp_void, }; static void reftable_index_record_key(const void *r, struct strbuf *dest) @@ -1015,7 +955,7 @@ static void reftable_index_record_key(const void *r, struct strbuf *dest) } static void reftable_index_record_copy_from(void *rec, const void *src_rec, - int hash_size) + int hash_size UNUSED) { struct reftable_index_record *dst = rec; const struct reftable_index_record *src = src_rec; @@ -1031,13 +971,13 @@ static void reftable_index_record_release(void *rec) strbuf_release(&idx->last_key); } -static uint8_t reftable_index_record_val_type(const void *rec) +static uint8_t reftable_index_record_val_type(const void *rec UNUSED) { return 0; } static int reftable_index_record_encode(const void *rec, struct string_view out, - int hash_size) + int hash_size UNUSED) { const struct reftable_index_record *r = (const struct reftable_index_record *)rec; @@ -1053,8 +993,10 @@ static int reftable_index_record_encode(const void *rec, struct string_view out, } static int reftable_index_record_decode(void *rec, struct strbuf key, - uint8_t val_type, struct string_view in, - int hash_size) + uint8_t val_type UNUSED, + struct string_view in, + int hash_size UNUSED, + struct strbuf *scratch UNUSED) { struct string_view start = in; struct reftable_index_record *r = rec; @@ -1071,7 +1013,8 @@ static int reftable_index_record_decode(void *rec, struct strbuf key, return start.len - in.len; } -static int reftable_index_record_equal(const void *a, const void *b, int hash_size) +static int reftable_index_record_equal(const void *a, const void *b, + int hash_size UNUSED) { struct reftable_index_record *ia = (struct reftable_index_record *) a; struct reftable_index_record *ib = (struct reftable_index_record *) b; @@ -1079,11 +1022,11 @@ static int reftable_index_record_equal(const void *a, const void *b, int hash_si return ia->offset == ib->offset && !strbuf_cmp(&ia->last_key, &ib->last_key); } -static void reftable_index_record_print(const void *rec, int hash_size) +static int reftable_index_record_cmp(const void *_a, const void *_b) { - const struct reftable_index_record *idx = rec; - /* TODO: escape null chars? */ - printf("\"%s\" %" PRIu64 "\n", idx->last_key.buf, idx->offset); + const struct reftable_index_record *a = _a; + const struct reftable_index_record *b = _b; + return strbuf_cmp(&a->last_key, &b->last_key); } static struct reftable_record_vtable reftable_index_record_vtable = { @@ -1096,7 +1039,7 @@ static struct reftable_record_vtable reftable_index_record_vtable = { .release = &reftable_index_record_release, .is_deletion = ¬_a_deletion, .equal = &reftable_index_record_equal, - .print = &reftable_index_record_print, + .cmp = &reftable_index_record_cmp, }; void reftable_record_key(struct reftable_record *rec, struct strbuf *dest) @@ -1104,11 +1047,6 @@ void reftable_record_key(struct reftable_record *rec, struct strbuf *dest) reftable_record_vtable(rec)->key(reftable_record_data(rec), dest); } -uint8_t reftable_record_type(struct reftable_record *rec) -{ - return rec->type; -} - int reftable_record_encode(struct reftable_record *rec, struct string_view dest, int hash_size) { @@ -1132,10 +1070,12 @@ uint8_t reftable_record_val_type(struct reftable_record *rec) } int reftable_record_decode(struct reftable_record *rec, struct strbuf key, - uint8_t extra, struct string_view src, int hash_size) + uint8_t extra, struct string_view src, int hash_size, + struct strbuf *scratch) { return reftable_record_vtable(rec)->decode(reftable_record_data(rec), - key, extra, src, hash_size); + key, extra, src, hash_size, + scratch); } void reftable_record_release(struct reftable_record *rec) @@ -1149,6 +1089,14 @@ int reftable_record_is_deletion(struct reftable_record *rec) reftable_record_data(rec)); } +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b) +{ + if (a->type != b->type) + BUG("cannot compare reftable records of different type"); + return reftable_record_vtable(a)->cmp( + reftable_record_data(a), reftable_record_data(b)); +} + int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size) { if (a->type != b->type) @@ -1222,12 +1170,6 @@ int reftable_log_record_is_deletion(const struct reftable_log_record *log) return (log->value_type == REFTABLE_LOG_DELETION); } -void string_view_consume(struct string_view *s, int n) -{ - s->buf += n; - s->len -= n; -} - static void *reftable_record_data(struct reftable_record *rec) { switch (rec->type) { @@ -1276,9 +1218,3 @@ void reftable_record_init(struct reftable_record *rec, uint8_t typ) BUG("unhandled record type"); } } - -void reftable_record_print(struct reftable_record *rec, int hash_size) -{ - printf("'%c': ", rec->type); - reftable_record_vtable(rec)->print(reftable_record_data(rec), hash_size); -} diff --git a/reftable/record.h b/reftable/record.h index e64ed30c80..5003bacdb0 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -25,7 +25,11 @@ struct string_view { }; /* Advance `s.buf` by `n`, and decrease length. */ -void string_view_consume(struct string_view *s, int n); +static inline void string_view_consume(struct string_view *s, int n) +{ + s->buf += n; + s->len -= n; +} /* utilities for de/encoding varints */ @@ -51,7 +55,8 @@ struct reftable_record_vtable { /* decode data from `src` into the record. */ int (*decode)(void *rec, struct strbuf key, uint8_t extra, - struct string_view src, int hash_size); + struct string_view src, int hash_size, + struct strbuf *scratch); /* deallocate and null the record. */ void (*release)(void *rec); @@ -62,6 +67,12 @@ struct reftable_record_vtable { /* Are two records equal? This assumes they have the same type. Returns 0 for non-equal. */ int (*equal)(const void *a, const void *b, int hash_size); + /* + * Compare keys of two records with each other. The records must have + * the same type. + */ + int (*cmp)(const void *a, const void *b); + /* Print on stdout, for debugging. */ void (*print)(const void *rec, int hash_size); }; @@ -75,9 +86,18 @@ int reftable_encode_key(int *is_restart, struct string_view dest, struct strbuf prev_key, struct strbuf key, uint8_t extra); -/* Decode into `key` and `extra` from `in` */ -int reftable_decode_key(struct strbuf *key, uint8_t *extra, - struct strbuf last_key, struct string_view in); +/* Decode a record's key lengths. */ +int reftable_decode_keylen(struct string_view in, + uint64_t *prefix_len, + uint64_t *suffix_len, + uint8_t *extra); + +/* + * Decode into `last_key` and `extra` from `in`. `last_key` is expected to + * contain the decoded key of the preceding record, if any. + */ +int reftable_decode_key(struct strbuf *last_key, uint8_t *extra, + struct string_view in); /* reftable_index_record are used internally to speed up lookups. */ struct reftable_index_record { @@ -114,10 +134,9 @@ struct reftable_record { void reftable_record_init(struct reftable_record *rec, uint8_t typ); /* see struct record_vtable */ +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b); int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size); -void reftable_record_print(struct reftable_record *rec, int hash_size); void reftable_record_key(struct reftable_record *rec, struct strbuf *dest); -uint8_t reftable_record_type(struct reftable_record *rec); void reftable_record_copy_from(struct reftable_record *rec, struct reftable_record *src, int hash_size); uint8_t reftable_record_val_type(struct reftable_record *rec); @@ -125,9 +144,14 @@ int reftable_record_encode(struct reftable_record *rec, struct string_view dest, int hash_size); int reftable_record_decode(struct reftable_record *rec, struct strbuf key, uint8_t extra, struct string_view src, - int hash_size); + int hash_size, struct strbuf *scratch); int reftable_record_is_deletion(struct reftable_record *rec); +static inline uint8_t reftable_record_type(struct reftable_record *rec) +{ + return rec->type; +} + /* frees and zeroes out the embedded record */ void reftable_record_release(struct reftable_record *rec); diff --git a/reftable/record_test.c b/reftable/record_test.c deleted file mode 100644 index a86cff5526..0000000000 --- a/reftable/record_test.c +++ /dev/null @@ -1,414 +0,0 @@ -/* - Copyright 2020 Google LLC - - Use of this source code is governed by a BSD-style - license that can be found in the LICENSE file or at - https://developers.google.com/open-source/licenses/bsd -*/ - -#include "record.h" - -#include "system.h" -#include "basics.h" -#include "constants.h" -#include "test_framework.h" -#include "reftable-tests.h" - -static void test_copy(struct reftable_record *rec) -{ - struct reftable_record copy; - uint8_t typ; - - typ = reftable_record_type(rec); - reftable_record_init(©, typ); - reftable_record_copy_from(©, rec, GIT_SHA1_RAWSZ); - /* do it twice to catch memory leaks */ - reftable_record_copy_from(©, rec, GIT_SHA1_RAWSZ); - EXPECT(reftable_record_equal(rec, ©, GIT_SHA1_RAWSZ)); - - puts("testing print coverage:\n"); - reftable_record_print(©, GIT_SHA1_RAWSZ); - - reftable_record_release(©); -} - -static void test_varint_roundtrip(void) -{ - uint64_t inputs[] = { 0, - 1, - 27, - 127, - 128, - 257, - 4096, - ((uint64_t)1 << 63), - ((uint64_t)1 << 63) + ((uint64_t)1 << 63) - 1 }; - int i = 0; - for (i = 0; i < ARRAY_SIZE(inputs); i++) { - uint8_t dest[10]; - - struct string_view out = { - .buf = dest, - .len = sizeof(dest), - }; - uint64_t in = inputs[i]; - int n = put_var_int(&out, in); - uint64_t got = 0; - - EXPECT(n > 0); - out.len = n; - n = get_var_int(&got, &out); - EXPECT(n > 0); - - EXPECT(got == in); - } -} - -static void test_common_prefix(void) -{ - struct { - const char *a, *b; - int want; - } cases[] = { - { "abc", "ab", 2 }, - { "", "abc", 0 }, - { "abc", "abd", 2 }, - { "abc", "pqr", 0 }, - }; - - int i = 0; - for (i = 0; i < ARRAY_SIZE(cases); i++) { - struct strbuf a = STRBUF_INIT; - struct strbuf b = STRBUF_INIT; - strbuf_addstr(&a, cases[i].a); - strbuf_addstr(&b, cases[i].b); - EXPECT(common_prefix_size(&a, &b) == cases[i].want); - - strbuf_release(&a); - strbuf_release(&b); - } -} - -static void set_hash(uint8_t *h, int j) -{ - int i = 0; - for (i = 0; i < hash_size(GIT_SHA1_FORMAT_ID); i++) { - h[i] = (j >> i) & 0xff; - } -} - -static void test_reftable_ref_record_roundtrip(void) -{ - int i = 0; - - for (i = REFTABLE_REF_DELETION; i < REFTABLE_NR_REF_VALUETYPES; i++) { - struct reftable_record in = { - .type = BLOCK_TYPE_REF, - }; - struct reftable_record out = { .type = BLOCK_TYPE_REF }; - struct strbuf key = STRBUF_INIT; - uint8_t buffer[1024] = { 0 }; - struct string_view dest = { - .buf = buffer, - .len = sizeof(buffer), - }; - int n, m; - - in.u.ref.value_type = i; - switch (i) { - case REFTABLE_REF_DELETION: - break; - case REFTABLE_REF_VAL1: - set_hash(in.u.ref.value.val1, 1); - break; - case REFTABLE_REF_VAL2: - set_hash(in.u.ref.value.val2.value, 1); - set_hash(in.u.ref.value.val2.target_value, 2); - break; - case REFTABLE_REF_SYMREF: - in.u.ref.value.symref = xstrdup("target"); - break; - } - in.u.ref.refname = xstrdup("refs/heads/master"); - - test_copy(&in); - - EXPECT(reftable_record_val_type(&in) == i); - - reftable_record_key(&in, &key); - n = reftable_record_encode(&in, dest, GIT_SHA1_RAWSZ); - EXPECT(n > 0); - - /* decode into a non-zero reftable_record to test for leaks. */ - m = reftable_record_decode(&out, key, i, dest, GIT_SHA1_RAWSZ); - EXPECT(n == m); - - EXPECT(reftable_ref_record_equal(&in.u.ref, &out.u.ref, - GIT_SHA1_RAWSZ)); - reftable_record_release(&in); - - strbuf_release(&key); - reftable_record_release(&out); - } -} - -static void test_reftable_log_record_equal(void) -{ - struct reftable_log_record in[2] = { - { - .refname = xstrdup("refs/heads/master"), - .update_index = 42, - }, - { - .refname = xstrdup("refs/heads/master"), - .update_index = 22, - } - }; - - EXPECT(!reftable_log_record_equal(&in[0], &in[1], GIT_SHA1_RAWSZ)); - in[1].update_index = in[0].update_index; - EXPECT(reftable_log_record_equal(&in[0], &in[1], GIT_SHA1_RAWSZ)); - reftable_log_record_release(&in[0]); - reftable_log_record_release(&in[1]); -} - -static void test_reftable_log_record_roundtrip(void) -{ - int i; - - struct reftable_log_record in[] = { - { - .refname = xstrdup("refs/heads/master"), - .update_index = 42, - .value_type = REFTABLE_LOG_UPDATE, - .value = { - .update = { - .old_hash = reftable_malloc(GIT_SHA1_RAWSZ), - .new_hash = reftable_malloc(GIT_SHA1_RAWSZ), - .name = xstrdup("han-wen"), - .email = xstrdup("hanwen@google.com"), - .message = xstrdup("test"), - .time = 1577123507, - .tz_offset = 100, - }, - } - }, - { - .refname = xstrdup("refs/heads/master"), - .update_index = 22, - .value_type = REFTABLE_LOG_DELETION, - }, - { - .refname = xstrdup("branch"), - .update_index = 33, - .value_type = REFTABLE_LOG_UPDATE, - .value = { - .update = { - .old_hash = reftable_malloc(GIT_SHA1_RAWSZ), - .new_hash = reftable_malloc(GIT_SHA1_RAWSZ), - /* rest of fields left empty. */ - }, - }, - } - }; - set_test_hash(in[0].value.update.new_hash, 1); - set_test_hash(in[0].value.update.old_hash, 2); - set_test_hash(in[2].value.update.new_hash, 3); - set_test_hash(in[2].value.update.old_hash, 4); - for (i = 0; i < ARRAY_SIZE(in); i++) { - struct reftable_record rec = { .type = BLOCK_TYPE_LOG }; - struct strbuf key = STRBUF_INIT; - uint8_t buffer[1024] = { 0 }; - struct string_view dest = { - .buf = buffer, - .len = sizeof(buffer), - }; - /* populate out, to check for leaks. */ - struct reftable_record out = { - .type = BLOCK_TYPE_LOG, - .u.log = { - .refname = xstrdup("old name"), - .value_type = REFTABLE_LOG_UPDATE, - .value = { - .update = { - .new_hash = reftable_calloc(GIT_SHA1_RAWSZ, 1), - .old_hash = reftable_calloc(GIT_SHA1_RAWSZ, 1), - .name = xstrdup("old name"), - .email = xstrdup("old@email"), - .message = xstrdup("old message"), - }, - }, - }, - }; - int n, m, valtype; - - rec.u.log = in[i]; - - test_copy(&rec); - - reftable_record_key(&rec, &key); - - n = reftable_record_encode(&rec, dest, GIT_SHA1_RAWSZ); - EXPECT(n >= 0); - valtype = reftable_record_val_type(&rec); - m = reftable_record_decode(&out, key, valtype, dest, - GIT_SHA1_RAWSZ); - EXPECT(n == m); - - EXPECT(reftable_log_record_equal(&in[i], &out.u.log, - GIT_SHA1_RAWSZ)); - reftable_log_record_release(&in[i]); - strbuf_release(&key); - reftable_record_release(&out); - } -} - -static void test_u24_roundtrip(void) -{ - uint32_t in = 0x112233; - uint8_t dest[3]; - uint32_t out; - put_be24(dest, in); - out = get_be24(dest); - EXPECT(in == out); -} - -static void test_key_roundtrip(void) -{ - uint8_t buffer[1024] = { 0 }; - struct string_view dest = { - .buf = buffer, - .len = sizeof(buffer), - }; - struct strbuf last_key = STRBUF_INIT; - struct strbuf key = STRBUF_INIT; - struct strbuf roundtrip = STRBUF_INIT; - int restart; - uint8_t extra; - int n, m; - uint8_t rt_extra; - - strbuf_addstr(&last_key, "refs/heads/master"); - strbuf_addstr(&key, "refs/tags/bla"); - extra = 6; - n = reftable_encode_key(&restart, dest, last_key, key, extra); - EXPECT(!restart); - EXPECT(n > 0); - - m = reftable_decode_key(&roundtrip, &rt_extra, last_key, dest); - EXPECT(n == m); - EXPECT(0 == strbuf_cmp(&key, &roundtrip)); - EXPECT(rt_extra == extra); - - strbuf_release(&last_key); - strbuf_release(&key); - strbuf_release(&roundtrip); -} - -static void test_reftable_obj_record_roundtrip(void) -{ - uint8_t testHash1[GIT_SHA1_RAWSZ] = { 1, 2, 3, 4, 0 }; - uint64_t till9[] = { 1, 2, 3, 4, 500, 600, 700, 800, 9000 }; - struct reftable_obj_record recs[3] = { { - .hash_prefix = testHash1, - .hash_prefix_len = 5, - .offsets = till9, - .offset_len = 3, - }, - { - .hash_prefix = testHash1, - .hash_prefix_len = 5, - .offsets = till9, - .offset_len = 9, - }, - { - .hash_prefix = testHash1, - .hash_prefix_len = 5, - } }; - int i = 0; - for (i = 0; i < ARRAY_SIZE(recs); i++) { - uint8_t buffer[1024] = { 0 }; - struct string_view dest = { - .buf = buffer, - .len = sizeof(buffer), - }; - struct reftable_record in = { - .type = BLOCK_TYPE_OBJ, - .u = { - .obj = recs[i], - }, - }; - struct strbuf key = STRBUF_INIT; - struct reftable_record out = { .type = BLOCK_TYPE_OBJ }; - int n, m; - uint8_t extra; - - test_copy(&in); - reftable_record_key(&in, &key); - n = reftable_record_encode(&in, dest, GIT_SHA1_RAWSZ); - EXPECT(n > 0); - extra = reftable_record_val_type(&in); - m = reftable_record_decode(&out, key, extra, dest, - GIT_SHA1_RAWSZ); - EXPECT(n == m); - - EXPECT(reftable_record_equal(&in, &out, GIT_SHA1_RAWSZ)); - strbuf_release(&key); - reftable_record_release(&out); - } -} - -static void test_reftable_index_record_roundtrip(void) -{ - struct reftable_record in = { - .type = BLOCK_TYPE_INDEX, - .u.idx = { - .offset = 42, - .last_key = STRBUF_INIT, - }, - }; - uint8_t buffer[1024] = { 0 }; - struct string_view dest = { - .buf = buffer, - .len = sizeof(buffer), - }; - struct strbuf key = STRBUF_INIT; - struct reftable_record out = { - .type = BLOCK_TYPE_INDEX, - .u.idx = { .last_key = STRBUF_INIT }, - }; - int n, m; - uint8_t extra; - - strbuf_addstr(&in.u.idx.last_key, "refs/heads/master"); - reftable_record_key(&in, &key); - test_copy(&in); - - EXPECT(0 == strbuf_cmp(&key, &in.u.idx.last_key)); - n = reftable_record_encode(&in, dest, GIT_SHA1_RAWSZ); - EXPECT(n > 0); - - extra = reftable_record_val_type(&in); - m = reftable_record_decode(&out, key, extra, dest, GIT_SHA1_RAWSZ); - EXPECT(m == n); - - EXPECT(reftable_record_equal(&in, &out, GIT_SHA1_RAWSZ)); - - reftable_record_release(&out); - strbuf_release(&key); - strbuf_release(&in.u.idx.last_key); -} - -int record_test_main(int argc, const char *argv[]) -{ - RUN_TEST(test_reftable_log_record_equal); - RUN_TEST(test_reftable_log_record_roundtrip); - RUN_TEST(test_reftable_ref_record_roundtrip); - RUN_TEST(test_varint_roundtrip); - RUN_TEST(test_key_roundtrip); - RUN_TEST(test_common_prefix); - RUN_TEST(test_reftable_obj_record_roundtrip); - RUN_TEST(test_reftable_index_record_roundtrip); - RUN_TEST(test_u24_roundtrip); - return 0; -} diff --git a/reftable/refname.c b/reftable/refname.c deleted file mode 100644 index 7570e4acf9..0000000000 --- a/reftable/refname.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - Copyright 2020 Google LLC - - Use of this source code is governed by a BSD-style - license that can be found in the LICENSE file or at - https://developers.google.com/open-source/licenses/bsd -*/ - -#include "system.h" -#include "reftable-error.h" -#include "basics.h" -#include "refname.h" -#include "reftable-iterator.h" - -struct find_arg { - char **names; - const char *want; -}; - -static int find_name(size_t k, void *arg) -{ - struct find_arg *f_arg = arg; - return strcmp(f_arg->names[k], f_arg->want) >= 0; -} - -static int modification_has_ref(struct modification *mod, const char *name) -{ - struct reftable_ref_record ref = { NULL }; - int err = 0; - - if (mod->add_len > 0) { - struct find_arg arg = { - .names = mod->add, - .want = name, - }; - int idx = binsearch(mod->add_len, find_name, &arg); - if (idx < mod->add_len && !strcmp(mod->add[idx], name)) { - return 0; - } - } - - if (mod->del_len > 0) { - struct find_arg arg = { - .names = mod->del, - .want = name, - }; - int idx = binsearch(mod->del_len, find_name, &arg); - if (idx < mod->del_len && !strcmp(mod->del[idx], name)) { - return 1; - } - } - - err = reftable_table_read_ref(&mod->tab, name, &ref); - reftable_ref_record_release(&ref); - return err; -} - -static void modification_release(struct modification *mod) -{ - /* don't delete the strings themselves; they're owned by ref records. - */ - FREE_AND_NULL(mod->add); - FREE_AND_NULL(mod->del); - mod->add_len = 0; - mod->del_len = 0; -} - -static int modification_has_ref_with_prefix(struct modification *mod, - const char *prefix) -{ - struct reftable_iterator it = { NULL }; - struct reftable_ref_record ref = { NULL }; - int err = 0; - - if (mod->add_len > 0) { - struct find_arg arg = { - .names = mod->add, - .want = prefix, - }; - int idx = binsearch(mod->add_len, find_name, &arg); - if (idx < mod->add_len && - !strncmp(prefix, mod->add[idx], strlen(prefix))) - goto done; - } - err = reftable_table_seek_ref(&mod->tab, &it, prefix); - if (err) - goto done; - - while (1) { - err = reftable_iterator_next_ref(&it, &ref); - if (err) - goto done; - - if (mod->del_len > 0) { - struct find_arg arg = { - .names = mod->del, - .want = ref.refname, - }; - int idx = binsearch(mod->del_len, find_name, &arg); - if (idx < mod->del_len && - !strcmp(ref.refname, mod->del[idx])) { - continue; - } - } - - if (strncmp(ref.refname, prefix, strlen(prefix))) { - err = 1; - goto done; - } - err = 0; - goto done; - } - -done: - reftable_ref_record_release(&ref); - reftable_iterator_destroy(&it); - return err; -} - -static int validate_refname(const char *name) -{ - while (1) { - char *next = strchr(name, '/'); - if (!*name) { - return REFTABLE_REFNAME_ERROR; - } - if (!next) { - return 0; - } - if (next - name == 0 || (next - name == 1 && *name == '.') || - (next - name == 2 && name[0] == '.' && name[1] == '.')) - return REFTABLE_REFNAME_ERROR; - name = next + 1; - } - return 0; -} - -int validate_ref_record_addition(struct reftable_table tab, - struct reftable_ref_record *recs, size_t sz) -{ - struct modification mod = { - .tab = tab, - .add = reftable_calloc(sz, sizeof(*mod.add)), - .del = reftable_calloc(sz, sizeof(*mod.del)), - }; - int i = 0; - int err = 0; - for (; i < sz; i++) { - if (reftable_ref_record_is_deletion(&recs[i])) { - mod.del[mod.del_len++] = recs[i].refname; - } else { - mod.add[mod.add_len++] = recs[i].refname; - } - } - - err = modification_validate(&mod); - modification_release(&mod); - return err; -} - -static void strbuf_trim_component(struct strbuf *sl) -{ - while (sl->len > 0) { - int is_slash = (sl->buf[sl->len - 1] == '/'); - strbuf_setlen(sl, sl->len - 1); - if (is_slash) - break; - } -} - -int modification_validate(struct modification *mod) -{ - struct strbuf slashed = STRBUF_INIT; - int err = 0; - int i = 0; - for (; i < mod->add_len; i++) { - err = validate_refname(mod->add[i]); - if (err) - goto done; - strbuf_reset(&slashed); - strbuf_addstr(&slashed, mod->add[i]); - strbuf_addstr(&slashed, "/"); - - err = modification_has_ref_with_prefix(mod, slashed.buf); - if (err == 0) { - err = REFTABLE_NAME_CONFLICT; - goto done; - } - if (err < 0) - goto done; - - strbuf_reset(&slashed); - strbuf_addstr(&slashed, mod->add[i]); - while (slashed.len) { - strbuf_trim_component(&slashed); - err = modification_has_ref(mod, slashed.buf); - if (err == 0) { - err = REFTABLE_NAME_CONFLICT; - goto done; - } - if (err < 0) - goto done; - } - } - err = 0; -done: - strbuf_release(&slashed); - return err; -} diff --git a/reftable/refname.h b/reftable/refname.h deleted file mode 100644 index a24b40fcb4..0000000000 --- a/reftable/refname.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - Copyright 2020 Google LLC - - Use of this source code is governed by a BSD-style - license that can be found in the LICENSE file or at - https://developers.google.com/open-source/licenses/bsd -*/ -#ifndef REFNAME_H -#define REFNAME_H - -#include "reftable-record.h" -#include "reftable-generic.h" - -struct modification { - struct reftable_table tab; - - char **add; - size_t add_len; - - char **del; - size_t del_len; -}; - -int validate_ref_record_addition(struct reftable_table tab, - struct reftable_ref_record *recs, size_t sz); - -int modification_validate(struct modification *mod); - -#endif diff --git a/reftable/refname_test.c b/reftable/refname_test.c deleted file mode 100644 index b9cc62554e..0000000000 --- a/reftable/refname_test.c +++ /dev/null @@ -1,101 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "basics.h" -#include "block.h" -#include "blocksource.h" -#include "reader.h" -#include "record.h" -#include "refname.h" -#include "reftable-error.h" -#include "reftable-writer.h" -#include "system.h" - -#include "test_framework.h" -#include "reftable-tests.h" - -struct testcase { - char *add; - char *del; - int error_code; -}; - -static void test_conflict(void) -{ - struct reftable_write_options opts = { 0 }; - struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - struct reftable_ref_record rec = { - .refname = "a/b", - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "destination", /* make sure it's not a symref. - */ - .update_index = 1, - }; - int err; - int i; - struct reftable_block_source source = { NULL }; - struct reftable_reader *rd = NULL; - struct reftable_table tab = { NULL }; - struct testcase cases[] = { - { "a/b/c", NULL, REFTABLE_NAME_CONFLICT }, - { "b", NULL, 0 }, - { "a", NULL, REFTABLE_NAME_CONFLICT }, - { "a", "a/b", 0 }, - - { "p/", NULL, REFTABLE_REFNAME_ERROR }, - { "p//q", NULL, REFTABLE_REFNAME_ERROR }, - { "p/./q", NULL, REFTABLE_REFNAME_ERROR }, - { "p/../q", NULL, REFTABLE_REFNAME_ERROR }, - - { "a/b/c", "a/b", 0 }, - { NULL, "a//b", 0 }, - }; - reftable_writer_set_limits(w, 1, 1); - - err = reftable_writer_add_ref(w, &rec); - EXPECT_ERR(err); - - err = reftable_writer_close(w); - EXPECT_ERR(err); - reftable_writer_free(w); - - block_source_from_strbuf(&source, &buf); - err = reftable_new_reader(&rd, &source, "filename"); - EXPECT_ERR(err); - - reftable_table_from_reader(&tab, rd); - - for (i = 0; i < ARRAY_SIZE(cases); i++) { - struct modification mod = { - .tab = tab, - }; - - if (cases[i].add) { - mod.add = &cases[i].add; - mod.add_len = 1; - } - if (cases[i].del) { - mod.del = &cases[i].del; - mod.del_len = 1; - } - - err = modification_validate(&mod); - EXPECT(err == cases[i].error_code); - } - - reftable_reader_free(rd); - strbuf_release(&buf); -} - -int refname_test_main(int argc, const char *argv[]) -{ - RUN_TEST(test_conflict); - return 0; -} diff --git a/reftable/reftable-error.h b/reftable/reftable-error.h index 4c457aaaf8..6368cd9ed9 100644 --- a/reftable/reftable-error.h +++ b/reftable/reftable-error.h @@ -25,7 +25,7 @@ enum reftable_error { */ REFTABLE_NOT_EXIST_ERROR = -4, - /* Trying to write out-of-date data. */ + /* Trying to access locked data. */ REFTABLE_LOCK_ERROR = -5, /* Misuse of the API: @@ -48,15 +48,15 @@ enum reftable_error { /* Wrote a table without blocks. */ REFTABLE_EMPTY_TABLE_ERROR = -8, - /* Dir/file conflict. */ - REFTABLE_NAME_CONFLICT = -9, - /* Invalid ref name. */ REFTABLE_REFNAME_ERROR = -10, /* Entry does not fit. This can happen when writing outsize reflog messages. */ REFTABLE_ENTRY_TOO_BIG_ERROR = -11, + + /* Trying to write out-of-date data. */ + REFTABLE_OUTDATED_ERROR = -12, }; /* convert the numeric error code to a string. The string should not be diff --git a/reftable/reftable-generic.h b/reftable/reftable-generic.h deleted file mode 100644 index d239751a77..0000000000 --- a/reftable/reftable-generic.h +++ /dev/null @@ -1,47 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#ifndef REFTABLE_GENERIC_H -#define REFTABLE_GENERIC_H - -#include "reftable-iterator.h" - -struct reftable_table_vtable; - -/* - * Provides a unified API for reading tables, either merged tables, or single - * readers. */ -struct reftable_table { - struct reftable_table_vtable *ops; - void *table_arg; -}; - -int reftable_table_seek_log(struct reftable_table *tab, - struct reftable_iterator *it, const char *name); - -int reftable_table_seek_ref(struct reftable_table *tab, - struct reftable_iterator *it, const char *name); - -/* returns the hash ID from a generic reftable_table */ -uint32_t reftable_table_hash_id(struct reftable_table *tab); - -/* returns the max update_index covered by this table. */ -uint64_t reftable_table_max_update_index(struct reftable_table *tab); - -/* returns the min update_index covered by this table. */ -uint64_t reftable_table_min_update_index(struct reftable_table *tab); - -/* convenience function to read a single ref. Returns < 0 for error, 0 - for success, and 1 if ref not found. */ -int reftable_table_read_ref(struct reftable_table *tab, const char *name, - struct reftable_ref_record *ref); - -/* dump table contents onto stdout for debugging */ -int reftable_table_print(struct reftable_table *tab); - -#endif diff --git a/reftable/reftable-iterator.h b/reftable/reftable-iterator.h index d3eee7af35..e3bf688d53 100644 --- a/reftable/reftable-iterator.h +++ b/reftable/reftable-iterator.h @@ -21,12 +21,33 @@ struct reftable_iterator { void *iter_arg; }; +/* + * Position the iterator at the ref record with given name such that the next + * call to `next_ref()` would yield the record. + */ +int reftable_iterator_seek_ref(struct reftable_iterator *it, + const char *name); + /* reads the next reftable_ref_record. Returns < 0 for error, 0 for OK and > 0: * end of iteration. */ int reftable_iterator_next_ref(struct reftable_iterator *it, struct reftable_ref_record *ref); +/* + * Position the iterator at the log record with given name and update index + * such that the next call to `next_log()` would yield the record. + */ +int reftable_iterator_seek_log_at(struct reftable_iterator *it, + const char *name, uint64_t update_index); + +/* + * Position the iterator at the newest log record with given name such that the + * next call to `next_log()` would yield the record. + */ +int reftable_iterator_seek_log(struct reftable_iterator *it, + const char *name); + /* reads the next reftable_log_record. Returns < 0 for error, 0 for OK and > 0: * end of iteration. */ diff --git a/reftable/reftable-merged.h b/reftable/reftable-merged.h index c91a2d83a2..16d19f8df2 100644 --- a/reftable/reftable-merged.h +++ b/reftable/reftable-merged.h @@ -26,30 +26,23 @@ https://developers.google.com/open-source/licenses/bsd /* A merged table is implements seeking/iterating over a stack of tables. */ struct reftable_merged_table; -/* A generic reftable; see below. */ -struct reftable_table; +struct reftable_reader; -/* reftable_new_merged_table creates a new merged table. It takes ownership of - the stack array. -*/ -int reftable_new_merged_table(struct reftable_merged_table **dest, - struct reftable_table *stack, size_t n, +/* + * reftable_merged_table_new creates a new merged table. The readers must be + * kept alive as long as the merged table is still in use. + */ +int reftable_merged_table_new(struct reftable_merged_table **dest, + struct reftable_reader **readers, size_t n, uint32_t hash_id); -/* returns an iterator positioned just before 'name' */ -int reftable_merged_table_seek_ref(struct reftable_merged_table *mt, - struct reftable_iterator *it, - const char *name); +/* Initialize a merged table iterator for reading refs. */ +void reftable_merged_table_init_ref_iterator(struct reftable_merged_table *mt, + struct reftable_iterator *it); -/* returns an iterator for log entry, at given update_index */ -int reftable_merged_table_seek_log_at(struct reftable_merged_table *mt, - struct reftable_iterator *it, - const char *name, uint64_t update_index); - -/* like reftable_merged_table_seek_log_at but look for the newest entry. */ -int reftable_merged_table_seek_log(struct reftable_merged_table *mt, - struct reftable_iterator *it, - const char *name); +/* Initialize a merged table iterator for reading logs. */ +void reftable_merged_table_init_log_iterator(struct reftable_merged_table *mt, + struct reftable_iterator *it); /* returns the max update_index covered by this merged table. */ uint64_t @@ -65,8 +58,4 @@ void reftable_merged_table_free(struct reftable_merged_table *m); /* return the hash ID of the merged table. */ uint32_t reftable_merged_table_hash_id(struct reftable_merged_table *m); -/* create a generic table from reftable_merged_table */ -void reftable_table_from_merged_table(struct reftable_table *tab, - struct reftable_merged_table *table); - #endif diff --git a/reftable/reftable-reader.h b/reftable/reftable-reader.h index 4a4bc2fdf8..a600452b56 100644 --- a/reftable/reftable-reader.h +++ b/reftable/reftable-reader.h @@ -23,64 +23,39 @@ /* The reader struct is a handle to an open reftable file. */ struct reftable_reader; -/* Generic table. */ -struct reftable_table; - -/* reftable_new_reader opens a reftable for reading. If successful, +/* reftable_reader_new opens a reftable for reading. If successful, * returns 0 code and sets pp. The name is used for creating a * stack. Typically, it is the basename of the file. The block source * `src` is owned by the reader, and is closed on calling * reftable_reader_destroy(). On error, the block source `src` is * closed as well. */ -int reftable_new_reader(struct reftable_reader **pp, +int reftable_reader_new(struct reftable_reader **pp, struct reftable_block_source *src, const char *name); -/* reftable_reader_seek_ref returns an iterator where 'name' would be inserted - in the table. To seek to the start of the table, use name = "". +/* + * Manage the reference count of the reftable reader. A newly initialized + * reader starts with a refcount of 1 and will be deleted once the refcount has + * reached 0. + * + * This is required because readers may have longer lifetimes than the stack + * they belong to. The stack may for example be reloaded while the old tables + * are still being accessed by an iterator. + */ +void reftable_reader_incref(struct reftable_reader *reader); +void reftable_reader_decref(struct reftable_reader *reader); - example: +/* Initialize a reftable iterator for reading refs. */ +void reftable_reader_init_ref_iterator(struct reftable_reader *r, + struct reftable_iterator *it); - struct reftable_reader *r = NULL; - int err = reftable_new_reader(&r, &src, "filename"); - if (err < 0) { ... } - struct reftable_iterator it = {0}; - err = reftable_reader_seek_ref(r, &it, "refs/heads/master"); - if (err < 0) { ... } - struct reftable_ref_record ref = {0}; - while (1) { - err = reftable_iterator_next_ref(&it, &ref); - if (err > 0) { - break; - } - if (err < 0) { - ..error handling.. - } - ..found.. - } - reftable_iterator_destroy(&it); - reftable_ref_record_release(&ref); -*/ -int reftable_reader_seek_ref(struct reftable_reader *r, - struct reftable_iterator *it, const char *name); +/* Initialize a reftable iterator for reading logs. */ +void reftable_reader_init_log_iterator(struct reftable_reader *r, + struct reftable_iterator *it); /* returns the hash ID used in this table. */ uint32_t reftable_reader_hash_id(struct reftable_reader *r); -/* seek to logs for the given name, older than update_index. To seek to the - start of the table, use name = "". -*/ -int reftable_reader_seek_log_at(struct reftable_reader *r, - struct reftable_iterator *it, const char *name, - uint64_t update_index); - -/* seek to newest log entry for given name. */ -int reftable_reader_seek_log(struct reftable_reader *r, - struct reftable_iterator *it, const char *name); - -/* closes and deallocates a reader. */ -void reftable_reader_free(struct reftable_reader *); - /* return an iterator for the refs pointing to `oid`. */ int reftable_reader_refs_for(struct reftable_reader *r, struct reftable_iterator *it, uint8_t *oid); @@ -91,11 +66,7 @@ uint64_t reftable_reader_max_update_index(struct reftable_reader *r); /* return the min_update_index for a table */ uint64_t reftable_reader_min_update_index(struct reftable_reader *r); -/* creates a generic table from a file reader. */ -void reftable_table_from_reader(struct reftable_table *tab, - struct reftable_reader *reader); - -/* print table onto stdout for debugging. */ -int reftable_reader_print_file(const char *tablename); +/* print blocks onto stdout for debugging. */ +int reftable_reader_print_blocks(const char *tablename); #endif diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h index bb6e99acd3..2d42463c58 100644 --- a/reftable/reftable-record.h +++ b/reftable/reftable-record.h @@ -9,7 +9,7 @@ https://developers.google.com/open-source/licenses/bsd #ifndef REFTABLE_RECORD_H #define REFTABLE_RECORD_H -#include "hash-ll.h" +#include "hash.h" #include <stdint.h> /* @@ -22,6 +22,7 @@ https://developers.google.com/open-source/licenses/bsd /* reftable_ref_record holds a ref database entry target_value */ struct reftable_ref_record { char *refname; /* Name of the ref, malloced. */ + size_t refname_cap; uint64_t update_index; /* Logical timestamp at which this value is * written */ @@ -59,10 +60,6 @@ const unsigned char *reftable_ref_record_val2(const struct reftable_ref_record * /* returns whether 'ref' represents a deletion */ int reftable_ref_record_is_deletion(const struct reftable_ref_record *ref); -/* prints a reftable_ref_record onto stdout. Useful for debugging. */ -void reftable_ref_record_print(const struct reftable_ref_record *ref, - uint32_t hash_id); - /* frees and nulls all pointer values inside `ref`. */ void reftable_ref_record_release(struct reftable_ref_record *ref); @@ -73,6 +70,7 @@ int reftable_ref_record_equal(const struct reftable_ref_record *a, /* reftable_log_record holds a reflog entry */ struct reftable_log_record { char *refname; + size_t refname_cap; uint64_t update_index; /* logical timestamp of a transactional update. */ @@ -87,13 +85,14 @@ struct reftable_log_record { union { struct { - uint8_t *new_hash; - uint8_t *old_hash; + unsigned char new_hash[GIT_MAX_RAWSZ]; + unsigned char old_hash[GIT_MAX_RAWSZ]; char *name; char *email; uint64_t time; int16_t tz_offset; char *message; + size_t message_cap; } update; } value; }; @@ -108,8 +107,4 @@ void reftable_log_record_release(struct reftable_log_record *log); int reftable_log_record_equal(const struct reftable_log_record *a, const struct reftable_log_record *b, int hash_size); -/* dumps a reftable_log_record on stdout, for debugging/testing. */ -void reftable_log_record_print(struct reftable_log_record *log, - uint32_t hash_id); - #endif diff --git a/reftable/reftable-stack.h b/reftable/reftable-stack.h index 1b602dda58..f4f8cabc7f 100644 --- a/reftable/reftable-stack.h +++ b/reftable/reftable-stack.h @@ -29,7 +29,7 @@ struct reftable_stack; * stored in 'dir'. Typically, this should be .git/reftables. */ int reftable_new_stack(struct reftable_stack **dest, const char *dir, - struct reftable_write_options config); + const struct reftable_write_options *opts); /* returns the update_index at which a next table should be written. */ uint64_t reftable_stack_next_update_index(struct reftable_stack *st); @@ -66,6 +66,24 @@ int reftable_stack_add(struct reftable_stack *st, void *write_arg), void *write_arg); +struct reftable_iterator; + +/* + * Initialize an iterator for the merged tables contained in the stack that can + * be used to iterate through refs. The iterator is valid until the next reload + * or write. + */ +void reftable_stack_init_ref_iterator(struct reftable_stack *st, + struct reftable_iterator *it); + +/* + * Initialize an iterator for the merged tables contained in the stack that can + * be used to iterate through logs. The iterator is valid until the next reload + * or write. + */ +void reftable_stack_init_log_iterator(struct reftable_stack *st, + struct reftable_iterator *it); + /* returns the merged_table for seeking. This table is valid until the * next write or reload, and should not be closed or deleted. */ @@ -122,7 +140,4 @@ struct reftable_compaction_stats { struct reftable_compaction_stats * reftable_stack_compaction_stats(struct reftable_stack *st); -/* print the entire stack represented by the directory */ -int reftable_stack_print_directory(const char *stackdir, uint32_t hash_id); - #endif diff --git a/reftable/reftable-tests.h b/reftable/reftable-tests.h deleted file mode 100644 index 0019cbcfa4..0000000000 --- a/reftable/reftable-tests.h +++ /dev/null @@ -1,23 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#ifndef REFTABLE_TESTS_H -#define REFTABLE_TESTS_H - -int basics_test_main(int argc, const char **argv); -int block_test_main(int argc, const char **argv); -int merged_test_main(int argc, const char **argv); -int pq_test_main(int argc, const char **argv); -int record_test_main(int argc, const char **argv); -int refname_test_main(int argc, const char **argv); -int readwrite_test_main(int argc, const char **argv); -int stack_test_main(int argc, const char **argv); -int tree_test_main(int argc, const char **argv); -int reftable_dump_main(int argc, char *const *argv); - -#endif diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h index 7c7cae5f99..189b1f4144 100644 --- a/reftable/reftable-writer.h +++ b/reftable/reftable-writer.h @@ -28,7 +28,7 @@ struct reftable_write_options { unsigned skip_index_objects : 1; /* how often to write complete keys in each block. */ - int restart_interval; + uint16_t restart_interval; /* 4-byte identifier ("sha1", "s256") of the hash. * Defaults to SHA1 if unset @@ -38,14 +38,19 @@ struct reftable_write_options { /* Default mode for creating files. If unset, use 0666 (+umask) */ unsigned int default_permissions; - /* boolean: do not check ref names for validity or dir/file conflicts. - */ - unsigned skip_name_check : 1; - /* boolean: copy log messages exactly. If unset, check that the message * is a single line, and add '\n' if missing. */ unsigned exact_log_message : 1; + + /* boolean: Prevent auto-compaction of tables. */ + unsigned disable_auto_compact : 1; + + /* + * Geometric sequence factor used by auto-compaction to decide which + * tables to compact. Defaults to 2 if unset. + */ + uint8_t auto_compaction_factor; }; /* reftable_block_stats holds statistics for a single block type */ @@ -89,7 +94,7 @@ struct reftable_stats { struct reftable_writer * reftable_new_writer(ssize_t (*writer_func)(void *, const void *, size_t), int (*flush_func)(void *), - void *writer_arg, struct reftable_write_options *opts); + void *writer_arg, const struct reftable_write_options *opts); /* Set the range of update indices for the records we will add. When writing a table into a stack, the min should be at least diff --git a/reftable/stack.c b/reftable/stack.c index b64e55648a..ce0a35216b 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -10,9 +10,9 @@ https://developers.google.com/open-source/licenses/bsd #include "../write-or-die.h" #include "system.h" +#include "constants.h" #include "merged.h" #include "reader.h" -#include "refname.h" #include "reftable-error.h" #include "reftable-record.h" #include "reftable-merged.h" @@ -27,8 +27,6 @@ static int stack_write_compact(struct reftable_stack *st, struct reftable_writer *wr, size_t first, size_t last, struct reftable_log_expiry_config *config); -static int stack_check_addition(struct reftable_stack *st, - const char *new_tab_name); static void reftable_addition_close(struct reftable_addition *add); static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, int reuse_open); @@ -56,15 +54,17 @@ static int reftable_fd_flush(void *arg) } int reftable_new_stack(struct reftable_stack **dest, const char *dir, - struct reftable_write_options config) + const struct reftable_write_options *_opts) { struct reftable_stack *p = reftable_calloc(1, sizeof(*p)); struct strbuf list_file_name = STRBUF_INIT; + struct reftable_write_options opts = {0}; int err = 0; - if (config.hash_id == 0) { - config.hash_id = GIT_SHA1_FORMAT_ID; - } + if (_opts) + opts = *_opts; + if (opts.hash_id == 0) + opts.hash_id = GIT_SHA1_FORMAT_ID; *dest = NULL; @@ -75,7 +75,7 @@ int reftable_new_stack(struct reftable_stack **dest, const char *dir, p->list_file = strbuf_detach(&list_file_name, NULL); p->list_fd = -1; p->reftable_dir = xstrdup(dir); - p->config = config; + p->opts = opts; err = reftable_stack_reload_maybe_reuse(p, 1); if (err < 0) { @@ -132,6 +132,20 @@ int read_lines(const char *filename, char ***namesp) return err; } +void reftable_stack_init_ref_iterator(struct reftable_stack *st, + struct reftable_iterator *it) +{ + merged_table_init_iter(reftable_stack_merged_table(st), + it, BLOCK_TYPE_REF); +} + +void reftable_stack_init_log_iterator(struct reftable_stack *st, + struct reftable_iterator *it) +{ + merged_table_init_iter(reftable_stack_merged_table(st), + it, BLOCK_TYPE_LOG); +} + struct reftable_merged_table * reftable_stack_merged_table(struct reftable_stack *st) { @@ -172,7 +186,7 @@ void reftable_stack_destroy(struct reftable_stack *st) if (names && !has_name(names, name)) { stack_filename(&filename, st, name); } - reftable_reader_free(st->readers[i]); + reftable_reader_decref(st->readers[i]); if (filename.len) { /* On Windows, can only unlink after closing. */ @@ -206,16 +220,17 @@ static struct reftable_reader **stack_copy_readers(struct reftable_stack *st, return cur; } -static int reftable_stack_reload_once(struct reftable_stack *st, char **names, +static int reftable_stack_reload_once(struct reftable_stack *st, + const char **names, int reuse_open) { - size_t cur_len = !st->merged ? 0 : st->merged->stack_len; + size_t cur_len = !st->merged ? 0 : st->merged->readers_len; struct reftable_reader **cur = stack_copy_readers(st, cur_len); + struct reftable_reader **reused = NULL; + size_t reused_len = 0, reused_alloc = 0; size_t names_len = names_length(names); struct reftable_reader **new_readers = reftable_calloc(names_len, sizeof(*new_readers)); - struct reftable_table *new_tables = - reftable_calloc(names_len, sizeof(*new_tables)); size_t new_readers_len = 0; struct reftable_merged_table *new_merged = NULL; struct strbuf table_path = STRBUF_INIT; @@ -224,7 +239,7 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, while (*names) { struct reftable_reader *rd = NULL; - char *name = *names++; + const char *name = *names++; /* this is linear; we assume compaction keeps the number of tables under control so this is not quadratic. */ @@ -232,6 +247,18 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, if (cur[i] && 0 == strcmp(cur[i]->name, name)) { rd = cur[i]; cur[i] = NULL; + + /* + * When reloading the stack fails, we end up + * releasing all new readers. This also + * includes the reused readers, even though + * they are still in used by the old stack. We + * thus need to keep them alive here, which we + * do by bumping their refcount. + */ + REFTABLE_ALLOC_GROW(reused, reused_len + 1, reused_alloc); + reused[reused_len++] = rd; + reftable_reader_incref(rd); break; } } @@ -245,57 +272,62 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, if (err < 0) goto done; - err = reftable_new_reader(&rd, &src, name); + err = reftable_reader_new(&rd, &src, name); if (err < 0) goto done; } new_readers[new_readers_len] = rd; - reftable_table_from_reader(&new_tables[new_readers_len], rd); new_readers_len++; } /* success! */ - err = reftable_new_merged_table(&new_merged, new_tables, - new_readers_len, st->config.hash_id); + err = reftable_merged_table_new(&new_merged, new_readers, + new_readers_len, st->opts.hash_id); if (err < 0) goto done; - new_tables = NULL; - st->readers_len = new_readers_len; - if (st->merged) { - merged_table_release(st->merged); - reftable_merged_table_free(st->merged); - } - if (st->readers) { - reftable_free(st->readers); - } - st->readers = new_readers; - new_readers = NULL; - new_readers_len = 0; - - new_merged->suppress_deletions = 1; - st->merged = new_merged; + /* + * Close the old, non-reused readers and proactively try to unlink + * them. This is done for systems like Windows, where the underlying + * file of such an open reader wouldn't have been possible to be + * unlinked by the compacting process. + */ for (i = 0; i < cur_len; i++) { if (cur[i]) { const char *name = reader_name(cur[i]); stack_filename(&table_path, st, name); - - reader_close(cur[i]); - reftable_reader_free(cur[i]); - - /* On Windows, can only unlink after closing. */ + reftable_reader_decref(cur[i]); unlink(table_path.buf); } } + /* Update the stack to point to the new tables. */ + if (st->merged) + reftable_merged_table_free(st->merged); + new_merged->suppress_deletions = 1; + st->merged = new_merged; + + if (st->readers) + reftable_free(st->readers); + st->readers = new_readers; + st->readers_len = new_readers_len; + new_readers = NULL; + new_readers_len = 0; + + /* + * Decrement the refcount of reused readers again. This only needs to + * happen on the successful case, because on the unsuccessful one we + * decrement their refcount via `new_readers`. + */ + for (i = 0; i < reused_len; i++) + reftable_reader_decref(reused[i]); + done: - for (i = 0; i < new_readers_len; i++) { - reader_close(new_readers[i]); - reftable_reader_free(new_readers[i]); - } + for (i = 0; i < new_readers_len; i++) + reftable_reader_decref(new_readers[i]); reftable_free(new_readers); - reftable_free(new_tables); + reftable_free(reused); reftable_free(cur); strbuf_release(&table_path); return err; @@ -356,7 +388,7 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, goto out; } - err = reftable_stack_reload_once(st, names, reuse_open); + err = reftable_stack_reload_once(st, (const char **) names, reuse_open); if (!err) break; if (err != REFTABLE_NOT_EXIST_ERROR) @@ -370,7 +402,8 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, err = read_lines(st->list_file, &names_after); if (err < 0) goto out; - if (names_equal(names_after, names)) { + if (names_equal((const char **) names_after, + (const char **) names)) { err = REFTABLE_NOT_EXIST_ERROR; goto out; } @@ -505,7 +538,7 @@ static int stack_uptodate(struct reftable_stack *st) } } - if (names[st->merged->stack_len]) { + if (names[st->merged->readers_len]) { err = 1; goto done; } @@ -529,9 +562,9 @@ int reftable_stack_add(struct reftable_stack *st, { int err = stack_try_add(st, write, arg); if (err < 0) { - if (err == REFTABLE_LOCK_ERROR) { + if (err == REFTABLE_OUTDATED_ERROR) { /* Ignore error return, we want to propagate - REFTABLE_LOCK_ERROR. + REFTABLE_OUTDATED_ERROR. */ reftable_stack_reload(st); } @@ -552,7 +585,7 @@ static void format_name(struct strbuf *dest, uint64_t min, uint64_t max) } struct reftable_addition { - struct tempfile *lock_file; + struct lock_file tables_list_lock; struct reftable_stack *stack; char **new_tables; @@ -566,13 +599,13 @@ static int reftable_stack_init_addition(struct reftable_addition *add, struct reftable_stack *st) { struct strbuf lock_file_name = STRBUF_INIT; - int err = 0; - add->stack = st; + int err; - strbuf_addf(&lock_file_name, "%s.lock", st->list_file); + add->stack = st; - add->lock_file = create_tempfile(lock_file_name.buf); - if (!add->lock_file) { + err = hold_lock_file_for_update(&add->tables_list_lock, st->list_file, + LOCK_NO_DEREF); + if (err < 0) { if (errno == EEXIST) { err = REFTABLE_LOCK_ERROR; } else { @@ -580,8 +613,9 @@ static int reftable_stack_init_addition(struct reftable_addition *add, } goto done; } - if (st->config.default_permissions) { - if (chmod(add->lock_file->filename.buf, st->config.default_permissions) < 0) { + if (st->opts.default_permissions) { + if (chmod(get_lock_file_path(&add->tables_list_lock), + st->opts.default_permissions) < 0) { err = REFTABLE_IO_ERROR; goto done; } @@ -590,9 +624,8 @@ static int reftable_stack_init_addition(struct reftable_addition *add, err = stack_uptodate(st); if (err < 0) goto done; - - if (err > 1) { - err = REFTABLE_LOCK_ERROR; + if (err > 0) { + err = REFTABLE_OUTDATED_ERROR; goto done; } @@ -621,7 +654,7 @@ static void reftable_addition_close(struct reftable_addition *add) add->new_tables_len = 0; add->new_tables_cap = 0; - delete_tempfile(&add->lock_file); + rollback_lock_file(&add->tables_list_lock); strbuf_release(&nm); } @@ -637,14 +670,14 @@ void reftable_addition_destroy(struct reftable_addition *add) int reftable_addition_commit(struct reftable_addition *add) { struct strbuf table_list = STRBUF_INIT; - int lock_file_fd = get_tempfile_fd(add->lock_file); + int lock_file_fd = get_lock_file_fd(&add->tables_list_lock); int err = 0; size_t i; if (add->new_tables_len == 0) goto done; - for (i = 0; i < add->stack->merged->stack_len; i++) { + for (i = 0; i < add->stack->merged->readers_len; i++) { strbuf_addstr(&table_list, add->stack->readers[i]->name); strbuf_addstr(&table_list, "\n"); } @@ -660,10 +693,13 @@ int reftable_addition_commit(struct reftable_addition *add) goto done; } - fsync_component_or_die(FSYNC_COMPONENT_REFERENCE, lock_file_fd, - get_tempfile_path(add->lock_file)); + err = fsync_component(FSYNC_COMPONENT_REFERENCE, lock_file_fd); + if (err < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } - err = rename_tempfile(&add->lock_file, add->stack->list_file); + err = commit_lock_file(&add->tables_list_lock); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; @@ -681,8 +717,19 @@ int reftable_addition_commit(struct reftable_addition *add) if (err) goto done; - if (!add->stack->disable_auto_compact) + if (!add->stack->opts.disable_auto_compact) { + /* + * Auto-compact the stack to keep the number of tables in + * control. It is possible that a concurrent writer is already + * trying to compact parts of the stack, which would lead to a + * `REFTABLE_LOCK_ERROR` because parts of the stack are locked + * already. This is a benign error though, so we ignore it. + */ err = reftable_stack_auto_compact(add->stack); + if (err < 0 && err != REFTABLE_LOCK_ERROR) + goto done; + err = 0; + } done: reftable_addition_close(add); @@ -713,10 +760,6 @@ static int stack_try_add(struct reftable_stack *st, int err = reftable_stack_init_addition(&add, st); if (err < 0) goto done; - if (err > 0) { - err = REFTABLE_LOCK_ERROR; - goto done; - } err = reftable_addition_add(&add, write_table, arg); if (err < 0) @@ -737,8 +780,9 @@ int reftable_addition_add(struct reftable_addition *add, struct strbuf tab_file_name = STRBUF_INIT; struct strbuf next_name = STRBUF_INIT; struct reftable_writer *wr = NULL; + struct tempfile *tab_file = NULL; int err = 0; - int tab_fd = 0; + int tab_fd; strbuf_reset(&next_name); format_name(&next_name, add->next_update_index, add->next_update_index); @@ -746,19 +790,22 @@ int reftable_addition_add(struct reftable_addition *add, stack_filename(&temp_tab_file_name, add->stack, next_name.buf); strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX"); - tab_fd = mkstemp(temp_tab_file_name.buf); - if (tab_fd < 0) { + tab_file = mks_tempfile(temp_tab_file_name.buf); + if (!tab_file) { err = REFTABLE_IO_ERROR; goto done; } - if (add->stack->config.default_permissions) { - if (chmod(temp_tab_file_name.buf, add->stack->config.default_permissions)) { + if (add->stack->opts.default_permissions) { + if (chmod(get_tempfile_path(tab_file), + add->stack->opts.default_permissions)) { err = REFTABLE_IO_ERROR; goto done; } } + tab_fd = get_tempfile_fd(tab_file); + wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, &tab_fd, - &add->stack->config); + &add->stack->opts); err = write_table(wr, arg); if (err < 0) goto done; @@ -771,17 +818,12 @@ int reftable_addition_add(struct reftable_addition *add, if (err < 0) goto done; - err = close(tab_fd); - tab_fd = 0; + err = close_tempfile_gently(tab_file); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; } - err = stack_check_addition(add->stack, temp_tab_file_name.buf); - if (err < 0) - goto done; - if (wr->min_update_index < add->next_update_index) { err = REFTABLE_API_ERROR; goto done; @@ -789,14 +831,13 @@ int reftable_addition_add(struct reftable_addition *add, format_name(&next_name, wr->min_update_index, wr->max_update_index); strbuf_addstr(&next_name, ".ref"); - stack_filename(&tab_file_name, add->stack, next_name.buf); /* On windows, this relies on rand() picking a unique destination name. Maybe we should do retry loop as well? */ - err = rename(temp_tab_file_name.buf, tab_file_name.buf); + err = rename_tempfile(&tab_file, tab_file_name.buf); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; @@ -806,14 +847,7 @@ int reftable_addition_add(struct reftable_addition *add, add->new_tables_cap); add->new_tables[add->new_tables_len++] = strbuf_detach(&next_name, NULL); done: - if (tab_fd > 0) { - close(tab_fd); - tab_fd = 0; - } - if (temp_tab_file_name.len > 0) { - unlink(temp_tab_file_name.buf); - } - + delete_tempfile(&tab_file); strbuf_release(&temp_tab_file_name); strbuf_release(&tab_file_name); strbuf_release(&next_name); @@ -823,7 +857,7 @@ done: uint64_t reftable_stack_next_update_index(struct reftable_stack *st) { - int sz = st->merged->stack_len; + int sz = st->merged->readers_len; if (sz > 0) return reftable_reader_max_update_index(st->readers[sz - 1]) + 1; @@ -832,51 +866,56 @@ uint64_t reftable_stack_next_update_index(struct reftable_stack *st) static int stack_compact_locked(struct reftable_stack *st, size_t first, size_t last, - struct strbuf *temp_tab, - struct reftable_log_expiry_config *config) + struct reftable_log_expiry_config *config, + struct tempfile **tab_file_out) { struct strbuf next_name = STRBUF_INIT; - int tab_fd = -1; + struct strbuf tab_file_path = STRBUF_INIT; struct reftable_writer *wr = NULL; - int err = 0; + struct tempfile *tab_file; + int tab_fd, err = 0; format_name(&next_name, reftable_reader_min_update_index(st->readers[first]), reftable_reader_max_update_index(st->readers[last])); + stack_filename(&tab_file_path, st, next_name.buf); + strbuf_addstr(&tab_file_path, ".temp.XXXXXX"); - stack_filename(temp_tab, st, next_name.buf); - strbuf_addstr(temp_tab, ".temp.XXXXXX"); - - tab_fd = mkstemp(temp_tab->buf); - if (st->config.default_permissions && - chmod(temp_tab->buf, st->config.default_permissions) < 0) { + tab_file = mks_tempfile(tab_file_path.buf); + if (!tab_file) { err = REFTABLE_IO_ERROR; goto done; } + tab_fd = get_tempfile_fd(tab_file); - wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, &tab_fd, &st->config); + if (st->opts.default_permissions && + chmod(get_tempfile_path(tab_file), st->opts.default_permissions) < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, + &tab_fd, &st->opts); err = stack_write_compact(st, wr, first, last, config); if (err < 0) goto done; + err = reftable_writer_close(wr); if (err < 0) goto done; - err = close(tab_fd); - tab_fd = 0; + err = close_tempfile_gently(tab_file); + if (err < 0) + goto done; + + *tab_file_out = tab_file; + tab_file = NULL; done: + delete_tempfile(&tab_file); reftable_writer_free(wr); - if (tab_fd > 0) { - close(tab_fd); - tab_fd = 0; - } - if (err != 0 && temp_tab->len > 0) { - unlink(temp_tab->buf); - strbuf_release(temp_tab); - } strbuf_release(&next_name); + strbuf_release(&tab_file_path); return err; } @@ -885,32 +924,26 @@ static int stack_write_compact(struct reftable_stack *st, size_t first, size_t last, struct reftable_log_expiry_config *config) { - size_t subtabs_len = last - first + 1; - struct reftable_table *subtabs = reftable_calloc( - last - first + 1, sizeof(*subtabs)); struct reftable_merged_table *mt = NULL; struct reftable_iterator it = { NULL }; struct reftable_ref_record ref = { NULL }; struct reftable_log_record log = { NULL }; + size_t subtabs_len = last - first + 1; uint64_t entries = 0; int err = 0; - for (size_t i = first, j = 0; i <= last; i++) { - struct reftable_reader *t = st->readers[i]; - reftable_table_from_reader(&subtabs[j++], t); - st->stats.bytes += t->size; - } + for (size_t i = first; i <= last; i++) + st->stats.bytes += st->readers[i]->size; reftable_writer_set_limits(wr, st->readers[first]->min_update_index, st->readers[last]->max_update_index); - err = reftable_new_merged_table(&mt, subtabs, subtabs_len, - st->config.hash_id); - if (err < 0) { - reftable_free(subtabs); + err = reftable_merged_table_new(&mt, st->readers + first, subtabs_len, + st->opts.hash_id); + if (err < 0) goto done; - } - err = reftable_merged_table_seek_ref(mt, &it, ""); + merged_table_init_iter(mt, &it, BLOCK_TYPE_REF); + err = reftable_iterator_seek_ref(&it, ""); if (err < 0) goto done; @@ -934,7 +967,8 @@ static int stack_write_compact(struct reftable_stack *st, } reftable_iterator_destroy(&it); - err = reftable_merged_table_seek_log(mt, &it, ""); + merged_table_init_iter(mt, &it, BLOCK_TYPE_LOG); + err = reftable_iterator_seek_log(&it, ""); if (err < 0) goto done; @@ -968,245 +1002,367 @@ static int stack_write_compact(struct reftable_stack *st, done: reftable_iterator_destroy(&it); - if (mt) { - merged_table_release(mt); + if (mt) reftable_merged_table_free(mt); - } reftable_ref_record_release(&ref); reftable_log_record_release(&log); st->stats.entries_written += entries; return err; } -/* < 0: error. 0 == OK, > 0 attempt failed; could retry. */ +enum stack_compact_range_flags { + /* + * Perform a best-effort compaction. That is, even if we cannot lock + * all tables in the specified range, we will try to compact the + * remaining slice. + */ + STACK_COMPACT_RANGE_BEST_EFFORT = (1 << 0), +}; + +/* + * Compact all tables in the range `[first, last)` into a single new table. + * + * This function returns `0` on success or a code `< 0` on failure. When the + * stack or any of the tables in the specified range are already locked then + * this function returns `REFTABLE_LOCK_ERROR`. This is a benign error that + * callers can either ignore, or they may choose to retry compaction after some + * amount of time. + */ static int stack_compact_range(struct reftable_stack *st, size_t first, size_t last, - struct reftable_log_expiry_config *expiry) + struct reftable_log_expiry_config *expiry, + unsigned int flags) { - char **delete_on_success = NULL, **subtable_locks = NULL, **listp = NULL; - struct strbuf temp_tab_file_name = STRBUF_INIT; + struct strbuf tables_list_buf = STRBUF_INIT; struct strbuf new_table_name = STRBUF_INIT; - struct strbuf lock_file_name = STRBUF_INIT; - struct strbuf ref_list_contents = STRBUF_INIT; struct strbuf new_table_path = STRBUF_INIT; - size_t i, j, compact_count; - int err = 0; - int have_lock = 0; - int lock_file_fd = -1; - int is_empty_table = 0; + struct strbuf table_name = STRBUF_INIT; + struct lock_file tables_list_lock = LOCK_INIT; + struct lock_file *table_locks = NULL; + struct tempfile *new_table = NULL; + int is_empty_table = 0, err = 0; + size_t first_to_replace, last_to_replace; + size_t i, nlocks = 0; + char **names = NULL; if (first > last || (!expiry && first == last)) { err = 0; goto done; } - compact_count = last - first + 1; - REFTABLE_CALLOC_ARRAY(delete_on_success, compact_count + 1); - REFTABLE_CALLOC_ARRAY(subtable_locks, compact_count + 1); - st->stats.attempts++; - strbuf_reset(&lock_file_name); - strbuf_addstr(&lock_file_name, st->list_file); - strbuf_addstr(&lock_file_name, ".lock"); - - lock_file_fd = - open(lock_file_name.buf, O_EXCL | O_CREAT | O_WRONLY, 0666); - if (lock_file_fd < 0) { - if (errno == EEXIST) { - err = 1; - } else { + /* + * Hold the lock so that we can read "tables.list" and lock all tables + * which are part of the user-specified range. + */ + err = hold_lock_file_for_update(&tables_list_lock, st->list_file, + LOCK_NO_DEREF); + if (err < 0) { + if (errno == EEXIST) + err = REFTABLE_LOCK_ERROR; + else err = REFTABLE_IO_ERROR; - } goto done; } - /* Don't want to write to the lock for now. */ - close(lock_file_fd); - lock_file_fd = -1; - have_lock = 1; err = stack_uptodate(st); - if (err != 0) + if (err) goto done; - for (i = first, j = 0; i <= last; i++) { - struct strbuf subtab_file_name = STRBUF_INIT; - struct strbuf subtab_lock = STRBUF_INIT; - int sublock_file_fd = -1; - - stack_filename(&subtab_file_name, st, - reader_name(st->readers[i])); - - strbuf_reset(&subtab_lock); - strbuf_addbuf(&subtab_lock, &subtab_file_name); - strbuf_addstr(&subtab_lock, ".lock"); - - sublock_file_fd = open(subtab_lock.buf, - O_EXCL | O_CREAT | O_WRONLY, 0666); - if (sublock_file_fd >= 0) { - close(sublock_file_fd); - } else if (sublock_file_fd < 0) { - if (errno == EEXIST) { - err = 1; + /* + * Lock all tables in the user-provided range. This is the slice of our + * stack which we'll compact. + * + * Note that we lock tables in reverse order from last to first. The + * intent behind this is to allow a newer process to perform best + * effort compaction of tables that it has added in the case where an + * older process is still busy compacting tables which are preexisting + * from the point of view of the newer process. + */ + REFTABLE_CALLOC_ARRAY(table_locks, last - first + 1); + for (i = last + 1; i > first; i--) { + stack_filename(&table_name, st, reader_name(st->readers[i - 1])); + + err = hold_lock_file_for_update(&table_locks[nlocks], + table_name.buf, LOCK_NO_DEREF); + if (err < 0) { + /* + * When the table is locked already we may do a + * best-effort compaction and compact only the tables + * that we have managed to lock so far. This of course + * requires that we have been able to lock at least two + * tables, otherwise there would be nothing to compact. + * In that case, we return a lock error to our caller. + */ + if (errno == EEXIST && last - (i - 1) >= 2 && + flags & STACK_COMPACT_RANGE_BEST_EFFORT) { + err = 0; + /* + * The subtraction is to offset the index, the + * addition is to only compact up to the table + * of the preceding iteration. They obviously + * cancel each other out, but that may be + * non-obvious when it was omitted. + */ + first = (i - 1) + 1; + break; + } else if (errno == EEXIST) { + err = REFTABLE_LOCK_ERROR; + goto done; } else { err = REFTABLE_IO_ERROR; + goto done; } } - subtable_locks[j] = subtab_lock.buf; - delete_on_success[j] = subtab_file_name.buf; - j++; - - if (err != 0) + /* + * We need to close the lockfiles as we might otherwise easily + * run into file descriptor exhaustion when we compress a lot + * of tables. + */ + err = close_lock_file_gently(&table_locks[nlocks++]); + if (err < 0) { + err = REFTABLE_IO_ERROR; goto done; + } } - err = unlink(lock_file_name.buf); - if (err < 0) + /* + * We have locked all tables in our range and can thus release the + * "tables.list" lock while compacting the locked tables. This allows + * concurrent updates to the stack to proceed. + */ + err = rollback_lock_file(&tables_list_lock); + if (err < 0) { + err = REFTABLE_IO_ERROR; goto done; - have_lock = 0; - - err = stack_compact_locked(st, first, last, &temp_tab_file_name, - expiry); - /* Compaction + tombstones can create an empty table out of non-empty - * tables. */ - is_empty_table = (err == REFTABLE_EMPTY_TABLE_ERROR); - if (is_empty_table) { - err = 0; } - if (err < 0) - goto done; - lock_file_fd = - open(lock_file_name.buf, O_EXCL | O_CREAT | O_WRONLY, 0666); - if (lock_file_fd < 0) { - if (errno == EEXIST) { - err = 1; - } else { + /* + * Compact the now-locked tables into a new table. Note that compacting + * these tables may end up with an empty new table in case tombstones + * end up cancelling out all refs in that range. + */ + err = stack_compact_locked(st, first, last, expiry, &new_table); + if (err < 0) { + if (err != REFTABLE_EMPTY_TABLE_ERROR) + goto done; + is_empty_table = 1; + } + + /* + * Now that we have written the new, compacted table we need to re-lock + * "tables.list". We'll then replace the compacted range of tables with + * the new table. + */ + err = hold_lock_file_for_update(&tables_list_lock, st->list_file, + LOCK_NO_DEREF); + if (err < 0) { + if (errno == EEXIST) + err = REFTABLE_LOCK_ERROR; + else err = REFTABLE_IO_ERROR; - } goto done; } - have_lock = 1; - if (st->config.default_permissions) { - if (chmod(lock_file_name.buf, st->config.default_permissions) < 0) { + + if (st->opts.default_permissions) { + if (chmod(get_lock_file_path(&tables_list_lock), + st->opts.default_permissions) < 0) { err = REFTABLE_IO_ERROR; goto done; } } - format_name(&new_table_name, st->readers[first]->min_update_index, - st->readers[last]->max_update_index); - strbuf_addstr(&new_table_name, ".ref"); - - stack_filename(&new_table_path, st, new_table_name.buf); + /* + * As we have unlocked the stack while compacting our slice of tables + * it may have happened that a concurrently running process has updated + * the stack while we were compacting. In that case, we need to check + * whether the tables that we have just compacted still exist in the + * stack in the exact same order as we have compacted them. + * + * If they do exist, then it is fine to continue and replace those + * tables with our compacted version. If they don't, then we need to + * abort. + */ + err = stack_uptodate(st); + if (err < 0) + goto done; + if (err > 0) { + ssize_t new_offset = -1; + int fd; - if (!is_empty_table) { - /* retry? */ - err = rename(temp_tab_file_name.buf, new_table_path.buf); - if (err < 0) { + fd = open(st->list_file, O_RDONLY); + if (fd < 0) { err = REFTABLE_IO_ERROR; goto done; } - } - for (i = 0; i < first; i++) { - strbuf_addstr(&ref_list_contents, st->readers[i]->name); - strbuf_addstr(&ref_list_contents, "\n"); + err = fd_read_lines(fd, &names); + close(fd); + if (err < 0) + goto done; + + /* + * Search for the offset of the first table that we have + * compacted in the updated "tables.list" file. + */ + for (size_t i = 0; names[i]; i++) { + if (strcmp(names[i], st->readers[first]->name)) + continue; + + /* + * We have found the first entry. Verify that all the + * subsequent tables we have compacted still exist in + * the modified stack in the exact same order as we + * have compacted them. + */ + for (size_t j = 1; j < last - first + 1; j++) { + const char *old = first + j < st->merged->readers_len ? + st->readers[first + j]->name : NULL; + const char *new = names[i + j]; + + /* + * If some entries are missing or in case the tables + * have changed then we need to bail out. Again, this + * shouldn't ever happen because we have locked the + * tables we are compacting. + */ + if (!old || !new || strcmp(old, new)) { + err = REFTABLE_OUTDATED_ERROR; + goto done; + } + } + + new_offset = i; + break; + } + + /* + * In case we didn't find our compacted tables in the stack we + * need to bail out. In theory, this should have never happened + * because we locked the tables we are compacting. + */ + if (new_offset < 0) { + err = REFTABLE_OUTDATED_ERROR; + goto done; + } + + /* + * We have found the new range that we want to replace, so + * let's update the range of tables that we want to replace. + */ + first_to_replace = new_offset; + last_to_replace = last + (new_offset - first); + } else { + /* + * `fd_read_lines()` uses a `NULL` sentinel to indicate that + * the array is at its end. As we use `free_names()` to free + * the array, we need to include this sentinel value here and + * thus have to allocate `readers_len + 1` many entries. + */ + REFTABLE_CALLOC_ARRAY(names, st->merged->readers_len + 1); + for (size_t i = 0; i < st->merged->readers_len; i++) + names[i] = xstrdup(st->readers[i]->name); + first_to_replace = first; + last_to_replace = last; } + + /* + * If the resulting compacted table is not empty, then we need to move + * it into place now. + */ if (!is_empty_table) { - strbuf_addbuf(&ref_list_contents, &new_table_name); - strbuf_addstr(&ref_list_contents, "\n"); - } - for (i = last + 1; i < st->merged->stack_len; i++) { - strbuf_addstr(&ref_list_contents, st->readers[i]->name); - strbuf_addstr(&ref_list_contents, "\n"); - } + format_name(&new_table_name, st->readers[first]->min_update_index, + st->readers[last]->max_update_index); + strbuf_addstr(&new_table_name, ".ref"); + stack_filename(&new_table_path, st, new_table_name.buf); - err = write_in_full(lock_file_fd, ref_list_contents.buf, ref_list_contents.len); - if (err < 0) { - err = REFTABLE_IO_ERROR; - unlink(new_table_path.buf); - goto done; + err = rename_tempfile(&new_table, new_table_path.buf); + if (err < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } } - err = fsync_component(FSYNC_COMPONENT_REFERENCE, lock_file_fd); + /* + * Write the new "tables.list" contents with the compacted table we + * have just written. In case the compacted table became empty we + * simply skip writing it. + */ + for (i = 0; i < first_to_replace; i++) + strbuf_addf(&tables_list_buf, "%s\n", names[i]); + if (!is_empty_table) + strbuf_addf(&tables_list_buf, "%s\n", new_table_name.buf); + for (i = last_to_replace + 1; names[i]; i++) + strbuf_addf(&tables_list_buf, "%s\n", names[i]); + + err = write_in_full(get_lock_file_fd(&tables_list_lock), + tables_list_buf.buf, tables_list_buf.len); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); goto done; } - err = close(lock_file_fd); - lock_file_fd = -1; + err = fsync_component(FSYNC_COMPONENT_REFERENCE, get_lock_file_fd(&tables_list_lock)); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); goto done; } - err = rename(lock_file_name.buf, st->list_file); + err = commit_lock_file(&tables_list_lock); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); goto done; } - have_lock = 0; - /* Reload the stack before deleting. On windows, we can only delete the - files after we closed them. - */ + /* + * Reload the stack before deleting the compacted tables. We can only + * delete the files after we closed them on Windows, so this needs to + * happen first. + */ err = reftable_stack_reload_maybe_reuse(st, first < last); + if (err < 0) + goto done; - listp = delete_on_success; - while (*listp) { - if (strcmp(*listp, new_table_path.buf)) { - unlink(*listp); - } - listp++; + /* + * Delete the old tables. They may still be in use by concurrent + * readers, so it is expected that unlinking tables may fail. + */ + for (i = 0; i < nlocks; i++) { + struct lock_file *table_lock = &table_locks[i]; + char *table_path = get_locked_file_path(table_lock); + unlink(table_path); + free(table_path); } done: - free_names(delete_on_success); + rollback_lock_file(&tables_list_lock); + for (i = 0; table_locks && i < nlocks; i++) + rollback_lock_file(&table_locks[i]); + reftable_free(table_locks); - if (subtable_locks) { - listp = subtable_locks; - while (*listp) { - unlink(*listp); - listp++; - } - free_names(subtable_locks); - } - if (lock_file_fd >= 0) { - close(lock_file_fd); - lock_file_fd = -1; - } - if (have_lock) { - unlink(lock_file_name.buf); - } + delete_tempfile(&new_table); strbuf_release(&new_table_name); strbuf_release(&new_table_path); - strbuf_release(&ref_list_contents); - strbuf_release(&temp_tab_file_name); - strbuf_release(&lock_file_name); + strbuf_release(&tables_list_buf); + strbuf_release(&table_name); + free_names(names); + + if (err == REFTABLE_LOCK_ERROR) + st->stats.failures++; + return err; } int reftable_stack_compact_all(struct reftable_stack *st, struct reftable_log_expiry_config *config) { - return stack_compact_range(st, 0, st->merged->stack_len ? - st->merged->stack_len - 1 : 0, config); -} - -static int stack_compact_range_stats(struct reftable_stack *st, - size_t first, size_t last, - struct reftable_log_expiry_config *config) -{ - int err = stack_compact_range(st, first, last, config); - if (err > 0) - st->stats.failures++; - return err; + size_t last = st->merged->readers_len ? st->merged->readers_len - 1 : 0; + return stack_compact_range(st, 0, last, config, 0); } static int segment_size(struct segment *s) @@ -1214,87 +1370,93 @@ static int segment_size(struct segment *s) return s->end - s->start; } -int fastlog2(uint64_t sz) +struct segment suggest_compaction_segment(uint64_t *sizes, size_t n, + uint8_t factor) { - int l = 0; - if (sz == 0) - return 0; - for (; sz; sz /= 2) { - l++; - } - return l - 1; -} - -struct segment *sizes_to_segments(size_t *seglen, uint64_t *sizes, size_t n) -{ - struct segment *segs = reftable_calloc(n, sizeof(*segs)); - struct segment cur = { 0 }; - size_t next = 0, i; - - if (n == 0) { - *seglen = 0; - return segs; - } - for (i = 0; i < n; i++) { - int log = fastlog2(sizes[i]); - if (cur.log != log && cur.bytes > 0) { - struct segment fresh = { - .start = i, - }; - - segs[next++] = cur; - cur = fresh; - } + struct segment seg = { 0 }; + uint64_t bytes; + size_t i; - cur.log = log; - cur.end = i + 1; - cur.bytes += sizes[i]; - } - segs[next++] = cur; - *seglen = next; - return segs; -} + if (!factor) + factor = DEFAULT_GEOMETRIC_FACTOR; -struct segment suggest_compaction_segment(uint64_t *sizes, size_t n) -{ - struct segment min_seg = { - .log = 64, - }; - struct segment *segs; - size_t seglen = 0, i; - - segs = sizes_to_segments(&seglen, sizes, n); - for (i = 0; i < seglen; i++) { - if (segment_size(&segs[i]) == 1) - continue; + /* + * If there are no tables or only a single one then we don't have to + * compact anything. The sequence is geometric by definition already. + */ + if (n <= 1) + return seg; - if (segs[i].log < min_seg.log) - min_seg = segs[i]; + /* + * Find the ending table of the compaction segment needed to restore the + * geometric sequence. Note that the segment end is exclusive. + * + * To do so, we iterate backwards starting from the most recent table + * until a valid segment end is found. If the preceding table is smaller + * than the current table multiplied by the geometric factor (2), the + * compaction segment end has been identified. + * + * Tables after the ending point are not added to the byte count because + * they are already valid members of the geometric sequence. Due to the + * properties of a geometric sequence, it is not possible for the sum of + * these tables to exceed the value of the ending point table. + * + * Example table size sequence requiring no compaction: + * 64, 32, 16, 8, 4, 2, 1 + * + * Example table size sequence where compaction segment end is set to + * the last table. Since the segment end is exclusive, the last table is + * excluded during subsequent compaction and the table with size 3 is + * the final table included: + * 64, 32, 16, 8, 4, 3, 1 + */ + for (i = n - 1; i > 0; i--) { + if (sizes[i - 1] < sizes[i] * factor) { + seg.end = i + 1; + bytes = sizes[i]; + break; + } } - while (min_seg.start > 0) { - size_t prev = min_seg.start - 1; - if (fastlog2(min_seg.bytes) < fastlog2(sizes[prev])) - break; + /* + * Find the starting table of the compaction segment by iterating + * through the remaining tables and keeping track of the accumulated + * size of all tables seen from the segment end table. The previous + * table is compared to the accumulated size because the tables from the + * segment end are merged backwards recursively. + * + * Note that we keep iterating even after we have found the first + * starting point. This is because there may be tables in the stack + * preceding that first starting point which violate the geometric + * sequence. + * + * Example compaction segment start set to table with size 32: + * 128, 32, 16, 8, 4, 3, 1 + */ + for (; i > 0; i--) { + uint64_t curr = bytes; + bytes += sizes[i - 1]; - min_seg.start = prev; - min_seg.bytes += sizes[prev]; + if (sizes[i - 1] < curr * factor) { + seg.start = i - 1; + seg.bytes = bytes; + } } - reftable_free(segs); - return min_seg; + return seg; } static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st) { - uint64_t *sizes = - reftable_calloc(st->merged->stack_len, sizeof(*sizes)); - int version = (st->config.hash_id == GIT_SHA1_FORMAT_ID) ? 1 : 2; + int version = (st->opts.hash_id == GIT_SHA1_FORMAT_ID) ? 1 : 2; int overhead = header_size(version) - 1; - int i = 0; - for (i = 0; i < st->merged->stack_len; i++) { + uint64_t *sizes; + + REFTABLE_CALLOC_ARRAY(sizes, st->merged->readers_len); + + for (size_t i = 0; i < st->merged->readers_len; i++) sizes[i] = st->readers[i]->size - overhead; - } + return sizes; } @@ -1302,11 +1464,12 @@ int reftable_stack_auto_compact(struct reftable_stack *st) { uint64_t *sizes = stack_table_sizes_for_compaction(st); struct segment seg = - suggest_compaction_segment(sizes, st->merged->stack_len); + suggest_compaction_segment(sizes, st->merged->readers_len, + st->opts.auto_compaction_factor); reftable_free(sizes); if (segment_size(&seg) > 0) - return stack_compact_range_stats(st, seg.start, seg.end - 1, - NULL); + return stack_compact_range(st, seg.start, seg.end - 1, + NULL, STACK_COMPACT_RANGE_BEST_EFFORT); return 0; } @@ -1320,17 +1483,38 @@ reftable_stack_compaction_stats(struct reftable_stack *st) int reftable_stack_read_ref(struct reftable_stack *st, const char *refname, struct reftable_ref_record *ref) { - struct reftable_table tab = { NULL }; - reftable_table_from_merged_table(&tab, reftable_stack_merged_table(st)); - return reftable_table_read_ref(&tab, refname, ref); + struct reftable_iterator it = { 0 }; + int ret; + + reftable_merged_table_init_ref_iterator(st->merged, &it); + ret = reftable_iterator_seek_ref(&it, refname); + if (ret) + goto out; + + ret = reftable_iterator_next_ref(&it, ref); + if (ret) + goto out; + + if (strcmp(ref->refname, refname) || + reftable_ref_record_is_deletion(ref)) { + reftable_ref_record_release(ref); + ret = 1; + goto out; + } + +out: + reftable_iterator_destroy(&it); + return ret; } int reftable_stack_read_log(struct reftable_stack *st, const char *refname, struct reftable_log_record *log) { - struct reftable_iterator it = { NULL }; - struct reftable_merged_table *mt = reftable_stack_merged_table(st); - int err = reftable_merged_table_seek_log(mt, &it, refname); + struct reftable_iterator it = {0}; + int err; + + reftable_stack_init_log_iterator(st, &it); + err = reftable_iterator_seek_log(&it, refname); if (err) goto done; @@ -1352,65 +1536,6 @@ done: return err; } -static int stack_check_addition(struct reftable_stack *st, - const char *new_tab_name) -{ - int err = 0; - struct reftable_block_source src = { NULL }; - struct reftable_reader *rd = NULL; - struct reftable_table tab = { NULL }; - struct reftable_ref_record *refs = NULL; - struct reftable_iterator it = { NULL }; - int cap = 0; - int len = 0; - int i = 0; - - if (st->config.skip_name_check) - return 0; - - err = reftable_block_source_from_file(&src, new_tab_name); - if (err < 0) - goto done; - - err = reftable_new_reader(&rd, &src, new_tab_name); - if (err < 0) - goto done; - - err = reftable_reader_seek_ref(rd, &it, ""); - if (err > 0) { - err = 0; - goto done; - } - if (err < 0) - goto done; - - while (1) { - struct reftable_ref_record ref = { NULL }; - err = reftable_iterator_next_ref(&it, &ref); - if (err > 0) - break; - if (err < 0) - goto done; - - REFTABLE_ALLOC_GROW(refs, len + 1, cap); - refs[len++] = ref; - } - - reftable_table_from_merged_table(&tab, reftable_stack_merged_table(st)); - - err = validate_ref_record_addition(tab, refs, len); - -done: - for (i = 0; i < len; i++) { - reftable_ref_record_release(&refs[i]); - } - - free(refs); - reftable_iterator_destroy(&it); - reftable_reader_free(rd); - return err; -} - static int is_table_name(const char *s) { const char *dot = strrchr(s, '.'); @@ -1431,12 +1556,12 @@ static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max, if (err < 0) goto done; - err = reftable_new_reader(&rd, &src, name); + err = reftable_reader_new(&rd, &src, name); if (err < 0) goto done; update_idx = reftable_reader_max_update_index(rd); - reftable_reader_free(rd); + reftable_reader_decref(rd); if (update_idx <= max) { unlink(table_path.buf); @@ -1493,23 +1618,3 @@ done: reftable_addition_destroy(add); return err; } - -int reftable_stack_print_directory(const char *stackdir, uint32_t hash_id) -{ - struct reftable_stack *stack = NULL; - struct reftable_write_options cfg = { .hash_id = hash_id }; - struct reftable_merged_table *merged = NULL; - struct reftable_table table = { NULL }; - - int err = reftable_new_stack(&stack, stackdir, cfg); - if (err < 0) - goto done; - - merged = reftable_stack_merged_table(stack); - reftable_table_from_merged_table(&table, merged); - err = reftable_table_print(&table); -done: - if (stack) - reftable_stack_destroy(stack); - return err; -} diff --git a/reftable/stack.h b/reftable/stack.h index d919455669..5b45cff4f7 100644 --- a/reftable/stack.h +++ b/reftable/stack.h @@ -19,9 +19,8 @@ struct reftable_stack { int list_fd; char *reftable_dir; - int disable_auto_compact; - struct reftable_write_options config; + struct reftable_write_options opts; struct reftable_reader **readers; size_t readers_len; @@ -33,12 +32,10 @@ int read_lines(const char *filename, char ***lines); struct segment { size_t start, end; - int log; uint64_t bytes; }; -int fastlog2(uint64_t sz); -struct segment *sizes_to_segments(size_t *seglen, uint64_t *sizes, size_t n); -struct segment suggest_compaction_segment(uint64_t *sizes, size_t n); +struct segment suggest_compaction_segment(uint64_t *sizes, size_t n, + uint8_t factor); #endif diff --git a/reftable/stack_test.c b/reftable/stack_test.c deleted file mode 100644 index 509f486623..0000000000 --- a/reftable/stack_test.c +++ /dev/null @@ -1,1101 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "stack.h" - -#include "system.h" - -#include "reftable-reader.h" -#include "merged.h" -#include "basics.h" -#include "record.h" -#include "test_framework.h" -#include "reftable-tests.h" -#include "reader.h" - -#include <sys/types.h> -#include <dirent.h> - -static void clear_dir(const char *dirname) -{ - struct strbuf path = STRBUF_INIT; - strbuf_addstr(&path, dirname); - remove_dir_recursively(&path, 0); - strbuf_release(&path); -} - -static int count_dir_entries(const char *dirname) -{ - DIR *dir = opendir(dirname); - int len = 0; - struct dirent *d; - if (!dir) - return 0; - - while ((d = readdir(dir))) { - if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, ".")) - continue; - len++; - } - closedir(dir); - return len; -} - -/* - * Work linenumber into the tempdir, so we can see which tests forget to - * cleanup. - */ -static char *get_tmp_template(int linenumber) -{ - const char *tmp = getenv("TMPDIR"); - static char template[1024]; - snprintf(template, sizeof(template) - 1, "%s/stack_test-%d.XXXXXX", - tmp ? tmp : "/tmp", linenumber); - return template; -} - -static char *get_tmp_dir(int linenumber) -{ - char *dir = get_tmp_template(linenumber); - EXPECT(mkdtemp(dir)); - return dir; -} - -static void test_read_file(void) -{ - char *fn = get_tmp_template(__LINE__); - int fd = mkstemp(fn); - char out[1024] = "line1\n\nline2\nline3"; - int n, err; - char **names = NULL; - char *want[] = { "line1", "line2", "line3" }; - int i = 0; - - EXPECT(fd > 0); - n = write_in_full(fd, out, strlen(out)); - EXPECT(n == strlen(out)); - err = close(fd); - EXPECT(err >= 0); - - err = read_lines(fn, &names); - EXPECT_ERR(err); - - for (i = 0; names[i]; i++) { - EXPECT(0 == strcmp(want[i], names[i])); - } - free_names(names); - (void) remove(fn); -} - -static void test_parse_names(void) -{ - char buf[] = "line\n"; - char **names = NULL; - parse_names(buf, strlen(buf), &names); - - EXPECT(NULL != names[0]); - EXPECT(0 == strcmp(names[0], "line")); - EXPECT(NULL == names[1]); - free_names(names); -} - -static void test_names_equal(void) -{ - char *a[] = { "a", "b", "c", NULL }; - char *b[] = { "a", "b", "d", NULL }; - char *c[] = { "a", "b", NULL }; - - EXPECT(names_equal(a, a)); - EXPECT(!names_equal(a, b)); - EXPECT(!names_equal(a, c)); -} - -static int write_test_ref(struct reftable_writer *wr, void *arg) -{ - struct reftable_ref_record *ref = arg; - reftable_writer_set_limits(wr, ref->update_index, ref->update_index); - return reftable_writer_add_ref(wr, ref); -} - -struct write_log_arg { - struct reftable_log_record *log; - uint64_t update_index; -}; - -static int write_test_log(struct reftable_writer *wr, void *arg) -{ - struct write_log_arg *wla = arg; - - reftable_writer_set_limits(wr, wla->update_index, wla->update_index); - return reftable_writer_add_log(wr, wla->log); -} - -static void test_reftable_stack_add_one(void) -{ - char *dir = get_tmp_dir(__LINE__); - struct strbuf scratch = STRBUF_INIT; - int mask = umask(002); - struct reftable_write_options cfg = { - .default_permissions = 0660, - }; - struct reftable_stack *st = NULL; - int err; - struct reftable_ref_record ref = { - .refname = "HEAD", - .update_index = 1, - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - struct reftable_ref_record dest = { NULL }; - struct stat stat_result = { 0 }; - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - - err = reftable_stack_add(st, &write_test_ref, &ref); - EXPECT_ERR(err); - - err = reftable_stack_read_ref(st, ref.refname, &dest); - EXPECT_ERR(err); - EXPECT(0 == strcmp("master", dest.value.symref)); - EXPECT(st->readers_len > 0); - - printf("testing print functionality:\n"); - err = reftable_stack_print_directory(dir, GIT_SHA1_FORMAT_ID); - EXPECT_ERR(err); - - err = reftable_stack_print_directory(dir, GIT_SHA256_FORMAT_ID); - EXPECT(err == REFTABLE_FORMAT_ERROR); - -#ifndef GIT_WINDOWS_NATIVE - strbuf_addstr(&scratch, dir); - strbuf_addstr(&scratch, "/tables.list"); - err = stat(scratch.buf, &stat_result); - EXPECT(!err); - EXPECT((stat_result.st_mode & 0777) == cfg.default_permissions); - - strbuf_reset(&scratch); - strbuf_addstr(&scratch, dir); - strbuf_addstr(&scratch, "/"); - /* do not try at home; not an external API for reftable. */ - strbuf_addstr(&scratch, st->readers[0]->name); - err = stat(scratch.buf, &stat_result); - EXPECT(!err); - EXPECT((stat_result.st_mode & 0777) == cfg.default_permissions); -#else - (void) stat_result; -#endif - - reftable_ref_record_release(&dest); - reftable_stack_destroy(st); - strbuf_release(&scratch); - clear_dir(dir); - umask(mask); -} - -static void test_reftable_stack_uptodate(void) -{ - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st1 = NULL; - struct reftable_stack *st2 = NULL; - char *dir = get_tmp_dir(__LINE__); - - int err; - struct reftable_ref_record ref1 = { - .refname = "HEAD", - .update_index = 1, - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - struct reftable_ref_record ref2 = { - .refname = "branch2", - .update_index = 2, - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - - - /* simulate multi-process access to the same stack - by creating two stacks for the same directory. - */ - err = reftable_new_stack(&st1, dir, cfg); - EXPECT_ERR(err); - - err = reftable_new_stack(&st2, dir, cfg); - EXPECT_ERR(err); - - err = reftable_stack_add(st1, &write_test_ref, &ref1); - EXPECT_ERR(err); - - err = reftable_stack_add(st2, &write_test_ref, &ref2); - EXPECT(err == REFTABLE_LOCK_ERROR); - - err = reftable_stack_reload(st2); - EXPECT_ERR(err); - - err = reftable_stack_add(st2, &write_test_ref, &ref2); - EXPECT_ERR(err); - reftable_stack_destroy(st1); - reftable_stack_destroy(st2); - clear_dir(dir); -} - -static void test_reftable_stack_transaction_api(void) -{ - char *dir = get_tmp_dir(__LINE__); - - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st = NULL; - int err; - struct reftable_addition *add = NULL; - - struct reftable_ref_record ref = { - .refname = "HEAD", - .update_index = 1, - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - struct reftable_ref_record dest = { NULL }; - - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - - reftable_addition_destroy(add); - - err = reftable_stack_new_addition(&add, st); - EXPECT_ERR(err); - - err = reftable_addition_add(add, &write_test_ref, &ref); - EXPECT_ERR(err); - - err = reftable_addition_commit(add); - EXPECT_ERR(err); - - reftable_addition_destroy(add); - - err = reftable_stack_read_ref(st, ref.refname, &dest); - EXPECT_ERR(err); - EXPECT(REFTABLE_REF_SYMREF == dest.value_type); - EXPECT(0 == strcmp("master", dest.value.symref)); - - reftable_ref_record_release(&dest); - reftable_stack_destroy(st); - clear_dir(dir); -} - -static void test_reftable_stack_transaction_api_performs_auto_compaction(void) -{ - char *dir = get_tmp_dir(__LINE__); - struct reftable_write_options cfg = {0}; - struct reftable_addition *add = NULL; - struct reftable_stack *st = NULL; - int i, n = 20, err; - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - - for (i = 0; i <= n; i++) { - struct reftable_ref_record ref = { - .update_index = reftable_stack_next_update_index(st), - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - char name[100]; - - snprintf(name, sizeof(name), "branch%04d", i); - ref.refname = name; - - /* - * Disable auto-compaction for all but the last runs. Like this - * we can ensure that we indeed honor this setting and have - * better control over when exactly auto compaction runs. - */ - st->disable_auto_compact = i != n; - - err = reftable_stack_new_addition(&add, st); - EXPECT_ERR(err); - - err = reftable_addition_add(add, &write_test_ref, &ref); - EXPECT_ERR(err); - - err = reftable_addition_commit(add); - EXPECT_ERR(err); - - reftable_addition_destroy(add); - - /* - * The stack length should grow continuously for all runs where - * auto compaction is disabled. When enabled, we should merge - * all tables in the stack. - */ - if (i != n) - EXPECT(st->merged->stack_len == i + 1); - else - EXPECT(st->merged->stack_len == 1); - } - - reftable_stack_destroy(st); - clear_dir(dir); -} - -static void test_reftable_stack_validate_refname(void) -{ - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st = NULL; - int err; - char *dir = get_tmp_dir(__LINE__); - - int i; - struct reftable_ref_record ref = { - .refname = "a/b", - .update_index = 1, - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - char *additions[] = { "a", "a/b/c" }; - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - - err = reftable_stack_add(st, &write_test_ref, &ref); - EXPECT_ERR(err); - - for (i = 0; i < ARRAY_SIZE(additions); i++) { - struct reftable_ref_record ref = { - .refname = additions[i], - .update_index = 1, - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - - err = reftable_stack_add(st, &write_test_ref, &ref); - EXPECT(err == REFTABLE_NAME_CONFLICT); - } - - reftable_stack_destroy(st); - clear_dir(dir); -} - -static int write_error(struct reftable_writer *wr, void *arg) -{ - return *((int *)arg); -} - -static void test_reftable_stack_update_index_check(void) -{ - char *dir = get_tmp_dir(__LINE__); - - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st = NULL; - int err; - struct reftable_ref_record ref1 = { - .refname = "name1", - .update_index = 1, - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - struct reftable_ref_record ref2 = { - .refname = "name2", - .update_index = 1, - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - - err = reftable_stack_add(st, &write_test_ref, &ref1); - EXPECT_ERR(err); - - err = reftable_stack_add(st, &write_test_ref, &ref2); - EXPECT(err == REFTABLE_API_ERROR); - reftable_stack_destroy(st); - clear_dir(dir); -} - -static void test_reftable_stack_lock_failure(void) -{ - char *dir = get_tmp_dir(__LINE__); - - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st = NULL; - int err, i; - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - for (i = -1; i != REFTABLE_EMPTY_TABLE_ERROR; i--) { - err = reftable_stack_add(st, &write_error, &i); - EXPECT(err == i); - } - - reftable_stack_destroy(st); - clear_dir(dir); -} - -static void test_reftable_stack_add(void) -{ - int i = 0; - int err = 0; - struct reftable_write_options cfg = { - .exact_log_message = 1, - .default_permissions = 0660, - }; - struct reftable_stack *st = NULL; - char *dir = get_tmp_dir(__LINE__); - struct reftable_ref_record refs[2] = { { NULL } }; - struct reftable_log_record logs[2] = { { NULL } }; - struct strbuf path = STRBUF_INIT; - struct stat stat_result; - int N = ARRAY_SIZE(refs); - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - st->disable_auto_compact = 1; - - for (i = 0; i < N; i++) { - char buf[256]; - snprintf(buf, sizeof(buf), "branch%02d", i); - refs[i].refname = xstrdup(buf); - refs[i].update_index = i + 1; - refs[i].value_type = REFTABLE_REF_VAL1; - set_test_hash(refs[i].value.val1, i); - - logs[i].refname = xstrdup(buf); - logs[i].update_index = N + i + 1; - logs[i].value_type = REFTABLE_LOG_UPDATE; - - logs[i].value.update.new_hash = reftable_malloc(GIT_SHA1_RAWSZ); - logs[i].value.update.email = xstrdup("identity@invalid"); - set_test_hash(logs[i].value.update.new_hash, i); - } - - for (i = 0; i < N; i++) { - int err = reftable_stack_add(st, &write_test_ref, &refs[i]); - EXPECT_ERR(err); - } - - for (i = 0; i < N; i++) { - struct write_log_arg arg = { - .log = &logs[i], - .update_index = reftable_stack_next_update_index(st), - }; - int err = reftable_stack_add(st, &write_test_log, &arg); - EXPECT_ERR(err); - } - - err = reftable_stack_compact_all(st, NULL); - EXPECT_ERR(err); - - for (i = 0; i < N; i++) { - struct reftable_ref_record dest = { NULL }; - - int err = reftable_stack_read_ref(st, refs[i].refname, &dest); - EXPECT_ERR(err); - EXPECT(reftable_ref_record_equal(&dest, refs + i, - GIT_SHA1_RAWSZ)); - reftable_ref_record_release(&dest); - } - - for (i = 0; i < N; i++) { - struct reftable_log_record dest = { NULL }; - int err = reftable_stack_read_log(st, refs[i].refname, &dest); - EXPECT_ERR(err); - EXPECT(reftable_log_record_equal(&dest, logs + i, - GIT_SHA1_RAWSZ)); - reftable_log_record_release(&dest); - } - -#ifndef GIT_WINDOWS_NATIVE - strbuf_addstr(&path, dir); - strbuf_addstr(&path, "/tables.list"); - err = stat(path.buf, &stat_result); - EXPECT(!err); - EXPECT((stat_result.st_mode & 0777) == cfg.default_permissions); - - strbuf_reset(&path); - strbuf_addstr(&path, dir); - strbuf_addstr(&path, "/"); - /* do not try at home; not an external API for reftable. */ - strbuf_addstr(&path, st->readers[0]->name); - err = stat(path.buf, &stat_result); - EXPECT(!err); - EXPECT((stat_result.st_mode & 0777) == cfg.default_permissions); -#else - (void) stat_result; -#endif - - /* cleanup */ - reftable_stack_destroy(st); - for (i = 0; i < N; i++) { - reftable_ref_record_release(&refs[i]); - reftable_log_record_release(&logs[i]); - } - strbuf_release(&path); - clear_dir(dir); -} - -static void test_reftable_stack_log_normalize(void) -{ - int err = 0; - struct reftable_write_options cfg = { - 0, - }; - struct reftable_stack *st = NULL; - char *dir = get_tmp_dir(__LINE__); - - uint8_t h1[GIT_SHA1_RAWSZ] = { 0x01 }, h2[GIT_SHA1_RAWSZ] = { 0x02 }; - - struct reftable_log_record input = { .refname = "branch", - .update_index = 1, - .value_type = REFTABLE_LOG_UPDATE, - .value = { .update = { - .new_hash = h1, - .old_hash = h2, - } } }; - struct reftable_log_record dest = { - .update_index = 0, - }; - struct write_log_arg arg = { - .log = &input, - .update_index = 1, - }; - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - - input.value.update.message = "one\ntwo"; - err = reftable_stack_add(st, &write_test_log, &arg); - EXPECT(err == REFTABLE_API_ERROR); - - input.value.update.message = "one"; - err = reftable_stack_add(st, &write_test_log, &arg); - EXPECT_ERR(err); - - err = reftable_stack_read_log(st, input.refname, &dest); - EXPECT_ERR(err); - EXPECT(0 == strcmp(dest.value.update.message, "one\n")); - - input.value.update.message = "two\n"; - arg.update_index = 2; - err = reftable_stack_add(st, &write_test_log, &arg); - EXPECT_ERR(err); - err = reftable_stack_read_log(st, input.refname, &dest); - EXPECT_ERR(err); - EXPECT(0 == strcmp(dest.value.update.message, "two\n")); - - /* cleanup */ - reftable_stack_destroy(st); - reftable_log_record_release(&dest); - clear_dir(dir); -} - -static void test_reftable_stack_tombstone(void) -{ - int i = 0; - char *dir = get_tmp_dir(__LINE__); - - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st = NULL; - int err; - struct reftable_ref_record refs[2] = { { NULL } }; - struct reftable_log_record logs[2] = { { NULL } }; - int N = ARRAY_SIZE(refs); - struct reftable_ref_record dest = { NULL }; - struct reftable_log_record log_dest = { NULL }; - - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - - /* even entries add the refs, odd entries delete them. */ - for (i = 0; i < N; i++) { - const char *buf = "branch"; - refs[i].refname = xstrdup(buf); - refs[i].update_index = i + 1; - if (i % 2 == 0) { - refs[i].value_type = REFTABLE_REF_VAL1; - set_test_hash(refs[i].value.val1, i); - } - - logs[i].refname = xstrdup(buf); - /* update_index is part of the key. */ - logs[i].update_index = 42; - if (i % 2 == 0) { - logs[i].value_type = REFTABLE_LOG_UPDATE; - logs[i].value.update.new_hash = - reftable_malloc(GIT_SHA1_RAWSZ); - set_test_hash(logs[i].value.update.new_hash, i); - logs[i].value.update.email = - xstrdup("identity@invalid"); - } - } - for (i = 0; i < N; i++) { - int err = reftable_stack_add(st, &write_test_ref, &refs[i]); - EXPECT_ERR(err); - } - - for (i = 0; i < N; i++) { - struct write_log_arg arg = { - .log = &logs[i], - .update_index = reftable_stack_next_update_index(st), - }; - int err = reftable_stack_add(st, &write_test_log, &arg); - EXPECT_ERR(err); - } - - err = reftable_stack_read_ref(st, "branch", &dest); - EXPECT(err == 1); - reftable_ref_record_release(&dest); - - err = reftable_stack_read_log(st, "branch", &log_dest); - EXPECT(err == 1); - reftable_log_record_release(&log_dest); - - err = reftable_stack_compact_all(st, NULL); - EXPECT_ERR(err); - - err = reftable_stack_read_ref(st, "branch", &dest); - EXPECT(err == 1); - - err = reftable_stack_read_log(st, "branch", &log_dest); - EXPECT(err == 1); - reftable_ref_record_release(&dest); - reftable_log_record_release(&log_dest); - - /* cleanup */ - reftable_stack_destroy(st); - for (i = 0; i < N; i++) { - reftable_ref_record_release(&refs[i]); - reftable_log_record_release(&logs[i]); - } - clear_dir(dir); -} - -static void test_reftable_stack_hash_id(void) -{ - char *dir = get_tmp_dir(__LINE__); - - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st = NULL; - int err; - - struct reftable_ref_record ref = { - .refname = "master", - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "target", - .update_index = 1, - }; - struct reftable_write_options cfg32 = { .hash_id = GIT_SHA256_FORMAT_ID }; - struct reftable_stack *st32 = NULL; - struct reftable_write_options cfg_default = { 0 }; - struct reftable_stack *st_default = NULL; - struct reftable_ref_record dest = { NULL }; - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - - err = reftable_stack_add(st, &write_test_ref, &ref); - EXPECT_ERR(err); - - /* can't read it with the wrong hash ID. */ - err = reftable_new_stack(&st32, dir, cfg32); - EXPECT(err == REFTABLE_FORMAT_ERROR); - - /* check that we can read it back with default config too. */ - err = reftable_new_stack(&st_default, dir, cfg_default); - EXPECT_ERR(err); - - err = reftable_stack_read_ref(st_default, "master", &dest); - EXPECT_ERR(err); - - EXPECT(reftable_ref_record_equal(&ref, &dest, GIT_SHA1_RAWSZ)); - reftable_ref_record_release(&dest); - reftable_stack_destroy(st); - reftable_stack_destroy(st_default); - clear_dir(dir); -} - -static void test_log2(void) -{ - EXPECT(1 == fastlog2(3)); - EXPECT(2 == fastlog2(4)); - EXPECT(2 == fastlog2(5)); -} - -static void test_sizes_to_segments(void) -{ - uint64_t sizes[] = { 2, 3, 4, 5, 7, 9 }; - /* .................0 1 2 3 4 5 */ - - size_t seglen = 0; - struct segment *segs = - sizes_to_segments(&seglen, sizes, ARRAY_SIZE(sizes)); - EXPECT(segs[2].log == 3); - EXPECT(segs[2].start == 5); - EXPECT(segs[2].end == 6); - - EXPECT(segs[1].log == 2); - EXPECT(segs[1].start == 2); - EXPECT(segs[1].end == 5); - reftable_free(segs); -} - -static void test_sizes_to_segments_empty(void) -{ - size_t seglen = 0; - struct segment *segs = sizes_to_segments(&seglen, NULL, 0); - EXPECT(seglen == 0); - reftable_free(segs); -} - -static void test_sizes_to_segments_all_equal(void) -{ - uint64_t sizes[] = { 5, 5 }; - size_t seglen = 0; - struct segment *segs = - sizes_to_segments(&seglen, sizes, ARRAY_SIZE(sizes)); - EXPECT(seglen == 1); - EXPECT(segs[0].start == 0); - EXPECT(segs[0].end == 2); - reftable_free(segs); -} - -static void test_suggest_compaction_segment(void) -{ - uint64_t sizes[] = { 128, 64, 17, 16, 9, 9, 9, 16, 16 }; - /* .................0 1 2 3 4 5 6 */ - struct segment min = - suggest_compaction_segment(sizes, ARRAY_SIZE(sizes)); - EXPECT(min.start == 2); - EXPECT(min.end == 7); -} - -static void test_suggest_compaction_segment_nothing(void) -{ - uint64_t sizes[] = { 64, 32, 16, 8, 4, 2 }; - struct segment result = - suggest_compaction_segment(sizes, ARRAY_SIZE(sizes)); - EXPECT(result.start == result.end); -} - -static void test_reflog_expire(void) -{ - char *dir = get_tmp_dir(__LINE__); - - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st = NULL; - struct reftable_log_record logs[20] = { { NULL } }; - int N = ARRAY_SIZE(logs) - 1; - int i = 0; - int err; - struct reftable_log_expiry_config expiry = { - .time = 10, - }; - struct reftable_log_record log = { NULL }; - - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - - for (i = 1; i <= N; i++) { - char buf[256]; - snprintf(buf, sizeof(buf), "branch%02d", i); - - logs[i].refname = xstrdup(buf); - logs[i].update_index = i; - logs[i].value_type = REFTABLE_LOG_UPDATE; - logs[i].value.update.time = i; - logs[i].value.update.new_hash = reftable_malloc(GIT_SHA1_RAWSZ); - logs[i].value.update.email = xstrdup("identity@invalid"); - set_test_hash(logs[i].value.update.new_hash, i); - } - - for (i = 1; i <= N; i++) { - struct write_log_arg arg = { - .log = &logs[i], - .update_index = reftable_stack_next_update_index(st), - }; - int err = reftable_stack_add(st, &write_test_log, &arg); - EXPECT_ERR(err); - } - - err = reftable_stack_compact_all(st, NULL); - EXPECT_ERR(err); - - err = reftable_stack_compact_all(st, &expiry); - EXPECT_ERR(err); - - err = reftable_stack_read_log(st, logs[9].refname, &log); - EXPECT(err == 1); - - err = reftable_stack_read_log(st, logs[11].refname, &log); - EXPECT_ERR(err); - - expiry.min_update_index = 15; - err = reftable_stack_compact_all(st, &expiry); - EXPECT_ERR(err); - - err = reftable_stack_read_log(st, logs[14].refname, &log); - EXPECT(err == 1); - - err = reftable_stack_read_log(st, logs[16].refname, &log); - EXPECT_ERR(err); - - /* cleanup */ - reftable_stack_destroy(st); - for (i = 0; i <= N; i++) { - reftable_log_record_release(&logs[i]); - } - clear_dir(dir); - reftable_log_record_release(&log); -} - -static int write_nothing(struct reftable_writer *wr, void *arg) -{ - reftable_writer_set_limits(wr, 1, 1); - return 0; -} - -static void test_empty_add(void) -{ - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st = NULL; - int err; - char *dir = get_tmp_dir(__LINE__); - - struct reftable_stack *st2 = NULL; - - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - - err = reftable_stack_add(st, &write_nothing, NULL); - EXPECT_ERR(err); - - err = reftable_new_stack(&st2, dir, cfg); - EXPECT_ERR(err); - clear_dir(dir); - reftable_stack_destroy(st); - reftable_stack_destroy(st2); -} - -static void test_reftable_stack_auto_compaction(void) -{ - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st = NULL; - char *dir = get_tmp_dir(__LINE__); - - int err, i; - int N = 100; - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - - st->disable_auto_compact = 1; /* call manually below for coverage. */ - for (i = 0; i < N; i++) { - char name[100]; - struct reftable_ref_record ref = { - .refname = name, - .update_index = reftable_stack_next_update_index(st), - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - snprintf(name, sizeof(name), "branch%04d", i); - - err = reftable_stack_add(st, &write_test_ref, &ref); - EXPECT_ERR(err); - - err = reftable_stack_auto_compact(st); - EXPECT_ERR(err); - EXPECT(i < 3 || st->merged->stack_len < 2 * fastlog2(i)); - } - - EXPECT(reftable_stack_compaction_stats(st)->entries_written < - (uint64_t)(N * fastlog2(N))); - - reftable_stack_destroy(st); - clear_dir(dir); -} - -static void test_reftable_stack_add_performs_auto_compaction(void) -{ - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st = NULL; - struct strbuf refname = STRBUF_INIT; - char *dir = get_tmp_dir(__LINE__); - int err, i, n = 20; - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - - for (i = 0; i <= n; i++) { - struct reftable_ref_record ref = { - .update_index = reftable_stack_next_update_index(st), - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - - /* - * Disable auto-compaction for all but the last runs. Like this - * we can ensure that we indeed honor this setting and have - * better control over when exactly auto compaction runs. - */ - st->disable_auto_compact = i != n; - - strbuf_reset(&refname); - strbuf_addf(&refname, "branch-%04d", i); - ref.refname = refname.buf; - - err = reftable_stack_add(st, &write_test_ref, &ref); - EXPECT_ERR(err); - - /* - * The stack length should grow continuously for all runs where - * auto compaction is disabled. When enabled, we should merge - * all tables in the stack. - */ - if (i != n) - EXPECT(st->merged->stack_len == i + 1); - else - EXPECT(st->merged->stack_len == 1); - } - - reftable_stack_destroy(st); - strbuf_release(&refname); - clear_dir(dir); -} - -static void test_reftable_stack_compaction_concurrent(void) -{ - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st1 = NULL, *st2 = NULL; - char *dir = get_tmp_dir(__LINE__); - - int err, i; - int N = 3; - - err = reftable_new_stack(&st1, dir, cfg); - EXPECT_ERR(err); - - for (i = 0; i < N; i++) { - char name[100]; - struct reftable_ref_record ref = { - .refname = name, - .update_index = reftable_stack_next_update_index(st1), - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - snprintf(name, sizeof(name), "branch%04d", i); - - err = reftable_stack_add(st1, &write_test_ref, &ref); - EXPECT_ERR(err); - } - - err = reftable_new_stack(&st2, dir, cfg); - EXPECT_ERR(err); - - err = reftable_stack_compact_all(st1, NULL); - EXPECT_ERR(err); - - reftable_stack_destroy(st1); - reftable_stack_destroy(st2); - - EXPECT(count_dir_entries(dir) == 2); - clear_dir(dir); -} - -static void unclean_stack_close(struct reftable_stack *st) -{ - /* break abstraction boundary to simulate unclean shutdown. */ - int i = 0; - for (; i < st->readers_len; i++) { - reftable_reader_free(st->readers[i]); - } - st->readers_len = 0; - FREE_AND_NULL(st->readers); -} - -static void test_reftable_stack_compaction_concurrent_clean(void) -{ - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st1 = NULL, *st2 = NULL, *st3 = NULL; - char *dir = get_tmp_dir(__LINE__); - - int err, i; - int N = 3; - - err = reftable_new_stack(&st1, dir, cfg); - EXPECT_ERR(err); - - for (i = 0; i < N; i++) { - char name[100]; - struct reftable_ref_record ref = { - .refname = name, - .update_index = reftable_stack_next_update_index(st1), - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - snprintf(name, sizeof(name), "branch%04d", i); - - err = reftable_stack_add(st1, &write_test_ref, &ref); - EXPECT_ERR(err); - } - - err = reftable_new_stack(&st2, dir, cfg); - EXPECT_ERR(err); - - err = reftable_stack_compact_all(st1, NULL); - EXPECT_ERR(err); - - unclean_stack_close(st1); - unclean_stack_close(st2); - - err = reftable_new_stack(&st3, dir, cfg); - EXPECT_ERR(err); - - err = reftable_stack_clean(st3); - EXPECT_ERR(err); - EXPECT(count_dir_entries(dir) == 2); - - reftable_stack_destroy(st1); - reftable_stack_destroy(st2); - reftable_stack_destroy(st3); - - clear_dir(dir); -} - -int stack_test_main(int argc, const char *argv[]) -{ - RUN_TEST(test_empty_add); - RUN_TEST(test_log2); - RUN_TEST(test_names_equal); - RUN_TEST(test_parse_names); - RUN_TEST(test_read_file); - RUN_TEST(test_reflog_expire); - RUN_TEST(test_reftable_stack_add); - RUN_TEST(test_reftable_stack_add_one); - RUN_TEST(test_reftable_stack_auto_compaction); - RUN_TEST(test_reftable_stack_add_performs_auto_compaction); - RUN_TEST(test_reftable_stack_compaction_concurrent); - RUN_TEST(test_reftable_stack_compaction_concurrent_clean); - RUN_TEST(test_reftable_stack_hash_id); - RUN_TEST(test_reftable_stack_lock_failure); - RUN_TEST(test_reftable_stack_log_normalize); - RUN_TEST(test_reftable_stack_tombstone); - RUN_TEST(test_reftable_stack_transaction_api); - RUN_TEST(test_reftable_stack_transaction_api_performs_auto_compaction); - RUN_TEST(test_reftable_stack_update_index_check); - RUN_TEST(test_reftable_stack_uptodate); - RUN_TEST(test_reftable_stack_validate_refname); - RUN_TEST(test_sizes_to_segments); - RUN_TEST(test_sizes_to_segments_all_equal); - RUN_TEST(test_sizes_to_segments_empty); - RUN_TEST(test_suggest_compaction_segment); - RUN_TEST(test_suggest_compaction_segment_nothing); - return 0; -} diff --git a/reftable/system.h b/reftable/system.h index 6b74a81514..d0cabd5d17 100644 --- a/reftable/system.h +++ b/reftable/system.h @@ -12,8 +12,10 @@ https://developers.google.com/open-source/licenses/bsd /* This header glues the reftable library to the rest of Git */ #include "git-compat-util.h" +#include "lockfile.h" #include "strbuf.h" -#include "hash-ll.h" /* hash ID, sizes.*/ +#include "tempfile.h" +#include "hash.h" /* hash ID, sizes.*/ #include "dir.h" /* remove_dir_recursively, for tests.*/ int hash_size(uint32_t id); diff --git a/reftable/test_framework.c b/reftable/test_framework.c deleted file mode 100644 index 4066924eee..0000000000 --- a/reftable/test_framework.c +++ /dev/null @@ -1,27 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "system.h" -#include "test_framework.h" - - -void set_test_hash(uint8_t *p, int i) -{ - memset(p, (uint8_t)i, hash_size(GIT_SHA1_FORMAT_ID)); -} - -ssize_t strbuf_add_void(void *b, const void *data, size_t sz) -{ - strbuf_add(b, data, sz); - return sz; -} - -int noop_flush(void *arg) -{ - return 0; -} diff --git a/reftable/test_framework.h b/reftable/test_framework.h deleted file mode 100644 index 687390f9c2..0000000000 --- a/reftable/test_framework.h +++ /dev/null @@ -1,61 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#ifndef TEST_FRAMEWORK_H -#define TEST_FRAMEWORK_H - -#include "system.h" -#include "reftable-error.h" - -#define EXPECT_ERR(c) \ - do { \ - if (c != 0) { \ - fflush(stderr); \ - fflush(stdout); \ - fprintf(stderr, "%s: %d: error == %d (%s), want 0\n", \ - __FILE__, __LINE__, c, reftable_error_str(c)); \ - abort(); \ - } \ - } while (0) - -#define EXPECT_STREQ(a, b) \ - do { \ - if (strcmp(a, b)) { \ - fflush(stderr); \ - fflush(stdout); \ - fprintf(stderr, "%s:%d: %s (%s) != %s (%s)\n", __FILE__, \ - __LINE__, #a, a, #b, b); \ - abort(); \ - } \ - } while (0) - -#define EXPECT(c) \ - do { \ - if (!(c)) { \ - fflush(stderr); \ - fflush(stdout); \ - fprintf(stderr, "%s: %d: failed assertion %s\n", __FILE__, \ - __LINE__, #c); \ - abort(); \ - } \ - } while (0) - -#define RUN_TEST(f) \ - fprintf(stderr, "running %s\n", #f); \ - fflush(stderr); \ - f(); - -void set_test_hash(uint8_t *p, int i); - -/* Like strbuf_add, but suitable for passing to reftable_new_writer - */ -ssize_t strbuf_add_void(void *b, const void *data, size_t sz); - -int noop_flush(void *); - -#endif diff --git a/reftable/tree.c b/reftable/tree.c index 528f33ae38..5ffb2e0d69 100644 --- a/reftable/tree.c +++ b/reftable/tree.c @@ -39,25 +39,20 @@ struct tree_node *tree_search(void *key, struct tree_node **rootp, void infix_walk(struct tree_node *t, void (*action)(void *arg, void *key), void *arg) { - if (t->left) { + if (t->left) infix_walk(t->left, action, arg); - } action(arg, t->key); - if (t->right) { + if (t->right) infix_walk(t->right, action, arg); - } } void tree_free(struct tree_node *t) { - if (!t) { + if (!t) return; - } - if (t->left) { + if (t->left) tree_free(t->left); - } - if (t->right) { + if (t->right) tree_free(t->right); - } reftable_free(t); } diff --git a/reftable/tree_test.c b/reftable/tree_test.c deleted file mode 100644 index 6961a657ad..0000000000 --- a/reftable/tree_test.c +++ /dev/null @@ -1,60 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "system.h" -#include "tree.h" - -#include "test_framework.h" -#include "reftable-tests.h" - -static int test_compare(const void *a, const void *b) -{ - return (char *)a - (char *)b; -} - -struct curry { - void *last; -}; - -static void check_increasing(void *arg, void *key) -{ - struct curry *c = arg; - if (c->last) { - EXPECT(test_compare(c->last, key) < 0); - } - c->last = key; -} - -static void test_tree(void) -{ - struct tree_node *root = NULL; - - void *values[11] = { NULL }; - struct tree_node *nodes[11] = { NULL }; - int i = 1; - struct curry c = { NULL }; - do { - nodes[i] = tree_search(values + i, &root, &test_compare, 1); - i = (i * 7) % 11; - } while (i != 1); - - for (i = 1; i < ARRAY_SIZE(nodes); i++) { - EXPECT(values + i == nodes[i]->key); - EXPECT(nodes[i] == - tree_search(values + i, &root, &test_compare, 0)); - } - - infix_walk(root, check_increasing, &c); - tree_free(root); -} - -int tree_test_main(int argc, const char *argv[]) -{ - RUN_TEST(test_tree); - return 0; -} diff --git a/reftable/writer.c b/reftable/writer.c index 1d9ff0fbfa..9d5e6072bc 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -109,7 +109,7 @@ static void writer_reinit_block_writer(struct reftable_writer *w, uint8_t typ) block_start = header_size(writer_version(w)); } - strbuf_release(&w->last_key); + strbuf_reset(&w->last_key); block_writer_init(&w->block_writer_data, typ, w->block, w->opts.block_size, block_start, hash_size(w->opts.hash_id)); @@ -117,25 +117,26 @@ static void writer_reinit_block_writer(struct reftable_writer *w, uint8_t typ) w->block_writer->restart_interval = w->opts.restart_interval; } -static struct strbuf reftable_empty_strbuf = STRBUF_INIT; - struct reftable_writer * reftable_new_writer(ssize_t (*writer_func)(void *, const void *, size_t), int (*flush_func)(void *), - void *writer_arg, struct reftable_write_options *opts) + void *writer_arg, const struct reftable_write_options *_opts) { struct reftable_writer *wp = reftable_calloc(1, sizeof(*wp)); + struct reftable_write_options opts = {0}; + + if (_opts) + opts = *_opts; + options_set_defaults(&opts); + if (opts.block_size >= (1 << 24)) + BUG("configured block size exceeds 16MB"); + strbuf_init(&wp->block_writer_data.last_key, 0); - options_set_defaults(opts); - if (opts->block_size >= (1 << 24)) { - /* TODO - error return? */ - abort(); - } - wp->last_key = reftable_empty_strbuf; - REFTABLE_CALLOC_ARRAY(wp->block, opts->block_size); + strbuf_init(&wp->last_key, 0); + REFTABLE_CALLOC_ARRAY(wp->block, opts.block_size); wp->write = writer_func; wp->write_arg = writer_arg; - wp->opts = *opts; + wp->opts = opts; wp->flush = flush_func; writer_reinit_block_writer(wp, BLOCK_TYPE_REF); @@ -149,11 +150,21 @@ void reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, w->max_update_index = max; } +static void writer_release(struct reftable_writer *w) +{ + if (w) { + reftable_free(w->block); + w->block = NULL; + block_writer_release(&w->block_writer_data); + w->block_writer = NULL; + writer_clear_index(w); + strbuf_release(&w->last_key); + } +} + void reftable_writer_free(struct reftable_writer *w) { - if (!w) - return; - reftable_free(w->block); + writer_release(w); reftable_free(w); } @@ -209,7 +220,8 @@ static int writer_add_record(struct reftable_writer *w, struct reftable_record *rec) { struct strbuf key = STRBUF_INIT; - int err = -1; + int err; + reftable_record_key(rec, &key); if (strbuf_cmp(&w->last_key, &key) >= 0) { err = REFTABLE_API_ERROR; @@ -218,27 +230,42 @@ static int writer_add_record(struct reftable_writer *w, strbuf_reset(&w->last_key); strbuf_addbuf(&w->last_key, &key); - if (!w->block_writer) { + if (!w->block_writer) writer_reinit_block_writer(w, reftable_record_type(rec)); - } - assert(block_writer_type(w->block_writer) == reftable_record_type(rec)); + if (block_writer_type(w->block_writer) != reftable_record_type(rec)) + BUG("record of type %d added to writer of type %d", + reftable_record_type(rec), block_writer_type(w->block_writer)); - if (block_writer_add(w->block_writer, rec) == 0) { + /* + * Try to add the record to the writer. If this succeeds then we're + * done. Otherwise the block writer may have hit the block size limit + * and needs to be flushed. + */ + if (!block_writer_add(w->block_writer, rec)) { err = 0; goto done; } + /* + * The current block is full, so we need to flush and reinitialize the + * writer to start writing the next block. + */ err = writer_flush_block(w); - if (err < 0) { + if (err < 0) goto done; - } - writer_reinit_block_writer(w, reftable_record_type(rec)); + + /* + * Try to add the record to the writer again. If this still fails then + * the record does not fit into the block size. + * + * TODO: it would be great to have `block_writer_add()` return proper + * error codes so that we don't have to second-guess the failure + * mode here. + */ err = block_writer_add(w->block_writer, rec); - if (err == -1) { - /* we are writing into memory, so an error can only mean it - * doesn't fit. */ + if (err) { err = REFTABLE_ENTRY_TOO_BIG_ERROR; goto done; } @@ -452,7 +479,7 @@ static int writer_finish_section(struct reftable_writer *w) bstats->max_index_level = max_level; /* Reinit lastKey, as the next section can start with any key. */ - w->last_key.len = 0; + strbuf_reset(&w->last_key); return 0; } @@ -517,7 +544,7 @@ static void write_object_record(void *void_arg, void *key) done:; } -static void object_record_free(void *void_arg, void *key) +static void object_record_free(void *void_arg UNUSED, void *key) { struct obj_index_tree_node *entry = key; @@ -627,74 +654,87 @@ int reftable_writer_close(struct reftable_writer *w) } done: - /* free up memory. */ - block_writer_release(&w->block_writer_data); - writer_clear_index(w); - strbuf_release(&w->last_key); + writer_release(w); return err; } static void writer_clear_index(struct reftable_writer *w) { - for (size_t i = 0; i < w->index_len; i++) + for (size_t i = 0; w->index && i < w->index_len; i++) strbuf_release(&w->index[i].last_key); FREE_AND_NULL(w->index); w->index_len = 0; w->index_cap = 0; } -static const int debug = 0; - static int writer_flush_nonempty_block(struct reftable_writer *w) { + struct reftable_index_record index_record = { + .last_key = STRBUF_INIT, + }; uint8_t typ = block_writer_type(w->block_writer); - struct reftable_block_stats *bstats = - writer_reftable_block_stats(w, typ); - uint64_t block_typ_off = (bstats->blocks == 0) ? w->next : 0; - int raw_bytes = block_writer_finish(w->block_writer); - int padding = 0; - int err = 0; - struct reftable_index_record ir = { .last_key = STRBUF_INIT }; + struct reftable_block_stats *bstats; + int raw_bytes, padding = 0, err; + uint64_t block_typ_off; + + /* + * Finish the current block. This will cause the block writer to emit + * restart points and potentially compress records in case we are + * writing a log block. + * + * Note that this is still happening in memory. + */ + raw_bytes = block_writer_finish(w->block_writer); if (raw_bytes < 0) return raw_bytes; - if (!w->opts.unpadded && typ != BLOCK_TYPE_LOG) { + /* + * By default, all records except for log records are padded to the + * block size. + */ + if (!w->opts.unpadded && typ != BLOCK_TYPE_LOG) padding = w->opts.block_size - raw_bytes; - } - if (block_typ_off > 0) { + bstats = writer_reftable_block_stats(w, typ); + block_typ_off = (bstats->blocks == 0) ? w->next : 0; + if (block_typ_off > 0) bstats->offset = block_typ_off; - } - bstats->entries += w->block_writer->entries; bstats->restarts += w->block_writer->restart_len; bstats->blocks++; w->stats.blocks++; - if (debug) { - fprintf(stderr, "block %c off %" PRIu64 " sz %d (%d)\n", typ, - w->next, raw_bytes, - get_be24(w->block + w->block_writer->header_off + 1)); - } - - if (w->next == 0) { + /* + * If this is the first block we're writing to the table then we need + * to also write the reftable header. + */ + if (!w->next) writer_write_header(w, w->block); - } err = padded_write(w, w->block, raw_bytes, padding); if (err < 0) return err; + /* + * Add an index record for every block that we're writing. If we end up + * having more than a threshold of index records we will end up writing + * an index section in `writer_finish_section()`. Each index record + * contains the last record key of the block it is indexing as well as + * the offset of that block. + * + * Note that this also applies when flushing index blocks, in which + * case we will end up with a multi-level index. + */ REFTABLE_ALLOC_GROW(w->index, w->index_len + 1, w->index_cap); - - ir.offset = w->next; - strbuf_reset(&ir.last_key); - strbuf_addbuf(&ir.last_key, &w->block_writer->last_key); - w->index[w->index_len] = ir; - + index_record.offset = w->next; + strbuf_reset(&index_record.last_key); + strbuf_addbuf(&index_record.last_key, &w->block_writer->last_key); + w->index[w->index_len] = index_record; w->index_len++; + w->next += padding + raw_bytes; w->block_writer = NULL; + return 0; } |
