diff options
Diffstat (limited to 'reftable')
32 files changed, 1232 insertions, 1439 deletions
diff --git a/reftable/basics.c b/reftable/basics.c index 0785aff941..fea711db7e 100644 --- a/reftable/basics.c +++ b/reftable/basics.c @@ -27,7 +27,7 @@ void put_be16(uint8_t *out, uint16_t i) out[1] = (uint8_t)(i & 0xff); } -int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) +size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) { size_t lo = 0; size_t hi = sz; @@ -39,8 +39,11 @@ int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) */ while (hi - lo > 1) { size_t mid = lo + (hi - lo) / 2; + int ret = f(mid, args); + if (ret < 0) + return sz; - if (f(mid, args)) + if (ret > 0) hi = mid; else lo = mid; diff --git a/reftable/basics.h b/reftable/basics.h index 91f3533efe..523ecd5307 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -22,13 +22,14 @@ uint32_t get_be24(uint8_t *in); void put_be16(uint8_t *out, uint16_t i); /* - * find smallest index i in [0, sz) at which f(i) is true, assuming - * that f is ascending. Return sz if f(i) is false for all indices. + * find smallest index i in [0, sz) at which `f(i) > 0`, assuming that f is + * ascending. Return sz if `f(i) == 0` for all indices. The search is aborted + * and `sz` is returned in case `f(i) < 0`. * * Contrary to bsearch(3), this returns something useful if the argument is not * found. */ -int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args); +size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args); /* * Frees a NULL terminated array of malloced strings. The array itself is also diff --git a/reftable/basics_test.c b/reftable/basics_test.c index 1fcd229725..997c4d9e01 100644 --- a/reftable/basics_test.c +++ b/reftable/basics_test.c @@ -12,40 +12,47 @@ https://developers.google.com/open-source/licenses/bsd #include "test_framework.h" #include "reftable-tests.h" -struct binsearch_args { - int key; - int *arr; +struct integer_needle_lesseq_args { + int needle; + int *haystack; }; -static int binsearch_func(size_t i, void *void_args) +static int integer_needle_lesseq(size_t i, void *_args) { - struct binsearch_args *args = void_args; - - return args->key < args->arr[i]; + struct integer_needle_lesseq_args *args = _args; + return args->needle <= args->haystack[i]; } static void test_binsearch(void) { - int arr[] = { 2, 4, 6, 8, 10 }; - size_t sz = ARRAY_SIZE(arr); - struct binsearch_args args = { - .arr = arr, + int haystack[] = { 2, 4, 6, 8, 10 }; + struct { + int needle; + size_t expected_idx; + } testcases[] = { + {-9000, 0}, + {-1, 0}, + {0, 0}, + {2, 0}, + {3, 1}, + {4, 1}, + {7, 3}, + {9, 4}, + {10, 4}, + {11, 5}, + {9000, 5}, }; + size_t i = 0; - int i = 0; - for (i = 1; i < 11; i++) { - int res; - args.key = i; - res = binsearch(sz, &binsearch_func, &args); + for (i = 0; i < ARRAY_SIZE(testcases); i++) { + struct integer_needle_lesseq_args args = { + .haystack = haystack, + .needle = testcases[i].needle, + }; + size_t idx; - if (res < sz) { - EXPECT(args.key < arr[res]); - if (res > 0) { - EXPECT(args.key >= arr[res - 1]); - } - } else { - EXPECT(args.key == 10 || args.key == 11); - } + idx = binsearch(ARRAY_SIZE(haystack), &integer_needle_lesseq, &args); + EXPECT(idx == testcases[i].expected_idx); } } diff --git a/reftable/block.c b/reftable/block.c index ce8a7d24f3..5942cb4053 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -76,6 +76,10 @@ void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf, bw->entries = 0; bw->restart_len = 0; bw->last_key.len = 0; + if (!bw->zstream) { + REFTABLE_CALLOC_ARRAY(bw->zstream, 1); + deflateInit(bw->zstream, 9); + } } uint8_t block_writer_type(struct block_writer *bw) @@ -139,45 +143,53 @@ int block_writer_finish(struct block_writer *w) w->next += 2; put_be24(w->buf + 1 + w->header_off, w->next); + /* + * Log records are stored zlib-compressed. Note that the compression + * also spans over the restart points we have just written. + */ if (block_writer_type(w) == BLOCK_TYPE_LOG) { int block_header_skip = 4 + w->header_off; - uLongf src_len = w->next - block_header_skip; - uLongf dest_cap = src_len * 1.001 + 12; - uint8_t *compressed; - - REFTABLE_ALLOC_ARRAY(compressed, dest_cap); - - while (1) { - uLongf out_dest_len = dest_cap; - int zresult = compress2(compressed, &out_dest_len, - w->buf + block_header_skip, - src_len, 9); - if (zresult == Z_BUF_ERROR && dest_cap < LONG_MAX) { - dest_cap *= 2; - compressed = - reftable_realloc(compressed, dest_cap); - if (compressed) - continue; - } - - if (Z_OK != zresult) { - reftable_free(compressed); - return REFTABLE_ZLIB_ERROR; - } - - memcpy(w->buf + block_header_skip, compressed, - out_dest_len); - w->next = out_dest_len + block_header_skip; - reftable_free(compressed); - break; - } + uLongf src_len = w->next - block_header_skip, compressed_len; + int ret; + + ret = deflateReset(w->zstream); + if (ret != Z_OK) + return REFTABLE_ZLIB_ERROR; + + /* + * Precompute the upper bound of how many bytes the compressed + * data may end up with. Combined with `Z_FINISH`, `deflate()` + * is guaranteed to return `Z_STREAM_END`. + */ + compressed_len = deflateBound(w->zstream, src_len); + REFTABLE_ALLOC_GROW(w->compressed, compressed_len, w->compressed_cap); + + w->zstream->next_out = w->compressed; + w->zstream->avail_out = compressed_len; + w->zstream->next_in = w->buf + block_header_skip; + w->zstream->avail_in = src_len; + + /* + * We want to perform all decompression in a single step, which + * is why we can pass Z_FINISH here. As we have precomputed the + * deflated buffer's size via `deflateBound()` this function is + * guaranteed to succeed according to the zlib documentation. + */ + ret = deflate(w->zstream, Z_FINISH); + if (ret != Z_STREAM_END) + return REFTABLE_ZLIB_ERROR; + + /* + * Overwrite the uncompressed data we have already written and + * adjust the `next` pointer to point right after the + * compressed data. + */ + memcpy(w->buf + block_header_skip, w->compressed, + w->zstream->total_out); + w->next = w->zstream->total_out + block_header_skip; } - return w->next; -} -uint8_t block_reader_type(struct block_reader *r) -{ - return r->block.data[r->header_off]; + return w->next; } int block_reader_init(struct block_reader *br, struct reftable_block *block, @@ -191,7 +203,8 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block, uint16_t restart_count = 0; uint32_t restart_start = 0; uint8_t *restart_bytes = NULL; - uint8_t *uncompressed = NULL; + + reftable_block_done(&br->block); if (!reftable_is_block_type(typ)) { err = REFTABLE_FORMAT_ERROR; @@ -199,37 +212,57 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block, } if (typ == BLOCK_TYPE_LOG) { - int block_header_skip = 4 + header_off; - uLongf dst_len = sz - block_header_skip; /* total size of dest - buffer. */ - uLongf src_len = block->len - block_header_skip; + uint32_t block_header_skip = 4 + header_off; + uLong dst_len = sz - block_header_skip; + uLong src_len = block->len - block_header_skip; /* Log blocks specify the *uncompressed* size in their header. */ - REFTABLE_ALLOC_ARRAY(uncompressed, sz); + REFTABLE_ALLOC_GROW(br->uncompressed_data, sz, + br->uncompressed_cap); /* Copy over the block header verbatim. It's not compressed. */ - memcpy(uncompressed, block->data, block_header_skip); + memcpy(br->uncompressed_data, block->data, block_header_skip); - /* Uncompress */ - if (Z_OK != - uncompress2(uncompressed + block_header_skip, &dst_len, - block->data + block_header_skip, &src_len)) { + if (!br->zstream) { + REFTABLE_CALLOC_ARRAY(br->zstream, 1); + err = inflateInit(br->zstream); + } else { + err = inflateReset(br->zstream); + } + if (err != Z_OK) { err = REFTABLE_ZLIB_ERROR; goto done; } - if (dst_len + block_header_skip != sz) { + br->zstream->next_in = block->data + block_header_skip; + br->zstream->avail_in = src_len; + br->zstream->next_out = br->uncompressed_data + block_header_skip; + br->zstream->avail_out = dst_len; + + /* + * We know both input as well as output size, and we know that + * the sizes should never be bigger than `uInt_MAX` because + * blocks can at most be 16MB large. We can thus use `Z_FINISH` + * here to instruct zlib to inflate the data in one go, which + * is more efficient than using `Z_NO_FLUSH`. + */ + err = inflate(br->zstream, Z_FINISH); + if (err != Z_STREAM_END) { + err = REFTABLE_ZLIB_ERROR; + goto done; + } + err = 0; + + if (br->zstream->total_out + block_header_skip != sz) { err = REFTABLE_FORMAT_ERROR; goto done; } /* We're done with the input data. */ reftable_block_done(block); - block->data = uncompressed; - uncompressed = NULL; + block->data = br->uncompressed_data; block->len = sz; - block->source = malloc_block_source(); - full_block_size = src_len + block_header_skip; + full_block_size = src_len + block_header_skip - br->zstream->avail_in; } else if (full_block_size == 0) { full_block_size = sz; } else if (sz < full_block_size && sz < block->len && @@ -257,177 +290,251 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block, br->restart_bytes = restart_bytes; done: - reftable_free(uncompressed); return err; } -static uint32_t block_reader_restart_offset(struct block_reader *br, int i) +void block_reader_release(struct block_reader *br) +{ + inflateEnd(br->zstream); + reftable_free(br->zstream); + reftable_free(br->uncompressed_data); + reftable_block_done(&br->block); +} + +uint8_t block_reader_type(const struct block_reader *r) +{ + return r->block.data[r->header_off]; +} + +int block_reader_first_key(const struct block_reader *br, struct strbuf *key) +{ + int off = br->header_off + 4, n; + struct string_view in = { + .buf = br->block.data + off, + .len = br->block_len - off, + }; + uint8_t extra = 0; + + strbuf_reset(key); + + n = reftable_decode_key(key, &extra, in); + if (n < 0) + return n; + if (!key->len) + return REFTABLE_FORMAT_ERROR; + + return 0; +} + +static uint32_t block_reader_restart_offset(const struct block_reader *br, int i) { return get_be24(br->restart_bytes + 3 * i); } -void block_reader_start(struct block_reader *br, struct block_iter *it) +void block_iter_seek_start(struct block_iter *it, const struct block_reader *br) { - it->br = br; + it->block = br->block.data; + it->block_len = br->block_len; + it->hash_size = br->hash_size; strbuf_reset(&it->last_key); it->next_off = br->header_off + 4; } -struct restart_find_args { +struct restart_needle_less_args { int error; - struct strbuf key; - struct block_reader *r; + struct strbuf needle; + const struct block_reader *reader; }; -static int restart_key_less(size_t idx, void *args) +static int restart_needle_less(size_t idx, void *_args) { - struct restart_find_args *a = args; - uint32_t off = block_reader_restart_offset(a->r, idx); + struct restart_needle_less_args *args = _args; + uint32_t off = block_reader_restart_offset(args->reader, idx); struct string_view in = { - .buf = a->r->block.data + off, - .len = a->r->block_len - off, + .buf = args->reader->block.data + off, + .len = args->reader->block_len - off, }; - - /* the restart key is verbatim in the block, so this could avoid the - alloc for decoding the key */ - struct strbuf rkey = STRBUF_INIT; - struct strbuf last_key = STRBUF_INIT; - uint8_t unused_extra; - int n = reftable_decode_key(&rkey, &unused_extra, last_key, in); - int result; - if (n < 0) { - a->error = 1; + uint64_t prefix_len, suffix_len; + uint8_t extra; + int n; + + /* + * Records at restart points are stored without prefix compression, so + * there is no need to fully decode the record key here. This removes + * the need for allocating memory. + */ + n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra); + if (n < 0 || prefix_len) { + args->error = 1; return -1; } - result = strbuf_cmp(&a->key, &rkey); - strbuf_release(&rkey); - return result; -} + string_view_consume(&in, n); + if (suffix_len > in.len) { + args->error = 1; + return -1; + } -void block_iter_copy_from(struct block_iter *dest, struct block_iter *src) -{ - dest->br = src->br; - dest->next_off = src->next_off; - strbuf_reset(&dest->last_key); - strbuf_addbuf(&dest->last_key, &src->last_key); + n = memcmp(args->needle.buf, in.buf, + args->needle.len < suffix_len ? args->needle.len : suffix_len); + if (n) + return n < 0; + return args->needle.len < suffix_len; } int block_iter_next(struct block_iter *it, struct reftable_record *rec) { struct string_view in = { - .buf = it->br->block.data + it->next_off, - .len = it->br->block_len - it->next_off, + .buf = (unsigned char *) it->block + it->next_off, + .len = it->block_len - it->next_off, }; struct string_view start = in; uint8_t extra = 0; int n = 0; - if (it->next_off >= it->br->block_len) + if (it->next_off >= it->block_len) return 1; - n = reftable_decode_key(&it->key, &extra, it->last_key, in); + n = reftable_decode_key(&it->last_key, &extra, in); if (n < 0) return -1; - - if (!it->key.len) + if (!it->last_key.len) return REFTABLE_FORMAT_ERROR; string_view_consume(&in, n); - n = reftable_record_decode(rec, it->key, extra, in, it->br->hash_size); + n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size, + &it->scratch); if (n < 0) return -1; string_view_consume(&in, n); - strbuf_reset(&it->last_key); - strbuf_addbuf(&it->last_key, &it->key); it->next_off += start.len - in.len; return 0; } -int block_reader_first_key(struct block_reader *br, struct strbuf *key) -{ - struct strbuf empty = STRBUF_INIT; - int off = br->header_off + 4; - struct string_view in = { - .buf = br->block.data + off, - .len = br->block_len - off, - }; - - uint8_t extra = 0; - int n = reftable_decode_key(key, &extra, empty, in); - if (n < 0) - return n; - if (!key->len) - return REFTABLE_FORMAT_ERROR; - - return 0; -} - -int block_iter_seek(struct block_iter *it, struct strbuf *want) +void block_iter_reset(struct block_iter *it) { - return block_reader_seek(it->br, it, want); + strbuf_reset(&it->last_key); + it->next_off = 0; + it->block = NULL; + it->block_len = 0; + it->hash_size = 0; } void block_iter_close(struct block_iter *it) { strbuf_release(&it->last_key); - strbuf_release(&it->key); + strbuf_release(&it->scratch); } -int block_reader_seek(struct block_reader *br, struct block_iter *it, - struct strbuf *want) +int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, + struct strbuf *want) { - struct restart_find_args args = { - .key = *want, - .r = br, + struct restart_needle_less_args args = { + .needle = *want, + .reader = br, }; - struct block_iter next = BLOCK_ITER_INIT; struct reftable_record rec; - int err = 0, i; - + int err = 0; + size_t i; + + /* + * Perform a binary search over the block's restart points, which + * avoids doing a linear scan over the whole block. Like this, we + * identify the section of the block that should contain our key. + * + * Note that we explicitly search for the first restart point _greater_ + * than the sought-after record, not _greater or equal_ to it. In case + * the sought-after record is located directly at the restart point we + * would otherwise start doing the linear search at the preceding + * restart point. While that works alright, we would end up scanning + * too many record. + */ + i = binsearch(br->restart_count, &restart_needle_less, &args); if (args.error) { err = REFTABLE_FORMAT_ERROR; goto done; } - i = binsearch(br->restart_count, &restart_key_less, &args); + /* + * Now there are multiple cases: + * + * - `i == 0`: The wanted record is smaller than the record found at + * the first restart point. As the first restart point is the first + * record in the block, our wanted record cannot be located in this + * block at all. We still need to position the iterator so that the + * next call to `block_iter_next()` will yield an end-of-iterator + * signal. + * + * - `i == restart_count`: The wanted record was not found at any of + * the restart points. As there is no restart point at the end of + * the section the record may thus be contained in the last block. + * + * - `i > 0`: The wanted record must be contained in the section + * before the found restart point. We thus do a linear search + * starting from the preceding restart point. + */ if (i > 0) it->next_off = block_reader_restart_offset(br, i - 1); else it->next_off = br->header_off + 4; - it->br = br; + it->block = br->block.data; + it->block_len = br->block_len; + it->hash_size = br->hash_size; reftable_record_init(&rec, block_reader_type(br)); - /* We're looking for the last entry less/equal than the wanted key, so - we have to go one entry too far and then back up. - */ + /* + * We're looking for the last entry less than the wanted key so that + * the next call to `block_reader_next()` would yield the wanted + * record. We thus don't want to position our reader at the sought + * after record, but one before. To do so, we have to go one entry too + * far and then back up. + */ while (1) { - block_iter_copy_from(&next, it); - err = block_iter_next(&next, &rec); + size_t prev_off = it->next_off; + + err = block_iter_next(it, &rec); if (err < 0) goto done; - - reftable_record_key(&rec, &it->key); - if (err > 0 || strbuf_cmp(&it->key, want) >= 0) { + if (err > 0) { + it->next_off = prev_off; err = 0; goto done; } - block_iter_copy_from(it, &next); + /* + * Check whether the current key is greater or equal to the + * sought-after key. In case it is greater we know that the + * record does not exist in the block and can thus abort early. + * In case it is equal to the sought-after key we have found + * the desired record. + * + * Note that we store the next record's key record directly in + * `last_key` without restoring the key of the preceding record + * in case we need to go one record back. This is safe to do as + * `block_iter_next()` would return the ref whose key is equal + * to `last_key` now, and naturally all keys share a prefix + * with themselves. + */ + reftable_record_key(&rec, &it->last_key); + if (strbuf_cmp(&it->last_key, want) >= 0) { + it->next_off = prev_off; + goto done; + } } done: - block_iter_close(&next); reftable_record_release(&rec); - return err; } void block_writer_release(struct block_writer *bw) { + deflateEnd(bw->zstream); + FREE_AND_NULL(bw->zstream); FREE_AND_NULL(bw->restarts); + FREE_AND_NULL(bw->compressed); strbuf_release(&bw->last_key); /* the block is not owned. */ } diff --git a/reftable/block.h b/reftable/block.h index 17481e6331..e91f3d2790 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -18,6 +18,10 @@ https://developers.google.com/open-source/licenses/bsd * allocation overhead. */ struct block_writer { + z_stream *zstream; + unsigned char *compressed; + size_t compressed_cap; + uint8_t *buf; uint32_t block_size; @@ -56,6 +60,8 @@ int block_writer_finish(struct block_writer *w); /* clears out internally allocated block_writer members. */ void block_writer_release(struct block_writer *bw); +struct z_stream; + /* Read a block. */ struct block_reader { /* offset of the block header; nonzero for the first block in a @@ -66,6 +72,11 @@ struct block_reader { struct reftable_block block; int hash_size; + /* Uncompressed data for log entries. */ + z_stream *zstream; + unsigned char *uncompressed_data; + size_t uncompressed_cap; + /* size of the data, excluding restart data. */ uint32_t block_len; uint8_t *restart_bytes; @@ -76,47 +87,49 @@ struct block_reader { uint32_t full_block_size; }; +/* initializes a block reader. */ +int block_reader_init(struct block_reader *br, struct reftable_block *bl, + uint32_t header_off, uint32_t table_block_size, + int hash_size); + +void block_reader_release(struct block_reader *br); + +/* Returns the block type (eg. 'r' for refs) */ +uint8_t block_reader_type(const struct block_reader *r); + +/* Decodes the first key in the block */ +int block_reader_first_key(const struct block_reader *br, struct strbuf *key); + /* Iterate over entries in a block */ struct block_iter { /* offset within the block of the next entry to read. */ uint32_t next_off; - struct block_reader *br; + const unsigned char *block; + size_t block_len; + int hash_size; /* key for last entry we read. */ struct strbuf last_key; - struct strbuf key; + struct strbuf scratch; }; #define BLOCK_ITER_INIT { \ .last_key = STRBUF_INIT, \ - .key = STRBUF_INIT, \ + .scratch = STRBUF_INIT, \ } -/* initializes a block reader. */ -int block_reader_init(struct block_reader *br, struct reftable_block *bl, - uint32_t header_off, uint32_t table_block_size, - int hash_size); - /* Position `it` at start of the block */ -void block_reader_start(struct block_reader *br, struct block_iter *it); +void block_iter_seek_start(struct block_iter *it, const struct block_reader *br); /* Position `it` to the `want` key in the block */ -int block_reader_seek(struct block_reader *br, struct block_iter *it, - struct strbuf *want); - -/* Returns the block type (eg. 'r' for refs) */ -uint8_t block_reader_type(struct block_reader *r); - -/* Decodes the first key in the block */ -int block_reader_first_key(struct block_reader *br, struct strbuf *key); - -void block_iter_copy_from(struct block_iter *dest, struct block_iter *src); +int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, + struct strbuf *want); /* return < 0 for error, 0 for OK, > 0 for EOF. */ int block_iter_next(struct block_iter *it, struct reftable_record *rec); -/* Seek to `want` with in the block pointed to by `it` */ -int block_iter_seek(struct block_iter *it, struct strbuf *want); +/* Reset the block iterator to pristine state without releasing its memory. */ +void block_iter_reset(struct block_iter *it); /* deallocate memory for `it`. The block reader and its block is left intact. */ void block_iter_close(struct block_iter *it); diff --git a/reftable/block_test.c b/reftable/block_test.c index e162c6e33f..26a9cfbc83 100644 --- a/reftable/block_test.c +++ b/reftable/block_test.c @@ -69,7 +69,7 @@ static void test_block_read_write(void) block_reader_init(&br, &block, header_off, block_size, GIT_SHA1_RAWSZ); - block_reader_start(&br, &it); + block_iter_seek_start(&it, &br); while (1) { int r = block_iter_next(&it, &rec); @@ -89,7 +89,7 @@ static void test_block_read_write(void) strbuf_reset(&want); strbuf_addstr(&want, names[i]); - n = block_reader_seek(&br, &it, &want); + n = block_iter_seek_key(&it, &br, &want); EXPECT(n == 0); n = block_iter_next(&it, &rec); @@ -98,7 +98,7 @@ static void test_block_read_write(void) EXPECT_STREQ(names[i], rec.u.ref.refname); want.len--; - n = block_reader_seek(&br, &it, &want); + n = block_iter_seek_key(&it, &br, &want); EXPECT(n == 0); n = block_iter_next(&it, &rec); diff --git a/reftable/error.c b/reftable/error.c index 0d1766735e..a25f28a43e 100644 --- a/reftable/error.c +++ b/reftable/error.c @@ -22,19 +22,19 @@ const char *reftable_error_str(int err) case REFTABLE_NOT_EXIST_ERROR: return "file does not exist"; case REFTABLE_LOCK_ERROR: - return "data is outdated"; + return "data is locked"; case REFTABLE_API_ERROR: return "misuse of the reftable API"; case REFTABLE_ZLIB_ERROR: return "zlib failure"; - case REFTABLE_NAME_CONFLICT: - return "file/directory conflict"; case REFTABLE_EMPTY_TABLE_ERROR: return "wrote empty table"; case REFTABLE_REFNAME_ERROR: return "invalid refname"; case REFTABLE_ENTRY_TOO_BIG_ERROR: return "entry too large"; + case REFTABLE_OUTDATED_ERROR: + return "data concurrently modified"; case -1: return "general error"; default: diff --git a/reftable/iter.c b/reftable/iter.c index 8b5ebf6183..aa9ac199b1 100644 --- a/reftable/iter.c +++ b/reftable/iter.c @@ -16,11 +16,6 @@ https://developers.google.com/open-source/licenses/bsd #include "reader.h" #include "reftable-error.h" -int iterator_is_null(struct reftable_iterator *it) -{ - return !it->ops; -} - static void filtering_ref_iterator_close(void *iter_arg) { struct filtering_ref_iterator *fri = iter_arg; @@ -120,7 +115,7 @@ static int indexed_table_ref_iter_next_block(struct indexed_table_ref_iter *it) /* indexed block does not exist. */ return REFTABLE_FORMAT_ERROR; } - block_reader_start(&it->block_reader, &it->cur); + block_iter_seek_start(&it->cur, &it->block_reader); return 0; } diff --git a/reftable/iter.h b/reftable/iter.h index 47d67d84df..537431baba 100644 --- a/reftable/iter.h +++ b/reftable/iter.h @@ -16,10 +16,6 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-iterator.h" #include "reftable-generic.h" -/* Returns true for a zeroed out iterator, such as the one returned from - * iterator_destroy. */ -int iterator_is_null(struct reftable_iterator *it); - /* iterator that produces only ref records that point to `oid` */ struct filtering_ref_iterator { int double_check; diff --git a/reftable/merged.c b/reftable/merged.c index a0f222e07b..f85a24c678 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -17,23 +17,37 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-error.h" #include "system.h" +struct merged_subiter { + struct reftable_iterator iter; + struct reftable_record rec; +}; + +struct merged_iter { + struct merged_subiter *subiters; + struct merged_iter_pqueue pq; + uint32_t hash_id; + size_t stack_len; + uint8_t typ; + int suppress_deletions; + ssize_t advance_index; +}; + static int merged_iter_init(struct merged_iter *mi) { for (size_t i = 0; i < mi->stack_len; i++) { struct pq_entry e = { .index = i, + .rec = &mi->subiters[i].rec, }; int err; - reftable_record_init(&e.rec, mi->typ); - err = iterator_next(&mi->stack[i], &e.rec); + reftable_record_init(&mi->subiters[i].rec, mi->typ); + err = iterator_next(&mi->subiters[i].iter, + &mi->subiters[i].rec); if (err < 0) return err; - if (err > 0) { - reftable_iterator_destroy(&mi->stack[i]); - reftable_record_release(&e.rec); + if (err > 0) continue; - } merged_iter_pqueue_add(&mi->pq, &e); } @@ -46,56 +60,66 @@ static void merged_iter_close(void *p) struct merged_iter *mi = p; merged_iter_pqueue_release(&mi->pq); - for (size_t i = 0; i < mi->stack_len; i++) - reftable_iterator_destroy(&mi->stack[i]); - reftable_free(mi->stack); - strbuf_release(&mi->key); - strbuf_release(&mi->entry_key); + for (size_t i = 0; i < mi->stack_len; i++) { + reftable_iterator_destroy(&mi->subiters[i].iter); + reftable_record_release(&mi->subiters[i].rec); + } + reftable_free(mi->subiters); } -static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, - size_t idx) +static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) { struct pq_entry e = { .index = idx, + .rec = &mi->subiters[idx].rec, }; int err; - reftable_record_init(&e.rec, mi->typ); - err = iterator_next(&mi->stack[idx], &e.rec); - if (err < 0) + err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec); + if (err) return err; - if (err > 0) { - reftable_iterator_destroy(&mi->stack[idx]); - reftable_record_release(&e.rec); - return 0; - } - merged_iter_pqueue_add(&mi->pq, &e); return 0; } -static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) -{ - if (iterator_is_null(&mi->stack[idx])) - return 0; - return merged_iter_advance_nonnull_subiter(mi, idx); -} - static int merged_iter_next_entry(struct merged_iter *mi, struct reftable_record *rec) { struct pq_entry entry = { 0 }; - int err = 0; + int err = 0, empty; + + empty = merged_iter_pqueue_is_empty(mi->pq); + + if (mi->advance_index >= 0) { + /* + * When there are no pqueue entries then we only have a single + * subiter left. There is no need to use the pqueue in that + * case anymore as we know that the subiter will return entries + * in the correct order already. + * + * While this may sound like a very specific edge case, it may + * happen more frequently than you think. Most repositories + * will end up having a single large base table that contains + * most of the refs. It's thus likely that we exhaust all + * subiters but the one from that base ref. + */ + if (empty) + return iterator_next(&mi->subiters[mi->advance_index].iter, + rec); + + err = merged_iter_advance_subiter(mi, mi->advance_index); + if (err < 0) + return err; + if (!err) + empty = 0; + mi->advance_index = -1; + } - if (merged_iter_pqueue_is_empty(mi->pq)) + if (empty) return 1; entry = merged_iter_pqueue_remove(&mi->pq); - err = merged_iter_advance_subiter(mi, entry.index); - if (err < 0) - return err; /* One can also use reftable as datacenter-local storage, where the ref @@ -105,55 +129,38 @@ static int merged_iter_next_entry(struct merged_iter *mi, such a deployment, the loop below must be changed to collect all entries for the same key, and return new the newest one. */ - reftable_record_key(&entry.rec, &mi->entry_key); while (!merged_iter_pqueue_is_empty(mi->pq)) { struct pq_entry top = merged_iter_pqueue_top(mi->pq); - int cmp = 0; - - reftable_record_key(&top.rec, &mi->key); + int cmp; - cmp = strbuf_cmp(&mi->key, &mi->entry_key); + cmp = reftable_record_cmp(top.rec, entry.rec); if (cmp > 0) break; merged_iter_pqueue_remove(&mi->pq); err = merged_iter_advance_subiter(mi, top.index); if (err < 0) - goto done; - reftable_record_release(&top.rec); + return err; } - reftable_record_release(rec); - *rec = entry.rec; - -done: - if (err) - reftable_record_release(&entry.rec); - return err; + mi->advance_index = entry.index; + SWAP(*rec, *entry.rec); + return 0; } -static int merged_iter_next(struct merged_iter *mi, struct reftable_record *rec) +static int merged_iter_next_void(void *p, struct reftable_record *rec) { + struct merged_iter *mi = p; while (1) { int err = merged_iter_next_entry(mi, rec); - if (err == 0 && mi->suppress_deletions && - reftable_record_is_deletion(rec)) { + if (err) + return err; + if (mi->suppress_deletions && reftable_record_is_deletion(rec)) continue; - } - - return err; + return 0; } } -static int merged_iter_next_void(void *p, struct reftable_record *rec) -{ - struct merged_iter *mi = p; - if (merged_iter_pqueue_is_empty(mi->pq)) - return 1; - - return merged_iter_next(mi, rec); -} - static struct reftable_iterator_vtable merged_iter_vtable = { .next = &merged_iter_next_void, .close = &merged_iter_close, @@ -243,16 +250,15 @@ static int merged_table_seek_record(struct reftable_merged_table *mt, .typ = reftable_record_type(rec), .hash_id = mt->hash_id, .suppress_deletions = mt->suppress_deletions, - .key = STRBUF_INIT, - .entry_key = STRBUF_INIT, + .advance_index = -1, }; struct merged_iter *p; int err; - REFTABLE_CALLOC_ARRAY(merged.stack, mt->stack_len); + REFTABLE_CALLOC_ARRAY(merged.subiters, mt->stack_len); for (size_t i = 0; i < mt->stack_len; i++) { err = reftable_table_seek_record(&mt->stack[i], - &merged.stack[merged.stack_len], rec); + &merged.subiters[merged.stack_len].iter, rec); if (err < 0) goto out; if (!err) diff --git a/reftable/merged.h b/reftable/merged.h index d5b39dfe7f..a2571dbc99 100644 --- a/reftable/merged.h +++ b/reftable/merged.h @@ -9,7 +9,7 @@ https://developers.google.com/open-source/licenses/bsd #ifndef MERGED_H #define MERGED_H -#include "pq.h" +#include "system.h" struct reftable_merged_table { struct reftable_table *stack; @@ -24,17 +24,6 @@ struct reftable_merged_table { uint64_t max; }; -struct merged_iter { - struct reftable_iterator *stack; - uint32_t hash_id; - size_t stack_len; - uint8_t typ; - int suppress_deletions; - struct merged_iter_pqueue pq; - struct strbuf key; - struct strbuf entry_key; -}; - void merged_table_release(struct reftable_merged_table *mt); #endif diff --git a/reftable/merged_test.c b/reftable/merged_test.c index d0f77a3b8f..530fc82d1c 100644 --- a/reftable/merged_test.c +++ b/reftable/merged_test.c @@ -289,16 +289,13 @@ merged_table_from_log_records(struct reftable_log_record **logs, static void test_merged_logs(void) { - uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 }; - uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 }; - uint8_t hash3[GIT_SHA1_RAWSZ] = { 3 }; struct reftable_log_record r1[] = { { .refname = "a", .update_index = 2, .value_type = REFTABLE_LOG_UPDATE, .value.update = { - .old_hash = hash2, + .old_hash = { 2 }, /* deletion */ .name = "jane doe", .email = "jane@invalid", @@ -310,8 +307,8 @@ static void test_merged_logs(void) .update_index = 1, .value_type = REFTABLE_LOG_UPDATE, .value.update = { - .old_hash = hash1, - .new_hash = hash2, + .old_hash = { 1 }, + .new_hash = { 2 }, .name = "jane doe", .email = "jane@invalid", .message = "message1", @@ -324,7 +321,7 @@ static void test_merged_logs(void) .update_index = 3, .value_type = REFTABLE_LOG_UPDATE, .value.update = { - .new_hash = hash3, + .new_hash = { 3 }, .name = "jane doe", .email = "jane@invalid", .message = "message3", diff --git a/reftable/pq.c b/reftable/pq.c index 2461daf5ff..7fb45d8c60 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -14,33 +14,12 @@ https://developers.google.com/open-source/licenses/bsd int pq_less(struct pq_entry *a, struct pq_entry *b) { - struct strbuf ak = STRBUF_INIT; - struct strbuf bk = STRBUF_INIT; - int cmp = 0; - reftable_record_key(&a->rec, &ak); - reftable_record_key(&b->rec, &bk); - - cmp = strbuf_cmp(&ak, &bk); - - strbuf_release(&ak); - strbuf_release(&bk); - + int cmp = reftable_record_cmp(a->rec, b->rec); if (cmp == 0) return a->index > b->index; - return cmp < 0; } -struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq) -{ - return pq.heap[0]; -} - -int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq) -{ - return pq.len == 0; -} - struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq) { int i = 0; @@ -93,10 +72,6 @@ void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry void merged_iter_pqueue_release(struct merged_iter_pqueue *pq) { - int i = 0; - for (i = 0; i < pq->len; i++) { - reftable_record_release(&pq->heap[i].rec); - } FREE_AND_NULL(pq->heap); - pq->len = pq->cap = 0; + memset(pq, 0, sizeof(*pq)); } diff --git a/reftable/pq.h b/reftable/pq.h index e85bac9b52..f796c23179 100644 --- a/reftable/pq.h +++ b/reftable/pq.h @@ -12,8 +12,8 @@ https://developers.google.com/open-source/licenses/bsd #include "record.h" struct pq_entry { - int index; - struct reftable_record rec; + size_t index; + struct reftable_record *rec; }; struct merged_iter_pqueue { @@ -22,12 +22,20 @@ struct merged_iter_pqueue { size_t cap; }; -struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq); -int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq); void merged_iter_pqueue_check(struct merged_iter_pqueue pq); struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq); void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e); void merged_iter_pqueue_release(struct merged_iter_pqueue *pq); int pq_less(struct pq_entry *a, struct pq_entry *b); +static inline struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq) +{ + return pq.heap[0]; +} + +static inline int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq) +{ + return pq.len == 0; +} + #endif diff --git a/reftable/pq_test.c b/reftable/pq_test.c index c202eff848..b7d3c80cc7 100644 --- a/reftable/pq_test.c +++ b/reftable/pq_test.c @@ -27,48 +27,43 @@ void merged_iter_pqueue_check(struct merged_iter_pqueue pq) static void test_pq(void) { - char *names[54] = { NULL }; - int N = ARRAY_SIZE(names) - 1; - struct merged_iter_pqueue pq = { NULL }; + struct reftable_record recs[54]; + int N = ARRAY_SIZE(recs) - 1, i; char *last = NULL; - int i = 0; for (i = 0; i < N; i++) { - char name[100]; - snprintf(name, sizeof(name), "%02d", i); - names[i] = xstrdup(name); + struct strbuf refname = STRBUF_INIT; + strbuf_addf(&refname, "%02d", i); + + reftable_record_init(&recs[i], BLOCK_TYPE_REF); + recs[i].u.ref.refname = strbuf_detach(&refname, NULL); } i = 1; do { - struct pq_entry e = { .rec = { .type = BLOCK_TYPE_REF, - .u.ref = { - .refname = names[i], - } } }; + struct pq_entry e = { + .rec = &recs[i], + }; + merged_iter_pqueue_add(&pq, &e); merged_iter_pqueue_check(pq); + i = (i * 7) % N; } while (i != 1); while (!merged_iter_pqueue_is_empty(pq)) { struct pq_entry e = merged_iter_pqueue_remove(&pq); - struct reftable_record *rec = &e.rec; merged_iter_pqueue_check(pq); - EXPECT(reftable_record_type(rec) == BLOCK_TYPE_REF); - if (last) { - EXPECT(strcmp(last, rec->u.ref.refname) < 0); - } - /* this is names[i], so don't dealloc. */ - last = rec->u.ref.refname; - rec->u.ref.refname = NULL; - reftable_record_release(rec); - } - for (i = 0; i < N; i++) { - reftable_free(names[i]); + EXPECT(reftable_record_type(e.rec) == BLOCK_TYPE_REF); + if (last) + EXPECT(strcmp(last, e.rec->u.ref.refname) < 0); + last = e.rec->u.ref.refname; } + for (i = 0; i < N; i++) + reftable_record_release(&recs[i]); merged_iter_pqueue_release(&pq); } diff --git a/reftable/reader.c b/reftable/reader.c index 2663b03938..481dff10d4 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -220,6 +220,7 @@ struct table_iter { struct reftable_reader *r; uint8_t typ; uint64_t block_off; + struct block_reader br; struct block_iter bi; int is_finished; }; @@ -227,16 +228,6 @@ struct table_iter { .bi = BLOCK_ITER_INIT \ } -static void table_iter_copy_from(struct table_iter *dest, - struct table_iter *src) -{ - dest->r = src->r; - dest->typ = src->typ; - dest->block_off = src->block_off; - dest->is_finished = src->is_finished; - block_iter_copy_from(&dest->bi, &src->bi); -} - static int table_iter_next_in_block(struct table_iter *ti, struct reftable_record *rec) { @@ -250,14 +241,8 @@ static int table_iter_next_in_block(struct table_iter *ti, static void table_iter_block_done(struct table_iter *ti) { - if (!ti->bi.br) { - return; - } - reftable_block_done(&ti->bi.br->block); - FREE_AND_NULL(ti->bi.br); - - ti->bi.last_key.len = 0; - ti->bi.next_off = 0; + block_reader_release(&ti->br); + block_iter_reset(&ti->bi); } static int32_t extract_block_size(uint8_t *data, uint8_t *typ, uint64_t off, @@ -321,32 +306,27 @@ done: return err; } -static int table_iter_next_block(struct table_iter *dest, - struct table_iter *src) +static void table_iter_close(struct table_iter *ti) { - uint64_t next_block_off = src->block_off + src->bi.br->full_block_size; - struct block_reader br = { 0 }; - int err = 0; + table_iter_block_done(ti); + block_iter_close(&ti->bi); +} - dest->r = src->r; - dest->typ = src->typ; - dest->block_off = next_block_off; +static int table_iter_next_block(struct table_iter *ti) +{ + uint64_t next_block_off = ti->block_off + ti->br.full_block_size; + int err; - err = reader_init_block_reader(src->r, &br, next_block_off, src->typ); - if (err > 0) { - dest->is_finished = 1; - return 1; - } - if (err != 0) + err = reader_init_block_reader(ti->r, &ti->br, next_block_off, ti->typ); + if (err > 0) + ti->is_finished = 1; + if (err) return err; - else { - struct block_reader *brp = - reftable_malloc(sizeof(struct block_reader)); - *brp = br; - dest->is_finished = 0; - block_reader_start(brp, &dest->bi); - } + ti->block_off = next_block_off; + ti->is_finished = 0; + block_iter_seek_start(&ti->bi, &ti->br); + return 0; } @@ -356,27 +336,30 @@ static int table_iter_next(struct table_iter *ti, struct reftable_record *rec) return REFTABLE_API_ERROR; while (1) { - struct table_iter next = TABLE_ITER_INIT; - int err = 0; - if (ti->is_finished) { + int err; + + if (ti->is_finished) return 1; - } + /* + * Check whether the current block still has more records. If + * so, return it. If the iterator returns positive then the + * current block has been exhausted. + */ err = table_iter_next_in_block(ti, rec); - if (err <= 0) { + if (err <= 0) return err; - } - err = table_iter_next_block(&next, ti); - if (err != 0) { + /* + * Otherwise, we need to continue to the next block in the + * table and retry. If there are no more blocks then the + * iterator is drained. + */ + err = table_iter_next_block(ti); + if (err) { ti->is_finished = 1; - } - table_iter_block_done(ti); - if (err != 0) { return err; } - table_iter_copy_from(ti, &next); - block_iter_close(&next.bi); } } @@ -385,16 +368,14 @@ static int table_iter_next_void(void *ti, struct reftable_record *rec) return table_iter_next(ti, rec); } -static void table_iter_close(void *p) +static void table_iter_close_void(void *ti) { - struct table_iter *ti = p; - table_iter_block_done(ti); - block_iter_close(&ti->bi); + table_iter_close(ti); } static struct reftable_iterator_vtable table_iter_vtable = { .next = &table_iter_next_void, - .close = &table_iter_close, + .close = &table_iter_close_void, }; static void iterator_from_table_iter(struct reftable_iterator *it, @@ -409,19 +390,16 @@ static int reader_table_iter_at(struct reftable_reader *r, struct table_iter *ti, uint64_t off, uint8_t typ) { - struct block_reader br = { 0 }; - struct block_reader *brp = NULL; + int err; - int err = reader_init_block_reader(r, &br, off, typ); + err = reader_init_block_reader(r, &ti->br, off, typ); if (err != 0) return err; - brp = reftable_malloc(sizeof(struct block_reader)); - *brp = br; ti->r = r; - ti->typ = block_reader_type(brp); + ti->typ = block_reader_type(&ti->br); ti->block_off = off; - block_reader_start(brp, &ti->bi); + block_iter_seek_start(&ti->bi, &ti->br); return 0; } @@ -446,23 +424,52 @@ static int reader_seek_linear(struct table_iter *ti, { struct strbuf want_key = STRBUF_INIT; struct strbuf got_key = STRBUF_INIT; - struct table_iter next = TABLE_ITER_INIT; struct reftable_record rec; int err = -1; reftable_record_init(&rec, reftable_record_type(want)); reftable_record_key(want, &want_key); + /* + * First we need to locate the block that must contain our record. To + * do so we scan through blocks linearly until we find the first block + * whose first key is bigger than our wanted key. Once we have found + * that block we know that the key must be contained in the preceding + * block. + * + * This algorithm is somewhat unfortunate because it means that we + * always have to seek one block too far and then back up. But as we + * can only decode the _first_ key of a block but not its _last_ key we + * have no other way to do this. + */ while (1) { - err = table_iter_next_block(&next, ti); + struct table_iter next = *ti; + + /* + * We must be careful to not modify underlying data of `ti` + * because we may find that `next` does not contain our desired + * block, but that `ti` does. In that case, we would discard + * `next` and continue with `ti`. + * + * This also means that we cannot reuse allocated memory for + * `next` here. While it would be great if we could, it should + * in practice not be too bad given that we should only ever + * end up doing linear seeks with at most three blocks. As soon + * as we have more than three blocks we would have an index, so + * we would not do a linear search there anymore. + */ + memset(&next.br.block, 0, sizeof(next.br.block)); + next.br.zstream = NULL; + next.br.uncompressed_data = NULL; + next.br.uncompressed_cap = 0; + + err = table_iter_next_block(&next); if (err < 0) goto done; - - if (err > 0) { + if (err > 0) break; - } - err = block_reader_first_key(next.bi.br, &got_key); + err = block_reader_first_key(&next.br, &got_key); if (err < 0) goto done; @@ -472,16 +479,20 @@ static int reader_seek_linear(struct table_iter *ti, } table_iter_block_done(ti); - table_iter_copy_from(ti, &next); + *ti = next; } - err = block_iter_seek(&ti->bi, &want_key); + /* + * We have located the block that must contain our record, so we seek + * the wanted key inside of it. If the block does not contain our key + * we know that the corresponding record does not exist. + */ + err = block_iter_seek_key(&ti->bi, &ti->br, &want_key); if (err < 0) goto done; err = 0; done: - block_iter_close(&next.bi); reftable_record_release(&rec); strbuf_release(&want_key); strbuf_release(&got_key); @@ -500,6 +511,7 @@ static int reader_seek_indexed(struct reftable_reader *r, .u.idx = { .last_key = STRBUF_INIT }, }; struct table_iter index_iter = TABLE_ITER_INIT; + struct table_iter empty = TABLE_ITER_INIT; struct table_iter next = TABLE_ITER_INIT; int err = 0; @@ -541,7 +553,6 @@ static int reader_seek_indexed(struct reftable_reader *r, * not exist. */ err = table_iter_next(&index_iter, &index_result); - table_iter_block_done(&index_iter); if (err != 0) goto done; @@ -550,7 +561,7 @@ static int reader_seek_indexed(struct reftable_reader *r, if (err != 0) goto done; - err = block_iter_seek(&next.bi, &want_index.u.idx.last_key); + err = block_iter_seek_key(&next.bi, &next.br, &want_index.u.idx.last_key); if (err < 0) goto done; @@ -564,18 +575,20 @@ static int reader_seek_indexed(struct reftable_reader *r, break; } - table_iter_copy_from(&index_iter, &next); + table_iter_close(&index_iter); + index_iter = next; + next = empty; } if (err == 0) { - struct table_iter empty = TABLE_ITER_INIT; struct table_iter *malloced = reftable_calloc(1, sizeof(*malloced)); - *malloced = empty; - table_iter_copy_from(malloced, &next); + *malloced = next; + next = empty; iterator_from_table_iter(it, malloced); } + done: - block_iter_close(&next.bi); + table_iter_close(&next); table_iter_close(&index_iter); reftable_record_release(&want_index); reftable_record_release(&index_result); @@ -589,25 +602,28 @@ static int reader_seek_internal(struct reftable_reader *r, struct reftable_reader_offsets *offs = reader_offsets_for(r, reftable_record_type(rec)); uint64_t idx = offs->index_offset; - struct table_iter ti = TABLE_ITER_INIT; - int err = 0; + struct table_iter ti = TABLE_ITER_INIT, *p; + int err; + if (idx > 0) return reader_seek_indexed(r, it, rec); err = reader_start(r, &ti, reftable_record_type(rec), 0); if (err < 0) - return err; + goto out; + err = reader_seek_linear(&ti, rec); if (err < 0) - return err; - else { - struct table_iter *p = - reftable_malloc(sizeof(struct table_iter)); - *p = ti; - iterator_from_table_iter(it, p); - } + goto out; - return 0; + REFTABLE_ALLOC_ARRAY(p, 1); + *p = ti; + iterator_from_table_iter(it, p); + +out: + if (err) + table_iter_close(&ti); + return err; } static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it, diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c index 363fe0f998..a6dbd214c5 100644 --- a/reftable/readwrite_test.c +++ b/reftable/readwrite_test.c @@ -77,18 +77,15 @@ static void write_table(char ***names, struct strbuf *buf, int N, } for (i = 0; i < N; i++) { - uint8_t hash[GIT_SHA256_RAWSZ] = { 0 }; char name[100]; int n; - set_test_hash(hash, i); - snprintf(name, sizeof(name), "refs/heads/branch%02d", i); log.refname = name; log.update_index = update_index; log.value_type = REFTABLE_LOG_UPDATE; - log.value.update.new_hash = hash; + set_test_hash(log.value.update.new_hash, i); log.value.update.message = "message"; n = reftable_writer_add_log(w, &log); @@ -137,13 +134,10 @@ static void test_log_buffer_size(void) /* This tests buffer extension for log compression. Must use a random hash, to ensure that the compressed part is larger than the original. */ - uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ]; for (i = 0; i < GIT_SHA1_RAWSZ; i++) { - hash1[i] = (uint8_t)(git_rand() % 256); - hash2[i] = (uint8_t)(git_rand() % 256); + log.value.update.old_hash[i] = (uint8_t)(git_rand() % 256); + log.value.update.new_hash[i] = (uint8_t)(git_rand() % 256); } - log.value.update.old_hash = hash1; - log.value.update.new_hash = hash2; reftable_writer_set_limits(w, update_index, update_index); err = reftable_writer_add_log(w, &log); EXPECT_ERR(err); @@ -161,25 +155,26 @@ static void test_log_overflow(void) .block_size = ARRAY_SIZE(msg), }; int err; - struct reftable_log_record - log = { .refname = "refs/heads/master", - .update_index = 0xa, - .value_type = REFTABLE_LOG_UPDATE, - .value = { .update = { - .name = "Han-Wen Nienhuys", - .email = "hanwen@google.com", - .tz_offset = 100, - .time = 0x5e430672, - .message = msg, - } } }; + struct reftable_log_record log = { + .refname = "refs/heads/master", + .update_index = 0xa, + .value_type = REFTABLE_LOG_UPDATE, + .value = { + .update = { + .old_hash = { 1 }, + .new_hash = { 2 }, + .name = "Han-Wen Nienhuys", + .email = "hanwen@google.com", + .tz_offset = 100, + .time = 0x5e430672, + .message = msg, + }, + }, + }; struct reftable_writer *w = reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - uint8_t hash1[GIT_SHA1_RAWSZ] = {1}, hash2[GIT_SHA1_RAWSZ] = { 2 }; - memset(msg, 'x', sizeof(msg) - 1); - log.value.update.old_hash = hash1; - log.value.update.new_hash = hash2; reftable_writer_set_limits(w, update_index, update_index); err = reftable_writer_add_log(w, &log); EXPECT(err == REFTABLE_ENTRY_TOO_BIG_ERROR); @@ -219,16 +214,13 @@ static void test_log_write_read(void) EXPECT_ERR(err); } for (i = 0; i < N; i++) { - uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ]; struct reftable_log_record log = { NULL }; - set_test_hash(hash1, i); - set_test_hash(hash2, i + 1); log.refname = names[i]; log.update_index = i; log.value_type = REFTABLE_LOG_UPDATE; - log.value.update.old_hash = hash1; - log.value.update.new_hash = hash2; + set_test_hash(log.value.update.old_hash, i); + set_test_hash(log.value.update.new_hash, i + 1); err = reftable_writer_add_log(w, &log); EXPECT_ERR(err); @@ -298,18 +290,15 @@ static void test_log_zlib_corruption(void) struct reftable_writer *w = reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); const struct reftable_stats *stats = NULL; - uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 }; - uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 }; char message[100] = { 0 }; int err, i, n; - struct reftable_log_record log = { .refname = "refname", .value_type = REFTABLE_LOG_UPDATE, .value = { .update = { - .new_hash = hash1, - .old_hash = hash2, + .new_hash = { 1 }, + .old_hash = { 2 }, .name = "My Name", .email = "myname@invalid", .message = message, @@ -821,13 +810,12 @@ static void test_write_multiple_indices(void) } for (i = 0; i < 100; i++) { - unsigned char hash[GIT_SHA1_RAWSZ] = {i}; struct reftable_log_record log = { .update_index = 1, .value_type = REFTABLE_LOG_UPDATE, .value.update = { - .old_hash = hash, - .new_hash = hash, + .old_hash = { i }, + .new_hash = { i }, }, }; diff --git a/reftable/record.c b/reftable/record.c index 26c5e43f9b..5506f3e913 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -159,34 +159,49 @@ int reftable_encode_key(int *restart, struct string_view dest, return start.len - dest.len; } -int reftable_decode_key(struct strbuf *key, uint8_t *extra, - struct strbuf last_key, struct string_view in) +int reftable_decode_keylen(struct string_view in, + uint64_t *prefix_len, + uint64_t *suffix_len, + uint8_t *extra) { - int start_len = in.len; - uint64_t prefix_len = 0; - uint64_t suffix_len = 0; - int n = get_var_int(&prefix_len, &in); + size_t start_len = in.len; + int n; + + n = get_var_int(prefix_len, &in); if (n < 0) return -1; string_view_consume(&in, n); - if (prefix_len > last_key.len) - return -1; - - n = get_var_int(&suffix_len, &in); + n = get_var_int(suffix_len, &in); if (n <= 0) return -1; string_view_consume(&in, n); - *extra = (uint8_t)(suffix_len & 0x7); - suffix_len >>= 3; + *extra = (uint8_t)(*suffix_len & 0x7); + *suffix_len >>= 3; - if (in.len < suffix_len) + return start_len - in.len; +} + +int reftable_decode_key(struct strbuf *last_key, uint8_t *extra, + struct string_view in) +{ + int start_len = in.len; + uint64_t prefix_len = 0; + uint64_t suffix_len = 0; + int n; + + n = reftable_decode_keylen(in, &prefix_len, &suffix_len, extra); + if (n < 0) return -1; + string_view_consume(&in, n); - strbuf_reset(key); - strbuf_add(key, last_key.buf, prefix_len); - strbuf_add(key, in.buf, suffix_len); + if (in.len < suffix_len || + prefix_len > last_key->len) + return -1; + + strbuf_setlen(last_key, prefix_len); + strbuf_add(last_key, in.buf, suffix_len); string_view_consume(&in, suffix_len); return start_len - in.len; @@ -205,14 +220,26 @@ static void reftable_ref_record_copy_from(void *rec, const void *src_rec, { struct reftable_ref_record *ref = rec; const struct reftable_ref_record *src = src_rec; + char *refname = NULL; + size_t refname_cap = 0; + assert(hash_size > 0); - /* This is simple and correct, but we could probably reuse the hash - * fields. */ + SWAP(refname, ref->refname); + SWAP(refname_cap, ref->refname_cap); reftable_ref_record_release(ref); + SWAP(ref->refname, refname); + SWAP(ref->refname_cap, refname_cap); + if (src->refname) { - ref->refname = xstrdup(src->refname); + size_t refname_len = strlen(src->refname); + + REFTABLE_ALLOC_GROW(ref->refname, refname_len + 1, + ref->refname_cap); + memcpy(ref->refname, src->refname, refname_len); + ref->refname[refname_len] = 0; } + ref->update_index = src->update_index; ref->value_type = src->value_type; switch (src->value_type) { @@ -363,24 +390,33 @@ static int reftable_ref_record_encode(const void *rec, struct string_view s, static int reftable_ref_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch) { struct reftable_ref_record *r = rec; struct string_view start = in; uint64_t update_index = 0; - int n = get_var_int(&update_index, &in); + const char *refname = NULL; + size_t refname_cap = 0; + int n; + + assert(hash_size > 0); + + n = get_var_int(&update_index, &in); if (n < 0) return n; string_view_consume(&in, n); + SWAP(refname, r->refname); + SWAP(refname_cap, r->refname_cap); reftable_ref_record_release(r); + SWAP(r->refname, refname); + SWAP(r->refname_cap, refname_cap); - assert(hash_size > 0); - - r->refname = reftable_realloc(r->refname, key.len + 1); + REFTABLE_ALLOC_GROW(r->refname, key.len + 1, r->refname_cap); memcpy(r->refname, key.buf, key.len); - r->update_index = update_index; r->refname[key.len] = 0; + + r->update_index = update_index; r->value_type = val_type; switch (val_type) { case REFTABLE_REF_VAL1: @@ -405,13 +441,12 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key, break; case REFTABLE_REF_SYMREF: { - struct strbuf dest = STRBUF_INIT; - int n = decode_string(&dest, in); + int n = decode_string(scratch, in); if (n < 0) { return -1; } string_view_consume(&in, n); - r->value.symref = dest.buf; + r->value.symref = strbuf_detach(scratch, NULL); } break; case REFTABLE_REF_DELETION: @@ -430,7 +465,6 @@ static int reftable_ref_record_is_deletion_void(const void *p) (const struct reftable_ref_record *)p); } - static int reftable_ref_record_equal_void(const void *a, const void *b, int hash_size) { @@ -439,6 +473,13 @@ static int reftable_ref_record_equal_void(const void *a, return reftable_ref_record_equal(ra, rb, hash_size); } +static int reftable_ref_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_ref_record *a = _a; + const struct reftable_ref_record *b = _b; + return strcmp(a->refname, b->refname); +} + static void reftable_ref_record_print_void(const void *rec, int hash_size) { @@ -455,6 +496,7 @@ static struct reftable_record_vtable reftable_ref_record_vtable = { .release = &reftable_ref_record_release_void, .is_deletion = &reftable_ref_record_is_deletion_void, .equal = &reftable_ref_record_equal_void, + .cmp = &reftable_ref_record_cmp_void, .print = &reftable_ref_record_print_void, }; @@ -552,7 +594,7 @@ static int reftable_obj_record_encode(const void *rec, struct string_view s, static int reftable_obj_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch UNUSED) { struct string_view start = in; struct reftable_obj_record *r = rec; @@ -561,6 +603,8 @@ static int reftable_obj_record_decode(void *rec, struct strbuf key, uint64_t last; int j; + reftable_obj_record_release(r); + REFTABLE_ALLOC_ARRAY(r->hash_prefix, key.len); memcpy(r->hash_prefix, key.buf, key.len); r->hash_prefix_len = key.len; @@ -627,6 +671,25 @@ static int reftable_obj_record_equal_void(const void *a, const void *b, int hash return 1; } +static int reftable_obj_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_obj_record *a = _a; + const struct reftable_obj_record *b = _b; + int cmp; + + cmp = memcmp(a->hash_prefix, b->hash_prefix, + a->hash_prefix_len > b->hash_prefix_len ? + a->hash_prefix_len : b->hash_prefix_len); + if (cmp) + return cmp; + + /* + * When the prefix is the same then the object record that is longer is + * considered to be bigger. + */ + return a->hash_prefix_len - b->hash_prefix_len; +} + static struct reftable_record_vtable reftable_obj_record_vtable = { .key = &reftable_obj_record_key, .type = BLOCK_TYPE_OBJ, @@ -637,6 +700,7 @@ static struct reftable_record_vtable reftable_obj_record_vtable = { .release = &reftable_obj_record_release, .is_deletion = ¬_a_deletion, .equal = &reftable_obj_record_equal_void, + .cmp = &reftable_obj_record_cmp_void, .print = &reftable_obj_record_print, }; @@ -716,16 +780,10 @@ static void reftable_log_record_copy_from(void *rec, const void *src_rec, xstrdup(dst->value.update.message); } - if (dst->value.update.new_hash) { - REFTABLE_ALLOC_ARRAY(dst->value.update.new_hash, hash_size); - memcpy(dst->value.update.new_hash, - src->value.update.new_hash, hash_size); - } - if (dst->value.update.old_hash) { - REFTABLE_ALLOC_ARRAY(dst->value.update.old_hash, hash_size); - memcpy(dst->value.update.old_hash, - src->value.update.old_hash, hash_size); - } + memcpy(dst->value.update.new_hash, + src->value.update.new_hash, hash_size); + memcpy(dst->value.update.old_hash, + src->value.update.old_hash, hash_size); break; } } @@ -743,8 +801,6 @@ void reftable_log_record_release(struct reftable_log_record *r) case REFTABLE_LOG_DELETION: break; case REFTABLE_LOG_UPDATE: - reftable_free(r->value.update.new_hash); - reftable_free(r->value.update.old_hash); reftable_free(r->value.update.name); reftable_free(r->value.update.email); reftable_free(r->value.update.message); @@ -761,33 +817,20 @@ static uint8_t reftable_log_record_val_type(const void *rec) return reftable_log_record_is_deletion(log) ? 0 : 1; } -static uint8_t zero[GIT_SHA256_RAWSZ] = { 0 }; - static int reftable_log_record_encode(const void *rec, struct string_view s, int hash_size) { const struct reftable_log_record *r = rec; struct string_view start = s; int n = 0; - uint8_t *oldh = NULL; - uint8_t *newh = NULL; if (reftable_log_record_is_deletion(r)) return 0; - oldh = r->value.update.old_hash; - newh = r->value.update.new_hash; - if (!oldh) { - oldh = zero; - } - if (!newh) { - newh = zero; - } - if (s.len < 2 * hash_size) return -1; - memcpy(s.buf, oldh, hash_size); - memcpy(s.buf + hash_size, newh, hash_size); + memcpy(s.buf, r->value.update.old_hash, hash_size); + memcpy(s.buf + hash_size, r->value.update.new_hash, hash_size); string_view_consume(&s, 2 * hash_size); n = encode_string(r->value.update.name ? r->value.update.name : "", s); @@ -823,19 +866,18 @@ static int reftable_log_record_encode(const void *rec, struct string_view s, static int reftable_log_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch) { struct string_view start = in; struct reftable_log_record *r = rec; uint64_t max = 0; uint64_t ts = 0; - struct strbuf dest = STRBUF_INIT; int n; if (key.len <= 9 || key.buf[key.len - 9] != 0) return REFTABLE_FORMAT_ERROR; - r->refname = reftable_realloc(r->refname, key.len - 8); + REFTABLE_ALLOC_GROW(r->refname, key.len - 8, r->refname_cap); memcpy(r->refname, key.buf, key.len - 8); ts = get_be64(key.buf + key.len - 8); @@ -844,9 +886,8 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, if (val_type != r->value_type) { switch (r->value_type) { case REFTABLE_LOG_UPDATE: - FREE_AND_NULL(r->value.update.old_hash); - FREE_AND_NULL(r->value.update.new_hash); FREE_AND_NULL(r->value.update.message); + r->value.update.message_cap = 0; FREE_AND_NULL(r->value.update.email); FREE_AND_NULL(r->value.update.name); break; @@ -862,36 +903,43 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, if (in.len < 2 * hash_size) return REFTABLE_FORMAT_ERROR; - r->value.update.old_hash = - reftable_realloc(r->value.update.old_hash, hash_size); - r->value.update.new_hash = - reftable_realloc(r->value.update.new_hash, hash_size); - memcpy(r->value.update.old_hash, in.buf, hash_size); memcpy(r->value.update.new_hash, in.buf + hash_size, hash_size); string_view_consume(&in, 2 * hash_size); - n = decode_string(&dest, in); + n = decode_string(scratch, in); if (n < 0) goto done; string_view_consume(&in, n); - r->value.update.name = - reftable_realloc(r->value.update.name, dest.len + 1); - memcpy(r->value.update.name, dest.buf, dest.len); - r->value.update.name[dest.len] = 0; + /* + * In almost all cases we can expect the reflog name to not change for + * reflog entries as they are tied to the local identity, not to the + * target commits. As an optimization for this common case we can thus + * skip copying over the name in case it's accurate already. + */ + if (!r->value.update.name || + strcmp(r->value.update.name, scratch->buf)) { + r->value.update.name = + reftable_realloc(r->value.update.name, scratch->len + 1); + memcpy(r->value.update.name, scratch->buf, scratch->len); + r->value.update.name[scratch->len] = 0; + } - strbuf_reset(&dest); - n = decode_string(&dest, in); + n = decode_string(scratch, in); if (n < 0) goto done; string_view_consume(&in, n); - r->value.update.email = - reftable_realloc(r->value.update.email, dest.len + 1); - memcpy(r->value.update.email, dest.buf, dest.len); - r->value.update.email[dest.len] = 0; + /* Same as above, but for the reflog email. */ + if (!r->value.update.email || + strcmp(r->value.update.email, scratch->buf)) { + r->value.update.email = + reftable_realloc(r->value.update.email, scratch->len + 1); + memcpy(r->value.update.email, scratch->buf, scratch->len); + r->value.update.email[scratch->len] = 0; + } ts = 0; n = get_var_int(&ts, &in); @@ -905,22 +953,19 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, r->value.update.tz_offset = get_be16(in.buf); string_view_consume(&in, 2); - strbuf_reset(&dest); - n = decode_string(&dest, in); + n = decode_string(scratch, in); if (n < 0) goto done; string_view_consume(&in, n); - r->value.update.message = - reftable_realloc(r->value.update.message, dest.len + 1); - memcpy(r->value.update.message, dest.buf, dest.len); - r->value.update.message[dest.len] = 0; + REFTABLE_ALLOC_GROW(r->value.update.message, scratch->len + 1, + r->value.update.message_cap); + memcpy(r->value.update.message, scratch->buf, scratch->len); + r->value.update.message[scratch->len] = 0; - strbuf_release(&dest); return start.len - in.len; done: - strbuf_release(&dest); return REFTABLE_FORMAT_ERROR; } @@ -936,17 +981,6 @@ static int null_streq(char *a, char *b) return 0 == strcmp(a, b); } -static int zero_hash_eq(uint8_t *a, uint8_t *b, int sz) -{ - if (!a) - a = zero; - - if (!b) - b = zero; - - return !memcmp(a, b, sz); -} - static int reftable_log_record_equal_void(const void *a, const void *b, int hash_size) { @@ -955,6 +989,22 @@ static int reftable_log_record_equal_void(const void *a, hash_size); } +static int reftable_log_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_log_record *a = _a; + const struct reftable_log_record *b = _b; + int cmp = strcmp(a->refname, b->refname); + if (cmp) + return cmp; + + /* + * Note that the comparison here is reversed. This is because the + * update index is reversed when comparing keys. For reference, see how + * we handle this in reftable_log_record_key()`. + */ + return b->update_index - a->update_index; +} + int reftable_log_record_equal(const struct reftable_log_record *a, const struct reftable_log_record *b, int hash_size) { @@ -974,10 +1024,10 @@ int reftable_log_record_equal(const struct reftable_log_record *a, b->value.update.email) && null_streq(a->value.update.message, b->value.update.message) && - zero_hash_eq(a->value.update.old_hash, - b->value.update.old_hash, hash_size) && - zero_hash_eq(a->value.update.new_hash, - b->value.update.new_hash, hash_size); + !memcmp(a->value.update.old_hash, + b->value.update.old_hash, hash_size) && + !memcmp(a->value.update.new_hash, + b->value.update.new_hash, hash_size); } abort(); @@ -1004,6 +1054,7 @@ static struct reftable_record_vtable reftable_log_record_vtable = { .release = &reftable_log_record_release_void, .is_deletion = &reftable_log_record_is_deletion_void, .equal = &reftable_log_record_equal_void, + .cmp = &reftable_log_record_cmp_void, .print = &reftable_log_record_print_void, }; @@ -1054,7 +1105,7 @@ static int reftable_index_record_encode(const void *rec, struct string_view out, static int reftable_index_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch UNUSED) { struct string_view start = in; struct reftable_index_record *r = rec; @@ -1079,6 +1130,13 @@ static int reftable_index_record_equal(const void *a, const void *b, int hash_si return ia->offset == ib->offset && !strbuf_cmp(&ia->last_key, &ib->last_key); } +static int reftable_index_record_cmp(const void *_a, const void *_b) +{ + const struct reftable_index_record *a = _a; + const struct reftable_index_record *b = _b; + return strbuf_cmp(&a->last_key, &b->last_key); +} + static void reftable_index_record_print(const void *rec, int hash_size) { const struct reftable_index_record *idx = rec; @@ -1096,6 +1154,7 @@ static struct reftable_record_vtable reftable_index_record_vtable = { .release = &reftable_index_record_release, .is_deletion = ¬_a_deletion, .equal = &reftable_index_record_equal, + .cmp = &reftable_index_record_cmp, .print = &reftable_index_record_print, }; @@ -1104,11 +1163,6 @@ void reftable_record_key(struct reftable_record *rec, struct strbuf *dest) reftable_record_vtable(rec)->key(reftable_record_data(rec), dest); } -uint8_t reftable_record_type(struct reftable_record *rec) -{ - return rec->type; -} - int reftable_record_encode(struct reftable_record *rec, struct string_view dest, int hash_size) { @@ -1132,10 +1186,12 @@ uint8_t reftable_record_val_type(struct reftable_record *rec) } int reftable_record_decode(struct reftable_record *rec, struct strbuf key, - uint8_t extra, struct string_view src, int hash_size) + uint8_t extra, struct string_view src, int hash_size, + struct strbuf *scratch) { return reftable_record_vtable(rec)->decode(reftable_record_data(rec), - key, extra, src, hash_size); + key, extra, src, hash_size, + scratch); } void reftable_record_release(struct reftable_record *rec) @@ -1149,6 +1205,14 @@ int reftable_record_is_deletion(struct reftable_record *rec) reftable_record_data(rec)); } +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b) +{ + if (a->type != b->type) + BUG("cannot compare reftable records of different type"); + return reftable_record_vtable(a)->cmp( + reftable_record_data(a), reftable_record_data(b)); +} + int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size) { if (a->type != b->type) @@ -1222,12 +1286,6 @@ int reftable_log_record_is_deletion(const struct reftable_log_record *log) return (log->value_type == REFTABLE_LOG_DELETION); } -void string_view_consume(struct string_view *s, int n) -{ - s->buf += n; - s->len -= n; -} - static void *reftable_record_data(struct reftable_record *rec) { switch (rec->type) { diff --git a/reftable/record.h b/reftable/record.h index e64ed30c80..d778133e6e 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -25,7 +25,11 @@ struct string_view { }; /* Advance `s.buf` by `n`, and decrease length. */ -void string_view_consume(struct string_view *s, int n); +static inline void string_view_consume(struct string_view *s, int n) +{ + s->buf += n; + s->len -= n; +} /* utilities for de/encoding varints */ @@ -51,7 +55,8 @@ struct reftable_record_vtable { /* decode data from `src` into the record. */ int (*decode)(void *rec, struct strbuf key, uint8_t extra, - struct string_view src, int hash_size); + struct string_view src, int hash_size, + struct strbuf *scratch); /* deallocate and null the record. */ void (*release)(void *rec); @@ -62,6 +67,12 @@ struct reftable_record_vtable { /* Are two records equal? This assumes they have the same type. Returns 0 for non-equal. */ int (*equal)(const void *a, const void *b, int hash_size); + /* + * Compare keys of two records with each other. The records must have + * the same type. + */ + int (*cmp)(const void *a, const void *b); + /* Print on stdout, for debugging. */ void (*print)(const void *rec, int hash_size); }; @@ -75,9 +86,18 @@ int reftable_encode_key(int *is_restart, struct string_view dest, struct strbuf prev_key, struct strbuf key, uint8_t extra); -/* Decode into `key` and `extra` from `in` */ -int reftable_decode_key(struct strbuf *key, uint8_t *extra, - struct strbuf last_key, struct string_view in); +/* Decode a record's key lengths. */ +int reftable_decode_keylen(struct string_view in, + uint64_t *prefix_len, + uint64_t *suffix_len, + uint8_t *extra); + +/* + * Decode into `last_key` and `extra` from `in`. `last_key` is expected to + * contain the decoded key of the preceding record, if any. + */ +int reftable_decode_key(struct strbuf *last_key, uint8_t *extra, + struct string_view in); /* reftable_index_record are used internally to speed up lookups. */ struct reftable_index_record { @@ -114,10 +134,10 @@ struct reftable_record { void reftable_record_init(struct reftable_record *rec, uint8_t typ); /* see struct record_vtable */ +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b); int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size); void reftable_record_print(struct reftable_record *rec, int hash_size); void reftable_record_key(struct reftable_record *rec, struct strbuf *dest); -uint8_t reftable_record_type(struct reftable_record *rec); void reftable_record_copy_from(struct reftable_record *rec, struct reftable_record *src, int hash_size); uint8_t reftable_record_val_type(struct reftable_record *rec); @@ -125,9 +145,14 @@ int reftable_record_encode(struct reftable_record *rec, struct string_view dest, int hash_size); int reftable_record_decode(struct reftable_record *rec, struct strbuf key, uint8_t extra, struct string_view src, - int hash_size); + int hash_size, struct strbuf *scratch); int reftable_record_is_deletion(struct reftable_record *rec); +static inline uint8_t reftable_record_type(struct reftable_record *rec) +{ + return rec->type; +} + /* frees and zeroes out the embedded record */ void reftable_record_release(struct reftable_record *rec); diff --git a/reftable/record_test.c b/reftable/record_test.c index a86cff5526..c158ee79ff 100644 --- a/reftable/record_test.c +++ b/reftable/record_test.c @@ -99,6 +99,7 @@ static void set_hash(uint8_t *h, int j) static void test_reftable_ref_record_roundtrip(void) { + struct strbuf scratch = STRBUF_INIT; int i = 0; for (i = REFTABLE_REF_DELETION; i < REFTABLE_NR_REF_VALUETYPES; i++) { @@ -140,7 +141,7 @@ static void test_reftable_ref_record_roundtrip(void) EXPECT(n > 0); /* decode into a non-zero reftable_record to test for leaks. */ - m = reftable_record_decode(&out, key, i, dest, GIT_SHA1_RAWSZ); + m = reftable_record_decode(&out, key, i, dest, GIT_SHA1_RAWSZ, &scratch); EXPECT(n == m); EXPECT(reftable_ref_record_equal(&in.u.ref, &out.u.ref, @@ -150,6 +151,8 @@ static void test_reftable_ref_record_roundtrip(void) strbuf_release(&key); reftable_record_release(&out); } + + strbuf_release(&scratch); } static void test_reftable_log_record_equal(void) @@ -175,7 +178,6 @@ static void test_reftable_log_record_equal(void) static void test_reftable_log_record_roundtrip(void) { int i; - struct reftable_log_record in[] = { { .refname = xstrdup("refs/heads/master"), @@ -183,8 +185,6 @@ static void test_reftable_log_record_roundtrip(void) .value_type = REFTABLE_LOG_UPDATE, .value = { .update = { - .old_hash = reftable_malloc(GIT_SHA1_RAWSZ), - .new_hash = reftable_malloc(GIT_SHA1_RAWSZ), .name = xstrdup("han-wen"), .email = xstrdup("hanwen@google.com"), .message = xstrdup("test"), @@ -202,15 +202,10 @@ static void test_reftable_log_record_roundtrip(void) .refname = xstrdup("branch"), .update_index = 33, .value_type = REFTABLE_LOG_UPDATE, - .value = { - .update = { - .old_hash = reftable_malloc(GIT_SHA1_RAWSZ), - .new_hash = reftable_malloc(GIT_SHA1_RAWSZ), - /* rest of fields left empty. */ - }, - }, } }; + struct strbuf scratch = STRBUF_INIT; + set_test_hash(in[0].value.update.new_hash, 1); set_test_hash(in[0].value.update.old_hash, 2); set_test_hash(in[2].value.update.new_hash, 3); @@ -231,8 +226,6 @@ static void test_reftable_log_record_roundtrip(void) .value_type = REFTABLE_LOG_UPDATE, .value = { .update = { - .new_hash = reftable_calloc(GIT_SHA1_RAWSZ, 1), - .old_hash = reftable_calloc(GIT_SHA1_RAWSZ, 1), .name = xstrdup("old name"), .email = xstrdup("old@email"), .message = xstrdup("old message"), @@ -252,7 +245,7 @@ static void test_reftable_log_record_roundtrip(void) EXPECT(n >= 0); valtype = reftable_record_val_type(&rec); m = reftable_record_decode(&out, key, valtype, dest, - GIT_SHA1_RAWSZ); + GIT_SHA1_RAWSZ, &scratch); EXPECT(n == m); EXPECT(reftable_log_record_equal(&in[i], &out.u.log, @@ -261,6 +254,8 @@ static void test_reftable_log_record_roundtrip(void) strbuf_release(&key); reftable_record_release(&out); } + + strbuf_release(&scratch); } static void test_u24_roundtrip(void) @@ -295,7 +290,8 @@ static void test_key_roundtrip(void) EXPECT(!restart); EXPECT(n > 0); - m = reftable_decode_key(&roundtrip, &rt_extra, last_key, dest); + strbuf_addstr(&roundtrip, "refs/heads/master"); + m = reftable_decode_key(&roundtrip, &rt_extra, dest); EXPECT(n == m); EXPECT(0 == strbuf_cmp(&key, &roundtrip)); EXPECT(rt_extra == extra); @@ -309,23 +305,27 @@ static void test_reftable_obj_record_roundtrip(void) { uint8_t testHash1[GIT_SHA1_RAWSZ] = { 1, 2, 3, 4, 0 }; uint64_t till9[] = { 1, 2, 3, 4, 500, 600, 700, 800, 9000 }; - struct reftable_obj_record recs[3] = { { - .hash_prefix = testHash1, - .hash_prefix_len = 5, - .offsets = till9, - .offset_len = 3, - }, - { - .hash_prefix = testHash1, - .hash_prefix_len = 5, - .offsets = till9, - .offset_len = 9, - }, - { - .hash_prefix = testHash1, - .hash_prefix_len = 5, - } }; + struct reftable_obj_record recs[3] = { + { + .hash_prefix = testHash1, + .hash_prefix_len = 5, + .offsets = till9, + .offset_len = 3, + }, + { + .hash_prefix = testHash1, + .hash_prefix_len = 5, + .offsets = till9, + .offset_len = 9, + }, + { + .hash_prefix = testHash1, + .hash_prefix_len = 5, + }, + }; + struct strbuf scratch = STRBUF_INIT; int i = 0; + for (i = 0; i < ARRAY_SIZE(recs); i++) { uint8_t buffer[1024] = { 0 }; struct string_view dest = { @@ -349,13 +349,15 @@ static void test_reftable_obj_record_roundtrip(void) EXPECT(n > 0); extra = reftable_record_val_type(&in); m = reftable_record_decode(&out, key, extra, dest, - GIT_SHA1_RAWSZ); + GIT_SHA1_RAWSZ, &scratch); EXPECT(n == m); EXPECT(reftable_record_equal(&in, &out, GIT_SHA1_RAWSZ)); strbuf_release(&key); reftable_record_release(&out); } + + strbuf_release(&scratch); } static void test_reftable_index_record_roundtrip(void) @@ -372,6 +374,7 @@ static void test_reftable_index_record_roundtrip(void) .buf = buffer, .len = sizeof(buffer), }; + struct strbuf scratch = STRBUF_INIT; struct strbuf key = STRBUF_INIT; struct reftable_record out = { .type = BLOCK_TYPE_INDEX, @@ -389,13 +392,15 @@ static void test_reftable_index_record_roundtrip(void) EXPECT(n > 0); extra = reftable_record_val_type(&in); - m = reftable_record_decode(&out, key, extra, dest, GIT_SHA1_RAWSZ); + m = reftable_record_decode(&out, key, extra, dest, GIT_SHA1_RAWSZ, + &scratch); EXPECT(m == n); EXPECT(reftable_record_equal(&in, &out, GIT_SHA1_RAWSZ)); reftable_record_release(&out); strbuf_release(&key); + strbuf_release(&scratch); strbuf_release(&in.u.idx.last_key); } diff --git a/reftable/refname.c b/reftable/refname.c deleted file mode 100644 index 7570e4acf9..0000000000 --- a/reftable/refname.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - Copyright 2020 Google LLC - - Use of this source code is governed by a BSD-style - license that can be found in the LICENSE file or at - https://developers.google.com/open-source/licenses/bsd -*/ - -#include "system.h" -#include "reftable-error.h" -#include "basics.h" -#include "refname.h" -#include "reftable-iterator.h" - -struct find_arg { - char **names; - const char *want; -}; - -static int find_name(size_t k, void *arg) -{ - struct find_arg *f_arg = arg; - return strcmp(f_arg->names[k], f_arg->want) >= 0; -} - -static int modification_has_ref(struct modification *mod, const char *name) -{ - struct reftable_ref_record ref = { NULL }; - int err = 0; - - if (mod->add_len > 0) { - struct find_arg arg = { - .names = mod->add, - .want = name, - }; - int idx = binsearch(mod->add_len, find_name, &arg); - if (idx < mod->add_len && !strcmp(mod->add[idx], name)) { - return 0; - } - } - - if (mod->del_len > 0) { - struct find_arg arg = { - .names = mod->del, - .want = name, - }; - int idx = binsearch(mod->del_len, find_name, &arg); - if (idx < mod->del_len && !strcmp(mod->del[idx], name)) { - return 1; - } - } - - err = reftable_table_read_ref(&mod->tab, name, &ref); - reftable_ref_record_release(&ref); - return err; -} - -static void modification_release(struct modification *mod) -{ - /* don't delete the strings themselves; they're owned by ref records. - */ - FREE_AND_NULL(mod->add); - FREE_AND_NULL(mod->del); - mod->add_len = 0; - mod->del_len = 0; -} - -static int modification_has_ref_with_prefix(struct modification *mod, - const char *prefix) -{ - struct reftable_iterator it = { NULL }; - struct reftable_ref_record ref = { NULL }; - int err = 0; - - if (mod->add_len > 0) { - struct find_arg arg = { - .names = mod->add, - .want = prefix, - }; - int idx = binsearch(mod->add_len, find_name, &arg); - if (idx < mod->add_len && - !strncmp(prefix, mod->add[idx], strlen(prefix))) - goto done; - } - err = reftable_table_seek_ref(&mod->tab, &it, prefix); - if (err) - goto done; - - while (1) { - err = reftable_iterator_next_ref(&it, &ref); - if (err) - goto done; - - if (mod->del_len > 0) { - struct find_arg arg = { - .names = mod->del, - .want = ref.refname, - }; - int idx = binsearch(mod->del_len, find_name, &arg); - if (idx < mod->del_len && - !strcmp(ref.refname, mod->del[idx])) { - continue; - } - } - - if (strncmp(ref.refname, prefix, strlen(prefix))) { - err = 1; - goto done; - } - err = 0; - goto done; - } - -done: - reftable_ref_record_release(&ref); - reftable_iterator_destroy(&it); - return err; -} - -static int validate_refname(const char *name) -{ - while (1) { - char *next = strchr(name, '/'); - if (!*name) { - return REFTABLE_REFNAME_ERROR; - } - if (!next) { - return 0; - } - if (next - name == 0 || (next - name == 1 && *name == '.') || - (next - name == 2 && name[0] == '.' && name[1] == '.')) - return REFTABLE_REFNAME_ERROR; - name = next + 1; - } - return 0; -} - -int validate_ref_record_addition(struct reftable_table tab, - struct reftable_ref_record *recs, size_t sz) -{ - struct modification mod = { - .tab = tab, - .add = reftable_calloc(sz, sizeof(*mod.add)), - .del = reftable_calloc(sz, sizeof(*mod.del)), - }; - int i = 0; - int err = 0; - for (; i < sz; i++) { - if (reftable_ref_record_is_deletion(&recs[i])) { - mod.del[mod.del_len++] = recs[i].refname; - } else { - mod.add[mod.add_len++] = recs[i].refname; - } - } - - err = modification_validate(&mod); - modification_release(&mod); - return err; -} - -static void strbuf_trim_component(struct strbuf *sl) -{ - while (sl->len > 0) { - int is_slash = (sl->buf[sl->len - 1] == '/'); - strbuf_setlen(sl, sl->len - 1); - if (is_slash) - break; - } -} - -int modification_validate(struct modification *mod) -{ - struct strbuf slashed = STRBUF_INIT; - int err = 0; - int i = 0; - for (; i < mod->add_len; i++) { - err = validate_refname(mod->add[i]); - if (err) - goto done; - strbuf_reset(&slashed); - strbuf_addstr(&slashed, mod->add[i]); - strbuf_addstr(&slashed, "/"); - - err = modification_has_ref_with_prefix(mod, slashed.buf); - if (err == 0) { - err = REFTABLE_NAME_CONFLICT; - goto done; - } - if (err < 0) - goto done; - - strbuf_reset(&slashed); - strbuf_addstr(&slashed, mod->add[i]); - while (slashed.len) { - strbuf_trim_component(&slashed); - err = modification_has_ref(mod, slashed.buf); - if (err == 0) { - err = REFTABLE_NAME_CONFLICT; - goto done; - } - if (err < 0) - goto done; - } - } - err = 0; -done: - strbuf_release(&slashed); - return err; -} diff --git a/reftable/refname.h b/reftable/refname.h deleted file mode 100644 index a24b40fcb4..0000000000 --- a/reftable/refname.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - Copyright 2020 Google LLC - - Use of this source code is governed by a BSD-style - license that can be found in the LICENSE file or at - https://developers.google.com/open-source/licenses/bsd -*/ -#ifndef REFNAME_H -#define REFNAME_H - -#include "reftable-record.h" -#include "reftable-generic.h" - -struct modification { - struct reftable_table tab; - - char **add; - size_t add_len; - - char **del; - size_t del_len; -}; - -int validate_ref_record_addition(struct reftable_table tab, - struct reftable_ref_record *recs, size_t sz); - -int modification_validate(struct modification *mod); - -#endif diff --git a/reftable/refname_test.c b/reftable/refname_test.c deleted file mode 100644 index b9cc62554e..0000000000 --- a/reftable/refname_test.c +++ /dev/null @@ -1,101 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "basics.h" -#include "block.h" -#include "blocksource.h" -#include "reader.h" -#include "record.h" -#include "refname.h" -#include "reftable-error.h" -#include "reftable-writer.h" -#include "system.h" - -#include "test_framework.h" -#include "reftable-tests.h" - -struct testcase { - char *add; - char *del; - int error_code; -}; - -static void test_conflict(void) -{ - struct reftable_write_options opts = { 0 }; - struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - struct reftable_ref_record rec = { - .refname = "a/b", - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "destination", /* make sure it's not a symref. - */ - .update_index = 1, - }; - int err; - int i; - struct reftable_block_source source = { NULL }; - struct reftable_reader *rd = NULL; - struct reftable_table tab = { NULL }; - struct testcase cases[] = { - { "a/b/c", NULL, REFTABLE_NAME_CONFLICT }, - { "b", NULL, 0 }, - { "a", NULL, REFTABLE_NAME_CONFLICT }, - { "a", "a/b", 0 }, - - { "p/", NULL, REFTABLE_REFNAME_ERROR }, - { "p//q", NULL, REFTABLE_REFNAME_ERROR }, - { "p/./q", NULL, REFTABLE_REFNAME_ERROR }, - { "p/../q", NULL, REFTABLE_REFNAME_ERROR }, - - { "a/b/c", "a/b", 0 }, - { NULL, "a//b", 0 }, - }; - reftable_writer_set_limits(w, 1, 1); - - err = reftable_writer_add_ref(w, &rec); - EXPECT_ERR(err); - - err = reftable_writer_close(w); - EXPECT_ERR(err); - reftable_writer_free(w); - - block_source_from_strbuf(&source, &buf); - err = reftable_new_reader(&rd, &source, "filename"); - EXPECT_ERR(err); - - reftable_table_from_reader(&tab, rd); - - for (i = 0; i < ARRAY_SIZE(cases); i++) { - struct modification mod = { - .tab = tab, - }; - - if (cases[i].add) { - mod.add = &cases[i].add; - mod.add_len = 1; - } - if (cases[i].del) { - mod.del = &cases[i].del; - mod.del_len = 1; - } - - err = modification_validate(&mod); - EXPECT(err == cases[i].error_code); - } - - reftable_reader_free(rd); - strbuf_release(&buf); -} - -int refname_test_main(int argc, const char *argv[]) -{ - RUN_TEST(test_conflict); - return 0; -} diff --git a/reftable/reftable-error.h b/reftable/reftable-error.h index 4c457aaaf8..6368cd9ed9 100644 --- a/reftable/reftable-error.h +++ b/reftable/reftable-error.h @@ -25,7 +25,7 @@ enum reftable_error { */ REFTABLE_NOT_EXIST_ERROR = -4, - /* Trying to write out-of-date data. */ + /* Trying to access locked data. */ REFTABLE_LOCK_ERROR = -5, /* Misuse of the API: @@ -48,15 +48,15 @@ enum reftable_error { /* Wrote a table without blocks. */ REFTABLE_EMPTY_TABLE_ERROR = -8, - /* Dir/file conflict. */ - REFTABLE_NAME_CONFLICT = -9, - /* Invalid ref name. */ REFTABLE_REFNAME_ERROR = -10, /* Entry does not fit. This can happen when writing outsize reflog messages. */ REFTABLE_ENTRY_TOO_BIG_ERROR = -11, + + /* Trying to write out-of-date data. */ + REFTABLE_OUTDATED_ERROR = -12, }; /* convert the numeric error code to a string. The string should not be diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h index bb6e99acd3..2a2943cd13 100644 --- a/reftable/reftable-record.h +++ b/reftable/reftable-record.h @@ -22,6 +22,7 @@ https://developers.google.com/open-source/licenses/bsd /* reftable_ref_record holds a ref database entry target_value */ struct reftable_ref_record { char *refname; /* Name of the ref, malloced. */ + size_t refname_cap; uint64_t update_index; /* Logical timestamp at which this value is * written */ @@ -73,6 +74,7 @@ int reftable_ref_record_equal(const struct reftable_ref_record *a, /* reftable_log_record holds a reflog entry */ struct reftable_log_record { char *refname; + size_t refname_cap; uint64_t update_index; /* logical timestamp of a transactional update. */ @@ -87,13 +89,14 @@ struct reftable_log_record { union { struct { - uint8_t *new_hash; - uint8_t *old_hash; + unsigned char new_hash[GIT_MAX_RAWSZ]; + unsigned char old_hash[GIT_MAX_RAWSZ]; char *name; char *email; uint64_t time; int16_t tz_offset; char *message; + size_t message_cap; } update; } value; }; diff --git a/reftable/reftable-tests.h b/reftable/reftable-tests.h index 0019cbcfa4..114cc3d053 100644 --- a/reftable/reftable-tests.h +++ b/reftable/reftable-tests.h @@ -14,7 +14,6 @@ int block_test_main(int argc, const char **argv); int merged_test_main(int argc, const char **argv); int pq_test_main(int argc, const char **argv); int record_test_main(int argc, const char **argv); -int refname_test_main(int argc, const char **argv); int readwrite_test_main(int argc, const char **argv); int stack_test_main(int argc, const char **argv); int tree_test_main(int argc, const char **argv); diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h index 7c7cae5f99..b601a69a40 100644 --- a/reftable/reftable-writer.h +++ b/reftable/reftable-writer.h @@ -38,14 +38,13 @@ struct reftable_write_options { /* Default mode for creating files. If unset, use 0666 (+umask) */ unsigned int default_permissions; - /* boolean: do not check ref names for validity or dir/file conflicts. - */ - unsigned skip_name_check : 1; - /* boolean: copy log messages exactly. If unset, check that the message * is a single line, and add '\n' if missing. */ unsigned exact_log_message : 1; + + /* boolean: Prevent auto-compaction of tables. */ + unsigned disable_auto_compact : 1; }; /* reftable_block_stats holds statistics for a single block type */ diff --git a/reftable/stack.c b/reftable/stack.c index b64e55648a..a59ebe038d 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -12,8 +12,8 @@ https://developers.google.com/open-source/licenses/bsd #include "system.h" #include "merged.h" #include "reader.h" -#include "refname.h" #include "reftable-error.h" +#include "reftable-generic.h" #include "reftable-record.h" #include "reftable-merged.h" #include "writer.h" @@ -27,8 +27,6 @@ static int stack_write_compact(struct reftable_stack *st, struct reftable_writer *wr, size_t first, size_t last, struct reftable_log_expiry_config *config); -static int stack_check_addition(struct reftable_stack *st, - const char *new_tab_name); static void reftable_addition_close(struct reftable_addition *add); static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, int reuse_open); @@ -529,9 +527,9 @@ int reftable_stack_add(struct reftable_stack *st, { int err = stack_try_add(st, write, arg); if (err < 0) { - if (err == REFTABLE_LOCK_ERROR) { + if (err == REFTABLE_OUTDATED_ERROR) { /* Ignore error return, we want to propagate - REFTABLE_LOCK_ERROR. + REFTABLE_OUTDATED_ERROR. */ reftable_stack_reload(st); } @@ -590,9 +588,8 @@ static int reftable_stack_init_addition(struct reftable_addition *add, err = stack_uptodate(st); if (err < 0) goto done; - - if (err > 1) { - err = REFTABLE_LOCK_ERROR; + if (err > 0) { + err = REFTABLE_OUTDATED_ERROR; goto done; } @@ -681,8 +678,19 @@ int reftable_addition_commit(struct reftable_addition *add) if (err) goto done; - if (!add->stack->disable_auto_compact) + if (!add->stack->config.disable_auto_compact) { + /* + * Auto-compact the stack to keep the number of tables in + * control. It is possible that a concurrent writer is already + * trying to compact parts of the stack, which would lead to a + * `REFTABLE_LOCK_ERROR` because parts of the stack are locked + * already. This is a benign error though, so we ignore it. + */ err = reftable_stack_auto_compact(add->stack); + if (err < 0 && err != REFTABLE_LOCK_ERROR) + goto done; + err = 0; + } done: reftable_addition_close(add); @@ -713,10 +721,6 @@ static int stack_try_add(struct reftable_stack *st, int err = reftable_stack_init_addition(&add, st); if (err < 0) goto done; - if (err > 0) { - err = REFTABLE_LOCK_ERROR; - goto done; - } err = reftable_addition_add(&add, write_table, arg); if (err < 0) @@ -737,8 +741,9 @@ int reftable_addition_add(struct reftable_addition *add, struct strbuf tab_file_name = STRBUF_INIT; struct strbuf next_name = STRBUF_INIT; struct reftable_writer *wr = NULL; + struct tempfile *tab_file = NULL; int err = 0; - int tab_fd = 0; + int tab_fd; strbuf_reset(&next_name); format_name(&next_name, add->next_update_index, add->next_update_index); @@ -746,17 +751,20 @@ int reftable_addition_add(struct reftable_addition *add, stack_filename(&temp_tab_file_name, add->stack, next_name.buf); strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX"); - tab_fd = mkstemp(temp_tab_file_name.buf); - if (tab_fd < 0) { + tab_file = mks_tempfile(temp_tab_file_name.buf); + if (!tab_file) { err = REFTABLE_IO_ERROR; goto done; } if (add->stack->config.default_permissions) { - if (chmod(temp_tab_file_name.buf, add->stack->config.default_permissions)) { + if (chmod(get_tempfile_path(tab_file), + add->stack->config.default_permissions)) { err = REFTABLE_IO_ERROR; goto done; } } + tab_fd = get_tempfile_fd(tab_file); + wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, &tab_fd, &add->stack->config); err = write_table(wr, arg); @@ -771,17 +779,12 @@ int reftable_addition_add(struct reftable_addition *add, if (err < 0) goto done; - err = close(tab_fd); - tab_fd = 0; + err = close_tempfile_gently(tab_file); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; } - err = stack_check_addition(add->stack, temp_tab_file_name.buf); - if (err < 0) - goto done; - if (wr->min_update_index < add->next_update_index) { err = REFTABLE_API_ERROR; goto done; @@ -789,14 +792,13 @@ int reftable_addition_add(struct reftable_addition *add, format_name(&next_name, wr->min_update_index, wr->max_update_index); strbuf_addstr(&next_name, ".ref"); - stack_filename(&tab_file_name, add->stack, next_name.buf); /* On windows, this relies on rand() picking a unique destination name. Maybe we should do retry loop as well? */ - err = rename(temp_tab_file_name.buf, tab_file_name.buf); + err = rename_tempfile(&tab_file, tab_file_name.buf); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; @@ -806,14 +808,7 @@ int reftable_addition_add(struct reftable_addition *add, add->new_tables_cap); add->new_tables[add->new_tables_len++] = strbuf_detach(&next_name, NULL); done: - if (tab_fd > 0) { - close(tab_fd); - tab_fd = 0; - } - if (temp_tab_file_name.len > 0) { - unlink(temp_tab_file_name.buf); - } - + delete_tempfile(&tab_file); strbuf_release(&temp_tab_file_name); strbuf_release(&tab_file_name); strbuf_release(&next_name); @@ -832,51 +827,56 @@ uint64_t reftable_stack_next_update_index(struct reftable_stack *st) static int stack_compact_locked(struct reftable_stack *st, size_t first, size_t last, - struct strbuf *temp_tab, - struct reftable_log_expiry_config *config) + struct reftable_log_expiry_config *config, + struct tempfile **tab_file_out) { struct strbuf next_name = STRBUF_INIT; - int tab_fd = -1; + struct strbuf tab_file_path = STRBUF_INIT; struct reftable_writer *wr = NULL; - int err = 0; + struct tempfile *tab_file; + int tab_fd, err = 0; format_name(&next_name, reftable_reader_min_update_index(st->readers[first]), reftable_reader_max_update_index(st->readers[last])); + stack_filename(&tab_file_path, st, next_name.buf); + strbuf_addstr(&tab_file_path, ".temp.XXXXXX"); - stack_filename(temp_tab, st, next_name.buf); - strbuf_addstr(temp_tab, ".temp.XXXXXX"); + tab_file = mks_tempfile(tab_file_path.buf); + if (!tab_file) { + err = REFTABLE_IO_ERROR; + goto done; + } + tab_fd = get_tempfile_fd(tab_file); - tab_fd = mkstemp(temp_tab->buf); if (st->config.default_permissions && - chmod(temp_tab->buf, st->config.default_permissions) < 0) { + chmod(get_tempfile_path(tab_file), st->config.default_permissions) < 0) { err = REFTABLE_IO_ERROR; goto done; } - wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, &tab_fd, &st->config); - + wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, + &tab_fd, &st->config); err = stack_write_compact(st, wr, first, last, config); if (err < 0) goto done; + err = reftable_writer_close(wr); if (err < 0) goto done; - err = close(tab_fd); - tab_fd = 0; + err = close_tempfile_gently(tab_file); + if (err < 0) + goto done; + + *tab_file_out = tab_file; + tab_file = NULL; done: + delete_tempfile(&tab_file); reftable_writer_free(wr); - if (tab_fd > 0) { - close(tab_fd); - tab_fd = 0; - } - if (err != 0 && temp_tab->len > 0) { - unlink(temp_tab->buf); - strbuf_release(temp_tab); - } strbuf_release(&next_name); + strbuf_release(&tab_file_path); return err; } @@ -978,217 +978,213 @@ done: return err; } -/* < 0: error. 0 == OK, > 0 attempt failed; could retry. */ +/* + * Compact all tables in the range `[first, last)` into a single new table. + * + * This function returns `0` on success or a code `< 0` on failure. When the + * stack or any of the tables in the specified range are already locked then + * this function returns `REFTABLE_LOCK_ERROR`. This is a benign error that + * callers can either ignore, or they may choose to retry compaction after some + * amount of time. + */ static int stack_compact_range(struct reftable_stack *st, size_t first, size_t last, struct reftable_log_expiry_config *expiry) { - char **delete_on_success = NULL, **subtable_locks = NULL, **listp = NULL; - struct strbuf temp_tab_file_name = STRBUF_INIT; + struct strbuf tables_list_buf = STRBUF_INIT; struct strbuf new_table_name = STRBUF_INIT; - struct strbuf lock_file_name = STRBUF_INIT; - struct strbuf ref_list_contents = STRBUF_INIT; struct strbuf new_table_path = STRBUF_INIT; - size_t i, j, compact_count; - int err = 0; - int have_lock = 0; - int lock_file_fd = -1; - int is_empty_table = 0; + struct strbuf table_name = STRBUF_INIT; + struct lock_file tables_list_lock = LOCK_INIT; + struct lock_file *table_locks = NULL; + struct tempfile *new_table = NULL; + int is_empty_table = 0, err = 0; + size_t i; if (first > last || (!expiry && first == last)) { err = 0; goto done; } - compact_count = last - first + 1; - REFTABLE_CALLOC_ARRAY(delete_on_success, compact_count + 1); - REFTABLE_CALLOC_ARRAY(subtable_locks, compact_count + 1); - st->stats.attempts++; - strbuf_reset(&lock_file_name); - strbuf_addstr(&lock_file_name, st->list_file); - strbuf_addstr(&lock_file_name, ".lock"); - - lock_file_fd = - open(lock_file_name.buf, O_EXCL | O_CREAT | O_WRONLY, 0666); - if (lock_file_fd < 0) { - if (errno == EEXIST) { - err = 1; - } else { + /* + * Hold the lock so that we can read "tables.list" and lock all tables + * which are part of the user-specified range. + */ + err = hold_lock_file_for_update(&tables_list_lock, st->list_file, + LOCK_NO_DEREF); + if (err < 0) { + if (errno == EEXIST) + err = REFTABLE_LOCK_ERROR; + else err = REFTABLE_IO_ERROR; - } goto done; } - /* Don't want to write to the lock for now. */ - close(lock_file_fd); - lock_file_fd = -1; - have_lock = 1; err = stack_uptodate(st); - if (err != 0) + if (err) goto done; - for (i = first, j = 0; i <= last; i++) { - struct strbuf subtab_file_name = STRBUF_INIT; - struct strbuf subtab_lock = STRBUF_INIT; - int sublock_file_fd = -1; - - stack_filename(&subtab_file_name, st, - reader_name(st->readers[i])); - - strbuf_reset(&subtab_lock); - strbuf_addbuf(&subtab_lock, &subtab_file_name); - strbuf_addstr(&subtab_lock, ".lock"); - - sublock_file_fd = open(subtab_lock.buf, - O_EXCL | O_CREAT | O_WRONLY, 0666); - if (sublock_file_fd >= 0) { - close(sublock_file_fd); - } else if (sublock_file_fd < 0) { - if (errno == EEXIST) { - err = 1; - } else { + /* + * Lock all tables in the user-provided range. This is the slice of our + * stack which we'll compact. + */ + REFTABLE_CALLOC_ARRAY(table_locks, last - first + 1); + for (i = first; i <= last; i++) { + stack_filename(&table_name, st, reader_name(st->readers[i])); + + err = hold_lock_file_for_update(&table_locks[i - first], + table_name.buf, LOCK_NO_DEREF); + if (err < 0) { + if (errno == EEXIST) + err = REFTABLE_LOCK_ERROR; + else err = REFTABLE_IO_ERROR; - } + goto done; } - subtable_locks[j] = subtab_lock.buf; - delete_on_success[j] = subtab_file_name.buf; - j++; - - if (err != 0) + /* + * We need to close the lockfiles as we might otherwise easily + * run into file descriptor exhaustion when we compress a lot + * of tables. + */ + err = close_lock_file_gently(&table_locks[i - first]); + if (err < 0) { + err = REFTABLE_IO_ERROR; goto done; + } } - err = unlink(lock_file_name.buf); - if (err < 0) + /* + * We have locked all tables in our range and can thus release the + * "tables.list" lock while compacting the locked tables. This allows + * concurrent updates to the stack to proceed. + */ + err = rollback_lock_file(&tables_list_lock); + if (err < 0) { + err = REFTABLE_IO_ERROR; goto done; - have_lock = 0; - - err = stack_compact_locked(st, first, last, &temp_tab_file_name, - expiry); - /* Compaction + tombstones can create an empty table out of non-empty - * tables. */ - is_empty_table = (err == REFTABLE_EMPTY_TABLE_ERROR); - if (is_empty_table) { - err = 0; } - if (err < 0) - goto done; - lock_file_fd = - open(lock_file_name.buf, O_EXCL | O_CREAT | O_WRONLY, 0666); - if (lock_file_fd < 0) { - if (errno == EEXIST) { - err = 1; - } else { + /* + * Compact the now-locked tables into a new table. Note that compacting + * these tables may end up with an empty new table in case tombstones + * end up cancelling out all refs in that range. + */ + err = stack_compact_locked(st, first, last, expiry, &new_table); + if (err < 0) { + if (err != REFTABLE_EMPTY_TABLE_ERROR) + goto done; + is_empty_table = 1; + } + + /* + * Now that we have written the new, compacted table we need to re-lock + * "tables.list". We'll then replace the compacted range of tables with + * the new table. + */ + err = hold_lock_file_for_update(&tables_list_lock, st->list_file, + LOCK_NO_DEREF); + if (err < 0) { + if (errno == EEXIST) + err = REFTABLE_LOCK_ERROR; + else err = REFTABLE_IO_ERROR; - } goto done; } - have_lock = 1; + if (st->config.default_permissions) { - if (chmod(lock_file_name.buf, st->config.default_permissions) < 0) { + if (chmod(get_lock_file_path(&tables_list_lock), + st->config.default_permissions) < 0) { err = REFTABLE_IO_ERROR; goto done; } } - format_name(&new_table_name, st->readers[first]->min_update_index, - st->readers[last]->max_update_index); - strbuf_addstr(&new_table_name, ".ref"); - - stack_filename(&new_table_path, st, new_table_name.buf); - + /* + * If the resulting compacted table is not empty, then we need to move + * it into place now. + */ if (!is_empty_table) { - /* retry? */ - err = rename(temp_tab_file_name.buf, new_table_path.buf); + format_name(&new_table_name, st->readers[first]->min_update_index, + st->readers[last]->max_update_index); + strbuf_addstr(&new_table_name, ".ref"); + stack_filename(&new_table_path, st, new_table_name.buf); + + err = rename_tempfile(&new_table, new_table_path.buf); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; } } - for (i = 0; i < first; i++) { - strbuf_addstr(&ref_list_contents, st->readers[i]->name); - strbuf_addstr(&ref_list_contents, "\n"); - } - if (!is_empty_table) { - strbuf_addbuf(&ref_list_contents, &new_table_name); - strbuf_addstr(&ref_list_contents, "\n"); - } - for (i = last + 1; i < st->merged->stack_len; i++) { - strbuf_addstr(&ref_list_contents, st->readers[i]->name); - strbuf_addstr(&ref_list_contents, "\n"); - } - - err = write_in_full(lock_file_fd, ref_list_contents.buf, ref_list_contents.len); - if (err < 0) { - err = REFTABLE_IO_ERROR; - unlink(new_table_path.buf); - goto done; - } - - err = fsync_component(FSYNC_COMPONENT_REFERENCE, lock_file_fd); + /* + * Write the new "tables.list" contents with the compacted table we + * have just written. In case the compacted table became empty we + * simply skip writing it. + */ + for (i = 0; i < first; i++) + strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name); + if (!is_empty_table) + strbuf_addf(&tables_list_buf, "%s\n", new_table_name.buf); + for (i = last + 1; i < st->merged->stack_len; i++) + strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name); + + err = write_in_full(get_lock_file_fd(&tables_list_lock), + tables_list_buf.buf, tables_list_buf.len); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); goto done; } - err = close(lock_file_fd); - lock_file_fd = -1; + err = fsync_component(FSYNC_COMPONENT_REFERENCE, get_lock_file_fd(&tables_list_lock)); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); goto done; } - err = rename(lock_file_name.buf, st->list_file); + err = commit_lock_file(&tables_list_lock); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); goto done; } - have_lock = 0; - /* Reload the stack before deleting. On windows, we can only delete the - files after we closed them. - */ + /* + * Reload the stack before deleting the compacted tables. We can only + * delete the files after we closed them on Windows, so this needs to + * happen first. + */ err = reftable_stack_reload_maybe_reuse(st, first < last); + if (err < 0) + goto done; - listp = delete_on_success; - while (*listp) { - if (strcmp(*listp, new_table_path.buf)) { - unlink(*listp); - } - listp++; + /* + * Delete the old tables. They may still be in use by concurrent + * readers, so it is expected that unlinking tables may fail. + */ + for (i = first; i <= last; i++) { + struct lock_file *table_lock = &table_locks[i - first]; + char *table_path = get_locked_file_path(table_lock); + unlink(table_path); + free(table_path); } done: - free_names(delete_on_success); + rollback_lock_file(&tables_list_lock); + for (i = first; table_locks && i <= last; i++) + rollback_lock_file(&table_locks[i - first]); + reftable_free(table_locks); - if (subtable_locks) { - listp = subtable_locks; - while (*listp) { - unlink(*listp); - listp++; - } - free_names(subtable_locks); - } - if (lock_file_fd >= 0) { - close(lock_file_fd); - lock_file_fd = -1; - } - if (have_lock) { - unlink(lock_file_name.buf); - } + delete_tempfile(&new_table); strbuf_release(&new_table_name); strbuf_release(&new_table_path); - strbuf_release(&ref_list_contents); - strbuf_release(&temp_tab_file_name); - strbuf_release(&lock_file_name); + + strbuf_release(&tables_list_buf); + strbuf_release(&table_name); return err; } @@ -1204,7 +1200,7 @@ static int stack_compact_range_stats(struct reftable_stack *st, struct reftable_log_expiry_config *config) { int err = stack_compact_range(st, first, last, config); - if (err > 0) + if (err == REFTABLE_LOCK_ERROR) st->stats.failures++; return err; } @@ -1214,75 +1210,76 @@ static int segment_size(struct segment *s) return s->end - s->start; } -int fastlog2(uint64_t sz) -{ - int l = 0; - if (sz == 0) - return 0; - for (; sz; sz /= 2) { - l++; - } - return l - 1; -} - -struct segment *sizes_to_segments(size_t *seglen, uint64_t *sizes, size_t n) -{ - struct segment *segs = reftable_calloc(n, sizeof(*segs)); - struct segment cur = { 0 }; - size_t next = 0, i; - - if (n == 0) { - *seglen = 0; - return segs; - } - for (i = 0; i < n; i++) { - int log = fastlog2(sizes[i]); - if (cur.log != log && cur.bytes > 0) { - struct segment fresh = { - .start = i, - }; - - segs[next++] = cur; - cur = fresh; - } - - cur.log = log; - cur.end = i + 1; - cur.bytes += sizes[i]; - } - segs[next++] = cur; - *seglen = next; - return segs; -} - struct segment suggest_compaction_segment(uint64_t *sizes, size_t n) { - struct segment min_seg = { - .log = 64, - }; - struct segment *segs; - size_t seglen = 0, i; - - segs = sizes_to_segments(&seglen, sizes, n); - for (i = 0; i < seglen; i++) { - if (segment_size(&segs[i]) == 1) - continue; + struct segment seg = { 0 }; + uint64_t bytes; + size_t i; - if (segs[i].log < min_seg.log) - min_seg = segs[i]; - } + /* + * If there are no tables or only a single one then we don't have to + * compact anything. The sequence is geometric by definition already. + */ + if (n <= 1) + return seg; - while (min_seg.start > 0) { - size_t prev = min_seg.start - 1; - if (fastlog2(min_seg.bytes) < fastlog2(sizes[prev])) + /* + * Find the ending table of the compaction segment needed to restore the + * geometric sequence. Note that the segment end is exclusive. + * + * To do so, we iterate backwards starting from the most recent table + * until a valid segment end is found. If the preceding table is smaller + * than the current table multiplied by the geometric factor (2), the + * compaction segment end has been identified. + * + * Tables after the ending point are not added to the byte count because + * they are already valid members of the geometric sequence. Due to the + * properties of a geometric sequence, it is not possible for the sum of + * these tables to exceed the value of the ending point table. + * + * Example table size sequence requiring no compaction: + * 64, 32, 16, 8, 4, 2, 1 + * + * Example table size sequence where compaction segment end is set to + * the last table. Since the segment end is exclusive, the last table is + * excluded during subsequent compaction and the table with size 3 is + * the final table included: + * 64, 32, 16, 8, 4, 3, 1 + */ + for (i = n - 1; i > 0; i--) { + if (sizes[i - 1] < sizes[i] * 2) { + seg.end = i + 1; + bytes = sizes[i]; break; + } + } - min_seg.start = prev; - min_seg.bytes += sizes[prev]; + /* + * Find the starting table of the compaction segment by iterating + * through the remaining tables and keeping track of the accumulated + * size of all tables seen from the segment end table. The previous + * table is compared to the accumulated size because the tables from the + * segment end are merged backwards recursively. + * + * Note that we keep iterating even after we have found the first + * starting point. This is because there may be tables in the stack + * preceding that first starting point which violate the geometric + * sequence. + * + * Example compaction segment start set to table with size 32: + * 128, 32, 16, 8, 4, 3, 1 + */ + for (; i > 0; i--) { + uint64_t curr = bytes; + bytes += sizes[i - 1]; + + if (sizes[i - 1] < curr * 2) { + seg.start = i - 1; + seg.bytes = bytes; + } } - reftable_free(segs); - return min_seg; + return seg; } static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st) @@ -1352,65 +1349,6 @@ done: return err; } -static int stack_check_addition(struct reftable_stack *st, - const char *new_tab_name) -{ - int err = 0; - struct reftable_block_source src = { NULL }; - struct reftable_reader *rd = NULL; - struct reftable_table tab = { NULL }; - struct reftable_ref_record *refs = NULL; - struct reftable_iterator it = { NULL }; - int cap = 0; - int len = 0; - int i = 0; - - if (st->config.skip_name_check) - return 0; - - err = reftable_block_source_from_file(&src, new_tab_name); - if (err < 0) - goto done; - - err = reftable_new_reader(&rd, &src, new_tab_name); - if (err < 0) - goto done; - - err = reftable_reader_seek_ref(rd, &it, ""); - if (err > 0) { - err = 0; - goto done; - } - if (err < 0) - goto done; - - while (1) { - struct reftable_ref_record ref = { NULL }; - err = reftable_iterator_next_ref(&it, &ref); - if (err > 0) - break; - if (err < 0) - goto done; - - REFTABLE_ALLOC_GROW(refs, len + 1, cap); - refs[len++] = ref; - } - - reftable_table_from_merged_table(&tab, reftable_stack_merged_table(st)); - - err = validate_ref_record_addition(tab, refs, len); - -done: - for (i = 0; i < len; i++) { - reftable_ref_record_release(&refs[i]); - } - - free(refs); - reftable_iterator_destroy(&it); - reftable_reader_free(rd); - return err; -} - static int is_table_name(const char *s) { const char *dot = strrchr(s, '.'); diff --git a/reftable/stack.h b/reftable/stack.h index d919455669..d43efa4760 100644 --- a/reftable/stack.h +++ b/reftable/stack.h @@ -19,7 +19,6 @@ struct reftable_stack { int list_fd; char *reftable_dir; - int disable_auto_compact; struct reftable_write_options config; @@ -33,12 +32,9 @@ int read_lines(const char *filename, char ***lines); struct segment { size_t start, end; - int log; uint64_t bytes; }; -int fastlog2(uint64_t sz); -struct segment *sizes_to_segments(size_t *seglen, uint64_t *sizes, size_t n); struct segment suggest_compaction_segment(uint64_t *sizes, size_t n); #endif diff --git a/reftable/stack_test.c b/reftable/stack_test.c index 509f486623..7889f818d1 100644 --- a/reftable/stack_test.c +++ b/reftable/stack_test.c @@ -38,7 +38,17 @@ static int count_dir_entries(const char *dirname) return 0; while ((d = readdir(dir))) { - if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, ".")) + /* + * Besides skipping over "." and "..", we also need to + * skip over other files that have a leading ".". This + * is due to behaviour of NFS, which will rename files + * to ".nfs*" to emulate delete-on-last-close. + * + * In any case this should be fine as the reftable + * library will never write files with leading dots + * anyway. + */ + if (starts_with(d->d_name, ".")) continue; len++; } @@ -232,7 +242,7 @@ static void test_reftable_stack_uptodate(void) EXPECT_ERR(err); err = reftable_stack_add(st2, &write_test_ref, &ref2); - EXPECT(err == REFTABLE_LOCK_ERROR); + EXPECT(err == REFTABLE_OUTDATED_ERROR); err = reftable_stack_reload(st2); EXPECT_ERR(err); @@ -315,7 +325,7 @@ static void test_reftable_stack_transaction_api_performs_auto_compaction(void) * we can ensure that we indeed honor this setting and have * better control over when exactly auto compaction runs. */ - st->disable_auto_compact = i != n; + st->config.disable_auto_compact = i != n; err = reftable_stack_new_addition(&add, st); EXPECT_ERR(err); @@ -343,41 +353,46 @@ static void test_reftable_stack_transaction_api_performs_auto_compaction(void) clear_dir(dir); } -static void test_reftable_stack_validate_refname(void) +static void test_reftable_stack_auto_compaction_fails_gracefully(void) { - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st = NULL; - int err; - char *dir = get_tmp_dir(__LINE__); - - int i; struct reftable_ref_record ref = { - .refname = "a/b", + .refname = "refs/heads/master", .update_index = 1, - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", + .value_type = REFTABLE_REF_VAL1, + .value.val1 = {0x01}, }; - char *additions[] = { "a", "a/b/c" }; + struct reftable_write_options cfg = {0}; + struct reftable_stack *st; + struct strbuf table_path = STRBUF_INIT; + char *dir = get_tmp_dir(__LINE__); + int err; err = reftable_new_stack(&st, dir, cfg); EXPECT_ERR(err); - err = reftable_stack_add(st, &write_test_ref, &ref); + err = reftable_stack_add(st, write_test_ref, &ref); EXPECT_ERR(err); + EXPECT(st->merged->stack_len == 1); + EXPECT(st->stats.attempts == 0); + EXPECT(st->stats.failures == 0); + + /* + * Lock the newly written table such that it cannot be compacted. + * Adding a new table to the stack should not be impacted by this, even + * though auto-compaction will now fail. + */ + strbuf_addf(&table_path, "%s/%s.lock", dir, st->readers[0]->name); + write_file_buf(table_path.buf, "", 0); - for (i = 0; i < ARRAY_SIZE(additions); i++) { - struct reftable_ref_record ref = { - .refname = additions[i], - .update_index = 1, - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - - err = reftable_stack_add(st, &write_test_ref, &ref); - EXPECT(err == REFTABLE_NAME_CONFLICT); - } + ref.update_index = 2; + err = reftable_stack_add(st, write_test_ref, &ref); + EXPECT_ERR(err); + EXPECT(st->merged->stack_len == 2); + EXPECT(st->stats.attempts == 1); + EXPECT(st->stats.failures == 1); reftable_stack_destroy(st); + strbuf_release(&table_path); clear_dir(dir); } @@ -444,6 +459,7 @@ static void test_reftable_stack_add(void) struct reftable_write_options cfg = { .exact_log_message = 1, .default_permissions = 0660, + .disable_auto_compact = 1, }; struct reftable_stack *st = NULL; char *dir = get_tmp_dir(__LINE__); @@ -455,7 +471,6 @@ static void test_reftable_stack_add(void) err = reftable_new_stack(&st, dir, cfg); EXPECT_ERR(err); - st->disable_auto_compact = 1; for (i = 0; i < N; i++) { char buf[256]; @@ -468,8 +483,6 @@ static void test_reftable_stack_add(void) logs[i].refname = xstrdup(buf); logs[i].update_index = N + i + 1; logs[i].value_type = REFTABLE_LOG_UPDATE; - - logs[i].value.update.new_hash = reftable_malloc(GIT_SHA1_RAWSZ); logs[i].value.update.email = xstrdup("identity@invalid"); set_test_hash(logs[i].value.update.new_hash, i); } @@ -547,16 +560,17 @@ static void test_reftable_stack_log_normalize(void) }; struct reftable_stack *st = NULL; char *dir = get_tmp_dir(__LINE__); - - uint8_t h1[GIT_SHA1_RAWSZ] = { 0x01 }, h2[GIT_SHA1_RAWSZ] = { 0x02 }; - - struct reftable_log_record input = { .refname = "branch", - .update_index = 1, - .value_type = REFTABLE_LOG_UPDATE, - .value = { .update = { - .new_hash = h1, - .old_hash = h2, - } } }; + struct reftable_log_record input = { + .refname = "branch", + .update_index = 1, + .value_type = REFTABLE_LOG_UPDATE, + .value = { + .update = { + .new_hash = { 1 }, + .old_hash = { 2 }, + }, + }, + }; struct reftable_log_record dest = { .update_index = 0, }; @@ -627,8 +641,6 @@ static void test_reftable_stack_tombstone(void) logs[i].update_index = 42; if (i % 2 == 0) { logs[i].value_type = REFTABLE_LOG_UPDATE; - logs[i].value.update.new_hash = - reftable_malloc(GIT_SHA1_RAWSZ); set_test_hash(logs[i].value.update.new_hash, i); logs[i].value.update.email = xstrdup("identity@invalid"); @@ -720,59 +732,13 @@ static void test_reftable_stack_hash_id(void) clear_dir(dir); } -static void test_log2(void) -{ - EXPECT(1 == fastlog2(3)); - EXPECT(2 == fastlog2(4)); - EXPECT(2 == fastlog2(5)); -} - -static void test_sizes_to_segments(void) -{ - uint64_t sizes[] = { 2, 3, 4, 5, 7, 9 }; - /* .................0 1 2 3 4 5 */ - - size_t seglen = 0; - struct segment *segs = - sizes_to_segments(&seglen, sizes, ARRAY_SIZE(sizes)); - EXPECT(segs[2].log == 3); - EXPECT(segs[2].start == 5); - EXPECT(segs[2].end == 6); - - EXPECT(segs[1].log == 2); - EXPECT(segs[1].start == 2); - EXPECT(segs[1].end == 5); - reftable_free(segs); -} - -static void test_sizes_to_segments_empty(void) -{ - size_t seglen = 0; - struct segment *segs = sizes_to_segments(&seglen, NULL, 0); - EXPECT(seglen == 0); - reftable_free(segs); -} - -static void test_sizes_to_segments_all_equal(void) -{ - uint64_t sizes[] = { 5, 5 }; - size_t seglen = 0; - struct segment *segs = - sizes_to_segments(&seglen, sizes, ARRAY_SIZE(sizes)); - EXPECT(seglen == 1); - EXPECT(segs[0].start == 0); - EXPECT(segs[0].end == 2); - reftable_free(segs); -} - static void test_suggest_compaction_segment(void) { - uint64_t sizes[] = { 128, 64, 17, 16, 9, 9, 9, 16, 16 }; - /* .................0 1 2 3 4 5 6 */ + uint64_t sizes[] = { 512, 64, 17, 16, 9, 9, 9, 16, 2, 16 }; struct segment min = suggest_compaction_segment(sizes, ARRAY_SIZE(sizes)); - EXPECT(min.start == 2); - EXPECT(min.end == 7); + EXPECT(min.start == 1); + EXPECT(min.end == 10); } static void test_suggest_compaction_segment_nothing(void) @@ -810,7 +776,6 @@ static void test_reflog_expire(void) logs[i].update_index = i; logs[i].value_type = REFTABLE_LOG_UPDATE; logs[i].value.update.time = i; - logs[i].value.update.new_hash = reftable_malloc(GIT_SHA1_RAWSZ); logs[i].value.update.email = xstrdup("identity@invalid"); set_test_hash(logs[i].value.update.new_hash, i); } @@ -884,9 +849,21 @@ static void test_empty_add(void) reftable_stack_destroy(st2); } +static int fastlog2(uint64_t sz) +{ + int l = 0; + if (sz == 0) + return 0; + for (; sz; sz /= 2) + l++; + return l - 1; +} + static void test_reftable_stack_auto_compaction(void) { - struct reftable_write_options cfg = { 0 }; + struct reftable_write_options cfg = { + .disable_auto_compact = 1, + }; struct reftable_stack *st = NULL; char *dir = get_tmp_dir(__LINE__); @@ -896,7 +873,6 @@ static void test_reftable_stack_auto_compaction(void) err = reftable_new_stack(&st, dir, cfg); EXPECT_ERR(err); - st->disable_auto_compact = 1; /* call manually below for coverage. */ for (i = 0; i < N; i++) { char name[100]; struct reftable_ref_record ref = { @@ -945,7 +921,7 @@ static void test_reftable_stack_add_performs_auto_compaction(void) * we can ensure that we indeed honor this setting and have * better control over when exactly auto compaction runs. */ - st->disable_auto_compact = i != n; + st->config.disable_auto_compact = i != n; strbuf_reset(&refname); strbuf_addf(&refname, "branch-%04d", i); @@ -1072,7 +1048,6 @@ static void test_reftable_stack_compaction_concurrent_clean(void) int stack_test_main(int argc, const char *argv[]) { RUN_TEST(test_empty_add); - RUN_TEST(test_log2); RUN_TEST(test_names_equal); RUN_TEST(test_parse_names); RUN_TEST(test_read_file); @@ -1089,12 +1064,9 @@ int stack_test_main(int argc, const char *argv[]) RUN_TEST(test_reftable_stack_tombstone); RUN_TEST(test_reftable_stack_transaction_api); RUN_TEST(test_reftable_stack_transaction_api_performs_auto_compaction); + RUN_TEST(test_reftable_stack_auto_compaction_fails_gracefully); RUN_TEST(test_reftable_stack_update_index_check); RUN_TEST(test_reftable_stack_uptodate); - RUN_TEST(test_reftable_stack_validate_refname); - RUN_TEST(test_sizes_to_segments); - RUN_TEST(test_sizes_to_segments_all_equal); - RUN_TEST(test_sizes_to_segments_empty); RUN_TEST(test_suggest_compaction_segment); RUN_TEST(test_suggest_compaction_segment_nothing); return 0; diff --git a/reftable/system.h b/reftable/system.h index 6b74a81514..5d8b6dede5 100644 --- a/reftable/system.h +++ b/reftable/system.h @@ -12,7 +12,9 @@ https://developers.google.com/open-source/licenses/bsd /* This header glues the reftable library to the rest of Git */ #include "git-compat-util.h" +#include "lockfile.h" #include "strbuf.h" +#include "tempfile.h" #include "hash-ll.h" /* hash ID, sizes.*/ #include "dir.h" /* remove_dir_recursively, for tests.*/ diff --git a/reftable/writer.c b/reftable/writer.c index 1d9ff0fbfa..10eccaaa07 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -109,7 +109,7 @@ static void writer_reinit_block_writer(struct reftable_writer *w, uint8_t typ) block_start = header_size(writer_version(w)); } - strbuf_release(&w->last_key); + strbuf_reset(&w->last_key); block_writer_init(&w->block_writer_data, typ, w->block, w->opts.block_size, block_start, hash_size(w->opts.hash_id)); @@ -149,11 +149,21 @@ void reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, w->max_update_index = max; } +static void writer_release(struct reftable_writer *w) +{ + if (w) { + reftable_free(w->block); + w->block = NULL; + block_writer_release(&w->block_writer_data); + w->block_writer = NULL; + writer_clear_index(w); + strbuf_release(&w->last_key); + } +} + void reftable_writer_free(struct reftable_writer *w) { - if (!w) - return; - reftable_free(w->block); + writer_release(w); reftable_free(w); } @@ -209,7 +219,8 @@ static int writer_add_record(struct reftable_writer *w, struct reftable_record *rec) { struct strbuf key = STRBUF_INIT; - int err = -1; + int err; + reftable_record_key(rec, &key); if (strbuf_cmp(&w->last_key, &key) >= 0) { err = REFTABLE_API_ERROR; @@ -218,27 +229,42 @@ static int writer_add_record(struct reftable_writer *w, strbuf_reset(&w->last_key); strbuf_addbuf(&w->last_key, &key); - if (!w->block_writer) { + if (!w->block_writer) writer_reinit_block_writer(w, reftable_record_type(rec)); - } - assert(block_writer_type(w->block_writer) == reftable_record_type(rec)); + if (block_writer_type(w->block_writer) != reftable_record_type(rec)) + BUG("record of type %d added to writer of type %d", + reftable_record_type(rec), block_writer_type(w->block_writer)); - if (block_writer_add(w->block_writer, rec) == 0) { + /* + * Try to add the record to the writer. If this succeeds then we're + * done. Otherwise the block writer may have hit the block size limit + * and needs to be flushed. + */ + if (!block_writer_add(w->block_writer, rec)) { err = 0; goto done; } + /* + * The current block is full, so we need to flush and reinitialize the + * writer to start writing the next block. + */ err = writer_flush_block(w); - if (err < 0) { + if (err < 0) goto done; - } - writer_reinit_block_writer(w, reftable_record_type(rec)); + + /* + * Try to add the record to the writer again. If this still fails then + * the record does not fit into the block size. + * + * TODO: it would be great to have `block_writer_add()` return proper + * error codes so that we don't have to second-guess the failure + * mode here. + */ err = block_writer_add(w->block_writer, rec); - if (err == -1) { - /* we are writing into memory, so an error can only mean it - * doesn't fit. */ + if (err) { err = REFTABLE_ENTRY_TOO_BIG_ERROR; goto done; } @@ -452,7 +478,7 @@ static int writer_finish_section(struct reftable_writer *w) bstats->max_index_level = max_level; /* Reinit lastKey, as the next section can start with any key. */ - w->last_key.len = 0; + strbuf_reset(&w->last_key); return 0; } @@ -627,74 +653,87 @@ int reftable_writer_close(struct reftable_writer *w) } done: - /* free up memory. */ - block_writer_release(&w->block_writer_data); - writer_clear_index(w); - strbuf_release(&w->last_key); + writer_release(w); return err; } static void writer_clear_index(struct reftable_writer *w) { - for (size_t i = 0; i < w->index_len; i++) + for (size_t i = 0; w->index && i < w->index_len; i++) strbuf_release(&w->index[i].last_key); FREE_AND_NULL(w->index); w->index_len = 0; w->index_cap = 0; } -static const int debug = 0; - static int writer_flush_nonempty_block(struct reftable_writer *w) { + struct reftable_index_record index_record = { + .last_key = STRBUF_INIT, + }; uint8_t typ = block_writer_type(w->block_writer); - struct reftable_block_stats *bstats = - writer_reftable_block_stats(w, typ); - uint64_t block_typ_off = (bstats->blocks == 0) ? w->next : 0; - int raw_bytes = block_writer_finish(w->block_writer); - int padding = 0; - int err = 0; - struct reftable_index_record ir = { .last_key = STRBUF_INIT }; + struct reftable_block_stats *bstats; + int raw_bytes, padding = 0, err; + uint64_t block_typ_off; + + /* + * Finish the current block. This will cause the block writer to emit + * restart points and potentially compress records in case we are + * writing a log block. + * + * Note that this is still happening in memory. + */ + raw_bytes = block_writer_finish(w->block_writer); if (raw_bytes < 0) return raw_bytes; - if (!w->opts.unpadded && typ != BLOCK_TYPE_LOG) { + /* + * By default, all records except for log records are padded to the + * block size. + */ + if (!w->opts.unpadded && typ != BLOCK_TYPE_LOG) padding = w->opts.block_size - raw_bytes; - } - if (block_typ_off > 0) { + bstats = writer_reftable_block_stats(w, typ); + block_typ_off = (bstats->blocks == 0) ? w->next : 0; + if (block_typ_off > 0) bstats->offset = block_typ_off; - } - bstats->entries += w->block_writer->entries; bstats->restarts += w->block_writer->restart_len; bstats->blocks++; w->stats.blocks++; - if (debug) { - fprintf(stderr, "block %c off %" PRIu64 " sz %d (%d)\n", typ, - w->next, raw_bytes, - get_be24(w->block + w->block_writer->header_off + 1)); - } - - if (w->next == 0) { + /* + * If this is the first block we're writing to the table then we need + * to also write the reftable header. + */ + if (!w->next) writer_write_header(w, w->block); - } err = padded_write(w, w->block, raw_bytes, padding); if (err < 0) return err; + /* + * Add an index record for every block that we're writing. If we end up + * having more than a threshold of index records we will end up writing + * an index section in `writer_finish_section()`. Each index record + * contains the last record key of the block it is indexing as well as + * the offset of that block. + * + * Note that this also applies when flushing index blocks, in which + * case we will end up with a multi-level index. + */ REFTABLE_ALLOC_GROW(w->index, w->index_len + 1, w->index_cap); - - ir.offset = w->next; - strbuf_reset(&ir.last_key); - strbuf_addbuf(&ir.last_key, &w->block_writer->last_key); - w->index[w->index_len] = ir; - + index_record.offset = w->next; + strbuf_reset(&index_record.last_key); + strbuf_addbuf(&index_record.last_key, &w->block_writer->last_key); + w->index[w->index_len] = index_record; w->index_len++; + w->next += padding + raw_bytes; w->block_writer = NULL; + return 0; } |
