aboutsummaryrefslogtreecommitdiffstats
path: root/pack-bitmap.c
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2024-06-24 16:39:13 -0700
committerJunio C Hamano <gitster@pobox.com>2024-06-24 16:39:13 -0700
commitffa47b75cf9032c1655985f8ba934d2ba5c60e81 (patch)
treea538fde4afcb5e6202aef144b19054777eacfdc6 /pack-bitmap.c
parentThe fifteenth batch (diff)
parentpack-bitmap.c: ensure pseudo-merge offset reads are bounded (diff)
downloadgit-ffa47b75cf9032c1655985f8ba934d2ba5c60e81.tar.gz
git-ffa47b75cf9032c1655985f8ba934d2ba5c60e81.zip
Merge branch 'tb/pseudo-merge-reachability-bitmap'
The pseudo-merge reachability bitmap to help more efficient storage of the reachability bitmap in a repository with too many refs has been added. * tb/pseudo-merge-reachability-bitmap: (26 commits) pack-bitmap.c: ensure pseudo-merge offset reads are bounded Documentation/technical/bitmap-format.txt: add missing position table t/perf: implement performance tests for pseudo-merge bitmaps pseudo-merge: implement support for finding existing merges ewah: `bitmap_equals_ewah()` pack-bitmap: extra trace2 information pack-bitmap.c: use pseudo-merges during traversal t/test-lib-functions.sh: support `--notick` in `test_commit_bulk()` pack-bitmap: implement test helpers for pseudo-merge ewah: implement `ewah_bitmap_popcount()` pseudo-merge: implement support for reading pseudo-merge commits pack-bitmap.c: read pseudo-merge extension pseudo-merge: scaffolding for reads pack-bitmap: extract `read_bitmap()` function pack-bitmap-write.c: write pseudo-merge table pseudo-merge: implement support for selecting pseudo-merge commits config: introduce `git_config_double()` pack-bitmap: make `bitmap_writer_push_bitmapped_commit()` public pack-bitmap: implement `bitmap_writer_has_bitmapped_object_id()` pack-bitmap-write: support storing pseudo-merge commits ...
Diffstat (limited to 'pack-bitmap.c')
-rw-r--r--pack-bitmap.c364
1 files changed, 353 insertions, 11 deletions
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 1d6b7f2826..afe41f6ba6 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -20,6 +20,7 @@
#include "list-objects-filter-options.h"
#include "midx.h"
#include "config.h"
+#include "pseudo-merge.h"
/*
* An entry on the bitmap index, representing the bitmap for a given
@@ -86,6 +87,9 @@ struct bitmap_index {
*/
unsigned char *table_lookup;
+ /* This contains the pseudo-merge cache within 'map' (if found). */
+ struct pseudo_merge_map pseudo_merges;
+
/*
* Extended index.
*
@@ -110,6 +114,13 @@ struct bitmap_index {
unsigned int version;
};
+static int pseudo_merges_satisfied_nr;
+static int pseudo_merges_cascades_nr;
+static int existing_bitmaps_hits_nr;
+static int existing_bitmaps_misses_nr;
+static int roots_with_bitmaps_nr;
+static int roots_without_bitmaps_nr;
+
static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st)
{
struct ewah_bitmap *parent;
@@ -129,17 +140,13 @@ static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st)
return composed;
}
-/*
- * Read a bitmap from the current read position on the mmaped
- * index, and increase the read position accordingly
- */
-static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
+struct ewah_bitmap *read_bitmap(const unsigned char *map,
+ size_t map_size, size_t *map_pos)
{
struct ewah_bitmap *b = ewah_pool_new();
- ssize_t bitmap_size = ewah_read_mmap(b,
- index->map + index->map_pos,
- index->map_size - index->map_pos);
+ ssize_t bitmap_size = ewah_read_mmap(b, map + *map_pos,
+ map_size - *map_pos);
if (bitmap_size < 0) {
error(_("failed to load bitmap index (corrupted?)"));
@@ -147,10 +154,20 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
return NULL;
}
- index->map_pos += bitmap_size;
+ *map_pos += bitmap_size;
+
return b;
}
+/*
+ * Read a bitmap from the current read position on the mmaped
+ * index, and increase the read position accordingly
+ */
+static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
+{
+ return read_bitmap(index->map, index->map_size, &index->map_pos);
+}
+
static uint32_t bitmap_num_objects(struct bitmap_index *index)
{
if (index->midx)
@@ -199,6 +216,46 @@ static int load_bitmap_header(struct bitmap_index *index)
index->table_lookup = (void *)(index_end - table_size);
index_end -= table_size;
}
+
+ if (flags & BITMAP_OPT_PSEUDO_MERGES) {
+ unsigned char *pseudo_merge_ofs;
+ size_t table_size;
+ uint32_t i;
+
+ if (sizeof(table_size) > index_end - index->map - header_size)
+ return error(_("corrupted bitmap index file (too short to fit pseudo-merge table header)"));
+
+ table_size = get_be64(index_end - 8);
+ if (table_size > index_end - index->map - header_size)
+ return error(_("corrupted bitmap index file (too short to fit pseudo-merge table)"));
+
+ if (git_env_bool("GIT_TEST_USE_PSEUDO_MERGES", 1)) {
+ const unsigned char *ext = (index_end - table_size);
+
+ index->pseudo_merges.map = index->map;
+ index->pseudo_merges.map_size = index->map_size;
+ index->pseudo_merges.commits = ext + get_be64(index_end - 16);
+ index->pseudo_merges.commits_nr = get_be32(index_end - 20);
+ index->pseudo_merges.nr = get_be32(index_end - 24);
+
+ if (st_add(st_mult(index->pseudo_merges.nr,
+ sizeof(uint64_t)),
+ 24) > table_size)
+ return error(_("corrupted bitmap index file, pseudo-merge table too short"));
+
+ CALLOC_ARRAY(index->pseudo_merges.v,
+ index->pseudo_merges.nr);
+
+ pseudo_merge_ofs = index_end - 24 -
+ (index->pseudo_merges.nr * sizeof(uint64_t));
+ for (i = 0; i < index->pseudo_merges.nr; i++) {
+ index->pseudo_merges.v[i].at = get_be64(pseudo_merge_ofs);
+ pseudo_merge_ofs += sizeof(uint64_t);
+ }
+ }
+
+ index_end -= table_size;
+ }
}
index->entry_count = ntohl(header->entry_count);
@@ -960,6 +1017,22 @@ static void show_commit(struct commit *commit UNUSED,
{
}
+static unsigned apply_pseudo_merges_for_commit_1(struct bitmap_index *bitmap_git,
+ struct bitmap *result,
+ struct commit *commit,
+ uint32_t commit_pos)
+{
+ int ret;
+
+ ret = apply_pseudo_merges_for_commit(&bitmap_git->pseudo_merges,
+ result, commit, commit_pos);
+
+ if (ret)
+ pseudo_merges_satisfied_nr += ret;
+
+ return ret;
+}
+
static int add_to_include_set(struct bitmap_index *bitmap_git,
struct include_data *data,
struct commit *commit,
@@ -975,11 +1048,19 @@ static int add_to_include_set(struct bitmap_index *bitmap_git,
partial = bitmap_for_commit(bitmap_git, commit);
if (partial) {
+ existing_bitmaps_hits_nr++;
+
bitmap_or_ewah(data->base, partial);
return 0;
}
+ existing_bitmaps_misses_nr++;
+
bitmap_set(data->base, bitmap_pos);
+ if (apply_pseudo_merges_for_commit_1(bitmap_git, data->base, commit,
+ bitmap_pos))
+ return 0;
+
return 1;
}
@@ -1030,8 +1111,12 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git,
{
struct ewah_bitmap *or_with = bitmap_for_commit(bitmap_git, commit);
- if (!or_with)
+ if (!or_with) {
+ existing_bitmaps_misses_nr++;
return 0;
+ }
+
+ existing_bitmaps_hits_nr++;
if (!*base)
*base = ewah_to_bitmap(or_with);
@@ -1105,6 +1190,20 @@ static void show_boundary_object(struct object *object UNUSED,
BUG("should not be called");
}
+static unsigned cascade_pseudo_merges_1(struct bitmap_index *bitmap_git,
+ struct bitmap *result,
+ struct bitmap *roots)
+{
+ int ret = cascade_pseudo_merges(&bitmap_git->pseudo_merges,
+ result, roots);
+ if (ret) {
+ pseudo_merges_cascades_nr++;
+ pseudo_merges_satisfied_nr += ret;
+ }
+
+ return ret;
+}
+
static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct object_list *roots)
@@ -1114,6 +1213,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
unsigned int i;
unsigned int tmp_blobs, tmp_trees, tmp_tags;
int any_missing = 0;
+ int existing_bitmaps = 0;
cb.bitmap_git = bitmap_git;
cb.base = bitmap_new();
@@ -1121,6 +1221,25 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
revs->ignore_missing_links = 1;
+ if (bitmap_git->pseudo_merges.nr) {
+ struct bitmap *roots_bitmap = bitmap_new();
+ struct object_list *objects = NULL;
+
+ for (objects = roots; objects; objects = objects->next) {
+ struct object *object = objects->item;
+ int pos;
+
+ pos = bitmap_position(bitmap_git, &object->oid);
+ if (pos < 0)
+ continue;
+
+ bitmap_set(roots_bitmap, pos);
+ }
+
+ if (!cascade_pseudo_merges_1(bitmap_git, cb.base, roots_bitmap))
+ bitmap_free(roots_bitmap);
+ }
+
/*
* OR in any existing reachability bitmaps among `roots` into
* `cb.base`.
@@ -1132,8 +1251,10 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
continue;
if (add_commit_to_bitmap(bitmap_git, &cb.base,
- (struct commit *)object))
+ (struct commit *)object)) {
+ existing_bitmaps = 1;
continue;
+ }
any_missing = 1;
}
@@ -1141,6 +1262,9 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
if (!any_missing)
goto cleanup;
+ if (existing_bitmaps)
+ cascade_pseudo_merges_1(bitmap_git, cb.base, NULL);
+
tmp_blobs = revs->blob_objects;
tmp_trees = revs->tree_objects;
tmp_tags = revs->blob_objects;
@@ -1196,6 +1320,44 @@ cleanup:
return cb.base;
}
+struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git,
+ struct commit *commit)
+{
+ struct commit_list *p;
+ struct bitmap *parents;
+ struct pseudo_merge *match = NULL;
+
+ if (!bitmap_git->pseudo_merges.nr)
+ return NULL;
+
+ parents = bitmap_new();
+
+ for (p = commit->parents; p; p = p->next) {
+ int pos = bitmap_position(bitmap_git, &p->item->object.oid);
+ if (pos < 0 || pos >= bitmap_num_objects(bitmap_git))
+ goto done;
+
+ bitmap_set(parents, pos);
+ }
+
+ match = pseudo_merge_for_parents(&bitmap_git->pseudo_merges,
+ parents);
+
+done:
+ bitmap_free(parents);
+ if (match)
+ return pseudo_merge_bitmap(&bitmap_git->pseudo_merges, match);
+
+ return NULL;
+}
+
+static void unsatisfy_all_pseudo_merges(struct bitmap_index *bitmap_git)
+{
+ uint32_t i;
+ for (i = 0; i < bitmap_git->pseudo_merges.nr; i++)
+ bitmap_git->pseudo_merges.v[i].satisfied = 0;
+}
+
static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct object_list *roots,
@@ -1203,9 +1365,32 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
{
struct bitmap *base = NULL;
int needs_walk = 0;
+ unsigned existing_bitmaps = 0;
struct object_list *not_mapped = NULL;
+ unsatisfy_all_pseudo_merges(bitmap_git);
+
+ if (bitmap_git->pseudo_merges.nr) {
+ struct bitmap *roots_bitmap = bitmap_new();
+ struct object_list *objects = NULL;
+
+ for (objects = roots; objects; objects = objects->next) {
+ struct object *object = objects->item;
+ int pos;
+
+ pos = bitmap_position(bitmap_git, &object->oid);
+ if (pos < 0)
+ continue;
+
+ bitmap_set(roots_bitmap, pos);
+ }
+
+ base = bitmap_new();
+ if (!cascade_pseudo_merges_1(bitmap_git, base, roots_bitmap))
+ bitmap_free(roots_bitmap);
+ }
+
/*
* Go through all the roots for the walk. The ones that have bitmaps
* on the bitmap index will be `or`ed together to form an initial
@@ -1216,11 +1401,21 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
*/
while (roots) {
struct object *object = roots->item;
+
roots = roots->next;
+ if (base) {
+ int pos = bitmap_position(bitmap_git, &object->oid);
+ if (pos > 0 && bitmap_get(base, pos)) {
+ object->flags |= SEEN;
+ continue;
+ }
+ }
+
if (object->type == OBJ_COMMIT &&
add_commit_to_bitmap(bitmap_git, &base, (struct commit *)object)) {
object->flags |= SEEN;
+ existing_bitmaps = 1;
continue;
}
@@ -1236,6 +1431,9 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
roots = not_mapped;
+ if (existing_bitmaps)
+ cascade_pseudo_merges_1(bitmap_git, base, NULL);
+
/*
* Let's iterate through all the roots that don't have bitmaps to
* check if we can determine them to be reachable from the existing
@@ -1256,8 +1454,12 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
object->flags &= ~UNINTERESTING;
add_pending_object(revs, object, "");
needs_walk = 1;
+
+ roots_without_bitmaps_nr++;
} else {
object->flags |= SEEN;
+
+ roots_with_bitmaps_nr++;
}
}
@@ -1820,6 +2022,19 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_free(&wants);
object_list_free(&haves);
+ trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied",
+ pseudo_merges_satisfied_nr);
+ trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades",
+ pseudo_merges_cascades_nr);
+ trace2_data_intmax("bitmap", the_repository, "bitmap/hits",
+ existing_bitmaps_hits_nr);
+ trace2_data_intmax("bitmap", the_repository, "bitmap/misses",
+ existing_bitmaps_misses_nr);
+ trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap",
+ roots_with_bitmaps_nr);
+ trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap",
+ roots_without_bitmaps_nr);
+
return bitmap_git;
cleanup:
@@ -2410,6 +2625,132 @@ cleanup:
return 0;
}
+static void bit_pos_to_object_id(struct bitmap_index *bitmap_git,
+ uint32_t bit_pos,
+ struct object_id *oid)
+{
+ uint32_t index_pos;
+
+ if (bitmap_is_midx(bitmap_git))
+ index_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos);
+ else
+ index_pos = pack_pos_to_index(bitmap_git->pack, bit_pos);
+
+ nth_bitmap_object_oid(bitmap_git, oid, index_pos);
+}
+
+int test_bitmap_pseudo_merges(struct repository *r)
+{
+ struct bitmap_index *bitmap_git;
+ uint32_t i;
+
+ bitmap_git = prepare_bitmap_git(r);
+ if (!bitmap_git || !bitmap_git->pseudo_merges.nr)
+ goto cleanup;
+
+ for (i = 0; i < bitmap_git->pseudo_merges.nr; i++) {
+ struct pseudo_merge *merge;
+ struct ewah_bitmap *commits_bitmap, *merge_bitmap;
+
+ merge = use_pseudo_merge(&bitmap_git->pseudo_merges,
+ &bitmap_git->pseudo_merges.v[i]);
+ commits_bitmap = merge->commits;
+ merge_bitmap = pseudo_merge_bitmap(&bitmap_git->pseudo_merges,
+ merge);
+
+ printf("at=%"PRIuMAX", commits=%"PRIuMAX", objects=%"PRIuMAX"\n",
+ (uintmax_t)merge->at,
+ (uintmax_t)ewah_bitmap_popcount(commits_bitmap),
+ (uintmax_t)ewah_bitmap_popcount(merge_bitmap));
+ }
+
+cleanup:
+ free_bitmap_index(bitmap_git);
+ return 0;
+}
+
+static void dump_ewah_object_ids(struct bitmap_index *bitmap_git,
+ struct ewah_bitmap *bitmap)
+
+{
+ struct ewah_iterator it;
+ eword_t word;
+ uint32_t pos = 0;
+
+ ewah_iterator_init(&it, bitmap);
+
+ while (ewah_iterator_next(&word, &it)) {
+ struct object_id oid;
+ uint32_t offset;
+
+ for (offset = 0; offset < BITS_IN_EWORD; offset++) {
+ if (!(word >> offset))
+ break;
+
+ offset += ewah_bit_ctz64(word >> offset);
+
+ bit_pos_to_object_id(bitmap_git, pos + offset, &oid);
+ printf("%s\n", oid_to_hex(&oid));
+ }
+ pos += BITS_IN_EWORD;
+ }
+}
+
+int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n)
+{
+ struct bitmap_index *bitmap_git;
+ struct pseudo_merge *merge;
+ int ret = 0;
+
+ bitmap_git = prepare_bitmap_git(r);
+ if (!bitmap_git || !bitmap_git->pseudo_merges.nr)
+ goto cleanup;
+
+ if (n >= bitmap_git->pseudo_merges.nr) {
+ ret = error(_("pseudo-merge index out of range "
+ "(%"PRIu32" >= %"PRIuMAX")"),
+ n, (uintmax_t)bitmap_git->pseudo_merges.nr);
+ goto cleanup;
+ }
+
+ merge = use_pseudo_merge(&bitmap_git->pseudo_merges,
+ &bitmap_git->pseudo_merges.v[n]);
+ dump_ewah_object_ids(bitmap_git, merge->commits);
+
+cleanup:
+ free_bitmap_index(bitmap_git);
+ return ret;
+}
+
+int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n)
+{
+ struct bitmap_index *bitmap_git;
+ struct pseudo_merge *merge;
+ int ret = 0;
+
+ bitmap_git = prepare_bitmap_git(r);
+ if (!bitmap_git || !bitmap_git->pseudo_merges.nr)
+ goto cleanup;
+
+ if (n >= bitmap_git->pseudo_merges.nr) {
+ ret = error(_("pseudo-merge index out of range "
+ "(%"PRIu32" >= %"PRIuMAX")"),
+ n, (uintmax_t)bitmap_git->pseudo_merges.nr);
+ goto cleanup;
+ }
+
+ merge = use_pseudo_merge(&bitmap_git->pseudo_merges,
+ &bitmap_git->pseudo_merges.v[n]);
+
+ dump_ewah_object_ids(bitmap_git,
+ pseudo_merge_bitmap(&bitmap_git->pseudo_merges,
+ merge));
+
+cleanup:
+ free_bitmap_index(bitmap_git);
+ return ret;
+}
+
int rebuild_bitmap(const uint32_t *reposition,
struct ewah_bitmap *source,
struct bitmap *dest)
@@ -2516,6 +2857,7 @@ void free_bitmap_index(struct bitmap_index *b)
*/
close_midx_revindex(b->midx);
}
+ free_pseudo_merge_map(&b->pseudo_merges);
free(b);
}