From 809ea28f809e52d3204b597637b2f5e072c140f8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 9 Aug 2021 10:11:59 +0200 Subject: commit-graph: split out function to search commit position The function `find_commit_in_graph()` assumes that the caller has passed an object which was already determined to be a commit given that it will access the commit's graph position, which is stored in a commit slab. In a subsequent patch, we want to search for an object ID though without knowing whether it is a commit or not, which is not currently possible. Split out the logic to search the commit graph for a given object ID to prepare for this change. This commit also renames the function to `find_commit_pos_in_graph()`, which more accurately reflects what this function does. Furthermore, in order to allow for the searched object ID to be const, we need to adjust `bsearch_graph()`'s signature to accept a constant object ID as input, too. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- commit-graph.c | 55 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 25 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 3860a0d847..8c4c7262c8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -723,7 +723,7 @@ void close_commit_graph(struct raw_object_store *o) o->commit_graph = NULL; } -static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos) +static int bsearch_graph(struct commit_graph *g, const struct object_id *oid, uint32_t *pos) { return bsearch_hash(oid->hash, g->chunk_oid_fanout, g->chunk_oid_lookup, g->hash_len, pos); @@ -864,25 +864,30 @@ static int fill_commit_in_graph(struct repository *r, return 1; } -static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos) +static int search_commit_pos_in_graph(const struct object_id *id, struct commit_graph *g, uint32_t *pos) +{ + struct commit_graph *cur_g = g; + uint32_t lex_index; + + while (cur_g && !bsearch_graph(cur_g, id, &lex_index)) + cur_g = cur_g->base_graph; + + if (cur_g) { + *pos = lex_index + cur_g->num_commits_in_base; + return 1; + } + + return 0; +} + +static int find_commit_pos_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos) { uint32_t graph_pos = commit_graph_position(item); if (graph_pos != COMMIT_NOT_FROM_GRAPH) { *pos = graph_pos; return 1; } else { - struct commit_graph *cur_g = g; - uint32_t lex_index; - - while (cur_g && !bsearch_graph(cur_g, &(item->object.oid), &lex_index)) - cur_g = cur_g->base_graph; - - if (cur_g) { - *pos = lex_index + cur_g->num_commits_in_base; - return 1; - } - - return 0; + return search_commit_pos_in_graph(&item->object.oid, g, pos); } } @@ -895,7 +900,7 @@ static int parse_commit_in_graph_one(struct repository *r, if (item->object.parsed) return 1; - if (find_commit_in_graph(item, g, &pos)) + if (find_commit_pos_in_graph(item, g, &pos)) return fill_commit_in_graph(r, item, g, pos); return 0; @@ -921,7 +926,7 @@ void load_commit_graph_info(struct repository *r, struct commit *item) uint32_t pos; if (!prepare_commit_graph(r)) return; - if (find_commit_in_graph(item, r->objects->commit_graph, &pos)) + if (find_commit_pos_in_graph(item, r->objects->commit_graph, &pos)) fill_commit_graph_info(item, r->objects->commit_graph, pos); } @@ -1091,9 +1096,9 @@ static int write_graph_chunk_data(struct hashfile *f, edge_value += ctx->new_num_commits_in_base; else if (ctx->new_base_graph) { uint32_t pos; - if (find_commit_in_graph(parent->item, - ctx->new_base_graph, - &pos)) + if (find_commit_pos_in_graph(parent->item, + ctx->new_base_graph, + &pos)) edge_value = pos; } @@ -1122,9 +1127,9 @@ static int write_graph_chunk_data(struct hashfile *f, edge_value += ctx->new_num_commits_in_base; else if (ctx->new_base_graph) { uint32_t pos; - if (find_commit_in_graph(parent->item, - ctx->new_base_graph, - &pos)) + if (find_commit_pos_in_graph(parent->item, + ctx->new_base_graph, + &pos)) edge_value = pos; } @@ -1235,9 +1240,9 @@ static int write_graph_chunk_extra_edges(struct hashfile *f, edge_value += ctx->new_num_commits_in_base; else if (ctx->new_base_graph) { uint32_t pos; - if (find_commit_in_graph(parent->item, - ctx->new_base_graph, - &pos)) + if (find_commit_pos_in_graph(parent->item, + ctx->new_base_graph, + &pos)) edge_value = pos; } -- cgit v1.2.3 From f559d6d45e7e58ae1f922213948723de77ea77bd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 9 Aug 2021 10:12:03 +0200 Subject: revision: avoid hitting packfiles when commits are in commit-graph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When queueing references in git-rev-list(1), we try to optimize parsing of commits via the commit-graph. To do so, we first look up the object's type, and if it is a commit we call `repo_parse_commit()` instead of `parse_object()`. This is quite inefficient though given that we're always uncompressing the object header in order to determine the type. Instead, we can opportunistically search the commit-graph for the object ID: in case it's found, we know it's a commit and can directly fill in the commit object without having to uncompress the object header. Expose a new function `lookup_commit_in_graph()`, which tries to find a commit in the commit-graph by ID, and convert `get_reference()` to use this function. This provides a big performance win in cases where we load references in a repository with lots of references pointing to commits. The following has been executed in a real-world repository with about 2.2 million refs: Benchmark #1: HEAD~: rev-list --unsorted-input --objects --quiet --not --all --not $newrev Time (mean ± σ): 4.458 s ± 0.044 s [User: 4.115 s, System: 0.342 s] Range (min … max): 4.409 s … 4.534 s 10 runs Benchmark #2: HEAD: rev-list --unsorted-input --objects --quiet --not --all --not $newrev Time (mean ± σ): 3.089 s ± 0.015 s [User: 2.768 s, System: 0.321 s] Range (min … max): 3.061 s … 3.105 s 10 runs Summary 'HEAD: rev-list --unsorted-input --objects --quiet --not --all --not $newrev' ran 1.44 ± 0.02 times faster than 'HEAD~: rev-list --unsorted-input --objects --quiet --not --all --not $newrev' Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- commit-graph.c | 24 ++++++++++++++++++++++++ commit-graph.h | 8 ++++++++ revision.c | 18 ++++++++---------- 3 files changed, 40 insertions(+), 10 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 8c4c7262c8..00614acd65 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -891,6 +891,30 @@ static int find_commit_pos_in_graph(struct commit *item, struct commit_graph *g, } } +struct commit *lookup_commit_in_graph(struct repository *repo, const struct object_id *id) +{ + struct commit *commit; + uint32_t pos; + + if (!repo->objects->commit_graph) + return NULL; + if (!search_commit_pos_in_graph(id, repo->objects->commit_graph, &pos)) + return NULL; + if (!repo_has_object_file(repo, id)) + return NULL; + + commit = lookup_commit(repo, id); + if (!commit) + return NULL; + if (commit->object.parsed) + return commit; + + if (!fill_commit_in_graph(repo, commit, repo->objects->commit_graph, pos)) + return NULL; + + return commit; +} + static int parse_commit_in_graph_one(struct repository *r, struct commit_graph *g, struct commit *item) diff --git a/commit-graph.h b/commit-graph.h index 96c24fb577..04a94e1830 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -40,6 +40,14 @@ int open_commit_graph(const char *graph_file, int *fd, struct stat *st); */ int parse_commit_in_graph(struct repository *r, struct commit *item); +/* + * Look up the given commit ID in the commit-graph. This will only return a + * commit if the ID exists both in the graph and in the object database such + * that we don't return commits whose object has been pruned. Otherwise, this + * function returns `NULL`. + */ +struct commit *lookup_commit_in_graph(struct repository *repo, const struct object_id *id); + /* * It is possible that we loaded commit contents from the commit buffer, * but we also want to ensure the commit-graph content is correctly diff --git a/revision.c b/revision.c index 80a59896b9..0dabb5a0bc 100644 --- a/revision.c +++ b/revision.c @@ -360,20 +360,18 @@ static struct object *get_reference(struct rev_info *revs, const char *name, unsigned int flags) { struct object *object; + struct commit *commit; /* - * If the repository has commit graphs, repo_parse_commit() avoids - * reading the object buffer, so use it whenever possible. + * If the repository has commit graphs, we try to opportunistically + * look up the object ID in those graphs. Like this, we can avoid + * parsing commit data from disk. */ - if (oid_object_info(revs->repo, oid, NULL) == OBJ_COMMIT) { - struct commit *c = lookup_commit(revs->repo, oid); - if (!repo_parse_commit(revs->repo, c)) - object = (struct object *) c; - else - object = NULL; - } else { + commit = lookup_commit_in_graph(revs->repo, oid); + if (commit) + object = &commit->object; + else object = parse_object(revs->repo, oid); - } if (!object) { if (revs->ignore_missing) -- cgit v1.2.3