From 4f027355f6b6b5b2ba69e23ff50cb7313d13dd70 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2020 22:04:08 -0600 Subject: builtin/commit-graph.c: support for '--split[=]' With '--split', the commit-graph machinery writes new commits in another incremental commit-graph which is part of the existing chain, and optionally decides to condense the chain into a single commit-graph. This is done to ensure that the asymptotic behavior of looking up a commit in an incremental chain is not dominated by the number of incrementals in that chain. It can be controlled by the '--max-commits' and '--size-multiple' options. In the next two commits, we will introduce additional splitting strategies that can exert additional control over: - when a split commit-graph is and isn't written, and - when the existing commit-graph chain is discarded completely and replaced with another graph To prepare for this, make '--split' take an optional strategy (as in '--split[=]'), and add a new enum to describe which strategy is being used. For now, no strategies are given, and the only enumerated value is 'COMMIT_GRAPH_SPLIT_UNSPECIFIED', indicating the absence of a strategy. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'builtin/commit-graph.c') diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 4a70b33fb5..345fd97c61 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -9,7 +9,9 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph verify [--object-dir ] [--shallow] [--[no-]progress]"), - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--[no-]progress] "), + N_("git commit-graph write [--object-dir ] [--append] " + "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " + "[--[no-]progress] "), NULL }; @@ -19,7 +21,9 @@ static const char * const builtin_commit_graph_verify_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--[no-]progress] "), + N_("git commit-graph write [--object-dir ] [--append] " + "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " + "[--[no-]progress] "), NULL }; @@ -111,6 +115,18 @@ static int graph_verify(int argc, const char **argv) extern int read_replace_refs; static struct split_commit_graph_opts split_opts; +static int write_option_parse_split(const struct option *opt, const char *arg, + int unset) +{ + opts.split = 1; + if (!arg) + return 0; + + die(_("unrecognized --split argument, %s"), arg); + + return 0; +} + static int graph_write(int argc, const char **argv) { struct string_list *pack_indexes = NULL; @@ -133,8 +149,10 @@ static int graph_write(int argc, const char **argv) OPT_BOOL(0, "append", &opts.append, N_("include all commits already in the commit-graph file")), OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")), - OPT_BOOL(0, "split", &opts.split, - N_("allow writing an incremental commit-graph file")), + OPT_CALLBACK_F(0, "split", &split_opts.flags, NULL, + N_("allow writing an incremental commit-graph file"), + PARSE_OPT_OPTARG | PARSE_OPT_NONEG, + write_option_parse_split), OPT_INTEGER(0, "max-commits", &split_opts.max_commits, N_("maximum number of commits in a non-base split commit-graph")), OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple, -- cgit v1.2.3 From fdbde82fe523374b3c5d1f0f01f3c43dcaca9465 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2020 22:04:12 -0600 Subject: builtin/commit-graph.c: introduce split strategy 'no-merge' In the previous commit, we laid the groundwork for supporting different splitting strategies. In this commit, we introduce the first splitting strategy: 'no-merge'. Passing '--split=no-merge' is useful for callers which wish to write a new incremental commit-graph, but do not want to spend effort condensing the incremental chain [1]. Previously, this was possible by passing '--size-multiple=0', but this no longer the case following 63020f175f (commit-graph: prefer default size_mult when given zero, 2020-01-02). When '--split=no-merge' is given, the commit-graph machinery will never condense an existing chain, and it will always write a new incremental. [1]: This might occur when, for example, a server administrator running some program after each push may want to ensure that each job runs proportional in time to the size of the push, and does not "jump" when the commit-graph machinery decides to trigger a merge. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 5 +++++ builtin/commit-graph.c | 7 ++++++- commit-graph.c | 19 ++++++++++++------- commit-graph.h | 3 ++- t/t5324-split-commit-graph.sh | 11 +++++++++++ 5 files changed, 36 insertions(+), 9 deletions(-) (limited to 'builtin/commit-graph.c') diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 10d757c5cc..a4c4a641e5 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -63,6 +63,11 @@ chain of multiple commit-graph files stored in strategy and other splitting options. The new commits not already in the commit-graph are added in a new "tip" file. This file is merged with the existing file if the following merge conditions are met: +* If `--split=no-merge` is specified, a merge is never performed, and +the remaining options are ignored. A bare `--split` defers to the +remaining options. (Note that merging a chain of commit graphs replaces +the existing chain with a length-1 chain where the first and only +incremental holds the entire graph). + * If `--size-multiple=` is not specified, let `X` equal 2. If the new tip file would have `N` commits and the previous tip has `M` commits and diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 345fd97c61..9234b95ecf 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -118,11 +118,16 @@ static struct split_commit_graph_opts split_opts; static int write_option_parse_split(const struct option *opt, const char *arg, int unset) { + enum commit_graph_split_flags *flags = opt->value; + opts.split = 1; if (!arg) return 0; - die(_("unrecognized --split argument, %s"), arg); + if (!strcmp(arg, "no-merge")) + *flags = COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED; + else + die(_("unrecognized --split argument, %s"), arg); return 0; } diff --git a/commit-graph.c b/commit-graph.c index 656dd647d5..71f488839c 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1529,6 +1529,7 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) { struct commit_graph *g; uint32_t num_commits; + enum commit_graph_split_flags flags = COMMIT_GRAPH_SPLIT_UNSPECIFIED; uint32_t i; int max_commits = 0; @@ -1539,21 +1540,25 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) if (ctx->split_opts->size_multiple) size_mult = ctx->split_opts->size_multiple; + + flags = ctx->split_opts->flags; } g = ctx->r->objects->commit_graph; num_commits = ctx->commits.nr; ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; - while (g && (g->num_commits <= size_mult * num_commits || - (max_commits && num_commits > max_commits))) { - if (g->odb != ctx->odb) - break; + if (flags != COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED) { + while (g && (g->num_commits <= size_mult * num_commits || + (max_commits && num_commits > max_commits))) { + if (g->odb != ctx->odb) + break; - num_commits += g->num_commits; - g = g->base_graph; + num_commits += g->num_commits; + g = g->base_graph; - ctx->num_commit_graphs_after--; + ctx->num_commit_graphs_after--; + } } ctx->new_base_graph = g; diff --git a/commit-graph.h b/commit-graph.h index e799008ff4..8752afb88d 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -83,7 +83,8 @@ enum commit_graph_write_flags { }; enum commit_graph_split_flags { - COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0 + COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0, + COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1 }; struct split_commit_graph_opts { diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index c24823431f..29de47e834 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -344,4 +344,15 @@ test_expect_success 'split across alternate where alternate is not split' ' test_cmp commit-graph .git/objects/info/commit-graph ' +test_expect_success '--split=no-merge always writes an incremental' ' + test_when_finished rm -rf a b && + rm -rf $graphdir $infodir/commit-graph && + git reset --hard commits/2 && + git rev-list HEAD~1 >a && + git rev-list HEAD >b && + git commit-graph write --split --stdin-commits Date: Mon, 13 Apr 2020 22:04:17 -0600 Subject: builtin/commit-graph.c: introduce split strategy 'replace' When using split commit-graphs, it is sometimes useful to completely replace the commit-graph chain with a new base. For example, consider a scenario in which a repository builds a new commit-graph incremental for each push. Occasionally (say, after some fixed number of pushes), they may wish to rebuild the commit-graph chain with all reachable commits. They can do so with $ git commit-graph write --reachable but this removes the chain entirely and replaces it with a single commit-graph in 'objects/info/commit-graph'. Unfortunately, this means that the next push will have to move this commit-graph into the first layer of a new chain, and then write its new commits on top. Avoid such copying entirely by allowing the caller to specify that they wish to replace the entirety of their commit-graph chain, while also specifying that the new commit-graph should become the basis of a fresh, length-one chain. This addresses the above situation by making it possible for the caller to instead write: $ git commit-graph write --reachable --split=replace which writes a new length-one chain to 'objects/info/commit-graphs', making the commit-graph incremental generated by the subsequent push relatively cheap by avoiding the aforementioned copy. In order to do this, remove an assumption in 'write_commit_graph_file' that chains are always at least two incrementals long. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 7 ++--- builtin/commit-graph.c | 2 ++ commit-graph.c | 53 ++++++++++++++++++++++++++++---------- commit-graph.h | 3 ++- t/t5324-split-commit-graph.sh | 19 ++++++++++++++ 5 files changed, 66 insertions(+), 18 deletions(-) (limited to 'builtin/commit-graph.c') diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index a4c4a641e5..46f7f7c573 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -64,9 +64,10 @@ strategy and other splitting options. The new commits not already in the commit-graph are added in a new "tip" file. This file is merged with the existing file if the following merge conditions are met: * If `--split=no-merge` is specified, a merge is never performed, and -the remaining options are ignored. A bare `--split` defers to the -remaining options. (Note that merging a chain of commit graphs replaces -the existing chain with a length-1 chain where the first and only +the remaining options are ignored. `--split=replace` overwrites the +existing chain with a new one. A bare `--split` defers to the remaining +options. (Note that merging a chain of commit graphs replaces the +existing chain with a length-1 chain where the first and only incremental holds the entire graph). + * If `--size-multiple=` is not specified, let `X` equal 2. If the new diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 9234b95ecf..e0dd2876a1 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -126,6 +126,8 @@ static int write_option_parse_split(const struct option *opt, const char *arg, if (!strcmp(arg, "no-merge")) *flags = COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED; + else if (!strcmp(arg, "replace")) + *flags = COMMIT_GRAPH_SPLIT_REPLACE; else die(_("unrecognized --split argument, %s"), arg); diff --git a/commit-graph.c b/commit-graph.c index 71f488839c..36e1d6f9b3 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -866,7 +866,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, if (edge_value >= 0) edge_value += ctx->new_num_commits_in_base; - else { + else if (ctx->new_base_graph) { uint32_t pos; if (find_commit_in_graph(parent->item, ctx->new_base_graph, @@ -897,7 +897,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, if (edge_value >= 0) edge_value += ctx->new_num_commits_in_base; - else { + else if (ctx->new_base_graph) { uint32_t pos; if (find_commit_in_graph(parent->item, ctx->new_base_graph, @@ -964,7 +964,7 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, if (edge_value >= 0) edge_value += ctx->new_num_commits_in_base; - else { + else if (ctx->new_base_graph) { uint32_t pos; if (find_commit_in_graph(parent->item, ctx->new_base_graph, @@ -1037,6 +1037,8 @@ static void close_reachable(struct write_commit_graph_context *ctx) { int i; struct commit *commit; + enum commit_graph_split_flags flags = ctx->split_opts ? + ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED; if (ctx->report_progress) ctx->progress = start_delayed_progress( @@ -1066,8 +1068,9 @@ static void close_reachable(struct write_commit_graph_context *ctx) if (!commit) continue; if (ctx->split) { - if (!parse_commit(commit) && - commit->graph_pos == COMMIT_NOT_FROM_GRAPH) + if ((!parse_commit(commit) && + commit->graph_pos == COMMIT_NOT_FROM_GRAPH) || + flags == COMMIT_GRAPH_SPLIT_REPLACE) add_missing_parents(ctx, commit); } else if (!parse_commit_no_graph(commit)) add_missing_parents(ctx, commit); @@ -1287,6 +1290,8 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx) static void copy_oids_to_commits(struct write_commit_graph_context *ctx) { uint32_t i; + enum commit_graph_split_flags flags = ctx->split_opts ? + ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED; ctx->num_extra_edges = 0; if (ctx->report_progress) @@ -1303,11 +1308,14 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc); ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]); - if (ctx->split && + if (ctx->split && flags != COMMIT_GRAPH_SPLIT_REPLACE && ctx->commits.list[ctx->commits.nr]->graph_pos != COMMIT_NOT_FROM_GRAPH) continue; - parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); + if (ctx->split && flags == COMMIT_GRAPH_SPLIT_REPLACE) + parse_commit(ctx->commits.list[ctx->commits.nr]); + else + parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); num_parents = commit_list_count(ctx->commits.list[ctx->commits.nr]->parents); if (num_parents > 2) @@ -1488,8 +1496,12 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) } if (ctx->base_graph_name) { - const char *dest = ctx->commit_graph_filenames_after[ - ctx->num_commit_graphs_after - 2]; + const char *dest; + int idx = ctx->num_commit_graphs_after - 1; + if (ctx->num_commit_graphs_after > 1) + idx--; + + dest = ctx->commit_graph_filenames_after[idx]; if (strcmp(ctx->base_graph_name, dest)) { result = rename(ctx->base_graph_name, dest); @@ -1546,9 +1558,13 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) g = ctx->r->objects->commit_graph; num_commits = ctx->commits.nr; - ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; + if (flags == COMMIT_GRAPH_SPLIT_REPLACE) + ctx->num_commit_graphs_after = 1; + else + ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; - if (flags != COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED) { + if (flags != COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED && + flags != COMMIT_GRAPH_SPLIT_REPLACE) { while (g && (g->num_commits <= size_mult * num_commits || (max_commits && num_commits > max_commits))) { if (g->odb != ctx->odb) @@ -1561,7 +1577,11 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) } } - ctx->new_base_graph = g; + if (flags != COMMIT_GRAPH_SPLIT_REPLACE) + ctx->new_base_graph = g; + else if (ctx->num_commit_graphs_after != 1) + BUG("split_graph_merge_strategy: num_commit_graphs_after " + "should be 1 with --split=replace"); if (ctx->num_commit_graphs_after == 2) { char *old_graph_name = get_commit_graph_filename(g->odb); @@ -1772,6 +1792,7 @@ int write_commit_graph(struct object_directory *odb, struct write_commit_graph_context *ctx; uint32_t i, count_distinct = 0; int res = 0; + int replace = 0; if (!commit_graph_compatible(the_repository)) return 0; @@ -1806,6 +1827,9 @@ int write_commit_graph(struct object_directory *odb, g = g->base_graph; } } + + if (ctx->split_opts) + replace = ctx->split_opts->flags & COMMIT_GRAPH_SPLIT_REPLACE; } ctx->approx_nr_objects = approximate_object_count(); @@ -1866,13 +1890,14 @@ int write_commit_graph(struct object_directory *odb, goto cleanup; } - if (!ctx->commits.nr) + if (!ctx->commits.nr && !replace) goto cleanup; if (ctx->split) { split_graph_merge_strategy(ctx); - merge_commit_graphs(ctx); + if (!replace) + merge_commit_graphs(ctx); } else ctx->num_commit_graphs_after = 1; diff --git a/commit-graph.h b/commit-graph.h index 8752afb88d..718433d79b 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -84,7 +84,8 @@ enum commit_graph_write_flags { enum commit_graph_split_flags { COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0, - COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1 + COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1, + COMMIT_GRAPH_SPLIT_REPLACE = 2 }; struct split_commit_graph_opts { diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 29de47e834..87a9148fbe 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -355,4 +355,23 @@ test_expect_success '--split=no-merge always writes an incremental' ' test_line_count = 2 $graphdir/commit-graph-chain ' +test_expect_success '--split=replace replaces the chain' ' + rm -rf $graphdir $infodir/commit-graph && + git reset --hard commits/3 && + git rev-list -1 HEAD~2 >a && + git rev-list -1 HEAD~1 >b && + git rev-list -1 HEAD >c && + git commit-graph write --split=no-merge --stdin-commits graph-files && + test_line_count = 1 graph-files && + verify_chain_files_exist $graphdir && + graph_read_expect 2 +' + test_done -- cgit v1.2.3 From 6830c360777468434184f60023e2562348c9dacc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2020 22:04:25 -0600 Subject: commit-graph.h: replace 'commit_hex' with 'commits' The 'write_commit_graph()' function takes in either a string list of pack indices, or a string list of hexadecimal commit OIDs. These correspond to the '--stdin-packs' and '--stdin-commits' mode(s) from 'git commit-graph write'. Using a string_list of hexadecimal commit IDs is not the most efficient use of memory, since we can instead use the 'struct oidset', which is more well-suited for this case. This has another benefit which will become apparent in the following commit. This is that we are about to disambiguate the kinds of errors we produce with '--stdin-commits' into "non-hex input" and "hex-input, but referring to a non-commit object". By having 'write_commit_graph' take in a 'struct oidset *' of commits, we place the burden on the caller (in this case, the builtin) to handle the first case, and the commit-graph machinery can handle the second case. Suggested-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 19 +++++++++++++--- commit-graph.c | 59 +++++++++++++++++++++++++++---------------------- commit-graph.h | 3 ++- t/t5318-commit-graph.sh | 2 +- 4 files changed, 52 insertions(+), 31 deletions(-) (limited to 'builtin/commit-graph.c') diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index e0dd2876a1..075f8f6928 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -137,7 +137,7 @@ static int write_option_parse_split(const struct option *opt, const char *arg, static int graph_write(int argc, const char **argv) { struct string_list *pack_indexes = NULL; - struct string_list *commit_hex = NULL; + struct oidset commits = OIDSET_INIT; struct object_directory *odb = NULL; struct string_list lines; int result = 0; @@ -210,7 +210,20 @@ static int graph_write(int argc, const char **argv) if (opts.stdin_packs) pack_indexes = &lines; if (opts.stdin_commits) { - commit_hex = &lines; + struct string_list_item *item; + oidset_init(&commits, lines.nr); + for_each_string_list_item(item, &lines) { + struct object_id oid; + const char *end; + + if (parse_oid_hex(item->string, &oid, &end)) { + error(_("unexpected non-hex object ID: " + "%s"), item->string); + return 1; + } + + oidset_insert(&commits, &oid); + } flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; } @@ -219,7 +232,7 @@ static int graph_write(int argc, const char **argv) if (write_commit_graph(odb, pack_indexes, - commit_hex, + opts.stdin_commits ? &commits : NULL, flags, &split_opts)) result = 1; diff --git a/commit-graph.c b/commit-graph.c index 36e1d6f9b3..bf86e4a92b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1136,13 +1136,13 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } -static int add_ref_to_list(const char *refname, - const struct object_id *oid, - int flags, void *cb_data) +static int add_ref_to_set(const char *refname, + const struct object_id *oid, + int flags, void *cb_data) { - struct string_list *list = (struct string_list *)cb_data; + struct oidset *commits = (struct oidset *)cb_data; - string_list_append(list, oid_to_hex(oid)); + oidset_insert(commits, oid); return 0; } @@ -1150,14 +1150,14 @@ int write_commit_graph_reachable(struct object_directory *odb, enum commit_graph_write_flags flags, const struct split_commit_graph_opts *split_opts) { - struct string_list list = STRING_LIST_INIT_DUP; + struct oidset commits = OIDSET_INIT; int result; - for_each_ref(add_ref_to_list, &list); - result = write_commit_graph(odb, NULL, &list, + for_each_ref(add_ref_to_set, &commits); + result = write_commit_graph(odb, NULL, &commits, flags, split_opts); - string_list_clear(&list, 0); + oidset_clear(&commits); return result; } @@ -1206,39 +1206,46 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx, return 0; } -static int fill_oids_from_commit_hex(struct write_commit_graph_context *ctx, - struct string_list *commit_hex) +static int fill_oids_from_commits(struct write_commit_graph_context *ctx, + struct oidset *commits) { - uint32_t i; + uint32_t i = 0; struct strbuf progress_title = STRBUF_INIT; + struct oidset_iter iter; + struct object_id *oid; + + if (!oidset_size(commits)) + return 0; if (ctx->report_progress) { strbuf_addf(&progress_title, Q_("Finding commits for commit graph from %d ref", "Finding commits for commit graph from %d refs", - commit_hex->nr), - commit_hex->nr); + oidset_size(commits)), + oidset_size(commits)); ctx->progress = start_delayed_progress( progress_title.buf, - commit_hex->nr); + oidset_size(commits)); } - for (i = 0; i < commit_hex->nr; i++) { - const char *end; - struct object_id oid; + + oidset_iter_init(commits, &iter); + while ((oid = oidset_iter_next(&iter))) { struct commit *result; - display_progress(ctx->progress, i + 1); - if (!parse_oid_hex(commit_hex->items[i].string, &oid, &end) && - (result = lookup_commit_reference_gently(ctx->r, &oid, 1))) { + display_progress(ctx->progress, ++i); + + result = lookup_commit_reference_gently(ctx->r, oid, 1); + if (result) { ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); oidcpy(&ctx->oids.list[ctx->oids.nr], &(result->object.oid)); ctx->oids.nr++; } else if (ctx->check_oids) { error(_("invalid commit object id: %s"), - commit_hex->items[i].string); + oid_to_hex(oid)); return -1; } } + stop_progress(&ctx->progress); strbuf_release(&progress_title); @@ -1785,7 +1792,7 @@ out: int write_commit_graph(struct object_directory *odb, struct string_list *pack_indexes, - struct string_list *commit_hex, + struct oidset *commits, enum commit_graph_write_flags flags, const struct split_commit_graph_opts *split_opts) { @@ -1861,12 +1868,12 @@ int write_commit_graph(struct object_directory *odb, goto cleanup; } - if (commit_hex) { - if ((res = fill_oids_from_commit_hex(ctx, commit_hex))) + if (commits) { + if ((res = fill_oids_from_commits(ctx, commits))) goto cleanup; } - if (!pack_indexes && !commit_hex) + if (!pack_indexes && !commits) fill_oids_from_all_packs(ctx); close_reachable(ctx); diff --git a/commit-graph.h b/commit-graph.h index 718433d79b..98ef121924 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -6,6 +6,7 @@ #include "string-list.h" #include "cache.h" #include "object-store.h" +#include "oidset.h" #define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH" #define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" @@ -106,7 +107,7 @@ int write_commit_graph_reachable(struct object_directory *odb, const struct split_commit_graph_opts *split_opts); int write_commit_graph(struct object_directory *odb, struct string_list *pack_indexes, - struct string_list *commit_hex, + struct oidset *commits, enum commit_graph_write_flags flags, const struct split_commit_graph_opts *split_opts); diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 0bf98b56ec..69599cea7f 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -43,7 +43,7 @@ test_expect_success 'create commits and repack' ' test_expect_success 'exit with correct error on bad input to --stdin-commits' ' cd "$TRASH_DIRECTORY/full" && echo HEAD | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr && - test_i18ngrep "invalid commit object id" stderr && + test_i18ngrep "unexpected non-hex object ID: HEAD" stderr && # valid tree OID, but not a commit OID git rev-parse HEAD^{tree} | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr && test_i18ngrep "invalid commit object id" stderr -- cgit v1.2.3 From 7a9ce0269bc0f4ef230f930b3910b70ac3142552 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 14 Apr 2020 22:31:37 -0600 Subject: commit-graph.c: introduce '--[no-]check-oids' When operating on a stream of commit OIDs on stdin, 'git commit-graph write' checks that each OID refers to an object that is indeed a commit. This is convenient to make sure that the given input is well-formed, but can sometimes be undesirable. For example, server operators may wish to feed the refnames that were updated during a push to 'git commit-graph write --input=stdin-commits', and silently discard refs that don't point at commits. This can be done by combing the output of 'git for-each-ref' with '--format %(*objecttype)', but this requires opening up a potentially large number of objects. Instead, it is more convenient to feed the updated refs to the commit-graph machinery, and let it throw out refs that don't point to commits. Introduce '--[no-]check-oids' to make such a behavior possible. With '--check-oids' (the default behavior to retain backwards compatibility), 'git commit-graph write' will barf on a non-commit line in its input. With 'no-check-oids', such lines will be silently ignored, making the above possible by specifying this option. No matter which is supplied, 'git commit-graph write' retains the behavior from the previous commit of rejecting non-OID inputs like "HEAD" and "refs/heads/foo" as before. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 5 +++++ builtin/commit-graph.c | 11 ++++++++--- commit-graph.c | 2 +- t/t5318-commit-graph.sh | 28 ++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 4 deletions(-) (limited to 'builtin/commit-graph.c') diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 46f7f7c573..91e8027b86 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -82,6 +82,11 @@ tip with the previous tip. Finally, if `--expire-time=` is not specified, let `datetime` be the current time. After writing the split commit-graph, delete all unused commit-graph whose modified times are older than `datetime`. ++ +The `--[no-]check-oids` option decides whether or not OIDs are required +to be commits. By default, `--check-oids` is implied, generating an +error on non-commit objects. If `--no-check-oids` is given, non-commits +are silently discarded. 'verify':: diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 075f8f6928..2857153008 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -11,7 +11,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph verify [--object-dir ] [--shallow] [--[no-]progress]"), N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] "), + "[--[no-]progress] [--[no-]check-oids] "), NULL }; @@ -23,7 +23,7 @@ static const char * const builtin_commit_graph_verify_usage[] = { static const char * const builtin_commit_graph_write_usage[] = { N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] "), + "[--[no-]progress] [--[no-]check-oids] "), NULL }; @@ -36,6 +36,7 @@ static struct opts_commit_graph { int split; int shallow; int progress; + int check_oids; } opts; static struct object_directory *find_odb(struct repository *r, @@ -160,6 +161,8 @@ static int graph_write(int argc, const char **argv) N_("allow writing an incremental commit-graph file"), PARSE_OPT_OPTARG | PARSE_OPT_NONEG, write_option_parse_split), + OPT_BOOL(0, "check-oids", &opts.check_oids, + N_("require OIDs to be commits")), OPT_INTEGER(0, "max-commits", &split_opts.max_commits, N_("maximum number of commits in a non-base split commit-graph")), OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple, @@ -170,6 +173,7 @@ static int graph_write(int argc, const char **argv) }; opts.progress = isatty(2); + opts.check_oids = 1; split_opts.size_multiple = 2; split_opts.max_commits = 0; split_opts.expire_time = 0; @@ -224,7 +228,8 @@ static int graph_write(int argc, const char **argv) oidset_insert(&commits, &oid); } - flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; + if (opts.check_oids) + flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; } UNLEAK(buf); diff --git a/commit-graph.c b/commit-graph.c index bf86e4a92b..af677fc98e 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -145,7 +145,7 @@ static int verify_commit_graph_lite(struct commit_graph *g) * * There should only be very basic checks here to ensure that * we don't e.g. segfault in fill_commit_in_graph(), but - * because this is a very hot codepath nothing that e.g. loops + e because this is a very hot codepath nothing that e.g. loops * over g->num_commits, or runs a checksum on the commit-graph * itself. */ diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 69599cea7f..23c7b7e036 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -49,6 +49,34 @@ test_expect_success 'exit with correct error on bad input to --stdin-commits' ' test_i18ngrep "invalid commit object id" stderr ' +graph_expect_commits() { + test-tool read-graph >got + if ! grep "num_commits: $1" got + then + echo "graph_expect_commits: expected $1 commit(s), got:" + cat got + false + fi +} + +test_expect_success 'ignores non-commit OIDs to --input=stdin-commits with --no-check-oids' ' + test_when_finished rm -rf "$objdir/info/commit-graph" && + cd "$TRASH_DIRECTORY/full" && + # write a graph to ensure layers are/are not added appropriately + git rev-parse HEAD~1 >base && + git commit-graph write --stdin-commits bad && + test_expect_code 1 git commit-graph write --stdin-commits err && + test_i18ngrep "unexpected non-hex object ID: HEAD" err && + graph_expect_commits 2 && + # update with valid commit OID, ignore tree OID + git rev-parse HEAD HEAD^{tree} >in && + git commit-graph write --stdin-commits --no-check-oids output git -c core.commitGraph=false $1 >expect -- cgit v1.2.3 From dbd5e0a1861c5bb1446e5518173aa1760c6617b0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 29 Apr 2020 12:44:40 -0700 Subject: Revert "commit-graph.c: introduce '--[no-]check-oids'" This reverts commit 7a9ce0269bc0f4ef230f930b3910b70ac3142552, which has not yet gained consensus. --- Documentation/git-commit-graph.txt | 5 ----- builtin/commit-graph.c | 11 +++-------- commit-graph.c | 2 +- t/t5318-commit-graph.sh | 28 ---------------------------- 4 files changed, 4 insertions(+), 42 deletions(-) (limited to 'builtin/commit-graph.c') diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 91e8027b86..46f7f7c573 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -82,11 +82,6 @@ tip with the previous tip. Finally, if `--expire-time=` is not specified, let `datetime` be the current time. After writing the split commit-graph, delete all unused commit-graph whose modified times are older than `datetime`. -+ -The `--[no-]check-oids` option decides whether or not OIDs are required -to be commits. By default, `--check-oids` is implied, generating an -error on non-commit objects. If `--no-check-oids` is given, non-commits -are silently discarded. 'verify':: diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 2857153008..075f8f6928 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -11,7 +11,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph verify [--object-dir ] [--shallow] [--[no-]progress]"), N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] [--[no-]check-oids] "), + "[--[no-]progress] "), NULL }; @@ -23,7 +23,7 @@ static const char * const builtin_commit_graph_verify_usage[] = { static const char * const builtin_commit_graph_write_usage[] = { N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] [--[no-]check-oids] "), + "[--[no-]progress] "), NULL }; @@ -36,7 +36,6 @@ static struct opts_commit_graph { int split; int shallow; int progress; - int check_oids; } opts; static struct object_directory *find_odb(struct repository *r, @@ -161,8 +160,6 @@ static int graph_write(int argc, const char **argv) N_("allow writing an incremental commit-graph file"), PARSE_OPT_OPTARG | PARSE_OPT_NONEG, write_option_parse_split), - OPT_BOOL(0, "check-oids", &opts.check_oids, - N_("require OIDs to be commits")), OPT_INTEGER(0, "max-commits", &split_opts.max_commits, N_("maximum number of commits in a non-base split commit-graph")), OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple, @@ -173,7 +170,6 @@ static int graph_write(int argc, const char **argv) }; opts.progress = isatty(2); - opts.check_oids = 1; split_opts.size_multiple = 2; split_opts.max_commits = 0; split_opts.expire_time = 0; @@ -228,8 +224,7 @@ static int graph_write(int argc, const char **argv) oidset_insert(&commits, &oid); } - if (opts.check_oids) - flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; + flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; } UNLEAK(buf); diff --git a/commit-graph.c b/commit-graph.c index af677fc98e..bf86e4a92b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -145,7 +145,7 @@ static int verify_commit_graph_lite(struct commit_graph *g) * * There should only be very basic checks here to ensure that * we don't e.g. segfault in fill_commit_in_graph(), but - e because this is a very hot codepath nothing that e.g. loops + * because this is a very hot codepath nothing that e.g. loops * over g->num_commits, or runs a checksum on the commit-graph * itself. */ diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 23c7b7e036..69599cea7f 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -49,34 +49,6 @@ test_expect_success 'exit with correct error on bad input to --stdin-commits' ' test_i18ngrep "invalid commit object id" stderr ' -graph_expect_commits() { - test-tool read-graph >got - if ! grep "num_commits: $1" got - then - echo "graph_expect_commits: expected $1 commit(s), got:" - cat got - false - fi -} - -test_expect_success 'ignores non-commit OIDs to --input=stdin-commits with --no-check-oids' ' - test_when_finished rm -rf "$objdir/info/commit-graph" && - cd "$TRASH_DIRECTORY/full" && - # write a graph to ensure layers are/are not added appropriately - git rev-parse HEAD~1 >base && - git commit-graph write --stdin-commits bad && - test_expect_code 1 git commit-graph write --stdin-commits err && - test_i18ngrep "unexpected non-hex object ID: HEAD" err && - graph_expect_commits 2 && - # update with valid commit OID, ignore tree OID - git rev-parse HEAD HEAD^{tree} >in && - git commit-graph write --stdin-commits --no-check-oids output git -c core.commitGraph=false $1 >expect -- cgit v1.2.3