diff options
| -rw-r--r-- | Documentation/config/feature.adoc | 4 | ||||
| -rw-r--r-- | Documentation/config/pack.adoc | 4 | ||||
| -rw-r--r-- | Documentation/git-pack-objects.adoc | 25 | ||||
| -rw-r--r-- | Documentation/git-repack.adoc | 5 | ||||
| -rw-r--r-- | Documentation/technical/api-path-walk.adoc | 9 | ||||
| -rw-r--r-- | builtin/pack-objects.c | 425 | ||||
| -rw-r--r-- | builtin/repack.c | 7 | ||||
| -rw-r--r-- | pack-objects.h | 12 | ||||
| -rw-r--r-- | path-walk.c | 6 | ||||
| -rw-r--r-- | path-walk.h | 7 | ||||
| -rw-r--r-- | repo-settings.c | 3 | ||||
| -rw-r--r-- | repo-settings.h | 1 | ||||
| -rw-r--r-- | scalar.c | 1 | ||||
| -rw-r--r-- | t/README | 4 | ||||
| -rw-r--r-- | t/helper/test-path-walk.c | 2 | ||||
| -rwxr-xr-x | t/perf/p5313-pack-objects.sh | 37 | ||||
| -rwxr-xr-x | t/t0411-clone-from-partial.sh | 6 | ||||
| -rw-r--r-- | t/t0450/adoc-help-mismatches | 1 | ||||
| -rwxr-xr-x | t/t5300-pack-object.sh | 19 | ||||
| -rwxr-xr-x | t/t5306-pack-nobase.sh | 5 | ||||
| -rwxr-xr-x | t/t5310-pack-bitmaps.sh | 13 | ||||
| -rwxr-xr-x | t/t5316-pack-delta-depth.sh | 9 | ||||
| -rwxr-xr-x | t/t5332-multi-pack-reuse.sh | 7 | ||||
| -rwxr-xr-x | t/t5516-fetch-push.sh | 10 | ||||
| -rwxr-xr-x | t/t5538-push-shallow.sh | 41 | ||||
| -rwxr-xr-x | t/t6601-path-walk.sh | 20 | ||||
| -rwxr-xr-x | t/t7406-submodule-update.sh | 3 |
27 files changed, 620 insertions, 66 deletions
diff --git a/Documentation/config/feature.adoc b/Documentation/config/feature.adoc index f061b64b74..cb49ff2604 100644 --- a/Documentation/config/feature.adoc +++ b/Documentation/config/feature.adoc @@ -20,6 +20,10 @@ walking fewer objects. + * `pack.allowPackReuse=multi` may improve the time it takes to create a pack by reusing objects from multiple packs instead of just one. ++ +* `pack.usePathWalk` may speed up packfile creation and make the packfiles be +significantly smaller in the presence of certain filename collisions with Git's +default name-hash. feature.manyFiles:: Enable config options that optimize for repos with many files in the diff --git a/Documentation/config/pack.adoc b/Documentation/config/pack.adoc index da527377fa..75402d5579 100644 --- a/Documentation/config/pack.adoc +++ b/Documentation/config/pack.adoc @@ -155,6 +155,10 @@ pack.useSparse:: commits contain certain types of direct renames. Default is `true`. +pack.usePathWalk:: + Enable the `--path-walk` option by default for `git pack-objects` + processes. See linkgit:git-pack-objects[1] for full details. + pack.preferBitmapTips:: When selecting which commits will receive bitmaps, prefer a commit at the tip of any reference that is a suffix of any value diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index 7f69ae4855..b1c5aa27da 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -10,13 +10,13 @@ SYNOPSIS -------- [verse] 'git pack-objects' [-q | --progress | --all-progress] [--all-progress-implied] - [--no-reuse-delta] [--delta-base-offset] [--non-empty] - [--local] [--incremental] [--window=<n>] [--depth=<n>] - [--revs [--unpacked | --all]] [--keep-pack=<pack-name>] - [--cruft] [--cruft-expiration=<time>] - [--stdout [--filter=<filter-spec>] | <base-name>] - [--shallow] [--keep-true-parents] [--[no-]sparse] - [--name-hash-version=<n>] < <object-list> + [--no-reuse-delta] [--delta-base-offset] [--non-empty] + [--local] [--incremental] [--window=<n>] [--depth=<n>] + [--revs [--unpacked | --all]] [--keep-pack=<pack-name>] + [--cruft] [--cruft-expiration=<time>] + [--stdout [--filter=<filter-spec>] | <base-name>] + [--shallow] [--keep-true-parents] [--[no-]sparse] + [--name-hash-version=<n>] [--path-walk] < <object-list> DESCRIPTION @@ -375,6 +375,17 @@ many different directories. At the moment, this version is not allowed when writing reachability bitmap files with `--write-bitmap-index` and it will be automatically changed to version `1`. +--path-walk:: + Perform compression by first organizing objects by path, then a + second pass that compresses across paths as normal. This has the + potential to improve delta compression especially in the presence + of filenames that cause collisions in Git's default name-hash + algorithm. ++ +Incompatible with `--delta-islands`, `--shallow`, or `--filter`. The +`--use-bitmap-index` option will be ignored in the presence of +`--path-walk.` + DELTA ISLANDS ------------- diff --git a/Documentation/git-repack.adoc b/Documentation/git-repack.adoc index e1cd75eebe..d12c4985f6 100644 --- a/Documentation/git-repack.adoc +++ b/Documentation/git-repack.adoc @@ -11,7 +11,7 @@ SYNOPSIS [verse] 'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m] [--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>] - [--write-midx] [--name-hash-version=<n>] + [--write-midx] [--name-hash-version=<n>] [--path-walk] DESCRIPTION ----------- @@ -258,6 +258,9 @@ linkgit:git-multi-pack-index[1]). Provide this argument to the underlying `git pack-objects` process. See linkgit:git-pack-objects[1] for full details. +--path-walk:: + Pass the `--path-walk` option to the underlying `git pack-objects` + process. See linkgit:git-pack-objects[1] for full details. CONFIGURATION ------------- diff --git a/Documentation/technical/api-path-walk.adoc b/Documentation/technical/api-path-walk.adoc index 3e089211fb..34c905eb9c 100644 --- a/Documentation/technical/api-path-walk.adoc +++ b/Documentation/technical/api-path-walk.adoc @@ -56,6 +56,14 @@ better off using the revision walk API instead. the revision walk so that the walk emits commits marked with the `UNINTERESTING` flag. +`edge_aggressive`:: + For performance reasons, usually only the boundary commits are + explored to find UNINTERESTING objects. However, in the case of + shallow clones it can be helpful to mark all trees and blobs + reachable from UNINTERESTING tip commits as UNINTERESTING. This + matches the behavior of `--objects-edge-aggressive` in the + revision API. + `pl`:: This pattern list pointer allows focusing the path-walk search to a set of patterns, only emitting paths that match the given @@ -69,4 +77,5 @@ Examples See example usages in: `t/helper/test-path-walk.c`, + `builtin/pack-objects.c`, `builtin/backfill.c` diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 8b33edc2ff..67941c8a60 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -41,6 +41,10 @@ #include "promisor-remote.h" #include "pack-mtimes.h" #include "parse-options.h" +#include "blob.h" +#include "tree.h" +#include "path-walk.h" +#include "trace2.h" /* * Objects we are going to pack are collected in the `to_pack` structure. @@ -184,8 +188,14 @@ static inline void oe_set_delta_size(struct packing_data *pack, #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val) static const char *const pack_usage[] = { - N_("git pack-objects --stdout [<options>] [< <ref-list> | < <object-list>]"), - N_("git pack-objects [<options>] <base-name> [< <ref-list> | < <object-list>]"), + N_("git pack-objects [-q | --progress | --all-progress] [--all-progress-implied]\n" + " [--no-reuse-delta] [--delta-base-offset] [--non-empty]\n" + " [--local] [--incremental] [--window=<n>] [--depth=<n>]\n" + " [--revs [--unpacked | --all]] [--keep-pack=<pack-name>]\n" + " [--cruft] [--cruft-expiration=<time>]\n" + " [--stdout [--filter=<filter-spec>] | <base-name>]\n" + " [--shallow] [--keep-true-parents] [--[no-]sparse]\n" + " [--name-hash-version=<n>] [--path-walk] < <object-list>"), NULL }; @@ -200,6 +210,7 @@ static int keep_unreachable, unpack_unreachable, include_tag; static timestamp_t unpack_unreachable_expiration; static int pack_loose_unreachable; static int cruft; +static int shallow = 0; static timestamp_t cruft_expiration; static int local; static int have_non_local_packs; @@ -218,6 +229,7 @@ static int delta_search_threads; static int pack_to_stdout; static int sparse; static int thin; +static int path_walk = -1; static int num_preferred_base; static struct progress *progress_state; @@ -3041,6 +3053,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size, struct thread_params { pthread_t thread; struct object_entry **list; + struct packing_region *regions; unsigned list_size; unsigned remaining; int window; @@ -3283,6 +3296,242 @@ static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, cons return 0; } +static int should_attempt_deltas(struct object_entry *entry) +{ + if (DELTA(entry)) + /* This happens if we decided to reuse existing + * delta from a pack. "reuse_delta &&" is implied. + */ + return 0; + + if (!entry->type_valid || + oe_size_less_than(&to_pack, entry, 50)) + return 0; + + if (entry->no_try_delta) + return 0; + + if (!entry->preferred_base) { + if (oe_type(entry) < 0) + die(_("unable to get type of object %s"), + oid_to_hex(&entry->idx.oid)); + } else if (oe_type(entry) < 0) { + /* + * This object is not found, but we + * don't have to include it anyway. + */ + return 0; + } + + return 1; +} + +static void find_deltas_for_region(struct object_entry *list, + struct packing_region *region, + unsigned int *processed) +{ + struct object_entry **delta_list; + unsigned int delta_list_nr = 0; + + ALLOC_ARRAY(delta_list, region->nr); + for (size_t i = 0; i < region->nr; i++) { + struct object_entry *entry = list + region->start + i; + if (should_attempt_deltas(entry)) + delta_list[delta_list_nr++] = entry; + } + + QSORT(delta_list, delta_list_nr, type_size_sort); + find_deltas(delta_list, &delta_list_nr, window, depth, processed); + free(delta_list); +} + +static void find_deltas_by_region(struct object_entry *list, + struct packing_region *regions, + size_t start, size_t nr) +{ + unsigned int processed = 0; + size_t progress_nr; + + if (!nr) + return; + + progress_nr = regions[nr - 1].start + regions[nr - 1].nr; + + if (progress) + progress_state = start_progress(the_repository, + _("Compressing objects by path"), + progress_nr); + + while (nr--) + find_deltas_for_region(list, + ®ions[start++], + &processed); + + display_progress(progress_state, progress_nr); + stop_progress(&progress_state); +} + +static void *threaded_find_deltas_by_path(void *arg) +{ + struct thread_params *me = arg; + + progress_lock(); + while (me->remaining) { + while (me->remaining) { + progress_unlock(); + find_deltas_for_region(to_pack.objects, + me->regions, + me->processed); + progress_lock(); + me->remaining--; + me->regions++; + } + + me->working = 0; + pthread_cond_signal(&progress_cond); + progress_unlock(); + + /* + * We must not set ->data_ready before we wait on the + * condition because the main thread may have set it to 1 + * before we get here. In order to be sure that new + * work is available if we see 1 in ->data_ready, it + * was initialized to 0 before this thread was spawned + * and we reset it to 0 right away. + */ + pthread_mutex_lock(&me->mutex); + while (!me->data_ready) + pthread_cond_wait(&me->cond, &me->mutex); + me->data_ready = 0; + pthread_mutex_unlock(&me->mutex); + + progress_lock(); + } + progress_unlock(); + /* leave ->working 1 so that this doesn't get more work assigned */ + return NULL; +} + +static void ll_find_deltas_by_region(struct object_entry *list, + struct packing_region *regions, + uint32_t start, uint32_t nr) +{ + struct thread_params *p; + int i, ret, active_threads = 0; + unsigned int processed = 0; + uint32_t progress_nr; + init_threaded_search(); + + if (!nr) + return; + + progress_nr = regions[nr - 1].start + regions[nr - 1].nr; + if (delta_search_threads <= 1) { + find_deltas_by_region(list, regions, start, nr); + cleanup_threaded_search(); + return; + } + + if (progress > pack_to_stdout) + fprintf_ln(stderr, + Q_("Path-based delta compression using up to %d thread", + "Path-based delta compression using up to %d threads", + delta_search_threads), + delta_search_threads); + CALLOC_ARRAY(p, delta_search_threads); + + if (progress) + progress_state = start_progress(the_repository, + _("Compressing objects by path"), + progress_nr); + /* Partition the work amongst work threads. */ + for (i = 0; i < delta_search_threads; i++) { + unsigned sub_size = nr / (delta_search_threads - i); + + p[i].window = window; + p[i].depth = depth; + p[i].processed = &processed; + p[i].working = 1; + p[i].data_ready = 0; + + p[i].regions = regions; + p[i].list_size = sub_size; + p[i].remaining = sub_size; + + regions += sub_size; + nr -= sub_size; + } + + /* Start work threads. */ + for (i = 0; i < delta_search_threads; i++) { + if (!p[i].list_size) + continue; + pthread_mutex_init(&p[i].mutex, NULL); + pthread_cond_init(&p[i].cond, NULL); + ret = pthread_create(&p[i].thread, NULL, + threaded_find_deltas_by_path, &p[i]); + if (ret) + die(_("unable to create thread: %s"), strerror(ret)); + active_threads++; + } + + /* + * Now let's wait for work completion. Each time a thread is done + * with its work, we steal half of the remaining work from the + * thread with the largest number of unprocessed objects and give + * it to that newly idle thread. This ensure good load balancing + * until the remaining object list segments are simply too short + * to be worth splitting anymore. + */ + while (active_threads) { + struct thread_params *target = NULL; + struct thread_params *victim = NULL; + unsigned sub_size = 0; + + progress_lock(); + for (;;) { + for (i = 0; !target && i < delta_search_threads; i++) + if (!p[i].working) + target = &p[i]; + if (target) + break; + pthread_cond_wait(&progress_cond, &progress_mutex); + } + + for (i = 0; i < delta_search_threads; i++) + if (p[i].remaining > 2*window && + (!victim || victim->remaining < p[i].remaining)) + victim = &p[i]; + if (victim) { + sub_size = victim->remaining / 2; + target->regions = victim->regions + victim->remaining - sub_size; + victim->list_size -= sub_size; + victim->remaining -= sub_size; + } + target->list_size = sub_size; + target->remaining = sub_size; + target->working = 1; + progress_unlock(); + + pthread_mutex_lock(&target->mutex); + target->data_ready = 1; + pthread_cond_signal(&target->cond); + pthread_mutex_unlock(&target->mutex); + + if (!sub_size) { + pthread_join(target->thread, NULL); + pthread_cond_destroy(&target->cond); + pthread_mutex_destroy(&target->mutex); + active_threads--; + } + } + cleanup_threaded_search(); + free(p); + + display_progress(progress_state, progress_nr); + stop_progress(&progress_state); +} + static void prepare_pack(int window, int depth) { struct object_entry **delta_list; @@ -3307,39 +3556,21 @@ static void prepare_pack(int window, int depth) if (!to_pack.nr_objects || !window || !depth) return; + if (path_walk) + ll_find_deltas_by_region(to_pack.objects, to_pack.regions, + 0, to_pack.nr_regions); + ALLOC_ARRAY(delta_list, to_pack.nr_objects); nr_deltas = n = 0; for (i = 0; i < to_pack.nr_objects; i++) { struct object_entry *entry = to_pack.objects + i; - if (DELTA(entry)) - /* This happens if we decided to reuse existing - * delta from a pack. "reuse_delta &&" is implied. - */ - continue; - - if (!entry->type_valid || - oe_size_less_than(&to_pack, entry, 50)) - continue; - - if (entry->no_try_delta) + if (!should_attempt_deltas(entry)) continue; - if (!entry->preferred_base) { + if (!entry->preferred_base) nr_deltas++; - if (oe_type(entry) < 0) - die(_("unable to get type of object %s"), - oid_to_hex(&entry->idx.oid)); - } else { - if (oe_type(entry) < 0) { - /* - * This object is not found, but we - * don't have to include it anyway. - */ - continue; - } - } delta_list[n++] = entry; } @@ -4272,6 +4503,93 @@ static void mark_bitmap_preferred_tips(void) } } +static inline int is_oid_uninteresting(struct repository *repo, + struct object_id *oid) +{ + struct object *o = lookup_object(repo, oid); + return !o || (o->flags & UNINTERESTING); +} + +static int add_objects_by_path(const char *path, + struct oid_array *oids, + enum object_type type, + void *data) +{ + size_t oe_start = to_pack.nr_objects; + size_t oe_end; + unsigned int *processed = data; + + /* + * First, add all objects to the packing data, including the ones + * marked UNINTERESTING (translated to 'exclude') as they can be + * used as delta bases. + */ + for (size_t i = 0; i < oids->nr; i++) { + int exclude; + struct object_info oi = OBJECT_INFO_INIT; + struct object_id *oid = &oids->oid[i]; + + /* Skip objects that do not exist locally. */ + if ((exclude_promisor_objects || arg_missing_action != MA_ERROR) && + oid_object_info_extended(the_repository, oid, &oi, + OBJECT_INFO_FOR_PREFETCH) < 0) + continue; + + exclude = is_oid_uninteresting(the_repository, oid); + + if (exclude && !thin) + continue; + + add_object_entry(oid, type, path, exclude); + } + + oe_end = to_pack.nr_objects; + + /* We can skip delta calculations if it is a no-op. */ + if (oe_end == oe_start || !window) + return 0; + + ALLOC_GROW(to_pack.regions, + to_pack.nr_regions + 1, + to_pack.nr_regions_alloc); + + to_pack.regions[to_pack.nr_regions].start = oe_start; + to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start; + to_pack.nr_regions++; + + *processed += oids->nr; + display_progress(progress_state, *processed); + + return 0; +} + +static void get_object_list_path_walk(struct rev_info *revs) +{ + struct path_walk_info info = PATH_WALK_INFO_INIT; + unsigned int processed = 0; + int result; + + info.revs = revs; + info.path_fn = add_objects_by_path; + info.path_fn_data = &processed; + + /* + * Allow the --[no-]sparse option to be interesting here, if only + * for testing purposes. Paths with no interesting objects will not + * contribute to the resulting pack, but only create noisy preferred + * base objects. + */ + info.prune_all_uninteresting = sparse; + info.edge_aggressive = shallow; + + trace2_region_enter("pack-objects", "path-walk", revs->repo); + result = walk_objects_by_path(&info); + trace2_region_leave("pack-objects", "path-walk", revs->repo); + + if (result) + die(_("failed to pack objects via path-walk")); +} + static void get_object_list(struct rev_info *revs, int ac, const char **av) { struct setup_revision_opt s_r_opt = { @@ -4327,15 +4645,19 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av) if (write_bitmap_index) mark_bitmap_preferred_tips(); - if (prepare_revision_walk(revs)) - die(_("revision walk setup failed")); - mark_edges_uninteresting(revs, show_edge, sparse); - if (!fn_show_object) fn_show_object = show_object; - traverse_commit_list(revs, - show_commit, fn_show_object, - NULL); + + if (path_walk) { + get_object_list_path_walk(revs); + } else { + if (prepare_revision_walk(revs)) + die(_("revision walk setup failed")); + mark_edges_uninteresting(revs, show_edge, sparse); + traverse_commit_list(revs, + show_commit, fn_show_object, + NULL); + } if (unpack_unreachable_expiration) { revs->ignore_missing_links = 1; @@ -4464,7 +4786,6 @@ int cmd_pack_objects(int argc, struct repository *repo UNUSED) { int use_internal_rev_list = 0; - int shallow = 0; int all_progress_implied = 0; struct strvec rp = STRVEC_INIT; int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; @@ -4545,6 +4866,8 @@ int cmd_pack_objects(int argc, N_("use the sparse reachability algorithm")), OPT_BOOL(0, "thin", &thin, N_("create thin packs")), + OPT_BOOL(0, "path-walk", &path_walk, + N_("use the path-walk API to walk objects when possible")), OPT_BOOL(0, "shallow", &shallow, N_("create packs suitable for shallow fetches")), OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk, @@ -4614,6 +4937,17 @@ int cmd_pack_objects(int argc, if (pack_to_stdout != !base_name || argc) usage_with_options(pack_usage, pack_objects_options); + if (path_walk < 0) { + if (use_bitmap_index > 0 || + !use_internal_rev_list) + path_walk = 0; + else if (the_repository->gitdir && + the_repository->settings.pack_use_path_walk) + path_walk = 1; + else + path_walk = git_env_bool("GIT_TEST_PACK_PATH_WALK", 0); + } + if (depth < 0) depth = 0; if (depth >= (1 << OE_DEPTH_BITS)) { @@ -4630,7 +4964,28 @@ int cmd_pack_objects(int argc, window = 0; strvec_push(&rp, "pack-objects"); - if (thin) { + + if (path_walk) { + const char *option = NULL; + if (filter_options.choice) + option = "--filter"; + else if (use_delta_islands) + option = "--delta-islands"; + + if (option) { + warning(_("cannot use %s with %s"), + option, "--path-walk"); + path_walk = 0; + } + } + if (path_walk) { + strvec_push(&rp, "--boundary"); + /* + * We must disable the bitmaps because we are removing + * the --objects / --objects-edge[-aggressive] options. + */ + use_bitmap_index = 0; + } else if (thin) { use_internal_rev_list = 1; strvec_push(&rp, shallow ? "--objects-edge-aggressive" diff --git a/builtin/repack.c b/builtin/repack.c index 59214dbdfd..5ddc6e7f95 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -43,7 +43,7 @@ static char *packdir, *packtmp_name, *packtmp; static const char *const git_repack_usage[] = { N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n" "[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]\n" - "[--write-midx] [--name-hash-version=<n>]"), + "[--write-midx] [--name-hash-version=<n>] [--path-walk]"), NULL }; @@ -63,6 +63,7 @@ struct pack_objects_args { int quiet; int local; int name_hash_version; + int path_walk; struct list_objects_filter_options filter_options; }; @@ -313,6 +314,8 @@ static void prepare_pack_objects(struct child_process *cmd, strvec_pushf(&cmd->args, "--no-reuse-object"); if (args->name_hash_version) strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version); + if (args->path_walk) + strvec_pushf(&cmd->args, "--path-walk"); if (args->local) strvec_push(&cmd->args, "--local"); if (args->quiet) @@ -1184,6 +1187,8 @@ int cmd_repack(int argc, N_("pass --no-reuse-object to git-pack-objects")), OPT_INTEGER(0, "name-hash-version", &po_args.name_hash_version, N_("specify the name hash version to use for grouping similar objects by path")), + OPT_BOOL(0, "path-walk", &po_args.path_walk, + N_("pass --path-walk to git-pack-objects")), OPT_NEGBIT('n', NULL, &run_update_server_info, N_("do not run git-update-server-info"), 1), OPT__QUIET(&po_args.quiet, N_("be quiet")), diff --git a/pack-objects.h b/pack-objects.h index 475a2d67ce..b1c3e702f4 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -120,11 +120,23 @@ struct object_entry { unsigned ext_base:1; /* delta_idx points outside packlist */ }; +/** + * A packing region is a section of the packing_data.objects array + * as given by a starting index and a number of elements. + */ +struct packing_region { + size_t start; + size_t nr; +}; + struct packing_data { struct repository *repo; struct object_entry *objects; uint32_t nr_objects, nr_alloc; + struct packing_region *regions; + size_t nr_regions, nr_regions_alloc; + int32_t *index; uint32_t index_size; diff --git a/path-walk.c b/path-walk.c index 341bdd2ba4..2d4ddbadd5 100644 --- a/path-walk.c +++ b/path-walk.c @@ -503,7 +503,11 @@ int walk_objects_by_path(struct path_walk_info *info) if (prepare_revision_walk(info->revs)) die(_("failed to setup revision walk")); - /* Walk trees to mark them as UNINTERESTING. */ + /* + * Walk trees to mark them as UNINTERESTING. + * This is particularly important when 'edge_aggressive' is set. + */ + info->revs->edge_hint_aggressive = info->edge_aggressive; edge_repo = info->revs->repo; edge_tree_list = root_tree_list; mark_edges_uninteresting(info->revs, show_edge, diff --git a/path-walk.h b/path-walk.h index 473ee9d361..5ef5a8440e 100644 --- a/path-walk.h +++ b/path-walk.h @@ -51,6 +51,13 @@ struct path_walk_info { int prune_all_uninteresting; /** + * When 'edge_aggressive' is set, then the revision walk will use + * the '--object-edge-aggressive' option to mark even more objects + * as uninteresting. + */ + int edge_aggressive; + + /** * Specify a sparse-checkout definition to match our paths to. Do not * walk outside of this sparse definition. If the patterns are in * cone mode, then the search may prune directories that are outside diff --git a/repo-settings.c b/repo-settings.c index 4129f8fb2b..195c24e9c0 100644 --- a/repo-settings.c +++ b/repo-settings.c @@ -54,11 +54,13 @@ void prepare_repo_settings(struct repository *r) r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING; r->settings.pack_use_bitmap_boundary_traversal = 1; r->settings.pack_use_multi_pack_reuse = 1; + r->settings.pack_use_path_walk = 1; } if (manyfiles) { r->settings.index_version = 4; r->settings.index_skip_hash = 1; r->settings.core_untracked_cache = UNTRACKED_CACHE_WRITE; + r->settings.pack_use_path_walk = 1; } /* Commit graph config or default, does not cascade (simple) */ @@ -73,6 +75,7 @@ void prepare_repo_settings(struct repository *r) /* Boolean config or default, does not cascade (simple) */ repo_cfg_bool(r, "pack.usesparse", &r->settings.pack_use_sparse, 1); + repo_cfg_bool(r, "pack.usepathwalk", &r->settings.pack_use_path_walk, 0); repo_cfg_bool(r, "core.multipackindex", &r->settings.core_multi_pack_index, 1); repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0); repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash); diff --git a/repo-settings.h b/repo-settings.h index 2bf24b2597..d477885561 100644 --- a/repo-settings.h +++ b/repo-settings.h @@ -56,6 +56,7 @@ struct repo_settings { enum untracked_cache_setting core_untracked_cache; int pack_use_sparse; + int pack_use_path_walk; enum fetch_negotiation_setting fetch_negotiation_algorithm; int core_multi_pack_index; @@ -170,6 +170,7 @@ static int set_recommended_config(int reconfigure) { "core.autoCRLF", "false" }, { "core.safeCRLF", "false" }, { "fetch.showForcedUpdates", "false" }, + { "pack.usePathWalk", "true" }, { NULL, NULL }, }; int i; @@ -415,6 +415,10 @@ GIT_TEST_PACK_SPARSE=<boolean> if disabled will default the pack-objects builtin to use the non-sparse object walk. This can still be overridden by the --sparse command-line argument. +GIT_TEST_PACK_PATH_WALK=<boolean> if enabled will default the pack-objects +builtin to use the path-walk API for the object walk. This can still be +overridden by the --no-path-walk command-line argument. + GIT_TEST_PRELOAD_INDEX=<boolean> exercises the preload-index code path by overriding the minimum number of cache entries required per thread. diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c index 61e845e5ec..fe63002c2b 100644 --- a/t/helper/test-path-walk.c +++ b/t/helper/test-path-walk.c @@ -82,6 +82,8 @@ int cmd__path_walk(int argc, const char **argv) N_("toggle inclusion of tree objects")), OPT_BOOL(0, "prune", &info.prune_all_uninteresting, N_("toggle pruning of uninteresting paths")), + OPT_BOOL(0, "edge-aggressive", &info.edge_aggressive, + N_("toggle aggressive edge walk")), OPT_BOOL(0, "stdin-pl", &stdin_pl, N_("read a pattern list over stdin")), OPT_END(), diff --git a/t/perf/p5313-pack-objects.sh b/t/perf/p5313-pack-objects.sh index 786a2c1c6f..46a6cd32d2 100755 --- a/t/perf/p5313-pack-objects.sh +++ b/t/perf/p5313-pack-objects.sh @@ -22,46 +22,53 @@ test_expect_success 'create rev input' ' EOF ' -for version in 1 2 -do - export version +test_all_with_args () { + parameter=$1 + export parameter - test_perf "thin pack with version $version" ' + test_perf "thin pack with $parameter" ' git pack-objects --thin --stdout --revs --sparse \ - --name-hash-version=$version <in-thin >out + $parameter <in-thin >out ' - test_size "thin pack size with version $version" ' + test_size "thin pack size with $parameter" ' test_file_size out ' - test_perf "big pack with version $version" ' + test_perf "big pack with $parameter" ' git pack-objects --stdout --revs --sparse \ - --name-hash-version=$version <in-big >out + $parameter <in-big >out ' - test_size "big pack size with version $version" ' + test_size "big pack size with $parameter" ' test_file_size out ' - test_perf "shallow fetch pack with version $version" ' + test_perf "shallow fetch pack with $parameter" ' git pack-objects --stdout --revs --sparse --shallow \ - --name-hash-version=$version <in-shallow >out + $parameter <in-shallow >out ' - test_size "shallow pack size with version $version" ' + test_size "shallow pack size with $parameter" ' test_file_size out ' - test_perf "repack with version $version" ' - git repack -adf --name-hash-version=$version + test_perf "repack with $parameter" ' + git repack -adf $parameter ' - test_size "repack size with version $version" ' + test_size "repack size with $parameter" ' gitdir=$(git rev-parse --git-dir) && pack=$(ls $gitdir/objects/pack/pack-*.pack) && test_file_size "$pack" ' +} + +for version in 1 2 +do + test_all_with_args --name-hash-version=$version done +test_all_with_args --path-walk + test_done diff --git a/t/t0411-clone-from-partial.sh b/t/t0411-clone-from-partial.sh index 196fc61784..9e6bca5625 100755 --- a/t/t0411-clone-from-partial.sh +++ b/t/t0411-clone-from-partial.sh @@ -59,6 +59,12 @@ test_expect_success 'pack-objects should fetch from promisor remote and execute test_expect_success 'clone from promisor remote does not lazy-fetch by default' ' rm -f script-executed && + + # The --path-walk feature of "git pack-objects" is not + # compatible with this kind of fetch from an incomplete repo. + GIT_TEST_PACK_PATH_WALK=0 && + export GIT_TEST_PACK_PATH_WALK && + test_must_fail git clone evil no-lazy 2>err && test_grep "lazy fetching disabled" err && test_path_is_missing script-executed diff --git a/t/t0450/adoc-help-mismatches b/t/t0450/adoc-help-mismatches index c4a15fd0cb..06b469bdee 100644 --- a/t/t0450/adoc-help-mismatches +++ b/t/t0450/adoc-help-mismatches @@ -38,7 +38,6 @@ merge-one-file multi-pack-index name-rev notes -pack-objects push range-diff rebase diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index a5932b6a8b..ae72158b94 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -723,4 +723,23 @@ test_expect_success '--name-hash-version=2 and --write-bitmap-index are incompat ! test_grep "currently, --write-bitmap-index requires --name-hash-version=1" err ' +test_expect_success '--path-walk pack everything' ' + git -C server rev-parse HEAD >in && + GIT_PROGRESS_DELAY=0 git -C server pack-objects \ + --stdout --revs --path-walk --progress <in >out.pack 2>err && + grep "Compressing objects by path" err && + git -C server index-pack --stdin <out.pack +' + +test_expect_success '--path-walk thin pack' ' + cat >in <<-EOF && + $(git -C server rev-parse HEAD) + ^$(git -C server rev-parse HEAD~2) + EOF + GIT_PROGRESS_DELAY=0 git -C server pack-objects \ + --thin --stdout --revs --path-walk --progress <in >out.pack 2>err && + grep "Compressing objects by path" err && + git -C server index-pack --fix-thin --stdin <out.pack +' + test_done diff --git a/t/t5306-pack-nobase.sh b/t/t5306-pack-nobase.sh index 805d60ff31..609399d54f 100755 --- a/t/t5306-pack-nobase.sh +++ b/t/t5306-pack-nobase.sh @@ -59,6 +59,11 @@ test_expect_success 'indirectly clone patch_clone' ' git pull ../.git && test $(git rev-parse HEAD) = $B && + # The --path-walk feature of "git pack-objects" is not + # compatible with this kind of fetch from an incomplete repo. + GIT_TEST_PACK_PATH_WALK=0 && + export GIT_TEST_PACK_PATH_WALK && + git pull ../patch_clone/.git && test $(git rev-parse HEAD) = $C ) diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index a62b463eaf..b6926f1027 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -158,8 +158,9 @@ test_bitmap_cases () { ls .git/objects/pack/ | grep bitmap >output && test_line_count = 1 output && # verify equivalent packs are generated with/without using bitmap index - packasha1=$(git pack-objects --no-use-bitmap-index --all packa </dev/null) && - packbsha1=$(git pack-objects --use-bitmap-index --all packb </dev/null) && + # Be careful to not use the path-walk option in either case. + packasha1=$(git pack-objects --no-use-bitmap-index --no-path-walk --all packa </dev/null) && + packbsha1=$(git pack-objects --use-bitmap-index --no-path-walk --all packb </dev/null) && list_packed_objects packa-$packasha1.idx >packa.objects && list_packed_objects packb-$packbsha1.idx >packb.objects && test_cmp packa.objects packb.objects @@ -388,6 +389,14 @@ test_bitmap_cases () { git init --bare client.git && ( cd client.git && + + # This test relies on reusing a delta, but if the + # path-walk machinery is engaged, the base object + # is considered too small to use during the + # dynamic computation, so is not used. + GIT_TEST_PACK_PATH_WALK=0 && + export GIT_TEST_PACK_PATH_WALK && + git config transfer.unpackLimit 1 && git fetch .. delta-reuse-old:delta-reuse-old && git fetch .. delta-reuse-new:delta-reuse-new && diff --git a/t/t5316-pack-delta-depth.sh b/t/t5316-pack-delta-depth.sh index defaa06d65..03dfb7a61e 100755 --- a/t/t5316-pack-delta-depth.sh +++ b/t/t5316-pack-delta-depth.sh @@ -89,15 +89,18 @@ max_chain() { # adjusted (or scrapped if the heuristics have become too unreliable) test_expect_success 'packing produces a long delta' ' # Use --window=0 to make sure we are seeing reused deltas, - # not computing a new long chain. - pack=$(git pack-objects --all --window=0 </dev/null pack) && + # not computing a new long chain. (Also avoid the --path-walk + # option as it may break delta chains.) + pack=$(git pack-objects --all --window=0 --no-path-walk </dev/null pack) && echo 9 >expect && max_chain pack-$pack.pack >actual && test_cmp expect actual ' test_expect_success '--depth limits depth' ' - pack=$(git pack-objects --all --depth=5 </dev/null pack) && + # Avoid --path-walk to avoid breaking delta chains across path + # boundaries. + pack=$(git pack-objects --all --depth=5 --no-path-walk </dev/null pack) && echo 5 >expect && max_chain pack-$pack.pack >actual && test_cmp expect actual diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh index 57cad7708f..395d09444c 100755 --- a/t/t5332-multi-pack-reuse.sh +++ b/t/t5332-multi-pack-reuse.sh @@ -7,6 +7,13 @@ test_description='pack-objects multi-pack reuse' GIT_TEST_MULTI_PACK_INDEX=0 GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL=0 + +# The --path-walk option does not consider the preferred pack +# at all for reusing deltas, so this variable changes the +# behavior of this test, if enabled. +GIT_TEST_PACK_PATH_WALK=0 +export GIT_TEST_PACK_PATH_WALK + objdir=.git/objects packdir=$objdir/pack diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index dabcc5f811..782f5f93ea 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1909,4 +1909,14 @@ test_expect_success 'push with config push.useBitmaps' ' --thin --delta-base-offset -q --no-use-bitmap-index <false ' +test_expect_success 'push with config pack.usePathWalk=true' ' + mk_test testrepo heads/main && + git checkout main && + test_config pack.usePathWalk true && + GIT_TRACE2_EVENT="$(pwd)/path-walk.txt" \ + git push --quiet testrepo main:test && + + test_region pack-objects path-walk path-walk.txt +' + test_done diff --git a/t/t5538-push-shallow.sh b/t/t5538-push-shallow.sh index e91fcc173e..dc0e972943 100755 --- a/t/t5538-push-shallow.sh +++ b/t/t5538-push-shallow.sh @@ -123,4 +123,45 @@ EOF git cat-file blob $(echo 1|git hash-object --stdin) >/dev/null ) ' + +test_expect_success 'push new commit from shallow clone has correct object count' ' + git init origin && + test_commit -C origin a && + test_commit -C origin b && + + git clone --depth=1 "file://$(pwd)/origin" client && + git -C client checkout -b topic && + git -C client commit --allow-empty -m "empty" && + GIT_PROGRESS_DELAY=0 git -C client push --progress origin topic 2>err && + test_grep "Enumerating objects: 1, done." err +' + +test_expect_success 'push new commit from shallow clone has good deltas' ' + git init base && + test_seq 1 999 >base/a && + test_commit -C base initial && + git -C base add a && + git -C base commit -m "big a" && + + git clone --depth=1 "file://$(pwd)/base" deltas && + git -C deltas checkout -b deltas && + test_seq 1 1000 >deltas/a && + git -C deltas commit -a -m "bigger a" && + GIT_PROGRESS_DELAY=0 git -C deltas push --progress origin deltas 2>err && + + test_grep "Enumerating objects: 5, done" err && + + # If the delta base is found, then this message uses "bytes". + # If the delta base is not found, then this message uses "KiB". + test_grep "Writing objects: .* bytes" err && + + git -C deltas commit --amend -m "changed message" && + GIT_TRACE2_EVENT="$(pwd)/config-push.txt" \ + GIT_PROGRESS_DELAY=0 git -C deltas -c pack.usePathWalk=true \ + push --progress -f origin deltas 2>err && + + test_grep "Enumerating objects: 1, done" err && + test_region pack-objects path-walk config-push.txt +' + test_done diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 8d187f7279..56bd1e3c5b 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -376,6 +376,26 @@ test_expect_success 'topic, not base, boundary with pruning' ' test_cmp_sorted expect out ' +test_expect_success 'topic, not base, --edge-aggressive with pruning' ' + test-tool path-walk --prune --edge-aggressive -- topic --not base >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base^{tree}):UNINTERESTING + 2:tree:right/:$(git rev-parse topic:right) + 2:tree:right/:$(git rev-parse base:right):UNINTERESTING + 3:blob:right/c:$(git rev-parse base:right/c):UNINTERESTING + 3:blob:right/c:$(git rev-parse topic:right/c) + blobs:2 + commits:1 + tags:0 + trees:4 + EOF + + test_cmp_sorted expect out +' + test_expect_success 'trees are reported exactly once' ' test_when_finished "rm -rf unique-trees" && test_create_repo unique-trees && diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh index c562bad042..ab76d4b6dc 100755 --- a/t/t7406-submodule-update.sh +++ b/t/t7406-submodule-update.sh @@ -1095,12 +1095,15 @@ test_expect_success 'submodule update --quiet passes quietness to fetch with a s (cd super5 && # This test var can mess with the stderr output checked in this test. GIT_TEST_NAME_HASH_VERSION=1 \ + GIT_TEST_PACK_PATH_WALK=0 \ git submodule update --quiet --init --depth=1 submodule3 >out 2>err && test_must_be_empty out && test_must_be_empty err ) && git clone super4 super6 && (cd super6 && + # This test variable will create a "warning" message to stderr + GIT_TEST_PACK_PATH_WALK=0 \ git submodule update --init --depth=1 submodule3 >out 2>err && test_file_not_empty out && test_file_not_empty err |
