From fa0e936fbb683dbce70b797ae8a5be6b88f48d88 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 30 Jul 2021 11:47:37 +0000 Subject: diffcore-rename: use a mem_pool for exact rename detection's hashmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exact rename detection, via insert_file_table(), uses a hashmap to store files by oid. Use a mem_pool for the hashmap entries so these can all be allocated and deallocated together. For the testcases mentioned in commit 557ac0350d ("merge-ort: begin performance work; instrument with trace2_region_* calls", 2020-10-28), this change improves the performance as follows: Before After no-renames: 204.2 ms ± 3.0 ms 202.5 ms ± 3.2 ms mega-renames: 1.076 s ± 0.015 s 1.072 s ± 0.012 s just-one-mega: 364.1 ms ± 7.0 ms 357.3 ms ± 3.9 ms Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- diffcore-rename.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'diffcore-rename.c') diff --git a/diffcore-rename.c b/diffcore-rename.c index 4ef0459cfb..73d884099e 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -317,10 +317,11 @@ static int find_identical_files(struct hashmap *srcs, } static void insert_file_table(struct repository *r, + struct mem_pool *pool, struct hashmap *table, int index, struct diff_filespec *filespec) { - struct file_similarity *entry = xmalloc(sizeof(*entry)); + struct file_similarity *entry = mem_pool_alloc(pool, sizeof(*entry)); entry->index = index; entry->filespec = filespec; @@ -336,7 +337,8 @@ static void insert_file_table(struct repository *r, * and then during the second round we try to match * cache-dirty entries as well. */ -static int find_exact_renames(struct diff_options *options) +static int find_exact_renames(struct diff_options *options, + struct mem_pool *pool) { int i, renames = 0; struct hashmap file_table; @@ -346,7 +348,7 @@ static int find_exact_renames(struct diff_options *options) */ hashmap_init(&file_table, NULL, NULL, rename_src_nr); for (i = rename_src_nr-1; i >= 0; i--) - insert_file_table(options->repo, + insert_file_table(options->repo, pool, &file_table, i, rename_src[i].p->one); @@ -354,8 +356,8 @@ static int find_exact_renames(struct diff_options *options) for (i = 0; i < rename_dst_nr; i++) renames += find_identical_files(&file_table, i, options); - /* Free the hash data structure and entries */ - hashmap_clear_and_free(&file_table, struct file_similarity, entry); + /* Free the hash data structure (entries will be freed with the pool) */ + hashmap_clear(&file_table); return renames; } @@ -1341,6 +1343,7 @@ void diffcore_rename_extended(struct diff_options *options, int num_destinations, dst_cnt; int num_sources, want_copies; struct progress *progress = NULL; + struct mem_pool local_pool; struct dir_rename_info info; struct diff_populate_filespec_options dpf_options = { .check_binary = 0, @@ -1409,11 +1412,18 @@ void diffcore_rename_extended(struct diff_options *options, goto cleanup; /* nothing to do */ trace2_region_enter("diff", "exact renames", options->repo); + mem_pool_init(&local_pool, 32*1024); /* * We really want to cull the candidates list early * with cheap tests in order to avoid doing deltas. */ - rename_count = find_exact_renames(options); + rename_count = find_exact_renames(options, &local_pool); + /* + * Discard local_pool immediately instead of at "cleanup:" in order + * to reduce maximum memory usage; inexact rename detection uses up + * a fair amount of memory, and mem_pools can too. + */ + mem_pool_discard(&local_pool, 0); trace2_region_leave("diff", "exact renames", options->repo); /* Did we only want exact renames? */ -- cgit v1.2.3 From a8791ef6492bf702ba70352009cbd28fb581c6e2 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 30 Jul 2021 11:47:41 +0000 Subject: diffcore-rename, merge-ort: add wrapper functions for filepair alloc/dealloc We want to be able to allocate filespecs and filepairs using a mem_pool. However, filespec data will still remain outside the pool (perhaps in the future we could plumb the pool through the various diff APIs to allocate the filespec data too, but for now we are limiting the scope). Add some extra functions to allocate these appropriately based on the non-NULL-ness of opt->priv->pool, as well as some extra functions to handle correctly deallocating the relevant parts of them. A future commit will make use of these new functions. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- diffcore-rename.c | 41 +++++++++++++++++++++++++++++++++++++++++ diffcore.h | 2 ++ merge-ort.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+) (limited to 'diffcore-rename.c') diff --git a/diffcore-rename.c b/diffcore-rename.c index 73d884099e..5bc559f79e 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -1328,6 +1328,47 @@ static void handle_early_known_dir_renames(struct dir_rename_info *info, rename_src_nr = new_num_src; } +static void free_filespec_data(struct diff_filespec *spec) +{ + if (!--spec->count) + diff_free_filespec_data(spec); +} + +MAYBE_UNUSED +static void pool_free_filespec(struct mem_pool *pool, + struct diff_filespec *spec) +{ + if (!pool) { + free_filespec(spec); + return; + } + + /* + * Similar to free_filespec(), but only frees the data. The spec + * itself was allocated in the pool and should not be individually + * freed. + */ + free_filespec_data(spec); +} + +MAYBE_UNUSED +void pool_diff_free_filepair(struct mem_pool *pool, + struct diff_filepair *p) +{ + if (!pool) { + diff_free_filepair(p); + return; + } + + /* + * Similar to diff_free_filepair() but only frees the data from the + * filespecs; not the filespecs or the filepair which were + * allocated from the pool. + */ + free_filespec_data(p->one); + free_filespec_data(p->two); +} + void diffcore_rename_extended(struct diff_options *options, struct strintmap *relevant_sources, struct strintmap *dirs_removed, diff --git a/diffcore.h b/diffcore.h index 533b30e21e..b58ee6b193 100644 --- a/diffcore.h +++ b/diffcore.h @@ -127,6 +127,8 @@ struct diff_filepair { #define DIFF_PAIR_MODE_CHANGED(p) ((p)->one->mode != (p)->two->mode) void diff_free_filepair(struct diff_filepair *); +void pool_diff_free_filepair(struct mem_pool *pool, + struct diff_filepair *p); int diff_unmodified_pair(struct diff_filepair *); diff --git a/merge-ort.c b/merge-ort.c index 99c7569085..e79830f918 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -690,6 +690,48 @@ static void path_msg(struct merge_options *opt, strbuf_addch(sb, '\n'); } +MAYBE_UNUSED +static struct diff_filespec *pool_alloc_filespec(struct mem_pool *pool, + const char *path) +{ + struct diff_filespec *spec; + size_t len; + + if (!pool) + return alloc_filespec(path); + + /* Same code as alloc_filespec, except allocate from pool */ + len = strlen(path); + + spec = mem_pool_calloc(pool, 1, st_add3(sizeof(*spec), len, 1)); + memcpy(spec+1, path, len); + spec->path = (void*)(spec+1); + + spec->count = 1; + spec->is_binary = -1; + return spec; +} + +MAYBE_UNUSED +static struct diff_filepair *pool_diff_queue(struct mem_pool *pool, + struct diff_queue_struct *queue, + struct diff_filespec *one, + struct diff_filespec *two) +{ + struct diff_filepair *dp; + + if (!pool) + return diff_queue(queue, one, two); + + /* Same code as diff_queue, except allocate from pool */ + dp = mem_pool_calloc(pool, 1, sizeof(*dp)); + dp->one = one; + dp->two = two; + if (queue) + diff_q(queue, dp); + return dp; +} + static void *pool_calloc(struct mem_pool *pool, size_t count, size_t size) { if (!pool) -- cgit v1.2.3 From f239fff4c183b231134115bc3b38b4f8e3982d3e Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 30 Jul 2021 11:47:42 +0000 Subject: merge-ort: store filepairs and filespecs in our mem_pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the testcases mentioned in commit 557ac0350d ("merge-ort: begin performance work; instrument with trace2_region_* calls", 2020-10-28), this change improves the performance as follows: Before After no-renames: 198.1 ms ± 2.6 ms 198.5 ms ± 3.4 ms mega-renames: 715.8 ms ± 4.0 ms 679.1 ms ± 5.6 ms just-one-mega: 276.8 ms ± 4.2 ms 271.9 ms ± 2.8 ms Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- diffcore-rename.c | 9 ++++----- diffcore.h | 1 + merge-ort.c | 26 ++++++++++++++------------ 3 files changed, 19 insertions(+), 17 deletions(-) (limited to 'diffcore-rename.c') diff --git a/diffcore-rename.c b/diffcore-rename.c index 5bc559f79e..7e6b3e1b14 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -1334,7 +1334,6 @@ static void free_filespec_data(struct diff_filespec *spec) diff_free_filespec_data(spec); } -MAYBE_UNUSED static void pool_free_filespec(struct mem_pool *pool, struct diff_filespec *spec) { @@ -1351,7 +1350,6 @@ static void pool_free_filespec(struct mem_pool *pool, free_filespec_data(spec); } -MAYBE_UNUSED void pool_diff_free_filepair(struct mem_pool *pool, struct diff_filepair *p) { @@ -1370,6 +1368,7 @@ void pool_diff_free_filepair(struct mem_pool *pool, } void diffcore_rename_extended(struct diff_options *options, + struct mem_pool *pool, struct strintmap *relevant_sources, struct strintmap *dirs_removed, struct strmap *dir_rename_count, @@ -1683,7 +1682,7 @@ void diffcore_rename_extended(struct diff_options *options, pair_to_free = p; if (pair_to_free) - diff_free_filepair(pair_to_free); + pool_diff_free_filepair(pool, pair_to_free); } diff_debug_queue("done copying original", &outq); @@ -1693,7 +1692,7 @@ void diffcore_rename_extended(struct diff_options *options, for (i = 0; i < rename_dst_nr; i++) if (rename_dst[i].filespec_to_free) - free_filespec(rename_dst[i].filespec_to_free); + pool_free_filespec(pool, rename_dst[i].filespec_to_free); cleanup_dir_rename_info(&info, dirs_removed, dir_rename_count != NULL); FREE_AND_NULL(rename_dst); @@ -1710,5 +1709,5 @@ void diffcore_rename_extended(struct diff_options *options, void diffcore_rename(struct diff_options *options) { - diffcore_rename_extended(options, NULL, NULL, NULL, NULL); + diffcore_rename_extended(options, NULL, NULL, NULL, NULL, NULL); } diff --git a/diffcore.h b/diffcore.h index b58ee6b193..badc2261c2 100644 --- a/diffcore.h +++ b/diffcore.h @@ -181,6 +181,7 @@ void partial_clear_dir_rename_count(struct strmap *dir_rename_count); void diffcore_break(struct repository *, int); void diffcore_rename(struct diff_options *); void diffcore_rename_extended(struct diff_options *options, + struct mem_pool *pool, struct strintmap *relevant_sources, struct strintmap *dirs_removed, struct strmap *dir_rename_count, diff --git a/merge-ort.c b/merge-ort.c index e79830f918..f4f0a3d57f 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -690,7 +690,6 @@ static void path_msg(struct merge_options *opt, strbuf_addch(sb, '\n'); } -MAYBE_UNUSED static struct diff_filespec *pool_alloc_filespec(struct mem_pool *pool, const char *path) { @@ -712,7 +711,6 @@ static struct diff_filespec *pool_alloc_filespec(struct mem_pool *pool, return spec; } -MAYBE_UNUSED static struct diff_filepair *pool_diff_queue(struct mem_pool *pool, struct diff_queue_struct *queue, struct diff_filespec *one, @@ -930,6 +928,7 @@ static void add_pair(struct merge_options *opt, unsigned dir_rename_mask) { struct diff_filespec *one, *two; + struct mem_pool *pool = opt->priv->pool; struct rename_info *renames = &opt->priv->renames; int names_idx = is_add ? side : 0; @@ -980,11 +979,11 @@ static void add_pair(struct merge_options *opt, return; } - one = alloc_filespec(pathname); - two = alloc_filespec(pathname); + one = pool_alloc_filespec(pool, pathname); + two = pool_alloc_filespec(pool, pathname); fill_filespec(is_add ? two : one, &names[names_idx].oid, 1, names[names_idx].mode); - diff_queue(&renames->pairs[side], one, two); + pool_diff_queue(pool, &renames->pairs[side], one, two); } static void collect_rename_info(struct merge_options *opt, @@ -2893,6 +2892,7 @@ static void use_cached_pairs(struct merge_options *opt, { struct hashmap_iter iter; struct strmap_entry *entry; + struct mem_pool *pool = opt->priv->pool; /* * Add to side_pairs all entries from renames->cached_pairs[side_index]. @@ -2906,9 +2906,9 @@ static void use_cached_pairs(struct merge_options *opt, new_name = old_name; /* We don't care about oid/mode, only filenames and status */ - one = alloc_filespec(old_name); - two = alloc_filespec(new_name); - diff_queue(pairs, one, two); + one = pool_alloc_filespec(pool, old_name); + two = pool_alloc_filespec(pool, new_name); + pool_diff_queue(pool, pairs, one, two); pairs->queue[pairs->nr-1]->status = entry->value ? 'R' : 'D'; } } @@ -3016,6 +3016,7 @@ static int detect_regular_renames(struct merge_options *opt, diff_queued_diff = renames->pairs[side_index]; trace2_region_enter("diff", "diffcore_rename", opt->repo); diffcore_rename_extended(&diff_opts, + opt->priv->pool, &renames->relevant_sources[side_index], &renames->dirs_removed[side_index], &renames->dir_rename_count[side_index], @@ -3066,7 +3067,7 @@ static int collect_renames(struct merge_options *opt, if (p->status != 'A' && p->status != 'R') { possibly_cache_new_pair(renames, p, side_index, NULL); - diff_free_filepair(p); + pool_diff_free_filepair(opt->priv->pool, p); continue; } @@ -3079,7 +3080,7 @@ static int collect_renames(struct merge_options *opt, possibly_cache_new_pair(renames, p, side_index, new_path); if (p->status != 'R' && !new_path) { - diff_free_filepair(p); + pool_diff_free_filepair(opt->priv->pool, p); continue; } @@ -3197,7 +3198,7 @@ cleanup: side_pairs = &renames->pairs[s]; for (i = 0; i < side_pairs->nr; ++i) { struct diff_filepair *p = side_pairs->queue[i]; - diff_free_filepair(p); + pool_diff_free_filepair(opt->priv->pool, p); } } @@ -3210,7 +3211,8 @@ simple_cleanup: if (combined.nr) { int i; for (i = 0; i < combined.nr; i++) - diff_free_filepair(combined.queue[i]); + pool_diff_free_filepair(opt->priv->pool, + combined.queue[i]); free(combined.queue); } -- cgit v1.2.3