aboutsummaryrefslogtreecommitdiffstats
path: root/ref-filter.c
diff options
context:
space:
mode:
authorKarthik Nayak <karthik.188@gmail.com>2025-07-15 13:28:30 +0200
committerJunio C Hamano <gitster@pobox.com>2025-07-15 11:54:20 -0700
commitdabecb9db2b25a32d72c90832f338849b665fdf9 (patch)
tree365fed331a67e6105cfd1f77776593773c0ce388 /ref-filter.c
parentref-filter: remove unnecessary else clause (diff)
downloadgit-dabecb9db2b25a32d72c90832f338849b665fdf9.tar.gz
git-dabecb9db2b25a32d72c90832f338849b665fdf9.zip
for-each-ref: introduce a '--start-after' option
The `git-for-each-ref(1)` command is used to iterate over references present in a repository. In large repositories with millions of references, it would be optimal to paginate this output such that we can start iteration from a given reference. This would avoid having to iterate over all references from the beginning each time when paginating through results. The previous commit added 'seek' functionality to the reference backends. Utilize this and expose a '--start-after' option in 'git-for-each-ref(1)'. When used, the reference iteration seeks to the lexicographically next reference and iterates from there onward. This enables efficient pagination workflows, where the calling script can remember the last provided reference and use that as the starting point for the next set of references: git for-each-ref --count=100 git for-each-ref --count=100 --start-after=refs/heads/branch-100 git for-each-ref --count=100 --start-after=refs/heads/branch-200 Since the reference iterators only allow seeking to a specified marker via the `ref_iterator_seek()`, we introduce a helper function `start_ref_iterator_after()`, which seeks to next reference by simply adding (char) 1 to the marker. We must note that pagination always continues from the provided marker, as such any concurrent reference updates lexicographically behind the marker will not be output. Document the same. Signed-off-by: Karthik Nayak <karthik.188@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'ref-filter.c')
-rw-r--r--ref-filter.c78
1 files changed, 60 insertions, 18 deletions
diff --git a/ref-filter.c b/ref-filter.c
index da663c7ac8..c8a6b7f1af 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -2684,6 +2684,41 @@ static int filter_exclude_match(struct ref_filter *filter, const char *refname)
}
/*
+ * We need to seek to the reference right after a given marker but excluding any
+ * matching references. So we seek to the lexicographically next reference.
+ */
+static int start_ref_iterator_after(struct ref_iterator *iter, const char *marker)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret;
+
+ strbuf_addstr(&sb, marker);
+ strbuf_addch(&sb, 1);
+
+ ret = ref_iterator_seek(iter, sb.buf, 0);
+
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int for_each_fullref_with_seek(struct ref_filter *filter, each_ref_fn cb,
+ void *cb_data, unsigned int flags)
+{
+ struct ref_iterator *iter;
+ int ret = 0;
+
+ iter = refs_ref_iterator_begin(get_main_ref_store(the_repository), "",
+ NULL, 0, flags);
+ if (filter->start_after)
+ ret = start_ref_iterator_after(iter, filter->start_after);
+
+ if (ret)
+ return ret;
+
+ return do_for_each_ref_iterator(iter, cb, cb_data);
+}
+
+/*
* This is the same as for_each_fullref_in(), but it tries to iterate
* only over the patterns we'll care about. Note that it _doesn't_ do a full
* pattern match, so the callback still has to match each ref individually.
@@ -2694,8 +2729,8 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter,
{
if (filter->kind & FILTER_REFS_ROOT_REFS) {
/* In this case, we want to print all refs including root refs. */
- return refs_for_each_include_root_refs(get_main_ref_store(the_repository),
- cb, cb_data);
+ return for_each_fullref_with_seek(filter, cb, cb_data,
+ DO_FOR_EACH_INCLUDE_ROOT_REFS);
}
if (!filter->match_as_path) {
@@ -2704,8 +2739,7 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter,
* prefixes like "refs/heads/" etc. are stripped off,
* so we have to look at everything:
*/
- return refs_for_each_fullref_in(get_main_ref_store(the_repository),
- "", NULL, cb, cb_data);
+ return for_each_fullref_with_seek(filter, cb, cb_data, 0);
}
if (filter->ignore_case) {
@@ -2714,14 +2748,12 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter,
* so just return everything and let the caller
* sort it out.
*/
- return refs_for_each_fullref_in(get_main_ref_store(the_repository),
- "", NULL, cb, cb_data);
+ return for_each_fullref_with_seek(filter, cb, cb_data, 0);
}
if (!filter->name_patterns[0]) {
/* no patterns; we have to look at everything */
- return refs_for_each_fullref_in(get_main_ref_store(the_repository),
- "", filter->exclude.v, cb, cb_data);
+ return for_each_fullref_with_seek(filter, cb, cb_data, 0);
}
return refs_for_each_fullref_in_prefixes(get_main_ref_store(the_repository),
@@ -3189,6 +3221,7 @@ void filter_is_base(struct repository *r,
static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref_fn fn, void *cb_data)
{
+ const char *prefix = NULL;
int ret = 0;
filter->kind = type & FILTER_REFS_KIND_MASK;
@@ -3207,19 +3240,28 @@ static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref
* of filter_ref_kind().
*/
if (filter->kind == FILTER_REFS_BRANCHES)
- ret = refs_for_each_fullref_in(get_main_ref_store(the_repository),
- "refs/heads/", NULL,
- fn, cb_data);
+ prefix = "refs/heads/";
else if (filter->kind == FILTER_REFS_REMOTES)
- ret = refs_for_each_fullref_in(get_main_ref_store(the_repository),
- "refs/remotes/", NULL,
- fn, cb_data);
+ prefix = "refs/remotes/";
else if (filter->kind == FILTER_REFS_TAGS)
- ret = refs_for_each_fullref_in(get_main_ref_store(the_repository),
- "refs/tags/", NULL, fn,
- cb_data);
- else if (filter->kind & FILTER_REFS_REGULAR)
+ prefix = "refs/tags/";
+
+ if (prefix) {
+ struct ref_iterator *iter;
+
+ iter = refs_ref_iterator_begin(get_main_ref_store(the_repository),
+ "", NULL, 0, 0);
+
+ if (filter->start_after)
+ ret = start_ref_iterator_after(iter, filter->start_after);
+ else if (prefix)
+ ret = ref_iterator_seek(iter, prefix, 1);
+
+ if (!ret)
+ ret = do_for_each_ref_iterator(iter, fn, cb_data);
+ } else if (filter->kind & FILTER_REFS_REGULAR) {
ret = for_each_fullref_in_pattern(filter, fn, cb_data);
+ }
/*
* When printing all ref types, HEAD is already included,