From da01032a32c3b14c8007e0262a5cd303864c9b04 Mon Sep 17 00:00:00 2001
From: Kristofer Karlsson
Date: Thu, 11 Jun 2026 10:15:34 +0200
Subject: [PATCH] commit-reach: remove get_reachable_subset()

get_reachable_subset() and tips_reachable_from_bases() both answer the
same reachability question but use different traversal strategies:
priority queue vs depth-first search.

Consolidate them into tips_reachable_from_bases() with a mode parameter
to select between DFS and PQ traversal, preserving the preferred
strategy for each caller. This works cleanly because prio_queue already
supports LIFO mode (when compare is NULL), so a single prio_queue acts
as either a stack or a heap depending on the mode.

The unified traversal pushes all unseen parents at once rather than
peeking and pushing one parent at a time. This eliminates merge commit
revisits entirely: a 2-parent merge now requires 1 visit instead of 3.
For DFS (LIFO) mode, the first parent is pushed last so it ends up on
top of the stack, preserving first-parent traversal order.

Parsing is deferred to pop time for DFS since parent objects carry
valid flags without a full repo_parse_commit() call. PQ mode parses
before push so the heap can order by generation number.

Add exhaustive reachability tests that use every commit in the grid as
a tip, protecting against subtle traversal bugs such as wrong parent
ordering or premature pruning. The existing tests are also extended to
exercise both DFS and PQ modes.

The flag in remote.c changes from 1 (bit 0) to TMP_MARK (bit 4) because
tips_reachable_from_bases() uses SEEN (bit 0) internally. TMP_MARK is
already used for deduplication earlier in the same function and is
cleared before the reachability check.

Signed-off-by: Kristofer Karlsson --- commit-reach.c | 131 +++++++++++------------------------------- commit-reach.h | 19 ++---- ref-filter.c | 2 +- remote.c | 20 +++---- t/helper/test-reach.c | 44 +++++++------- t/t6600-test-reach.sh | 65 ++++++++++++++++++--- 6 files changed, 129 insertions(+), 152 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index 5df471a313cf6b..1cad7b211e11b0 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -1013,79 +1013,6 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, return result; } -struct commit_list *get_reachable_subset(struct commit **from, size_t nr_from, - struct commit **to, size_t nr_to, - unsigned int reachable_flag) -{ - struct commit **item; - struct commit *current; - struct commit_list *found_commits = NULL; - struct commit **to_last = to + nr_to; - struct commit **from_last = from + nr_from; - timestamp_t min_generation = GENERATION_NUMBER_INFINITY; - int num_to_find = 0; - - struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; - - for (item = to; item < to_last; item++) { - timestamp_t generation; - struct commit *c = *item; - - repo_parse_commit(the_repository, c); - generation = commit_graph_generation(c); - if (generation < min_generation) - min_generation = generation; - - if (!(c->object.flags & PARENT1)) { - c->object.flags |= PARENT1; - num_to_find++; - } - } - - for (item = from; item < from_last; item++) { - struct commit *c = *item; - if (!(c->object.flags & PARENT2)) { - c->object.flags |= PARENT2; - repo_parse_commit(the_repository, c); - - prio_queue_put(&queue, *item); - } - } - - while (num_to_find && (current = prio_queue_get(&queue)) != NULL) { - struct commit_list *parents; - - if (current->object.flags & PARENT1) { - current->object.flags &= ~PARENT1; - current->object.flags |= reachable_flag; - commit_list_insert(current, &found_commits); - num_to_find--; - } - - for (parents = current->parents; parents; parents = parents->next) { - struct 
commit *p = parents->item; - - repo_parse_commit(the_repository, p); - - if (commit_graph_generation(p) < min_generation) - continue; - - if (p->object.flags & PARENT2) - continue; - - p->object.flags |= PARENT2; - prio_queue_put(&queue, p); - } - } - - clear_prio_queue(&queue); - - clear_commit_marks_many(nr_to, to, PARENT1); - clear_commit_marks_many(nr_from, from, PARENT2); - - return found_commits; -} - define_commit_slab(bit_arrays, struct bitmap *); static struct bit_arrays bit_arrays; @@ -1212,22 +1139,26 @@ static int compare_commit_and_index_by_generation(const void *va, const void *vb void tips_reachable_from_bases(struct repository *r, struct commit_list *bases, struct commit **tips, size_t tips_nr, - int mark) + int mark, enum tips_reachable_mode mode) { struct commit_and_index *commits; + struct commit_list *p; + struct commit *c; size_t min_generation_index = 0; timestamp_t min_generation; - struct commit_list *stack = NULL; + struct prio_queue queue = { NULL }; if (!bases || !tips || !tips_nr) return; /* - * Do a depth-first search starting at 'bases' to search for the - * tips. Stop at the lowest (un-found) generation number. When - * finding the lowest commit, increase the minimum generation - * number to the next lowest (un-found) generation number. + * Search starting at 'bases' looking for the tips. Stop at the + * lowest un-found generation number, raising the floor as tips + * are found. Use DFS by default; with TIPS_REACHABLE_PQ, + * use a priority queue ordered by generation then commit date. 
*/ + if (mode == TIPS_REACHABLE_PQ) + queue.compare = compare_commits_by_gen_then_commit_date; CALLOC_ARRAY(commits, tips_nr); @@ -1245,14 +1176,19 @@ void tips_reachable_from_bases(struct repository *r, while (bases) { repo_parse_commit(r, bases->item); - commit_list_insert(bases->item, &stack); + bases->item->object.flags |= SEEN; + prio_queue_put(&queue, bases->item); bases = bases->next; } - while (stack) { - int explored_all_parents = 1; - struct commit_list *p; - struct commit *c = stack->item; + while ((c = prio_queue_get(&queue))) { + struct commit *first_parent = NULL; + + repo_parse_commit(r, c); + + /* Skip if below the current generation floor. */ + if (commit_graph_generation(c) < min_generation) + continue; /* Does it match any of our tips? */ { @@ -1276,25 +1212,26 @@ void tips_reachable_from_bases(struct repository *r, } for (p = c->parents; p; p = p->next) { - repo_parse_commit(r, p->item); - /* Have we already explored this parent? */ if (p->item->object.flags & SEEN) continue; - /* Is it below the current minimum generation? */ - if (commit_graph_generation(p->item) < min_generation) - continue; - /* Ok, we will explore from here on. */ p->item->object.flags |= SEEN; - explored_all_parents = 0; - commit_list_insert(p->item, &stack); - break; + /* Parse before pushing in PQ mode for ordering. */ + if (mode == TIPS_REACHABLE_PQ) + repo_parse_commit(r, p->item); + if (!first_parent) + first_parent = p->item; + else + prio_queue_put(&queue, p->item); } - - if (explored_all_parents) - pop_commit(&stack); + /* + * Add the first parent last so that it is on top of + * the LIFO queue, maintaining first-parent DFS order. 
+ */ + if (first_parent) + prio_queue_put(&queue, first_parent); } done: @@ -1302,7 +1239,7 @@ void tips_reachable_from_bases(struct repository *r, commits[i].commit->object.flags &= ~RESULT; free(commits); repo_clear_commit_marks(r, SEEN); - commit_list_free(stack); + clear_prio_queue(&queue); } /* diff --git a/commit-reach.h b/commit-reach.h index 3f3a563d8a5dd1..71e60d727a4919 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -96,19 +96,6 @@ int can_all_from_reach_with_flag(struct object_array *from, int can_all_from_reach(struct commit_list *from, struct commit_list *to, int commit_date_cutoff); - -/* - * Return a list of commits containing the commits in the 'to' array - * that are reachable from at least one commit in the 'from' array. - * Also add the given 'flag' to each of the commits in the returned list. - * - * This method uses the PARENT1 and PARENT2 flags during its operation, - * so be sure these flags are not set before calling the method. - */ -struct commit_list *get_reachable_subset(struct commit **from, size_t nr_from, - struct commit **to, size_t nr_to, - unsigned int reachable_flag); - struct ahead_behind_count { /** * As input, the *_index members indicate which positions in @@ -144,10 +131,14 @@ void ahead_behind(struct repository *r, * For all tip commits, add 'mark' to their flags if and only if they * are reachable from one of the commits in 'bases'. 
*/ +enum tips_reachable_mode { + TIPS_REACHABLE_DFS, + TIPS_REACHABLE_PQ, +}; void tips_reachable_from_bases(struct repository *r, struct commit_list *bases, struct commit **tips, size_t tips_nr, - int mark); + int mark, enum tips_reachable_mode mode); /* * Given a 'tip' commit and a list potential 'bases', return the index 'i' that diff --git a/ref-filter.c b/ref-filter.c index 1da4c0e60df3fa..9c8896d347b42c 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -3157,7 +3157,7 @@ static void reach_filter(struct ref_array *array, tips_reachable_from_bases(the_repository, *check_reachable, to_clear, array->nr, - UNINTERESTING); + UNINTERESTING, TIPS_REACHABLE_DFS); old_nr = array->nr; array->nr = 0; diff --git a/remote.c b/remote.c index 00723b385e1d52..0324c257438c55 100644 --- a/remote.c +++ b/remote.c @@ -1459,9 +1459,8 @@ static void add_missing_tags(struct ref *src, struct ref **dst, struct ref ***ds * sent to the other side. */ if (sent_tips.nr) { - const int reachable_flag = 1; - struct commit_list *found_commits; struct commit_stack src_commits = COMMIT_STACK_INIT; + struct commit_list *bases = NULL; for_each_string_list_item(item, &src_tag) { struct ref *ref = item->util; @@ -1479,11 +1478,13 @@ static void add_missing_tags(struct ref *src, struct ref **dst, struct ref ***ds commit_stack_push(&src_commits, commit); } - found_commits = get_reachable_subset(sent_tips.items, - sent_tips.nr, - src_commits.items, - src_commits.nr, - reachable_flag); + for (size_t i = 0; i < sent_tips.nr; i++) + commit_list_insert(sent_tips.items[i], &bases); + tips_reachable_from_bases(the_repository, + bases, src_commits.items, + src_commits.nr, TMP_MARK, + TIPS_REACHABLE_PQ); + commit_list_free(bases); for_each_string_list_item(item, &src_tag) { struct ref *dst_ref; @@ -1503,7 +1504,7 @@ static void add_missing_tags(struct ref *src, struct ref **dst, struct ref ***ds * Is this tag, which they do not have, reachable from * any of the commits we are sending? 
*/ - if (!(commit->object.flags & reachable_flag)) + if (!(commit->object.flags & TMP_MARK)) continue; /* Add it in */ @@ -1513,9 +1514,8 @@ static void add_missing_tags(struct ref *src, struct ref **dst, struct ref ***ds } clear_commit_marks_many(src_commits.nr, src_commits.items, - reachable_flag); + TMP_MARK); commit_stack_clear(&src_commits); - commit_list_free(found_commits); } string_list_clear(&src_tag, 0); diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index 5d86a96c17e4e5..66ee35e70d8683 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -7,6 +7,7 @@ #include "hex.h" #include "object-name.h" #include "ref-filter.h" +#include "revision.h" #include "setup.h" #include "string-list.h" #include "tag.h" @@ -149,30 +150,31 @@ int cmd__reach(int ac, const char **av) printf("%s(_,A,X,_):%d\n", av[1], commit_contains(&filter, A, X, &cache)); clear_contains_cache(&cache); - } else if (!strcmp(av[1], "get_reachable_subset")) { - const int reachable_flag = 1; - int count = 0; - struct commit_list *current; - struct commit_list *list = get_reachable_subset(X_stack.items, X_stack.nr, - Y_stack.items, Y_stack.nr, - reachable_flag); - printf("get_reachable_subset(X,Y)\n"); - for (current = list; current; current = current->next) { - if (!(list->item->object.flags & reachable_flag)) - die(_("commit %s is not marked reachable"), - oid_to_hex(&list->item->object.oid)); - count++; - } + } else if (!strcmp(av[1], "tips_reachable_from_bases") || + !strcmp(av[1], "tips_reachable_from_bases_pq")) { + enum tips_reachable_mode mode = + !strcmp(av[1], "tips_reachable_from_bases_pq") + ? 
TIPS_REACHABLE_PQ : TIPS_REACHABLE_DFS; + struct commit_list *bases = NULL; + struct commit_list *result = NULL; + + for (size_t i = 0; i < X_stack.nr; i++) + commit_list_insert(X_stack.items[i], &bases); + tips_reachable_from_bases(the_repository, + bases, Y_stack.items, + Y_stack.nr, TMP_MARK, + mode); + commit_list_free(bases); + + printf("tips_reachable_from_bases(X,Y)\n"); for (size_t i = 0; i < Y_stack.nr; i++) { - if (Y_stack.items[i]->object.flags & reachable_flag) - count--; + if (Y_stack.items[i]->object.flags & TMP_MARK) + commit_list_insert(Y_stack.items[i], &result); } + print_sorted_commit_ids(result); - if (count < 0) - die(_("too many commits marked reachable")); - - print_sorted_commit_ids(list); - commit_list_free(list); + clear_commit_marks_many(Y_stack.nr, Y_stack.items, TMP_MARK); + commit_list_free(result); } object_array_clear(&X_obj); diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index b5b314e57068f9..b736d893d5ab16 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -391,7 +391,7 @@ test_expect_success 'rev-list: symmetric difference topo-order' ' run_all_modes git rev-list --topo-order commit-3-8...commit-6-6 ' -test_expect_success 'get_reachable_subset:all' ' +test_expect_success 'tips_reachable_from_bases:all' ' cat >input <<-\EOF && X:commit-9-1 X:commit-8-3 @@ -403,15 +403,16 @@ test_expect_success 'get_reachable_subset:all' ' Y:commit-5-6 EOF ( - echo "get_reachable_subset(X,Y)" && + echo "tips_reachable_from_bases(X,Y)" && git rev-parse commit-3-3 \ commit-1-7 \ commit-5-6 | sort ) >expect && - test_all_modes get_reachable_subset + test_all_modes tips_reachable_from_bases && + test_all_modes tips_reachable_from_bases_pq ' -test_expect_success 'get_reachable_subset:some' ' +test_expect_success 'tips_reachable_from_bases:some' ' cat >input <<-\EOF && X:commit-9-1 X:commit-8-3 @@ -422,14 +423,15 @@ test_expect_success 'get_reachable_subset:some' ' Y:commit-5-6 EOF ( - echo "get_reachable_subset(X,Y)" && + echo 
"tips_reachable_from_bases(X,Y)" && git rev-parse commit-3-3 \ commit-1-7 | sort ) >expect && - test_all_modes get_reachable_subset + test_all_modes tips_reachable_from_bases && + test_all_modes tips_reachable_from_bases_pq ' -test_expect_success 'get_reachable_subset:none' ' +test_expect_success 'tips_reachable_from_bases:none' ' cat >input <<-\EOF && X:commit-9-1 X:commit-8-3 @@ -439,8 +441,9 @@ test_expect_success 'get_reachable_subset:none' ' Y:commit-7-6 Y:commit-2-8 EOF - echo "get_reachable_subset(X,Y)" >expect && - test_all_modes get_reachable_subset + echo "tips_reachable_from_bases(X,Y)" >expect && + test_all_modes tips_reachable_from_bases && + test_all_modes tips_reachable_from_bases_pq ' test_expect_success 'for-each-ref ahead-behind:linear' ' @@ -657,6 +660,50 @@ test_expect_success 'for-each-ref merged:duplicate at min generation' ' --format="%(refname)" --stdin ' +test_expect_success 'for-each-ref merged:all reachable commits' ' + for x in $(test_seq 1 10) + do + for y in $(test_seq 1 10) + do + echo "refs/heads/commit-$x-$y" || return 1 + done + done >input && + for x in $(test_seq 1 5) + do + for y in $(test_seq 1 5) + do + echo "refs/heads/commit-$x-$y" || return 1 + done + done | sort >expect && + run_all_modes git for-each-ref --merged=commit-5-5 \ + --format="%(refname)" --stdin +' + +test_expect_success 'for-each-ref merged:all reachable, multibase' ' + for x in $(test_seq 1 10) + do + for y in $(test_seq 1 10) + do + echo "refs/heads/commit-$x-$y" || return 1 + done + done >input && + for x in $(test_seq 1 10) + do + for y in $(test_seq 1 10) + do + if { test $x -le 3 && test $y -le 7; } || + { test $x -le 7 && test $y -le 3; } + then + echo "refs/heads/commit-$x-$y" || return 1 + fi + done + done | sort >expect && + run_all_modes git for-each-ref \ + --merged=commit-3-7 \ + --merged=commit-7-3 \ + --format="%(refname)" --stdin +' + # For get_branch_base_for_tip, we only care about # first-parent history. 
Here is the test graph with # second parents removed: