Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 198 additions & 0 deletions commit-reach.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,46 @@ static int queue_has_nonstale(struct prio_queue *queue)
return 0;
}

/*
* Two conditions must both hold for it to be safe to stop early:
*
* 1. No merge-base candidate is currently waiting in the queue to be
* recorded (pending_merges_count == 0). A candidate is a commit
* that has both PARENT1 and PARENT2 set, is not STALE, and has not
* yet had RESULT set on it.
*
* 2. At least one side has zero exclusive (non-STALE) commits left in
* the queue. An "exclusive" commit carries exactly one of PARENT1
* or PARENT2 (and not STALE). New merge-base candidates are born
* only when an exclusive commit on one side propagates its flag
* onto a commit that already carries the other side's flag; if one
* side has run out of exclusive territory, no further such transition
* can occur. Propagation through STALE commits cannot create new
* candidates, because STALE commits already carry both PARENT1 and
* PARENT2.
*
* The callers maintain p1_exclusive_count, p2_exclusive_count, and
* pending_merges_count incrementally so this check is O(1). Each
* enqueue of a commit in a counted state contributes +1, balanced by
* either the flag transition that takes the commit out of that state or
* by its eventual dequeue, so the counts stay >= 0. The
* `(p->object.flags & flags) == flags` check on parent visits prevents
* enqueueing a second copy of a commit in the same flag state, which
* keeps the accounting straight even through diamond merges. Transitions
* out of (PARENT1|PARENT2) -- when a descendant merge-base propagates
* STALE up to a still-pending candidate -- are also counted, so that
* the candidate is not double-counted when it later dequeues with STALE
* set (and thus fails the dequeue-time decrement check).
*/
static int no_more_merge_bases_possible(int p1_exclusive_count,
int p2_exclusive_count,
int pending_merges_count)
{
if (pending_merges_count)
return 0;
return !p1_exclusive_count || !p2_exclusive_count;
}

/* all input commits in one and twos[] must have been parsed! */
static int paint_down_to_common(struct repository *r,
struct commit *one, int n,
Expand All @@ -59,24 +99,75 @@ static int paint_down_to_common(struct repository *r,
{
struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
int i;
int found_result = 0;
int use_early_termination;
int p1_has_infinity = 0, p2_has_infinity = 0;
int p1_exclusive_count = 0, p2_exclusive_count = 0;
int pending_merges_count = 0;
timestamp_t last_gen = GENERATION_NUMBER_INFINITY;
struct commit_list **tail = result;

if (!min_generation && !corrected_commit_dates_enabled(r))
queue.compare = compare_commits_by_commit_date;

/*
* The early termination optimization in no_more_merge_bases_possible()
* relies on the priority queue processing commits in an order where
* parents are never dequeued before their children. This is guaranteed
* by corrected commit dates (which ensure child_gen > parent_gen) for
* commits covered by the commit graph. Commits NOT in the graph get
* GENERATION_NUMBER_INFINITY and sort among themselves by commit date,
* which may violate monotonicity if timestamps are out of order.
*
* We handle this by tracking whether both sides have ever touched any
* INFINITY-generation commit; if so, we disable early termination
* because the dequeue order among INFINITY commits is not trustworthy
* and counter updates from later cross-side transitions can desync from
* the queue state. When only one side has INFINITY commits, the other
* side's territory is entirely in graph space (commit-graph writes are
* ancestry-closed) and no monotonicity violation can occur between them,
* so the optimization remains safe.
*
* Detection happens at push time -- both at the initial pushes of `one`
* and `twos[]` below, and when a parent is (re-)enqueued during the
* walk. The flag is sticky.
*/
use_early_termination = corrected_commit_dates_enabled(r);

one->object.flags |= PARENT1;
if (!n) {
commit_list_append(one, result);
return 0;
}
prio_queue_put(&queue, one);
if (use_early_termination) {
p1_exclusive_count++;
if (commit_graph_generation(one) == GENERATION_NUMBER_INFINITY)
p1_has_infinity = 1;
}

for (i = 0; i < n; i++) {
/*
* Skip duplicates within twos[]. A second push of an already-queued
* commit would inflate p2_exclusive_count, leaving a phantom that
* could delay the early-termination check. (Callers separately
* ensure twos[i] != one, so this is the only duplicate case we must
* handle here.)
*/
if (twos[i]->object.flags & PARENT2)
continue;
twos[i]->object.flags |= PARENT2;
prio_queue_put(&queue, twos[i]);
if (use_early_termination) {
p2_exclusive_count++;
if (commit_graph_generation(twos[i]) == GENERATION_NUMBER_INFINITY)
p2_has_infinity = 1;
}
}

if (p1_has_infinity && p2_has_infinity)
use_early_termination = 0;

while (queue_has_nonstale(&queue)) {
struct commit *commit = prio_queue_get(&queue);
struct commit_list *parents;
Expand All @@ -93,14 +184,39 @@ static int paint_down_to_common(struct repository *r,
break;

flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);

/*
* Decrement queue counters for the dequeued commit. Only maintained
* while early termination is active, since otherwise the increments
* in the parent-visit block below are also skipped. The counts must
* stay >= 0 in graph-space: each enqueue of a commit in a counted
* state is balanced by exactly one dequeue (or by a flag transition
* that moves it out of that state while still queued).
*/
if (use_early_termination) {
if (flags == PARENT1) {
if (p1_exclusive_count-- <= 0)
BUG("p1_exclusive_count went negative");
} else if (flags == PARENT2) {
if (p2_exclusive_count-- <= 0)
BUG("p2_exclusive_count went negative");
} else if (flags == (PARENT1 | PARENT2) &&
!(commit->object.flags & RESULT)) {
if (pending_merges_count-- <= 0)
BUG("pending_merges_count went negative");
}
}

if (flags == (PARENT1 | PARENT2)) {
if (!(commit->object.flags & RESULT)) {
commit->object.flags |= RESULT;
tail = commit_list_append(commit, tail);
}
/* Mark parents of a found merge stale */
flags |= STALE;
found_result = 1;
}

parents = commit->parents;
while (parents) {
struct commit *p = parents->item;
Expand All @@ -123,9 +239,82 @@ static int paint_down_to_common(struct repository *r,
return error(_("could not parse commit %s"),
oid_to_hex(&p->object.oid));
}
/*
* Adjust counts for the flag transition on this parent. With
* corrected commit dates, parents are never dequeued before
* their children, so the `old_flags` we read here reflects
* the current state of every queue entry of p.
*
* The `(p->object.flags & flags) == flags` check above ensures
* we never enqueue a second copy of p with the same flag state,
* so the count of queued entries in each exclusive (non-STALE)
* state is exact: +1 on the transition that brings p into that
* state, -1 on the transition that takes it out, and -1 on
* dequeue if it is still in that state. In particular, the
* counts cannot drift negative.
*/
if (use_early_termination) {
unsigned old_flags = p->object.flags &
(PARENT1 | PARENT2 | STALE);
unsigned new_flags = (p->object.flags | flags) &
(PARENT1 | PARENT2 | STALE);

/*
* Detect INFINITY exposure on this push. If both sides have
* touched any INFINITY commit, monotonicity among those
* commits is no longer guaranteed and counters can desync;
* disable the optimization before mutating them. In normal
* usage (ancestry-closed commit graphs) this is reached only
* when at least one side's tip is itself INFINITY, which is
* already detected at the initial pushes above; the check
* here is a safety net for unusual graph layouts.
*/
if (commit_graph_generation(p) == GENERATION_NUMBER_INFINITY) {
if (new_flags & PARENT1)
p1_has_infinity = 1;
if (new_flags & PARENT2)
p2_has_infinity = 1;
if (p1_has_infinity && p2_has_infinity)
use_early_termination = 0;
}

if (use_early_termination && old_flags != new_flags) {
if (old_flags == PARENT1) {
if (p1_exclusive_count-- <= 0)
BUG("p1_exclusive_count went negative");
} else if (old_flags == PARENT2) {
if (p2_exclusive_count-- <= 0)
BUG("p2_exclusive_count went negative");
} else if (old_flags == (PARENT1 | PARENT2) &&
!(p->object.flags & RESULT)) {
/*
* p was a pending merge-base candidate; STALE is being
* propagated to it from a descendant that has already
* been recorded. When p later dequeues with STALE
* set, the dequeue path will not match its
* (PARENT1|PARENT2)&&!RESULT check, so we must
* decrement here to keep pending_merges_count exact.
*/
if (pending_merges_count-- <= 0)
BUG("pending_merges_count went negative");
}
if (new_flags == PARENT1)
p1_exclusive_count++;
else if (new_flags == PARENT2)
p2_exclusive_count++;
else if (new_flags == (PARENT1 | PARENT2))
pending_merges_count++;
}
}
p->object.flags |= flags;
prio_queue_put(&queue, p);
}

if (found_result && use_early_termination &&
no_more_merge_bases_possible(p1_exclusive_count,
p2_exclusive_count,
pending_merges_count))
break;
}

clear_prio_queue(&queue);
Expand Down Expand Up @@ -544,6 +733,15 @@ int repo_in_merge_bases_many(struct repository *r, struct commit *commit,
if (repo_parse_commit(r, reference[i]))
return ignore_missing_commits ? 0 : -1;

/*
* Short-circuit when commit is itself one of the references: a commit
* is trivially reachable from itself. This also avoids tripping the
* counter invariants in paint_down_to_common(), which assumes the
* initial pushes of `one` and `twos[]` paint disjoint commits.
*/
if (commit == reference[i])
return 1;

generation = commit_graph_generation(reference[i]);
if (generation > max_generation)
max_generation = generation;
Expand Down
119 changes: 119 additions & 0 deletions t/perf/p6012-merge-base-deep-side-branch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/bin/sh

test_description='Performance of merge-base with a deep side branch

Synthetic repository that triggers slow merge-base when a merge commit
introduces a side branch rooted far back in history. The pathological
case forces paint_down_to_common() to chase STALE flags through many
intermediate commits after the merge-base is already found.

Topology (N = $NUM_COMMITS commits on main before the merge, F is
the BRANCH_POINT-th of them, S = $SIDE_COMMITS commits on the side):

# F -- s1 -- ... -- sS (side, dates set old)
# / \
# 1 -- 2 -- ... -- N-1 -- N -- M (main)
# |
# P (pr branch)

merge-base(M, P) = N. Without early termination, the algorithm
walks from N back through roughly N - BRANCH_POINT commits to F
to exhaust STALE processing.
'

. ./perf-lib.sh

NUM_COMMITS=500000
BRANCH_POINT=10000
SIDE_COMMITS=5

test_expect_success 'setup synthetic repo' '
git init --bare repo.git &&
awk -v N=$NUM_COMMITS -v BP=$BRANCH_POINT -v S=$SIDE_COMMITS '\''
BEGIN {
# Shared blob
print "blob"
print "mark :1"
print "data 8"
print "content"
print ""

# Main branch commits: mark :2 is the 1st main
# commit, mark :(N+1) is the Nth.
for (i = 2; i <= N + 1; i++) {
print "commit refs/heads/main"
print "mark :" i
print "committer C <c@c> " (1000000 + i) " +0000"
print "data 2"
print "x"
if (i > 2)
print "from :" (i - 1)
print "M 100644 :1 file"
print ""
}

# Side branch forks from the BP-th main commit
# (mark :(BP+1)), with old dates.
side_start = BP + 1
side_mark_base = N + 2
for (j = 0; j < S; j++) {
mark = side_mark_base + j
if (j == 0)
from_mark = side_start
else
from_mark = mark - 1
print "commit refs/heads/side"
print "mark :" mark
print "committer C <c@c> " (500000 + j) " +0000"
print "data 2"
print "x"
print "from :" from_mark
print ""
}

# Merge side into main
main_tip = N + 1
side_tip = side_mark_base + S - 1
merge_mark = side_tip + 1
print "commit refs/heads/main"
print "mark :" merge_mark
print "committer C <c@c> " (1000000 + N + 2) " +0000"
print "data 2"
print "x"
print "from :" main_tip
print "merge :" side_tip
print ""

# PR branch forked from main tip (just before merge)
pr_mark = merge_mark + 1
print "commit refs/heads/pr"
print "mark :" pr_mark
print "committer C <c@c> " (1000000 + N + 3) " +0000"
print "data 2"
print "x"
print "from :" main_tip
print ""
}
'\'' </dev/null |
git -C repo.git fast-import --quiet &&

git -C repo.git commit-graph write --reachable &&

# Compute expected merge-base: just below the merge on main.
git -C repo.git rev-parse main~1 >expect
'

test_expect_success 'merge-base result is correct' '
git -C repo.git merge-base --all main pr >actual &&
test_cmp expect actual
'

test_perf 'merge-base: commit introducing old side branch' '
git -C repo.git merge-base --all main pr
'

test_perf 'merge-base: no side branch (baseline)' '
git -C repo.git merge-base --all main~1 pr
'

test_done
Loading