gitgitgadget · newren · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/cache-tree.c b/cache-tree.c
@@ -161,6 +161,54 @@ void cache_tree_invalidate_path(struct index_state *istate, const char *path)
 		istate->cache_changed |= CACHE_TREE_CHANGED;
 }
 
+/*
+ * Check whether this_ce and the next entry in the index form a D/F
+ * conflict ("path" vs "path/file").  Returns the conflicting "path/..."
+ * name when one is found, or NULL otherwise.
+ *
+ * The cache is sorted, so "path/file" sorts after "path" and the
+ * conflict is usually visible as adjacent entries.  But other entries
+ * can sort between them -- e.g. "path-internal" sits between "path"
+ * and "path/file" because '-' (0x2D) precedes '/' (0x2F) -- so when
+ * the immediately following entry shares our prefix but starts with a
+ * character that sorts before '/', binary search for "path/" instead.
+ */
+static const char *find_df_conflict(struct index_state *istate,
+				    const struct cache_entry *this_ce,
+				    const struct cache_entry *next_ce)
+{
+	const char *this_name = this_ce->name;
+	const char *next_name = next_ce->name;
+	int this_len = ce_namelen(this_ce);
+	const struct cache_entry *other;
+	struct strbuf probe = STRBUF_INIT;
+	int pos;
+
+	if (this_len >= ce_namelen(next_ce) ||
+	    next_name[this_len] > '/' ||
+	    strncmp(this_name, next_name, this_len))
+		return NULL;
+
+	if (next_name[this_len] == '/')
+		return next_name;
+
+	strbuf_add(&probe, this_name, this_len);
+	strbuf_addch(&probe, '/');
+	pos = index_name_pos_sparse(istate, probe.buf, probe.len);
+	strbuf_release(&probe);
+
+	if (pos < 0)
+		pos = -pos - 1;
+	if (pos >= (int)istate->cache_nr)
+		return NULL;
+	other = istate->cache[pos];
+	if (ce_namelen(other) > this_len &&
+	    other->name[this_len] == '/' &&
+	    !strncmp(this_name, other->name, this_len))
+		return other->name;
+	return NULL;
+}
+
 static int verify_cache(struct index_state *istate, int flags)
 {
 	unsigned i, funny;
@@ -190,24 +238,18 @@ static int verify_cache(struct index_state *istate, int flags)
 	 */
 	funny = 0;
 	for (i = 0; i + 1 < istate->cache_nr; i++) {
-		/* path/file always comes after path because of the way
-		 * the cache is sorted.  Also path can appear only once,
-		 * which means conflicting one would immediately follow.
-		 */
 		const struct cache_entry *this_ce = istate->cache[i];
 		const struct cache_entry *next_ce = istate->cache[i + 1];
-		const char *this_name = this_ce->name;
-		const char *next_name = next_ce->name;
-		int this_len = ce_namelen(this_ce);
-		if (this_len < ce_namelen(next_ce) &&
-		    next_name[this_len] == '/' &&
-		    strncmp(this_name, next_name, this_len) == 0) {
+		const char *conflict_name;
+
+		conflict_name = find_df_conflict(istate, this_ce, next_ce);
+		if (conflict_name) {
 			if (10 < ++funny) {
 				fprintf(stderr, "...\n");
 				break;
 			}
 			fprintf(stderr, "You have both %s and %s\n",
-				this_name, next_name);
+				this_ce->name, conflict_name);
 		}
 	}
 	if (funny)

diff --git a/merge-ort.c b/merge-ort.c
@@ -728,6 +728,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
 		strintmap_clear_func(&renames->deferred[i].possible_trivial_merges);
 		strset_clear_func(&renames->deferred[i].target_dirs);
 		renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */
+		free(renames->pairs[i].queue);
+		diff_queue_init(&renames->pairs[i]);
 	}
 	renames->cached_pairs_valid_side = 0;
 	renames->dir_rename_mask = 0;
@@ -1008,32 +1010,34 @@ static int traverse_trees_wrapper(struct index_state *istate,
 	info->traverse_path = renames->callback_data_traverse_path;
 	info->fn = old_fn;
 	for (i = old_offset; i < renames->callback_data_nr; ++i) {
-		info->fn(n,
-			 renames->callback_data[i].mask,
-			 renames->callback_data[i].dirmask,
-			 renames->callback_data[i].names,
-			 info);
+		ret = info->fn(n,
+			       renames->callback_data[i].mask,
+			       renames->callback_data[i].dirmask,
+			       renames->callback_data[i].names,
+			       info);
+		if (ret < 0)
+			break;
 	}
 
 	renames->callback_data_nr = old_offset;
 	free(renames->callback_data_traverse_path);
 	renames->callback_data_traverse_path = old_callback_data_traverse_path;
 	info->traverse_path = NULL;
-	return 0;
+	return ret < 0 ? ret : 0;
 }
 
-static void setup_path_info(struct merge_options *opt,
-			    struct string_list_item *result,
-			    const char *current_dir_name,
-			    int current_dir_name_len,
-			    char *fullpath, /* we'll take over ownership */
-			    struct name_entry *names,
-			    struct name_entry *merged_version,
-			    unsigned is_null,     /* boolean */
-			    unsigned df_conflict, /* boolean */
-			    unsigned filemask,
-			    unsigned dirmask,
-			    int resolved          /* boolean */)
+static int setup_path_info(struct merge_options *opt,
+			   struct string_list_item *result,
+			   const char *current_dir_name,
+			   int current_dir_name_len,
+			   char *fullpath, /* we'll take over ownership */
+			   struct name_entry *names,
+			   struct name_entry *merged_version,
+			   unsigned is_null,     /* boolean */
+			   unsigned df_conflict, /* boolean */
+			   unsigned filemask,
+			   unsigned dirmask,
+			   int resolved          /* boolean */)
 {
 	/* result->util is void*, so mi is a convenience typed variable */
 	struct merged_info *mi;
@@ -1077,9 +1081,11 @@ static void setup_path_info(struct merge_options *opt,
 			 */
 			mi->is_null = 1;
 	}
-	strmap_put(&opt->priv->paths, fullpath, mi);
+	if (strmap_put(&opt->priv->paths, fullpath, mi))
+		return error(_("tree has duplicate entries for '%s'"), fullpath);
 	result->string = fullpath;
 	result->util = mi;
+	return 0;
 }
 
 static void add_pair(struct merge_options *opt,
@@ -1346,9 +1352,10 @@ static int collect_merge_info_callback(int n,
 	 */
 	if (side1_matches_mbase && side2_matches_mbase) {
 		/* mbase, side1, & side2 all match; use mbase as resolution */
-		setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
-				names, names+0, mbase_null, 0 /* df_conflict */,
-				filemask, dirmask, 1 /* resolved */);
+		if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+				    names, names+0, mbase_null, 0 /* df_conflict */,
+				    filemask, dirmask, 1 /* resolved */))
+			return -1; /* Quit traversing */
 		return mask;
 	}
 
@@ -1360,9 +1367,10 @@ static int collect_merge_info_callback(int n,
 	 */
 	if (sides_match && filemask == 0x07) {
 		/* use side1 (== side2) version as resolution */
-		setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
-				names, names+1, side1_null, 0,
-				filemask, dirmask, 1);
+		if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+				    names, names+1, side1_null, 0,
+				    filemask, dirmask, 1))
+			return -1; /* Quit traversing */
 		return mask;
 	}
 
@@ -1374,18 +1382,20 @@ static int collect_merge_info_callback(int n,
 	 */
 	if (side1_matches_mbase && filemask == 0x07) {
 		/* use side2 version as resolution */
-		setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
-				names, names+2, side2_null, 0,
-				filemask, dirmask, 1);
+		if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+				    names, names+2, side2_null, 0,
+				    filemask, dirmask, 1))
+			return -1; /* Quit traversing */
 		return mask;
 	}
 
 	/* Similar to above but swapping sides 1 and 2 */
 	if (side2_matches_mbase && filemask == 0x07) {
 		/* use side1 version as resolution */
-		setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
-				names, names+1, side1_null, 0,
-				filemask, dirmask, 1);
+		if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+				    names, names+1, side1_null, 0,
+				    filemask, dirmask, 1))
+			return -1; /* Quit traversing */
 		return mask;
 	}
 
@@ -1409,8 +1419,9 @@ static int collect_merge_info_callback(int n,
 	 * unconflict some more cases, but that comes later so all we can
 	 * do now is record the different non-null file hashes.)
 	 */
-	setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
-			names, NULL, 0, df_conflict, filemask, dirmask, 0);
+	if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+			    names, NULL, 0, df_conflict, filemask, dirmask, 0))
+		return -1; /* Quit traversing */
 
 	ci = pi.util;
 	VERIFY_CI(ci);
@@ -1738,7 +1749,6 @@ static int collect_merge_info(struct merge_options *opt,
 	setup_traverse_info(&info, opt->priv->toplevel_dir);
 	info.fn = collect_merge_info_callback;
 	info.data = opt;
-	info.show_all_errors = 1;
 
 	if (repo_parse_tree(opt->repo, merge_base) < 0 ||
 	    repo_parse_tree(opt->repo, side1) < 0 ||

diff --git a/t/meson.build b/t/meson.build
@@ -124,6 +124,7 @@ integration_tests = [
   't0090-cache-tree.sh',
   't0091-bugreport.sh',
   't0092-diagnose.sh',
+  't0093-verify-cache-df-gap.sh',
   't0095-bloom.sh',
   't0100-previous.sh',
   't0101-at-syntax.sh',

diff --git a/t/t0093-direct-index-write.pl b/t/t0093-direct-index-write.pl
@@ -0,0 +1,38 @@
+#!/usr/bin/perl
+#
+# Build a v2 index file from entries listed on stdin.
+# Each line: "octalmode hex-oid name"
+# Output: binary index written to stdout.
+#
+# This bypasses all D/F safety checks in add_index_entry(), simulating
+# what happens when code uses ADD_CACHE_JUST_APPEND to bulk-load entries.
+use strict;
+use warnings;
+use Digest::SHA qw(sha1 sha256);
+
+my $hash_algo = $ENV{'GIT_DEFAULT_HASH'} || 'sha1';
+my $hash_func = $hash_algo eq 'sha256' ? \&sha256 : \&sha1;
+
+my @entries;
+while (my $line = <STDIN>) {
+	chomp $line;
+	my ($mode, $oid_hex, $name) = split(/ /, $line, 3);
+	push @entries, [$mode, $oid_hex, $name];
+}
+
+my $body = "DIRC" . pack("NN", 2, scalar @entries);
+
+for my $ent (@entries) {
+	my ($mode, $oid_hex, $name) = @{$ent};
+	# 10 x 32-bit stat fields (zeroed), with mode in position 7
+	my $stat = pack("N10", 0, 0, 0, 0, 0, 0, oct($mode), 0, 0, 0);
+	my $oid = pack("H*", $oid_hex);
+	my $flags = pack("n", length($name) & 0xFFF);
+	my $entry = $stat . $oid . $flags . $name . "\0";
+	# Pad to 8-byte boundary
+	while (length($entry) % 8) { $entry .= "\0"; }
+	$body .= $entry;
+}
+
+binmode STDOUT;
+print $body . $hash_func->($body);
diff --git a/t/t0093-verify-cache-df-gap.sh b/t/t0093-verify-cache-df-gap.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+
+test_description='verify_cache() must catch non-adjacent D/F conflicts
+
+Ensure that verify_cache() can complain about bad entries like:
+
+  docs               <-- submodule
+  docs-internal/...  <-- sorts here because "-" < "/"
+  docs/...           <-- D/F conflict with "docs" above, not adjacent
+
+In order to test verify_cache, we directly construct a corrupt index
+(bypassing the D/F safety checks in add_index_entry) and verify that
+write-tree rejects it.
+'
+
+. ./test-lib.sh
+
+if ! test_have_prereq PERL
+then
+	skip_all='skipping verify_cache D/F tests; Perl not available'
+	test_done
+fi
+
+# Build a v2 index from entries on stdin, bypassing D/F checks.
+# Each line: "octalmode hex-oid name" (entries must be pre-sorted).
+build_corrupt_index () {
+	perl "$TEST_DIRECTORY/t0093-direct-index-write.pl" >"$1"
+}
+
+test_expect_success 'setup objects' '
+	test_commit base &&
+	BLOB=$(git rev-parse HEAD:base.t) &&
+	SUB_COMMIT=$(git rev-parse HEAD)
+'
+
+test_expect_success 'adjacent D/F conflict is caught by verify_cache' '
+	cat >index-entries <<-EOF &&
+	0160000 $SUB_COMMIT docs
+	0100644 $BLOB docs/requirements.txt
+	EOF
+	build_corrupt_index .git/index <index-entries &&
+
+	test_must_fail git write-tree 2>err &&
+	test_grep "You have both docs and docs/requirements.txt" err
+'
+
+test_expect_success 'non-adjacent D/F conflict is caught by verify_cache' '
+	cat >index-entries <<-EOF &&
+	0160000 $SUB_COMMIT docs
+	0100644 $BLOB docs-internal/README.md
+	0100644 $BLOB docs/requirements.txt
+	EOF
+	build_corrupt_index .git/index <index-entries &&
+
+	test_must_fail git write-tree 2>err &&
+	test_grep "You have both docs and docs/requirements.txt" err
+'
+
+test_done