From c3cf8e5907adb55380801007ff14f0e3b7cf7152 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Fri, 21 Nov 2025 12:13:45 +0100 Subject: [PATCH 01/16] fetch: extract out reference committing logic The `do_fetch()` function contains the core of the `git-fetch(1)` logic. Part of this is to fetch and store references. This is done by 1. Creating a reference transaction (non-atomic mode uses batched updates). 2. Adding individual reference updates to the transaction. 3. Committing the transaction. 4. When using batched updates, handling the rejected updates. The following commit, will fix a bug wherein fetching tags with conflicts was causing other reference updates to fail. Fixing this requires utilizing this logic in different regions of the function. In preparation of the follow up commit, extract the committing and rejection handling logic into a separate function called `commit_ref_transaction()`. Helped-by: Patrick Steinhardt Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/fetch.c | 59 +++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index c7ff3480fb1827..f90179040ba34c 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1686,6 +1686,36 @@ static void ref_transaction_rejection_handler(const char *refname, *data->retcode = 1; } +/* + * Commit the reference transaction. If it isn't an atomic transaction, handle + * rejected updates as part of using batched updates. + */ +static int commit_ref_transaction(struct ref_transaction **transaction, + bool is_atomic, const char *remote_name, + struct strbuf *err) +{ + int retcode = ref_transaction_commit(*transaction, err); + if (retcode) + goto out; + + if (!is_atomic) { + struct ref_rejection_data data = { + .conflict_msg_shown = 0, + .remote_name = remote_name, + .retcode = &retcode, + }; + + ref_transaction_for_each_rejected_update(*transaction, + ref_transaction_rejection_handler, + &data); + } + +out: + ref_transaction_free(*transaction); + *transaction = NULL; + return retcode; +} + static int do_fetch(struct transport *transport, struct refspec *rs, const struct fetch_config *config) @@ -1858,33 +1888,10 @@ static int do_fetch(struct transport *transport, if (retcode) goto cleanup; - retcode = ref_transaction_commit(transaction, &err); - if (retcode) { - /* - * Explicitly handle transaction cleanup to avoid - * aborting an already closed transaction. - */ - ref_transaction_free(transaction); - transaction = NULL; + retcode = commit_ref_transaction(&transaction, atomic_fetch, + transport->remote->name, &err); + if (retcode) goto cleanup; - } - - if (!atomic_fetch) { - struct ref_rejection_data data = { - .retcode = &retcode, - .conflict_msg_shown = 0, - .remote_name = transport->remote->name, - }; - - ref_transaction_for_each_rejected_update(transaction, - ref_transaction_rejection_handler, - &data); - if (retcode) { - ref_transaction_free(transaction); - transaction = NULL; - goto cleanup; - } - } commit_fetch_head(&fetch_head); From dd8e8c786efdfb3ba588d807bfb0dc0d5196c343 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 15 Nov 2025 23:02:57 -0800 Subject: [PATCH 02/16] submodule add: sanity check existing .gitmodules "git submodule add" tries to find if a submodule with the same name already exists at a different path, by looking up an entry in the .gitmodules file. If the entry in the file is incomplete, e.g., when the submodule..something variable is defined but there is no definition of submodule..path variable, it accesses the missing .path member of the submodule structure and triggers a segfault. A brief audit was done to make sure that the code does not assume members other than those that are absolutely certain to exist: a submodule obtained by submodule_from_name() should have .name member, while a submodule obtained by submodule_from_path() should also have .path as well as .name member, and we cannot assume anything else. Luckily, the module_add() codepath was the only problematic one. It is fairly recent code that comes from 1fa06ced (submodule: prevent overwriting .gitmodules on path reuse, 2025-07-24). A helper used by update_submodule() seems to assume that its call to submodule_from_path() always yields a submodule object without a failure, which seems to rely on the caller making sure it is the case. Leave an assert() with a NEEDSWORK comment there for future developers to make sure the assumption actually holds. Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 12 ++++++++++-- t/t7400-submodule-basic.sh | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 07a1935cbe1a69..1a1043cdab73af 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1913,6 +1913,13 @@ static int determine_submodule_update_strategy(struct repository *r, const char *val; int ret; + /* + * NEEDSWORK: audit and ensure that update_submodule() has right + * to assume that submodule_from_path() above will always succeed. + */ + if (!sub) + BUG("update_submodule assumes a submodule exists at path (%s)", + path); key = xstrfmt("submodule.%s.update", sub->name); if (update) { @@ -3537,14 +3544,15 @@ static int module_add(int argc, const char **argv, const char *prefix, } } - if(!add_data.sm_name) + if (!add_data.sm_name) add_data.sm_name = add_data.sm_path; existing = submodule_from_name(the_repository, null_oid(the_hash_algo), add_data.sm_name); - if (existing && strcmp(existing->path, add_data.sm_path)) { + if (existing && existing->path && + strcmp(existing->path, add_data.sm_path)) { if (!force) { die(_("submodule name '%s' already used for path '%s'"), add_data.sm_name, existing->path); diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh index fd3e7e355e4ffc..9ade97e432162a 100755 --- a/t/t7400-submodule-basic.sh +++ b/t/t7400-submodule-basic.sh @@ -48,6 +48,25 @@ test_expect_success 'submodule deinit works on empty repository' ' git submodule deinit --all ' +test_expect_success 'submodule add with incomplete .gitmodules' ' + test_when_finished "rm -f expect actual" && + test_when_finished "git config remove-section submodule.one" && + test_when_finished "git rm -f one .gitmodules" && + git init one && + git -C one commit --allow-empty -m one-initial && + git config -f .gitmodules submodule.one.ignore all && + + git submodule add ./one && + + for var in ignore path url + do + git config -f .gitmodules --get "submodule.one.$var" || + return 1 + done >actual && + test_write_lines all one ./one >expect && + test_cmp expect actual +' + test_expect_success 'setup - initial commit' ' >t && git add t && From dc8a00fafef0608c27cdf47cd8a8de0d31dc2197 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 7 Dec 2025 10:03:56 +0900 Subject: [PATCH 03/16] completion: clarify support for short options and arguments The list of supported completions in the header of the file was mostly written a long time ago when Shawn added the initial version of this script in 2006. The list explicitly states that we complete "common --long-options", which implies that we do not complete not-so-common ones and single letter options (this text dates back to May 2007). Update the description to explicitly state that single-letter options are not completed. Also, document that arguments to options are completed, even for single-letter options (e.g., "git -c " offers configuration variables). The reason why we do not complete single-letter options is because it does not seem to help all that much to learn that the command takes -c, -d, -e options when "git foo -" offers these three, unlike long options that is easier to guess what they are about. Because this rationale is primarily for our developers, let's leave it out of the completion script itself, whose messages are entirely for end-users. Our developers can run "git blame" to find this commit as needed. Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 73abea31b428f3..538dff1ee5cebe 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -13,7 +13,8 @@ # *) git email aliases for git-send-email # *) tree paths within 'ref:path/to/file' expressions # *) file paths within current working directory and index -# *) common --long-options +# *) common --long-options but not single-letter options +# *) arguments to long and single-letter options # # To use these routines: # From 8ff2eef8ada18c2d7ef61b1e8e13d53937524908 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Fri, 21 Nov 2025 12:13:46 +0100 Subject: [PATCH 04/16] fetch: fix non-conflicting tags not being committed The commit 0e358de64a (fetch: use batched reference updates, 2025-05-19) updated the 'git-fetch(1)' command to use batched updates. This batches updates to gain performance improvements. When fetching references, each update is added to the transaction. Finally, when committing, individual updates are allowed to fail with reason, while the transaction itself succeeds. One scenario which was missed here, was fetching tags. When fetching conflicting tags, the `fetch_and_consume_refs()` function returns '1', which skipped committing the transaction and directly jumped to the cleanup section. This mean that no updates were applied. This also extends to backfilling tags which is done when fetching specific refspecs which contains tags in their history. Fix this by committing the transaction when we have an error code and not using an atomic transaction. This ensures other references are applied even when some updates fail. The cleanup section is reached with `retcode` set in several scenarios: - `truncate_fetch_head()`, `open_fetch_head()` and `prune_refs()` set `retcode` before the transaction is created, so no commit is attempted. - `fetch_and_consume_refs()` and `backfill_tags()` are the primary cases this fix targets, both setting a positive `retcode` to trigger the committing of the transaction. This simplifies error handling and ensures future modifications to `do_fetch()` don't need special handling for batched updates. Add tests to check for this regression. While here, add a missing cleanup from previous test. Reported-by: David Bohman Helped-by: Patrick Steinhardt Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/fetch.c | 8 +++++++ t/t5510-fetch.sh | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/builtin/fetch.c b/builtin/fetch.c index f90179040ba34c..b19fa8e966df05 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1957,6 +1957,14 @@ static int do_fetch(struct transport *transport, } cleanup: + /* + * When using batched updates, we want to commit the non-rejected + * updates and also handle the rejections. + */ + if (retcode && !atomic_fetch && transaction) + commit_ref_transaction(&transaction, false, + transport->remote->name, &err); + if (retcode) { if (err.len) { error("%s", err.buf); diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index b7059cccaacce0..f500cb83cad997 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -1552,6 +1552,7 @@ test_expect_success CASE_INSENSITIVE_FS,REFFILES 'D/F conflict on case insensiti ' test_expect_success REFFILES 'D/F conflict on case sensitive filesystem with lock' ' + test_when_finished rm -rf base repo && ( git init --ref-format=reftable base && cd base && @@ -1577,6 +1578,67 @@ test_expect_success REFFILES 'D/F conflict on case sensitive filesystem with loc ) ' +test_expect_success 'fetch --tags fetches existing tags' ' + test_when_finished rm -rf base repo && + + git init base && + git -C base commit --allow-empty -m "empty-commit" && + + git clone --bare base repo && + + git -C base tag tag-1 && + git -C repo for-each-ref >out && + test_grep ! "tag-1" out && + git -C repo fetch --tags && + git -C repo for-each-ref >out && + test_grep "tag-1" out +' + +test_expect_success 'fetch --tags fetches non-conflicting tags' ' + test_when_finished rm -rf base repo && + + git init base && + git -C base commit --allow-empty -m "empty-commit" && + git -C base tag tag-1 && + + git clone --bare base repo && + + git -C base tag tag-2 && + git -C repo for-each-ref >out && + test_grep ! "tag-2" out && + + git -C base commit --allow-empty -m "second empty-commit" && + git -C base tag -f tag-1 && + + test_must_fail git -C repo fetch --tags 2>out && + test_grep "tag-1 (would clobber existing tag)" out && + git -C repo for-each-ref >out && + test_grep "tag-2" out +' + +test_expect_success "backfill tags when providing a refspec" ' + test_when_finished rm -rf source target && + + git init source && + git -C source commit --allow-empty --message common && + git clone file://"$(pwd)"/source target && + ( + cd source && + test_commit history && + test_commit fetch-me + ) && + + # The "history" tag is backfilled even though we requested + # to only fetch HEAD + git -C target fetch origin HEAD:branch && + git -C target tag -l >actual && + cat >expect <<-\EOF && + fetch-me + history + EOF + test_cmp expect actual +' + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd From b7b17ec8a6b1cb176206ad69c194b84eb3490b99 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Fri, 21 Nov 2025 12:13:47 +0100 Subject: [PATCH 05/16] fetch: fix failed batched updates skipping operations Fix a regression introduced with batched updates in 0e358de64a (fetch: use batched reference updates, 2025-05-19) when fetching references. In the `do_fetch()` function, we jump to cleanup if committing the transaction fails, regardless of whether using batched or atomic updates. This skips three subsequent operations: - Update 'FETCH_HEAD' as part of `commit_fetch_head()`. - Add upstream tracking information via `set_upstream()`. - Setting remote 'HEAD' values when `do_set_head` is true. For atomic updates, this is expected behavior. For batched updates, we want to continue with these operations even if some refs fail to update. Skipping `commit_fetch_head()` isn't actually a regression because 'FETCH_HEAD' is already updated via `append_fetch_head()` when not using '--atomic'. However, we add a test to validate this behavior. Skipping the other two operations (upstream tracking and remote HEAD) is a regression. Fix this by only jumping to cleanup when using '--atomic', allowing batched updates to continue with post-fetch operations. Add tests to prevent future regressions. Helped-by: Junio C Hamano Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- builtin/fetch.c | 6 +++- t/t5510-fetch.sh | 88 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index b19fa8e966df05..74bf67349d30a0 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1890,7 +1890,11 @@ static int do_fetch(struct transport *transport, retcode = commit_ref_transaction(&transaction, atomic_fetch, transport->remote->name, &err); - if (retcode) + /* + * With '--atomic', bail out if the transaction fails. Without '--atomic', + * continue to fetch head and perform other post-fetch operations. + */ + if (retcode && atomic_fetch) goto cleanup; commit_fetch_head(&fetch_head); diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index f500cb83cad997..ce1c23684ece38 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -1639,6 +1639,94 @@ test_expect_success "backfill tags when providing a refspec" ' test_cmp expect actual ' +test_expect_success REFFILES "FETCH_HEAD is updated even if ref updates fail" ' + test_when_finished rm -rf base repo && + + git init base && + ( + cd base && + test_commit "updated" && + + git update-ref refs/heads/foo @ && + git update-ref refs/heads/branch @ + ) && + + git init --bare repo && + ( + cd repo && + rm -f FETCH_HEAD && + git remote add origin ../base && + >refs/heads/foo.lock && + test_must_fail git fetch -f origin "refs/heads/*:refs/heads/*" 2>err && + test_grep "error: fetching ref refs/heads/foo failed: reference already exists" err && + test_grep "branch ${SQ}branch${SQ} of ../base" FETCH_HEAD && + test_grep "branch ${SQ}foo${SQ} of ../base" FETCH_HEAD + ) +' + +test_expect_success "upstream tracking info is added with --set-upstream" ' + test_when_finished rm -rf base repo && + + git init --initial-branch=main base && + test_commit -C base "updated" && + + git init --bare --initial-branch=main repo && + ( + cd repo && + git remote add origin ../base && + git fetch origin --set-upstream main && + git config get branch.main.remote >actual && + echo "origin" >expect && + test_cmp expect actual + ) +' + +test_expect_success REFFILES "upstream tracking info is added even with conflicts" ' + test_when_finished rm -rf base repo && + + git init --initial-branch=main base && + test_commit -C base "updated" && + + git init --bare --initial-branch=main repo && + ( + cd repo && + git remote add origin ../base && + test_must_fail git config get branch.main.remote && + + mkdir -p refs/remotes/origin && + >refs/remotes/origin/main.lock && + test_must_fail git fetch origin --set-upstream main && + git config get branch.main.remote >actual && + echo "origin" >expect && + test_cmp expect actual + ) +' + +test_expect_success REFFILES "HEAD is updated even with conflicts" ' + test_when_finished rm -rf base repo && + + git init base && + ( + cd base && + test_commit "updated" && + + git update-ref refs/heads/foo @ && + git update-ref refs/heads/branch @ + ) && + + git init --bare repo && + ( + cd repo && + git remote add origin ../base && + + test_path_is_missing refs/remotes/origin/HEAD && + mkdir -p refs/remotes/origin && + >refs/remotes/origin/branch.lock && + test_must_fail git fetch origin && + test -f refs/remotes/origin/HEAD + ) +' + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd From a67b902c94a2f33275a3947a8bcdab03f64ae75e Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 10 Dec 2025 14:13:01 +0100 Subject: [PATCH 06/16] git-compat-util: introduce MEMZERO_ARRAY() macro Introduce a new macro MEMZERO_ARRAY() that zeroes the memory allocated by ALLOC_ARRAY() and friends. And add coccinelle rule to enforce the use of this macro. Signed-off-by: Toon Claes Signed-off-by: Junio C Hamano --- builtin/last-modified.c | 2 +- compat/simple-ipc/ipc-win32.c | 2 +- contrib/coccinelle/array.cocci | 20 ++++++++++++++++++++ diff-delta.c | 2 +- ewah/bitmap.c | 7 +++---- git-compat-util.h | 1 + hashmap.c | 2 +- pack-revindex.c | 2 +- 8 files changed, 29 insertions(+), 9 deletions(-) diff --git a/builtin/last-modified.c b/builtin/last-modified.c index cc5fd2e7950be7..ac5387e861fd88 100644 --- a/builtin/last-modified.c +++ b/builtin/last-modified.c @@ -327,7 +327,7 @@ static void process_parent(struct last_modified *lm, if (!(parent->object.flags & PARENT1)) active_paths_free(lm, parent); - memset(lm->scratch->words, 0x0, lm->scratch->word_alloc * sizeof(eword_t)); + MEMZERO_ARRAY(lm->scratch->words, lm->scratch->word_alloc); diff_queue_clear(&diff_queued_diff); } diff --git a/compat/simple-ipc/ipc-win32.c b/compat/simple-ipc/ipc-win32.c index a8fc812adfcbd3..4a3e7df9c739e1 100644 --- a/compat/simple-ipc/ipc-win32.c +++ b/compat/simple-ipc/ipc-win32.c @@ -686,7 +686,7 @@ static LPSECURITY_ATTRIBUTES get_sa(struct my_sa_data *d) goto fail; } - memset(ea, 0, NR_EA * sizeof(EXPLICIT_ACCESS)); + MEMZERO_ARRAY(ea, NR_EA); ea[0].grfAccessPermissions = GENERIC_READ | GENERIC_WRITE; ea[0].grfAccessMode = SET_ACCESS; diff --git a/contrib/coccinelle/array.cocci b/contrib/coccinelle/array.cocci index 27a3b479c94e5c..d306f6a21efc9e 100644 --- a/contrib/coccinelle/array.cocci +++ b/contrib/coccinelle/array.cocci @@ -101,3 +101,23 @@ expression dst, src, n; -ALLOC_ARRAY(dst, n); -COPY_ARRAY(dst, src, n); +DUP_ARRAY(dst, src, n); + +@@ +type T; +T *ptr; +expression n; +@@ +- memset(ptr, \( 0x0 \| 0 \), n * \( sizeof(T) +- \| sizeof(*ptr) +- \) ) ++ MEMZERO_ARRAY(ptr, n) + +@@ +type T; +T[] ptr; +expression n; +@@ +- memset(ptr, \( 0x0 \| 0 \), n * \( sizeof(T) +- \| sizeof(*ptr) +- \) ) ++ MEMZERO_ARRAY(ptr, n) diff --git a/diff-delta.c b/diff-delta.c index 71d37368d68a18..43c339f01061ca 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -171,7 +171,7 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) mem = hash + hsize; entry = mem; - memset(hash, 0, hsize * sizeof(*hash)); + MEMZERO_ARRAY(hash, hsize); /* allocate an array to count hash entries */ hash_count = calloc(hsize, sizeof(*hash_count)); diff --git a/ewah/bitmap.c b/ewah/bitmap.c index 55928dada86a37..bf878bf8768ea0 100644 --- a/ewah/bitmap.c +++ b/ewah/bitmap.c @@ -46,8 +46,7 @@ static void bitmap_grow(struct bitmap *self, size_t word_alloc) { size_t old_size = self->word_alloc; ALLOC_GROW(self->words, word_alloc, self->word_alloc); - memset(self->words + old_size, 0x0, - (self->word_alloc - old_size) * sizeof(eword_t)); + MEMZERO_ARRAY(self->words + old_size, (self->word_alloc - old_size)); } void bitmap_set(struct bitmap *self, size_t pos) @@ -192,8 +191,8 @@ void bitmap_or_ewah(struct bitmap *self, struct ewah_bitmap *other) if (self->word_alloc < other_final) { self->word_alloc = other_final; REALLOC_ARRAY(self->words, self->word_alloc); - memset(self->words + original_size, 0x0, - (self->word_alloc - original_size) * sizeof(eword_t)); + MEMZERO_ARRAY(self->words + original_size, + (self->word_alloc - original_size)); } ewah_iterator_init(&it, other); diff --git a/git-compat-util.h b/git-compat-util.h index 398e0fac4fab60..2b8192fd2e22fb 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -726,6 +726,7 @@ static inline uint64_t u64_add(uint64_t a, uint64_t b) #define ALLOC_ARRAY(x, alloc) (x) = xmalloc(st_mult(sizeof(*(x)), (alloc))) #define CALLOC_ARRAY(x, alloc) (x) = xcalloc((alloc), sizeof(*(x))) #define REALLOC_ARRAY(x, alloc) (x) = xrealloc((x), st_mult(sizeof(*(x)), (alloc))) +#define MEMZERO_ARRAY(x, alloc) memset((x), 0x0, st_mult(sizeof(*(x)), (alloc))) #define COPY_ARRAY(dst, src, n) copy_array((dst), (src), (n), sizeof(*(dst)) + \ BARF_UNLESS_COPYABLE((dst), (src))) diff --git a/hashmap.c b/hashmap.c index a711377853f185..3b5d6f14bc93fb 100644 --- a/hashmap.c +++ b/hashmap.c @@ -194,7 +194,7 @@ void hashmap_partial_clear_(struct hashmap *map, ssize_t entry_offset) return; if (entry_offset >= 0) /* called by hashmap_clear_entries */ free_individual_entries(map, entry_offset); - memset(map->table, 0, map->tablesize * sizeof(struct hashmap_entry *)); + MEMZERO_ARRAY(map->table, map->tablesize); map->shrink_at = 0; map->private_size = 0; } diff --git a/pack-revindex.c b/pack-revindex.c index d0791cc4938fa2..8598b941c8c419 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -75,7 +75,7 @@ static void sort_revindex(struct revindex_entry *entries, unsigned n, off_t max) for (bits = 0; max >> bits; bits += DIGIT_SIZE) { unsigned i; - memset(pos, 0, BUCKETS * sizeof(*pos)); + MEMZERO_ARRAY(pos, BUCKETS); /* * We want pos[i] to store the index of the last element that From 467860bc0b0447093ae97bcecf1655131732338f Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 10 Dec 2025 14:13:02 +0100 Subject: [PATCH 07/16] contrib/coccinelle: pass include paths to spatch(1) In the previous commit a new coccinelle rule is added. But neiter `make coccicheck` nor `meson compile coccicheck` did detect a case in builtin/last-modified.c. This case involves the field `scratch` in `struct last_modified`. This field is of type `struct bitmap` and that struct has a member `eword_t *words`. Both are defined in `ewah/ewok.h`. Now, while builtin/last-modified.c does include that header (with the subdir in the #include directive), it seems coccinelle does not process it. So it's unaware of the type of `words` in the bitmap, and it doesn't recognize the rule from previous commit that uses: type T; T *ptr; Fix coccicheck by passing all possible include paths inside the Git project so spatch(1) can find the headers and can determine the types. Signed-off-by: Toon Claes Signed-off-by: Junio C Hamano --- Makefile | 2 +- contrib/coccinelle/meson.build | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7e0f77e2988e3b..7ca21188138ace 100644 --- a/Makefile +++ b/Makefile @@ -981,7 +981,7 @@ SANITIZE_LEAK = SANITIZE_ADDRESS = # For the 'coccicheck' target -SPATCH_INCLUDE_FLAGS = --all-includes +SPATCH_INCLUDE_FLAGS = --all-includes $(addprefix -I ,compat ewah refs sha256 trace2 win32 xdiff) SPATCH_FLAGS = SPATCH_TEST_FLAGS = diff --git a/contrib/coccinelle/meson.build b/contrib/coccinelle/meson.build index dc3f73c2e7b117..ae7f5b54602fd5 100644 --- a/contrib/coccinelle/meson.build +++ b/contrib/coccinelle/meson.build @@ -50,6 +50,11 @@ foreach header : headers_to_check coccinelle_headers += meson.project_source_root() / header endforeach +coccinelle_includes = [] +foreach path : ['compat', 'ewah', 'refs', 'sha256', 'trace2', 'win32', 'xdiff'] + coccinelle_includes += ['-I', meson.project_source_root() / path] +endforeach + patches = [ ] foreach source : coccinelle_sources patches += custom_target( @@ -58,6 +63,7 @@ foreach source : coccinelle_sources '--all-includes', '--sp-file', concatenated_rules, '--patch', meson.project_source_root(), + coccinelle_includes, '@INPUT@', ], input: meson.project_source_root() / source, From 48695fcde51e10d6d6e72653fb94b5fd339cd6e6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 12 Dec 2025 15:15:24 +0000 Subject: [PATCH 08/16] scalar: annotate config file with "set by scalar" A repo may have config options set by 'scalar clone' or 'scalar register' and then updated by 'scalar reconfigure'. It can be helpful to point out which of those options were set by the latest scalar recommendations. Add "# set by scalar" to the end of each config option to assist users in identifying why these config options were set in their repo. Use a new helper method to simplify the two callsites. Co-authored-by: Patrick Steinhardt Signed-off-by: Patrick Steinhardt Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- scalar.c | 24 +++++++++++++++++------- t/t9210-scalar.sh | 3 +++ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/scalar.c b/scalar.c index f7543116272b77..1c7bd1a8f8b67b 100644 --- a/scalar.c +++ b/scalar.c @@ -19,6 +19,7 @@ #include "help.h" #include "setup.h" #include "trace2.h" +#include "path.h" static void setup_enlistment_directory(int argc, const char **argv, const char * const *usagestr, @@ -95,7 +96,17 @@ struct scalar_config { int overwrite_on_reconfigure; }; -static int set_scalar_config(const struct scalar_config *config, int reconfigure) +static int set_scalar_config(const char *key, const char *value) +{ + char *file = repo_git_path(the_repository, "config"); + int res = repo_config_set_multivar_in_file_gently(the_repository, file, + key, value, NULL, + " # set by scalar", 0); + free(file); + return res; +} + +static int set_config_if_missing(const struct scalar_config *config, int reconfigure) { char *value = NULL; int res; @@ -103,7 +114,7 @@ static int set_scalar_config(const struct scalar_config *config, int reconfigure if ((reconfigure && config->overwrite_on_reconfigure) || repo_config_get_string(the_repository, config->key, &value)) { trace2_data_string("scalar", the_repository, config->key, "created"); - res = repo_config_set_gently(the_repository, config->key, config->value); + res = set_scalar_config(config->key, config->value); } else { trace2_data_string("scalar", the_repository, config->key, "exists"); res = 0; @@ -178,14 +189,14 @@ static int set_recommended_config(int reconfigure) char *value; for (i = 0; config[i].key; i++) { - if (set_scalar_config(config + i, reconfigure)) + if (set_config_if_missing(config + i, reconfigure)) return error(_("could not configure %s=%s"), config[i].key, config[i].value); } if (have_fsmonitor_support()) { struct scalar_config fsmonitor = { "core.fsmonitor", "true" }; - if (set_scalar_config(&fsmonitor, reconfigure)) + if (set_config_if_missing(&fsmonitor, reconfigure)) return error(_("could not configure %s=%s"), fsmonitor.key, fsmonitor.value); } @@ -197,9 +208,8 @@ static int set_recommended_config(int reconfigure) if (repo_config_get_string(the_repository, "log.excludeDecoration", &value)) { trace2_data_string("scalar", the_repository, "log.excludeDecoration", "created"); - if (repo_config_set_multivar_gently(the_repository, "log.excludeDecoration", - "refs/prefetch/*", - CONFIG_REGEX_NONE, 0)) + if (set_scalar_config("log.excludeDecoration", + "refs/prefetch/*")) return error(_("could not configure " "log.excludeDecoration")); } else { diff --git a/t/t9210-scalar.sh b/t/t9210-scalar.sh index bd6f0c40d229b6..43c210a23d4bef 100755 --- a/t/t9210-scalar.sh +++ b/t/t9210-scalar.sh @@ -210,6 +210,9 @@ test_expect_success 'scalar reconfigure' ' GIT_TRACE2_EVENT="$(pwd)/reconfigure" scalar reconfigure -a && test_path_is_file one/src/cron.txt && test true = "$(git -C one/src config core.preloadIndex)" && + test_grep "preloadIndex = true # set by scalar" one/src/.git/config && + test_grep "excludeDecoration = refs/prefetch/\* # set by scalar" one/src/.git/config && + test_subcommand git maintenance start Date: Fri, 12 Dec 2025 15:15:25 +0000 Subject: [PATCH 09/16] scalar: use index.skipHash=true for performance The index.skipHash config option has been set to 'false' by Scalar since 4933152cbb (scalar: enable path-walk during push via config, 2025-05-16) but that commit message is trying to communicate the exact opposite: that the 'true' value is what we want instead. This means that we've been disabling this performance benefit for Scalar repos unintentionally. Fix this issue before we add justification for the config options set in this list. Oddly, enabling index.skipHash causes a test issue during 'test_commit' in one of the Scalar tests when GIT_TEST_SPLIT_INDEX is enabled (as caught by the linux-test-vars build). I'm fixing the test by disabling the environment variable, but the issue should be resolved in a series focused on the split index. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- scalar.c | 2 +- t/t9210-scalar.sh | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/scalar.c b/scalar.c index 1c7bd1a8f8b67b..55b8542770b780 100644 --- a/scalar.c +++ b/scalar.c @@ -160,7 +160,7 @@ static int set_recommended_config(int reconfigure) { "credential.validate", "false", 1 }, /* GCM4W-only */ { "gc.auto", "0", 1 }, { "gui.GCWarning", "false", 1 }, - { "index.skipHash", "false", 1 }, + { "index.skipHash", "true", 1 }, { "index.threads", "true", 1 }, { "index.version", "4", 1 }, { "merge.stat", "false", 1 }, diff --git a/t/t9210-scalar.sh b/t/t9210-scalar.sh index 43c210a23d4bef..923c243c133387 100755 --- a/t/t9210-scalar.sh +++ b/t/t9210-scalar.sh @@ -246,6 +246,10 @@ test_expect_success 'scalar reconfigure --all with includeIf.onbranch' ' ' test_expect_success 'scalar reconfigure --all with detached HEADs' ' + # This test demonstrates an issue with index.skipHash=true and + # this test variable for the split index. Disable the test variable. + sane_unset GIT_TEST_SPLIT_INDEX && + repos="two three four" && for num in $repos do From be667e40cbe2975aaf44748f5ee237e0d79359af Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 12 Dec 2025 15:15:26 +0000 Subject: [PATCH 10/16] scalar: remove stale config values These config values were added in the original Scalar contribution, d0feac4e8c (scalar: 'register' sets recommended config and starts maintenance, 2021-12-03), but were never fully checked for validity in the upstream Git project. At the time, Scalar was only intended for the contrib/ directory so did not have as rigorous of an investigation. Each config option has its own justification for removal: * core.preloadIndex: This value is true by default, now. Removing this causes some changes required to the tests that checked this config value. Use gui.gcwarning=false instead. * core.fscache: This config does not exist in the core Git project, but is instead a config option for a Git for Windows feature. * core.multiPackIndex: This config value is now enabled by default, so does not need to be called out specifically. It was originally included to make sure the background maintenance that created multi-pack-indexes would result in the expected performance improvements. * credential.validate: This option is not something specific to Git but instead an older version of Git Credential Manager for Windows. That software was replaced several years ago by the cross-platform Git Credential Manger so this option is no longer needed to help users who were on that older software. * pack.useSparse=true: This value is now Git's default as of de3a864114 (config: set pack.useSparse=true by default, 2020-03-20) so we don't need it set by Scalar. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- scalar.c | 5 ----- t/t9210-scalar.sh | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/scalar.c b/scalar.c index 55b8542770b780..aeebea41fa8fc2 100644 --- a/scalar.c +++ b/scalar.c @@ -135,9 +135,6 @@ static int set_recommended_config(int reconfigure) struct scalar_config config[] = { /* Required */ { "am.keepCR", "true", 1 }, - { "core.FSCache", "true", 1 }, - { "core.multiPackIndex", "true", 1 }, - { "core.preloadIndex", "true", 1 }, #ifndef WIN32 { "core.untrackedCache", "true", 1 }, #else @@ -157,7 +154,6 @@ static int set_recommended_config(int reconfigure) #endif { "core.logAllRefUpdates", "true", 1 }, { "credential.https://dev.azure.com.useHttpPath", "true", 1 }, - { "credential.validate", "false", 1 }, /* GCM4W-only */ { "gc.auto", "0", 1 }, { "gui.GCWarning", "false", 1 }, { "index.skipHash", "true", 1 }, @@ -166,7 +162,6 @@ static int set_recommended_config(int reconfigure) { "merge.stat", "false", 1 }, { "merge.renames", "true", 1 }, { "pack.useBitmaps", "false", 1 }, - { "pack.useSparse", "true", 1 }, { "receive.autoGC", "false", 1 }, { "feature.manyFiles", "false", 1 }, { "feature.experimental", "false", 1 }, diff --git a/t/t9210-scalar.sh b/t/t9210-scalar.sh index 923c243c133387..009437a5f3168f 100755 --- a/t/t9210-scalar.sh +++ b/t/t9210-scalar.sh @@ -202,15 +202,15 @@ test_expect_success 'scalar clone --no-... opts' ' test_expect_success 'scalar reconfigure' ' git init one/src && scalar register one && - git -C one/src config core.preloadIndex false && + git -C one/src config unset gui.gcwarning && scalar reconfigure one && - test true = "$(git -C one/src config core.preloadIndex)" && - git -C one/src config core.preloadIndex false && + test false = "$(git -C one/src config gui.gcwarning)" && + git -C one/src config unset gui.gcwarning && rm one/src/cron.txt && GIT_TRACE2_EVENT="$(pwd)/reconfigure" scalar reconfigure -a && test_path_is_file one/src/cron.txt && - test true = "$(git -C one/src config core.preloadIndex)" && - test_grep "preloadIndex = true # set by scalar" one/src/.git/config && + test false = "$(git -C one/src config gui.gcwarning)" && + test_grep "GCWarning = false # set by scalar" one/src/.git/config && test_grep "excludeDecoration = refs/prefetch/\* # set by scalar" one/src/.git/config && test_subcommand git maintenance start ` errors out when dir is missing' ' From e1588c270d584fff0ddf1da684515cd218a0718b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 12 Dec 2025 15:15:27 +0000 Subject: [PATCH 11/16] scalar: alphabetize and simplify config The config values set by Scalar went through an audit in the previous changes, so now reorganize the settings and simplify their purpose. First, alphabetize the config options, except put the platform-specific options at the end. This groups two Windows-specific settings and only one non-Windows setting. Also, this removes the 'overwrite_on_reconfigure' setting for many of these options. That setting made nearly all of these options "required" for scalar enlistments, restricting use for users. Instead, now nearly all options have removed this setting. However, there is one setting that still has this, which is index.skipHash, which was previously being set to _false_ when we actually prefer the value of true. Keep the overwrite here to help Scalar users upgrade to the new version. We may remove that overwrite in the future once we belive that most of the users who have the false value have upgraded to a version that overwrites that to 'true'. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- scalar.c | 60 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/scalar.c b/scalar.c index aeebea41fa8fc2..3b25fd3f353049 100644 --- a/scalar.c +++ b/scalar.c @@ -133,10 +133,33 @@ static int have_fsmonitor_support(void) static int set_recommended_config(int reconfigure) { struct scalar_config config[] = { - /* Required */ - { "am.keepCR", "true", 1 }, + { "am.keepCR", "true" }, + { "commitGraph.changedPaths", "true" }, + { "commitGraph.generationVersion", "1" }, + { "core.autoCRLF", "false" }, + { "core.logAllRefUpdates", "true" }, + { "core.safeCRLF", "false" }, + { "credential.https://dev.azure.com.useHttpPath", "true" }, + { "feature.experimental", "false" }, + { "feature.manyFiles", "false" }, + { "fetch.showForcedUpdates", "false" }, + { "fetch.unpackLimit", "1" }, + { "fetch.writeCommitGraph", "false" }, + { "gc.auto", "0" }, + { "gui.GCWarning", "false" }, + { "index.skipHash", "true", 1 /* Fix previous setting. */ }, + { "index.threads", "true"}, + { "index.version", "4" }, + { "merge.renames", "true" }, + { "merge.stat", "false" }, + { "pack.useBitmaps", "false" }, + { "pack.usePathWalk", "true" }, + { "receive.autoGC", "false" }, + { "status.aheadBehind", "false" }, + + /* platform-specific */ #ifndef WIN32 - { "core.untrackedCache", "true", 1 }, + { "core.untrackedCache", "true" }, #else /* * Unfortunately, Scalar's Functional Tests demonstrated @@ -150,34 +173,11 @@ static int set_recommended_config(int reconfigure) * Therefore, with a sad heart, we disable this very useful * feature on Windows. */ - { "core.untrackedCache", "false", 1 }, -#endif - { "core.logAllRefUpdates", "true", 1 }, - { "credential.https://dev.azure.com.useHttpPath", "true", 1 }, - { "gc.auto", "0", 1 }, - { "gui.GCWarning", "false", 1 }, - { "index.skipHash", "true", 1 }, - { "index.threads", "true", 1 }, - { "index.version", "4", 1 }, - { "merge.stat", "false", 1 }, - { "merge.renames", "true", 1 }, - { "pack.useBitmaps", "false", 1 }, - { "receive.autoGC", "false", 1 }, - { "feature.manyFiles", "false", 1 }, - { "feature.experimental", "false", 1 }, - { "fetch.unpackLimit", "1", 1 }, - { "fetch.writeCommitGraph", "false", 1 }, -#ifdef WIN32 - { "http.sslBackend", "schannel", 1 }, + { "core.untrackedCache", "false" }, + + /* Other Windows-specific required settings: */ + { "http.sslBackend", "schannel" }, #endif - /* Optional */ - { "status.aheadBehind", "false" }, - { "commitGraph.changedPaths", "true" }, - { "commitGraph.generationVersion", "1" }, - { "core.autoCRLF", "false" }, - { "core.safeCRLF", "false" }, - { "fetch.showForcedUpdates", "false" }, - { "pack.usePathWalk", "true" }, { NULL, NULL }, }; int i; From d2e4099968ca1cd6b31b0516cdbafa0520674a8e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 13 Dec 2025 10:46:27 +0900 Subject: [PATCH 12/16] coccicheck: emit the contents of cocci patch Telling the user "you got some error messages" without showing what the errors are is almost useless in CI environment, as the errors cannot be examined without downloading build artifacts. Arrange it to spew out the output when it fails. Signed-off-by: Junio C Hamano --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7ca21188138ace..0117d0008c71b4 100644 --- a/Makefile +++ b/Makefile @@ -3521,7 +3521,7 @@ else COCCICHECK_PATCH_MUST_BE_EMPTY_FILES = $(COCCICHECK_PATCHES_INTREE) endif coccicheck: $(COCCICHECK_PATCH_MUST_BE_EMPTY_FILES) - ! grep -q ^ $(COCCICHECK_PATCH_MUST_BE_EMPTY_FILES) /dev/null + ! grep ^ $(COCCICHECK_PATCH_MUST_BE_EMPTY_FILES) /dev/null # See contrib/coccinelle/README coccicheck-pending: coccicheck-test From 8ea9492cf3505c379d1c573b02db90e6b480cc75 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 13 Dec 2025 10:46:28 +0900 Subject: [PATCH 13/16] cocci: use MEMZERO_ARRAY() a bit more Existing code in files that have been fairly stable trigger the "make coccicheck" suggestions due to the new check. Rewrite them to use MEMZERO_ARRAY() Signed-off-by: Junio C Hamano --- diffcore-delta.c | 4 ++-- linear-assignment.c | 4 ++-- shallow.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/diffcore-delta.c b/diffcore-delta.c index ba6cbee76ba018..2de9e9ccff321a 100644 --- a/diffcore-delta.c +++ b/diffcore-delta.c @@ -56,7 +56,7 @@ static struct spanhash_top *spanhash_rehash(struct spanhash_top *orig) st_mult(sizeof(struct spanhash), sz))); new_spanhash->alloc_log2 = orig->alloc_log2 + 1; new_spanhash->free = INITIAL_FREE(new_spanhash->alloc_log2); - memset(new_spanhash->data, 0, sizeof(struct spanhash) * sz); + MEMZERO_ARRAY(new_spanhash->data, sz); for (i = 0; i < osz; i++) { struct spanhash *o = &(orig->data[i]); int bucket; @@ -135,7 +135,7 @@ static struct spanhash_top *hash_chars(struct repository *r, st_mult(sizeof(struct spanhash), (size_t)1 << i))); hash->alloc_log2 = i; hash->free = INITIAL_FREE(i); - memset(hash->data, 0, sizeof(struct spanhash) * ((size_t)1 << i)); + MEMZERO_ARRAY(hash->data, ((size_t)1 << i)); n = 0; accum1 = accum2 = 0; diff --git a/linear-assignment.c b/linear-assignment.c index 5416cbcf409d26..97b4f750586a78 100644 --- a/linear-assignment.c +++ b/linear-assignment.c @@ -20,8 +20,8 @@ void compute_assignment(int column_count, int row_count, int *cost, int i, j, phase; if (column_count < 2) { - memset(column2row, 0, sizeof(int) * column_count); - memset(row2column, 0, sizeof(int) * row_count); + MEMZERO_ARRAY(column2row, column_count); + MEMZERO_ARRAY(row2column, row_count); return; } diff --git a/shallow.c b/shallow.c index d9cd4e219cb07d..c20471cd7e450b 100644 --- a/shallow.c +++ b/shallow.c @@ -713,7 +713,7 @@ void assign_shallow_commits_to_refs(struct shallow_info *info, if (used) { int bitmap_size = DIV_ROUND_UP(pi.nr_bits, 32) * sizeof(uint32_t); - memset(used, 0, sizeof(*used) * info->shallow->nr); + MEMZERO_ARRAY(used, info->shallow->nr); for (i = 0; i < nr_shallow; i++) { const struct commit *c = lookup_commit(the_repository, &oid[shallow[i]]); @@ -782,7 +782,7 @@ static void post_assign_shallow(struct shallow_info *info, trace_printf_key(&trace_shallow, "shallow: post_assign_shallow\n"); if (ref_status) - memset(ref_status, 0, sizeof(*ref_status) * info->ref->nr); + MEMZERO_ARRAY(ref_status, info->ref->nr); /* Remove unreachable shallow commits from "theirs" */ for (i = dst = 0; i < info->nr_theirs; i++) { From 4ce170c522cd91e73e7d500667a4718af125bcf3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 12 Dec 2025 15:15:28 +0000 Subject: [PATCH 14/16] scalar: document config settings Add user-facing documentation that justifies the values being set by 'scalar clone', 'scalar register', and 'scalar reconfigure'. Helped-by: Junio C Hamano Helped-by: Patrick Steinhardt Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/scalar.adoc | 164 ++++++++++++++++++++++++++++++++++++++ scalar.c | 4 + 2 files changed, 168 insertions(+) diff --git a/Documentation/scalar.adoc b/Documentation/scalar.adoc index f81b2832f8dfeb..5252fb134a47ab 100644 --- a/Documentation/scalar.adoc +++ b/Documentation/scalar.adoc @@ -197,6 +197,170 @@ delete :: This subcommand lets you delete an existing Scalar enlistment from your local file system, unregistering the repository. +RECOMMENDED CONFIG VALUES +------------------------- + +As part of both `scalar clone` and `scalar register`, certain Git config +values are set to optimize for large repositories or cross-platform support. +These options are updated in new Git versions according to the best known +advice for large repositories, and users can get the latest recommendations +by running `scalar reconfigure [--all]`. + +This section lists justifications for the config values that are set in the +latest version. + +am.keepCR=true:: + This setting is important for cross-platform development across Windows + and non-Windows platforms and keeping carriage return (`\r`) characters + in certain workflows. + +commitGraph.changedPaths=true:: + This setting helps the background maintenance steps that compute the + serialized commit-graph to also store changed-path Bloom filters. This + accelerates file history commands and allows users to automatically + benefit without running a foreground command. + +commitGraph.generationVersion=1:: + While the preferred version is 2 for performance reasons, existing users + that had version 1 by default will need special care in upgrading to + version 2. This is likely to change in the future as the upgrade story + solidifies. + +core.autoCRLF=false:: + This removes the transformation of worktree files to add CRLF line + endings when only LF line endings exist. This is removed for performance + reasons. Repositories that use tools that care about CRLF line endings + should commit the necessary files with those line endings instead. + +core.logAllRefUpdates=true:: + This enables the reflog on all branches. While this is a performance + cost for large repositories, it is frequently an important data source + for users to get out of bad situations or to seek support from experts. + +core.safeCRLF=false:: + Similar to `core.autoCRLF=false`, this disables checks around whether + the CRLF conversion is reversible. This is a performance improvement, + but can be dangerous if `core.autoCRLF` is reenabled by the user. + +credential.https://dev.azure.com.useHttpPath=true:: + This setting enables the `credential.useHttpPath` feature only for web + URLs for Azure DevOps. This is important for users interacting with that + service using multiple organizations and thus multiple credential + tokens. + +feature.experimental=false:: + This disables the "experimental" optimizations grouped under this + feature config. The expectation is that all valuable optimizations are + also set explicitly by Scalar config, and any differences are + intentional. Notable differences include several bitmap-related config + options which are disabled for client-focused Scalar repos. + +feature.manyFiles=false:: + This disables the "many files" optimizations grouped under this feature + config. The expectation is that all valuable optimizations are also set + explicitly by Scalar config, and any differences are intentional. + +fetch.showForcedUpdates=false:: + This disables the check at the end of `git fetch` that notifies the user + if the ref update was a forced update (one where the previous position + is not reachable from the latest position). This check can be very + expensive in large repositories, so is disabled and replaced with an + advice message. Set `advice.fetchShowForcedUpdates=false` to disable + this advice message. + +fetch.unpackLimit=1:: + This setting prevents Git from unpacking packfiles into loose objects + as they are downloaded from the server. The default limit of 100 was + intended as a way to prevent performance issues from too many packfiles, + but Scalar uses background maintenance to group packfiles and cover them + with a multi-pack-index, removing this issue. + +fetch.writeCommitGraph=false:: + This config setting was created to help users automatically update their + commit-graph files as they perform fetches. However, this takes time + from foreground fetches and pulls and Scalar uses background maintenance + for this function instead. + +gc.auto=0:: + This disables automatic garbage collection, since Scalar uses background + maintenance to keep the repository data in good shape. + +gui.GCWarning=false:: + Since Scalar disables garbage collection by setting `gc.auto=0`, the + `git-gui` tool may start to warn about this setting. Disable this + warning as Scalar's background maintenance configuration makes the + warning irrelevant. + +index.skipHash=true:: + Disable computing the hash of the index contents as it is being written. + This assists with performance, especially for large index files. + +index.threads=true:: + This tells Git to automatically detect how many threads it should use + when reading the index due to the default value of `core.preloadIndex`, + which enables parallel index reads. This explicit setting also enables + `index.recordOffsetTable=true` to speed up parallel index reads. + +index.version=4:: + This index version adds compression to the path names, reducing the size + of the index in a significant way for large repos. This is an important + performance boost. + +log.excludeDecoration=refs/prefetch/*:: + Since Scalar enables background maintenance with the `incremental` + strategy, this setting avoids polluting `git log` output with refs + stored by the background prefetch operations. + +merge.renames=true:: + When computing merges in large repos, it is particularly important to + detect renames to maximize the potential for a result that will validate + correctly. Users performing merges locally are more likely to be doing + so because a server-side merge (via pull request or similar) resulted in + conflicts. While this is the default setting, it is set specifically to + override a potential change to `diff.renames` which a user may set for + performance reasons. + +merge.stat=false:: + This disables a diff output after computing a merge. This improves + performance of `git merge` for large repos while reducing noisy output. + +pack.useBitmaps=false:: + This disables the use of `.bitmap` files attached to packfiles. Bitmap + files are optimized for server-side use, not client-side use. Scalar + disables this to avoid some performance issues that can occur if a user + accidentally creates `.bitmap` files. + +pack.usePathWalk=true:: + This enables the `--path-walk` option to `git pack-objects` by default. + This can accelerate the computation and compression of packfiles created + by `git push` and other repack operations. + +receive.autoGC=false:: + Similar to `gc.auto`, this setting is disabled in preference of + background maintenance. + +status.aheadBehind=false:: + This disables the ahead/behind calculation that would normally happen + during a `git status` command. This information is frequently ignored by + users but can be expensive to calculate in large repos that receive + thousands of commits per day. The calculation is replaced with an advice + message that can be disabled by disabling the `advice.statusAheadBehind` + config. + +The following settings are different based on which platform is in use: + +core.untrackedCache=(true|false):: + The untracked cache feature is important for performance benefits on + large repositories, but has demonstrated some bugs on Windows + filesystems. Thus, this is set for other platforms but disabled on + Windows. + +http.sslBackend=schannel:: + On Windows, the `openssl` backend has some issues with certain types of + remote providers and certificate types. Override the default setting to + avoid these common problems. + + SEE ALSO -------- linkgit:git-clone[1], linkgit:git-maintenance[1]. diff --git a/scalar.c b/scalar.c index 3b25fd3f353049..21ab1dba8979aa 100644 --- a/scalar.c +++ b/scalar.c @@ -132,6 +132,10 @@ static int have_fsmonitor_support(void) static int set_recommended_config(int reconfigure) { + /* + * Be sure to update Documentation/scalar.adoc if you add, update, + * or remove any of these recommended settings. + */ struct scalar_config config[] = { { "am.keepCR", "true" }, { "commitGraph.changedPaths", "true" }, From f293bdcc29f91e3e56c478473a85a8e13e6fd87c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 14 Dec 2025 16:57:06 +0100 Subject: [PATCH 15/16] diff-files: fix copy detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copy detection cannot work when comparing the index to the working tree because Git ignores files that it is not explicitly told to track. It should work in the other direction, though, i.e. for a reverse diff of the deletion of a copy from the index. d1f2d7e8ca (Make run_diff_index() use unpack_trees(), not read_tree(), 2008-01-19) broke it with a seemingly stray change to run_diff_files(). We didn't notice because there's no test for that. But even if we had one, it might have gone unnoticed because the breakage only happens with index preloading, which requires at least 1000 entries (more than most test repos have) and is racy because it runs in parallel with the actual command. Fix copy detection by queuing up-to-date and skip-worktree entries using diff_same(). While at it, use diff_same() also for queuing unchanged files not flagged as up-to-date, i.e. clean submodules and entries where preloading was not done at all or not quickly enough. It uses less memory than diff_change() and doesn't unnecessarily set the diff flag has_changes. Add two tests to cover running both without and with preloading. The first one passes reliably with the original code. The second one enables preloading and thus is racy. It has a good chance to pass even without the fix, but fails within seconds when running the test script with --stress. With the fix it runs fine for several minutes, until my patience runs out. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- diff-lib.c | 12 +++++++++--- t/t4007-rename-3.sh | 23 ++++++++++++++++++++++- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/diff-lib.c b/diff-lib.c index 8e624f38c6d6f3..5307390ff3db7b 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -226,8 +226,12 @@ void run_diff_files(struct rev_info *revs, unsigned int option) continue; } - if (ce_uptodate(ce) || ce_skip_worktree(ce)) + if (ce_uptodate(ce) || ce_skip_worktree(ce)) { + if (revs->diffopt.flags.find_copies_harder) + diff_same(&revs->diffopt, ce->ce_mode, + &ce->oid, ce->name); continue; + } /* * When CE_VALID is set (via "update-index --assume-unchanged" @@ -272,8 +276,10 @@ void run_diff_files(struct rev_info *revs, unsigned int option) if (!changed && !dirty_submodule) { ce_mark_uptodate(ce); mark_fsmonitor_valid(istate, ce); - if (!revs->diffopt.flags.find_copies_harder) - continue; + if (revs->diffopt.flags.find_copies_harder) + diff_same(&revs->diffopt, newmode, + &ce->oid, ce->name); + continue; } oldmode = ce->ce_mode; old_oid = &ce->oid; diff --git a/t/t4007-rename-3.sh b/t/t4007-rename-3.sh index e8faf0dd2ef1c5..34f7d276d116e0 100755 --- a/t/t4007-rename-3.sh +++ b/t/t4007-rename-3.sh @@ -57,7 +57,28 @@ test_expect_success 'copy, limited to a subtree' ' ' test_expect_success 'tweak work tree' ' - rm -f path0/COPYING && + rm -f path0/COPYING +' + +cat >expected <current && + compare_diff_raw current expected +' + +test_expect_success 'copy detection, files to preloaded index' ' + GIT_TEST_PRELOAD_INDEX=1 \ + git diff-files -C --find-copies-harder -R >current && + compare_diff_raw current expected +' + +test_expect_success 'tweak index' ' git update-index --remove path0/COPYING ' # In the tree, there is only path0/COPYING. In the cache, path0 does From 66ce5f8e8872f0183bb137911c52b07f1f242d13 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 23 Dec 2025 10:37:41 +0900 Subject: [PATCH 16/16] The 12th batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.53.0.adoc | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Documentation/RelNotes/2.53.0.adoc b/Documentation/RelNotes/2.53.0.adoc index e94a79516c2138..88d24f6d4dde4c 100644 --- a/Documentation/RelNotes/2.53.0.adoc +++ b/Documentation/RelNotes/2.53.0.adoc @@ -67,6 +67,17 @@ Performance, Internal Implementation, Development Support etc. and Stop using the insecure "mktemp()" function. (merge 10bba537c4 rs/ban-mktemp later to maint). + * In-code comment update to clarify that single-letter options are + outside of the scope of command line completion script. + (merge dc8a00fafe jc/completion-no-single-letter-options later to maint). + + * MEMZERO_ARRAY() helper is introduced to avoid clearing only the + first N bytes of an N-element array whose elements are larger than + a byte. + + * "git diff-files -R --find-copies-harder" has been taught to use + the potential copy sources from the index correctly. + Fixes since v2.52 ----------------- @@ -177,6 +188,17 @@ Fixes since v2.52 * Emulation code clean-up. (merge 42aa7603aa gf/win32-pthread-cond-init later to maint). + * "git submodule add" to add a submodule under segfaulted, + when a submodule..something is already in .gitmodules file + without defining where its submodule..path is, which has been + corrected. + (merge dd8e8c786e jc/submodule-add later to maint). + + * "git fetch" that involves fetching tags, when a tag being fetched + needs to overwrite existing one, failed to fetch other tags, which + has been corrected. + (merge b7b17ec8a6 kn/fix-fetch-backfill-tag-with-batched-ref-updates later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 46207a54cc qj/doc-http-bad-want-response later to maint). (merge df90eccd93 kh/doc-commit-extra-references later to maint). @@ -191,3 +213,4 @@ Fixes since v2.52 (merge d4b732899e jc/macports-darwinports later to maint). (merge bab391761d kj/pull-options-decl-cleanup later to maint). (merge 007b8994d4 rs/t4014-git-version-string-fix later to maint). + (merge 4ce170c522 ds/doc-scalar-config later to maint).