-
Notifications
You must be signed in to change notification settings - Fork 188
Support hashing objects larger than 4GB on Windows #2138
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
84e1cd0
809d83e
253d6f8
ba629a3
f48d570
8a6beeb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -561,9 +561,9 @@ int odb_source_loose_read_object_info(struct odb_source *source, | |
| } | ||
|
|
||
| static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Patrick Steinhardt wrote on the Git mailing list (how to reply to this email): On Thu, Jun 04, 2026 at 05:15:08PM +0000, Philip Oakley via GitGitGadget wrote:
> From: Philip Oakley <philipoakley@iee.email>
>
> Continue walking the code path for the >4GB `hash-object --literally`
> test. The `hash_object_file_literally()` function internally uses both
> `hash_object_file()` and `write_object_file_prepare()`. Both function
> signatures use `unsigned long` rather than `size_t` for the mem buffer
> sizes. Use `size_t` instead, for LLP64 compatibility.
>
> While at it, convert those functions' object header buffer length to
> `size_t` for consistency. The value is already upcast to `uintmax_t` for
> print format compatibility.
One thing I was wondering is whether we should rather migrate to a size
that is consistent across different platforms. We could e.g. `typedef
uint64_t objsize_t` and then use that going forward.
I guess the question though is whether that'd buy us anything. In other
words, are there any platforms that we care about where `size_t` is only
32 bit wide? And would such platforms even be able to handle such large
objects?
Patrick |
||
| const void *buf, unsigned long len, | ||
| const void *buf, size_t len, | ||
| struct object_id *oid, | ||
| char *hdr, int *hdrlen) | ||
| char *hdr, size_t *hdrlen) | ||
| { | ||
| algo->init_fn(c); | ||
| git_hash_update(c, hdr, *hdrlen); | ||
|
|
@@ -572,16 +572,16 @@ static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_c | |
| } | ||
|
|
||
| static void write_object_file_prepare(const struct git_hash_algo *algo, | ||
| const void *buf, unsigned long len, | ||
| const void *buf, size_t len, | ||
| enum object_type type, struct object_id *oid, | ||
| char *hdr, int *hdrlen) | ||
| char *hdr, size_t *hdrlen) | ||
| { | ||
| struct git_hash_ctx c; | ||
|
|
||
| /* Generate the header */ | ||
| *hdrlen = format_object_header(hdr, *hdrlen, type, len); | ||
|
|
||
| /* Sha1.. */ | ||
| /* Hash (function pointers) computation */ | ||
| hash_object_body(algo, &c, buf, len, oid, hdr, hdrlen); | ||
| } | ||
|
|
||
|
|
@@ -717,11 +717,11 @@ int finalize_object_file_flags(struct repository *repo, | |
| } | ||
|
|
||
| void hash_object_file(const struct git_hash_algo *algo, const void *buf, | ||
| unsigned long len, enum object_type type, | ||
| size_t len, enum object_type type, | ||
| struct object_id *oid) | ||
| { | ||
| char hdr[MAX_HEADER_LEN]; | ||
| int hdrlen = sizeof(hdr); | ||
| size_t hdrlen = sizeof(hdr); | ||
|
|
||
| write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); | ||
| } | ||
|
|
@@ -1177,7 +1177,7 @@ int odb_source_loose_write_stream(struct odb_source *source, | |
| } | ||
|
|
||
| int odb_source_loose_write_object(struct odb_source *source, | ||
| const void *buf, unsigned long len, | ||
| const void *buf, size_t len, | ||
| enum object_type type, struct object_id *oid, | ||
| struct object_id *compat_oid_in, | ||
| enum odb_write_object_flags flags) | ||
|
|
@@ -1186,7 +1186,7 @@ int odb_source_loose_write_object(struct odb_source *source, | |
| const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; | ||
| struct object_id compat_oid; | ||
| char hdr[MAX_HEADER_LEN]; | ||
| int hdrlen = sizeof(hdr); | ||
| size_t hdrlen = sizeof(hdr); | ||
|
|
||
| /* Generate compat_oid */ | ||
| if (compat) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -49,6 +49,9 @@ test_expect_success 'setup' ' | |
|
|
||
| example sha1:ddd3f836d3e3fbb7ae289aa9ae83536f76956399 | ||
| example sha256:b44fe1fe65589848253737db859bd490453510719d7424daab03daf0767b85ae | ||
|
|
||
| large5GB sha1:0be2be10a4c8764f32c4bf372a98edc731a4b204 | ||
| large5GB sha256:dc18ca621300c8d3cfa505a275641ebab00de189859e022a975056882d313e64 | ||
| EOF | ||
| ' | ||
|
|
||
|
|
@@ -258,4 +261,40 @@ test_expect_success '--stdin outside of repository (uses default hash)' ' | |
| test_cmp expect actual | ||
| ' | ||
|
|
||
| test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ | ||
| 'files over 4GB hash literally' ' | ||
| test-tool genzeros $((5*1024*1024*1024)) >big && | ||
| test_oid large5GB >expect && | ||
| git hash-object --stdin --literally <big >actual && | ||
| test_cmp expect actual | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Patrick Steinhardt wrote on the Git mailing list (how to reply to this email): On Thu, Jun 04, 2026 at 05:15:10PM +0000, Philip Oakley via GitGitGadget wrote:
> diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh
> index 10382a815e..59efee3aff 100755
> --- a/t/t1007-hash-object.sh
> +++ b/t/t1007-hash-object.sh
> @@ -269,4 +269,12 @@ test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
> test_cmp expect actual
> '
>
> +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
> + 'files over 4GB hash correctly via --stdin' '
> + { test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } &&
> + test_oid large5GB >expect &&
> + git hash-object --stdin <big >actual &&
> + test_cmp expect actual
> +'
Same comment here: can we drop the `!LONG_IS_64BIT` prereq?
Patrick |
||
| ' | ||
|
|
||
| test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ | ||
| 'files over 4GB hash correctly via --stdin' ' | ||
| { test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } && | ||
| test_oid large5GB >expect && | ||
| git hash-object --stdin <big >actual && | ||
| test_cmp expect actual | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Patrick Steinhardt wrote on the Git mailing list (how to reply to this email): On Thu, Jun 04, 2026 at 05:15:11PM +0000, Philip Oakley via GitGitGadget wrote:
> diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh
> index 59efee3aff..f2722380ee 100755
> --- a/t/t1007-hash-object.sh
> +++ b/t/t1007-hash-object.sh
> @@ -277,4 +277,12 @@ test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
> test_cmp expect actual
> '
>
> +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
> + 'files over 4GB hash correctly' '
> + { test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } &&
> + test_oid large5GB >expect &&
> + git hash-object -- big >actual &&
> + test_cmp expect actual
> +'
Same comment here.
Nit: I feel like we could've easily introduced all of these tests in the
first commit.
Patrick |
||
| ' | ||
|
|
||
| test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ | ||
| 'files over 4GB hash correctly' ' | ||
| { test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } && | ||
| test_oid large5GB >expect && | ||
| git hash-object -- big >actual && | ||
| test_cmp expect actual | ||
| ' | ||
|
|
||
| # This clean filter does nothing, other than exercising the interface. | ||
| # We ensure that cleaning doesn't mangle large files on 64-bit Windows. | ||
| test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ | ||
| 'hash filtered files over 4GB correctly' ' | ||
| { test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } && | ||
| test_oid large5GB >expect && | ||
| test_config filter.null-filter.clean "cat" && | ||
| echo "big filter=null-filter" >.gitattributes && | ||
| git hash-object -- big >actual && | ||
| test_cmp expect actual | ||
| ' | ||
|
|
||
| test_done | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Patrick Steinhardt wrote on the Git mailing list (how to reply to this email):