aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2025-01-31 10:05:46 -0800
committerJunio C Hamano <gitster@pobox.com>2025-01-31 10:05:46 -0800
commit0cbcba54550b1ea108e977c24b835e07b96b1c0e (patch)
tree6cb525c7fdb38ee631d741c2f32bcaf2fffa5f07
parentThe fifth batch (diff)
parenthash.h: drop unsafe_ function variants (diff)
downloadgit-0cbcba54550b1ea108e977c24b835e07b96b1c0e.tar.gz
git-0cbcba54550b1ea108e977c24b835e07b96b1c0e.zip
Merge branch 'tb/unsafe-hash-cleanup' into ps/hash-cleanup
* tb/unsafe-hash-cleanup: hash.h: drop unsafe_ function variants csum-file: introduce hashfile_checkpoint_init() t/helper/test-hash.c: use unsafe_hash_algo() csum-file.c: use unsafe_hash_algo() hash.h: introduce `unsafe_hash_algo()` csum-file.c: extract algop from hashfile_checksum_valid() csum-file: store the hash algorithm as a struct field t/helper/test-tool: implement sha1-unsafe helper
-rw-r--r--builtin/fast-import.c2
-rw-r--r--bulk-checkin.c9
-rw-r--r--csum-file.c40
-rw-r--r--csum-file.h2
-rw-r--r--hash.h28
-rw-r--r--object-file.c41
-rw-r--r--t/helper/test-hash.c4
-rw-r--r--t/helper/test-sha1.c7
-rwxr-xr-xt/helper/test-sha1.sh38
-rw-r--r--t/helper/test-sha256.c2
-rw-r--r--t/helper/test-tool.c1
-rw-r--r--t/helper/test-tool.h3
12 files changed, 107 insertions, 70 deletions
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 2da46fecdc..ccdada1810 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -1106,7 +1106,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
|| (pack_size + PACK_SIZE_THRESHOLD + len) < pack_size)
cycle_packfile();
- the_hash_algo->unsafe_init_fn(&checkpoint.ctx);
+ hashfile_checkpoint_init(pack_file, &checkpoint);
hashfile_checkpoint(pack_file, &checkpoint);
offset = checkpoint.offset;
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 5044cb7fa0..f889fce734 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -261,7 +261,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
git_hash_ctx ctx;
unsigned char obuf[16384];
unsigned header_len;
- struct hashfile_checkpoint checkpoint = {0};
+ struct hashfile_checkpoint checkpoint;
struct pack_idx_entry *idx = NULL;
seekback = lseek(fd, 0, SEEK_CUR);
@@ -272,12 +272,15 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
OBJ_BLOB, size);
the_hash_algo->init_fn(&ctx);
the_hash_algo->update_fn(&ctx, obuf, header_len);
- the_hash_algo->unsafe_init_fn(&checkpoint.ctx);
/* Note: idx is non-NULL when we are writing */
- if ((flags & HASH_WRITE_OBJECT) != 0)
+ if ((flags & HASH_WRITE_OBJECT) != 0) {
CALLOC_ARRAY(idx, 1);
+ prepare_to_stream(state, flags);
+ hashfile_checkpoint_init(state->f, &checkpoint);
+ }
+
already_hashed_to = 0;
while (1) {
diff --git a/csum-file.c b/csum-file.c
index 5716016e12..232121f415 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -50,7 +50,7 @@ void hashflush(struct hashfile *f)
if (offset) {
if (!f->skip_hash)
- the_hash_algo->unsafe_update_fn(&f->ctx, f->buffer, offset);
+ f->algop->update_fn(&f->ctx, f->buffer, offset);
flush(f, f->buffer, offset);
f->offset = 0;
}
@@ -71,14 +71,14 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result,
hashflush(f);
if (f->skip_hash)
- hashclr(f->buffer, the_repository->hash_algo);
+ hashclr(f->buffer, f->algop);
else
- the_hash_algo->unsafe_final_fn(f->buffer, &f->ctx);
+ f->algop->final_fn(f->buffer, &f->ctx);
if (result)
- hashcpy(result, f->buffer, the_repository->hash_algo);
+ hashcpy(result, f->buffer, f->algop);
if (flags & CSUM_HASH_IN_STREAM)
- flush(f, f->buffer, the_hash_algo->rawsz);
+ flush(f, f->buffer, f->algop->rawsz);
if (flags & CSUM_FSYNC)
fsync_component_or_die(component, f->fd, f->name);
if (flags & CSUM_CLOSE) {
@@ -128,7 +128,7 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
* f->offset is necessarily zero.
*/
if (!f->skip_hash)
- the_hash_algo->unsafe_update_fn(&f->ctx, buf, nr);
+ f->algop->update_fn(&f->ctx, buf, nr);
flush(f, buf, nr);
} else {
/*
@@ -174,7 +174,9 @@ static struct hashfile *hashfd_internal(int fd, const char *name,
f->name = name;
f->do_crc = 0;
f->skip_hash = 0;
- the_hash_algo->unsafe_init_fn(&f->ctx);
+
+ f->algop = unsafe_hash_algo(the_hash_algo);
+ f->algop->init_fn(&f->ctx);
f->buffer_len = buffer_len;
f->buffer = xmalloc(buffer_len);
@@ -204,11 +206,18 @@ struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp
return hashfd_internal(fd, name, tp, 8 * 1024);
}
+void hashfile_checkpoint_init(struct hashfile *f,
+ struct hashfile_checkpoint *checkpoint)
+{
+ memset(checkpoint, 0, sizeof(*checkpoint));
+ f->algop->init_fn(&checkpoint->ctx);
+}
+
void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
{
hashflush(f);
checkpoint->offset = f->total;
- the_hash_algo->unsafe_clone_fn(&checkpoint->ctx, &f->ctx);
+ f->algop->clone_fn(&checkpoint->ctx, &f->ctx);
}
int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
@@ -219,7 +228,7 @@ int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint
lseek(f->fd, offset, SEEK_SET) != offset)
return -1;
f->total = offset;
- the_hash_algo->unsafe_clone_fn(&f->ctx, &checkpoint->ctx);
+ f->algop->clone_fn(&f->ctx, &checkpoint->ctx);
f->offset = 0; /* hashflush() was called in checkpoint */
return 0;
}
@@ -240,14 +249,15 @@ int hashfile_checksum_valid(const unsigned char *data, size_t total_len)
{
unsigned char got[GIT_MAX_RAWSZ];
git_hash_ctx ctx;
- size_t data_len = total_len - the_hash_algo->rawsz;
+ const struct git_hash_algo *algop = unsafe_hash_algo(the_hash_algo);
+ size_t data_len = total_len - algop->rawsz;
- if (total_len < the_hash_algo->rawsz)
+ if (total_len < algop->rawsz)
return 0; /* say "too short"? */
- the_hash_algo->unsafe_init_fn(&ctx);
- the_hash_algo->unsafe_update_fn(&ctx, data, data_len);
- the_hash_algo->unsafe_final_fn(got, &ctx);
+ algop->init_fn(&ctx);
+ algop->update_fn(&ctx, data, data_len);
+ algop->final_fn(got, &ctx);
- return hasheq(got, data + data_len, the_repository->hash_algo);
+ return hasheq(got, data + data_len, algop);
}
diff --git a/csum-file.h b/csum-file.h
index 7c73da0a40..b7475f16c2 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -20,6 +20,7 @@ struct hashfile {
size_t buffer_len;
unsigned char *buffer;
unsigned char *check_buffer;
+ const struct git_hash_algo *algop;
/**
* If non-zero, skip_hash indicates that we should
@@ -35,6 +36,7 @@ struct hashfile_checkpoint {
git_hash_ctx ctx;
};
+void hashfile_checkpoint_init(struct hashfile *, struct hashfile_checkpoint *);
void hashfile_checkpoint(struct hashfile *, struct hashfile_checkpoint *);
int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *);
diff --git a/hash.h b/hash.h
index 756166ce5e..ad2c919991 100644
--- a/hash.h
+++ b/hash.h
@@ -282,21 +282,6 @@ struct git_hash_algo {
/* The hash finalization function for object IDs. */
git_hash_final_oid_fn final_oid_fn;
- /* The non-cryptographic hash initialization function. */
- git_hash_init_fn unsafe_init_fn;
-
- /* The non-cryptographic hash context cloning function. */
- git_hash_clone_fn unsafe_clone_fn;
-
- /* The non-cryptographic hash update function. */
- git_hash_update_fn unsafe_update_fn;
-
- /* The non-cryptographic hash finalization function. */
- git_hash_final_fn unsafe_final_fn;
-
- /* The non-cryptographic hash finalization function. */
- git_hash_final_oid_fn unsafe_final_oid_fn;
-
/* The OID of the empty tree. */
const struct object_id *empty_tree;
@@ -305,6 +290,9 @@ struct git_hash_algo {
/* The all-zeros OID. */
const struct object_id *null_oid;
+
+ /* The unsafe variant of this hash function, if one exists. */
+ const struct git_hash_algo *unsafe;
};
extern const struct git_hash_algo hash_algos[GIT_HASH_NALGOS];
@@ -320,9 +308,17 @@ int hash_algo_by_length(int len);
/* Identical, except for a pointer to struct git_hash_algo. */
static inline int hash_algo_by_ptr(const struct git_hash_algo *p)
{
- return p - hash_algos;
+ size_t i;
+ for (i = 0; i < GIT_HASH_NALGOS; i++) {
+ const struct git_hash_algo *algop = &hash_algos[i];
+ if (p == algop)
+ return i;
+ }
+ return GIT_HASH_UNKNOWN;
}
+const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop);
+
const struct object_id *null_oid(void);
static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop)
diff --git a/object-file.c b/object-file.c
index aeca61b8ae..6ce1caacae 100644
--- a/object-file.c
+++ b/object-file.c
@@ -202,6 +202,22 @@ static void git_hash_unknown_final_oid(struct object_id *oid UNUSED,
BUG("trying to finalize unknown hash");
}
+static const struct git_hash_algo sha1_unsafe_algo = {
+ .name = "sha1",
+ .format_id = GIT_SHA1_FORMAT_ID,
+ .rawsz = GIT_SHA1_RAWSZ,
+ .hexsz = GIT_SHA1_HEXSZ,
+ .blksz = GIT_SHA1_BLKSZ,
+ .init_fn = git_hash_sha1_init_unsafe,
+ .clone_fn = git_hash_sha1_clone_unsafe,
+ .update_fn = git_hash_sha1_update_unsafe,
+ .final_fn = git_hash_sha1_final_unsafe,
+ .final_oid_fn = git_hash_sha1_final_oid_unsafe,
+ .empty_tree = &empty_tree_oid,
+ .empty_blob = &empty_blob_oid,
+ .null_oid = &null_oid_sha1,
+};
+
const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
{
.name = NULL,
@@ -214,11 +230,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
.update_fn = git_hash_unknown_update,
.final_fn = git_hash_unknown_final,
.final_oid_fn = git_hash_unknown_final_oid,
- .unsafe_init_fn = git_hash_unknown_init,
- .unsafe_clone_fn = git_hash_unknown_clone,
- .unsafe_update_fn = git_hash_unknown_update,
- .unsafe_final_fn = git_hash_unknown_final,
- .unsafe_final_oid_fn = git_hash_unknown_final_oid,
.empty_tree = NULL,
.empty_blob = NULL,
.null_oid = NULL,
@@ -234,11 +245,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
.update_fn = git_hash_sha1_update,
.final_fn = git_hash_sha1_final,
.final_oid_fn = git_hash_sha1_final_oid,
- .unsafe_init_fn = git_hash_sha1_init_unsafe,
- .unsafe_clone_fn = git_hash_sha1_clone_unsafe,
- .unsafe_update_fn = git_hash_sha1_update_unsafe,
- .unsafe_final_fn = git_hash_sha1_final_unsafe,
- .unsafe_final_oid_fn = git_hash_sha1_final_oid_unsafe,
+ .unsafe = &sha1_unsafe_algo,
.empty_tree = &empty_tree_oid,
.empty_blob = &empty_blob_oid,
.null_oid = &null_oid_sha1,
@@ -254,11 +261,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
.update_fn = git_hash_sha256_update,
.final_fn = git_hash_sha256_final,
.final_oid_fn = git_hash_sha256_final_oid,
- .unsafe_init_fn = git_hash_sha256_init,
- .unsafe_clone_fn = git_hash_sha256_clone,
- .unsafe_update_fn = git_hash_sha256_update,
- .unsafe_final_fn = git_hash_sha256_final,
- .unsafe_final_oid_fn = git_hash_sha256_final_oid,
.empty_tree = &empty_tree_oid_sha256,
.empty_blob = &empty_blob_oid_sha256,
.null_oid = &null_oid_sha256,
@@ -305,6 +307,15 @@ int hash_algo_by_length(int len)
return GIT_HASH_UNKNOWN;
}
+const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop)
+{
+ /* If we have a faster "unsafe" implementation, use that. */
+ if (algop->unsafe)
+ return algop->unsafe;
+ /* Otherwise use the default one. */
+ return algop;
+}
+
/*
* This is meant to hold a *small* number of objects that you would
* want repo_read_object_file() to be able to return, but yet you do not want
diff --git a/t/helper/test-hash.c b/t/helper/test-hash.c
index 45d829c908..aa82638c62 100644
--- a/t/helper/test-hash.c
+++ b/t/helper/test-hash.c
@@ -1,7 +1,7 @@
#include "test-tool.h"
#include "hex.h"
-int cmd_hash_impl(int ac, const char **av, int algo)
+int cmd_hash_impl(int ac, const char **av, int algo, int unsafe)
{
git_hash_ctx ctx;
unsigned char hash[GIT_MAX_HEXSZ];
@@ -9,6 +9,8 @@ int cmd_hash_impl(int ac, const char **av, int algo)
int binary = 0;
char *buffer;
const struct git_hash_algo *algop = &hash_algos[algo];
+ if (unsafe)
+ algop = unsafe_hash_algo(algop);
if (ac == 2) {
if (!strcmp(av[1], "-b"))
diff --git a/t/helper/test-sha1.c b/t/helper/test-sha1.c
index e60d000c03..349540c4df 100644
--- a/t/helper/test-sha1.c
+++ b/t/helper/test-sha1.c
@@ -3,7 +3,7 @@
int cmd__sha1(int ac, const char **av)
{
- return cmd_hash_impl(ac, av, GIT_HASH_SHA1);
+ return cmd_hash_impl(ac, av, GIT_HASH_SHA1, 0);
}
int cmd__sha1_is_sha1dc(int argc UNUSED, const char **argv UNUSED)
@@ -13,3 +13,8 @@ int cmd__sha1_is_sha1dc(int argc UNUSED, const char **argv UNUSED)
#endif
return 1;
}
+
+int cmd__sha1_unsafe(int ac, const char **av)
+{
+ return cmd_hash_impl(ac, av, GIT_HASH_SHA1, 1);
+}
diff --git a/t/helper/test-sha1.sh b/t/helper/test-sha1.sh
index 84594885c7..bf387d3db1 100755
--- a/t/helper/test-sha1.sh
+++ b/t/helper/test-sha1.sh
@@ -3,25 +3,31 @@
dd if=/dev/zero bs=1048576 count=100 2>/dev/null |
/usr/bin/time t/helper/test-tool sha1 >/dev/null
+dd if=/dev/zero bs=1048576 count=100 2>/dev/null |
+/usr/bin/time t/helper/test-tool sha1-unsafe >/dev/null
+
while read expect cnt pfx
do
case "$expect" in '#'*) continue ;; esac
- actual=$(
- {
- test -z "$pfx" || echo "$pfx"
- dd if=/dev/zero bs=1048576 count=$cnt 2>/dev/null |
- perl -pe 'y/\000/g/'
- } | ./t/helper/test-tool sha1 $cnt
- )
- if test "$expect" = "$actual"
- then
- echo "OK: $expect $cnt $pfx"
- else
- echo >&2 "OOPS: $cnt"
- echo >&2 "expect: $expect"
- echo >&2 "actual: $actual"
- exit 1
- fi
+ for sha1 in sha1 sha1-unsafe
+ do
+ actual=$(
+ {
+ test -z "$pfx" || echo "$pfx"
+ dd if=/dev/zero bs=1048576 count=$cnt 2>/dev/null |
+ perl -pe 'y/\000/g/'
+ } | ./t/helper/test-tool $sha1 $cnt
+ )
+ if test "$expect" = "$actual"
+ then
+ echo "OK ($sha1): $expect $cnt $pfx"
+ else
+ echo >&2 "OOPS ($sha1): $cnt"
+ echo >&2 "expect ($sha1): $expect"
+ echo >&2 "actual ($sha1): $actual"
+ exit 1
+ fi
+ done
done <<EOF
da39a3ee5e6b4b0d3255bfef95601890afd80709 0
3f786850e387550fdab836ed7e6dc881de23001b 0 a
diff --git a/t/helper/test-sha256.c b/t/helper/test-sha256.c
index 2fb20438f3..7fd0aa1fcd 100644
--- a/t/helper/test-sha256.c
+++ b/t/helper/test-sha256.c
@@ -3,5 +3,5 @@
int cmd__sha256(int ac, const char **av)
{
- return cmd_hash_impl(ac, av, GIT_HASH_SHA256);
+ return cmd_hash_impl(ac, av, GIT_HASH_SHA256, 0);
}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index 69a9ffa1eb..e8bc94862e 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -71,6 +71,7 @@ static struct test_cmd cmds[] = {
{ "serve-v2", cmd__serve_v2 },
{ "sha1", cmd__sha1 },
{ "sha1-is-sha1dc", cmd__sha1_is_sha1dc },
+ { "sha1-unsafe", cmd__sha1_unsafe },
{ "sha256", cmd__sha256 },
{ "sigchain", cmd__sigchain },
{ "simple-ipc", cmd__simple_ipc },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index 9cfc5da6e5..fb4c609254 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -64,6 +64,7 @@ int cmd__scrap_cache_tree(int argc, const char **argv);
int cmd__serve_v2(int argc, const char **argv);
int cmd__sha1(int argc, const char **argv);
int cmd__sha1_is_sha1dc(int argc, const char **argv);
+int cmd__sha1_unsafe(int argc, const char **argv);
int cmd__sha256(int argc, const char **argv);
int cmd__sigchain(int argc, const char **argv);
int cmd__simple_ipc(int argc, const char **argv);
@@ -82,6 +83,6 @@ int cmd__windows_named_pipe(int argc, const char **argv);
#endif
int cmd__write_cache(int argc, const char **argv);
-int cmd_hash_impl(int ac, const char **av, int algo);
+int cmd_hash_impl(int ac, const char **av, int algo, int unsafe);
#endif