Diffstat (limited to 'builtin/unpack-objects.c')
-rw-r--r--	builtin/unpack-objects.c	155
1 file changed, 120 insertions(+), 35 deletions(-)
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 4a9466295b..f1c85a00ae 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -1,15 +1,17 @@
 #include "builtin.h"
-#include "cache.h"
+#include "bulk-checkin.h"
 #include "config.h"
-#include "object-store.h"
+#include "environment.h"
+#include "gettext.h"
+#include "git-zlib.h"
+#include "hex.h"
+#include "object-store-ll.h"
 #include "object.h"
 #include "delta.h"
 #include "pack.h"
 #include "blob.h"
-#include "commit.h"
-#include "tag.h"
-#include "tree.h"
-#include "tree-walk.h"
+#include "replace-object.h"
+#include "strbuf.h"
 #include "progress.h"
 #include "decorate.h"
 #include "fsck.h"
@@ -96,15 +98,27 @@ static void use(int bytes)
 	display_throughput(progress, consumed_bytes);
 }
 
+/*
+ * Decompress zstream from the standard input into a newly
+ * allocated buffer of specified size and return the buffer.
+ * The caller is responsible to free the returned buffer.
+ *
+ * But for dry_run mode, "get_data()" is only used to check the
+ * integrity of data, and the returned buffer is not used at all.
+ * Therefore, in dry_run mode, "get_data()" will release the small
+ * allocated buffer which is reused to hold temporary zstream output
+ * and return NULL instead of returning garbage data.
+ */
 static void *get_data(unsigned long size)
 {
 	git_zstream stream;
-	void *buf = xmallocz(size);
+	unsigned long bufsize = dry_run && size > 8192 ? 8192 : size;
+	void *buf = xmallocz(bufsize);
 
 	memset(&stream, 0, sizeof(stream));
 
 	stream.next_out = buf;
-	stream.avail_out = size;
+	stream.avail_out = bufsize;
 	stream.next_in = fill(1);
 	stream.avail_in = len;
 	git_inflate_init(&stream);
@@ -124,8 +138,17 @@ static void *get_data(unsigned long size)
 		}
 		stream.next_in = fill(1);
 		stream.avail_in = len;
+		if (dry_run) {
+			/* reuse the buffer in dry_run mode */
+			stream.next_out = buf;
+			stream.avail_out = bufsize > size - stream.total_out ?
+					   size - stream.total_out :
+					   bufsize;
+		}
 	}
 	git_inflate_end(&stream);
+	if (dry_run)
+		FREE_AND_NULL(buf);
 	return buf;
 }
 
@@ -177,7 +200,7 @@ static void write_cached_object(struct object *obj, struct obj_buffer *obj_buf)
 	struct object_id oid;
 
 	if (write_object_file(obj_buf->buffer, obj_buf->size,
-			      type_name(obj->type), &oid) < 0)
+			      obj->type, &oid) < 0)
 		die("failed to write object %s", oid_to_hex(&obj->oid));
 	obj->flags |= FLAG_WRITTEN;
 }
@@ -188,7 +211,8 @@ static void write_cached_object(struct object *obj, struct obj_buffer *obj_buf)
  * Verify its reachability and validity recursively and write it out.
  */
 static int check_object(struct object *obj, enum object_type type,
-			void *data, struct fsck_options *options)
+			void *data UNUSED,
+			struct fsck_options *options UNUSED)
 {
 	struct obj_buffer *obj_buf;
 
@@ -243,7 +267,7 @@ static void write_object(unsigned nr, enum object_type type,
 			 void *buf, unsigned long size)
 {
 	if (!strict) {
-		if (write_object_file(buf, size, type_name(type),
+		if (write_object_file(buf, size, type,
 				      &obj_list[nr].oid) < 0)
 			die("failed to write object");
 		added_object(nr, type, buf, size);
@@ -251,7 +275,7 @@ static void write_object(unsigned nr, enum object_type type,
 		obj_list[nr].obj = NULL;
 	} else if (type == OBJ_BLOB) {
 		struct blob *blob;
-		if (write_object_file(buf, size, type_name(type),
+		if (write_object_file(buf, size, type,
 				      &obj_list[nr].oid) < 0)
 			die("failed to write object");
 		added_object(nr, type, buf, size);
@@ -266,7 +290,7 @@ static void write_object(unsigned nr, enum object_type type,
 	} else {
 		struct object *obj;
 		int eaten;
-		hash_object_file(the_hash_algo, buf, size, type_name(type),
+		hash_object_file(the_hash_algo, buf, size, type,
 				 &obj_list[nr].oid);
 		added_object(nr, type, buf, size);
 		obj = parse_object_buffer(the_repository, &obj_list[nr].oid,
@@ -325,10 +349,70 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
 {
 	void *buf = get_data(size);
 
-	if (!dry_run && buf)
+	if (buf)
 		write_object(nr, type, buf, size);
-	else
-		free(buf);
+}
+
+struct input_zstream_data {
+	git_zstream *zstream;
+	unsigned char buf[8192];
+	int status;
+};
+
+static const void *feed_input_zstream(struct input_stream *in_stream,
+				      unsigned long *readlen)
+{
+	struct input_zstream_data *data = in_stream->data;
+	git_zstream *zstream = data->zstream;
+	void *in = fill(1);
+
+	if (in_stream->is_finished) {
+		*readlen = 0;
+		return NULL;
+	}
+
+	zstream->next_out = data->buf;
+	zstream->avail_out = sizeof(data->buf);
+	zstream->next_in = in;
+	zstream->avail_in = len;
+
+	data->status = git_inflate(zstream, 0);
+
+	in_stream->is_finished = data->status != Z_OK;
+	use(len - zstream->avail_in);
+	*readlen = sizeof(data->buf) - zstream->avail_out;
+
+	return data->buf;
+}
+
+static void stream_blob(unsigned long size, unsigned nr)
+{
+	git_zstream zstream = { 0 };
+	struct input_zstream_data data = { 0 };
+	struct input_stream in_stream = {
+		.read = feed_input_zstream,
+		.data = &data,
+	};
+	struct obj_info *info = &obj_list[nr];
+
+	data.zstream = &zstream;
+	git_inflate_init(&zstream);
+
+	if (stream_loose_object(&in_stream, size, &info->oid))
+		die(_("failed to write object in stream"));
+
+	if (data.status != Z_STREAM_END)
+		die(_("inflate returned (%d)"), data.status);
+	git_inflate_end(&zstream);
+
+	if (strict) {
+		struct blob *blob = lookup_blob(the_repository, &info->oid);
+
+		if (!blob)
+			die(_("invalid blob object from stream"));
+		blob->object.flags |= FLAG_WRITTEN;
+	}
+	info->obj = NULL;
 }
 
 static int resolve_against_held(unsigned nr, const struct object_id *base,
@@ -358,11 +442,9 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 		oidread(&base_oid, fill(the_hash_algo->rawsz));
 		use(the_hash_algo->rawsz);
 		delta_data = get_data(delta_size);
-		if (dry_run || !delta_data) {
-			free(delta_data);
+		if (!delta_data)
 			return;
-		}
-		if (has_object_file(&base_oid))
+		if (repo_has_object_file(the_repository, &base_oid))
 			; /* Ok we have this one */
 		else if (resolve_against_held(nr, &base_oid,
 					      delta_data, delta_size))
@@ -397,10 +479,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 			die("offset value out of bound for delta base object");
 
 		delta_data = get_data(delta_size);
-		if (dry_run || !delta_data) {
-			free(delta_data);
+		if (!delta_data)
 			return;
-		}
 		lo = 0;
 		hi = nr;
 		while (lo < hi) {
@@ -430,7 +510,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 	if (resolve_against_held(nr, &base_oid, delta_data, delta_size))
 		return;
 
-	base = read_object_file(&base_oid, &type, &base_size);
+	base = repo_read_object_file(the_repository, &base_oid, &type,
+				     &base_size);
 	if (!base) {
 		error("failed to read delta-pack base object %s",
 		      oid_to_hex(&base_oid));
@@ -467,9 +548,14 @@ static void unpack_one(unsigned nr)
 	}
 
 	switch (type) {
+	case OBJ_BLOB:
+		if (!dry_run && size > big_file_threshold) {
+			stream_blob(size, nr);
+			return;
+		}
+		/* fallthrough */
 	case OBJ_COMMIT:
 	case OBJ_TREE:
-	case OBJ_BLOB:
 	case OBJ_TAG:
 		unpack_non_delta_entry(type, size, nr);
 		return;
@@ -503,22 +589,25 @@ static void unpack_all(void)
 	if (!quiet)
 		progress = start_progress(_("Unpacking objects"), nr_objects);
 	CALLOC_ARRAY(obj_list, nr_objects);
+	begin_odb_transaction();
 	for (i = 0; i < nr_objects; i++) {
 		unpack_one(i);
 		display_progress(progress, i + 1);
 	}
+	end_odb_transaction();
 	stop_progress(&progress);
 
 	if (delta_list)
 		die("unresolved deltas left after unpacking");
 }
 
-int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
+int cmd_unpack_objects(int argc, const char **argv, const char *prefix UNUSED)
 {
 	int i;
 	struct object_id oid;
+	git_hash_ctx tmp_ctx;
 
-	read_replace_refs = 0;
+	disable_replace_refs();
 
 	git_config(git_default_config, NULL);
 
@@ -577,7 +666,9 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
 	the_hash_algo->init_fn(&ctx);
 	unpack_all();
 	the_hash_algo->update_fn(&ctx, buffer, offset);
-	the_hash_algo->final_oid_fn(&oid, &ctx);
+	the_hash_algo->init_fn(&tmp_ctx);
+	the_hash_algo->clone_fn(&tmp_ctx, &ctx);
+	the_hash_algo->final_oid_fn(&oid, &tmp_ctx);
 	if (strict) {
 		write_rest();
 		if (fsck_finish(&fsck_options))
@@ -588,13 +679,7 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
 	use(the_hash_algo->rawsz);
 
 	/* Write the last part of the buffer to stdout */
-	while (len) {
-		int ret = xwrite(1, buffer + offset, len);
-		if (ret <= 0)
-			break;
-		len -= ret;
-		offset += ret;
-	}
+	write_in_full(1, buffer + offset, len);
 
 	/* All done */
 	return has_errors;
