diff options
Diffstat (limited to 'builtin/unpack-objects.c')
| -rw-r--r-- | builtin/unpack-objects.c | 145 |
1 files changed, 118 insertions, 27 deletions
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 716408e3a9..1908dcfcff 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -1,12 +1,15 @@ #include "builtin.h" #include "cache.h" +#include "bulk-checkin.h" #include "config.h" +#include "hex.h" #include "object-store.h" #include "object.h" #include "delta.h" #include "pack.h" #include "blob.h" #include "commit.h" +#include "replace-object.h" #include "tag.h" #include "tree.h" #include "tree-walk.h" @@ -24,6 +27,7 @@ static off_t consumed_bytes; static off_t max_input_size; static git_hash_ctx ctx; static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT; +static struct progress *progress; /* * When running under --strict mode, objects whose reachability are @@ -45,7 +49,7 @@ static struct obj_buffer *lookup_object_buffer(struct object *base) static void add_object_buffer(struct object *object, char *buffer, unsigned long size) { struct obj_buffer *obj; - obj = xcalloc(1, sizeof(struct obj_buffer)); + CALLOC_ARRAY(obj, 1); obj->buffer = buffer; obj->size = size; if (add_decoration(&obj_decorate, object, obj)) @@ -92,17 +96,30 @@ static void use(int bytes) consumed_bytes += bytes; if (max_input_size && consumed_bytes > max_input_size) die(_("pack exceeds maximum allowed size")); + display_throughput(progress, consumed_bytes); } +/* + * Decompress zstream from the standard input into a newly + * allocated buffer of specified size and return the buffer. + * The caller is responsible to free the returned buffer. + * + * But for dry_run mode, "get_data()" is only used to check the + * integrity of data, and the returned buffer is not used at all. + * Therefore, in dry_run mode, "get_data()" will release the small + * allocated buffer which is reused to hold temporary zstream output + * and return NULL instead of returning garbage data. + */ static void *get_data(unsigned long size) { git_zstream stream; - void *buf = xmallocz(size); + unsigned long bufsize = dry_run && size > 8192 ? 8192 : size; + void *buf = xmallocz(bufsize); memset(&stream, 0, sizeof(stream)); stream.next_out = buf; - stream.avail_out = size; + stream.avail_out = bufsize; stream.next_in = fill(1); stream.avail_in = len; git_inflate_init(&stream); @@ -122,8 +139,17 @@ static void *get_data(unsigned long size) } stream.next_in = fill(1); stream.avail_in = len; + if (dry_run) { + /* reuse the buffer in dry_run mode */ + stream.next_out = buf; + stream.avail_out = bufsize > size - stream.total_out ? + size - stream.total_out : + bufsize; + } } git_inflate_end(&stream); + if (dry_run) + FREE_AND_NULL(buf); return buf; } @@ -175,7 +201,7 @@ static void write_cached_object(struct object *obj, struct obj_buffer *obj_buf) struct object_id oid; if (write_object_file(obj_buf->buffer, obj_buf->size, - type_name(obj->type), &oid) < 0) + obj->type, &oid) < 0) die("failed to write object %s", oid_to_hex(&obj->oid)); obj->flags |= FLAG_WRITTEN; } @@ -185,7 +211,8 @@ static void write_cached_object(struct object *obj, struct obj_buffer *obj_buf) * that have reachability requirements and calls this function. * Verify its reachability and validity recursively and write it out. */ -static int check_object(struct object *obj, int type, void *data, struct fsck_options *options) +static int check_object(struct object *obj, enum object_type type, + void *data, struct fsck_options *options) { struct obj_buffer *obj_buf; @@ -240,7 +267,7 @@ static void write_object(unsigned nr, enum object_type type, void *buf, unsigned long size) { if (!strict) { - if (write_object_file(buf, size, type_name(type), + if (write_object_file(buf, size, type, &obj_list[nr].oid) < 0) die("failed to write object"); added_object(nr, type, buf, size); @@ -248,7 +275,7 @@ static void write_object(unsigned nr, enum object_type type, obj_list[nr].obj = NULL; } else if (type == OBJ_BLOB) { struct blob *blob; - if (write_object_file(buf, size, type_name(type), + if (write_object_file(buf, size, type, &obj_list[nr].oid) < 0) die("failed to write object"); added_object(nr, type, buf, size); @@ -263,7 +290,8 @@ static void write_object(unsigned nr, enum object_type type, } else { struct object *obj; int eaten; - hash_object_file(buf, size, type_name(type), &obj_list[nr].oid); + hash_object_file(the_hash_algo, buf, size, type, + &obj_list[nr].oid); added_object(nr, type, buf, size); obj = parse_object_buffer(the_repository, &obj_list[nr].oid, type, size, buf, @@ -303,7 +331,7 @@ static void added_object(unsigned nr, enum object_type type, struct delta_info *info; while ((info = *p) != NULL) { - if (!oidcmp(&info->base_oid, &obj_list[nr].oid) || + if (oideq(&info->base_oid, &obj_list[nr].oid) || info->base_offset == obj_list[nr].offset) { *p = info->next; p = &delta_list; @@ -321,10 +349,70 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size, { void *buf = get_data(size); - if (!dry_run && buf) + if (buf) write_object(nr, type, buf, size); - else - free(buf); +} + +struct input_zstream_data { + git_zstream *zstream; + unsigned char buf[8192]; + int status; +}; + +static const void *feed_input_zstream(struct input_stream *in_stream, + unsigned long *readlen) +{ + struct input_zstream_data *data = in_stream->data; + git_zstream *zstream = data->zstream; + void *in = fill(1); + + if (in_stream->is_finished) { + *readlen = 0; + return NULL; + } + + zstream->next_out = data->buf; + zstream->avail_out = sizeof(data->buf); + zstream->next_in = in; + zstream->avail_in = len; + + data->status = git_inflate(zstream, 0); + + in_stream->is_finished = data->status != Z_OK; + use(len - zstream->avail_in); + *readlen = sizeof(data->buf) - zstream->avail_out; + + return data->buf; +} + +static void stream_blob(unsigned long size, unsigned nr) +{ + git_zstream zstream = { 0 }; + struct input_zstream_data data = { 0 }; + struct input_stream in_stream = { + .read = feed_input_zstream, + .data = &data, + }; + struct obj_info *info = &obj_list[nr]; + + data.zstream = &zstream; + git_inflate_init(&zstream); + + if (stream_loose_object(&in_stream, size, &info->oid)) + die(_("failed to write object in stream")); + + if (data.status != Z_STREAM_END) + die(_("inflate returned (%d)"), data.status); + git_inflate_end(&zstream); + + if (strict) { + struct blob *blob = lookup_blob(the_repository, &info->oid); + + if (!blob) + die(_("invalid blob object from stream")); + blob->object.flags |= FLAG_WRITTEN; + } + info->obj = NULL; } static int resolve_against_held(unsigned nr, const struct object_id *base, @@ -332,7 +420,7 @@ static int resolve_against_held(unsigned nr, const struct object_id *base, { struct object *obj; struct obj_buffer *obj_buffer; - obj = lookup_object(the_repository, base->hash); + obj = lookup_object(the_repository, base); if (!obj) return 0; obj_buffer = lookup_object_buffer(obj); @@ -351,13 +439,11 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, struct object_id base_oid; if (type == OBJ_REF_DELTA) { - hashcpy(base_oid.hash, fill(the_hash_algo->rawsz)); + oidread(&base_oid, fill(the_hash_algo->rawsz)); use(the_hash_algo->rawsz); delta_data = get_data(delta_size); - if (dry_run || !delta_data) { - free(delta_data); + if (!delta_data) return; - } if (has_object_file(&base_oid)) ; /* Ok we have this one */ else if (resolve_against_held(nr, &base_oid, @@ -393,10 +479,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, die("offset value out of bound for delta base object"); delta_data = get_data(delta_size); - if (dry_run || !delta_data) { - free(delta_data); + if (!delta_data) return; - } lo = 0; hi = nr; while (lo < hi) { @@ -417,7 +501,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, * has not been resolved yet. */ oidclr(&obj_list[nr].oid); - add_delta_to_list(nr, &null_oid, base_offset, delta_data, delta_size); + add_delta_to_list(nr, null_oid(), base_offset, + delta_data, delta_size); return; } } @@ -462,9 +547,14 @@ static void unpack_one(unsigned nr) } switch (type) { + case OBJ_BLOB: + if (!dry_run && size > big_file_threshold) { + stream_blob(size, nr); + return; + } + /* fallthrough */ case OBJ_COMMIT: case OBJ_TREE: - case OBJ_BLOB: case OBJ_TAG: unpack_non_delta_entry(type, size, nr); return; @@ -484,7 +574,6 @@ static void unpack_one(unsigned nr) static void unpack_all(void) { int i; - struct progress *progress = NULL; struct pack_header *hdr = fill(sizeof(struct pack_header)); nr_objects = ntohl(hdr->hdr_entries); @@ -498,11 +587,13 @@ static void unpack_all(void) if (!quiet) progress = start_progress(_("Unpacking objects"), nr_objects); - obj_list = xcalloc(nr_objects, sizeof(*obj_list)); + CALLOC_ARRAY(obj_list, nr_objects); + begin_odb_transaction(); for (i = 0; i < nr_objects; i++) { unpack_one(i); display_progress(progress, i + 1); } + end_odb_transaction(); stop_progress(&progress); if (delta_list) @@ -514,7 +605,7 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) int i; struct object_id oid; - check_replace_refs = 0; + read_replace_refs = 0; git_config(git_default_config, NULL); @@ -573,13 +664,13 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) the_hash_algo->init_fn(&ctx); unpack_all(); the_hash_algo->update_fn(&ctx, buffer, offset); - the_hash_algo->final_fn(oid.hash, &ctx); + the_hash_algo->final_oid_fn(&oid, &ctx); if (strict) { write_rest(); if (fsck_finish(&fsck_options)) die(_("fsck error in pack objects")); } - if (hashcmp(fill(the_hash_algo->rawsz), oid.hash)) + if (!hasheq(fill(the_hash_algo->rawsz), oid.hash)) die("final sha1 did not match"); use(the_hash_algo->rawsz); |
