diff options
Diffstat (limited to 'builtin/index-pack.c')
| -rw-r--r-- | builtin/index-pack.c | 811 |
1 files changed, 460 insertions, 351 deletions
diff --git a/builtin/index-pack.c b/builtin/index-pack.c index de311febe3..b451755f40 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1,6 +1,8 @@ #include "builtin.h" +#include "alloc.h" #include "config.h" #include "delta.h" +#include "hex.h" #include "pack.h" #include "csum-file.h" #include "blob.h" @@ -14,9 +16,11 @@ #include "thread-utils.h" #include "packfile.h" #include "object-store.h" +#include "replace-object.h" +#include "promisor-remote.h" static const char index_pack_usage[] = -"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])"; +"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--[no-]rev-index] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])"; struct object_entry { struct pack_idx_entry idx; @@ -32,21 +36,61 @@ struct object_stat { }; struct base_data { + /* Initialized by make_base(). */ struct base_data *base; - struct base_data *child; struct object_entry *obj; - void *data; - unsigned long size; int ref_first, ref_last; int ofs_first, ofs_last; + /* + * Threads should increment retain_data if they are about to call + * patch_delta() using this struct's data as a base, and decrement this + * when they are done. While retain_data is nonzero, this struct's data + * will not be freed even if the delta base cache limit is exceeded. + */ + int retain_data; + /* + * The number of direct children that have not been fully processed + * (entered work_head, entered done_head, left done_head). When this + * number reaches zero, this struct base_data can be freed. + */ + int children_remaining; + + /* Not initialized by make_base(). */ + struct list_head list; + void *data; + unsigned long size; }; +/* + * Stack of struct base_data that have unprocessed children. + * threaded_second_pass() uses this as a source of work (the other being the + * objects array). + * + * Guarded by work_mutex. 
+ */ +static LIST_HEAD(work_head); + +/* + * Stack of struct base_data that have children, all of whom have been + * processed or are being processed, and at least one child is being processed. + * These struct base_data must be kept around until the last child is + * processed. + * + * Guarded by work_mutex. + */ +static LIST_HEAD(done_head); + +/* + * All threads share one delta base cache. + * + * base_cache_used is guarded by work_mutex, and base_cache_limit is read-only + * in a thread. + */ +static size_t base_cache_used; +static size_t base_cache_limit; + struct thread_local { -#ifndef NO_PTHREADS pthread_t thread; -#endif - struct base_data *base_cache; - size_t base_cache_used; int pack_fd; }; @@ -79,8 +123,9 @@ static int nr_threads; static int from_stdin; static int strict; static int do_fsck_object; -static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT; +static struct fsck_options fsck_options = FSCK_OPTIONS_MISSING_GITMODULES; static int verbose; +static const char *progress_title; static int show_resolving_progress; static int show_stat; static int check_self_contained_and_connected; @@ -98,8 +143,6 @@ static uint32_t input_crc32; static int input_fd, output_fd; static const char *curr_pack; -#ifndef NO_PTHREADS - static struct thread_local *thread_data; static int nr_dispatched; static int threads_active; @@ -120,10 +163,6 @@ static pthread_mutex_t deepest_delta_mutex; #define deepest_delta_lock() lock_mutex(&deepest_delta_mutex) #define deepest_delta_unlock() unlock_mutex(&deepest_delta_mutex) -static pthread_mutex_t type_cas_mutex; -#define type_cas_lock() lock_mutex(&type_cas_mutex) -#define type_cas_unlock() unlock_mutex(&type_cas_mutex) - static pthread_key_t key; static inline void lock_mutex(pthread_mutex_t *mutex) @@ -147,15 +186,12 @@ static void init_thread(void) init_recursive_mutex(&read_mutex); pthread_mutex_init(&counter_mutex, NULL); pthread_mutex_init(&work_mutex, NULL); - pthread_mutex_init(&type_cas_mutex, NULL); if 
(show_stat) pthread_mutex_init(&deepest_delta_mutex, NULL); pthread_key_create(&key, NULL); - thread_data = xcalloc(nr_threads, sizeof(*thread_data)); + CALLOC_ARRAY(thread_data, nr_threads); for (i = 0; i < nr_threads; i++) { - thread_data[i].pack_fd = open(curr_pack, O_RDONLY); - if (thread_data[i].pack_fd == -1) - die_errno(_("unable to open %s"), curr_pack); + thread_data[i].pack_fd = xopen(curr_pack, O_RDONLY); } threads_active = 1; @@ -170,7 +206,6 @@ static void cleanup_thread(void) pthread_mutex_destroy(&read_mutex); pthread_mutex_destroy(&counter_mutex); pthread_mutex_destroy(&work_mutex); - pthread_mutex_destroy(&type_cas_mutex); if (show_stat) pthread_mutex_destroy(&deepest_delta_mutex); for (i = 0; i < nr_threads; i++) @@ -179,27 +214,8 @@ static void cleanup_thread(void) free(thread_data); } -#else - -#define read_lock() -#define read_unlock() - -#define counter_lock() -#define counter_unlock() - -#define work_lock() -#define work_unlock() - -#define deepest_delta_lock() -#define deepest_delta_unlock() - -#define type_cas_lock() -#define type_cas_unlock() - -#endif - - -static int mark_link(struct object *obj, int type, void *data, struct fsck_options *options) +static int mark_link(struct object *obj, enum object_type type, + void *data, struct fsck_options *options) { if (!obj) return -1; @@ -243,8 +259,16 @@ static unsigned check_objects(void) unsigned i, max, foreign_nr = 0; max = get_max_object_index(); - for (i = 0; i < max; i++) + + if (verbose) + progress = start_delayed_progress(_("Checking objects"), max); + + for (i = 0; i < max; i++) { foreign_nr += check_object(get_indexed_object(i)); + display_progress(progress, i + 1); + } + + stop_progress(&progress); return foreign_nr; } @@ -302,8 +326,12 @@ static void use(int bytes) if (signed_add_overflows(consumed_bytes, bytes)) die(_("pack too large for current definition of off_t")); consumed_bytes += bytes; - if (max_input_size && consumed_bytes > max_input_size) - die(_("pack exceeds maximum 
allowed size")); + if (max_input_size && consumed_bytes > max_input_size) { + struct strbuf size_limit = STRBUF_INIT; + strbuf_humanise_bytes(&size_limit, max_input_size); + die(_("pack exceeds maximum allowed size (%s)"), + size_limit.buf); + } } static const char *open_pack_file(const char *pack_name) @@ -316,15 +344,11 @@ static const char *open_pack_file(const char *pack_name) "pack/tmp_pack_XXXXXX"); pack_name = strbuf_detach(&tmp_file, NULL); } else { - output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600); - if (output_fd < 0) - die_errno(_("unable to create '%s'"), pack_name); + output_fd = xopen(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600); } nothread_data.pack_fd = output_fd; } else { - input_fd = open(pack_name, O_RDONLY); - if (input_fd < 0) - die_errno(_("cannot open packfile '%s'"), pack_name); + input_fd = xopen(pack_name, O_RDONLY); output_fd = -1; nothread_data.pack_fd = input_fd; } @@ -347,9 +371,7 @@ static void parse_pack_header(void) use(sizeof(struct pack_header)); } -static NORETURN void bad_object(off_t offset, const char *format, - ...) __attribute__((format (printf, 2, 3))); - +__attribute__((format (printf, 2, 3))) static NORETURN void bad_object(off_t offset, const char *format, ...) { va_list params; @@ -364,73 +386,57 @@ static NORETURN void bad_object(off_t offset, const char *format, ...) 
static inline struct thread_local *get_thread_data(void) { -#ifndef NO_PTHREADS - if (threads_active) - return pthread_getspecific(key); - assert(!threads_active && - "This should only be reached when all threads are gone"); -#endif + if (HAVE_THREADS) { + if (threads_active) + return pthread_getspecific(key); + assert(!threads_active && + "This should only be reached when all threads are gone"); + } return &nothread_data; } -#ifndef NO_PTHREADS static void set_thread_data(struct thread_local *data) { if (threads_active) pthread_setspecific(key, data); } -#endif - -static struct base_data *alloc_base_data(void) -{ - struct base_data *base = xcalloc(1, sizeof(struct base_data)); - base->ref_last = -1; - base->ofs_last = -1; - return base; -} static void free_base_data(struct base_data *c) { if (c->data) { FREE_AND_NULL(c->data); - get_thread_data()->base_cache_used -= c->size; + base_cache_used -= c->size; } } static void prune_base_data(struct base_data *retain) { - struct base_data *b; - struct thread_local *data = get_thread_data(); - for (b = data->base_cache; - data->base_cache_used > delta_base_cache_limit && b; - b = b->child) { - if (b->data && b != retain) - free_base_data(b); - } -} + struct list_head *pos; -static void link_base_data(struct base_data *base, struct base_data *c) -{ - if (base) - base->child = c; - else - get_thread_data()->base_cache = c; + if (base_cache_used <= base_cache_limit) + return; - c->base = base; - c->child = NULL; - if (c->data) - get_thread_data()->base_cache_used += c->size; - prune_base_data(c); -} + list_for_each_prev(pos, &done_head) { + struct base_data *b = list_entry(pos, struct base_data, list); + if (b->retain_data || b == retain) + continue; + if (b->data) { + free_base_data(b); + if (base_cache_used <= base_cache_limit) + return; + } + } -static void unlink_base_data(struct base_data *c) -{ - struct base_data *base = c->base; - if (base) - base->child = NULL; - else - get_thread_data()->base_cache = NULL; - 
free_base_data(c); + list_for_each_prev(pos, &work_head) { + struct base_data *b = list_entry(pos, struct base_data, list); + if (b->retain_data || b == retain) + continue; + if (b->data) { + free_base_data(b); + if (base_cache_used <= base_cache_limit) + return; + } + } } static int is_delta_type(enum object_type type) @@ -450,7 +456,7 @@ static void *unpack_entry_data(off_t offset, unsigned long size, int hdrlen; if (!is_delta_type(type)) { - hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", type_name(type), size) + 1; + hdrlen = format_object_header(hdr, sizeof(hdr), type, size); the_hash_algo->init_fn(&c); the_hash_algo->update_fn(&c, hdr, hdrlen); } else @@ -482,7 +488,7 @@ static void *unpack_entry_data(off_t offset, unsigned long size, bad_object(offset, _("inflate returned %d"), status); git_inflate_end(&stream); if (oid) - the_hash_algo->final_fn(oid->hash, &c); + the_hash_algo->final_oid_fn(oid, &c); return buf == fixed_buf ? NULL : buf; } @@ -517,7 +523,7 @@ static void *unpack_raw_entry(struct object_entry *obj, switch (obj->type) { case OBJ_REF_DELTA: - hashcpy(ref_oid->hash, fill(the_hash_algo->rawsz)); + oidread(ref_oid, fill(the_hash_algo->rawsz)); use(the_hash_algo->rawsz); break; case OBJ_OFS_DELTA: @@ -579,7 +585,7 @@ static void *unpack_data(struct object_entry *obj, if (!n) die(Q_("premature end of pack file, %"PRIuMAX" byte missing", "premature end of pack file, %"PRIuMAX" bytes missing", - (unsigned int)len), + len), (uintmax_t)len); from += n; len -= n; @@ -630,7 +636,7 @@ static int compare_ofs_delta_bases(off_t offset1, off_t offset2, 0; } -static int find_ofs_delta(const off_t offset, enum object_type type) +static int find_ofs_delta(const off_t offset) { int first = 0, last = nr_ofs_deltas; @@ -640,7 +646,8 @@ static int find_ofs_delta(const off_t offset, enum object_type type) int cmp; cmp = compare_ofs_delta_bases(offset, delta->offset, - type, objects[delta->obj_no].type); + OBJ_OFS_DELTA, + objects[delta->obj_no].type); if (!cmp) return 
next; if (cmp < 0) { @@ -653,10 +660,9 @@ static int find_ofs_delta(const off_t offset, enum object_type type) } static void find_ofs_delta_children(off_t offset, - int *first_index, int *last_index, - enum object_type type) + int *first_index, int *last_index) { - int first = find_ofs_delta(offset, type); + int first = find_ofs_delta(offset); int last = first; int end = nr_ofs_deltas - 1; @@ -684,7 +690,7 @@ static int compare_ref_delta_bases(const struct object_id *oid1, return oidcmp(oid1, oid2); } -static int find_ref_delta(const struct object_id *oid, enum object_type type) +static int find_ref_delta(const struct object_id *oid) { int first = 0, last = nr_ref_deltas; @@ -694,7 +700,8 @@ static int find_ref_delta(const struct object_id *oid, enum object_type type) int cmp; cmp = compare_ref_delta_bases(oid, &delta->oid, - type, objects[delta->obj_no].type); + OBJ_REF_DELTA, + objects[delta->obj_no].type); if (!cmp) return next; if (cmp < 0) { @@ -707,10 +714,9 @@ static int find_ref_delta(const struct object_id *oid, enum object_type type) } static void find_ref_delta_children(const struct object_id *oid, - int *first_index, int *last_index, - enum object_type type) + int *first_index, int *last_index) { - int first = find_ref_delta(oid, type); + int first = find_ref_delta(oid); int last = first; int end = nr_ref_deltas - 1; @@ -719,9 +725,9 @@ static void find_ref_delta_children(const struct object_id *oid, *last_index = -1; return; } - while (first > 0 && !oidcmp(&ref_deltas[first - 1].oid, oid)) + while (first > 0 && oideq(&ref_deltas[first - 1].oid, oid)) --first; - while (last < end && !oidcmp(&ref_deltas[last + 1].oid, oid)) + while (last < end && oideq(&ref_deltas[last + 1].oid, oid)) ++last; *first_index = first; *last_index = last; @@ -773,7 +779,8 @@ static int check_collison(struct object_entry *entry) memset(&data, 0, sizeof(data)); data.entry = entry; - data.st = open_istream(&entry->idx.oid, &type, &size, NULL); + data.st = 
open_istream(the_repository, &entry->idx.oid, &type, &size, + NULL); if (!data.st) return -1; if (size != entry->size || type != entry->type) @@ -797,7 +804,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, if (startup_info->have_repository) { read_lock(); collision_test_needed = - has_sha1_file_with_flags(oid->hash, OBJECT_INFO_QUICK); + has_object_file_with_flags(oid, OBJECT_INFO_QUICK); read_unlock(); } @@ -881,26 +888,15 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, } /* - * This function is part of find_unresolved_deltas(). There are two - * walkers going in the opposite ways. - * - * The first one in find_unresolved_deltas() traverses down from - * parent node to children, deflating nodes along the way. However, - * memory for deflated nodes is limited by delta_base_cache_limit, so - * at some point parent node's deflated content may be freed. + * Ensure that this node has been reconstructed and return its contents. * - * The second walker is this function, which goes from current node up - * to top parent if necessary to deflate the node. In normal - * situation, its parent node would be already deflated, so it just - * needs to apply delta. - * - * In the worst case scenario, parent node is no longer deflated because - * we're running out of delta_base_cache_limit; we need to re-deflate - * parents, possibly up to the top base. - * - * All deflated objects here are subject to be freed if we exceed - * delta_base_cache_limit, just like in find_unresolved_deltas(), we - * just need to make sure the last node is not freed. + * In the typical and best case, this node would already be reconstructed + * (through the invocation to resolve_delta() in threaded_second_pass()) and it + * would not be pruned. 
However, if pruning of this node was necessary due to + * reaching delta_base_cache_limit, this function will find the closest + * ancestor with reconstructed data that has not been pruned (or if there is + * none, the ultimate base object), and reconstruct each node in the delta + * chain in order to generate the reconstructed data for this node. */ static void *get_base_data(struct base_data *c) { @@ -917,7 +913,7 @@ static void *get_base_data(struct base_data *c) if (!delta_nr) { c->data = get_data_from_pack(obj); c->size = obj->size; - get_thread_data()->base_cache_used += c->size; + base_cache_used += c->size; prune_base_data(c); } for (; delta_nr > 0; delta_nr--) { @@ -933,7 +929,7 @@ static void *get_base_data(struct base_data *c) free(raw); if (!c->data) bad_object(obj->idx.offset, _("failed to apply delta")); - get_thread_data()->base_cache_used += c->size; + base_cache_used += c->size; prune_base_data(c); } free(delta); @@ -941,10 +937,27 @@ static void *get_base_data(struct base_data *c) return c->data; } -static void resolve_delta(struct object_entry *delta_obj, - struct base_data *base, struct base_data *result) +static struct base_data *make_base(struct object_entry *obj, + struct base_data *parent) { - void *base_data, *delta_data; + struct base_data *base = xcalloc(1, sizeof(struct base_data)); + base->base = parent; + base->obj = obj; + find_ref_delta_children(&obj->idx.oid, + &base->ref_first, &base->ref_last); + find_ofs_delta_children(obj->idx.offset, + &base->ofs_first, &base->ofs_last); + base->children_remaining = base->ref_last - base->ref_first + + base->ofs_last - base->ofs_first + 2; + return base; +} + +static struct base_data *resolve_delta(struct object_entry *delta_obj, + struct base_data *base) +{ + void *delta_data, *result_data; + struct base_data *result; + unsigned long result_size; if (show_stat) { int i = delta_obj - objects; @@ -957,113 +970,26 @@ static void resolve_delta(struct object_entry *delta_obj, 
obj_stat[i].base_object_no = j; } delta_data = get_data_from_pack(delta_obj); - base_data = get_base_data(base); - result->obj = delta_obj; - result->data = patch_delta(base_data, base->size, - delta_data, delta_obj->size, &result->size); + assert(base->data); + result_data = patch_delta(base->data, base->size, + delta_data, delta_obj->size, &result_size); free(delta_data); - if (!result->data) + if (!result_data) bad_object(delta_obj->idx.offset, _("failed to apply delta")); - hash_object_file(result->data, result->size, - type_name(delta_obj->real_type), &delta_obj->idx.oid); - sha1_object(result->data, NULL, result->size, delta_obj->real_type, + hash_object_file(the_hash_algo, result_data, result_size, + delta_obj->real_type, &delta_obj->idx.oid); + sha1_object(result_data, NULL, result_size, delta_obj->real_type, &delta_obj->idx.oid); + + result = make_base(delta_obj, base); + result->data = result_data; + result->size = result_size; + counter_lock(); nr_resolved_deltas++; counter_unlock(); -} - -/* - * Standard boolean compare-and-swap: atomically check whether "*type" is - * "want"; if so, swap in "set" and return true. Otherwise, leave it untouched - * and return false. 
- */ -static int compare_and_swap_type(signed char *type, - enum object_type want, - enum object_type set) -{ - enum object_type old; - - type_cas_lock(); - old = *type; - if (old == want) - *type = set; - type_cas_unlock(); - - return old == want; -} - -static struct base_data *find_unresolved_deltas_1(struct base_data *base, - struct base_data *prev_base) -{ - if (base->ref_last == -1 && base->ofs_last == -1) { - find_ref_delta_children(&base->obj->idx.oid, - &base->ref_first, &base->ref_last, - OBJ_REF_DELTA); - - find_ofs_delta_children(base->obj->idx.offset, - &base->ofs_first, &base->ofs_last, - OBJ_OFS_DELTA); - - if (base->ref_last == -1 && base->ofs_last == -1) { - free(base->data); - return NULL; - } - - link_base_data(prev_base, base); - } - if (base->ref_first <= base->ref_last) { - struct object_entry *child = objects + ref_deltas[base->ref_first].obj_no; - struct base_data *result = alloc_base_data(); - - if (!compare_and_swap_type(&child->real_type, OBJ_REF_DELTA, - base->obj->real_type)) - BUG("child->real_type != OBJ_REF_DELTA"); - - resolve_delta(child, base, result); - if (base->ref_first == base->ref_last && base->ofs_last == -1) - free_base_data(base); - - base->ref_first++; - return result; - } - - if (base->ofs_first <= base->ofs_last) { - struct object_entry *child = objects + ofs_deltas[base->ofs_first].obj_no; - struct base_data *result = alloc_base_data(); - - assert(child->real_type == OBJ_OFS_DELTA); - child->real_type = base->obj->real_type; - resolve_delta(child, base, result); - if (base->ofs_first == base->ofs_last) - free_base_data(base); - - base->ofs_first++; - return result; - } - - unlink_base_data(base); - return NULL; -} - -static void find_unresolved_deltas(struct base_data *base) -{ - struct base_data *new_base, *prev_base = NULL; - for (;;) { - new_base = find_unresolved_deltas_1(base, prev_base); - - if (new_base) { - prev_base = base; - base = new_base; - } else { - free(base); - base = prev_base; - if (!base) - return; 
- prev_base = base->base; - } - } + return result; } static int compare_ofs_delta_entry(const void *a, const void *b) @@ -1084,39 +1010,140 @@ static int compare_ref_delta_entry(const void *a, const void *b) return oidcmp(&delta_a->oid, &delta_b->oid); } -static void resolve_base(struct object_entry *obj) -{ - struct base_data *base_obj = alloc_base_data(); - base_obj->obj = obj; - base_obj->data = NULL; - find_unresolved_deltas(base_obj); -} - -#ifndef NO_PTHREADS static void *threaded_second_pass(void *data) { - set_thread_data(data); + if (data) + set_thread_data(data); for (;;) { - int i; + struct base_data *parent = NULL; + struct object_entry *child_obj; + struct base_data *child; + counter_lock(); display_progress(progress, nr_resolved_deltas); counter_unlock(); + work_lock(); - while (nr_dispatched < nr_objects && - is_delta_type(objects[nr_dispatched].type)) - nr_dispatched++; - if (nr_dispatched >= nr_objects) { - work_unlock(); - break; + if (list_empty(&work_head)) { + /* + * Take an object from the object array. + */ + while (nr_dispatched < nr_objects && + is_delta_type(objects[nr_dispatched].type)) + nr_dispatched++; + if (nr_dispatched >= nr_objects) { + work_unlock(); + break; + } + child_obj = &objects[nr_dispatched++]; + } else { + /* + * Peek at the top of the stack, and take a child from + * it. 
+ */ + parent = list_first_entry(&work_head, struct base_data, + list); + + if (parent->ref_first <= parent->ref_last) { + int offset = ref_deltas[parent->ref_first++].obj_no; + child_obj = objects + offset; + if (child_obj->real_type != OBJ_REF_DELTA) + die("REF_DELTA at offset %"PRIuMAX" already resolved (duplicate base %s?)", + (uintmax_t) child_obj->idx.offset, + oid_to_hex(&parent->obj->idx.oid)); + child_obj->real_type = parent->obj->real_type; + } else { + child_obj = objects + + ofs_deltas[parent->ofs_first++].obj_no; + assert(child_obj->real_type == OBJ_OFS_DELTA); + child_obj->real_type = parent->obj->real_type; + } + + if (parent->ref_first > parent->ref_last && + parent->ofs_first > parent->ofs_last) { + /* + * This parent has run out of children, so move + * it to done_head. + */ + list_del(&parent->list); + list_add(&parent->list, &done_head); + } + + /* + * Ensure that the parent has data, since we will need + * it later. + * + * NEEDSWORK: If parent data needs to be reloaded, this + * prolongs the time that the current thread spends in + * the mutex. A mitigating factor is that parent data + * needs to be reloaded only if the delta base cache + * limit is exceeded, so in the typical case, this does + * not happen. + */ + get_base_data(parent); + parent->retain_data++; } - i = nr_dispatched++; work_unlock(); - resolve_base(&objects[i]); + if (parent) { + child = resolve_delta(child_obj, parent); + if (!child->children_remaining) + FREE_AND_NULL(child->data); + } else { + child = make_base(child_obj, NULL); + if (child->children_remaining) { + /* + * Since this child has its own delta children, + * we will need this data in the future. + * Inflate now so that future iterations will + * have access to this object's data while + * outside the work mutex. 
+ */ + child->data = get_data_from_pack(child_obj); + child->size = child_obj->size; + } + } + + work_lock(); + if (parent) + parent->retain_data--; + if (child->data) { + /* + * This child has its own children, so add it to + * work_head. + */ + list_add(&child->list, &work_head); + base_cache_used += child->size; + prune_base_data(NULL); + free_base_data(child); + } else { + /* + * This child does not have its own children. It may be + * the last descendant of its ancestors; free those + * that we can. + */ + struct base_data *p = parent; + + while (p) { + struct base_data *next_p; + + p->children_remaining--; + if (p->children_remaining) + break; + + next_p = p->base; + free_base_data(p); + list_del(&p->list); + free(p); + + p = next_p; + } + FREE_AND_NULL(child); + } + work_unlock(); } return NULL; } -#endif /* * First pass: @@ -1133,6 +1160,7 @@ static void parse_pack_objects(unsigned char *hash) if (verbose) progress = start_progress( + progress_title ? progress_title : from_stdin ? 
_("Receiving objects") : _("Indexing objects"), nr_objects); for (i = 0; i < nr_objects; i++) { @@ -1166,7 +1194,7 @@ static void parse_pack_objects(unsigned char *hash) /* Check pack integrity */ flush(); the_hash_algo->final_fn(hash, &input_ctx); - if (hashcmp(fill(the_hash_algo->rawsz), hash)) + if (!hasheq(fill(the_hash_algo->rawsz), hash)) die(_("pack is corrupted (SHA1 mismatch)")); use(the_hash_algo->rawsz); @@ -1213,8 +1241,8 @@ static void resolve_deltas(void) progress = start_progress(_("Resolving deltas"), nr_ref_deltas + nr_ofs_deltas); -#ifndef NO_PTHREADS nr_dispatched = 0; + base_cache_limit = delta_base_cache_limit * nr_threads; if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) { init_thread(); for (i = 0; i < nr_threads; i++) { @@ -1229,16 +1257,7 @@ static void resolve_deltas(void) cleanup_thread(); return; } -#endif - - for (i = 0; i < nr_objects; i++) { - struct object_entry *obj = &objects[i]; - - if (is_delta_type(obj->type)) - continue; - resolve_base(obj); - display_progress(progress, nr_resolved_deltas); - } + threaded_second_pass(&nothread_data); } /* @@ -1275,12 +1294,12 @@ static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha nr_objects - nr_objects_initial); stop_progress_msg(&progress, msg.buf); strbuf_release(&msg); - finalize_hashfile(f, tail_hash, 0); + finalize_hashfile(f, tail_hash, FSYNC_COMPONENT_PACK, 0); hashcpy(read_hash, pack_hash); fixup_pack_header_footer(output_fd, pack_hash, curr_pack, nr_objects, read_hash, consumed_bytes-the_hash_algo->rawsz); - if (hashcmp(read_hash, tail_hash) != 0) + if (!hasheq(read_hash, tail_hash)) die(_("Unexpected tail checksum for %s " "(disk corruption?)"), curr_pack); } @@ -1341,7 +1360,7 @@ static struct object_entry *append_obj_to_pack(struct hashfile *f, obj[1].idx.offset += write_compressed(f, buf, size); obj[0].idx.crc32 = crc32_end(f); hashflush(f); - hashcpy(obj->idx.oid.hash, sha1); + oidread(&obj->idx.oid, sha1); return obj; } @@ -1372,38 +1391,63 @@ static 
void fix_unresolved_deltas(struct hashfile *f) sorted_by_pos[i] = &ref_deltas[i]; QSORT(sorted_by_pos, nr_ref_deltas, delta_pos_compare); + if (has_promisor_remote()) { + /* + * Prefetch the delta bases. + */ + struct oid_array to_fetch = OID_ARRAY_INIT; + for (i = 0; i < nr_ref_deltas; i++) { + struct ref_delta_entry *d = sorted_by_pos[i]; + if (!oid_object_info_extended(the_repository, &d->oid, + NULL, + OBJECT_INFO_FOR_PREFETCH)) + continue; + oid_array_append(&to_fetch, &d->oid); + } + promisor_remote_get_direct(the_repository, + to_fetch.oid, to_fetch.nr); + oid_array_clear(&to_fetch); + } + for (i = 0; i < nr_ref_deltas; i++) { struct ref_delta_entry *d = sorted_by_pos[i]; enum object_type type; - struct base_data *base_obj = alloc_base_data(); + void *data; + unsigned long size; if (objects[d->obj_no].real_type != OBJ_REF_DELTA) continue; - base_obj->data = read_object_file(&d->oid, &type, - &base_obj->size); - if (!base_obj->data) + data = read_object_file(&d->oid, &type, &size); + if (!data) continue; - if (check_object_signature(&d->oid, base_obj->data, - base_obj->size, type_name(type))) + if (check_object_signature(the_repository, &d->oid, data, size, + type) < 0) die(_("local object %s is corrupt"), oid_to_hex(&d->oid)); - base_obj->obj = append_obj_to_pack(f, d->oid.hash, - base_obj->data, base_obj->size, type); - find_unresolved_deltas(base_obj); + + /* + * Add this as an object to the objects array and call + * threaded_second_pass() (which will pick up the added + * object). 
+ */ + append_obj_to_pack(f, d->oid.hash, data, size, type); + free(data); + threaded_second_pass(NULL); + display_progress(progress, nr_resolved_deltas); } free(sorted_by_pos); } -static const char *derive_filename(const char *pack_name, const char *suffix, - struct strbuf *buf) +static const char *derive_filename(const char *pack_name, const char *strip, + const char *suffix, struct strbuf *buf) { size_t len; - if (!strip_suffix(pack_name, ".pack", &len)) - die(_("packfile name '%s' does not end with '.pack'"), - pack_name); + if (!strip_suffix(pack_name, strip, &len) || !len || + pack_name[len - 1] != '.') + die(_("packfile name '%s' does not end with '.%s'"), + pack_name, strip); strbuf_add(buf, pack_name, len); - strbuf_addch(buf, '.'); strbuf_addstr(buf, suffix); return buf->buf; } @@ -1418,7 +1462,7 @@ static void write_special_file(const char *suffix, const char *msg, int msg_len = strlen(msg); if (pack_name) - filename = derive_filename(pack_name, suffix, &name_buf); + filename = derive_filename(pack_name, "pack", suffix, &name_buf); else filename = odb_pack_name(&name_buf, hash, suffix); @@ -1441,20 +1485,38 @@ static void write_special_file(const char *suffix, const char *msg, strbuf_release(&name_buf); } +static void rename_tmp_packfile(const char **final_name, + const char *curr_name, + struct strbuf *name, unsigned char *hash, + const char *ext, int make_read_only_if_same) +{ + if (*final_name != curr_name) { + if (!*final_name) + *final_name = odb_pack_name(name, hash, ext); + if (finalize_object_file(curr_name, *final_name)) + die(_("unable to rename temporary '*.%s' file to '%s'"), + ext, *final_name); + } else if (make_read_only_if_same) { + chmod(*final_name, 0444); + } +} + static void final(const char *final_pack_name, const char *curr_pack_name, const char *final_index_name, const char *curr_index_name, + const char *final_rev_index_name, const char *curr_rev_index_name, const char *keep_msg, const char *promisor_msg, unsigned char *hash) { 
const char *report = "pack"; struct strbuf pack_name = STRBUF_INIT; struct strbuf index_name = STRBUF_INIT; + struct strbuf rev_index_name = STRBUF_INIT; int err; if (!from_stdin) { close(input_fd); } else { - fsync_or_die(output_fd, curr_pack_name); + fsync_component_or_die(FSYNC_COMPONENT_PACK, output_fd, curr_pack_name); err = close(output_fd); if (err) die_errno(_("error while closing pack file")); @@ -1467,21 +1529,13 @@ static void final(const char *final_pack_name, const char *curr_pack_name, write_special_file("promisor", promisor_msg, final_pack_name, hash, NULL); - if (final_pack_name != curr_pack_name) { - if (!final_pack_name) - final_pack_name = odb_pack_name(&pack_name, hash, "pack"); - if (finalize_object_file(curr_pack_name, final_pack_name)) - die(_("cannot store pack file")); - } else if (from_stdin) - chmod(final_pack_name, 0444); - - if (final_index_name != curr_index_name) { - if (!final_index_name) - final_index_name = odb_pack_name(&index_name, hash, "idx"); - if (finalize_object_file(curr_index_name, final_index_name)) - die(_("cannot store index file")); - } else - chmod(final_index_name, 0444); + rename_tmp_packfile(&final_pack_name, curr_pack_name, &pack_name, + hash, "pack", from_stdin); + if (curr_rev_index_name) + rename_tmp_packfile(&final_rev_index_name, curr_rev_index_name, + &rev_index_name, hash, "rev", 1); + rename_tmp_packfile(&final_index_name, curr_index_name, &index_name, + hash, "idx", 1); if (do_fsck_object) { struct packed_git *p; @@ -1491,11 +1545,11 @@ static void final(const char *final_pack_name, const char *curr_pack_name, } if (!from_stdin) { - printf("%s\n", sha1_to_hex(hash)); + printf("%s\n", hash_to_hex(hash)); } else { struct strbuf buf = STRBUF_INIT; - strbuf_addf(&buf, "%s\t%s\n", report, sha1_to_hex(hash)); + strbuf_addf(&buf, "%s\t%s\n", report, hash_to_hex(hash)); write_or_die(1, buf.buf, buf.len); strbuf_release(&buf); @@ -1512,6 +1566,7 @@ static void final(const char *final_pack_name, const char 
*curr_pack_name, } } + strbuf_release(&rev_index_name); strbuf_release(&index_name); strbuf_release(&pack_name); } @@ -1523,7 +1578,7 @@ static int git_index_pack_config(const char *k, const char *v, void *cb) if (!strcmp(k, "pack.indexversion")) { opts->version = git_config_int(k, v); if (opts->version > 2) - die(_("bad pack.indexversion=%"PRIu32), opts->version); + die(_("bad pack.indexVersion=%"PRIu32), opts->version); return 0; } if (!strcmp(k, "pack.threads")) { @@ -1531,13 +1586,18 @@ static int git_index_pack_config(const char *k, const char *v, void *cb) if (nr_threads < 0) die(_("invalid number of threads specified (%d)"), nr_threads); -#ifdef NO_PTHREADS - if (nr_threads != 1) + if (!HAVE_THREADS && nr_threads != 1) { warning(_("no threads support, ignoring %s"), k); - nr_threads = 1; -#endif + nr_threads = 1; + } return 0; } + if (!strcmp(k, "pack.writereverseindex")) { + if (git_config_bool(k, v)) + opts->flags |= WRITE_REV; + else + opts->flags &= ~WRITE_REV; + } return git_default_config(k, v, cb); } @@ -1554,14 +1614,10 @@ static void read_v2_anomalous_offsets(struct packed_git *p, { const uint32_t *idx1, *idx2; uint32_t i; - const uint32_t hashwords = the_hash_algo->rawsz / sizeof(uint32_t); /* The address of the 4-byte offset table */ - idx1 = (((const uint32_t *)p->index_data) - + 2 /* 8-byte header */ - + 256 /* fan out */ - + hashwords * p->num_objects /* object ID table */ - + p->num_objects /* CRC32 table */ + idx1 = (((const uint32_t *)((const uint8_t *)p->index_data + p->crc_offset)) + + (size_t)p->num_objects /* CRC32 table */ ); /* The address of the 8-byte offset table */ @@ -1605,7 +1661,7 @@ static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) /* * Get rid of the idx file as we do not need it anymore. * NEEDSWORK: extract this bit from free_pack_by_name() in - * sha1-file.c, perhaps? It shouldn't matter very much as we + * object-file.c, perhaps? 
It shouldn't matter very much as we * know we haven't installed this pack (hence we never have * read anything from it). */ @@ -1619,7 +1675,7 @@ static void show_pack_info(int stat_only) unsigned long *chain_histogram = NULL; if (deepest_delta) - chain_histogram = xcalloc(deepest_delta, sizeof(unsigned long)); + CALLOC_ARRAY(chain_histogram, deepest_delta); for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; @@ -1628,10 +1684,10 @@ static void show_pack_info(int stat_only) chain_histogram[obj_stat[i].delta_depth - 1]++; if (stat_only) continue; - printf("%s %-6s %lu %lu %"PRIuMAX, + printf("%s %-6s %"PRIuMAX" %"PRIuMAX" %"PRIuMAX, oid_to_hex(&obj->idx.oid), - type_name(obj->real_type), obj->size, - (unsigned long)(obj[1].idx.offset - obj->idx.offset), + type_name(obj->real_type), (uintmax_t)obj->size, + (uintmax_t)(obj[1].idx.offset - obj->idx.offset), (uintmax_t)obj->idx.offset); if (is_delta_type(obj->type)) { struct object_entry *bobj = &objects[obj_stat[i].base_object_no]; @@ -1655,32 +1711,38 @@ static void show_pack_info(int stat_only) i + 1, chain_histogram[i]); } + free(chain_histogram); } int cmd_index_pack(int argc, const char **argv, const char *prefix) { - int i, fix_thin_pack = 0, verify = 0, stat_only = 0; + int i, fix_thin_pack = 0, verify = 0, stat_only = 0, rev_index; const char *curr_index; - const char *index_name = NULL, *pack_name = NULL; + const char *curr_rev_index = NULL; + const char *index_name = NULL, *pack_name = NULL, *rev_index_name = NULL; const char *keep_msg = NULL; const char *promisor_msg = NULL; struct strbuf index_name_buf = STRBUF_INIT; + struct strbuf rev_index_name_buf = STRBUF_INIT; struct pack_idx_entry **idx_objects; struct pack_idx_option opts; unsigned char pack_hash[GIT_MAX_RAWSZ]; unsigned foreign_nr = 1; /* zero is a "good" value, assume bad */ int report_end_of_input = 0; + int hash_algo = 0; /* - * index-pack never needs to fetch missing objects, since it only - * accesses the repo to do hash 
collision checks + * index-pack never needs to fetch missing objects except when + * REF_DELTA bases are missing (which are explicitly handled). It only + * accesses the repo to do hash collision checks and to check which + * REF_DELTA bases need to be fetched. */ fetch_if_missing = 0; if (argc == 2 && !strcmp(argv[1], "-h")) usage(index_pack_usage); - check_replace_refs = 0; + read_replace_refs = 0; fsck_options.walk = mark_link; reset_pack_idx_option(&opts); @@ -1688,6 +1750,11 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) if (prefix && chdir(prefix)) die(_("Cannot come back to cwd")); + if (git_env_bool(GIT_TEST_WRITE_REV_INDEX, 0)) + rev_index = 1; + else + rev_index = !!(opts.flags & (WRITE_REV_VERIFY | WRITE_REV)); + for (i = 1; i < argc; i++) { const char *arg = argv[i]; @@ -1723,12 +1790,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) nr_threads = strtoul(arg+10, &end, 0); if (!arg[10] || *end || nr_threads < 0) usage(index_pack_usage); -#ifdef NO_PTHREADS - if (nr_threads != 1) - warning(_("no threads support, " - "ignoring %s"), arg); - nr_threads = 1; -#endif + if (!HAVE_THREADS && nr_threads != 1) { + warning(_("no threads support, ignoring %s"), arg); + nr_threads = 1; + } } else if (starts_with(arg, "--pack_header=")) { struct pack_header *hdr; char *c; @@ -1744,6 +1809,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) input_len = sizeof(*hdr); } else if (!strcmp(arg, "-v")) { verbose = 1; + } else if (!strcmp(arg, "--progress-title")) { + if (progress_title || (i+1) >= argc) + usage(index_pack_usage); + progress_title = argv[++i]; } else if (!strcmp(arg, "--show-resolving-progress")) { show_resolving_progress = 1; } else if (!strcmp(arg, "--report-end-of-input")) { @@ -1763,6 +1832,15 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) die(_("bad %s"), arg); } else if (skip_prefix(arg, "--max-input-size=", &arg)) { max_input_size = strtoumax(arg, NULL, 10); 
+ } else if (skip_prefix(arg, "--object-format=", &arg)) { + hash_algo = hash_algo_by_name(arg); + if (hash_algo == GIT_HASH_UNKNOWN) + die(_("unknown hash algorithm '%s'"), arg); + repo_set_hash_algo(the_repository, hash_algo); + } else if (!strcmp(arg, "--rev-index")) { + rev_index = 1; + } else if (!strcmp(arg, "--no-rev-index")) { + rev_index = 0; } else usage(index_pack_usage); continue; @@ -1776,11 +1854,22 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) if (!pack_name && !from_stdin) usage(index_pack_usage); if (fix_thin_pack && !from_stdin) - die(_("--fix-thin cannot be used without --stdin")); + die(_("the option '%s' requires '%s'"), "--fix-thin", "--stdin"); if (from_stdin && !startup_info->have_repository) die(_("--stdin requires a git repository")); + if (from_stdin && hash_algo) + die(_("options '%s' and '%s' cannot be used together"), "--object-format", "--stdin"); if (!index_name && pack_name) - index_name = derive_filename(pack_name, "idx", &index_name_buf); + index_name = derive_filename(pack_name, "pack", "idx", &index_name_buf); + + opts.flags &= ~(WRITE_REV | WRITE_REV_VERIFY); + if (rev_index) { + opts.flags |= verify ? WRITE_REV_VERIFY : WRITE_REV; + if (index_name) + rev_index_name = derive_filename(index_name, + "idx", "rev", + &rev_index_name_buf); + } if (verify) { if (!index_name) @@ -1791,21 +1880,32 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) if (strict) opts.flags |= WRITE_IDX_STRICT; -#ifndef NO_PTHREADS - if (!nr_threads) { + if (HAVE_THREADS && !nr_threads) { nr_threads = online_cpus(); - /* An experiment showed that more threads does not mean faster */ - if (nr_threads > 3) - nr_threads = 3; + /* + * Experiments show that going above 20 threads doesn't help, + * no matter how many cores you have. Below that, we tend to + * max at half the number of online_cpus(), presumably because + * half of those are hyperthreads rather than full cores. 
We'll + * never reduce the level below "3", though, to match a + * historical value that nobody complained about. + */ + if (nr_threads < 4) + ; /* too few cores to consider capping */ + else if (nr_threads < 6) + nr_threads = 3; /* historic cap */ + else if (nr_threads < 40) + nr_threads /= 2; + else + nr_threads = 20; /* hard cap */ } -#endif curr_pack = open_pack_file(pack_name); parse_pack_header(); - objects = xcalloc(st_add(nr_objects, 1), sizeof(struct object_entry)); + CALLOC_ARRAY(objects, st_add(nr_objects, 1)); if (show_stat) - obj_stat = xcalloc(st_add(nr_objects, 1), sizeof(struct object_stat)); - ofs_deltas = xcalloc(nr_objects, sizeof(struct ofs_delta_entry)); + CALLOC_ARRAY(obj_stat, st_add(nr_objects, 1)); + CALLOC_ARRAY(ofs_deltas, nr_objects); parse_pack_objects(pack_hash); if (report_end_of_input) write_in_full(2, "\0", 1); @@ -1823,11 +1923,16 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) for (i = 0; i < nr_objects; i++) idx_objects[i] = &objects[i].idx; curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_hash); + if (rev_index) + curr_rev_index = write_rev_file(rev_index_name, idx_objects, + nr_objects, pack_hash, + opts.flags); free(idx_objects); if (!verify) final(pack_name, curr_pack, index_name, curr_index, + rev_index_name, curr_rev_index, keep_msg, promisor_msg, pack_hash); else @@ -1836,12 +1941,16 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) if (do_fsck_object && fsck_finish(&fsck_options)) die(_("fsck error in pack objects")); + free(opts.anomaly); free(objects); strbuf_release(&index_name_buf); - if (pack_name == NULL) + strbuf_release(&rev_index_name_buf); + if (!pack_name) free((void *) curr_pack); - if (index_name == NULL) + if (!index_name) free((void *) curr_index); + if (!rev_index_name) + free((void *) curr_rev_index); /* * Let the caller know this pack is not self contained |
