| author | Junio C Hamano <gitster@pobox.com> | 2025-09-09 14:46:00 -0700 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2025-09-09 14:46:00 -0700 |
| commit | 4a3422b1617daca3a1e4f1173618632ad558a90c (patch) | |
| tree | 8d581d8046d355fe42f7561904769c00d82588e7 | |
| parent | The seventh batch (diff) | |
| parent | bulk-checkin: use repository variable from transaction (diff) | |
| download | git-4a3422b1617daca3a1e4f1173618632ad558a90c.tar.gz git-4a3422b1617daca3a1e4f1173618632ad558a90c.zip | |
Merge branch 'jt/de-global-bulk-checkin' into jt/odb-transaction
* jt/de-global-bulk-checkin:
bulk-checkin: use repository variable from transaction
bulk-checkin: require transaction for index_blob_bulk_checkin()
bulk-checkin: remove global transaction state
bulk-checkin: introduce object database transaction structure
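For orientation, here is a minimal sketch of the calling convention this topic introduces: callers obtain a transaction handle from the repository's object database instead of toggling global bulk-checkin state. The helper function and its body below are hypothetical and only illustrate the API surface; the calls themselves match the diff that follows.

```c
#include "git-compat-util.h"
#include "repository.h"
#include "odb.h"
#include "bulk-checkin.h"

/*
 * Hypothetical caller (not part of this series) showing the new API:
 * begin_odb_transaction() now takes the object database and returns a
 * handle that is passed back to end_odb_transaction().
 */
static void add_objects_in_batch(struct repository *repo)
{
	struct odb_transaction *transaction;

	/* Batch object creation; objects become visible at the end. */
	transaction = begin_odb_transaction(repo->objects);

	/*
	 * ... write objects here, e.g. via
	 * index_blob_bulk_checkin(transaction, ...) ...
	 */

	end_odb_transaction(transaction);
}
```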
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | builtin/add.c | 5 |
| -rw-r--r-- | builtin/unpack-objects.c | 5 |
| -rw-r--r-- | builtin/update-index.c | 7 |
| -rw-r--r-- | bulk-checkin.c | 152 |
| -rw-r--r-- | bulk-checkin.h | 25 |
| -rw-r--r-- | cache-tree.c | 5 |
| -rw-r--r-- | object-file.c | 30 |
| -rw-r--r-- | odb.h | 8 |
| -rw-r--r-- | read-cache.c | 5 |
9 files changed, 141 insertions, 101 deletions
```diff
diff --git a/builtin/add.c b/builtin/add.c
index 0235854f80..740c7c4581 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -389,6 +389,7 @@ int cmd_add(int argc,
 	char *seen = NULL;
 	char *ps_matched = NULL;
 	struct lock_file lock_file = LOCK_INIT;
+	struct odb_transaction *transaction;
 
 	repo_config(repo, add_config, NULL);
 
@@ -574,7 +575,7 @@ int cmd_add(int argc,
 		string_list_clear(&only_match_skip_worktree, 0);
 	}
 
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(repo->objects);
 
 	ps_matched = xcalloc(pathspec.nr, 1);
 	if (add_renormalize)
@@ -593,7 +594,7 @@ int cmd_add(int argc,
 	if (chmod_arg && pathspec.nr)
 		exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only);
 
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 finish:
 	if (write_locked_index(repo->index, &lock_file,
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 7ae7c82b6c..28124b324d 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -584,6 +584,7 @@ static void unpack_all(void)
 {
 	int i;
 	unsigned char *hdr = fill(sizeof(struct pack_header));
+	struct odb_transaction *transaction;
 
 	if (get_be32(hdr) != PACK_SIGNATURE)
 		die("bad pack file");
@@ -599,12 +600,12 @@ static void unpack_all(void)
 		progress = start_progress(the_repository,
 					  _("Unpacking objects"), nr_objects);
 	CALLOC_ARRAY(obj_list, nr_objects);
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	for (i = 0; i < nr_objects; i++) {
 		unpack_one(i);
 		display_progress(progress, i + 1);
 	}
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 	stop_progress(&progress);
 
 	if (delta_list)
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 2380f3ccd6..2ba2d29c95 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -77,7 +77,7 @@ static void report(const char *fmt, ...)
 	 * objects invisible while a transaction is active, so flush the
 	 * transaction here before reporting a change made by update-index.
 	 */
-	flush_odb_transaction();
+	flush_odb_transaction(the_repository->objects->transaction);
 	va_start(vp, fmt);
 	vprintf(fmt, vp);
 	putchar('\n');
@@ -940,6 +940,7 @@ int cmd_update_index(int argc,
 	strbuf_getline_fn getline_fn;
 	int parseopt_state = PARSE_OPT_UNKNOWN;
 	struct repository *r = the_repository;
+	struct odb_transaction *transaction;
 	struct option options[] = {
 		OPT_BIT('q', NULL, &refresh_args.flags,
 			N_("continue refresh even when index needs update"),
@@ -1130,7 +1131,7 @@ int cmd_update_index(int argc,
 	 * Allow the object layer to optimize adding multiple objects in
 	 * a batch.
 	 */
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	while (ctx.argc) {
 		if (parseopt_state != PARSE_OPT_DONE)
 			parseopt_state = parse_options_step(&ctx, options,
@@ -1213,7 +1214,7 @@ int cmd_update_index(int argc,
 	/*
 	 * By now we have added all of the new objects
 	 */
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 	if (split_index > 0) {
 		if (repo_config_get_split_index(the_repository) == 0)
diff --git a/bulk-checkin.c b/bulk-checkin.c
index b2809ab039..124c493067 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -19,11 +19,7 @@
 #include "object-file.h"
 #include "odb.h"
 
-static int odb_transaction_nesting;
-
-static struct tmp_objdir *bulk_fsync_objdir;
-
-static struct bulk_checkin_packfile {
+struct bulk_checkin_packfile {
 	char *pack_tmp_name;
 	struct hashfile *f;
 	off_t offset;
@@ -32,27 +28,36 @@ static struct bulk_checkin_packfile {
 	struct pack_idx_entry **written;
 	uint32_t alloc_written;
 	uint32_t nr_written;
-} bulk_checkin_packfile;
+};
+
+struct odb_transaction {
+	struct object_database *odb;
+
+	int nesting;
+	struct tmp_objdir *objdir;
+	struct bulk_checkin_packfile packfile;
+};
 
-static void finish_tmp_packfile(struct strbuf *basename,
-				const char *pack_tmp_name,
-				struct pack_idx_entry **written_list,
-				uint32_t nr_written,
-				struct pack_idx_option *pack_idx_opts,
+static void finish_tmp_packfile(struct odb_transaction *transaction,
+				struct strbuf *basename,
 				unsigned char hash[])
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
+	struct repository *repo = transaction->odb->repo;
 	char *idx_tmp_name = NULL;
 
-	stage_tmp_packfiles(the_repository, basename, pack_tmp_name,
-			    written_list, nr_written, NULL, pack_idx_opts, hash,
-			    &idx_tmp_name);
-	rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name);
+	stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
+			    state->written, state->nr_written, NULL,
+			    &state->pack_idx_opts, hash, &idx_tmp_name);
+	rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
 
 	free(idx_tmp_name);
 }
 
-static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
+static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
+	struct repository *repo = transaction->odb->repo;
 	unsigned char hash[GIT_MAX_RAWSZ];
 	struct strbuf packname = STRBUF_INIT;
 
@@ -69,17 +74,17 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
 				 CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
 	} else {
 		int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
-		fixup_pack_header_footer(the_hash_algo, fd, hash, state->pack_tmp_name,
+		fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
 					 state->nr_written, hash, state->offset);
 		close(fd);
 	}
 
-	strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(the_repository),
-		    hash_to_hex(hash));
-	finish_tmp_packfile(&packname, state->pack_tmp_name,
-			    state->written, state->nr_written,
-			    &state->pack_idx_opts, hash);
+	strbuf_addf(&packname, "%s/pack/pack-%s.",
+		    repo_get_object_directory(transaction->odb->repo),
+		    hash_to_hex_algop(hash, repo->hash_algo));
+
+	finish_tmp_packfile(transaction, &packname, hash);
 	for (uint32_t i = 0; i < state->nr_written; i++)
 		free(state->written[i]);
 
@@ -90,18 +95,18 @@ clear_exit:
 	strbuf_release(&packname);
 
 	/* Make objects we just wrote available to ourselves */
-	reprepare_packed_git(the_repository);
+	reprepare_packed_git(repo);
 }
 
 /*
  * Cleanup after batch-mode fsync_object_files.
  */
-static void flush_batch_fsync(void)
+static void flush_batch_fsync(struct odb_transaction *transaction)
 {
 	struct strbuf temp_path = STRBUF_INIT;
 	struct tempfile *temp;
 
-	if (!bulk_fsync_objdir)
+	if (!transaction->objdir)
 		return;
 
 	/*
@@ -113,7 +118,8 @@ static void flush_batch_fsync(void)
 	 * to ensure that the data in each new object file is durable before
 	 * the final name is visible.
 	 */
-	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository));
+	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
+		    repo_get_object_directory(transaction->odb->repo));
 	temp = xmks_tempfile(temp_path.buf);
 	fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
 	delete_tempfile(&temp);
@@ -123,20 +129,21 @@ static void flush_batch_fsync(void)
 	 * Make the object files visible in the primary ODB after their data is
 	 * fully durable.
 	 */
-	tmp_objdir_migrate(bulk_fsync_objdir);
-	bulk_fsync_objdir = NULL;
+	tmp_objdir_migrate(transaction->objdir);
+	transaction->objdir = NULL;
 }
 
-static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
+static int already_written(struct odb_transaction *transaction,
+			   struct object_id *oid)
 {
 	/* The object may already exist in the repository */
-	if (odb_has_object(the_repository->objects, oid,
+	if (odb_has_object(transaction->odb, oid,
 			   HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
 		return 1;
 
 	/* Might want to keep the list sorted */
-	for (uint32_t i = 0; i < state->nr_written; i++)
-		if (oideq(&state->written[i]->oid, oid))
+	for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
+		if (oideq(&transaction->packfile.written[i]->oid, oid))
 			return 1;
 
 	/* This is a new object we need to keep */
@@ -235,13 +242,15 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
 }
 
 /* Lazily create backing packfile for the state */
-static void prepare_to_stream(struct bulk_checkin_packfile *state,
+static void prepare_to_stream(struct odb_transaction *transaction,
 			      unsigned flags)
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
 	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
 		return;
 
-	state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name);
+	state->f = create_tmp_packfile(transaction->odb->repo,
+				       &state->pack_tmp_name);
 	reset_pack_idx_option(&state->pack_idx_opts);
 
 	/* Pretend we are going to write only one object */
@@ -250,11 +259,11 @@ static void prepare_to_stream(struct bulk_checkin_packfile *state,
 		die_errno("unable to write pack header");
 }
 
-static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
-				struct object_id *result_oid,
-				int fd, size_t size,
-				const char *path, unsigned flags)
+int index_blob_bulk_checkin(struct odb_transaction *transaction,
+			    struct object_id *result_oid, int fd, size_t size,
+			    const char *path, unsigned flags)
 {
+	struct bulk_checkin_packfile *state = &transaction->packfile;
 	off_t seekback, already_hashed_to;
 	struct git_hash_ctx ctx;
 	unsigned char obuf[16384];
@@ -268,21 +277,21 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 	header_len = format_object_header((char *)obuf, sizeof(obuf),
 					  OBJ_BLOB, size);
-	the_hash_algo->init_fn(&ctx);
+	transaction->odb->repo->hash_algo->init_fn(&ctx);
 	git_hash_update(&ctx, obuf, header_len);
 
 	/* Note: idx is non-NULL when we are writing */
 	if ((flags & INDEX_WRITE_OBJECT) != 0) {
 		CALLOC_ARRAY(idx, 1);
 
-		prepare_to_stream(state, flags);
+		prepare_to_stream(transaction, flags);
 		hashfile_checkpoint_init(state->f, &checkpoint);
 	}
 
 	already_hashed_to = 0;
 
 	while (1) {
-		prepare_to_stream(state, flags);
+		prepare_to_stream(transaction, flags);
 		if (idx) {
 			hashfile_checkpoint(state->f, &checkpoint);
 			idx->offset = state->offset;
@@ -300,7 +309,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 				BUG("should not happen");
 			hashfile_truncate(state->f, &checkpoint);
 			state->offset = checkpoint.offset;
-			flush_bulk_checkin_packfile(state);
+			flush_bulk_checkin_packfile(transaction);
 			if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
 				return error("cannot seek back");
 		}
@@ -309,7 +318,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 		return 0;
 
 	idx->crc32 = crc32_end(state->f);
-	if (already_written(state, result_oid)) {
+	if (already_written(transaction, result_oid)) {
 		hashfile_truncate(state->f, &checkpoint);
 		state->offset = checkpoint.offset;
 		free(idx);
@@ -323,7 +332,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 	return 0;
 }
 
-void prepare_loose_object_bulk_checkin(void)
+void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
 {
 	/*
 	 * We lazily create the temporary object directory
@@ -331,15 +340,16 @@ void prepare_loose_object_bulk_checkin(void)
 	 * callers may not know whether any objects will be
 	 * added at the time they call begin_odb_transaction.
 	 */
-	if (!odb_transaction_nesting || bulk_fsync_objdir)
+	if (!transaction || transaction->objdir)
 		return;
 
-	bulk_fsync_objdir = tmp_objdir_create(the_repository, "bulk-fsync");
-	if (bulk_fsync_objdir)
-		tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0);
+	transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync");
+	if (transaction->objdir)
+		tmp_objdir_replace_primary_odb(transaction->objdir, 0);
 }
 
-void fsync_loose_object_bulk_checkin(int fd, const char *filename)
+void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+				     int fd, const char *filename)
 {
 	/*
 	 * If we have an active ODB transaction, we issue a call that
@@ -348,7 +358,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename)
 	 * before renaming the objects to their final names as part of
 	 * flush_batch_fsync.
 	 */
-	if (!bulk_fsync_objdir ||
+	if (!transaction || !transaction->objdir ||
 	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
 		if (errno == ENOSYS)
 			warning(_("core.fsyncMethod = batch is unsupported on this platform"));
@@ -356,36 +366,38 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename)
 	}
 }
 
-int index_blob_bulk_checkin(struct object_id *oid,
-			    int fd, size_t size,
-			    const char *path, unsigned flags)
+struct odb_transaction *begin_odb_transaction(struct object_database *odb)
 {
-	int status = deflate_blob_to_pack(&bulk_checkin_packfile, oid, fd, size,
-					  path, flags);
-	if (!odb_transaction_nesting)
-		flush_bulk_checkin_packfile(&bulk_checkin_packfile);
-	return status;
-}
+	if (!odb->transaction) {
+		CALLOC_ARRAY(odb->transaction, 1);
+		odb->transaction->odb = odb;
+	}
 
-void begin_odb_transaction(void)
-{
-	odb_transaction_nesting += 1;
+	odb->transaction->nesting += 1;
+
+	return odb->transaction;
 }
 
-void flush_odb_transaction(void)
+void flush_odb_transaction(struct odb_transaction *transaction)
 {
-	flush_batch_fsync();
-	flush_bulk_checkin_packfile(&bulk_checkin_packfile);
+	if (!transaction)
+		return;
+
+	flush_batch_fsync(transaction);
+	flush_bulk_checkin_packfile(transaction);
 }
 
-void end_odb_transaction(void)
+void end_odb_transaction(struct odb_transaction *transaction)
 {
-	odb_transaction_nesting -= 1;
-	if (odb_transaction_nesting < 0)
+	if (!transaction || transaction->nesting == 0)
 		BUG("Unbalanced ODB transaction nesting");
 
-	if (odb_transaction_nesting)
+	transaction->nesting -= 1;
+
+	if (transaction->nesting)
 		return;
 
-	flush_odb_transaction();
+	flush_odb_transaction(transaction);
+	transaction->odb->transaction = NULL;
+	free(transaction);
 }
diff --git a/bulk-checkin.h b/bulk-checkin.h
index 7246ea58dc..ac8887f476 100644
--- a/bulk-checkin.h
+++ b/bulk-checkin.h
@@ -5,13 +5,20 @@
 #define BULK_CHECKIN_H
 
 #include "object.h"
+#include "odb.h"
 
-void prepare_loose_object_bulk_checkin(void);
-void fsync_loose_object_bulk_checkin(int fd, const char *filename);
+struct odb_transaction;
+
+void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction);
+void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
+				     int fd, const char *filename);
 
 /*
- * This creates one packfile per large blob unless bulk-checkin
- * machinery is "plugged".
+ * This writes the specified object to a packfile. Objects written here
+ * during the same transaction are written to the same packfile. The
+ * packfile is not flushed until the transaction is flushed. The caller
+ * is expected to ensure a valid transaction is setup for objects to be
+ * recorded to.
  *
  * This also bypasses the usual "convert-to-git" dance, and that is on
  * purpose. We could write a streaming version of the converting
@@ -24,8 +31,8 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename);
 * binary blobs, they generally do not want to get any conversion, and
 * callers should avoid this code path when filters are requested.
 */
-int index_blob_bulk_checkin(struct object_id *oid,
-			    int fd, size_t size,
+int index_blob_bulk_checkin(struct odb_transaction *transaction,
+			    struct object_id *oid, int fd, size_t size,
 			    const char *path, unsigned flags);
 
 /*
@@ -35,20 +42,20 @@ int index_blob_bulk_checkin(struct object_id *oid,
 * and objects are only visible after the outermost transaction
 * is complete or the transaction is flushed.
 */
-void begin_odb_transaction(void);
+struct odb_transaction *begin_odb_transaction(struct object_database *odb);
 
 /*
 * Make any objects that are currently part of a pending object
 * database transaction visible. It is valid to call this function
 * even if no transaction is active.
 */
-void flush_odb_transaction(void);
+void flush_odb_transaction(struct odb_transaction *transaction);
 
 /*
 * Tell the object database to make any objects from the
 * current transaction visible if this is the final nested
 * transaction.
 */
-void end_odb_transaction(void);
+void end_odb_transaction(struct odb_transaction *transaction);
 
 #endif
diff --git a/cache-tree.c b/cache-tree.c
index 66ef2becbe..d225554eed 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -474,6 +474,7 @@ static int update_one(struct cache_tree *it,
 
 int cache_tree_update(struct index_state *istate, int flags)
 {
+	struct odb_transaction *transaction;
 	int skip, i;
 
 	i = verify_cache(istate, flags);
@@ -489,10 +490,10 @@ int cache_tree_update(struct index_state *istate, int flags)
 
 	trace_performance_enter();
 	trace2_region_enter("cache_tree", "update", the_repository);
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(the_repository->objects);
 	i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,
 		       "", 0, &skip, flags);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 	trace2_region_leave("cache_tree", "update", the_repository);
 	trace_performance_leave("cache_tree_update");
 	if (i < 0)
diff --git a/object-file.c b/object-file.c
index 2bc36ab3ee..bc15af4245 100644
--- a/object-file.c
+++ b/object-file.c
@@ -674,7 +674,7 @@ static void close_loose_object(struct odb_source *source,
 		goto out;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		fsync_loose_object_bulk_checkin(fd, filename);
+		fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename);
 	else if (fsync_object_files > 0)
 		fsync_or_die(fd, filename);
 	else
@@ -852,7 +852,7 @@ static int write_loose_object(struct odb_source *source,
 	static struct strbuf filename = STRBUF_INIT;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_bulk_checkin(source->odb->transaction);
 
 	odb_loose_path(source, &filename, oid);
 
@@ -941,7 +941,7 @@ int stream_loose_object(struct odb_source *source,
 	int hdrlen;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_bulk_checkin(source->odb->transaction);
 
 	/* Since oid is not determined, save tmp file to odb path. */
 	strbuf_addf(&filename, "%s/", source->path);
@@ -1253,18 +1253,26 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 	 * Call xsize_t() only when needed to avoid potentially unnecessary
 	 * die() for large files.
 	 */
-	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path))
+	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) {
 		ret = index_stream_convert_blob(istate, oid, fd, path, flags);
-	else if (!S_ISREG(st->st_mode))
+	} else if (!S_ISREG(st->st_mode)) {
 		ret = index_pipe(istate, oid, fd, type, path, flags);
-	else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
-		 type != OBJ_BLOB ||
-		 (path && would_convert_to_git(istate, path)))
+	} else if ((st->st_size >= 0 &&
+		    (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
+		   type != OBJ_BLOB ||
+		   (path && would_convert_to_git(istate, path))) {
 		ret = index_core(istate, oid, fd, xsize_t(st->st_size), type, path,
 				 flags);
-	else
-		ret = index_blob_bulk_checkin(oid, fd, xsize_t(st->st_size), path,
-					      flags);
+	} else {
+		struct odb_transaction *transaction;
+
+		transaction = begin_odb_transaction(the_repository->objects);
+		ret = index_blob_bulk_checkin(transaction,
+					      oid, fd, xsize_t(st->st_size),
+					      path, flags);
+		end_odb_transaction(transaction);
+	}
+
 	close(fd);
 	return ret;
 }
diff --git a/odb.h b/odb.h
--- a/odb.h
+++ b/odb.h
@@ -84,6 +84,7 @@ struct odb_source {
 
 struct packed_git;
 struct cached_object_entry;
+struct odb_transaction;
 
 /*
 * The object database encapsulates access to objects in a repository. It
@@ -95,6 +96,13 @@ struct object_database {
 	struct repository *repo;
 
 	/*
+	 * State of current current object database transaction. Only one
+	 * transaction may be pending at a time. Is NULL when no transaction is
+	 * configured.
+	 */
+	struct odb_transaction *transaction;
+
+	/*
 	 * Set of all object directories; the main directory is first (and
 	 * cannot be NULL after initialization). Subsequent directories are
 	 * alternates.
diff --git a/read-cache.c b/read-cache.c
index 06ad74db22..229b8ef11c 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3947,6 +3947,7 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
 			const struct pathspec *pathspec, char *ps_matched,
 			int include_sparse, int flags)
 {
+	struct odb_transaction *transaction;
 	struct update_callback_data data;
 	struct rev_info rev;
 
@@ -3972,9 +3973,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix,
 	 * This function is invoked from commands other than 'add', which
 	 * may not have their own transaction active.
 	 */
-	begin_odb_transaction();
+	transaction = begin_odb_transaction(repo->objects);
 	run_diff_files(&rev, DIFF_RACY_IS_MODIFIED);
-	end_odb_transaction();
+	end_odb_transaction(transaction);
 
 	release_revisions(&rev);
 	return !!data.add_errors;
```
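The nesting and flush semantics documented in bulk-checkin.h above can be summarized with a short hedged sketch. The caller `nested_example` and its body are hypothetical, added only for illustration; the API calls follow the diff.

```c
#include "git-compat-util.h"
#include "repository.h"
#include "odb.h"
#include "bulk-checkin.h"

/*
 * Hypothetical caller illustrating nesting: a repeated
 * begin_odb_transaction() returns the same pending transaction and
 * bumps its nesting count, objects become visible when the outermost
 * end_odb_transaction() runs, and flush_odb_transaction() can expose
 * pending objects early (as builtin/update-index.c does in report()).
 */
static void nested_example(struct repository *repo)
{
	struct odb_transaction *outer, *inner;

	outer = begin_odb_transaction(repo->objects);
	inner = begin_odb_transaction(repo->objects); /* same handle, nesting++ */

	/* ... queue some objects ... */

	/* Make already-queued objects visible without ending the transaction. */
	flush_odb_transaction(repo->objects->transaction);

	end_odb_transaction(inner);	/* still pending; outer not yet closed */
	end_odb_transaction(outer);	/* flushes, then frees the transaction */
}
```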
