diff options
Diffstat (limited to 'builtin/clone.c')
| -rw-r--r-- | builtin/clone.c | 292 |
1 files changed, 211 insertions, 81 deletions
diff --git a/builtin/clone.c b/builtin/clone.c index 65b5b7db6d..b28f88eb43 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -8,15 +8,20 @@ * Clone a repository into a different directory that does not yet exist. */ -#define USE_THE_INDEX_VARIABLE #include "builtin.h" +#include "abspath.h" +#include "advice.h" #include "config.h" +#include "copy.h" +#include "environment.h" +#include "gettext.h" +#include "hex.h" #include "lockfile.h" #include "parse-options.h" -#include "fetch-pack.h" #include "refs.h" #include "refspec.h" -#include "object-store.h" +#include "object-file.h" +#include "object-store-ll.h" #include "tree.h" #include "tree-walk.h" #include "unpack-trees.h" @@ -29,8 +34,11 @@ #include "branch.h" #include "remote.h" #include "run-command.h" +#include "setup.h" #include "connected.h" #include "packfile.h" +#include "path.h" +#include "pkt-line.h" #include "list-objects-filter-options.h" #include "hook.h" #include "bundle.h" @@ -62,7 +70,8 @@ static char *remote_name = NULL; static char *option_branch = NULL; static struct string_list option_not = STRING_LIST_INIT_NODUP; static const char *real_git_dir; -static char *option_upload_pack = "git-upload-pack"; +static const char *ref_format; +static const char *option_upload_pack = "git-upload-pack"; static int option_verbosity; static int option_progress = -1; static int option_sparse_checkout; @@ -106,7 +115,7 @@ static struct option builtin_clone_options[] = { OPT_HIDDEN_BOOL(0, "naked", &option_bare, N_("create a bare repository")), OPT_BOOL(0, "mirror", &option_mirror, - N_("create a mirror repository (implies bare)")), + N_("create a mirror repository (implies --bare)")), OPT_BOOL('l', "local", &option_local, N_("to clone from a local repository")), OPT_BOOL(0, "no-hardlinks", &option_no_hardlinks, @@ -147,14 +156,13 @@ static struct option builtin_clone_options[] = { N_("any cloned submodules will be shallow")), OPT_STRING(0, "separate-git-dir", &real_git_dir, N_("gitdir"), N_("separate git dir from working tree")), + OPT_STRING(0, "ref-format", &ref_format, N_("format"), + N_("specify the reference format to use")), OPT_STRING_LIST('c', "config", &option_config, N_("key=value"), N_("set config inside the new repository")), OPT_STRING_LIST(0, "server-option", &server_options, N_("server-specific"), N_("option to transmit")), - OPT_SET_INT('4', "ipv4", &family, N_("use IPv4 addresses only"), - TRANSPORT_FAMILY_IPV4), - OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"), - TRANSPORT_FAMILY_IPV6), + OPT_IPVERSION(&family), OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_BOOL(0, "also-filter-submodules", &option_filter_submodules, N_("apply partial clone filters to submodules")), @@ -169,8 +177,8 @@ static struct option builtin_clone_options[] = { static const char *get_repo_path_1(struct strbuf *path, int *is_bundle) { - static char *suffix[] = { "/.git", "", ".git/.git", ".git" }; - static char *bundle_suffix[] = { ".bundle", "" }; + static const char *suffix[] = { "/.git", "", ".git/.git", ".git" }; + static const char *bundle_suffix[] = { ".bundle", "" }; size_t baselen = path->len; struct stat st; int i; @@ -320,14 +328,37 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest, int src_len, dest_len; struct dir_iterator *iter; int iter_status; - struct strbuf realpath = STRBUF_INIT; + + /* + * Refuse copying directories by default which aren't owned by us. The + * code that performs either the copying or hardlinking is not prepared + * to handle various edge cases where an adversary may for example + * racily swap out files for symlinks. This can cause us to + * inadvertently use the wrong source file. + * + * Furthermore, even if we were prepared to handle such races safely, + * creating hardlinks across user boundaries is an inherently unsafe + * operation as the hardlinked files can be rewritten at will by the + * potentially-untrusted user. We thus refuse to do so by default. + */ + die_upon_dubious_ownership(NULL, NULL, src_repo); mkdir_if_missing(dest->buf, 0777); iter = dir_iterator_begin(src->buf, DIR_ITERATOR_PEDANTIC); - if (!iter) + if (!iter) { + if (errno == ENOTDIR) { + int saved_errno = errno; + struct stat st; + + if (!lstat(src->buf, &st) && S_ISLNK(st.st_mode)) + die(_("'%s' is a symlink, refusing to clone with --local"), + src->buf); + errno = saved_errno; + } die_errno(_("failed to start iterator over '%s'"), src->buf); + } strbuf_addch(src, '/'); src_len = src->len; @@ -358,9 +389,27 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest, if (unlink(dest->buf) && errno != ENOENT) die_errno(_("failed to unlink '%s'"), dest->buf); if (!option_no_hardlinks) { - strbuf_realpath(&realpath, src->buf, 1); - if (!link(realpath.buf, dest->buf)) + if (!link(src->buf, dest->buf)) { + struct stat st; + + /* + * Sanity-check whether the created hardlink + * actually links to the expected file now. This + * catches time-of-check-time-of-use bugs in + * case the source file was meanwhile swapped. + */ + if (lstat(dest->buf, &st)) + die(_("hardlink cannot be checked at '%s'"), dest->buf); + if (st.st_mode != iter->st.st_mode || + st.st_ino != iter->st.st_ino || + st.st_dev != iter->st.st_dev || + st.st_size != iter->st.st_size || + st.st_uid != iter->st.st_uid || + st.st_gid != iter->st.st_gid) + die(_("hardlink different from source at '%s'"), dest->buf); + continue; + } if (option_local > 0) die_errno(_("failed to create link '%s'"), dest->buf); option_no_hardlinks = 1; @@ -373,8 +422,6 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest, strbuf_setlen(src, src_len); die(_("failed to iterate over '%s'"), src->buf); } - - strbuf_release(&realpath); } static void clone_local(const char *src_repo, const char *dest_repo) @@ -476,6 +523,9 @@ static struct ref *wanted_peer_refs(const struct ref *refs, struct ref *head = copy_ref(find_ref_by_name(refs, "HEAD")); struct ref *local_refs = head; struct ref **tail = head ? &head->next : &local_refs; + struct refspec_item tag_refspec; + + refspec_item_init(&tag_refspec, TAG_REFSPEC, 0); if (option_single_branch) { struct ref *remote_head = NULL; @@ -498,7 +548,7 @@ static struct ref *wanted_peer_refs(const struct ref *refs, &tail, 0); /* if --branch=tag, pull the requested tag explicitly */ - get_fetch_map(remote_head, tag_refspec, &tail, 0); + get_fetch_map(remote_head, &tag_refspec, &tail, 0); } free_refs(remote_head); } else { @@ -508,8 +558,9 @@ static struct ref *wanted_peer_refs(const struct ref *refs, } if (!option_mirror && !option_single_branch && !option_no_tags) - get_fetch_map(refs, tag_refspec, &tail, 0); + get_fetch_map(refs, &tag_refspec, &tail, 0); + refspec_item_clear(&tag_refspec); return local_refs; } @@ -520,7 +571,8 @@ static void write_remote_refs(const struct ref *local_refs) struct ref_transaction *t; struct strbuf err = STRBUF_INIT; - t = ref_transaction_begin(&err); + t = ref_store_transaction_begin(get_main_ref_store(the_repository), + &err); if (!t) die("%s", err.buf); @@ -528,7 +580,7 @@ static void write_remote_refs(const struct ref *local_refs) if (!r->peer_ref) continue; if (ref_transaction_create(t, r->peer_ref->name, &r->old_oid, - 0, NULL, &err)) + NULL, 0, NULL, &err)) die("%s", err.buf); } @@ -547,12 +599,13 @@ static void write_followtags(const struct ref *refs, const char *msg) continue; if (ends_with(ref->name, "^{}")) continue; - if (!has_object_file_with_flags(&ref->old_oid, - OBJECT_INFO_QUICK | - OBJECT_INFO_SKIP_FETCH_OBJECT)) + if (!repo_has_object_file_with_flags(the_repository, &ref->old_oid, + OBJECT_INFO_QUICK | + OBJECT_INFO_SKIP_FETCH_OBJECT)) continue; - update_ref(msg, ref->name, &ref->old_oid, NULL, 0, - UPDATE_REFS_DIE_ON_ERR); + refs_update_ref(get_main_ref_store(the_repository), msg, + ref->name, &ref->old_oid, NULL, 0, + UPDATE_REFS_DIE_ON_ERR); } } @@ -604,9 +657,9 @@ static void update_remote_refs(const struct ref *refs, struct strbuf head_ref = STRBUF_INIT; strbuf_addstr(&head_ref, branch_top); strbuf_addstr(&head_ref, "HEAD"); - if (create_symref(head_ref.buf, - remote_head_points_at->peer_ref->name, - msg) < 0) + if (refs_update_symref(get_main_ref_store(the_repository), head_ref.buf, + remote_head_points_at->peer_ref->name, + msg) < 0) die(_("unable to update %s"), head_ref.buf); strbuf_release(&head_ref); } @@ -618,33 +671,36 @@ static void update_head(const struct ref *our, const struct ref *remote, const char *head; if (our && skip_prefix(our->name, "refs/heads/", &head)) { /* Local default branch link */ - if (create_symref("HEAD", our->name, NULL) < 0) + if (refs_update_symref(get_main_ref_store(the_repository), "HEAD", our->name, NULL) < 0) die(_("unable to update HEAD")); if (!option_bare) { - update_ref(msg, "HEAD", &our->old_oid, NULL, 0, - UPDATE_REFS_DIE_ON_ERR); + refs_update_ref(get_main_ref_store(the_repository), + msg, "HEAD", &our->old_oid, NULL, 0, + UPDATE_REFS_DIE_ON_ERR); install_branch_config(0, head, remote_name, our->name); } } else if (our) { struct commit *c = lookup_commit_reference(the_repository, &our->old_oid); /* --branch specifies a non-branch (i.e. tags), detach HEAD */ - update_ref(msg, "HEAD", &c->object.oid, NULL, REF_NO_DEREF, - UPDATE_REFS_DIE_ON_ERR); + refs_update_ref(get_main_ref_store(the_repository), msg, + "HEAD", &c->object.oid, NULL, REF_NO_DEREF, + UPDATE_REFS_DIE_ON_ERR); } else if (remote) { /* * We know remote HEAD points to a non-branch, or * HEAD points to a branch but we don't know which one. * Detach HEAD in all these cases. */ - update_ref(msg, "HEAD", &remote->old_oid, NULL, REF_NO_DEREF, - UPDATE_REFS_DIE_ON_ERR); + refs_update_ref(get_main_ref_store(the_repository), msg, + "HEAD", &remote->old_oid, NULL, REF_NO_DEREF, + UPDATE_REFS_DIE_ON_ERR); } else if (unborn && skip_prefix(unborn, "refs/heads/", &head)) { /* * Unborn head from remote; same as "our" case above except * that we have no ref to update. */ - if (create_symref("HEAD", unborn, NULL) < 0) + if (refs_update_symref(get_main_ref_store(the_repository), "HEAD", unborn, NULL) < 0) die(_("unable to update HEAD")); if (!option_bare) install_branch_config(0, head, remote_name, unborn); @@ -685,7 +741,8 @@ static int checkout(int submodule_progress, int filter_submodules) if (option_no_checkout) return 0; - head = resolve_refdup("HEAD", RESOLVE_REF_READING, &oid, NULL); + head = refs_resolve_refdup(get_main_ref_store(the_repository), "HEAD", + RESOLVE_REF_READING, &oid, NULL); if (!head) { warning(_("remote HEAD refers to nonexistent ref, " "unable to checkout")); @@ -712,21 +769,22 @@ static int checkout(int submodule_progress, int filter_submodules) opts.preserve_ignored = 0; opts.fn = oneway_merge; opts.verbose_update = (option_verbosity >= 0); - opts.src_index = &the_index; - opts.dst_index = &the_index; + opts.src_index = the_repository->index; + opts.dst_index = the_repository->index; init_checkout_metadata(&opts.meta, head, &oid, NULL); tree = parse_tree_indirect(&oid); if (!tree) die(_("unable to parse commit %s"), oid_to_hex(&oid)); - parse_tree(tree); - init_tree_desc(&t, tree->buffer, tree->size); + if (parse_tree(tree) < 0) + exit(128); + init_tree_desc(&t, &tree->object.oid, tree->buffer, tree->size); if (unpack_trees(1, &t, &opts) < 0) die(_("unable to checkout working tree")); free(head); - if (write_locked_index(&the_index, &lock_file, COMMIT_LOCK)) + if (write_locked_index(the_repository->index, &lock_file, COMMIT_LOCK)) die(_("unable to write new index file")); err |= run_hooks_l("post-checkout", oid_to_hex(null_oid()), @@ -770,9 +828,12 @@ static int checkout(int submodule_progress, int filter_submodules) return err; } -static int git_clone_config(const char *k, const char *v, void *cb) +static int git_clone_config(const char *k, const char *v, + const struct config_context *ctx, void *cb) { if (!strcmp(k, "clone.defaultremotename")) { + if (!v) + return config_error_nonbool(k); free(remote_name); remote_name = xstrdup(v); } @@ -781,17 +842,19 @@ static int git_clone_config(const char *k, const char *v, void *cb) if (!strcmp(k, "clone.filtersubmodules")) config_filter_submodules = git_config_bool(k, v); - return git_default_config(k, v, cb); + return git_default_config(k, v, ctx, cb); } -static int write_one_config(const char *key, const char *value, void *data) +static int write_one_config(const char *key, const char *value, + const struct config_context *ctx, + void *data) { /* * give git_clone_config a chance to write config values back to the * environment, since git_config_set_multivar_gently only deals with * config-file writes */ - int apply_failed = git_clone_config(key, value, data); + int apply_failed = git_clone_config(key, value, ctx, data); if (apply_failed) return apply_failed; @@ -902,6 +965,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) struct ref *mapped_refs = NULL; const struct ref *ref; struct strbuf key = STRBUF_INIT; + struct strbuf buf = STRBUF_INIT; struct strbuf branch_top = STRBUF_INIT, reflog_msg = STRBUF_INIT; struct transport *transport = NULL; const char *src_ref_prefix = "refs/heads/"; @@ -909,6 +973,9 @@ int cmd_clone(int argc, const char **argv, const char *prefix) int err = 0, complete_refs_before_fetch = 1; int submodule_progress; int filter_submodules = 0; + int hash_algo; + enum ref_storage_format ref_storage_format = REF_STORAGE_FORMAT_UNKNOWN; + const int do_not_override_repo_unix_permissions = -1; struct transport_ls_refs_options transport_ls_refs_options = TRANSPORT_LS_REFS_OPTIONS_INIT; @@ -933,6 +1000,12 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (option_single_branch == -1) option_single_branch = deepen ? 1 : 0; + if (ref_format) { + ref_storage_format = ref_storage_format_by_name(ref_format); + if (ref_storage_format == REF_STORAGE_FORMAT_UNKNOWN) + die(_("unknown ref storage format '%s'"), ref_format); + } + if (option_mirror) option_bare = 1; @@ -943,7 +1016,9 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } if (bundle_uri && deepen) - die(_("--bundle-uri is incompatible with --depth, --shallow-since, and --shallow-exclude")); + die(_("options '%s' and '%s' cannot be used together"), + "--bundle-uri", + "--depth/--shallow-since/--shallow-exclude"); repo_name = argv[0]; @@ -1075,8 +1150,15 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } } - init_db(git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN, NULL, - INIT_DB_QUIET); + /* + * Initialize the repository, but skip initializing the reference + * database. We do not yet know about the object format of the + * repository, and reference backends may persist that information into + * their on-disk data structures. + */ + init_db(git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN, + ref_storage_format, NULL, + do_not_override_repo_unix_permissions, INIT_DB_QUIET | INIT_DB_SKIP_REFDB); if (real_git_dir) { free((char *)git_dir); @@ -1084,6 +1166,50 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } /* + * We have a chicken-and-egg situation between initializing the refdb + * and spawning transport helpers: + * + * - Initializing the refdb requires us to know about the object + * format. We thus have to spawn the transport helper to learn + * about it. + * + * - The transport helper may want to access the Git repository. But + * because the refdb has not been initialized, we don't have "HEAD" + * or "refs/". Thus, the helper cannot find the Git repository. + * + * Ideally, we would have structured the helper protocol such that it's + * mandatory for the helper to first announce its capabilities without + * yet assuming a fully initialized repository. Like that, we could + * have added a "lazy-refdb-init" capability that announces whether the + * helper is ready to handle not-yet-initialized refdbs. If any helper + * didn't support them, we would have fully initialized the refdb with + * the SHA1 object format, but later on bailed out if we found out that + * the remote repository used a different object format. + * + * But we didn't, and thus we use the following workaround to partially + * initialize the repository's refdb such that it can be discovered by + * Git commands. To do so, we: + * + * - Create an invalid HEAD ref pointing at "refs/heads/.invalid". + * + * - Create the "refs/" directory. + * + * - Set up the ref storage format and repository version as + * required. + * + * This is sufficient for Git commands to discover the Git directory. + */ + initialize_repository_version(GIT_HASH_UNKNOWN, + the_repository->ref_storage_format, 1); + + strbuf_addf(&buf, "%s/HEAD", git_dir); + write_file(buf.buf, "ref: refs/heads/.invalid"); + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s/refs", git_dir); + safe_create_dir(buf.buf, 1); + + /* * additional config can be injected with -c, make sure it's included * after init_db, which clears the entire config environment. */ @@ -1163,10 +1289,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (option_required_reference.nr || option_optional_reference.nr) setup_reference(); - if (option_sparse_checkout && git_sparse_checkout_init(dir)) - return 1; - - remote = remote_get(remote_name); + remote = remote_get_early(remote_name); refspec_appendf(&remote->fetch, "+%s*:%s*", src_ref_prefix, branch_top.buf); @@ -1244,6 +1367,27 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (transport->smart_options && !deepen && !filter_options.choice) transport->smart_options->check_self_contained_and_connected = 1; + strvec_push(&transport_ls_refs_options.ref_prefixes, "HEAD"); + refspec_ref_prefixes(&remote->fetch, + &transport_ls_refs_options.ref_prefixes); + if (option_branch) + expand_ref_prefix(&transport_ls_refs_options.ref_prefixes, + option_branch); + if (!option_no_tags) + strvec_push(&transport_ls_refs_options.ref_prefixes, + "refs/tags/"); + + refs = transport_get_remote_refs(transport, &transport_ls_refs_options); + + /* + * Now that we know what algorithm the remote side is using, let's set + * ours to the same thing. + */ + hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport)); + initialize_repository_version(hash_algo, the_repository->ref_storage_format, 1); + repo_set_hash_algo(the_repository, hash_algo); + create_reference_database(the_repository->ref_storage_format, NULL, 1); + /* * Before fetching from the remote, download and install bundle * data from the --bundle-uri option. @@ -1259,24 +1403,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) bundle_uri); else if (has_heuristic) git_config_set_gently("fetch.bundleuri", bundle_uri); - } - - strvec_push(&transport_ls_refs_options.ref_prefixes, "HEAD"); - refspec_ref_prefixes(&remote->fetch, - &transport_ls_refs_options.ref_prefixes); - if (option_branch) - expand_ref_prefix(&transport_ls_refs_options.ref_prefixes, - option_branch); - if (!option_no_tags) - strvec_push(&transport_ls_refs_options.ref_prefixes, - "refs/tags/"); - - refs = transport_get_remote_refs(transport, &transport_ls_refs_options); - - if (refs) - mapped_refs = wanted_peer_refs(refs, &remote->fetch); - - if (!bundle_uri) { + } else { /* * Populate transport->got_remote_bundle_uri and * transport->bundle_uri. We might get nothing. @@ -1297,15 +1424,10 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } } - if (mapped_refs) { - int hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport)); + if (refs) + mapped_refs = wanted_peer_refs(refs, &remote->fetch); - /* - * Now that we know what algorithm the remote side is using, - * let's set ours to the same thing. - */ - initialize_repository_version(hash_algo, 1); - repo_set_hash_algo(the_repository, hash_algo); + if (mapped_refs) { /* * transport_get_remote_refs() may return refs with null sha-1 * in mapped_refs (see struct transport->get_refs_list @@ -1341,6 +1463,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } else if (remote_head) { our_head_points_at = NULL; } else { + char *to_free = NULL; const char *branch; if (!mapped_refs) { @@ -1353,7 +1476,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) "refs/heads/", &branch)) { unborn_head = xstrdup(transport_ls_refs_options.unborn_head_target); } else { - branch = git_default_branch_name(0); + branch = to_free = repo_default_branch_name(the_repository, 0); unborn_head = xstrfmt("refs/heads/%s", branch); } @@ -1369,6 +1492,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix) * a match. */ our_head_points_at = find_remote_branch(mapped_refs, branch); + + free(to_free); } write_refspec_config(src_ref_prefix, our_head_points_at, @@ -1406,12 +1531,16 @@ int cmd_clone(int argc, const char **argv, const char *prefix) dissociate_from_references(); } + if (option_sparse_checkout && git_sparse_checkout_init(dir)) + return 1; + junk_mode = JUNK_LEAVE_REPO; err = checkout(submodule_progress, filter_submodules); free(remote_name); strbuf_release(&reflog_msg); strbuf_release(&branch_top); + strbuf_release(&buf); strbuf_release(&key); free_refs(mapped_refs); free_refs(remote_head_points_at); @@ -1419,6 +1548,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) free(dir); free(path); free(repo_to_free); + UNLEAK(repo); junk_mode = JUNK_LEAVE_ALL; transport_ls_refs_options_release(&transport_ls_refs_options); |
