Skip to main content

uv_git/
git.rs

//! Git support is derived from Cargo's implementation.
//! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice.
//! Source: <https://github.com/rust-lang/cargo/blob/23eb492cf920ce051abfc56bbaf838514dc8365c/src/cargo/sources/git/utils.rs>
4use std::fmt::Display;
5use std::path::{Path, PathBuf};
6use std::str::{self};
7use std::sync::LazyLock;
8
9use anyhow::{Context, Result, anyhow};
10use cargo_util::{ProcessBuilder, paths};
11use owo_colors::OwoColorize;
12use tracing::{debug, instrument, warn};
13use url::Url;
14
15use uv_fs::Simplified;
16use uv_git_types::{GitOid, GitReference};
17use uv_redacted::DisplaySafeUrl;
18use uv_static::EnvVars;
19use uv_warnings::warn_user_once;
20
/// Name of the marker file whose presence signals that `git reset` has completed and the
/// checkout is ready to use. See [`GitCheckout::reset`] for why this marker is needed.
const CHECKOUT_READY_LOCK: &str = ".ok";
24
25#[derive(Debug, thiserror::Error)]
26pub enum GitError {
27    #[error("Git executable not found. Ensure that Git is installed and available.")]
28    GitNotFound,
29    #[error("Git LFS extension not found. Ensure that Git LFS is installed and available.")]
30    GitLfsNotFound,
31    #[error("Is Git LFS configured? Run `{}` to initialize Git LFS.", "git lfs install".green())]
32    GitLfsNotConfigured,
33    #[error(transparent)]
34    Other(#[from] which::Error),
35    #[error(
36        "Remote Git fetches are not allowed because network connectivity is disabled (i.e., with `--offline`)"
37    )]
38    TransportNotAllowed,
39}
40
41/// A global cache of the result of `which git` as a command
42///
43/// Caching the command allows us to avoid needing to remove environment
44/// variables everywhere.
45pub static GIT: LazyLock<Result<ProcessBuilder, GitError>> = LazyLock::new(|| {
46    let path = which::which("git").map_err(|err| match err {
47        which::Error::CannotFindBinaryPath => GitError::GitNotFound,
48        err => GitError::Other(err),
49    })?;
50
51    let mut cmd = ProcessBuilder::new(path);
52
53    // Certain git environment variables never make sense to inherit because
54    // they affect what the current command will act on.
55
56    // This can cause problems if for example uv is ran by git (for example, the
57    // `exec` command in `git rebase`), the GIT_DIR is set by git and will point
58    // to the wrong location (this takes precedence over the cwd).
59    cmd.env_remove(EnvVars::GIT_DIR)
60        .env_remove(EnvVars::GIT_WORK_TREE)
61        .env_remove(EnvVars::GIT_INDEX_FILE)
62        .env_remove(EnvVars::GIT_OBJECT_DIRECTORY)
63        .env_remove(EnvVars::GIT_ALTERNATE_OBJECT_DIRECTORIES)
64        .env_remove(EnvVars::GIT_COMMON_DIR);
65
66    Ok(cmd)
67});
68
/// How the refspecs derived from a [`GitReference`] are fetched.
enum RefspecStrategy {
    /// Fetch every refspec; the fetch fails if any of them fails.
    All,
    /// Try the refspecs in order and stop at the first success; the fetch fails only if none
    /// of them succeeds.
    First,
}
76
77/// A Git reference (like a tag or branch) or a specific commit.
78#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
79enum ReferenceOrOid<'reference> {
80    /// A Git reference, like a tag or branch.
81    Reference(&'reference GitReference),
82    /// A specific commit.
83    Oid(GitOid),
84}
85
86impl ReferenceOrOid<'_> {
87    /// Resolves the [`ReferenceOrOid`] to an object ID with objects the `repo` currently has.
88    fn resolve(&self, repo: &GitRepository) -> Result<GitOid> {
89        let refkind = self.kind_str();
90        let result = match self {
91            // Resolve the commit pointed to by the tag.
92            //
93            // `^0` recursively peels away from the revision to the underlying commit object.
94            // This also verifies that the tag indeed refers to a commit.
95            Self::Reference(GitReference::Tag(s)) => {
96                repo.rev_parse(&format!("refs/remotes/origin/tags/{s}^0"))
97            }
98
99            // Resolve the commit pointed to by the branch.
100            Self::Reference(GitReference::Branch(s)) => repo.rev_parse(&format!("origin/{s}^0")),
101
102            // Attempt to resolve the branch, then the tag.
103            Self::Reference(GitReference::BranchOrTag(s)) => repo
104                .rev_parse(&format!("origin/{s}^0"))
105                .or_else(|_| repo.rev_parse(&format!("refs/remotes/origin/tags/{s}^0"))),
106
107            // Attempt to resolve the branch, then the tag, then the commit.
108            Self::Reference(GitReference::BranchOrTagOrCommit(s)) => repo
109                .rev_parse(&format!("origin/{s}^0"))
110                .or_else(|_| repo.rev_parse(&format!("refs/remotes/origin/tags/{s}^0")))
111                .or_else(|_| repo.rev_parse(&format!("{s}^0"))),
112
113            // We'll be using the HEAD commit.
114            Self::Reference(GitReference::DefaultBranch) => {
115                repo.rev_parse("refs/remotes/origin/HEAD")
116            }
117
118            // Resolve a named reference.
119            Self::Reference(GitReference::NamedRef(s)) => repo.rev_parse(&format!("{s}^0")),
120
121            // Resolve a specific commit.
122            Self::Oid(s) => repo.rev_parse(&format!("{s}^0")),
123        };
124
125        result.with_context(|| anyhow::format_err!("failed to find {refkind} `{self}`"))
126    }
127
128    /// Returns the kind of this [`ReferenceOrOid`].
129    fn kind_str(&self) -> &str {
130        match self {
131            Self::Reference(reference) => reference.kind_str(),
132            Self::Oid(_) => "commit",
133        }
134    }
135
136    /// Converts the [`ReferenceOrOid`] to a `str` that can be used as a revision.
137    fn as_rev(&self) -> &str {
138        match self {
139            Self::Reference(r) => r.as_rev(),
140            Self::Oid(rev) => rev.as_str(),
141        }
142    }
143}
144
145impl Display for ReferenceOrOid<'_> {
146    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
147        match self {
148            Self::Reference(reference) => write!(f, "{reference}"),
149            Self::Oid(oid) => write!(f, "{oid}"),
150        }
151    }
152}
153
154/// A remote repository. It gets cloned into a local [`GitDatabase`].
155#[derive(PartialEq, Clone, Debug)]
156pub(crate) struct GitRemote {
157    /// URL to a remote repository.
158    url: DisplaySafeUrl,
159}
160
161/// A local clone of a remote repository's database. Multiple [`GitCheckout`]s
162/// can be cloned from a single [`GitDatabase`].
163pub(crate) struct GitDatabase {
164    /// The remote repository where this database is fetched from.
165    remote: GitRemote,
166    /// Underlying Git repository instance for this database.
167    repo: GitRepository,
168    /// Git LFS artifacts have been initialized (if requested).
169    lfs_ready: Option<bool>,
170}
171
172/// A local checkout of a particular revision from a [`GitRepository`].
173pub(crate) struct GitCheckout {
174    /// The git revision this checkout is for.
175    revision: GitOid,
176    /// Underlying Git repository instance for this checkout.
177    repo: GitRepository,
178    /// Git LFS artifacts have been initialized (if requested).
179    lfs_ready: Option<bool>,
180}
181
/// A Git repository on the local filesystem.
pub(crate) struct GitRepository {
    /// Location of the repository on the local filesystem.
    path: PathBuf,
}
187
188impl GitRepository {
189    /// Opens an existing Git repository at `path`.
190    pub(crate) fn open(path: &Path) -> Result<Self> {
191        // Make sure there is a Git repository at the specified path.
192        GIT.as_ref()
193            .cloned()?
194            .arg("rev-parse")
195            .cwd(path)
196            .exec_with_output()?;
197
198        Ok(Self {
199            path: path.to_path_buf(),
200        })
201    }
202
203    /// Initializes a Git repository at `path`.
204    fn init(path: &Path) -> Result<Self> {
205        // TODO(ibraheem): see if this still necessary now that we no longer use libgit2
206        // Skip anything related to templates, they just call all sorts of issues as
207        // we really don't want to use them yet they insist on being used. See #6240
208        // for an example issue that comes up.
209        // opts.external_template(false);
210
211        // Initialize the repository.
212        GIT.as_ref()
213            .cloned()?
214            .arg("init")
215            .cwd(path)
216            .exec_with_output()?;
217
218        Ok(Self {
219            path: path.to_path_buf(),
220        })
221    }
222
223    /// Parses the object ID of the given `refname`.
224    fn rev_parse(&self, refname: &str) -> Result<GitOid> {
225        let result = GIT
226            .as_ref()
227            .cloned()?
228            .arg("rev-parse")
229            .arg(refname)
230            .cwd(&self.path)
231            .exec_with_output()?;
232
233        let mut result = String::from_utf8(result.stdout)?;
234        result.truncate(result.trim_end().len());
235        Ok(result.parse()?)
236    }
237
238    /// Verifies LFS artifacts have been initialized for a given `refname`.
239    #[instrument(skip_all, fields(path = %self.path.user_display(), refname = %refname))]
240    fn lfs_fsck_objects(&self, refname: &str) -> bool {
241        let mut cmd = if let Ok(lfs) = GIT_LFS.as_ref() {
242            lfs.clone()
243        } else {
244            warn!("Git LFS is not available, skipping LFS fetch");
245            return false;
246        };
247
248        // Requires Git LFS 3.x (2021 release)
249        let result = cmd
250            .arg("fsck")
251            .arg("--objects")
252            .arg(refname)
253            .cwd(&self.path)
254            .exec_with_output();
255
256        match result {
257            Ok(_) => true,
258            Err(err) => {
259                let lfs_error = err.to_string();
260                if lfs_error.contains("unknown flag: --objects") {
261                    warn_user_once!(
262                        "Skipping Git LFS validation as Git LFS extension is outdated. \
263                        Upgrade to `git-lfs>=3.0.2` or manually verify git-lfs objects were \
264                        properly fetched after the current operation finishes."
265                    );
266                    true
267                } else {
268                    debug!("Git LFS validation failed: {err}");
269                    false
270                }
271            }
272        }
273    }
274}
275
276impl GitRemote {
277    /// Creates an instance for a remote repository URL.
278    pub(crate) fn new(url: &DisplaySafeUrl) -> Self {
279        Self { url: url.clone() }
280    }
281
282    /// Gets the remote repository URL.
283    pub(crate) fn url(&self) -> &DisplaySafeUrl {
284        &self.url
285    }
286
287    /// Fetches and checkouts to a reference or a revision from this remote
288    /// into a local path.
289    ///
290    /// This ensures that it gets the up-to-date commit when a named reference
291    /// is given (tag, branch, refs/*). Thus, network connection is involved.
292    ///
293    /// When `locked_rev` is provided, it takes precedence over `reference`.
294    ///
295    /// If we have a previous instance of [`GitDatabase`] then fetch into that
296    /// if we can. If that can successfully load our revision then we've
297    /// populated the database with the latest version of `reference`, so
298    /// return that database and the rev we resolve to.
299    pub(crate) fn checkout(
300        self,
301        into: &Path,
302        db: Option<GitDatabase>,
303        reference: &GitReference,
304        locked_rev: Option<GitOid>,
305        disable_ssl: bool,
306        offline: bool,
307        with_lfs: bool,
308    ) -> Result<(GitDatabase, GitOid)> {
309        let reference = locked_rev
310            .map(ReferenceOrOid::Oid)
311            .unwrap_or(ReferenceOrOid::Reference(reference));
312        if let Some(mut db) = db {
313            fetch(&mut db.repo, &self.url, reference, disable_ssl, offline)
314                .with_context(|| format!("failed to fetch into: {}", into.user_display()))?;
315
316            let resolved_commit_hash = match locked_rev {
317                Some(rev) => db.contains(rev).then_some(rev),
318                None => reference.resolve(&db.repo).ok(),
319            };
320
321            if let Some(rev) = resolved_commit_hash {
322                if with_lfs {
323                    let lfs_ready = fetch_lfs(&mut db.repo, &self.url, &rev, disable_ssl)
324                        .with_context(|| format!("failed to fetch LFS objects at {rev}"))?;
325                    db = db.with_lfs_ready(Some(lfs_ready));
326                }
327                return Ok((db, rev));
328            }
329        }
330
331        // Otherwise start from scratch to handle corrupt git repositories.
332        // After our fetch (which is interpreted as a clone now) we do the same
333        // resolution to figure out what we cloned.
334        match fs_err::remove_dir_all(into) {
335            Ok(()) => {}
336            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
337            Err(e) => return Err(e.into()),
338        }
339
340        fs_err::create_dir_all(into)?;
341        let mut repo = GitRepository::init(into)?;
342        fetch(&mut repo, &self.url, reference, disable_ssl, offline)
343            .with_context(|| format!("failed to clone into: {}", into.user_display()))?;
344        let rev = match locked_rev {
345            Some(rev) => rev,
346            None => reference.resolve(&repo)?,
347        };
348        let lfs_ready = with_lfs
349            .then(|| {
350                fetch_lfs(&mut repo, &self.url, &rev, disable_ssl)
351                    .with_context(|| format!("failed to fetch LFS objects at {rev}"))
352            })
353            .transpose()?;
354
355        Ok((
356            GitDatabase {
357                remote: self,
358                repo,
359                lfs_ready,
360            },
361            rev,
362        ))
363    }
364
365    /// Creates a [`GitDatabase`] of this remote at `db_path`.
366    pub(crate) fn db_at(&self, db_path: &Path) -> Result<GitDatabase> {
367        let repo = GitRepository::open(db_path)?;
368        Ok(GitDatabase {
369            remote: self.clone(),
370            repo,
371            lfs_ready: None,
372        })
373    }
374}
375
376impl GitDatabase {
377    /// Checkouts to a revision at `destination` from this database.
378    pub(crate) fn copy_to(&self, rev: GitOid, destination: &Path) -> Result<GitCheckout> {
379        // If the existing checkout exists, and it is fresh, use it.
380        // A non-fresh checkout can happen if the checkout operation was
381        // interrupted. In that case, the checkout gets deleted and a new
382        // clone is created.
383        let checkout = match GitRepository::open(destination)
384            .ok()
385            .map(|repo| GitCheckout::new(rev, repo))
386            .filter(GitCheckout::is_fresh)
387        {
388            Some(co) => co.with_lfs_ready(self.lfs_ready),
389            None => GitCheckout::clone_into(destination, self, rev, self.remote.url())?,
390        };
391        Ok(checkout)
392    }
393
394    /// Get a short OID for a `revision`, usually 7 chars or more if ambiguous.
395    pub(crate) fn to_short_id(&self, revision: GitOid) -> Result<String> {
396        let output = GIT
397            .as_ref()
398            .cloned()?
399            .arg("rev-parse")
400            .arg("--short")
401            .arg(revision.as_str())
402            .cwd(&self.repo.path)
403            .exec_with_output()?;
404
405        let mut result = String::from_utf8(output.stdout)?;
406        result.truncate(result.trim_end().len());
407        Ok(result)
408    }
409
410    /// Checks if `oid` resolves to a commit in this database.
411    pub(crate) fn contains(&self, oid: GitOid) -> bool {
412        self.repo.rev_parse(&format!("{oid}^0")).is_ok()
413    }
414
415    /// Checks if `oid` contains necessary LFS artifacts in this database.
416    pub(crate) fn contains_lfs_artifacts(&self, oid: GitOid) -> bool {
417        self.repo.lfs_fsck_objects(&format!("{oid}^0"))
418    }
419
420    /// Set the Git LFS validation state (if any).
421    #[must_use]
422    pub(crate) fn with_lfs_ready(mut self, lfs: Option<bool>) -> Self {
423        self.lfs_ready = lfs;
424        self
425    }
426}
427
428impl GitCheckout {
429    /// Creates an instance of [`GitCheckout`]. This doesn't imply the checkout
430    /// is done. Use [`GitCheckout::is_fresh`] to check.
431    ///
432    /// * The `repo` will be the checked out Git repository.
433    fn new(revision: GitOid, repo: GitRepository) -> Self {
434        Self {
435            revision,
436            repo,
437            lfs_ready: None,
438        }
439    }
440
441    /// Clone a repo for a `revision` into a local path from a `database`.
442    /// This is a filesystem-to-filesystem clone.
443    fn clone_into(
444        into: &Path,
445        database: &GitDatabase,
446        revision: GitOid,
447        original_remote_url: &DisplaySafeUrl,
448    ) -> Result<Self> {
449        let dirname = into.parent().unwrap();
450        fs_err::create_dir_all(dirname)?;
451        match fs_err::remove_dir_all(into) {
452            Ok(()) => {}
453            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
454            Err(e) => return Err(e.into()),
455        }
456
457        // Perform a local clone of the repository, which will attempt to use
458        // hardlinks to set up the repository. This should speed up the clone operation
459        // quite a bit if it works.
460        let res = GIT
461            .as_ref()
462            .cloned()?
463            .arg("clone")
464            .arg("--local")
465            // Make sure to pass the local file path and not a file://... url. If given a url,
466            // Git treats the repository as a remote origin and gets confused because we don't
467            // have a HEAD checked out.
468            .arg(database.repo.path.simplified_display().to_string())
469            .arg(into.simplified_display().to_string())
470            .exec_with_output();
471
472        if let Err(e) = res {
473            debug!("Cloning git repo with --local failed, retrying without hardlinks: {e}");
474
475            GIT.as_ref()
476                .cloned()?
477                .arg("clone")
478                .arg("--no-hardlinks")
479                .arg(database.repo.path.simplified_display().to_string())
480                .arg(into.simplified_display().to_string())
481                .exec_with_output()?;
482        }
483
484        let repo = GitRepository::open(into)?;
485        let checkout = Self::new(revision, repo);
486        let lfs_ready = checkout.reset(database.lfs_ready, original_remote_url)?;
487        Ok(checkout.with_lfs_ready(lfs_ready))
488    }
489
490    /// Checks if the `HEAD` of this checkout points to the expected revision.
491    fn is_fresh(&self) -> bool {
492        match self.repo.rev_parse("HEAD") {
493            Ok(id) if id == self.revision => {
494                // See comments in reset() for why we check this
495                self.repo.path.join(CHECKOUT_READY_LOCK).exists()
496            }
497            _ => false,
498        }
499    }
500
501    /// Indicates Git LFS artifacts have been initialized (when requested).
502    pub(crate) fn lfs_ready(&self) -> Option<bool> {
503        self.lfs_ready
504    }
505
506    /// Set the Git LFS validation state (if any).
507    #[must_use]
508    pub(crate) fn with_lfs_ready(mut self, lfs: Option<bool>) -> Self {
509        self.lfs_ready = lfs;
510        self
511    }
512
513    /// This performs `git reset --hard` to the revision of this checkout and updates submodules,
514    /// with additional interrupt protection by a dummy file [`CHECKOUT_READY_LOCK`].
515    ///
516    /// If we're interrupted while performing any of the processes in this method (e.g., we die
517    /// because of a signal) uv needs to be sure to try to check out this
518    /// repo again on the next go-round.
519    ///
520    /// To enable this we have a dummy file in our checkout, [`.ok`],
521    /// which if present means that the repo has been successfully checked out and is
522    /// ready to go. Hence if we start to update submodules, we make sure this file
523    /// *doesn't* exist, and then once we're done we create the file.
524    ///
525    /// [`.ok`]: CHECKOUT_READY_LOCK
526    /// `git reset --hard [<commit>]` can break relative submodule URLs, so we update submodules
527    /// using the original remote URL.
528    fn reset(
529        &self,
530        with_lfs: Option<bool>,
531        original_remote_url: &DisplaySafeUrl,
532    ) -> Result<Option<bool>> {
533        let ok_file = self.repo.path.join(CHECKOUT_READY_LOCK);
534        let _ = paths::remove_file(&ok_file);
535
536        // We want to skip smudge if lfs was disabled for the repository
537        // as smudge filters can trigger on a reset even if lfs artifacts
538        // were not originally "fetched".
539        let lfs_skip_smudge = if with_lfs == Some(true) { "0" } else { "1" };
540
541        debug!("Reset {} to {}", self.repo.path.display(), self.revision);
542
543        // Perform the hard reset.
544        GIT.as_ref()
545            .cloned()?
546            .arg("reset")
547            .arg("--hard")
548            .arg(self.revision.as_str())
549            .env(EnvVars::GIT_LFS_SKIP_SMUDGE, lfs_skip_smudge)
550            .cwd(&self.repo.path)
551            .exec_with_output()?;
552
553        // Initialize direct submodules using the original remote URL so Git can resolve relative
554        // submodule URLs, but don't write it to `remote.origin.url`. Git persists resolved submodule
555        // URLs during initialization, so writing a credentialed parent remote can leak credentials
556        // into checkout configuration.
557        //
558        // Do not use `--recursive` here: command-local `remote.origin.url` config is inherited by
559        // Git commands run inside submodules, which would make nested relative URLs resolve against
560        // the top-level remote instead of their immediate parent submodule.
561        let mut submodule_update = GIT.as_ref().cloned()?;
562        for config in submodule_update_config(original_remote_url) {
563            submodule_update.arg("-c").arg(config);
564        }
565
566        submodule_update
567            .arg("submodule")
568            .arg("update")
569            .arg("--init")
570            .env(EnvVars::GIT_LFS_SKIP_SMUDGE, lfs_skip_smudge)
571            .cwd(&self.repo.path)
572            .exec_with_output()
573            .map(drop)?;
574
575        // Recursively update nested submodules without overriding `remote.origin.url`, so each
576        // nested relative URL resolves against its immediate parent submodule. The transient
577        // credential rewrite is still safe to inherit because it only affects transport.
578        let mut submodule_update = GIT.as_ref().cloned()?;
579        for config in submodule_auth_config(original_remote_url) {
580            submodule_update.arg("-c").arg(config);
581        }
582
583        submodule_update
584            .arg("submodule")
585            .arg("update")
586            .arg("--recursive")
587            .arg("--init")
588            .env(EnvVars::GIT_LFS_SKIP_SMUDGE, lfs_skip_smudge)
589            .cwd(&self.repo.path)
590            .exec_with_output()
591            .map(drop)?;
592
593        // Validate Git LFS objects (if needed) after the reset.
594        // See `fetch_lfs` why we do this.
595        let lfs_validation = match with_lfs {
596            None => None,
597            Some(false) => Some(false),
598            Some(true) => Some(self.repo.lfs_fsck_objects(self.revision.as_str())),
599        };
600
601        // The .ok file should be written when the reset is successful.
602        // When Git LFS is enabled, the objects must also be fetched and
603        // validated successfully as part of the corresponding db.
604        if with_lfs.is_none() || lfs_validation == Some(true) {
605            paths::create(ok_file)?;
606        }
607
608        Ok(lfs_validation)
609    }
610}
611
612/// Return command-local Git configuration for initializing direct submodules in a checkout.
613///
614/// Relative submodule URLs are resolved from `remote.origin.url`, but writing the original remote
615/// URL into checkout configuration can persist credentials in the parent repository or submodule
616/// remotes. Instead, callers pass these values via `git -c`, using a credential-stripped origin URL
617/// for resolution and a transient `url.*.insteadOf` rewrite when credentials are needed for
618/// transport.
619fn submodule_update_config(original_remote_url: &DisplaySafeUrl) -> Vec<String> {
620    let remote_url = original_remote_url.without_credentials();
621    let mut config = vec![format!("remote.origin.url={}", remote_url.as_str())];
622
623    config.extend(submodule_auth_config(original_remote_url));
624    config
625}
626
627/// Return command-local Git authentication configuration for updating submodules.
628///
629/// Unlike `remote.origin.url`, these rewrites are safe to inherit during recursive submodule
630/// updates: they rewrite transport URLs for authentication, but do not change the base URL that Git
631/// uses to resolve nested relative submodule URLs.
632fn submodule_auth_config(original_remote_url: &DisplaySafeUrl) -> Vec<String> {
633    let remote_url = original_remote_url.without_credentials();
634    let mut config = Vec::new();
635
636    if remote_url.as_str() != original_remote_url.as_str() {
637        let safe_root = remote_url_root(remote_url.as_ref());
638        let credentialed_root = remote_url_root(original_remote_url);
639
640        if safe_root.as_str() != credentialed_root.as_str() {
641            config.push(format!(
642                "url.{}.insteadOf={}",
643                credentialed_root.as_str(),
644                safe_root.as_str()
645            ));
646        }
647    }
648
649    config
650}
651
652/// Return the scheme, authority, and root path of a remote URL.
653///
654/// This is used as the rewrite prefix for `url.*.insteadOf`, so a credentialed parent URL can
655/// authenticate sibling submodule URLs without making the credentials part of any persisted
656/// submodule URL.
657fn remote_url_root(url: &Url) -> Url {
658    let mut root = url.clone();
659    root.set_path("/");
660    root.set_query(None);
661    root.set_fragment(None);
662    root
663}
664
665/// Attempts to fetch the given git `reference` for a Git repository.
666///
667/// This is the main entry for git clone/fetch. It does the following:
668///
669/// * Turns [`GitReference`] into refspecs accordingly.
670/// * Dispatches `git fetch` using the git CLI.
671///
672/// The `remote_url` argument is the git remote URL where we want to fetch from.
673fn fetch(
674    repo: &mut GitRepository,
675    remote_url: &DisplaySafeUrl,
676    reference: ReferenceOrOid<'_>,
677    disable_ssl: bool,
678    offline: bool,
679) -> Result<()> {
680    let oid_to_fetch = if let ReferenceOrOid::Oid(rev) = reference {
681        let local_object = reference.resolve(repo).ok();
682        if let Some(local_object) = local_object {
683            if rev == local_object {
684                return Ok(());
685            }
686        }
687
688        // If we know the reference is a full commit hash, we can just return it without
689        // querying GitHub.
690        Some(rev)
691    } else {
692        None
693    };
694
695    // Translate the reference desired here into an actual list of refspecs
696    // which need to get fetched. Additionally record if we're fetching tags.
697    let mut refspecs = Vec::new();
698    let mut tags = false;
699    let mut refspec_strategy = RefspecStrategy::All;
700    // The `+` symbol on the refspec means to allow a forced (fast-forward)
701    // update which is needed if there is ever a force push that requires a
702    // fast-forward.
703    match reference {
704        // For branches and tags we can fetch simply one reference and copy it
705        // locally, no need to fetch other branches/tags.
706        ReferenceOrOid::Reference(GitReference::Branch(branch)) => {
707            refspecs.push(format!("+refs/heads/{branch}:refs/remotes/origin/{branch}"));
708        }
709
710        ReferenceOrOid::Reference(GitReference::Tag(tag)) => {
711            refspecs.push(format!("+refs/tags/{tag}:refs/remotes/origin/tags/{tag}"));
712        }
713
714        ReferenceOrOid::Reference(GitReference::BranchOrTag(branch_or_tag)) => {
715            refspecs.push(format!(
716                "+refs/heads/{branch_or_tag}:refs/remotes/origin/{branch_or_tag}"
717            ));
718            refspecs.push(format!(
719                "+refs/tags/{branch_or_tag}:refs/remotes/origin/tags/{branch_or_tag}"
720            ));
721            refspec_strategy = RefspecStrategy::First;
722        }
723
724        // For ambiguous references, we can fetch the exact commit (if known); otherwise,
725        // we fetch all branches and tags.
726        ReferenceOrOid::Reference(GitReference::BranchOrTagOrCommit(branch_or_tag_or_commit)) => {
727            // The `oid_to_fetch` is the exact commit we want to fetch. But it could be the exact
728            // commit of a branch or tag. We should only fetch it directly if it's the exact commit
729            // of a short commit hash.
730            if let Some(oid_to_fetch) =
731                oid_to_fetch.filter(|oid| is_short_hash_of(branch_or_tag_or_commit, *oid))
732            {
733                refspecs.push(format!("+{oid_to_fetch}:refs/commit/{oid_to_fetch}"));
734            } else {
735                // We don't know what the rev will point to. To handle this
736                // situation we fetch all branches and tags, and then we pray
737                // it's somewhere in there.
738                refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
739                refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
740                tags = true;
741            }
742        }
743
744        ReferenceOrOid::Reference(GitReference::DefaultBranch) => {
745            refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
746        }
747
748        ReferenceOrOid::Reference(GitReference::NamedRef(rev)) => {
749            refspecs.push(format!("+{rev}:{rev}"));
750        }
751
752        ReferenceOrOid::Oid(rev) => {
753            refspecs.push(format!("+{rev}:refs/commit/{rev}"));
754        }
755    }
756
757    debug!("Performing a Git fetch for: {remote_url}");
758    let result = match refspec_strategy {
759        RefspecStrategy::All => fetch_with_cli(
760            repo,
761            remote_url,
762            refspecs.as_slice(),
763            tags,
764            disable_ssl,
765            offline,
766        ),
767        RefspecStrategy::First => {
768            // Try each refspec
769            let mut errors = refspecs
770                .iter()
771                .map_while(|refspec| {
772                    let fetch_result = fetch_with_cli(
773                        repo,
774                        remote_url,
775                        std::slice::from_ref(refspec),
776                        tags,
777                        disable_ssl,
778                        offline,
779                    );
780
781                    // Stop after the first success and log failures
782                    match fetch_result {
783                        Err(ref err) => {
784                            debug!("Failed to fetch refspec `{refspec}`: {err}");
785                            Some(fetch_result)
786                        }
787                        Ok(()) => None,
788                    }
789                })
790                .collect::<Vec<_>>();
791
792            if errors.len() == refspecs.len() {
793                if let Some(result) = errors.pop() {
794                    // Use the last error for the message
795                    result
796                } else {
797                    // Can only occur if there were no refspecs to fetch
798                    Ok(())
799                }
800            } else {
801                Ok(())
802            }
803        }
804    };
805    match reference {
806        // With the default branch, adding context is confusing
807        ReferenceOrOid::Reference(GitReference::DefaultBranch) => result,
808        _ => result.with_context(|| {
809            format!(
810                "failed to fetch {} `{}`",
811                reference.kind_str(),
812                reference.as_rev()
813            )
814        }),
815    }
816}
817
818/// Attempts to use `git` CLI installed on the system to fetch a repository.
819fn fetch_with_cli(
820    repo: &mut GitRepository,
821    url: &DisplaySafeUrl,
822    refspecs: &[String],
823    tags: bool,
824    disable_ssl: bool,
825    offline: bool,
826) -> Result<()> {
827    let mut cmd = GIT.as_ref().cloned()?;
828    // Disable interactive prompts in the terminal, as they'll be erased by the progress bar
829    // animation and the process will "hang". Interactive prompts via the GUI like `SSH_ASKPASS`
830    // are still usable.
831    cmd.env(EnvVars::GIT_TERMINAL_PROMPT, "0");
832
833    cmd.arg("fetch");
834    if tags {
835        cmd.arg("--tags");
836    }
837    if disable_ssl {
838        debug!("Disabling SSL verification for Git fetch via `GIT_SSL_NO_VERIFY`");
839        cmd.env(EnvVars::GIT_SSL_NO_VERIFY, "true");
840    }
841    if offline {
842        debug!("Disabling remote protocols for Git fetch via `GIT_ALLOW_PROTOCOL=file`");
843        cmd.env(EnvVars::GIT_ALLOW_PROTOCOL, "file");
844    }
845    cmd.arg("--force") // handle force pushes
846        .arg("--update-head-ok") // see discussion in #2078
847        .arg(url.as_str())
848        .args(refspecs)
849        .cwd(&repo.path);
850
851    // We capture the output to avoid streaming it to the user's console during clones.
852    // The required `on...line` callbacks currently do nothing.
853    // The output appears to be included in error messages by default.
854    cmd.exec_with_output().map_err(|err| {
855        let msg = err.to_string();
856        if msg.contains("transport '") && msg.contains("' not allowed") && offline {
857            return GitError::TransportNotAllowed.into();
858        }
859        err
860    })?;
861
862    Ok(())
863}
864
865/// A global cache of the `git lfs` command.
866///
867/// Returns an error if Git LFS isn't available.
868/// Caching the command allows us to only check if LFS is installed once.
869///
870/// We also support a helper private environment variable to allow
871/// controlling the LFS extension from being loaded for testing purposes.
872/// Once installed, Git will always load `git-lfs` as a built-in alias
873/// which takes priority over loading from `PATH` which prevents us
874/// from shadowing the extension with other means.
875pub static GIT_LFS: LazyLock<Result<ProcessBuilder>> = LazyLock::new(|| {
876    if std::env::var_os(EnvVars::UV_INTERNAL__TEST_LFS_DISABLED).is_some() {
877        return Err(anyhow!("Git LFS extension has been forcefully disabled."));
878    }
879
880    let mut cmd = GIT.as_ref()?.clone();
881    cmd.arg("lfs");
882
883    // Run a simple command to verify LFS is installed
884    cmd.clone().arg("version").exec_with_output()?;
885    Ok(cmd)
886});
887
888/// Attempts to use `git-lfs` CLI to fetch required LFS objects for a given revision.
889fn fetch_lfs(
890    repo: &mut GitRepository,
891    url: &DisplaySafeUrl,
892    revision: &GitOid,
893    disable_ssl: bool,
894) -> Result<bool> {
895    let mut cmd = if let Ok(lfs) = GIT_LFS.as_ref() {
896        debug!("Fetching Git LFS objects");
897        lfs.clone()
898    } else {
899        // Since this feature is opt-in, warn if not available
900        warn!("Git LFS is not available, skipping LFS fetch");
901        return Ok(false);
902    };
903
904    if disable_ssl {
905        debug!("Disabling SSL verification for Git LFS");
906        cmd.env(EnvVars::GIT_SSL_NO_VERIFY, "true");
907    }
908
909    cmd.arg("fetch")
910        .arg(url.as_str())
911        .arg(revision.as_str())
912        // We should not support requesting LFS artifacts with skip smudge being set.
913        // While this may not be necessary, it's added to avoid any potential future issues.
914        .env_remove(EnvVars::GIT_LFS_SKIP_SMUDGE)
915        .cwd(&repo.path);
916
917    cmd.exec_with_output()?;
918
919    // We now validate the Git LFS objects explicitly (if supported). This is
920    // needed to avoid issues with Git LFS not being installed or configured
921    // on the system and giving the wrong impression to the user that Git LFS
922    // objects were initialized correctly when installation finishes.
923    // We may want to allow the user to skip validation in the future via
924    // UV_GIT_LFS_NO_VALIDATION environment variable on rare cases where
925    // validation costs outweigh the benefit.
926    let validation_result = repo.lfs_fsck_objects(revision.as_str());
927
928    Ok(validation_result)
929}
930
931/// Whether `rev` is a shorter hash of `oid`.
932fn is_short_hash_of(rev: &str, oid: GitOid) -> bool {
933    let long_hash = oid.to_string();
934    match long_hash.get(..rev.len()) {
935        Some(truncated_long_hash) => truncated_long_hash.eq_ignore_ascii_case(rev),
936        None => false,
937    }
938}
939
#[cfg(test)]
mod tests {
    use super::*;

    /// For an HTTPS URL with embedded credentials, the `remote.origin.url` override must not
    /// contain them; they are expected only in the `url.<base>.insteadOf` rewrite entry.
    #[test]
    fn submodule_update_config_strips_credentials_from_origin_override() {
        let with_credentials =
            DisplaySafeUrl::parse("https://user:password@example.com/org/repo.git").unwrap();

        let expected = vec![
            String::from("remote.origin.url=https://example.com/org/repo.git"),
            String::from("url.https://user:password@example.com/.insteadOf=https://example.com/"),
        ];

        assert_eq!(submodule_update_config(&with_credentials), expected);
    }

    /// For an SSH URL, the `git` user stays in the `remote.origin.url` override and no
    /// `insteadOf` rewrite entry is produced.
    #[test]
    fn submodule_update_config_preserves_git_ssh_user() {
        let ssh_remote = DisplaySafeUrl::parse("ssh://git@example.com/org/repo.git").unwrap();

        let expected = vec![String::from(
            "remote.origin.url=ssh://git@example.com/org/repo.git",
        )];

        assert_eq!(submodule_update_config(&ssh_remote), expected);
    }
}