uv_git/
source.rs

1//! Git support is derived from Cargo's implementation.
2//! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice.
3//! Source: <https://github.com/rust-lang/cargo/blob/23eb492cf920ce051abfc56bbaf838514dc8365c/src/cargo/sources/git/source.rs>
4
5use std::borrow::Cow;
6use std::path::{Path, PathBuf};
7use std::sync::Arc;
8
9use anyhow::Result;
10use tracing::{debug, instrument};
11
12use uv_cache_key::{RepositoryUrl, cache_digest};
13use uv_git_types::{GitOid, GitReference, GitUrl};
14use uv_redacted::DisplaySafeUrl;
15
16use crate::GIT_STORE;
17use crate::git::{GitDatabase, GitRemote};
18
/// A remote Git source that can be checked out locally.
///
/// Constructed with [`GitSource::new`], optionally configured via [`GitSource::dangerous`] and
/// [`GitSource::with_reporter`], and consumed by [`GitSource::fetch`].
pub struct GitSource {
    /// The Git URL to fetch; supplies the repository, the requested reference, the precise
    /// commit (if any), and the LFS setting.
    git: GitUrl,
    /// Whether to disable SSL verification.
    disable_ssl: bool,
    /// Whether to operate without network connectivity.
    offline: bool,
    /// The cache root; the Git database lives under `db/` and checkouts under `checkouts/`.
    cache: PathBuf,
    /// The reporter to notify when a checkout starts and completes, if any.
    reporter: Option<Arc<dyn Reporter>>,
}
32
33impl GitSource {
34    /// Initialize a [`GitSource`] with the given Git URL, HTTP client, and cache path.
35    pub fn new(git: GitUrl, cache: impl Into<PathBuf>, offline: bool) -> Self {
36        Self {
37            git,
38            disable_ssl: false,
39            offline,
40            cache: cache.into(),
41            reporter: None,
42        }
43    }
44
45    /// Disable SSL verification for this [`GitSource`].
46    #[must_use]
47    pub fn dangerous(self) -> Self {
48        Self {
49            disable_ssl: true,
50            ..self
51        }
52    }
53
54    /// Set the [`Reporter`] to use for the [`GitSource`].
55    #[must_use]
56    pub fn with_reporter(self, reporter: Arc<dyn Reporter>) -> Self {
57        Self {
58            reporter: Some(reporter),
59            ..self
60        }
61    }
62
63    /// Fetch the underlying Git repository at the given revision.
64    #[instrument(skip(self), fields(repository = %self.git.repository(), rev = ?self.git.precise()))]
65    pub fn fetch(self) -> Result<Fetch> {
66        let lfs_requested = self.git.lfs().enabled();
67
68        // Compute the canonical URL for the repository.
69        let canonical = RepositoryUrl::new(self.git.repository());
70
71        // The path to the repo, within the Git database.
72        let ident = cache_digest(&canonical);
73        let db_path = self.cache.join("db").join(&ident);
74
75        // Authenticate the URL, if necessary.
76        let remote = if let Some(credentials) = GIT_STORE.get(&canonical) {
77            Cow::Owned(credentials.apply(self.git.repository().clone()))
78        } else {
79            Cow::Borrowed(self.git.repository())
80        };
81
82        // Fetch the commit, if we don't already have it. Wrapping this section in a closure makes
83        // it easier to short-circuit this in the cases where we do have the commit.
84        let (db, actual_rev, maybe_task) = || -> Result<(GitDatabase, GitOid, Option<usize>)> {
85            let git_remote = GitRemote::new(&remote);
86            let maybe_db = git_remote.db_at(&db_path).ok();
87
88            // If we have a locked revision, and we have a pre-existing database which has that
89            // revision, then no update needs to happen.
90            // When requested, we also check if LFS artifacts have been fetched and validated.
91            if let (Some(rev), Some(db)) = (self.git.precise(), &maybe_db) {
92                if db.contains(rev) && (!lfs_requested || db.contains_lfs_artifacts(rev)) {
93                    debug!("Using existing Git source `{}`", self.git.repository());
94                    return Ok((
95                        maybe_db
96                            .unwrap()
97                            .with_lfs_ready(lfs_requested.then_some(true)),
98                        rev,
99                        None,
100                    ));
101                }
102            }
103
104            // If the revision isn't locked, but it looks like it might be an exact commit hash,
105            // and we do have a pre-existing database, then check whether it is, in fact, a commit
106            // hash. If so, treat it like it's locked.
107            // When requested, we also check if LFS artifacts have been fetched and validated.
108            if let Some(db) = &maybe_db {
109                if let GitReference::BranchOrTagOrCommit(maybe_commit) = self.git.reference() {
110                    if let Ok(oid) = maybe_commit.parse::<GitOid>() {
111                        if db.contains(oid) && (!lfs_requested || db.contains_lfs_artifacts(oid)) {
112                            // This reference is an exact commit. Treat it like it's locked.
113                            debug!("Using existing Git source `{}`", self.git.repository());
114                            return Ok((
115                                maybe_db
116                                    .unwrap()
117                                    .with_lfs_ready(lfs_requested.then_some(true)),
118                                oid,
119                                None,
120                            ));
121                        }
122                    }
123                }
124            }
125
126            // ... otherwise, we use this state to update the Git database. Note that we still check
127            // for being offline here, for example in the situation that we have a locked revision
128            // but the database doesn't have it.
129            debug!("Updating Git source `{}`", self.git.repository());
130
131            // Report the checkout operation to the reporter.
132            let task = self.reporter.as_ref().map(|reporter| {
133                reporter.on_checkout_start(git_remote.url(), self.git.reference().as_rev())
134            });
135
136            let (db, actual_rev) = git_remote.checkout(
137                &db_path,
138                maybe_db,
139                self.git.reference(),
140                self.git.precise(),
141                self.disable_ssl,
142                self.offline,
143                lfs_requested,
144            )?;
145
146            Ok((db, actual_rev, task))
147        }()?;
148
149        // Don’t use the full hash, in order to contribute less to reaching the
150        // path length limit on Windows.
151        let short_id = db.to_short_id(actual_rev)?;
152
153        // Compute the canonical URL for the repository checkout.
154        let canonical = canonical.with_lfs(Some(lfs_requested));
155        // Recompute the checkout hash when Git LFS is enabled as we want
156        // to distinctly differentiate between LFS vs non-LFS source trees.
157        let ident = if lfs_requested {
158            cache_digest(&canonical)
159        } else {
160            ident
161        };
162        let checkout_path = self
163            .cache
164            .join("checkouts")
165            .join(&ident)
166            .join(short_id.as_str());
167
168        // Check out `actual_rev` from the database to a scoped location on the
169        // filesystem. This will use hard links and such to ideally make the
170        // checkout operation here pretty fast.
171        let checkout = db.copy_to(actual_rev, &checkout_path)?;
172
173        // Report the checkout operation to the reporter.
174        if let Some(task) = maybe_task {
175            if let Some(reporter) = self.reporter.as_ref() {
176                reporter.on_checkout_complete(remote.as_ref(), actual_rev.as_str(), task);
177            }
178        }
179
180        Ok(Fetch {
181            git: self.git.with_precise(actual_rev),
182            path: checkout_path,
183            lfs_ready: checkout.lfs_ready().unwrap_or(false),
184        })
185    }
186}
187
/// The result of a successful [`GitSource::fetch`].
pub struct Fetch {
    /// The [`GitUrl`] reference that was fetched, updated with the precise commit that was
    /// checked out.
    git: GitUrl,
    /// The path to the checked out repository.
    path: PathBuf,
    /// Git LFS artifacts have been initialized (if requested); `false` when the checkout did
    /// not report an LFS state.
    lfs_ready: bool,
}
196
impl Fetch {
    /// Return the [`GitUrl`] that was fetched.
    pub fn git(&self) -> &GitUrl {
        &self.git
    }

    /// Return the path to the checked out repository.
    pub fn path(&self) -> &Path {
        &self.path
    }

    /// Return whether Git LFS artifacts have been initialized for this checkout.
    pub fn lfs_ready(&self) -> &bool {
        &self.lfs_ready
    }

    /// Consume the [`Fetch`], returning the [`GitUrl`] that was fetched.
    pub fn into_git(self) -> GitUrl {
        self.git
    }

    /// Consume the [`Fetch`], returning the path to the checked out repository.
    pub fn into_path(self) -> PathBuf {
        self.path
    }
}
218
/// Receives notifications about repository checkouts, as issued by [`GitSource::fetch`].
pub trait Reporter: Send + Sync {
    /// Callback to invoke when a repository checkout begins.
    ///
    /// The returned value identifies the checkout; [`GitSource::fetch`] passes it back as
    /// `index` to [`Reporter::on_checkout_complete`].
    fn on_checkout_start(&self, url: &DisplaySafeUrl, rev: &str) -> usize;

    /// Callback to invoke when a repository checkout completes.
    ///
    /// As used by [`GitSource::fetch`], `rev` is the resolved commit and `index` is the value
    /// returned by the matching [`Reporter::on_checkout_start`] call.
    fn on_checkout_complete(&self, url: &DisplaySafeUrl, rev: &str, index: usize);
}