uv_git/source.rs
1//! Git support is derived from Cargo's implementation.
2//! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice.
3//! Source: <https://github.com/rust-lang/cargo/blob/23eb492cf920ce051abfc56bbaf838514dc8365c/src/cargo/sources/git/source.rs>
4
5use std::borrow::Cow;
6use std::path::{Path, PathBuf};
7use std::sync::Arc;
8
9use anyhow::Result;
10use tracing::{debug, instrument};
11
12use uv_cache_key::cache_digest;
13use uv_git_types::{GitOid, GitReference, GitUrl};
14use uv_redacted::DisplaySafeUrl;
15
16use crate::GIT_STORE;
17use crate::git::{GitDatabase, GitRemote};
18
/// A remote Git source that can be checked out locally.
///
/// Construct one with [`GitSource::new`], optionally adjust it with
/// [`GitSource::dangerous`] or [`GitSource::with_reporter`], and then call
/// [`GitSource::fetch`] to materialize a checkout on disk.
pub struct GitSource {
    /// The Git URL to fetch: the repository, the requested reference, and (if known) the
    /// precise revision.
    git: GitUrl,
    /// Whether to disable SSL verification.
    disable_ssl: bool,
    /// Whether to operate without network connectivity.
    offline: bool,
    /// The root cache directory. Git databases are stored under `db/` and checkouts under
    /// `checkouts/` within this directory.
    cache: PathBuf,
    /// The reporter to notify about checkout progress, if any.
    reporter: Option<Arc<dyn Reporter>>,
}
32
33impl GitSource {
34 /// Initialize a [`GitSource`] with the given Git URL, HTTP client, and cache path.
35 pub fn new(git: GitUrl, cache: impl Into<PathBuf>, offline: bool) -> Self {
36 Self {
37 git,
38 disable_ssl: false,
39 offline,
40 cache: cache.into(),
41 reporter: None,
42 }
43 }
44
45 /// Disable SSL verification for this [`GitSource`].
46 #[must_use]
47 pub fn dangerous(self) -> Self {
48 Self {
49 disable_ssl: true,
50 ..self
51 }
52 }
53
54 /// Set the [`Reporter`] to use for the [`GitSource`].
55 #[must_use]
56 pub fn with_reporter(self, reporter: Arc<dyn Reporter>) -> Self {
57 Self {
58 reporter: Some(reporter),
59 ..self
60 }
61 }
62
63 /// Fetch the underlying Git repository at the given revision.
64 #[instrument(skip(self), fields(repository = %self.git.url(), rev = ?self.git.precise()))]
65 pub fn fetch(self) -> Result<Fetch> {
66 let lfs_requested = self.git.lfs().enabled();
67
68 // The path to the repo, within the Git database.
69 let ident = cache_digest(self.git.repository());
70 let db_path = self.cache.join("db").join(&ident);
71
72 // Authenticate the URL, if necessary.
73 let remote = if let Some(credentials) = GIT_STORE.get(self.git.repository()) {
74 Cow::Owned(credentials.apply(self.git.url().clone()))
75 } else {
76 Cow::Borrowed(self.git.url())
77 };
78
79 // Fetch the commit, if we don't already have it. Wrapping this section in a closure makes
80 // it easier to short-circuit this in the cases where we do have the commit.
81 let (db, actual_rev, maybe_task) = || -> Result<(GitDatabase, GitOid, Option<usize>)> {
82 let git_remote = GitRemote::new(&remote);
83 let maybe_db = git_remote.db_at(&db_path).ok();
84
85 // If we have a locked revision, and we have a pre-existing database which has that
86 // revision, then no update needs to happen.
87 // When requested, we also check if LFS artifacts have been fetched and validated.
88 if let (Some(rev), Some(db)) = (self.git.precise(), &maybe_db) {
89 if db.contains(rev) && (!lfs_requested || db.contains_lfs_artifacts(rev)) {
90 debug!("Using existing Git source `{}`", self.git.url());
91 return Ok((
92 maybe_db
93 .unwrap()
94 .with_lfs_ready(lfs_requested.then_some(true)),
95 rev,
96 None,
97 ));
98 }
99 }
100
101 // If the revision isn't locked, but it looks like it might be an exact commit hash,
102 // and we do have a pre-existing database, then check whether it is, in fact, a commit
103 // hash. If so, treat it like it's locked.
104 // When requested, we also check if LFS artifacts have been fetched and validated.
105 if let Some(db) = &maybe_db {
106 if let GitReference::BranchOrTagOrCommit(maybe_commit) = self.git.reference() {
107 if let Ok(oid) = maybe_commit.parse::<GitOid>() {
108 if db.contains(oid) && (!lfs_requested || db.contains_lfs_artifacts(oid)) {
109 // This reference is an exact commit. Treat it like it's locked.
110 debug!("Using existing Git source `{}`", self.git.url());
111 return Ok((
112 maybe_db
113 .unwrap()
114 .with_lfs_ready(lfs_requested.then_some(true)),
115 oid,
116 None,
117 ));
118 }
119 }
120 }
121 }
122
123 // ... otherwise, we use this state to update the Git database. Note that we still check
124 // for being offline here, for example in the situation that we have a locked revision
125 // but the database doesn't have it.
126 debug!("Updating Git source `{}`", self.git.url());
127
128 // Report the checkout operation to the reporter.
129 let task = self.reporter.as_ref().map(|reporter| {
130 reporter.on_checkout_start(git_remote.url(), self.git.reference().as_rev())
131 });
132
133 let (db, actual_rev) = git_remote.checkout(
134 &db_path,
135 maybe_db,
136 self.git.reference(),
137 self.git.precise(),
138 self.disable_ssl,
139 self.offline,
140 lfs_requested,
141 )?;
142
143 Ok((db, actual_rev, task))
144 }()?;
145
146 // Don’t use the full hash, in order to contribute less to reaching the
147 // path length limit on Windows.
148 let short_id = db.to_short_id(actual_rev)?;
149
150 // Compute the canonical URL for the repository checkout.
151 let canonical = self.git.repository().clone().with_lfs(Some(lfs_requested));
152 // Recompute the checkout hash when Git LFS is enabled as we want
153 // to distinctly differentiate between LFS vs non-LFS source trees.
154 let ident = if lfs_requested {
155 cache_digest(&canonical)
156 } else {
157 ident
158 };
159 let checkout_path = self
160 .cache
161 .join("checkouts")
162 .join(&ident)
163 .join(short_id.as_str());
164
165 // Check out `actual_rev` from the database to a scoped location on the
166 // filesystem. This will use hard links and such to ideally make the
167 // checkout operation here pretty fast.
168 let checkout = db.copy_to(actual_rev, &checkout_path)?;
169
170 // Report the checkout operation to the reporter.
171 if let Some(task) = maybe_task {
172 if let Some(reporter) = self.reporter.as_ref() {
173 reporter.on_checkout_complete(remote.as_ref(), actual_rev.as_str(), task);
174 }
175 }
176
177 Ok(Fetch {
178 git: self.git.with_precise(actual_rev),
179 path: checkout_path,
180 lfs_ready: checkout.lfs_ready().unwrap_or(false),
181 })
182 }
183}
184
/// The result of a successful [`GitSource::fetch`]: a resolved Git URL and the location of its
/// checkout on disk.
pub struct Fetch {
    /// The [`GitUrl`] reference that was fetched, with its precise revision set to the commit
    /// that was checked out.
    git: GitUrl,
    /// The path to the checked out repository.
    path: PathBuf,
    /// Git LFS artifacts have been initialized (if requested).
    lfs_ready: bool,
}
193
194impl Fetch {
195 pub fn git(&self) -> &GitUrl {
196 &self.git
197 }
198
199 pub fn path(&self) -> &Path {
200 &self.path
201 }
202
203 pub fn lfs_ready(&self) -> &bool {
204 &self.lfs_ready
205 }
206
207 pub fn into_git(self) -> GitUrl {
208 self.git
209 }
210
211 pub fn into_path(self) -> PathBuf {
212 self.path
213 }
214}
215
/// A sink for progress notifications emitted while a [`GitSource`] updates its database.
///
/// Implementations must be `Send + Sync`, as the reporter is shared behind an `Arc`.
pub trait Reporter: Send + Sync {
    /// Callback to invoke when a repository checkout begins.
    ///
    /// The returned value identifies the operation; [`GitSource::fetch`] passes it back as the
    /// `index` argument of [`Reporter::on_checkout_complete`].
    fn on_checkout_start(&self, url: &DisplaySafeUrl, rev: &str) -> usize;

    /// Callback to invoke when a repository checkout completes.
    ///
    /// `index` is the value previously returned by [`Reporter::on_checkout_start`] for the same
    /// operation.
    fn on_checkout_complete(&self, url: &DisplaySafeUrl, rev: &str, index: usize);
}