Skip to main content

uv_git/
resolver.rs

1use std::borrow::Cow;
2use std::path::PathBuf;
3use std::str::FromStr;
4use std::sync::Arc;
5
6use fs_err::tokio as fs;
7use papaya::{HashMap, ResizeMode};
8use reqwest_middleware::ClientWithMiddleware;
9use tracing::debug;
10
11use uv_cache_key::{RepositoryUrl, cache_digest};
12use uv_fs::{LockedFile, LockedFileError, LockedFileMode};
13use uv_git_types::{GitHubRepository, GitOid, GitReference, GitUrl};
14use uv_static::EnvVars;
15use uv_version::version;
16
17use crate::{
18    Fetch, GitSource, Reporter,
19    rate_limit::{GITHUB_RATE_LIMIT_STATUS, is_github_rate_limited},
20};
21
22#[derive(Debug, thiserror::Error)]
23pub enum GitResolverError {
24    #[error(transparent)]
25    Io(#[from] std::io::Error),
26    #[error(transparent)]
27    LockedFile(#[from] LockedFileError),
28    #[error(transparent)]
29    Join(#[from] tokio::task::JoinError),
30    #[error("Git operation failed")]
31    Git(#[source] anyhow::Error),
32    #[error(transparent)]
33    Reqwest(#[from] reqwest::Error),
34    #[error(transparent)]
35    ReqwestMiddleware(#[from] reqwest_middleware::Error),
36}
37
/// Network-related options applied when fetching a Git repository.
///
/// Both flags default to `false`. Use the `with_*` builders to derive a modified copy.
#[derive(Debug, Clone, Copy, Default)]
pub struct GitHttpSettings {
    /// When `true`, certificate verification is disabled for the fetch.
    disable_ssl: bool,
    /// When `true`, network access is disabled for the fetch.
    offline: bool,
}

impl GitHttpSettings {
    /// Returns a copy of these settings with certificate verification disabled (`true`) or
    /// enabled (`false`).
    #[must_use]
    pub fn with_disabled_ssl(self, disable_ssl: bool) -> Self {
        Self {
            disable_ssl,
            ..self
        }
    }

    /// Returns a copy of these settings with network access disabled (`true`) or
    /// enabled (`false`).
    #[must_use]
    pub fn with_offline(self, offline: bool) -> Self {
        Self { offline, ..self }
    }
}
60
61/// A resolver for Git repositories.
62#[derive(Clone)]
63pub struct GitResolver(Arc<HashMap<RepositoryReference, GitOid>>);
64
65impl Default for GitResolver {
66    fn default() -> Self {
67        Self(Arc::new(
68            HashMap::builder().resize_mode(ResizeMode::Blocking).build(),
69        ))
70    }
71}
72
73impl GitResolver {
74    /// Inserts a new [`GitOid`] for the given [`RepositoryReference`].
75    pub fn insert(&self, reference: RepositoryReference, sha: GitOid) {
76        self.0.pin().insert(reference, sha);
77    }
78
79    /// Returns the [`GitOid`] for the given [`RepositoryReference`], if it exists.
80    fn get(&self, reference: &RepositoryReference) -> Option<GitOid> {
81        self.0.pin().get(reference).copied()
82    }
83
84    /// Return the [`GitOid`] for the given [`GitUrl`], if it is already known.
85    pub fn get_precise(&self, url: &GitUrl) -> Option<GitOid> {
86        // If the URL is already precise, return it.
87        if let Some(precise) = url.precise() {
88            return Some(precise);
89        }
90
91        // If we know the precise commit already, return it.
92        let reference = RepositoryReference::from(url);
93        if let Some(precise) = self.get(&reference) {
94            return Some(precise);
95        }
96
97        None
98    }
99
100    /// Resolve a Git URL to a specific commit without performing any Git operations.
101    ///
102    /// Returns a [`GitOid`] if the URL has already been resolved (i.e., is available in the cache),
103    /// or if it can be fetched via the GitHub API. Otherwise, returns `None`.
104    pub async fn github_fast_path(
105        &self,
106        url: &GitUrl,
107        client: &ClientWithMiddleware,
108    ) -> Result<Option<GitOid>, GitResolverError> {
109        if std::env::var_os(EnvVars::UV_NO_GITHUB_FAST_PATH).is_some() {
110            return Ok(None);
111        }
112
113        // If the URL is already precise or we know the precise commit, return it.
114        if let Some(precise) = self.get_precise(url) {
115            return Ok(Some(precise));
116        }
117
118        // If the URL is a GitHub URL, attempt to resolve it via the GitHub API.
119        let Some(GitHubRepository { owner, repo }) = GitHubRepository::parse(url.repository())
120        else {
121            return Ok(None);
122        };
123
124        // Check if we're rate-limited by GitHub, before determining the Git reference
125        if GITHUB_RATE_LIMIT_STATUS.is_active() {
126            debug!("Rate-limited by GitHub. Skipping GitHub fast path attempt for: {url}");
127            return Ok(None);
128        }
129
130        // Determine the Git reference.
131        let rev = url.reference().as_rev();
132
133        let github_api_base_url = std::env::var(EnvVars::UV_GITHUB_FAST_PATH_URL)
134            .unwrap_or("https://api.github.com/repos".to_owned());
135        let github_api_url = format!("{github_api_base_url}/{owner}/{repo}/commits/{rev}");
136
137        debug!("Querying GitHub for commit at: {github_api_url}");
138        let mut request = client.get(&github_api_url);
139        request = request.header("Accept", "application/vnd.github.3.sha");
140        request = request.header(
141            "User-Agent",
142            format!("uv/{} (+https://github.com/astral-sh/uv)", version()),
143        );
144
145        let response = request.send().await?;
146        let status = response.status();
147        if !status.is_success() {
148            // Returns a 404 if the repository does not exist, and a 422 if GitHub is unable to
149            // resolve the requested rev.
150            debug!(
151                "GitHub API request failed for: {github_api_url} ({})",
152                response.status()
153            );
154
155            if is_github_rate_limited(&response) {
156                // Mark that we are being rate-limited by GitHub
157                GITHUB_RATE_LIMIT_STATUS.activate();
158            }
159
160            return Ok(None);
161        }
162
163        // Parse the response as a Git SHA.
164        let precise = response.text().await?;
165        let precise =
166            GitOid::from_str(&precise).map_err(|err| GitResolverError::Git(err.into()))?;
167
168        // Insert the resolved URL into the in-memory cache. This ensures that subsequent fetches
169        // resolve to the same precise commit.
170        self.insert(RepositoryReference::from(url), precise);
171
172        Ok(Some(precise))
173    }
174
175    /// Fetch a remote Git repository.
176    pub async fn fetch(
177        &self,
178        url: &GitUrl,
179        http_settings: GitHttpSettings,
180        cache: PathBuf,
181        reporter: Option<Arc<dyn Reporter>>,
182    ) -> Result<Fetch, GitResolverError> {
183        debug!("Fetching source distribution from Git: {url}");
184
185        let reference = RepositoryReference::from(url);
186
187        // If we know the precise commit already, reuse it, to ensure that all fetches within a
188        // single process are consistent.
189        let url = {
190            if let Some(precise) = self.get(&reference) {
191                Cow::Owned(url.clone().with_precise(precise))
192            } else {
193                Cow::Borrowed(url)
194            }
195        };
196
197        // Avoid races between different processes, too.
198        let lock_dir = cache.join("locks");
199        fs::create_dir_all(&lock_dir).await?;
200        let repository_url = url.repository().clone();
201        let _lock = LockedFile::acquire(
202            lock_dir.join(cache_digest(&repository_url)),
203            LockedFileMode::Exclusive,
204            &repository_url,
205        )
206        .await?;
207
208        // Fetch the Git repository.
209        let source = if let Some(reporter) = reporter {
210            GitSource::new(url.as_ref().clone(), cache, http_settings.offline)
211                .with_reporter(reporter)
212        } else {
213            GitSource::new(url.as_ref().clone(), cache, http_settings.offline)
214        };
215
216        // If necessary, disable SSL.
217        let source = if http_settings.disable_ssl {
218            source.dangerous()
219        } else {
220            source
221        };
222
223        let fetch = tokio::task::spawn_blocking(move || source.fetch())
224            .await?
225            .map_err(GitResolverError::Git)?;
226
227        // Insert the resolved URL into the in-memory cache. This ensures that subsequent fetches
228        // resolve to the same precise commit.
229        if let Some(precise) = fetch.git().precise() {
230            self.insert(reference, precise);
231        }
232
233        Ok(fetch)
234    }
235
236    /// Given a remote source distribution, return a precise variant, if possible.
237    ///
238    /// For example, given a Git dependency with a reference to a branch or tag, return a URL
239    /// with a precise reference to the current commit of that branch or tag.
240    ///
241    /// This method takes into account various normalizations that are independent of the Git
242    /// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
243    /// prefix kinds.
244    ///
245    /// This method will only return precise URLs for URLs that have already been resolved via
246    /// [`resolve_precise`], and will return `None` for URLs that have not been resolved _or_
247    /// already have a precise reference.
248    pub fn precise(&self, url: GitUrl) -> Option<GitUrl> {
249        let reference = RepositoryReference::from(&url);
250        let precise = self.get(&reference)?;
251        Some(url.with_precise(precise))
252    }
253
254    /// Returns `true` if the two Git URLs refer to the same precise commit.
255    pub fn same_ref(&self, a: &GitUrl, b: &GitUrl) -> bool {
256        // Convert `a` to a repository URL.
257        let a_ref = RepositoryReference::from(a);
258
259        // Convert `b` to a repository URL.
260        let b_ref = RepositoryReference::from(b);
261
262        // The URLs must refer to the same repository.
263        if a_ref.url != b_ref.url {
264            return false;
265        }
266
267        // If the URLs have the same tag, they refer to the same commit.
268        if a_ref.reference == b_ref.reference {
269            return true;
270        }
271
272        // Otherwise, the URLs must resolve to the same precise commit.
273        let Some(a_precise) = a.precise().or_else(|| self.get(&a_ref)) else {
274            return false;
275        };
276
277        let Some(b_precise) = b.precise().or_else(|| self.get(&b_ref)) else {
278            return false;
279        };
280
281        a_precise == b_precise
282    }
283}
284
285#[derive(Debug, Clone, PartialEq, Eq, Hash)]
286pub struct ResolvedRepositoryReference {
287    /// An abstract reference to a Git repository, including the URL and the commit (e.g., a branch,
288    /// tag, or revision).
289    pub reference: RepositoryReference,
290    /// The precise commit SHA of the reference.
291    pub sha: GitOid,
292}
293
294#[derive(Debug, Clone, PartialEq, Eq, Hash)]
295pub struct RepositoryReference {
296    /// The URL of the Git repository, with any query parameters and fragments removed.
297    pub url: RepositoryUrl,
298    /// The reference to the commit to use, which could be a branch, tag, or revision.
299    pub reference: GitReference,
300}
301
302impl From<&GitUrl> for RepositoryReference {
303    fn from(git: &GitUrl) -> Self {
304        Self {
305            url: git.repository().clone(),
306            reference: git.reference().clone(),
307        }
308    }
309}