Skip to main content

uv_git/
resolver.rs

1use std::borrow::Cow;
2use std::path::PathBuf;
3use std::str::FromStr;
4use std::sync::Arc;
5
6use dashmap::DashMap;
7use dashmap::mapref::one::Ref;
8use fs_err::tokio as fs;
9use reqwest_middleware::ClientWithMiddleware;
10use tracing::debug;
11
12use uv_cache_key::{RepositoryUrl, cache_digest};
13use uv_fs::{LockedFile, LockedFileError, LockedFileMode};
14use uv_git_types::{GitHubRepository, GitOid, GitReference, GitUrl};
15use uv_static::EnvVars;
16use uv_version::version;
17
18use crate::{
19    Fetch, GitSource, Reporter,
20    rate_limit::{GITHUB_RATE_LIMIT_STATUS, is_github_rate_limited},
21};
22
23#[derive(Debug, thiserror::Error)]
24pub enum GitResolverError {
25    #[error(transparent)]
26    Io(#[from] std::io::Error),
27    #[error(transparent)]
28    LockedFile(#[from] LockedFileError),
29    #[error(transparent)]
30    Join(#[from] tokio::task::JoinError),
31    #[error("Git operation failed")]
32    Git(#[source] anyhow::Error),
33    #[error(transparent)]
34    Reqwest(#[from] reqwest::Error),
35    #[error(transparent)]
36    ReqwestMiddleware(#[from] reqwest_middleware::Error),
37}
38
/// HTTP settings for fetching a Git repository.
///
/// Both flags are `false` in the [`Default`] value. The type is `Copy`, and the `with_*`
/// methods take and return the settings by value so they can be chained.
#[derive(Debug, Clone, Copy, Default)]
pub struct GitHttpSettings {
    /// Whether certificate verification should be disabled.
    disable_ssl: bool,
    /// Whether network access should be disabled.
    offline: bool,
}

impl GitHttpSettings {
    /// Configure whether certificate verification should be disabled.
    ///
    /// Returns the updated settings; the `offline` flag is left unchanged.
    #[must_use]
    pub fn with_disabled_ssl(mut self, disable_ssl: bool) -> Self {
        self.disable_ssl = disable_ssl;
        self
    }

    /// Configure whether network access should be disabled.
    ///
    /// Returns the updated settings; the `disable_ssl` flag is left unchanged.
    #[must_use]
    pub fn with_offline(mut self, offline: bool) -> Self {
        self.offline = offline;
        self
    }
}
61
62/// A resolver for Git repositories.
63#[derive(Default, Clone)]
64pub struct GitResolver(Arc<DashMap<RepositoryReference, GitOid>>);
65
66impl GitResolver {
67    /// Inserts a new [`GitOid`] for the given [`RepositoryReference`].
68    pub fn insert(&self, reference: RepositoryReference, sha: GitOid) {
69        self.0.insert(reference, sha);
70    }
71
72    /// Returns the [`GitOid`] for the given [`RepositoryReference`], if it exists.
73    fn get(&self, reference: &RepositoryReference) -> Option<Ref<'_, RepositoryReference, GitOid>> {
74        self.0.get(reference)
75    }
76
77    /// Return the [`GitOid`] for the given [`GitUrl`], if it is already known.
78    pub fn get_precise(&self, url: &GitUrl) -> Option<GitOid> {
79        // If the URL is already precise, return it.
80        if let Some(precise) = url.precise() {
81            return Some(precise);
82        }
83
84        // If we know the precise commit already, return it.
85        let reference = RepositoryReference::from(url);
86        if let Some(precise) = self.get(&reference) {
87            return Some(*precise);
88        }
89
90        None
91    }
92
93    /// Resolve a Git URL to a specific commit without performing any Git operations.
94    ///
95    /// Returns a [`GitOid`] if the URL has already been resolved (i.e., is available in the cache),
96    /// or if it can be fetched via the GitHub API. Otherwise, returns `None`.
97    pub async fn github_fast_path(
98        &self,
99        url: &GitUrl,
100        client: &ClientWithMiddleware,
101    ) -> Result<Option<GitOid>, GitResolverError> {
102        if std::env::var_os(EnvVars::UV_NO_GITHUB_FAST_PATH).is_some() {
103            return Ok(None);
104        }
105
106        // If the URL is already precise or we know the precise commit, return it.
107        if let Some(precise) = self.get_precise(url) {
108            return Ok(Some(precise));
109        }
110
111        // If the URL is a GitHub URL, attempt to resolve it via the GitHub API.
112        let Some(GitHubRepository { owner, repo }) = GitHubRepository::parse(url.repository())
113        else {
114            return Ok(None);
115        };
116
117        // Check if we're rate-limited by GitHub, before determining the Git reference
118        if GITHUB_RATE_LIMIT_STATUS.is_active() {
119            debug!("Rate-limited by GitHub. Skipping GitHub fast path attempt for: {url}");
120            return Ok(None);
121        }
122
123        // Determine the Git reference.
124        let rev = url.reference().as_rev();
125
126        let github_api_base_url = std::env::var(EnvVars::UV_GITHUB_FAST_PATH_URL)
127            .unwrap_or("https://api.github.com/repos".to_owned());
128        let github_api_url = format!("{github_api_base_url}/{owner}/{repo}/commits/{rev}");
129
130        debug!("Querying GitHub for commit at: {github_api_url}");
131        let mut request = client.get(&github_api_url);
132        request = request.header("Accept", "application/vnd.github.3.sha");
133        request = request.header(
134            "User-Agent",
135            format!("uv/{} (+https://github.com/astral-sh/uv)", version()),
136        );
137
138        let response = request.send().await?;
139        let status = response.status();
140        if !status.is_success() {
141            // Returns a 404 if the repository does not exist, and a 422 if GitHub is unable to
142            // resolve the requested rev.
143            debug!(
144                "GitHub API request failed for: {github_api_url} ({})",
145                response.status()
146            );
147
148            if is_github_rate_limited(&response) {
149                // Mark that we are being rate-limited by GitHub
150                GITHUB_RATE_LIMIT_STATUS.activate();
151            }
152
153            return Ok(None);
154        }
155
156        // Parse the response as a Git SHA.
157        let precise = response.text().await?;
158        let precise =
159            GitOid::from_str(&precise).map_err(|err| GitResolverError::Git(err.into()))?;
160
161        // Insert the resolved URL into the in-memory cache. This ensures that subsequent fetches
162        // resolve to the same precise commit.
163        self.insert(RepositoryReference::from(url), precise);
164
165        Ok(Some(precise))
166    }
167
168    /// Fetch a remote Git repository.
169    pub async fn fetch(
170        &self,
171        url: &GitUrl,
172        http_settings: GitHttpSettings,
173        cache: PathBuf,
174        reporter: Option<Arc<dyn Reporter>>,
175    ) -> Result<Fetch, GitResolverError> {
176        debug!("Fetching source distribution from Git: {url}");
177
178        let reference = RepositoryReference::from(url);
179
180        // If we know the precise commit already, reuse it, to ensure that all fetches within a
181        // single process are consistent.
182        let url = {
183            if let Some(precise) = self.get(&reference) {
184                Cow::Owned(url.clone().with_precise(*precise))
185            } else {
186                Cow::Borrowed(url)
187            }
188        };
189
190        // Avoid races between different processes, too.
191        let lock_dir = cache.join("locks");
192        fs::create_dir_all(&lock_dir).await?;
193        let repository_url = url.repository().clone();
194        let _lock = LockedFile::acquire(
195            lock_dir.join(cache_digest(&repository_url)),
196            LockedFileMode::Exclusive,
197            &repository_url,
198        )
199        .await?;
200
201        // Fetch the Git repository.
202        let source = if let Some(reporter) = reporter {
203            GitSource::new(url.as_ref().clone(), cache, http_settings.offline)
204                .with_reporter(reporter)
205        } else {
206            GitSource::new(url.as_ref().clone(), cache, http_settings.offline)
207        };
208
209        // If necessary, disable SSL.
210        let source = if http_settings.disable_ssl {
211            source.dangerous()
212        } else {
213            source
214        };
215
216        let fetch = tokio::task::spawn_blocking(move || source.fetch())
217            .await?
218            .map_err(GitResolverError::Git)?;
219
220        // Insert the resolved URL into the in-memory cache. This ensures that subsequent fetches
221        // resolve to the same precise commit.
222        if let Some(precise) = fetch.git().precise() {
223            self.insert(reference, precise);
224        }
225
226        Ok(fetch)
227    }
228
229    /// Given a remote source distribution, return a precise variant, if possible.
230    ///
231    /// For example, given a Git dependency with a reference to a branch or tag, return a URL
232    /// with a precise reference to the current commit of that branch or tag.
233    ///
234    /// This method takes into account various normalizations that are independent of the Git
235    /// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
236    /// prefix kinds.
237    ///
238    /// This method will only return precise URLs for URLs that have already been resolved via
239    /// [`resolve_precise`], and will return `None` for URLs that have not been resolved _or_
240    /// already have a precise reference.
241    pub fn precise(&self, url: GitUrl) -> Option<GitUrl> {
242        let reference = RepositoryReference::from(&url);
243        let precise = self.get(&reference)?;
244        Some(url.with_precise(*precise))
245    }
246
247    /// Returns `true` if the two Git URLs refer to the same precise commit.
248    pub fn same_ref(&self, a: &GitUrl, b: &GitUrl) -> bool {
249        // Convert `a` to a repository URL.
250        let a_ref = RepositoryReference::from(a);
251
252        // Convert `b` to a repository URL.
253        let b_ref = RepositoryReference::from(b);
254
255        // The URLs must refer to the same repository.
256        if a_ref.url != b_ref.url {
257            return false;
258        }
259
260        // If the URLs have the same tag, they refer to the same commit.
261        if a_ref.reference == b_ref.reference {
262            return true;
263        }
264
265        // Otherwise, the URLs must resolve to the same precise commit.
266        let Some(a_precise) = a.precise().or_else(|| self.get(&a_ref).map(|sha| *sha)) else {
267            return false;
268        };
269
270        let Some(b_precise) = b.precise().or_else(|| self.get(&b_ref).map(|sha| *sha)) else {
271            return false;
272        };
273
274        a_precise == b_precise
275    }
276}
277
278#[derive(Debug, Clone, PartialEq, Eq, Hash)]
279pub struct ResolvedRepositoryReference {
280    /// An abstract reference to a Git repository, including the URL and the commit (e.g., a branch,
281    /// tag, or revision).
282    pub reference: RepositoryReference,
283    /// The precise commit SHA of the reference.
284    pub sha: GitOid,
285}
286
287#[derive(Debug, Clone, PartialEq, Eq, Hash)]
288pub struct RepositoryReference {
289    /// The URL of the Git repository, with any query parameters and fragments removed.
290    pub url: RepositoryUrl,
291    /// The reference to the commit to use, which could be a branch, tag, or revision.
292    pub reference: GitReference,
293}
294
295impl From<&GitUrl> for RepositoryReference {
296    fn from(git: &GitUrl) -> Self {
297        Self {
298            url: git.repository().clone(),
299            reference: git.reference().clone(),
300        }
301    }
302}