uv_git/
resolver.rs

1use std::borrow::Cow;
2use std::path::PathBuf;
3use std::str::FromStr;
4use std::sync::Arc;
5
6use dashmap::DashMap;
7use dashmap::mapref::one::Ref;
8use fs_err::tokio as fs;
9use reqwest_middleware::ClientWithMiddleware;
10use tracing::debug;
11
12use uv_cache_key::{RepositoryUrl, cache_digest};
13use uv_fs::LockedFile;
14use uv_git_types::{GitHubRepository, GitOid, GitReference, GitUrl};
15use uv_static::EnvVars;
16use uv_version::version;
17
18use crate::{
19    Fetch, GitSource, Reporter,
20    rate_limit::{GITHUB_RATE_LIMIT_STATUS, is_github_rate_limited},
21};
22
/// Errors surfaced by [`GitResolver`] while resolving or fetching a Git repository.
#[derive(Debug, thiserror::Error)]
pub enum GitResolverError {
    /// A filesystem or other I/O failure; displayed transparently as the underlying error.
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// Awaiting a spawned Tokio task failed; displayed transparently as the underlying error.
    #[error(transparent)]
    Join(#[from] tokio::task::JoinError),
    /// A Git-level failure. The wrapped [`anyhow::Error`] is exposed as the error source rather
    /// than in the display message.
    #[error("Git operation failed")]
    Git(#[source] anyhow::Error),
    /// An HTTP failure reported by `reqwest`; displayed transparently as the underlying error.
    #[error(transparent)]
    Reqwest(#[from] reqwest::Error),
    /// An HTTP failure reported by the `reqwest_middleware` stack; displayed transparently as the
    /// underlying error.
    #[error(transparent)]
    ReqwestMiddleware(#[from] reqwest_middleware::Error),
}
36
/// A resolver for Git repositories.
///
/// Wraps a shared, concurrent map from [`RepositoryReference`] (repository URL plus requested
/// reference) to the precise [`GitOid`] that reference was resolved to. The map lives behind an
/// [`Arc`], so cloning a `GitResolver` yields a handle to the same underlying cache.
#[derive(Default, Clone)]
pub struct GitResolver(Arc<DashMap<RepositoryReference, GitOid>>);
40
41impl GitResolver {
42    /// Inserts a new [`GitOid`] for the given [`RepositoryReference`].
43    pub fn insert(&self, reference: RepositoryReference, sha: GitOid) {
44        self.0.insert(reference, sha);
45    }
46
47    /// Returns the [`GitOid`] for the given [`RepositoryReference`], if it exists.
48    fn get(&self, reference: &RepositoryReference) -> Option<Ref<'_, RepositoryReference, GitOid>> {
49        self.0.get(reference)
50    }
51
52    /// Return the [`GitOid`] for the given [`GitUrl`], if it is already known.
53    pub fn get_precise(&self, url: &GitUrl) -> Option<GitOid> {
54        // If the URL is already precise, return it.
55        if let Some(precise) = url.precise() {
56            return Some(precise);
57        }
58
59        // If we know the precise commit already, return it.
60        let reference = RepositoryReference::from(url);
61        if let Some(precise) = self.get(&reference) {
62            return Some(*precise);
63        }
64
65        None
66    }
67
68    /// Resolve a Git URL to a specific commit without performing any Git operations.
69    ///
70    /// Returns a [`GitOid`] if the URL has already been resolved (i.e., is available in the cache),
71    /// or if it can be fetched via the GitHub API. Otherwise, returns `None`.
72    pub async fn github_fast_path(
73        &self,
74        url: &GitUrl,
75        client: &ClientWithMiddleware,
76    ) -> Result<Option<GitOid>, GitResolverError> {
77        if std::env::var_os(EnvVars::UV_NO_GITHUB_FAST_PATH).is_some() {
78            return Ok(None);
79        }
80
81        // If the URL is already precise or we know the precise commit, return it.
82        if let Some(precise) = self.get_precise(url) {
83            return Ok(Some(precise));
84        }
85
86        // If the URL is a GitHub URL, attempt to resolve it via the GitHub API.
87        let Some(GitHubRepository { owner, repo }) = GitHubRepository::parse(url.repository())
88        else {
89            return Ok(None);
90        };
91
92        // Check if we're rate-limited by GitHub, before determining the Git reference
93        if GITHUB_RATE_LIMIT_STATUS.is_active() {
94            debug!("Rate-limited by GitHub. Skipping GitHub fast path attempt for: {url}");
95            return Ok(None);
96        }
97
98        // Determine the Git reference.
99        let rev = url.reference().as_rev();
100
101        let github_api_base_url = std::env::var(EnvVars::UV_GITHUB_FAST_PATH_URL)
102            .unwrap_or("https://api.github.com/repos".to_owned());
103        let github_api_url = format!("{github_api_base_url}/{owner}/{repo}/commits/{rev}");
104
105        debug!("Querying GitHub for commit at: {github_api_url}");
106        let mut request = client.get(&github_api_url);
107        request = request.header("Accept", "application/vnd.github.3.sha");
108        request = request.header(
109            "User-Agent",
110            format!("uv/{} (+https://github.com/astral-sh/uv)", version()),
111        );
112
113        let response = request.send().await?;
114        let status = response.status();
115        if !status.is_success() {
116            // Returns a 404 if the repository does not exist, and a 422 if GitHub is unable to
117            // resolve the requested rev.
118            debug!(
119                "GitHub API request failed for: {github_api_url} ({})",
120                response.status()
121            );
122
123            if is_github_rate_limited(&response) {
124                // Mark that we are being rate-limited by GitHub
125                GITHUB_RATE_LIMIT_STATUS.activate();
126            }
127
128            return Ok(None);
129        }
130
131        // Parse the response as a Git SHA.
132        let precise = response.text().await?;
133        let precise =
134            GitOid::from_str(&precise).map_err(|err| GitResolverError::Git(err.into()))?;
135
136        // Insert the resolved URL into the in-memory cache. This ensures that subsequent fetches
137        // resolve to the same precise commit.
138        self.insert(RepositoryReference::from(url), precise);
139
140        Ok(Some(precise))
141    }
142
143    /// Fetch a remote Git repository.
144    pub async fn fetch(
145        &self,
146        url: &GitUrl,
147        disable_ssl: bool,
148        offline: bool,
149        cache: PathBuf,
150        reporter: Option<Arc<dyn Reporter>>,
151    ) -> Result<Fetch, GitResolverError> {
152        debug!("Fetching source distribution from Git: {url}");
153
154        let reference = RepositoryReference::from(url);
155
156        // If we know the precise commit already, reuse it, to ensure that all fetches within a
157        // single process are consistent.
158        let url = {
159            if let Some(precise) = self.get(&reference) {
160                Cow::Owned(url.clone().with_precise(*precise))
161            } else {
162                Cow::Borrowed(url)
163            }
164        };
165
166        // Avoid races between different processes, too.
167        let lock_dir = cache.join("locks");
168        fs::create_dir_all(&lock_dir).await?;
169        let repository_url = RepositoryUrl::new(url.repository());
170        let _lock = LockedFile::acquire(
171            lock_dir.join(cache_digest(&repository_url)),
172            &repository_url,
173        )
174        .await?;
175
176        // Fetch the Git repository.
177        let source = if let Some(reporter) = reporter {
178            GitSource::new(url.as_ref().clone(), cache, offline).with_reporter(reporter)
179        } else {
180            GitSource::new(url.as_ref().clone(), cache, offline)
181        };
182
183        // If necessary, disable SSL.
184        let source = if disable_ssl {
185            source.dangerous()
186        } else {
187            source
188        };
189
190        let fetch = tokio::task::spawn_blocking(move || source.fetch())
191            .await?
192            .map_err(GitResolverError::Git)?;
193
194        // Insert the resolved URL into the in-memory cache. This ensures that subsequent fetches
195        // resolve to the same precise commit.
196        if let Some(precise) = fetch.git().precise() {
197            self.insert(reference, precise);
198        }
199
200        Ok(fetch)
201    }
202
203    /// Given a remote source distribution, return a precise variant, if possible.
204    ///
205    /// For example, given a Git dependency with a reference to a branch or tag, return a URL
206    /// with a precise reference to the current commit of that branch or tag.
207    ///
208    /// This method takes into account various normalizations that are independent of the Git
209    /// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
210    /// prefix kinds.
211    ///
212    /// This method will only return precise URLs for URLs that have already been resolved via
213    /// [`resolve_precise`], and will return `None` for URLs that have not been resolved _or_
214    /// already have a precise reference.
215    pub fn precise(&self, url: GitUrl) -> Option<GitUrl> {
216        let reference = RepositoryReference::from(&url);
217        let precise = self.get(&reference)?;
218        Some(url.with_precise(*precise))
219    }
220
221    /// Returns `true` if the two Git URLs refer to the same precise commit.
222    pub fn same_ref(&self, a: &GitUrl, b: &GitUrl) -> bool {
223        // Convert `a` to a repository URL.
224        let a_ref = RepositoryReference::from(a);
225
226        // Convert `b` to a repository URL.
227        let b_ref = RepositoryReference::from(b);
228
229        // The URLs must refer to the same repository.
230        if a_ref.url != b_ref.url {
231            return false;
232        }
233
234        // If the URLs have the same tag, they refer to the same commit.
235        if a_ref.reference == b_ref.reference {
236            return true;
237        }
238
239        // Otherwise, the URLs must resolve to the same precise commit.
240        let Some(a_precise) = a.precise().or_else(|| self.get(&a_ref).map(|sha| *sha)) else {
241            return false;
242        };
243
244        let Some(b_precise) = b.precise().or_else(|| self.get(&b_ref).map(|sha| *sha)) else {
245            return false;
246        };
247
248        a_precise == b_precise
249    }
250}
251
/// A [`RepositoryReference`] paired with the precise commit SHA recorded for it.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ResolvedRepositoryReference {
    /// An abstract reference to a Git repository, including the URL and the commit (e.g., a branch,
    /// tag, or revision).
    pub reference: RepositoryReference,
    /// The precise commit SHA of the reference.
    pub sha: GitOid,
}
260
/// Identifies a Git repository at a requested reference.
///
/// Implements `Eq` and `Hash` so that it can serve as a map key; [`GitResolver`] uses it to key
/// its cache of resolved commits.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RepositoryReference {
    /// The URL of the Git repository, with any query parameters and fragments removed.
    pub url: RepositoryUrl,
    /// The reference to the commit to use, which could be a branch, tag, or revision.
    pub reference: GitReference,
}
268
269impl From<&GitUrl> for RepositoryReference {
270    fn from(git: &GitUrl) -> Self {
271        Self {
272            url: RepositoryUrl::new(git.repository()),
273            reference: git.reference().clone(),
274        }
275    }
276}