Skip to main content

binstalk_fetchers/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2
3use std::{path::Path, sync::Arc, time::Duration};
4
5use binstalk_downloader::{download::DownloadError, remote::Error as RemoteError};
6use binstalk_git_repo_api::gh_api_client::{GhApiError, GhRepo, RepoInfo as GhRepoInfo};
7use binstalk_types::cargo_toml_binstall::{SigningAlgorithm, Strategy};
8use thiserror::Error as ThisError;
9use tokio::{sync::OnceCell, task::JoinError, time::sleep};
10pub use url::ParseError as UrlParseError;
11
12mod gh_crate_meta;
13pub use gh_crate_meta::*;
14
15#[cfg(feature = "quickinstall")]
16mod quickinstall;
17#[cfg(feature = "quickinstall")]
18pub use quickinstall::*;
19
20mod common;
21use common::*;
22
23mod signing;
24use signing::*;
25
26mod futures_resolver;
27
28use gh_crate_meta::hosting::RepositoryHost;
29
/// Delay used before retrying a rate-limited GitHub API request when the
/// rate-limit error does not carry its own `retry_after` value.
static DEFAULT_GH_API_RETRY_DURATION: Duration = Duration::from_secs(1);
31
/// Error describing a `pkg-url` that was rejected for a particular crate,
/// version and target.
///
/// Its `Display` output has the form
/// `Invalid pkg-url {pkg_url} for {crate_name}@{version} on {target}: {reason}`.
#[derive(Debug, ThisError)]
#[error("Invalid pkg-url {pkg_url} for {crate_name}@{version} on {target}: {reason}")]
pub struct InvalidPkgFmtError {
    /// Name of the crate the pkg-url belongs to.
    pub crate_name: CompactString,
    /// Version of the crate the pkg-url belongs to.
    pub version: CompactString,
    /// Target the pkg-url was evaluated for.
    pub target: CompactString,
    /// The offending pkg-url.
    pub pkg_url: Box<str>,
    /// Human-readable explanation of why the pkg-url is invalid.
    // NOTE(review): the double reference is presumably deliberate (a thin
    // pointer instead of a fat `&str`) — confirm before simplifying, since
    // the field type is part of the public interface.
    pub reason: &'static &'static str,
}
41
42#[derive(Debug, ThisError, miette::Diagnostic)]
43#[non_exhaustive]
44pub enum FetchError {
45    #[error(transparent)]
46    Download(#[from] DownloadError),
47
48    #[error("Failed to parse template: {0}")]
49    #[diagnostic(transparent)]
50    TemplateParse(#[from] leon::ParseError),
51
52    #[error("Failed to render template: {0}")]
53    #[diagnostic(transparent)]
54    TemplateRender(#[from] leon::RenderError),
55
56    #[error("Failed to render template: {0}")]
57    GhApi(#[from] GhApiError),
58
59    #[error(transparent)]
60    InvalidPkgFmt(Box<InvalidPkgFmtError>),
61
62    #[error("Failed to parse url: {0}")]
63    UrlParse(#[from] UrlParseError),
64
65    #[error("Signing algorithm not supported: {0:?}")]
66    UnsupportedSigningAlgorithm(SigningAlgorithm),
67
68    #[error("No signature present")]
69    MissingSignature,
70
71    #[error("Failed to verify signature")]
72    InvalidSignature,
73
74    #[error("Failed to wait for task: {0}")]
75    TaskJoinError(#[from] JoinError),
76}
77
78impl From<RemoteError> for FetchError {
79    fn from(e: RemoteError) -> Self {
80        DownloadError::from(e).into()
81    }
82}
83
84impl From<InvalidPkgFmtError> for FetchError {
85    fn from(e: InvalidPkgFmtError) -> Self {
86        Self::InvalidPkgFmt(Box::new(e))
87    }
88}
89
/// Common interface of all package fetchers.
///
/// A fetcher is created with [`Fetcher::new`], asked whether a package is
/// available with [`Fetcher::find`], and then asked to download and unpack
/// it with [`Fetcher::fetch_and_extract`].
#[async_trait::async_trait]
pub trait Fetcher: Send + Sync {
    /// Create a new fetcher from some data.
    ///
    /// Returns a trait object rather than `Self`, hence the clippy
    /// `new_ret_no_self` allowance.
    #[allow(clippy::new_ret_no_self)]
    fn new(
        client: Client,
        gh_api_client: GhApiClient,
        data: Arc<Data>,
        target_data: Arc<TargetDataErased>,
        signature_policy: SignaturePolicy,
    ) -> Arc<dyn Fetcher>
    where
        Self: Sized;

    /// Fetch a package and extract it into `dst`.
    async fn fetch_and_extract(&self, dst: &Path) -> Result<ExtractedFiles, FetchError>;

    /// Find the package, if it is available for download.
    ///
    /// This may look for multiple remote targets, but must write (using some form of interior
    /// mutability) the best one to the implementing struct in some way so `fetch_and_extract` can
    /// proceed without additional work.
    ///
    /// Must return `true` if a package is available, `false` if none is, and reserve errors for
    /// fatal conditions only.
    fn find(self: Arc<Self>) -> JoinHandle<Result<bool, FetchError>>;

    /// Report to upstream that cargo-binstall tries to use this fetcher.
    ///
    /// The default implementation does nothing.
    /// Currently it is only overridden by [`quickinstall::QuickInstall`].
    fn report_to_upstream(self: Arc<Self>) {}

    /// Return the package format.
    fn pkg_fmt(&self) -> PkgFmt;

    /// Return finalized target meta.
    fn target_meta(&self) -> PkgMeta;

    /// A short human-readable name or descriptor for the package source.
    fn source_name(&self) -> CompactString;

    /// A short human-readable name; it must contain only characters
    /// and numbers, and it must be unique.
    ///
    /// It is used to create the temporary dir that is used for
    /// [`Fetcher::fetch_and_extract`].
    fn fetcher_name(&self) -> &'static str;

    /// The strategy used by this fetcher.
    fn strategy(&self) -> Strategy;

    /// Should return `true` if the remote is from a third-party source.
    fn is_third_party(&self) -> bool;

    /// Return the target for this fetcher.
    fn target(&self) -> &str;

    /// Return the target data this fetcher holds.
    // NOTE(review): presumably the same `target_data` that was handed to
    // `Fetcher::new` — confirm against the implementors.
    fn target_data(&self) -> &Arc<TargetDataErased>;
}
148
/// Resolved information about a crate's source repository, as produced by
/// `Data::get_repo_info`.
#[derive(Clone, Debug)]
struct RepoInfo {
    /// Repository URL, with any detected subcrate path already removed.
    repo: Url,
    /// Hosting service guessed from the repository URL.
    repository_host: RepositoryHost,
    /// Name of the subcrate the original URL pointed at, if one was detected.
    subcrate: Option<CompactString>,
    /// Whether the GitHub API reported the repository as private; `false`
    /// when the API was not consulted.
    is_private: bool,
}
156
/// What to do about package signatures
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SignaturePolicy {
    /// Don't process any signing information at all
    Ignore,

    /// If a signature is found, verify it and fail when verification fails;
    /// a package without a signature is accepted
    IfPresent,

    /// Require signatures to be present (and valid)
    Require,
}
169
/// Data required to fetch a package
#[derive(Clone, Debug)]
pub struct Data {
    /// Crate name.
    name: CompactString,
    /// Crate version.
    version: CompactString,
    /// Repository URL as supplied by the caller, if any; not yet parsed.
    repo: Option<String>,
    /// Repository information derived from `repo`, computed on demand by
    /// `get_repo_info` and stored here. The inner `Option` is `None` when
    /// `repo` is `None`.
    repo_info: OnceCell<Option<RepoInfo>>,
}
178
impl Data {
    /// Creates a new `Data` for crate `name` at `version`, optionally with the
    /// URL of its source repository. Repository information is not resolved
    /// here; the `repo_info` cell starts out empty.
    pub fn new(name: CompactString, version: CompactString, repo: Option<String>) -> Self {
        Self {
            name,
            version,
            repo,
            repo_info: OnceCell::new(),
        }
    }

    /// Returns the resolved repository information, computing it through
    /// `self.repo_info.get_or_try_init` when needed.
    ///
    /// Returns `Ok(None)` when no repository URL was supplied.
    ///
    /// # Errors
    ///
    /// Fails if the repository URL cannot be parsed, or if the GitHub API
    /// lookup in the final fallback path fails.
    #[instrument(skip(client))]
    async fn get_repo_info(&self, client: &GhApiClient) -> Result<Option<&RepoInfo>, FetchError> {
        /// Queries the GitHub API for `gh_repo`, sleeping and retrying for as
        /// long as the API reports a rate limit. There is no retry cap.
        async fn gh_get_repo_info(
            client: &GhApiClient,
            gh_repo: &GhRepo,
        ) -> Result<GhRepoInfo, GhApiError> {
            loop {
                match client.get_repo_info(gh_repo).await {
                    Ok(Some(gh_repo_info)) => break Ok(gh_repo_info),
                    // A successful response without repo info is reported as "not found".
                    Ok(None) => break Err(GhApiError::NotFound),
                    // Rate limited: wait for the advertised delay, or the
                    // default one if none was given, then try again.
                    Err(GhApiError::RateLimit { retry_after }) => {
                        sleep(retry_after.unwrap_or(DEFAULT_GH_API_RETRY_DURATION)).await
                    }
                    Err(err) => break Err(err),
                }
            }
        }

        /// Parses `repo`, follows redirects, splits off a subcrate path and
        /// determines whether the repository is private.
        async fn get_repo_info_inner(
            repo: &str,
            client: &GhApiClient,
        ) -> Result<RepoInfo, FetchError> {
            let repo = Url::parse(repo)?;
            // Best effort: if the redirect lookup fails, keep the parsed URL.
            let mut repo = client
                .remote_client()
                .get_redirected_final_url(repo.clone())
                .await
                .unwrap_or(repo);
            let repository_host = RepositoryHost::guess_git_hosting_services(&repo);

            // May shorten `repo` in place to the plain repository URL.
            let subcrate = RepoInfo::detect_subcrate(&mut repo, repository_host);

            // If the URL ends with `.git`, first try the URL without that
            // suffix. The shadowed `repo`/`repository_host` in this block
            // refer to the suffix-less URL.
            if let Some(repo) = repo
                .as_str()
                .strip_suffix(".git")
                .and_then(|s| Url::parse(s).ok())
            {
                let repository_host = RepositoryHost::guess_git_hosting_services(&repo);
                match GhRepo::try_extract_from_url(&repo) {
                    // GitHub repository and a token is available: ask the API.
                    // On failure, fall through to the fallback below.
                    Some(gh_repo) if client.has_gh_token() => {
                        if let Ok(gh_repo_info) = gh_get_repo_info(client, &gh_repo).await {
                            return Ok(RepoInfo {
                                subcrate,
                                repository_host,
                                repo,
                                is_private: gh_repo_info.is_private(),
                            });
                        }
                    }
                    // Otherwise accept the suffix-less URL only if the
                    // redirect lookup for it succeeds.
                    _ => {
                        if let Ok(repo) =
                            client.remote_client().get_redirected_final_url(repo).await
                        {
                            return Ok(RepoInfo {
                                subcrate,
                                repository_host: RepositoryHost::guess_git_hosting_services(&repo),
                                repo,
                                is_private: false,
                            });
                        }
                    }
                }
            }

            // Fallback: use the URL as resolved above (including any `.git`
            // suffix). Here a GitHub API error is propagated to the caller.
            Ok(RepoInfo {
                is_private: match GhRepo::try_extract_from_url(&repo) {
                    Some(gh_repo) if client.has_gh_token() => {
                        gh_get_repo_info(client, &gh_repo).await?.is_private()
                    }
                    _ => false,
                },
                subcrate,
                repo,
                repository_host,
            })
        }

        self.repo_info
            .get_or_try_init(move || {
                Box::pin(async move {
                    // No repository URL supplied: nothing to resolve.
                    let Some(repo) = self.repo.as_deref() else {
                        return Ok(None);
                    };

                    let repo_info = get_repo_info_inner(repo, client).await?;

                    debug!("Resolved repo_info = {repo_info:#?}");

                    Ok(Some(repo_info))
                })
            })
            .await
            // `&Option<RepoInfo>` -> `Option<&RepoInfo>`
            .map(Option::as_ref)
    }
}
284
285impl RepoInfo {
286    /// If `repo` contains a subcrate, then extracts and returns it.
287    /// It will also remove that subcrate path from `repo` to match
288    /// `scheme:/{repo_owner}/{repo_name}`
289    fn detect_subcrate(repo: &mut Url, repository_host: RepositoryHost) -> Option<CompactString> {
290        match repository_host {
291            RepositoryHost::GitHub => Self::detect_subcrate_common(repo, &["tree"]),
292            RepositoryHost::GitLab => Self::detect_subcrate_common(repo, &["-", "blob"]),
293            RepositoryHost::Codeberg => Self::detect_subcrate_common(repo, &["src", "branch"]),
294            _ => None,
295        }
296    }
297
298    fn detect_subcrate_common(repo: &mut Url, seps: &[&str]) -> Option<CompactString> {
299        let mut path_segments = repo.path_segments()?;
300
301        let _repo_owner = path_segments.next()?;
302        let _repo_name = path_segments.next()?;
303
304        // Skip separators
305        for sep in seps.iter().copied() {
306            if path_segments.next()? != sep {
307                return None;
308            }
309        }
310
311        // Skip branch name
312        let _branch_name = path_segments.next()?;
313
314        let (subcrate, is_crate_present) = match path_segments.next()? {
315            // subcrate url is of path /crates/$subcrate_name, e.g. wasm-bindgen-cli
316            "crates" => (path_segments.next()?, true),
317            // subcrate url is of path $subcrate_name, e.g. cargo-audit
318            subcrate => (subcrate, false),
319        };
320
321        if path_segments.next().is_some() {
322            // A subcrate url should not contain anything more.
323            None
324        } else {
325            let subcrate = subcrate.into();
326
327            // Pop subcrate path to match regular repo style:
328            //
329            // scheme:/{addr}/{repo_owner}/{repo_name}
330            //
331            // path_segments() succeeds, so path_segments_mut()
332            // must also succeeds.
333            let mut paths = repo.path_segments_mut().unwrap();
334
335            paths.pop(); // pop subcrate
336            if is_crate_present {
337                paths.pop(); // pop crate
338            }
339            paths.pop(); // pop branch name
340            seps.iter().for_each(|_| {
341                paths.pop();
342            }); // pop separators
343
344            Some(subcrate)
345        }
346    }
347}
348
/// Target specific data required to fetch a package
#[derive(Clone, Debug)]
pub struct TargetData<T: leon::Values + ?Sized> {
    /// The target this data applies to.
    pub target: String,
    /// Package metadata for this target.
    pub meta: PkgMeta,
    /// More target related info; it is recommended to provide the following keys:
    ///  - target_family
    ///  - target_arch
    ///  - target_libc
    ///  - target_vendor
    pub target_related_info: T,
}
361
/// [`TargetData`] whose `target_related_info` is a type-erased
/// `dyn leon::Values + Send + Sync + 'static`.
pub type TargetDataErased = TargetData<dyn leon::Values + Send + Sync + 'static>;
363
#[cfg(test)]
mod test {
    use std::num::{NonZeroU16, NonZeroU64};

    use super::*;

    /// Checks, for every url in `urls`, that the hosting service is guessed
    /// as `expected_host`, that `expected_subcrate` is detected, and that the
    /// url is reduced to `expected_repo`.
    fn assert_subcrate_detected(
        urls: &[&str],
        expected_host: RepositoryHost,
        expected_subcrate: &str,
        expected_repo: &str,
    ) {
        for url in urls.iter().copied() {
            let mut repo = Url::parse(url).unwrap();

            let repository_host = RepositoryHost::guess_git_hosting_services(&repo);
            assert_eq!(repository_host, expected_host);

            let subcrate_prefix = RepoInfo::detect_subcrate(&mut repo, repository_host).unwrap();
            assert_eq!(subcrate_prefix, expected_subcrate);

            assert_eq!(repo, Url::parse(expected_repo).unwrap());
        }
    }

    #[test]
    fn test_detect_subcrate_github() {
        // cargo-audit
        assert_subcrate_detected(
            &[
                "https://github.com/RustSec/rustsec/tree/main/cargo-audit",
                "https://github.com/RustSec/rustsec/tree/master/cargo-audit",
            ],
            RepositoryHost::GitHub,
            "cargo-audit",
            "https://github.com/RustSec/rustsec",
        );

        // wasm-bindgen-cli
        assert_subcrate_detected(
            &[
                "https://github.com/rustwasm/wasm-bindgen/tree/main/crates/cli",
                "https://github.com/rustwasm/wasm-bindgen/tree/master/crates/cli",
            ],
            RepositoryHost::GitHub,
            "cli",
            "https://github.com/rustwasm/wasm-bindgen",
        );
    }

    #[test]
    fn test_detect_subcrate_gitlab() {
        assert_subcrate_detected(
            &[
                "https://gitlab.kitware.com/NobodyXu/hello/-/blob/main/cargo-binstall",
                "https://gitlab.kitware.com/NobodyXu/hello/-/blob/master/cargo-binstall",
            ],
            RepositoryHost::GitLab,
            "cargo-binstall",
            "https://gitlab.kitware.com/NobodyXu/hello",
        );
    }

    #[tokio::test]
    async fn test_ignore_dot_git_for_github_repos() {
        let url_without_git = "https://github.com/cargo-bins/cargo-binstall";
        let url_with_git = format!("{url_without_git}.git");

        let data = Data::new("cargo-binstall".into(), "v1.2.3".into(), Some(url_with_git));

        let remote_client = Client::new(
            "user-agent",
            None,
            NonZeroU16::new(1000).unwrap(),
            NonZeroU64::new(1000).unwrap(),
            [],
        )
        .unwrap();
        let gh_client = GhApiClient::new(remote_client, None);

        let repo_info = data.get_repo_info(&gh_client).await.unwrap().unwrap();

        assert_eq!(url_without_git, repo_info.repo.as_str());
    }
}