fetch_source/
lib.rs

1#![warn(missing_docs)]
2#![warn(rustdoc::missing_crate_level_docs)]
3#![warn(rustdoc::redundant_explicit_links)]
4//! Declare external source dependencies in `Cargo.toml` and fetch them programmatically.
5//!
6//! This crate allows you to define external sources (Git repositories, tar archives) in your
7//! `Cargo.toml` under `[package.metadata.fetch-source]` and fetch them programmatically.
8//! This crate is intended for use in build scripts where Rust bindings are generated from external
9//! source(s).
10//!
11//! Inspired by CMake's [`FetchContent`] module.
12//!
13//! [`FetchContent`]: https://cmake.org/cmake/help/latest/module/FetchContent.html#fetchcontent
14//!
15//! # Core Features
16//!
17//! - Define sources directly in your project metadata.
18//! - Cache fetched sources for efficient sharing between projects.
19//! - Clone git repositories (possibly recursively) by branch, tag, or specific commit (requires `git`
20//!   to be installed and available on `PATH`).
21//!
22//! # Optional Features
23//!
24//! - `tar`: Download and extract `.tar.gz` archives. This is an optional feature because it uses the
25//!   [`reqwest`] crate which brings quite a few more dependencies.
26//! - `rayon`: Fetch sources in parallel with [`rayon`].
27//!
28//! [`reqwest`]: https://crates.io/crates/reqwest
29//! [`rayon`]: https://crates.io/crates/rayon
30//!
31//! # Basic Usage
32//!
33//! Parse external sources declared in your `Cargo.toml` like so:
34//!
35//! ```rust
36//! // Imagine this is in your Cargo.toml:
37//! let cargo_toml = r#"
38//! [package.metadata.fetch-source]
39//! my-repo = { git = "https://github.com/user/repo.git", recursive = true }
40//! other-repo = { git = "https://github.com/user/project.git", branch = "the-feature" }
41//! my-data = { tar = "https://example.com/data.tar.gz" }
42//! "#;
43//!
44//! for (name, source) in fetch_source::try_parse_toml(cargo_toml)? {
45//!     println!("{name}: {source}");
46//! }
47//! # Ok::<(), Box<dyn std::error::Error>>(())
48//! ```
49//!
50//! Fetch all sources into a directory:
51//!
52//! ```rust
53//! # use fetch_source::Error;
54//! use std::path::PathBuf;
55//!
56//! # fn main() -> Result<(), Error> {
57//! let cargo_toml = r#"
58//! [package.metadata.fetch-source]
59//! "syn::latest" = { git = "https://github.com/dtolnay/syn.git" }
60//! "syn::1.0.0" = { tar = "https://github.com/dtolnay/syn/archive/refs/tags/1.0.0.tar.gz" }
61//! "#;
62//!
63//! let out_dir = PathBuf::from(std::env::temp_dir());
64//! for err in fetch_source::try_parse_toml(cargo_toml)?.into_iter()
65//!     .map(|(_, source)| source.fetch(&out_dir))
66//!     .filter_map(Result::err) {
67//!     eprintln!("{err}");
68//! }
69//! # Ok(())
70//! # }
71//! ```
72//!
73#![cfg_attr(
74    feature = "rayon",
75    doc = r##"
76With `rayon`, it's trivial to fetch sources in parallel:
77
78```rust
79# use fetch_source::Error;
80use rayon::prelude::*;
81use std::path::PathBuf;
82
83# fn main() -> Result<(), Error> {
84let cargo_toml = r#"
85[package.metadata.fetch-source]
86"syn::latest" = { git = "https://github.com/dtolnay/syn.git" }
87"syn::1.0.0" = { tar = "https://github.com/dtolnay/syn/archive/refs/tags/1.0.0.tar.gz" }
88"#;
89
90let out_dir = PathBuf::from(std::env::temp_dir());
91fetch_source::try_parse_toml(cargo_toml)?.into_par_iter()
92    .map(|(_, source)| source.fetch(&out_dir))
93    .filter_map(Result::err)
94    .for_each(|err| eprintln!("{err}"));
95# Ok(())
96# }
97```
98"##
99)]
100//!
101//! # Caching Sources
102//!
103//! Cache sources for efficient sharing across repeated builds. Refer to the same source across
104//! different builds or projects by using the same source definition in `Cargo.toml`.
105//!
106//! ```rust
107//! # use fetch_source::Cache;
108//! # fn main() -> Result<(), fetch_source::Error> {
109//! let cache = Cache::load_or_create(std::env::temp_dir())?;
110//!
111//! let project1 = r#"
112//! [package.metadata.fetch-source]
113//! "syn::latest" = { git = "https://github.com/dtolnay/syn.git" }
114//! "#;
115//!
116//! let sources1 = fetch_source::try_parse_toml(project1)?;
117//! // Check where this source would be cached
118//! let cache_latest = cache.cached_path(&sources1.get("syn::latest").unwrap());
119//!
120//! // Note the re-use of 'syn::latest' with a different definition!
121//! let project2 = r#"
122//! [package.metadata.fetch-source]
123//! "syn::greatest" = { git = "https://github.com/dtolnay/syn.git" }
124//! "syn::latest" = { git = "https://github.com/dtolnay/syn.git", branch = "dev" }
125//! "#;
126//!
127//! let sources2 = fetch_source::try_parse_toml(project2)?;
128//! let cache_greatest = cache.cached_path(&sources2.get("syn::greatest").unwrap());
129//! let cache_dev = cache.cached_path(&sources2.get("syn::latest").unwrap());
130//!
131//! // The same source by a different name from a different project is the same in the cache
132//! assert_eq!(cache_latest, cache_greatest);
133//!
134//! // The name doesn't uniquely identify a source - only the definition of the source matters
135//! assert_ne!(cache_latest, cache_dev);
136//!
137//! # Ok(())
138//! # }
139//! ```
140//!
141//! # Declaring sources
142//!
143//! The keys in the `package.metadata.fetch-source` table name a remote source. They can include
144//! any path character and zero or more '`::`' separators. Each `::`-separated component of a
145//! name maps to a subdirectory of the output directory.
146//!
147//! Each value in the `package.metadata.fetch-source` table must be a table which identifies the
148//! remote source it represents:
149//!
150//! **Tar archives**
151//! - The `tar` key gives the URL of the archive.
152//!
153//! **Git repos**
154//! - The `git` key gives the SSH or HTTPS upstream URL.
155//! - Any one of the `branch`/`tag`/`rev` keys indicates what to clone. The default is to clone the
156//!   default branch.
157//! - Use `recursive = true` to recursively clone submodules.
158//! - All clones are shallow, i.e. with a depth of 1.
159//!
160
161mod cache;
162mod error;
163mod git;
164mod source;
165#[cfg(feature = "tar")]
166mod tar;
167
168pub use cache::{Cache, CacheDir, CacheItems, CacheRoot, RelCacheDir};
169pub use error::{Error, ErrorKind, FetchError};
170pub use git::Git;
171pub use source::{
172    Artefact, Digest, FetchResult, Source, SourceName, SourceParseError, SourcesTable,
173    try_parse_toml,
174};
175#[cfg(feature = "tar")]
176pub use tar::Tar;
177
178/// Convenience function to load sources from `Cargo.toml` in the given directory
179///
180/// Returns an error if the manifest can't be loaded or if deserialisation fails.
181pub fn load_sources<P: AsRef<std::path::Path>>(path: P) -> Result<SourcesTable, Error> {
182    Ok(try_parse_toml(&std::fs::read_to_string(
183        path.as_ref().to_path_buf().join("Cargo.toml"),
184    )?)?)
185}
186
187/// Convenience function to fetch all sources serially
188pub fn fetch_all<P: AsRef<std::path::Path>>(
189    sources: SourcesTable,
190    out_dir: P,
191) -> Vec<(SourceName, FetchResult<Artefact>)> {
192    sources
193        .into_iter()
194        .map(
195            |(name, source)| match source.fetch(out_dir.as_ref().join(&name)) {
196                Ok(artefact) => (name, Ok(artefact)),
197                Err(err) => (name, Err(err)),
198            },
199        )
200        .collect()
201}
202
#[cfg(feature = "rayon")]
mod par {
    use super::*;
    use rayon::prelude::*;

    /// Convenience function to fetch all sources in parallel
    ///
    /// Parallel counterpart of [`fetch_all`]: each source is fetched into
    /// `out_dir` joined with the source's name, and the result of every fetch
    /// — success or failure — is returned alongside the name it belongs to.
    pub fn fetch_all_par<P: AsRef<std::path::Path> + Sync>(
        sources: SourcesTable,
        out_dir: P,
    ) -> Vec<(SourceName, FetchResult<Artefact>)> {
        sources
            .into_par_iter()
            .map(|(name, source)| {
                // `join` only borrows `name`, so the name can still be moved
                // into the returned pair; matching on the result just to
                // rebuild the same `Ok`/`Err` is unnecessary.
                let result = source.fetch(out_dir.as_ref().join(&name));
                (name, result)
            })
            .collect::<Vec<_>>()
    }

    /// Convenience function to update the given cache with all missing sources in parallel.
    /// Returns any errors that occurred when fetching the missing sources.
    pub fn cache_all_par(
        cache: &mut Cache,
        sources: SourcesTable,
    ) -> Vec<(SourceName, FetchError)> {
        let items = cache.items();
        let cache_root = cache.cache_dir();
        // Select only the sources not already in the cache (cheap, serial),
        // then run the expensive fetches on the rayon thread pool.
        let results = sources
            .into_iter()
            .filter(|(_, source)| !items.contains(source))
            .collect::<Vec<_>>()
            .into_par_iter()
            .map(|(name, source)| {
                let artefact_dir = cache_root.append(items.relative_path(&source));
                (name, source.fetch(&*artefact_dir))
            })
            .collect::<Vec<_>>();
        // The parallel fetches have finished, so the shared borrows above are
        // done and we may now borrow the cache mutably to record successes.
        let items = cache.items_mut();
        let mut errors = Vec::new();
        for (name, result) in results {
            match result {
                Ok(artefact) => items.insert(artefact),
                Err(err) => errors.push((name, err)),
            }
        }
        errors
    }
}
254
255#[cfg(feature = "rayon")]
256pub use par::{cache_all_par, fetch_all_par};
257
/// Construct a serde-compatible type from a JSON table literal. Useful in testing.
///
/// Three forms are accepted (arms are tried in order):
/// - `build_from_json!(T)` — deserialise `T` from an empty JSON object `{}`.
/// - `build_from_json!(T, key: value, ...)` — deserialise `T` from the given
///   JSON object body.
/// - `build_from_json!(key: value, ...)` — deserialise a target type inferred
///   from context; the JSON object body may be empty.
///
/// Every form evaluates to a `Result`, with any `serde_json` error converted
/// into [`SourceParseError`] via `From`.
#[cfg(test)]
#[macro_export]
macro_rules! build_from_json {
    // Explicit target type, empty JSON object.
    ($t:ty) => {{
        serde_json::from_value::<$t>(serde_json::json! { { } }).map_err($crate::SourceParseError::from)
    }};
    // Explicit target type with a non-empty JSON object body.
    ($t:ty, $($json:tt)+) => {{
        serde_json::from_value::<$t>(serde_json::json! { { $($json)+ } }).map_err($crate::SourceParseError::from)
    }};
    // Inferred target type; JSON object body may be empty.
    ($($json:tt)*) => {{
        serde_json::from_value(serde_json::json! { { $($json)* } }).map_err($crate::SourceParseError::from)
    }};
}