xtest_data/lib.rs
1//! Fetch test data in packaged crate tests.
2//!
3//! # For crate authors
4//!
5//! Drop these lines into your _integration tests_ (due to a limitation in `cargo` this will only
6//! work in integration tests right now¹). Note that this requires your repository—through the URL
7//! contained in `Cargo.toml`—to be readable by the environment where you wish to test the packaged
8//! crate.
9//!
10//! ```rust
11//! use std::path::PathBuf;
12//!
13//! // or any other file you want to use.
14//! let mut datazip = PathBuf::from("tests/data.zip");
15//! xtest_data::setup!().rewrite([&mut datazip]).build();
16//!
17//! // … and the crate works its magic to make this succeed.
18//! assert!(datazip.exists(), "{}", datazip.display());
19//! ```
20//!
21//! # For packagers
22//!
23//! The `.crate` file you have downloaded is a `.tar.gz` in disguise. When you unpack it for your
24//! local build steps etc., verify that this package contains `Cargo.toml.orig` as well as a
25//! `.cargo_vcs_info.json` file; and that the latter file has git commit information.
26//!
//! You can then run the tests:
28//!
29//! ```bash
30//! cargo test -- --nocapture
31//! ```
32//!
33//! Don't worry, this won't access the network yet. In the first step it will only verify the
34//! basic installation. It will then panic while printing information on what it _would have_ done
//! and instructions on how to proceed. You can opt into allowing network access by default with:
36//!
37//! ```bash
38//! CARGO_XTEST_DATA_FETCH=yes cargo test -- --nocapture
39//! ```
40//!
41//! ¹We need a place to store a shallow clone of the crate's source repository.
42#![forbid(unsafe_code)]
43mod git;
44
45use std::{borrow::Cow, env, ffi::OsString, fs, io, path::Path, path::PathBuf};
46use tinyjson::JsonValue;
47
/// Handle for a file or tree that was registered through [`Setup::add()`].
///
/// The handle acts as a key into [`FsData`]: hand it to [`FsData::path()`] to look up the local
/// path. That path is either the location on disk inside your local checkout of the version
/// control system, while you are developing, or the location into which the file has been checked
/// out.
#[derive(Debug)]
pub struct Files {
    /// Position of the registered item in the list of managed resources.
    key: usize,
}
57
/// A resource whose path is owned by the `Setup` itself, as opposed to a borrowed path.
#[derive(Debug)]
enum Managed {
    // TODO: have a spec for the glob `<dir>/**.ext`?
    /// The path of a file, or of a tree of files.
    Files(PathBuf),
}
63
/// A path borrowed from the caller, which is rewritten in place.
type FsItem<'lt> = &'lt mut PathBuf;
65
/// The result of building a `Setup`: test resources that are accessible on the local file system.
///
/// Use this object to look up the local paths of the resources that were registered with the
/// method [`Setup::add()`].
#[derive(Debug)]
pub struct FsData {
    /// The path of every configured item, indexed by the key stored in its `Files` handle.
    /// This map will essentially be constant and we do not care about the VCS interpretation.
    map: Vec<PathBuf>,
}
76
77#[derive(Debug)]
78enum Source {
79 /// The data source is the crate's repository at a specific commit id.
80 VcsFromManifest {
81 /// TODO: we should support other commit identifiers.
82 commit_id: git::CommitId,
83 /// Evidence how we plan to access the source.
84 git: git::Git,
85 /// The directory where we may put git-dir and checkout of the resources.
86 datadir: PathBuf,
87 },
88 /// The data will be relative to the crate manifest.
89 Local(git::Git),
90}
91
92#[derive(Default, Debug)]
93struct Resources<'paths> {
94 /// All files and tree that are owned by the `Setup`.
95 /// Note: we never intend to remove anything from here. If we did we would have to do some kind
96 /// of remapping data structure to ensure that `Files` does not access the wrong item.
97 relative_files: Vec<Managed>,
98 /// Resources where we do 'simple' path replacement in a filter style.
99 ///
100 /// Note on ergonomics: We MAY take several different kinds of paths in the future to allow the
101 /// glob-style usage (`tests/samples/*.png`) to be efficiently executed. However, we should NOT
102 /// change the public API for this. We may well do some wrapping internally but the calls
103 /// should map to exactly one variant of any such item; and the enum variant should not be
104 /// directly exposed.
105 ///
106 /// This is based on the needs to perform more imports and additional calls to wrap locals in
107 /// those items. Basically, adding the crate should not be much more complex than making all
108 /// paths a variable and then throwing a `xtest_data::setup!()` on top.
109 unmanaged: Vec<FsItem<'paths>>,
110}
111
112/// A builder to configure desired test data paths.
113///
114/// This is created through [`setup!`] instead of a usual method as it must gather some information
115/// from the _callers_ environment first.
116///
117/// This is a builder and after configuration, its [`Setup::build()`] method should be called. Note
118/// the lifetime on this struct. This is either the lifetime of paths borrowed from the caller,
119/// which it will rewrite, or it can be `'static` when it owns all of the paths. The latter case
120/// requires them to be registered with [`Setup::add()`].
121///
122/// On a VCS copy of the surrounding package this will simply collect and validate the information,
123/// canonicalizing paths to be interpreted from the Manifest in the process.
124///
125/// However, when executed in the source tree from `.crate` then it will rewrite them all to refer
126/// to a local copy of the data instead. That is, if it is allowed to, since by default we merely
127/// provide a detailed report of data paths, repository location, and commit information that would
128/// _need_ to be fetched before aborting. When the environment has opted into our access of network
129/// (and might have overridden the repository path) then we will perform the actual access,
130/// checkout, and rewrite.
131#[must_use = "This is only a builder. Call `build` to perform validation/fetch/etc."]
132#[derive(Debug)]
133pub struct Setup<'paths> {
134 repository: OsString,
135 manifest: &'static str,
136 /// Have we determined to be local or in a crate?.
137 source: Source,
138 /// The resources that we store.
139 resources: Resources<'paths>,
140}
141
/// Options taken from the compile time environment of the crate that invoked [`setup!`].
///
/// This holds all the environment data gathered by the `setup` macro. Going through the macro
/// gives us the environment flags passed to the _calling_ crate instead of our own. Please do not
/// construct this directly, since doing so could affect the integrity of the information.
///
/// The data gathered from the _runtime_ environment is independent of this. Both are combined
/// when the `Setup` is created and built.
#[doc(hidden)]
pub struct EnvOptions {
    /// The value of `CARGO_PKG_REPOSITORY` of the calling crate.
    pub pkg_repository: &'static str,
    /// The value of `CARGO_MANIFEST_DIR` of the calling crate.
    pub manifest_dir: &'static str,
    /// The value of `CARGO_TARGET_TMPDIR` of the calling crate, if it was set.
    pub target_tmpdir: Option<&'static str>,
}
156
/// Create a builder for configuring local test data.
///
/// The macro evaluates to an instance of [`Setup`].
///
/// It can be run in _integration tests_ (and in integration tests only). It ensures that those
/// tests can be replicated from a source distribution of the package while still using additional
/// data stored in your repository. The commit ID of the head, which is stored inside the package,
/// is used for bit-by-bit reproducibility of the test data.
///
/// You can rely on this package only using data within the git tree associated with the commit ID
/// stored in the package. For a downstream tester this means: if the maintainer of the package
/// signs their crates, and you validate that signature, then by extension and by Git's content
/// addressability all data is ensured to have been signed-off by the maintainer.
///
/// During local development this checks the plausibility of the cargo data and then tries to
/// determine if `git` is in use (other VCS are welcome but need to be supported by cargo first).
///
/// ## Panics
///
/// This function _panics_ if any of the following is true:
/// * The function is called outside of an integration test.
/// * There is no VCS in use.
/// * We could not determine how to use the VCS of the repository.
/// * The repository URL as configured in `Cargo.toml` is not valid.
/// * We could not create a bare repository in the directory `${CARGO_TARGET_TMPDIR}`.
///
/// When executing from the distribution form of a package, we will also panic if any of the
/// following are true:
/// * The commit ID that is being read from `.cargo_vcs_info.json` can not be fetched from the
///   remote repository.
/// * There is no `.cargo_vcs_info.json` and the manifest is _not_ in a VCS folder.
///
/// Note that the eventual call to `build()` has some additional panics.
#[macro_export]
macro_rules! setup {
    () => {{
        // `env!` is expanded in the calling crate, so these are the caller's values.
        let options = $crate::EnvOptions {
            // FIXME: technically this isn't critical information.
            // We could rely on the user passing one to us since we will fail when that is not a
            // git repository with the correct commit ID. That's just their fault.
            pkg_repository: env!("CARGO_PKG_REPOSITORY"),
            manifest_dir: env!("CARGO_MANIFEST_DIR"),
            target_tmpdir: option_env!("CARGO_TARGET_TMPDIR"),
        };
        $crate::_setup(options)
    }};
}
203
204#[doc(hidden)]
205pub fn _setup(options: EnvOptions) -> Setup<'static> {
206 let EnvOptions {
207 pkg_repository: repository,
208 manifest_dir: manifest,
209 target_tmpdir: tmpdir,
210 } = options;
211 if repository.is_empty() {
212 inconclusive(&mut "The crate must have a valid URL in `package.repository`");
213 }
214
215 // Now allow the override.
216 let repository = env::var_os("CARGO_XTEST_DATA_REPOSITORY_ORIGIN")
217 .unwrap_or_else(|| OsString::from(repository));
218
219 // Make sure this is an integration test, or at least we have the dir.
220 // We don't want to block building over this (e.g. the crate itself here) but we _do_ want to
221 // restrict running this `setup` function
222 let integration_test_tempdir = tmpdir.map(Path::new);
223
224 let vcs_info_path = Path::new(manifest).join(".cargo_vcs_info.json");
225
226 let source = if vcs_info_path.exists() {
227 // Allow the override.
228 trait GetKey {
229 fn get_key(&self, key: &str) -> Option<&Self>;
230 }
231 impl GetKey for JsonValue {
232 fn get_key(&self, key: &str) -> Option<&Self> {
233 self.get::<std::collections::HashMap<_, _>>()?.get(key)
234 }
235 }
236
237 let data =
238 fs::read_to_string(vcs_info_path).unwrap_or_else(|mut err| inconclusive(&mut err));
239 let vcs: JsonValue = data
240 .parse()
241 .unwrap_or_else(|mut err| inconclusive(&mut err));
242 let commit_id = vcs
243 .get_key("git")
244 .unwrap_or_else(|| inconclusive(&mut "VCS does not contain a git section."))
245 .get_key("sha1")
246 .unwrap_or_else(|| inconclusive(&mut "VCS commit ID not recognized."))
247 .get::<String>()
248 .map(|id| git::CommitId::from(&**id))
249 .unwrap_or_else(|| inconclusive(&mut "VCS commit ID is not a string"));
250
251 // Okay, that makes sense. We know _what_ to access.
252 // Now let's also try to find out how we will access it. Let's find `git`.
253 // To shell out to because we are lazy.
254 let git = git::Git::new().unwrap_or_else(|mut err| inconclusive(&mut err));
255
256 let datadir = integration_test_tempdir
257 .map(Cow::Borrowed)
258 .or_else(|| {
259 let environment_temp = std::env::var_os("CARGO_XTEST_DATA_TMPDIR")
260 .or_else(|| std::env::var_os("TMPDIR"))
261 .map(PathBuf::from)?;
262 // TODO: nah, in this case we should have some distinguisher for the exact crate
263 // name and version in the tmpdir. At least that would catch the gravest of errors
264 // when testing many crates at the same time. (Although sharing the git dir would
265 // be an advantage).
266 Some(Cow::Owned(environment_temp))
267 })
268 .expect("This setup must only be called in an integration test or benchmark, or with an explicit TMPDIR")
269 .into_owned();
270
271 Source::VcsFromManifest {
272 commit_id,
273 git,
274 datadir,
275 }
276 } else {
277 // Check that we can recognize tracked files.
278 let git = git::Git::new().unwrap_or_else(|mut err| inconclusive(&mut err));
279 Source::Local(git)
280 };
281
282 // And finally this must be valid.
283 if repository.is_empty() {
284 inconclusive(&mut "The repository must have a valid URL");
285 }
286
287 Setup {
288 repository,
289 manifest,
290 source,
291 resources: Resources::default(),
292 }
293}
294
295impl<'lt> Setup<'lt> {
296 /// Register some paths to rewrite their location.
297 ///
298 /// The paths should be relative to the crate's manifest. For example, to refer to data in your
299 /// `tests` directory you would use `PathBuf::from("tests/data.zip")`.
300 ///
301 /// The paths will be registered internally. If the repository is local they will be rewritten
302 /// to be relative to the manifest location. If the repository is a crate distribution then the
303 /// paths will be sparsely checked out (meaning: only that path will be downloaded from the VCS
304 /// working dir and you can't expect any other files to be present).
305 ///
306 /// Those actions will happen when you call [`Setup::build()`].
307 ///
308 /// # Example
309 ///
310 /// ```
311 /// use std::path::PathBuf;
312 /// use xtest_data::setup;
313 ///
314 /// let mut path = PathBuf::from("tests/data.zip");
315 /// setup!().rewrite([&mut path]).build();
316 ///
317 /// assert!(path.exists(), "{}", path.display());
318 /// ```
319 pub fn rewrite(mut self, iter: impl IntoIterator<Item = &'lt mut PathBuf>) -> Self {
320 self.resources.unmanaged.extend(iter);
321 self
322 }
323
324 /// Register the path of a file or a tree of files.
325 ///
326 /// The return value is a key that can later be used in [`FsData`]. All the files under this
327 /// location will be checked out when `Setup::build()` is called in a crate-build.
328 ///
329 /// # Example
330 ///
331 /// ```
332 /// let mut vcs = xtest_data::setup!();
333 /// let datazip = vcs.add("tests/data.zip");
334 /// let testdata = vcs.build();
335 ///
336 /// let path = testdata.path(&datazip);
337 /// assert!(path.exists(), "{}", path.display());
338 /// ```
339
340 pub fn add(&mut self, path: impl AsRef<Path>) -> Files {
341 fn path_impl(resources: &mut Resources, path: &Path) -> usize {
342 let item = Managed::Files(path.to_owned());
343 let key = resources.relative_files.len();
344 resources.relative_files.push(item);
345 key
346 }
347
348 let key = path_impl(&mut self.resources, path.as_ref());
349 Files { key }
350 }
351
352 /// Run the final validation and perform rewrites.
353 ///
354 /// Returns the frozen dictionary of file mappings that had been registered with
355 /// [`Setup::add()`]. This allows retrieving the final data paths for those items.
356 ///
357 /// ## Panics
358 ///
359 /// This will panic if:
360 /// * Any registered file or tree is not tracked in the VCS.
361 /// * You have not allowed retrieving data from the VCS.
362 /// * It was not possible to retrieve the data from the VCS.
363 pub fn build(self) -> FsData {
364 let mut map;
365 match self.source {
366 Source::Local(git) => {
367 let dir = git::CrateDir::new(self.manifest, &git);
368 let datapath = Path::new(self.manifest);
369 dir.tracked(&git, &mut self.resources.path_specs());
370 map = vec![];
371 self.resources.relative_files.iter().for_each(|path| {
372 map.push(datapath.join(path.as_path()));
373 });
374 self.resources
375 .unmanaged
376 .into_iter()
377 .for_each(|item| set_root(datapath, item));
378 }
379 Source::VcsFromManifest {
380 commit_id,
381 datadir,
382 git,
383 } => {
384 let origin = git::Origin {
385 url: self.repository,
386 };
387
388 let gitpath = datadir.join("xtest-data-git");
389 let datapath = unique_dir(&datadir, "xtest-data-tree")
390 .unwrap_or_else(|mut err| inconclusive(&mut err));
391
392 git.consent_to_use(
393 &gitpath,
394 &datapath,
395 &origin,
396 &commit_id,
397 &mut self.resources.as_paths(),
398 &mut self.resources.path_specs(),
399 );
400
401 let shallow = git.shallow_clone(gitpath, origin);
402
403 shallow.fetch(&git, &commit_id);
404 shallow.checkout(
405 &git,
406 &datapath,
407 &commit_id,
408 &mut self.resources.path_specs(),
409 );
410 map = vec![];
411 self.resources.relative_files.iter().for_each(|path| {
412 map.push(datapath.join(path.as_path()));
413 });
414 self.resources
415 .unmanaged
416 .into_iter()
417 .for_each(|item| set_root(&datapath, item));
418 }
419 }
420
421 // In the end we just discard some information.
422 // We don't really need it anymore after the checks.
423 //
424 // TODO: of course we could avoid actually checking files onto the disk if we had some kind
425 // of `io::Read` abstraction that read them straight from `git cat` instead. But chances
426 // are you'll like your files and directory structures.
427 FsData { map }
428 }
429}
430
431impl Resources<'_> {
432 pub fn as_paths(&self) -> impl Iterator<Item = &'_ Path> {
433 let values = self.relative_files.iter().map(Managed::as_path);
434 let unmanaged = self.unmanaged.iter().map(|x| Path::new(x));
435 values.chain(unmanaged)
436 }
437
438 pub fn path_specs(&self) -> impl Iterator<Item = git::PathSpec<'_>> {
439 let values = self.relative_files.iter().map(Managed::as_path_spec);
440 let unmanaged = self.unmanaged.iter().map(|x| git::PathSpec::Path(&**x));
441 values.chain(unmanaged)
442 }
443}
444
445impl FsData {
446 /// Retrieve the rewritten path of a file or tree of files.
447 pub fn path(&self, file: &Files) -> &Path {
448 self.map.get(file.key).unwrap().as_path()
449 }
450}
451
452impl Managed {
453 pub fn as_path(&self) -> &Path {
454 match self {
455 Managed::Files(path) => path,
456 }
457 }
458
459 fn as_path_spec(&self) -> git::PathSpec<'_> {
460 match self {
461 Managed::Files(path) => git::PathSpec::Path(path),
462 }
463 }
464}
465
/// Replace `dir` with the result of joining it onto `path`.
fn set_root(path: &Path, dir: &mut PathBuf) {
    let rebased = path.join(dir.as_path());
    *dir = rebased;
}
469
470// We do not use tempdir. This should already be done by our environment (e.g. cargo).
471fn unique_dir(base: &Path, prefix: &str) -> Result<PathBuf, std::io::Error> {
472 let mut rng = nanorand::tls::tls_rng();
473 assert!(matches!(
474 Path::new(prefix).components().next(),
475 Some(std::path::Component::Normal(_))
476 ));
477 assert!(Path::new(prefix).components().nth(1).is_none());
478
479 let mut buffer = prefix.to_string();
480 let mut generate_name = move || -> PathBuf {
481 use nanorand::Rng;
482 const TABLE: &str = "0123456789abcdef";
483 let num: [u8; 8] = rng.rand();
484
485 buffer.clear();
486 buffer.push_str(prefix);
487
488 for byte in num {
489 let (low, hi) = (usize::from(byte & 0xf), usize::from((byte >> 4) & 0xf));
490 buffer.push_str(&TABLE[low..low + 1]);
491 buffer.push_str(&TABLE[hi..hi + 1]);
492 }
493
494 base.join(&buffer)
495 };
496
497 loop {
498 let path = generate_name();
499 match fs::create_dir(&path) {
500 Ok(_) => return Ok(path),
501 Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {}
502 Err(other) => return Err(other),
503 }
504 }
505}
506
/// Abort the setup: print `err` to standard error, then panic.
///
/// This never returns, which lets it stand in for a value of any type at the call site.
#[cold]
fn inconclusive(err: &mut dyn std::fmt::Display) -> ! {
    eprintln!("xtest-data failed to setup.");
    eprintln!("Information: {}", err);
    // The details were already printed above, so the panic carries no message of its own.
    panic!();
}