Skip to main content

testing_conventions/
packaging.rs

//! Packaging rule — foundation (issue #70).
//!
//! README "Packaging": test files never ship in the built artifact. Colocated
//! unit tests live next to the source, so packaging has to strip them — and this
//! rule confirms it did, by inspecting the *built* artifact rather than the
//! working tree.
//!
//! This module is the deterministic core: given the root of an unpacked built
//! artifact and the test-file globs that must not appear in it, [`scan`] walks
//! the tree and returns every offending file. Producing the artifact (building a
//! wheel/sdist, `npm pack`, `cargo package`, then unpacking it) is a per-language
//! layer on top — kept separate, and out of this foundation slice, so the core
//! guarantee is testable without any language toolchain. The per-language slices
//! supply the build step and the glob set: Python `*_test.py` (#72), TypeScript
//! `*.test.*` (#73), Rust `tests/` (#74).
16
17use std::path::{Path, PathBuf};
18
19use anyhow::{bail, Context, Result};
20
21/// Walk `root` — the root of an unpacked built artifact — and return every file
22/// whose name matches one of `globs`, sorted for deterministic output.
23///
24/// `globs` are file-name globs where `*` matches any run of characters
25/// (including none); each is matched against an entry's file name, not its full
26/// path. A non-empty result means test files leaked into the artifact. Returns
27/// an error if the tree under `root` cannot be read.
28pub fn scan(root: impl AsRef<Path>, globs: &[String]) -> Result<Vec<PathBuf>> {
29    let root = root.as_ref();
30    let mut offenders = Vec::new();
31    collect_offenders(root, root, globs, &mut offenders)?;
32    offenders.sort();
33    Ok(offenders)
34}
35
36/// Inspect a built artifact at `path` for files matching `globs` — the test-file
37/// patterns that must not ship.
38///
39/// `path` is either a **directory** (an already-unpacked artifact) or a packed
40/// archive this rule understands — a Python wheel (`.whl`, a zip) or a gzipped tar
41/// (`.tgz` / `.tar.gz`, e.g. an `npm pack` tarball or Python sdist; a Cargo
42/// `.crate` too) — which is unpacked into a scratch directory first. Either way
43/// the unpacked tree is handed to [`scan`]. Offenders come back as paths
44/// **relative to the artifact root** (e.g. `package/dist/widget.test.js`), so they
45/// read the same whether the artifact was a directory or an archive. Errors if the
46/// artifact can't be read, or isn't a directory or a recognized archive.
47pub fn inspect(path: impl AsRef<Path>, globs: &[String]) -> Result<Vec<PathBuf>> {
48    let path = path.as_ref();
49    if path.is_dir() {
50        return Ok(relative_to(path, scan(path, globs)?));
51    }
52    let unpacked = if is_zip_artifact(path) {
53        unzip_to_temp(path)?
54    } else if is_tar_gz_artifact(path) {
55        untar_gz_to_temp(path)?
56    } else {
57        bail!(
58            "`{}` is not a directory or a recognized built artifact \
59             (expected a directory, a `.whl`, a `.tgz`/`.tar.gz`, or a `.crate`)",
60            path.display()
61        )
62    };
63    Ok(relative_to(unpacked.path(), scan(unpacked.path(), globs)?))
64}
65
/// Whether `path` names an artifact that is unpacked as a zip archive.
///
/// That is the case when its extension is exactly `whl` (a Python wheel) or
/// `zip`; the comparison is case-sensitive, and a path without an extension is
/// never a zip artifact.
fn is_zip_artifact(path: &Path) -> bool {
    path.extension()
        .is_some_and(|ext| ext == "whl" || ext == "zip")
}
74
/// Rewrite every offender so it is expressed relative to `root`.
///
/// Paths that start with `root` lose that prefix; a path that does not start
/// with `root` is passed through unchanged. Order is preserved.
fn relative_to(root: &Path, offenders: Vec<PathBuf>) -> Vec<PathBuf> {
    let mut relative = Vec::with_capacity(offenders.len());
    for offender in offenders {
        // `None` when the offender is not under `root`.
        let stripped = offender.strip_prefix(root).ok().map(Path::to_path_buf);
        relative.push(stripped.unwrap_or(offender));
    }
    relative
}
83
84/// Unpack a zip artifact into a fresh scratch directory (removed on drop).
85fn unzip_to_temp(archive: &Path) -> Result<TempDir> {
86    let file = std::fs::File::open(archive)
87        .with_context(|| format!("opening artifact `{}`", archive.display()))?;
88    let mut zip = zip::ZipArchive::new(file)
89        .with_context(|| format!("reading `{}` as a zip archive", archive.display()))?;
90    let dir = TempDir::new()?;
91    zip.extract(dir.path())
92        .with_context(|| format!("unpacking `{}`", archive.display()))?;
93    Ok(dir)
94}
95
/// Whether `path` names an artifact that is unpacked as a gzipped tarball.
///
/// The file name must end in `.tgz` (an `npm pack` tarball), `.tar.gz` (a Python
/// sdist) or `.crate` (the output of `cargo package`, #74). A path with no file
/// name, or one that is not valid UTF-8, is never a match.
fn is_tar_gz_artifact(path: &Path) -> bool {
    const SUFFIXES: [&str; 3] = [".tgz", ".tar.gz", ".crate"];
    match path.file_name().and_then(|n| n.to_str()) {
        Some(file_name) => SUFFIXES.iter().any(|suffix| file_name.ends_with(suffix)),
        None => false,
    }
}
106
107/// Unpack a gzipped-tar artifact into a fresh scratch directory (removed on drop).
108fn untar_gz_to_temp(archive: &Path) -> Result<TempDir> {
109    let file = std::fs::File::open(archive)
110        .with_context(|| format!("opening artifact `{}`", archive.display()))?;
111    let mut tar = tar::Archive::new(flate2::read::GzDecoder::new(file));
112    let dir = TempDir::new()?;
113    tar.unpack(dir.path())
114        .with_context(|| format!("unpacking `{}`", archive.display()))?;
115    Ok(dir)
116}
117
118/// A scratch directory removed on drop — where an archive artifact is unpacked.
119/// Unique per call (so parallel checks don't collide) and cleaned up so nothing
120/// leaks into the temp dir.
121struct TempDir(PathBuf);
122
123impl TempDir {
124    fn new() -> Result<Self> {
125        use std::sync::atomic::{AtomicU64, Ordering};
126        static COUNTER: AtomicU64 = AtomicU64::new(0);
127        let path = std::env::temp_dir().join(format!(
128            "testing-conventions-pkg-{}-{}",
129            std::process::id(),
130            COUNTER.fetch_add(1, Ordering::Relaxed),
131        ));
132        std::fs::create_dir_all(&path)
133            .with_context(|| format!("creating scratch directory `{}`", path.display()))?;
134        Ok(TempDir(path))
135    }
136
137    fn path(&self) -> &Path {
138        &self.0
139    }
140}
141
142impl Drop for TempDir {
143    fn drop(&mut self) {
144        let _ = std::fs::remove_dir_all(&self.0);
145    }
146}
147
148/// Recursively collect every file under `dir` (within the artifact `root`) that
149/// matches one of `patterns`.
150fn collect_offenders(
151    dir: &Path,
152    root: &Path,
153    patterns: &[String],
154    out: &mut Vec<PathBuf>,
155) -> Result<()> {
156    let entries =
157        std::fs::read_dir(dir).with_context(|| format!("reading directory `{}`", dir.display()))?;
158    for entry in entries {
159        let path = entry
160            .with_context(|| format!("reading an entry under `{}`", dir.display()))?
161            .path();
162        if path.is_dir() {
163            collect_offenders(&path, root, patterns, out)?;
164        } else if matches_any(&path, root, patterns) {
165            out.push(path);
166        }
167    }
168    Ok(())
169}
170
171/// `true` when `path` matches any of `patterns`.
172///
173/// A pattern ending in `/` is a **directory** pattern: it matches when `path`
174/// (relative to the artifact `root`) lives under a directory of that name — e.g.
175/// `tests/` flags `…/tests/integration.rs` (Rust's crate-root integration tests,
176/// #74). Every other pattern is a file-name glob (`*` wildcards) matched against
177/// the entry's name (`*_test.py`, `*.test.*`).
178fn matches_any(path: &Path, root: &Path, patterns: &[String]) -> bool {
179    let name = path
180        .file_name()
181        .and_then(|n| n.to_str())
182        .unwrap_or_default();
183    patterns
184        .iter()
185        .any(|pattern| match pattern.strip_suffix('/') {
186            Some(dir) => path_under_dir(path, root, dir),
187            None => matches_glob(pattern, name),
188        })
189}
190
/// Whether `path` lies somewhere below a directory called `dir`.
///
/// Only the part of `path` after `root` is considered (the whole path if it does
/// not start with `root`), and only its **ancestor** components: the final
/// component — the file itself — never counts, even if it is named `dir`.
fn path_under_dir(path: &Path, root: &Path, dir: &str) -> bool {
    let inside_root = match path.strip_prefix(root) {
        Ok(rest) => rest,
        Err(_) => path,
    };
    match inside_root.parent() {
        Some(ancestors) => ancestors
            .components()
            .any(|component| component.as_os_str() == dir),
        None => false,
    }
}
199
/// Match `name` against a file-name `glob` where `*` matches any run of
/// characters (including none) and every other character is literal.
///
/// `*` is the only metacharacter — it is all the test-file patterns this rule
/// checks (`*_test.py`, `*.test.*`) need. Matching is over Unicode scalar values.
/// A `*` in the glob is always a wildcard, even when `name` itself contains a
/// literal `*` at that position.
fn matches_glob(glob: &str, name: &str) -> bool {
    let glob: Vec<char> = glob.chars().collect();
    let name: Vec<char> = name.chars().collect();
    // Wildcard match with single-star backtracking: walk `name`, and on a
    // mismatch return to the most recent `*`, extending what it consumed by one
    // character.
    let (mut g, mut n) = (0usize, 0usize);
    let mut star: Option<usize> = None;
    let mut consumed_by_star = 0usize;
    while n < name.len() {
        match glob.get(g).copied() {
            // The wildcard has to be tested before literal equality: otherwise a
            // literal `*` in `name` would consume the glob's `*` as an ordinary
            // character and no backtrack point would be recorded.
            Some('*') => {
                star = Some(g);
                consumed_by_star = n;
                g += 1;
            }
            Some(literal) if literal == name[n] => {
                g += 1;
                n += 1;
            }
            _ => match star {
                // Mismatch under an open `*`: let the star swallow one more char.
                Some(star) => {
                    g = star + 1;
                    consumed_by_star += 1;
                    n = consumed_by_star;
                }
                None => return false,
            },
        }
    }
    // The pattern matches iff what's left is only trailing `*`s (each empty).
    // `g` never exceeds `glob.len()`, so the slice is always in bounds.
    glob[g..].iter().all(|&c| c == '*')
}
236
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU64, Ordering};

    /// A scratch directory tree for a single test, deleted again when dropped.
    struct TempTree(PathBuf);

    impl TempTree {
        /// Build a uniquely named tree under the system temp directory holding
        /// each of `files` (paths relative to the tree root; parent directories
        /// are created as needed), every one with a one-byte placeholder body.
        fn new(files: &[&str]) -> Self {
            static NEXT_ID: AtomicU64 = AtomicU64::new(0);
            let dir_name = format!(
                "tc-packaging-{}-{}",
                std::process::id(),
                NEXT_ID.fetch_add(1, Ordering::Relaxed),
            );
            let root = std::env::temp_dir().join(dir_name);
            for relative in files {
                let file = root.join(relative);
                let parent = file.parent().unwrap();
                std::fs::create_dir_all(parent).unwrap();
                std::fs::write(&file, "x").unwrap();
            }
            TempTree(root)
        }

        /// Root of the tree.
        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for TempTree {
        fn drop(&mut self) {
            // Best-effort cleanup; a failure here must not fail the test.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    /// Owned pattern list from string literals, as `scan`/`inspect` expect.
    fn patterns(globs: &[&str]) -> Vec<String> {
        globs.iter().map(|glob| (*glob).to_owned()).collect()
    }

    #[test]
    fn star_matches_any_run_including_empty() {
        for name in ["", "anything.py"] {
            assert!(matches_glob("*", name));
        }
        // Here the `*` consumes nothing and the literal `.py` is the whole name.
        assert!(matches_glob("*.py", ".py"));
    }

    #[test]
    fn the_python_test_glob_matches_only_test_files() {
        let glob = "*_test.py";
        assert!(matches_glob(glob, "widget_test.py"));
        assert!(!matches_glob(glob, "widget.py"));
        // With no trailing `*`, anything after `.py` is a mismatch.
        assert!(!matches_glob(glob, "widget_test.pyc"));
    }

    #[test]
    fn the_typescript_test_glob_matches_across_extensions() {
        let glob = "*.test.*";
        for name in ["button.test.ts", "button.test.mts", "button.test.tsx"] {
            assert!(matches_glob(glob, name));
        }
        assert!(!matches_glob(glob, "button.ts"));
    }

    #[test]
    fn a_literal_glob_must_match_exactly() {
        let glob = "conftest.py";
        assert!(matches_glob(glob, "conftest.py"));
        for name in ["conftest.pyi", "xconftest.py"] {
            assert!(!matches_glob(glob, name));
        }
    }

    #[test]
    fn scan_flags_a_test_file_anywhere_in_the_tree() {
        let tree = TempTree::new(&["pkg/widget.py", "pkg/sub/helper_test.py"]);
        let found = scan(tree.path(), &patterns(&["*_test.py"])).unwrap();
        let expected = vec![tree.path().join("pkg/sub/helper_test.py")];
        assert_eq!(found, expected);
    }

    #[test]
    fn a_directory_pattern_flags_files_under_that_dir() {
        let tree = TempTree::new(&["tests/integration.rs", "src/lib.rs", "src/tests/nested.rs"]);
        let found = scan(tree.path(), &patterns(&["tests/"])).unwrap();
        // Both the crate-root `tests/` and the nested `src/tests/` are ancestors
        // named `tests`, so their files are flagged; `src/lib.rs` has no such
        // ancestor and is left alone.
        let expected = vec![
            tree.path().join("src/tests/nested.rs"),
            tree.path().join("tests/integration.rs"),
        ];
        assert_eq!(found, expected);
    }

    #[test]
    fn recognizes_a_dot_crate_as_a_gzipped_tar() {
        for name in ["widget-0.1.0.crate", "pkg.tgz", "pkg.tar.gz"] {
            assert!(is_tar_gz_artifact(Path::new(name)));
        }
        assert!(!is_tar_gz_artifact(Path::new("pkg.whl")));
    }

    #[test]
    fn scan_is_clean_when_nothing_matches() {
        let tree = TempTree::new(&["pkg/widget.py", "pkg/helper.py"]);
        let found = scan(tree.path(), &patterns(&["*_test.py"])).unwrap();
        assert!(found.is_empty());
    }

    #[test]
    fn scan_matches_any_of_several_globs_and_returns_sorted() {
        let tree = TempTree::new(&["a.test.ts", "b_test.py", "keep.ts"]);
        let found = scan(tree.path(), &patterns(&["*_test.py", "*.test.*"])).unwrap();
        let expected = vec![tree.path().join("a.test.ts"), tree.path().join("b_test.py")];
        assert_eq!(found, expected);
    }

    #[test]
    fn scan_errors_when_the_root_cannot_be_read() {
        let missing = std::env::temp_dir().join("tc-packaging-does-not-exist-9f8e7d");
        let outcome = scan(&missing, &patterns(&["*_test.py"]));
        assert!(outcome.is_err());
    }

    #[test]
    fn inspect_scans_a_directory_artifact_with_relative_paths() {
        let tree = TempTree::new(&["pkg/widget.py", "pkg/widget_test.py"]);
        let found = inspect(tree.path(), &patterns(&["*_test.py"])).unwrap();
        assert_eq!(found, vec![PathBuf::from("pkg/widget_test.py")]);
    }

    #[test]
    fn inspect_rejects_an_unrecognized_artifact() {
        let tree = TempTree::new(&["not-an-archive.txt"]);
        let artifact = tree.path().join("not-an-archive.txt");
        let err = inspect(artifact, &patterns(&["*_test.py"])).unwrap_err();
        assert!(
            err.to_string().contains("not a directory or a recognized"),
            "got: {err}"
        );
    }
}