Skip to main content

testing_conventions/
packaging.rs

1//! Packaging rule — foundation (issue #70).
2//!
3//! README "Packaging": test files never ship in the built artifact. Colocated
4//! unit tests live next to the source, so packaging has to strip them — and this
5//! rule confirms it did, by inspecting the *built* artifact rather than the
6//! working tree.
7//!
8//! This module is the deterministic core: given the root of an unpacked built
9//! artifact and the test-file globs that must not appear in it, [`scan`] walks
10//! the tree and returns every offending file. Producing the artifact (building a
11//! wheel/sdist, `npm pack`, `cargo package`, then unpacking it) is a per-language
12//! layer on top — kept separate, and out of this foundation slice, so the core
13//! guarantee is testable without any language toolchain. The per-language slices
14//! supply the build step and the glob set: Python `*_test.py` (#72), TypeScript
15//! `*.test.*` (#73), Rust `tests/` (#74).
16
17use std::path::{Path, PathBuf};
18
19use anyhow::{bail, Context, Result};
20
21/// Walk `root` — the root of an unpacked built artifact — and return every file
22/// whose name matches one of `globs`, sorted for deterministic output.
23///
24/// `globs` are file-name globs where `*` matches any run of characters
25/// (including none); each is matched against an entry's file name, not its full
26/// path. A non-empty result means test files leaked into the artifact. Returns
27/// an error if the tree under `root` cannot be read.
28pub fn scan(root: impl AsRef<Path>, globs: &[String]) -> Result<Vec<PathBuf>> {
29    let root = root.as_ref();
30    let mut offenders = Vec::new();
31    collect_offenders(root, globs, &mut offenders)?;
32    offenders.sort();
33    Ok(offenders)
34}
35
36/// Inspect a built artifact at `path` for files matching `globs` — the test-file
37/// patterns that must not ship.
38///
39/// `path` is either a **directory** (an already-unpacked artifact) or a packed
40/// archive this rule understands — currently a Python wheel (`.whl`, a zip),
41/// which is unpacked into a scratch directory first. Either way the unpacked
42/// tree is handed to [`scan`]. Offenders come back as paths **relative to the
43/// artifact root** (e.g. `widget/core_test.py`), so they read the same whether
44/// the artifact was a directory or an archive. Errors if the artifact can't be
45/// read, or isn't a directory or a recognized archive.
46pub fn inspect(path: impl AsRef<Path>, globs: &[String]) -> Result<Vec<PathBuf>> {
47    let path = path.as_ref();
48    if path.is_dir() {
49        return Ok(relative_to(path, scan(path, globs)?));
50    }
51    if is_zip_artifact(path) {
52        let unpacked = unzip_to_temp(path)?;
53        return Ok(relative_to(unpacked.path(), scan(unpacked.path(), globs)?));
54    }
55    bail!(
56        "`{}` is not a directory or a recognized built artifact \
57         (expected a directory or a `.whl`)",
58        path.display()
59    )
60}
61
/// Reports whether `path` names an artifact this rule unpacks as a zip, judged
/// by extension alone: a Python wheel (`.whl`) or a plain `.zip`.
///
/// The comparison is exact (case-sensitive); a path with no extension, or with
/// an extension that is not valid UTF-8, is not a zip artifact.
fn is_zip_artifact(path: &Path) -> bool {
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext == "whl" || ext == "zip",
        None => false,
    }
}
70
/// Rewrite every offender so it is expressed relative to `root`, preserving
/// order.
///
/// Paths produced by [`scan`] all sit under `root`, so stripping the prefix is
/// expected to succeed. A path that unexpectedly does not start with `root` is
/// passed through unchanged rather than dropped.
fn relative_to(root: &Path, offenders: Vec<PathBuf>) -> Vec<PathBuf> {
    let mut relative = Vec::with_capacity(offenders.len());
    for offender in offenders {
        let rebased = offender.strip_prefix(root).ok().map(Path::to_path_buf);
        relative.push(rebased.unwrap_or(offender));
    }
    relative
}
79
80/// Unpack a zip artifact into a fresh scratch directory (removed on drop).
81fn unzip_to_temp(archive: &Path) -> Result<TempDir> {
82    let file = std::fs::File::open(archive)
83        .with_context(|| format!("opening artifact `{}`", archive.display()))?;
84    let mut zip = zip::ZipArchive::new(file)
85        .with_context(|| format!("reading `{}` as a zip archive", archive.display()))?;
86    let dir = TempDir::new()?;
87    zip.extract(dir.path())
88        .with_context(|| format!("unpacking `{}`", archive.display()))?;
89    Ok(dir)
90}
91
92/// A scratch directory removed on drop — where an archive artifact is unpacked.
93/// Unique per call (so parallel checks don't collide) and cleaned up so nothing
94/// leaks into the temp dir.
95struct TempDir(PathBuf);
96
97impl TempDir {
98    fn new() -> Result<Self> {
99        use std::sync::atomic::{AtomicU64, Ordering};
100        static COUNTER: AtomicU64 = AtomicU64::new(0);
101        let path = std::env::temp_dir().join(format!(
102            "testing-conventions-pkg-{}-{}",
103            std::process::id(),
104            COUNTER.fetch_add(1, Ordering::Relaxed),
105        ));
106        std::fs::create_dir_all(&path)
107            .with_context(|| format!("creating scratch directory `{}`", path.display()))?;
108        Ok(TempDir(path))
109    }
110
111    fn path(&self) -> &Path {
112        &self.0
113    }
114}
115
116impl Drop for TempDir {
117    fn drop(&mut self) {
118        let _ = std::fs::remove_dir_all(&self.0);
119    }
120}
121
122/// Recursively collect every file under `dir` whose name matches one of `globs`.
123fn collect_offenders(dir: &Path, globs: &[String], out: &mut Vec<PathBuf>) -> Result<()> {
124    let entries =
125        std::fs::read_dir(dir).with_context(|| format!("reading directory `{}`", dir.display()))?;
126    for entry in entries {
127        let path = entry
128            .with_context(|| format!("reading an entry under `{}`", dir.display()))?
129            .path();
130        if path.is_dir() {
131            collect_offenders(&path, globs, out)?;
132        } else if matches_any(&path, globs) {
133            out.push(path);
134        }
135    }
136    Ok(())
137}
138
139/// `true` when the file name of `path` matches any glob in `globs`.
140fn matches_any(path: &Path, globs: &[String]) -> bool {
141    let name = path
142        .file_name()
143        .and_then(|n| n.to_str())
144        .unwrap_or_default();
145    globs.iter().any(|glob| matches_glob(glob, name))
146}
147
/// Match `name` against a file-name `glob` where `*` matches any run of
/// characters (including none) and every other character is literal.
///
/// `*` is the only metacharacter — it is all the test-file patterns this rule
/// checks (`*_test.py`, `*.test.*`) need. A `*` in the glob is always a
/// wildcard, even when the name itself contains a `*` at that position.
/// Matching is over Unicode scalar values.
fn matches_glob(glob: &str, name: &str) -> bool {
    let glob: Vec<char> = glob.chars().collect();
    let name: Vec<char> = name.chars().collect();
    // Wildcard match with single-star backtracking: walk `name`, and on a
    // mismatch return to the most recent `*`, letting it consume one more char.
    let (mut g, mut n) = (0usize, 0usize);
    // `(index of the latest `*` in glob, index in name where its run ends)`.
    let mut backtrack: Option<(usize, usize)> = None;
    while n < name.len() {
        if g < glob.len() && glob[g] == '*' {
            // Must be tested before the literal branch: otherwise a `*` in
            // `name` would "literally" match the wildcard and no backtrack
            // point would be recorded.
            backtrack = Some((g, n));
            g += 1;
        } else if g < glob.len() && glob[g] == name[n] {
            g += 1;
            n += 1;
        } else if let Some((star, consumed)) = backtrack {
            // Mismatch under an open `*`: let the star swallow one more char.
            g = star + 1;
            n = consumed + 1;
            backtrack = Some((star, consumed + 1));
        } else {
            return false;
        }
    }
    // The pattern matches iff what's left is only trailing `*`s (each empty).
    glob[g..].iter().all(|&c| c == '*')
}
184
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU64, Ordering};

    /// Scratch directory populated with fixture files; deleted when dropped.
    struct TempTree(PathBuf);

    impl TempTree {
        /// Build a uniquely named tree under the system temp dir holding each
        /// of `files` (paths relative to the tree root), creating parent
        /// directories as needed.
        fn new(files: &[&str]) -> Self {
            static COUNTER: AtomicU64 = AtomicU64::new(0);
            let serial = COUNTER.fetch_add(1, Ordering::Relaxed);
            let dir_name = format!("tc-packaging-{}-{}", std::process::id(), serial);
            let root = std::env::temp_dir().join(dir_name);
            for rel in files {
                let target = root.join(rel);
                let parent = target.parent().unwrap();
                std::fs::create_dir_all(parent).unwrap();
                std::fs::write(target, "x").unwrap();
            }
            TempTree(root)
        }

        /// Root of the fixture tree.
        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for TempTree {
        fn drop(&mut self) {
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    /// Owned glob list from string literals, as `scan`/`inspect` expect.
    fn globs(patterns: &[&str]) -> Vec<String> {
        patterns.iter().map(|p| p.to_string()).collect()
    }

    #[test]
    fn star_matches_any_run_including_empty() {
        for name in ["", "anything.py"] {
            assert!(matches_glob("*", name), "`*` should match {name:?}");
        }
        // Here the `*` consumes nothing: the literal `.py` is the whole name.
        assert!(matches_glob("*.py", ".py"));
    }

    #[test]
    fn the_python_test_glob_matches_only_test_files() {
        let glob = "*_test.py";
        assert!(matches_glob(glob, "widget_test.py"));
        assert!(!matches_glob(glob, "widget.py"));
        // No trailing `*`, so anything after `.py` must make the match fail.
        assert!(!matches_glob(glob, "widget_test.pyc"));
    }

    #[test]
    fn the_typescript_test_glob_matches_across_extensions() {
        let glob = "*.test.*";
        for name in ["button.test.ts", "button.test.mts", "button.test.tsx"] {
            assert!(matches_glob(glob, name), "{glob} should match {name}");
        }
        assert!(!matches_glob(glob, "button.ts"));
    }

    #[test]
    fn a_literal_glob_must_match_exactly() {
        let glob = "conftest.py";
        assert!(matches_glob(glob, "conftest.py"));
        for name in ["conftest.pyi", "xconftest.py"] {
            assert!(!matches_glob(glob, name), "{glob} should not match {name}");
        }
    }

    #[test]
    fn scan_flags_a_test_file_anywhere_in_the_tree() {
        let tree = TempTree::new(&["pkg/widget.py", "pkg/sub/helper_test.py"]);
        let expected = vec![tree.path().join("pkg/sub/helper_test.py")];
        let found = scan(tree.path(), &globs(&["*_test.py"])).unwrap();
        assert_eq!(found, expected);
    }

    #[test]
    fn scan_is_clean_when_nothing_matches() {
        let tree = TempTree::new(&["pkg/widget.py", "pkg/helper.py"]);
        let found = scan(tree.path(), &globs(&["*_test.py"])).unwrap();
        assert!(found.is_empty());
    }

    #[test]
    fn scan_matches_any_of_several_globs_and_returns_sorted() {
        let tree = TempTree::new(&["a.test.ts", "b_test.py", "keep.ts"]);
        let expected: Vec<PathBuf> = ["a.test.ts", "b_test.py"]
            .iter()
            .map(|name| tree.path().join(name))
            .collect();
        let found = scan(tree.path(), &globs(&["*_test.py", "*.test.*"])).unwrap();
        assert_eq!(found, expected);
    }

    #[test]
    fn scan_errors_when_the_root_cannot_be_read() {
        let missing = std::env::temp_dir().join("tc-packaging-does-not-exist-9f8e7d");
        let outcome = scan(&missing, &globs(&["*_test.py"]));
        assert!(outcome.is_err());
    }

    #[test]
    fn inspect_scans_a_directory_artifact_with_relative_paths() {
        let tree = TempTree::new(&["pkg/widget.py", "pkg/widget_test.py"]);
        let found = inspect(tree.path(), &globs(&["*_test.py"])).unwrap();
        assert_eq!(found, vec![PathBuf::from("pkg/widget_test.py")]);
    }

    #[test]
    fn inspect_rejects_an_unrecognized_artifact() {
        let tree = TempTree::new(&["not-an-archive.txt"]);
        let artifact = tree.path().join("not-an-archive.txt");
        let err = inspect(artifact, &globs(&["*_test.py"])).unwrap_err();
        let message = err.to_string();
        assert!(
            message.contains("not a directory or a recognized"),
            "got: {err}"
        );
    }
}
302}