Skip to main content

testing_conventions/
packaging.rs

1//! Packaging rule — foundation (issue #70).
2//!
3//! README "Packaging": test files never ship in the built artifact. Colocated
4//! unit tests live next to the source, so packaging has to strip them — and this
5//! rule confirms it did, by inspecting the *built* artifact rather than the
6//! working tree.
7//!
8//! This module is the deterministic core: given the root of an unpacked built
9//! artifact and the test-file globs that must not appear in it, [`scan`] walks
10//! the tree and returns every offending file. Producing the artifact (building a
11//! wheel/sdist, `npm pack`, `cargo package`, then unpacking it) is a per-language
12//! layer on top — kept separate, and out of this foundation slice, so the core
13//! guarantee is testable without any language toolchain. The per-language slices
14//! supply the build step and the glob set: Python `*_test.py` (#72), TypeScript
15//! `*.test.*` (#73), Rust `tests/` (#74).
16
17use std::path::{Path, PathBuf};
18
19use anyhow::{Context, Result};
20
21/// Walk `root` — the root of an unpacked built artifact — and return every file
22/// whose name matches one of `globs`, sorted for deterministic output.
23///
24/// `globs` are file-name globs where `*` matches any run of characters
25/// (including none); each is matched against an entry's file name, not its full
26/// path. A non-empty result means test files leaked into the artifact. Returns
27/// an error if the tree under `root` cannot be read.
28pub fn scan(root: impl AsRef<Path>, globs: &[String]) -> Result<Vec<PathBuf>> {
29    let root = root.as_ref();
30    let mut offenders = Vec::new();
31    collect_offenders(root, globs, &mut offenders)?;
32    offenders.sort();
33    Ok(offenders)
34}
35
36/// Recursively collect every file under `dir` whose name matches one of `globs`.
37fn collect_offenders(dir: &Path, globs: &[String], out: &mut Vec<PathBuf>) -> Result<()> {
38    let entries =
39        std::fs::read_dir(dir).with_context(|| format!("reading directory `{}`", dir.display()))?;
40    for entry in entries {
41        let path = entry
42            .with_context(|| format!("reading an entry under `{}`", dir.display()))?
43            .path();
44        if path.is_dir() {
45            collect_offenders(&path, globs, out)?;
46        } else if matches_any(&path, globs) {
47            out.push(path);
48        }
49    }
50    Ok(())
51}
52
53/// `true` when the file name of `path` matches any glob in `globs`.
54fn matches_any(path: &Path, globs: &[String]) -> bool {
55    let name = path
56        .file_name()
57        .and_then(|n| n.to_str())
58        .unwrap_or_default();
59    globs.iter().any(|glob| matches_glob(glob, name))
60}
61
/// Match `name` against a file-name `glob` where `*` matches any run of
/// characters (including none) and every other character is literal.
///
/// `*` is the only metacharacter — that is all the test-file patterns this rule
/// checks (`*_test.py`, `*.test.*`) need. Matching is over Unicode scalar
/// values. A `*` in the glob is always a wildcard, even when the character at
/// the current position in `name` happens to be a literal `*`.
///
/// Returns `true` only when the whole of `name` is matched by the whole of
/// `glob`.
fn matches_glob(glob: &str, name: &str) -> bool {
    let glob: Vec<char> = glob.chars().collect();
    let name: Vec<char> = name.chars().collect();
    // Wildcard match with single-star backtracking: walk `name`, and on a
    // mismatch return to the most recent `*`, extending what it consumed by one
    // character.
    let (mut g, mut n) = (0usize, 0usize);
    let mut star: Option<usize> = None;
    let mut consumed_by_star = 0usize;
    while n < name.len() {
        match glob.get(g).copied() {
            // The wildcard must be tested before literal equality: otherwise a
            // literal `*` in `name` would consume the glob's `*` as an ordinary
            // character and no backtrack point would be recorded.
            Some('*') => {
                star = Some(g);
                consumed_by_star = n;
                g += 1;
            }
            Some(c) if c == name[n] => {
                g += 1;
                n += 1;
            }
            _ => match star {
                // Mismatch under an open `*`: let the star swallow one more char.
                Some(s) => {
                    g = s + 1;
                    consumed_by_star += 1;
                    n = consumed_by_star;
                }
                None => return false,
            },
        }
    }
    // `name` is exhausted: the pattern matches iff only `*`s remain (each
    // matching the empty run). `g` never exceeds `glob.len()`, so the slice is
    // in bounds.
    glob[g..].iter().all(|&c| c == '*')
}
98
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU64, Ordering};

    /// Scratch directory populated with fixture files; deleted when dropped.
    struct TempTree(PathBuf);

    impl TempTree {
        /// Create a uniquely named directory under the system temp dir and
        /// write each relative path in `files` beneath it.
        fn new(files: &[&str]) -> Self {
            // Process id plus a per-process counter gives each tree created by
            // this test binary its own directory name.
            static COUNTER: AtomicU64 = AtomicU64::new(0);
            let pid = std::process::id();
            let serial = COUNTER.fetch_add(1, Ordering::Relaxed);
            let root = std::env::temp_dir().join(format!("tc-packaging-{}-{}", pid, serial));
            files.iter().for_each(|rel| {
                let target = root.join(rel);
                let parent = target.parent().unwrap();
                std::fs::create_dir_all(parent).unwrap();
                std::fs::write(target, "x").unwrap();
            });
            Self(root)
        }

        /// Root directory of the tree.
        fn path(&self) -> &Path {
            self.0.as_path()
        }
    }

    impl Drop for TempTree {
        fn drop(&mut self) {
            // Best-effort cleanup: a failure to delete must not fail the test.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    /// The single-glob set used by the Python-flavoured scan tests.
    fn python_globs() -> Vec<String> {
        vec![String::from("*_test.py")]
    }

    #[test]
    fn star_matches_any_run_including_empty() {
        for name in ["", "anything.py"] {
            assert!(matches_glob("*", name));
        }
        // Here the `*` matches the empty run and `.py` covers the entire name.
        assert!(matches_glob("*.py", ".py"));
    }

    #[test]
    fn the_python_test_glob_matches_only_test_files() {
        let glob = "*_test.py";
        assert!(matches_glob(glob, "widget_test.py"));
        assert!(!matches_glob(glob, "widget.py"));
        // The glob ends in a literal, so extra trailing characters are rejected.
        assert!(!matches_glob(glob, "widget_test.pyc"));
    }

    #[test]
    fn the_typescript_test_glob_matches_across_extensions() {
        let glob = "*.test.*";
        for name in ["button.test.ts", "button.test.mts", "button.test.tsx"] {
            assert!(matches_glob(glob, name));
        }
        assert!(!matches_glob(glob, "button.ts"));
    }

    #[test]
    fn a_literal_glob_must_match_exactly() {
        let glob = "conftest.py";
        assert!(matches_glob(glob, "conftest.py"));
        for name in ["conftest.pyi", "xconftest.py"] {
            assert!(!matches_glob(glob, name));
        }
    }

    #[test]
    fn scan_flags_a_test_file_anywhere_in_the_tree() {
        let tree = TempTree::new(&["pkg/widget.py", "pkg/sub/helper_test.py"]);
        let expected = vec![tree.path().join("pkg/sub/helper_test.py")];
        let offenders = scan(tree.path(), &python_globs()).unwrap();
        assert_eq!(offenders, expected);
    }

    #[test]
    fn scan_is_clean_when_nothing_matches() {
        let tree = TempTree::new(&["pkg/widget.py", "pkg/helper.py"]);
        let offenders = scan(tree.path(), &python_globs()).unwrap();
        assert!(offenders.is_empty());
    }

    #[test]
    fn scan_matches_any_of_several_globs_and_returns_sorted() {
        let tree = TempTree::new(&["a.test.ts", "b_test.py", "keep.ts"]);
        let globs: Vec<String> = ["*_test.py", "*.test.*"]
            .iter()
            .map(|g| g.to_string())
            .collect();
        let expected: Vec<PathBuf> = ["a.test.ts", "b_test.py"]
            .iter()
            .map(|rel| tree.path().join(rel))
            .collect();
        let offenders = scan(tree.path(), &globs).unwrap();
        assert_eq!(offenders, expected);
    }

    #[test]
    fn scan_errors_when_the_root_cannot_be_read() {
        let missing = std::env::temp_dir().join("tc-packaging-does-not-exist-9f8e7d");
        let outcome = scan(missing.as_path(), &python_globs());
        assert!(outcome.is_err());
    }
}