Skip to main content

testing_conventions/
workflow.rs

1//! Workflow guard — keep the reusable workflow in step with the CLI (issue #92).
2//!
3//! The reusable workflow (`.github/workflows/testing-conventions.yml`) is the
4//! documented `@v0` consumption path: a consumer pins `@v0`, and the workflow runs
5//! the *published* `testing-conventions` binary via `npx`. When a CLI subcommand is
6//! renamed or removed — e.g. `unit location` → `unit colocated-test` (#55) — but a
7//! workflow still invokes the old name, every `@v0` consumer breaks with
8//! `unrecognized subcommand`, silently: the workflow file is frozen at the tag
9//! while `npx` keeps pulling the latest binary.
10//!
11//! This module is the deterministic guard against that drift. [`invocations`]
12//! extracts every `testing-conventions …` call from a workflow file's shell, and
13//! [`unknown_subcommands`] checks each one's subcommand chain against the binary's
14//! own command tree (the source of truth, [`crate::command`]), flagging any chain
15//! the binary no longer exposes. Run in CI against the reusable workflow it fails
16//! the build the moment a workflow and the CLI fall out of step — before a release
17//! can strand `@v0`.
18//!
19//! Extraction is a line-based, shell-aware scan, not a full GitHub Actions parser:
20//! it tokenizes each non-comment line, finds the `testing-conventions` binary token
21//! (the bare command word, optionally version-pinned `…@x` /
22//! `…${VERSION:+@$VERSION}` — the `npx` / on-`PATH` form the reusable workflow and
23//! the docs use), and reads the tokens after it as the invocation. That is the
24//! deterministic bright-line; a path-qualified invocation (`./bin/testing-conventions`),
25//! a subcommand split across a `\`-continuation, or one named in non-`run:` prose is
26//! a documented limit.
27
28use std::path::{Path, PathBuf};
29
30use anyhow::{Context, Result};
31
32use crate::violation::Violation;
33
/// One call to the `testing-conventions` binary located in a workflow file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Invocation {
    /// Path of the workflow file that contains the call.
    pub file: PathBuf,
    /// Line number of the call within `file`, counted from 1.
    pub line: usize,
    /// Every token that follows the binary name, in source order: the
    /// subcommand chain comes first, followed by flags, values, and positionals.
    pub args: Vec<String>,
}
45
46/// Walk `path` — a workflow file, or a directory of them — and return every
47/// `testing-conventions` invocation, in file-then-line order.
48///
49/// Directories are scanned recursively for `*.yml` / `*.yaml` files (sorted, for
50/// deterministic output). Returns an error if a file or directory cannot be read.
51pub fn invocations(path: impl AsRef<Path>) -> Result<Vec<Invocation>> {
52    let path = path.as_ref();
53    let mut files = Vec::new();
54    collect_workflow_files(path, &mut files)?;
55    files.sort();
56    let mut out = Vec::new();
57    for file in files {
58        let text = std::fs::read_to_string(&file)
59            .with_context(|| format!("reading workflow `{}`", file.display()))?;
60        for (i, line) in text.lines().enumerate() {
61            if let Some(args) = line_invocation(line) {
62                out.push(Invocation {
63                    file: file.clone(),
64                    line: i + 1,
65                    args,
66                });
67            }
68        }
69    }
70    Ok(out)
71}
72
73/// Collect workflow files under `path` into `out`: `path` itself when it is a
74/// file, else every `*.yml` / `*.yaml` under it, recursively.
75fn collect_workflow_files(path: &Path, out: &mut Vec<PathBuf>) -> Result<()> {
76    if path.is_file() {
77        out.push(path.to_path_buf());
78        return Ok(());
79    }
80    let entries = std::fs::read_dir(path)
81        .with_context(|| format!("reading directory `{}`", path.display()))?;
82    for entry in entries {
83        let entry =
84            entry.with_context(|| format!("reading an entry under `{}`", path.display()))?;
85        let child = entry.path();
86        if child.is_dir() {
87            collect_workflow_files(&child, out)?;
88        } else if is_workflow_file(&child) {
89            out.push(child);
90        }
91    }
92    Ok(())
93}
94
/// Whether `path` ends in a `.yml` or `.yaml` extension — the extensions a
/// GitHub Actions workflow file uses. The comparison is case-sensitive, and a
/// path with no extension (or a non-UTF-8 one) is not a workflow file.
fn is_workflow_file(path: &Path) -> bool {
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext == "yml" || ext == "yaml",
        None => false,
    }
}
102
103/// The args of a `testing-conventions` invocation on `line`, or `None` if the
104/// line has no such call. Comments are ignored and surrounding quotes stripped.
105fn line_invocation(line: &str) -> Option<Vec<String>> {
106    let tokens = tokenize(line);
107    let pos = tokens.iter().position(|t| is_binary_token(t))?;
108    Some(tokens[pos + 1..].to_vec())
109}
110
/// Whether `token` is the `testing-conventions` binary used as a command word.
///
/// Accepted forms are the bare name and the name followed directly by a
/// version pin or shell expansion: `testing-conventions@0.1.0`,
/// `testing-conventions${VERSION:+@$VERSION}`.
///
/// Anything in front of the name disqualifies the token, so a path-qualified
/// token (`packages/…/testing-conventions`, as passed to `cp` / `install`) is
/// deliberately rejected: a path that merely ends in the binary name is not an
/// invocation. Look-alikes such as `testing-conventions.toml` are rejected too.
fn is_binary_token(token: &str) -> bool {
    match token.strip_prefix("testing-conventions") {
        // Only a version pin (`@…`) or a `${…}` expansion may follow the name.
        Some(rest) => rest.is_empty() || rest.starts_with('@') || rest.starts_with("${"),
        None => false,
    }
}
129
/// Split `line` into shell-ish tokens.
///
/// - Whitespace separates tokens.
/// - `'…'` and `"…"` group their contents into the current token; the quote
///   characters themselves are dropped. An empty pair (`""`) still yields an
///   (empty) token, and an unterminated quote runs to end of line.
/// - An unquoted `#` at the start of a token begins a comment that runs to end
///   of line; a `#` in the middle of a token is ordinary text.
/// - An unquoted `\` as the very last character of the line is a shell line
///   continuation and is dropped, so it is never reported as a token (and so
///   never mistaken for an argument or subcommand of the command before it).
///   A backslash anywhere else, or inside quotes, is kept verbatim.
fn tokenize(line: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut cur = String::new();
    // `started` rather than `!cur.is_empty()`: a quoted empty string is a token.
    let mut started = false;
    let mut quote: Option<char> = None;
    let mut chars = line.chars().peekable();
    while let Some(c) = chars.next() {
        match quote {
            // Inside quotes everything is literal until the matching quote.
            Some(q) => {
                if c == q {
                    quote = None;
                } else {
                    cur.push(c);
                }
            }
            None => match c {
                '#' if !started => break,
                // Trailing backslash: the command continues on the next line.
                // Nothing follows it here, so just stop; any token in progress
                // is flushed below.
                '\\' if chars.peek().is_none() => break,
                '\'' | '"' => {
                    quote = Some(c);
                    started = true;
                }
                c if c.is_whitespace() => {
                    if started {
                        tokens.push(std::mem::take(&mut cur));
                        started = false;
                    }
                }
                c => {
                    cur.push(c);
                    started = true;
                }
            },
        }
    }
    if started {
        tokens.push(cur);
    }
    tokens
}
171
172/// Of `invocations`, the ones whose subcommand chain names a subcommand the binary
173/// — described by `root`, its clap command tree — no longer exposes.
174///
175/// Each invocation's leading tokens are walked against the tree: a token in a
176/// subcommand position (the current command takes subcommands) must name one of
177/// them, else it is flagged. The walk stops at the first flag (`-…`) — subcommands
178/// precede options in clap — and at the first command that takes positionals rather
179/// than subcommands, so a path argument is never mistaken for a subcommand.
180pub fn unknown_subcommands(invocations: &[Invocation], root: &clap::Command) -> Vec<Violation> {
181    let mut out = Vec::new();
182    for inv in invocations {
183        let mut node = root;
184        for tok in &inv.args {
185            // Flags begin the options/positionals section: the subcommand chain is
186            // complete. A command that takes positionals (not subcommands) means
187            // this token is an argument, not a subcommand to validate.
188            if tok.starts_with('-') || !node.has_subcommands() {
189                break;
190            }
191            match node.find_subcommand(tok.as_str()) {
192                Some(sub) => node = sub,
193                None => {
194                    out.push(Violation {
195                        file: inv.file.clone(),
196                        line: inv.line,
197                        rule: "no-unknown-subcommand",
198                        message: format!(
199                            "`{}` is not a `{}` subcommand — the published binary no longer exposes it",
200                            tok,
201                            node.get_name()
202                        ),
203                    });
204                    break;
205                }
206            }
207        }
208    }
209    out
210}
211
212/// Check `path` (a workflow file or directory): every `testing-conventions`
213/// invocation must name a subcommand `root` still exposes. Returns one
214/// [`Violation`] per offending invocation.
215pub fn check(path: impl AsRef<Path>, root: &clap::Command) -> Result<Vec<Violation>> {
216    Ok(unknown_subcommands(&invocations(path)?, root))
217}
218
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU64, Ordering};

    /// Scratch directory tree for a test; deleted when the value is dropped.
    struct TempTree(PathBuf);

    impl TempTree {
        /// Write each `(relative path, contents)` pair beneath a fresh
        /// directory in the system temp dir, creating parent directories.
        fn new(files: &[(&str, &str)]) -> Self {
            // Process id + a per-process counter keeps trees from colliding.
            static COUNTER: AtomicU64 = AtomicU64::new(0);
            let dir_name = format!(
                "tc-workflow-{}-{}",
                std::process::id(),
                COUNTER.fetch_add(1, Ordering::Relaxed),
            );
            let root = std::env::temp_dir().join(dir_name);
            for &(rel, content) in files {
                let target = root.join(rel);
                let parent = target.parent().unwrap();
                std::fs::create_dir_all(parent).unwrap();
                std::fs::write(&target, content).unwrap();
            }
            Self(root)
        }

        /// Root directory of the tree.
        fn path(&self) -> &Path {
            self.0.as_path()
        }
    }

    impl Drop for TempTree {
        fn drop(&mut self) {
            // Best-effort cleanup: a leftover temp dir must not fail a test.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    #[test]
    fn tokenize_strips_quotes_and_groups() {
        let line = r#"npx -y "testing-conventions${VERSION:+@$VERSION}" unit coverage"#;
        let expected = [
            "npx",
            "-y",
            "testing-conventions${VERSION:+@$VERSION}",
            "unit",
            "coverage",
        ];
        assert_eq!(tokenize(line), expected);
    }

    #[test]
    fn tokenize_stops_at_a_comment() {
        // A whole-line comment yields no tokens at all.
        let commented_out = tokenize("      # run testing-conventions later");
        assert_eq!(commented_out, Vec::<String>::new());
        // A trailing comment is cut off; the command before it survives.
        let with_note = tokenize("testing-conventions check  # trailing note");
        assert_eq!(with_note, ["testing-conventions", "check"]);
    }

    #[test]
    fn is_binary_token_accepts_the_command_word() {
        let accepted = [
            "testing-conventions",
            "testing-conventions@0.1.0",
            "testing-conventions${VERSION:+@$VERSION}",
        ];
        for token in accepted {
            assert!(is_binary_token(token), "`{}` should be accepted", token);
        }
    }

    #[test]
    fn is_binary_token_rejects_lookalikes() {
        let rejected = [
            "testing-conventions.toml",
            "testing-conventions.yml@v0",
            "actions/checkout@v6",
            "npx",
            // Path-qualified tokens — e.g. a `cp` / `install` argument — are
            // not invocations, even when they end in the binary name (#92,
            // node.yml).
            "packages/rust/target/release/testing-conventions",
            "$target/bin/testing-conventions",
            "./target/release/testing-conventions",
        ];
        for token in rejected {
            assert!(!is_binary_token(token), "`{}` should be rejected", token);
        }
    }

    #[test]
    fn line_invocation_reads_the_args_after_the_binary() {
        let step = "- run: npx -y testing-conventions unit location --language python src";
        let expected: Vec<String> = ["unit", "location", "--language", "python", "src"]
            .iter()
            .map(|word| word.to_string())
            .collect();
        assert_eq!(line_invocation(step), Some(expected));
        assert_eq!(line_invocation("- uses: actions/checkout@v6"), None);
    }

    #[test]
    fn invocations_scans_a_file_and_a_directory() {
        let tree = TempTree::new(&[
            ("ci.yml", "- run: testing-conventions check\n"),
            (
                "nested/more.yaml",
                "- run: testing-conventions unit isolation --language rust .\n",
            ),
            ("notes.txt", "testing-conventions check\n"),
        ]);

        // Directory: both workflow files, not the .txt; sorted file-then-line.
        let from_dir = invocations(tree.path()).unwrap();
        assert_eq!(from_dir.len(), 2);
        let first = &from_dir[0];
        assert_eq!(first.args, vec!["check"]);
        assert_eq!(first.line, 1);

        // Single file: just that file.
        let single = tree.path().join("ci.yml");
        let from_file = invocations(single).unwrap();
        assert_eq!(from_file.len(), 1);
    }

    #[test]
    fn invocations_errors_on_a_missing_path() {
        let missing = std::env::temp_dir().join("tc-workflow-does-not-exist-2b1c");
        let outcome = invocations(&missing);
        assert!(outcome.is_err());
    }

    /// Build an [`Invocation`] from bare tokens; the file is a placeholder.
    fn inv(line: usize, args: &[&str]) -> Invocation {
        let args = args.iter().map(|&arg| String::from(arg)).collect();
        Invocation {
            file: PathBuf::from("ci.yml"),
            line,
            args,
        }
    }

    #[test]
    fn unknown_subcommands_flags_a_renamed_nested_rule() {
        let stale = [inv(9, &["unit", "location", "--language", "python", "src"])];
        let violations = unknown_subcommands(&stale, &crate::command());
        assert_eq!(violations.len(), 1);
        let only = &violations[0];
        assert_eq!(only.line, 9);
        assert_eq!(only.rule, "no-unknown-subcommand");
        // Named under its parent group, not the root.
        assert!(only.message.contains("`location`"), "{}", only.message);
        assert!(only.message.contains("`unit`"), "{}", only.message);
    }

    #[test]
    fn unknown_subcommands_flags_a_removed_top_level_command() {
        let stale = [inv(1, &["unit-location", "--lang", "python", "src"])];
        let violations = unknown_subcommands(&stale, &crate::command());
        assert_eq!(violations.len(), 1);
        let message = &violations[0].message;
        assert!(message.contains("`unit-location`"), "{}", message);
        assert!(message.contains("`testing-conventions`"), "{}", message);
    }

    #[test]
    fn unknown_subcommands_accepts_every_live_invocation() {
        let live = [
            inv(
                1,
                &["unit", "colocated-test", "--language", "python", "src"],
            ),
            inv(2, &["unit", "coverage", "--language", "typescript", "src"]),
            inv(3, &["unit", "isolation", "--language", "rust", "."]),
            inv(4, &["integration", "lint", "--language", "python", "src"]),
            // A leaf's positional must not be read as a subcommand.
            inv(5, &["packaging", "--language", "python", "dist"]),
            inv(6, &["check"]),
            // Flags-only and empty invocations have no subcommand to check.
            inv(7, &["--version"]),
            inv(8, &[]),
        ];
        let violations = unknown_subcommands(&live, &crate::command());
        assert!(violations.is_empty());
    }
}