Skip to main content

aatxe_core/
affected.rs

1//! Resolve the set of bench files affected by a diff.
2//!
3//! Aatxe walks each bench file's transitive **same-language** imports, then
4//! intersects each bench's closure with the diff. A bench is "affected" iff
5//! itself or any reachable file appears in the diff.
6//!
7//! Per-language import parsers come in two flavours: the in-crate regex
8//! pass ([`RegexImportExtractor`], the default), and the AST-based
9//! extractor in the CLI binary (`aatxe_ast::FileGraph::file_edges`),
10//! plumbed through [`AffectedOptions::import_extractor`]. The AST pass
11//! is language-correct (no string/comment false positives, captures
12//! TS `export … from` re-exports + dynamic `import()` + Rust `mod foo;`
13//! declarations) and stays in the CLI so `aatxe-core` keeps its
14//! zero-dep, pure-logic shape. The regex path is the always-on fallback
15//! the existing test surface still pins against. The escape hatch for
16//! misses in either path is [`AffectedOptions::extra_changed_files`]
17//! (e.g. when CI knows about a config change the parser can't see).
18//!
19//! ## Why three-dot diff
20//!
21//! `git diff $base...HEAD` answers "what did this branch change since it
22//! diverged from base", which is exactly what CI is asking. It's stable
23//! against the base moving forward independently.
24//!
25//! ## IO model
26//!
27//! All side-effecting calls (read a file, read a directory, run `git`) are
28//! routed through trait objects. The default implementations use the real
29//! filesystem and shell out to `git`; tests inject in-memory stand-ins.
30
31use crate::types::Language;
32use std::collections::HashSet;
33use std::path::{Path, PathBuf};
34
/// Errors surfaced by [`resolve_affected`].
///
/// Only `NotARepo` is constructed in the code visible in this file; the
/// other variants are presumably produced by [`GitRunner`] / [`Fs`]
/// implementations and propagated unchanged via `?`.
#[derive(Debug, thiserror::Error)]
pub enum AffectedError {
    /// `git rev-parse --show-toplevel` produced only whitespace for the
    /// directory carried in the payload.
    #[error("not inside a git repository: {0}")]
    NotARepo(PathBuf),
    /// A git invocation failed; the payload is a free-form message.
    #[error("git command failed: {0}")]
    GitFailed(String),
    /// A filesystem operation failed; the payload is a free-form message.
    #[error("io error: {0}")]
    Io(String),
}
45
/// Inputs and IO seams for [`resolve_affected`].
///
/// The `'a` lifetime ties the borrowed IO seams (`git`, `fs`, and the
/// optional `import_extractor`) to the caller that owns them.
pub struct AffectedOptions<'a> {
    /// Project root (also the search root for bench discovery).
    pub cwd: PathBuf,
    /// Git ref to diff against, e.g. `origin/master`.
    pub base: String,
    /// Language whose import syntax, source extensions and default bench
    /// globs drive discovery and graph walking.
    pub language: Language,
    /// Bench-discovery globs. Empty ⇒ use [`Language::default_globs`].
    pub patterns: Vec<String>,
    /// Extra files to treat as changed. Escape hatch for tests and for cases
    /// the parser can't see (config files, codegen outputs, etc.).
    /// Entries are joined onto the detected repo root, exactly like the
    /// paths reported by git.
    pub extra_changed_files: Vec<String>,
    /// Runs git commands (repo-root detection and the diff).
    pub git: &'a dyn GitRunner,
    /// Filesystem access used for bench discovery, reading sources and
    /// resolving import targets.
    pub fs: &'a dyn Fs,
    /// Per-language import extractor used to derive file edges from a
    /// source string. `None` ⇒ in-crate regex pass (the default, kept
    /// for backwards-compatibility with every existing call site).
    /// The CLI passes an AST-backed extractor that uses tree-sitter to
    /// extract the same shape language-correctly.
    pub import_extractor: Option<&'a dyn ImportExtractor>,
}
67
/// Pluggable source for "what file-edge specifiers does this source
/// contain". Implemented by [`RegexImportExtractor`] (the default) and,
/// in the CLI binary, by a wrapper around `aatxe_ast::describe(...)`.
///
/// The returned strings live in the same shape as [`extract_specifiers`]
/// (e.g. `./foo`, `./alt/d.rs`, `./shared`); [`resolve_import`] is what
/// turns each into one or more on-disk paths and is shared across both
/// implementations.
pub trait ImportExtractor {
    /// Return the import specifiers found in `src`, interpreting it as
    /// source text of language `lang`.
    ///
    /// The graph walker only follows specifiers accepted by its
    /// relative-path check; anything else in the returned list is skipped.
    fn extract(&self, src: &str, lang: Language) -> Vec<String>;
}
79
/// The default in-crate extractor — delegates to [`extract_specifiers`].
///
/// Kept as a named type so the CLI can build an [`AffectedOptions`] that
/// re-uses the default without re-implementing the trait on the fly.
pub struct RegexImportExtractor;

impl ImportExtractor for RegexImportExtractor {
    /// Forwards to [`extract_specifiers`] unchanged; the extractor itself
    /// holds no state.
    fn extract(&self, src: &str, lang: Language) -> Vec<String> {
        extract_specifiers(src, lang)
    }
}
91
/// Output of [`resolve_affected`].
#[derive(Debug, Clone)]
pub struct AffectedSet {
    /// The git ref the diff was taken against (copied from the options).
    pub base: String,
    /// Paths reported by `git diff --name-only`, followed by the caller's
    /// `extra_changed_files`, as strings (not yet joined to the repo root).
    /// Empty when no bench files were discovered, because git is not
    /// consulted in that case.
    pub changed_files: Vec<String>,
    /// Absolute paths to bench files in the affected closure.
    pub bench_files: Vec<PathBuf>,
    /// Absolute paths to *every* discovered bench file. Callers diff this
    /// against `bench_files` to report what was skipped.
    pub all_bench_files: Vec<PathBuf>,
}
103
/// IO seam for invoking git. Per the module docs, the default
/// implementation shells out to `git` and tests inject a stand-in.
pub trait GitRunner {
    /// Run git with `args` using `cwd` as the working directory and return
    /// its textual output.
    ///
    /// Callers in this file trim the result and/or split it into lines, so
    /// it is treated as the command's standard output.
    fn run(&self, args: &[&str], cwd: &Path) -> Result<String, AffectedError>;
}
107
/// IO seam for filesystem access. Per the module docs, the default
/// implementation uses the real filesystem and tests inject an in-memory
/// stand-in.
pub trait Fs {
    /// Read the whole file at `path` as text. The import-graph walker
    /// treats an error as "no edges from this file".
    fn read_to_string(&self, path: &Path) -> Result<String, AffectedError>;
    /// List the entries of the directory at `path`.
    ///
    /// Callers in this file use each returned [`DirEntry::path`] as-is for
    /// further traversal and matching, so entries are expected to carry
    /// paths that can be passed straight back into this trait.
    fn read_dir(&self, path: &Path) -> Result<Vec<DirEntry>, AffectedError>;
    /// Report what kind of entry lives at `path`. Callers in this file
    /// treat an error the same as "not a file / not a directory".
    fn metadata(&self, path: &Path) -> Result<EntryKind, AffectedError>;
}
113
/// One entry returned by [`Fs::read_dir`].
#[derive(Debug, Clone)]
pub struct DirEntry {
    /// Path of the entry; the bench walker recurses into it or matches it
    /// against the discovery globs without further joining.
    pub path: PathBuf,
    /// Whether the entry is a file, a directory, or something else.
    pub kind: EntryKind,
}
119
/// Coarse classification of a filesystem entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EntryKind {
    /// A regular file: eligible as a bench file or an import target.
    File,
    /// A directory: recursed into during discovery and usable as a Go
    /// package / `index` / `mod` container during import resolution.
    Dir,
    /// Anything else; ignored by every caller in this file.
    Other,
}
126
127/// Resolve which bench files are affected by the diff between `base` and `HEAD`.
128pub fn resolve_affected(opts: &AffectedOptions<'_>) -> Result<AffectedSet, AffectedError> {
129    let all_bench_files = discover_benches(&opts.cwd, &opts.patterns, opts.language, opts.fs)?;
130    if all_bench_files.is_empty() {
131        return Ok(AffectedSet {
132            base: opts.base.clone(),
133            changed_files: vec![],
134            bench_files: vec![],
135            all_bench_files: vec![],
136        });
137    }
138
139    let repo_root = detect_repo_root(&opts.cwd, opts.git)?;
140    let mut changed_rel = git_changed_files(&repo_root, &opts.base, opts.git)?;
141    for extra in &opts.extra_changed_files {
142        changed_rel.push(extra.clone());
143    }
144    let changed_abs: HashSet<PathBuf> = changed_rel
145        .iter()
146        .map(|f| normalize_path(&repo_root.join(f)))
147        .collect();
148
149    let default_extractor = RegexImportExtractor;
150    let extractor: &dyn ImportExtractor = match opts.import_extractor {
151        Some(e) => e,
152        None => &default_extractor,
153    };
154
155    let mut bench_files: Vec<PathBuf> = Vec::new();
156    for bench in &all_bench_files {
157        let reachable = collect_reachable_with(bench, opts.language, opts.fs, extractor);
158        if reachable.iter().any(|f| changed_abs.contains(f)) {
159            bench_files.push(bench.clone());
160        }
161    }
162
163    Ok(AffectedSet {
164        base: opts.base.clone(),
165        changed_files: changed_rel,
166        bench_files,
167        all_bench_files,
168    })
169}
170
171// --- discovery ---
172
173fn discover_benches(
174    cwd: &Path,
175    patterns: &[String],
176    lang: Language,
177    fs: &dyn Fs,
178) -> Result<Vec<PathBuf>, AffectedError> {
179    let mut globs: Vec<&str> = patterns.iter().map(|s| s.as_str()).collect();
180    if globs.is_empty() {
181        globs.extend(lang.default_globs().iter().copied());
182    }
183    let matchers: Vec<GlobMatcher> = globs.iter().map(|g| GlobMatcher::new(g)).collect();
184    let excludes = &["node_modules", "dist", "build", ".git", "target", "vendor"];
185    let mut out: Vec<PathBuf> = Vec::new();
186    walk(cwd, fs, &matchers, excludes, &mut out)?;
187    out.sort();
188    out.dedup();
189    Ok(out)
190}
191
192fn walk(
193    root: &Path,
194    fs: &dyn Fs,
195    matchers: &[GlobMatcher],
196    excludes: &[&str],
197    out: &mut Vec<PathBuf>,
198) -> Result<(), AffectedError> {
199    let mut stack: Vec<PathBuf> = vec![root.to_path_buf()];
200    while let Some(dir) = stack.pop() {
201        let entries = match fs.read_dir(&dir) {
202            Ok(e) => e,
203            Err(_) => continue,
204        };
205        for e in entries {
206            let name = e.path.file_name().and_then(|s| s.to_str()).unwrap_or("");
207            if excludes.iter().any(|ex| ex == &name) {
208                continue;
209            }
210            match e.kind {
211                EntryKind::Dir => stack.push(e.path),
212                EntryKind::File => {
213                    if matchers.iter().any(|m| m.matches(&e.path)) {
214                        out.push(e.path);
215                    }
216                }
217                EntryKind::Other => {}
218            }
219        }
220    }
221    Ok(())
222}
223
224// --- import graph ---
225
/// Walk imports/exports/uses transitively from `entry` and return every
/// reachable absolute path, using the default regex extractor.
///
/// This is a thin wrapper: it forwards to [`collect_reachable_with`] with
/// [`RegexImportExtractor`], so the returned set is exactly what that
/// function produces (the normalised `entry` itself included).
///
/// Kept for backwards compatibility with the existing test surface and
/// the (few) external call sites. New code should call
/// [`collect_reachable_with`] and pass an explicit extractor — the CLI
/// uses that path to inject an AST-based extractor.
pub fn collect_reachable(entry: &Path, lang: Language, fs: &dyn Fs) -> HashSet<PathBuf> {
    collect_reachable_with(entry, lang, fs, &RegexImportExtractor)
}
236
237/// Same as [`collect_reachable`], but with a caller-supplied
238/// [`ImportExtractor`]. The CLI binary passes the AST-backed extractor;
239/// callers without access to `aatxe-ast` (and the tests) pass
240/// [`RegexImportExtractor`].
241pub fn collect_reachable_with(
242    entry: &Path,
243    lang: Language,
244    fs: &dyn Fs,
245    extractor: &dyn ImportExtractor,
246) -> HashSet<PathBuf> {
247    let mut seen: HashSet<PathBuf> = HashSet::new();
248    let mut stack: Vec<PathBuf> = vec![normalize_path(entry)];
249    while let Some(file) = stack.pop() {
250        if !seen.insert(file.clone()) {
251            continue;
252        }
253        let src = match fs.read_to_string(&file) {
254            Ok(s) => s,
255            Err(_) => continue,
256        };
257        let specifiers = extractor.extract(&src, lang);
258        let from_dir = file.parent().unwrap_or(Path::new(".")).to_path_buf();
259        for spec in specifiers {
260            if !is_relative_spec(&spec, lang) {
261                continue;
262            }
263            for resolved in resolve_import(&from_dir, &spec, lang, fs) {
264                if !seen.contains(&resolved) {
265                    stack.push(resolved);
266                }
267            }
268        }
269    }
270    seen
271}
272
273/// Extract import specifiers from source text.
274///
275/// **Deliberately permissive**: false positives (non-existent paths) get
276/// filtered out by [`resolve_import`]; false negatives would lose real edges
277/// in the graph and weaken the affected-set guarantee.
278pub fn extract_specifiers(src: &str, lang: Language) -> Vec<String> {
279    let stripped = strip_comments(src, lang);
280    let mut out: Vec<String> = Vec::new();
281    match lang {
282        Language::Ts => {
283            for caps in TS_FROM_RE.captures_iter(&stripped) {
284                out.push(caps[1].to_string());
285            }
286            for caps in TS_SIDE_EFFECT_RE.captures_iter(&stripped) {
287                out.push(caps[1].to_string());
288            }
289            for caps in TS_CALL_RE.captures_iter(&stripped) {
290                out.push(caps[1].to_string());
291            }
292        }
293        Language::Go => {
294            // `import "path"` and `import ( ... )` blocks.
295            for caps in GO_SINGLE_IMPORT_RE.captures_iter(&stripped) {
296                out.push(caps[1].to_string());
297            }
298            for caps in GO_BLOCK_IMPORT_RE.captures_iter(&stripped) {
299                let block = &caps[1];
300                for sub in GO_BLOCK_PATH_RE.captures_iter(block) {
301                    out.push(sub[1].to_string());
302                }
303            }
304        }
305        Language::Rust => {
306            // We treat `mod foo;` declarations as edges: they reach `./foo.rs` or `./foo/mod.rs`.
307            for caps in RUST_MOD_RE.captures_iter(&stripped) {
308                out.push(format!("./{}", &caps[1]));
309            }
310            // `include!("path.rs")` macro — always file-local; normalise as relative.
311            for caps in RUST_INCLUDE_RE.captures_iter(&stripped) {
312                out.push(prefix_relative(&caps[1]));
313            }
314            // `path = "foo.rs"` attribute used to re-target a mod declaration.
315            // The path is resolved relative to the *current* source file, so
316            // we prefix `./` when the author didn't already supply `./`/`../`.
317            for caps in RUST_PATH_ATTR_RE.captures_iter(&stripped) {
318                out.push(prefix_relative(&caps[1]));
319            }
320        }
321    }
322    out
323}
324
/// Give a bare path an explicit `./` prefix.
///
/// Paths that already start with `./`, `../` or `/` are returned unchanged;
/// anything else (including the empty string) gets `./` prepended.
fn prefix_relative(p: &str) -> String {
    const ANCHORS: [&str; 3] = ["./", "../", "/"];
    if ANCHORS.iter().any(|anchor| p.starts_with(anchor)) {
        p.to_owned()
    } else {
        ["./", p].concat()
    }
}
333
334fn strip_comments(src: &str, lang: Language) -> String {
335    match lang {
336        Language::Ts | Language::Rust => {
337            // `//` line comments and `/* ... */` block comments. Naive — not
338            // string-literal-aware — which is fine: at worst we over-collect
339            // a string that looks like an import, and the resolver drops it.
340            let no_block = BLOCK_COMMENT_RE.replace_all(src, "").to_string();
341            LINE_COMMENT_RE.replace_all(&no_block, "$1").to_string()
342        }
343        Language::Go => {
344            // Go uses `//` and `/* */` like C.
345            let no_block = BLOCK_COMMENT_RE.replace_all(src, "").to_string();
346            LINE_COMMENT_RE.replace_all(&no_block, "$1").to_string()
347        }
348    }
349}
350
351fn is_relative_spec(spec: &str, lang: Language) -> bool {
352    match lang {
353        Language::Ts => {
354            spec.starts_with("./")
355                || spec.starts_with("../")
356                || spec == "."
357                || spec == ".."
358                || spec.starts_with('/')
359        }
360        Language::Go => {
361            // In Go, intra-module imports are tracked by module path, not
362            // relative file path. Aatxe's import graph for Go therefore only
363            // follows specifiers prefixed with `./` (an opt-in convention for
364            // tests) — for a fully-resolved Go graph, the consumer should
365            // supply `extra_changed_files` from `go list -deps`.
366            spec.starts_with("./") || spec.starts_with("../")
367        }
368        Language::Rust => {
369            // `mod` declarations are always relative to the current file.
370            // The synthetic `./{name}` produced by [`extract_specifiers`]
371            // always passes this check.
372            spec.starts_with("./") || spec.starts_with("../")
373        }
374    }
375}
376
377/// Resolve a relative import to one or more on-disk paths.
378///
379/// * TS / Rust return at most one path (verbatim extension, candidate
380///   extension, then `index.<ext>` / `mod.<ext>`).
381/// * Go resolves a relative import to a *package* — the directory pointed
382///   at by `spec` — and returns every `.go` file inside it, because Go
383///   compiles all files of a package together.
384pub fn resolve_import(from_dir: &Path, spec: &str, lang: Language, fs: &dyn Fs) -> Vec<PathBuf> {
385    let exts = lang.source_extensions();
386    let base = normalize_path(&from_dir.join(spec));
387
388    // Verbatim extension on the spec itself.
389    for ext in exts {
390        if spec.ends_with(ext) {
391            return if file_exists(&base, fs) {
392                vec![base]
393            } else {
394                vec![]
395            };
396        }
397    }
398
399    // Go: directory-as-package — collect every `.go` file in `base`.
400    if matches!(lang, Language::Go) {
401        if let Ok(EntryKind::Dir) = fs.metadata(&base) {
402            if let Ok(entries) = fs.read_dir(&base) {
403                let mut out: Vec<PathBuf> = entries
404                    .into_iter()
405                    .filter(|e| matches!(e.kind, EntryKind::File))
406                    .filter(|e| {
407                        e.path
408                            .extension()
409                            .and_then(|s| s.to_str())
410                            .map(|s| s == "go")
411                            .unwrap_or(false)
412                    })
413                    .map(|e| e.path)
414                    .collect();
415                out.sort();
416                return out;
417            }
418        }
419        // Fall through: occasionally a "./shared" specifier names a .go
420        // file directly. Try the standard extension append below.
421    }
422
423    // Add candidate extension.
424    for ext in exts {
425        let cand = path_with_ext(&base, ext);
426        if file_exists(&cand, fs) {
427            return vec![cand];
428        }
429    }
430    // Index / mod resolution (TS, Rust).
431    if let Ok(EntryKind::Dir) = fs.metadata(&base) {
432        let index_names: &[&str] = match lang {
433            Language::Ts => &["index"],
434            Language::Rust => &["mod"],
435            Language::Go => &[],
436        };
437        for stem in index_names {
438            for ext in exts {
439                let cand = base.join(format!("{stem}{ext}"));
440                if file_exists(&cand, fs) {
441                    return vec![cand];
442                }
443            }
444        }
445    }
446    vec![]
447}
448
/// Lexically collapse `.` and `..` components without touching the
/// filesystem. Lets us compare paths from `Path::join` (which preserves
/// `./` and `../`) against canonical keys in the in-memory test FS.
///
/// Rules for a `..` component:
/// * after a normal component, it cancels that component (`a/b/..` → `a`);
/// * directly under the root (or a path prefix), it is dropped, since the
///   parent of the root is the root (`/..` → `/`);
/// * at the start of a relative path, or after another preserved `..`, it
///   is **kept** (`../a` stays `../a`). Dropping it would silently turn the
///   path into a different one (`a`).
///
/// Symlinks are not resolved; this is a purely textual operation.
fn normalize_path(p: &Path) -> PathBuf {
    use std::path::Component;
    let mut out = PathBuf::new();
    for c in p.components() {
        match c {
            Component::CurDir => {}
            Component::ParentDir => {
                let last = out.components().next_back();
                let cancels_segment = matches!(last, Some(Component::Normal(_)));
                let at_anchor =
                    matches!(last, Some(Component::RootDir | Component::Prefix(_)));
                if cancels_segment {
                    out.pop();
                } else if !at_anchor {
                    // Nothing left to cancel: keep the `..` so the result
                    // still points at the same location.
                    out.push("..");
                }
            }
            other => out.push(other.as_os_str()),
        }
    }
    out
}
466
/// Append `ext` verbatim to the textual form of `p`.
///
/// Unlike `Path::with_extension`, nothing is replaced and no dot is
/// inserted: `ext` must carry its own leading `.`, and an existing
/// extension on `p` is kept (`a.test` + `.ts` → `a.test.ts`).
fn path_with_ext(p: &Path, ext: &str) -> PathBuf {
    let mut joined = p.as_os_str().to_os_string();
    joined.push(ext);
    joined.into()
}
472
473fn file_exists(p: &Path, fs: &dyn Fs) -> bool {
474    matches!(fs.metadata(p), Ok(EntryKind::File))
475}
476
477// --- git ---
478
479fn detect_repo_root(cwd: &Path, git: &dyn GitRunner) -> Result<PathBuf, AffectedError> {
480    let out = git.run(&["rev-parse", "--show-toplevel"], cwd)?;
481    let trimmed = out.trim();
482    if trimmed.is_empty() {
483        return Err(AffectedError::NotARepo(cwd.to_path_buf()));
484    }
485    Ok(PathBuf::from(trimmed))
486}
487
488fn git_changed_files(
489    repo_root: &Path,
490    base: &str,
491    git: &dyn GitRunner,
492) -> Result<Vec<String>, AffectedError> {
493    let triple_dot = format!("{}...HEAD", base);
494    let out = git.run(&["diff", "--name-only", &triple_dot], repo_root)?;
495    let lines: Vec<String> = out
496        .split('\n')
497        .map(|l| l.trim().to_string())
498        .filter(|l| !l.is_empty())
499        .collect();
500    Ok(lines)
501}
502
503// --- glob ---
504
505/// Minimal recursive-glob matcher: `**` matches across separators, `*` does
506/// not. Sufficient for the bench-discovery patterns Aatxe supports.
507#[derive(Debug)]
508pub struct GlobMatcher {
509    re: regex::Regex,
510}
511
512impl GlobMatcher {
513    pub fn new(pattern: &str) -> Self {
514        let mut rx = String::with_capacity(pattern.len() * 2);
515        rx.push('^');
516        let chars: Vec<char> = pattern.chars().collect();
517        let mut i = 0;
518        while i < chars.len() {
519            let c = chars[i];
520            let next = chars.get(i + 1).copied();
521            if c == '*' && next == Some('*') {
522                rx.push_str(".*");
523                i += 2;
524                if chars.get(i) == Some(&'/') {
525                    i += 1;
526                }
527            } else if c == '*' {
528                rx.push_str("[^/]*");
529                i += 1;
530            } else if c == '?' {
531                rx.push_str("[^/]");
532                i += 1;
533            } else if matches!(
534                c,
535                '.' | '+' | '(' | ')' | '[' | ']' | '{' | '}' | '^' | '$' | '|' | '\\'
536            ) {
537                rx.push('\\');
538                rx.push(c);
539                i += 1;
540            } else {
541                rx.push(c);
542                i += 1;
543            }
544        }
545        rx.push('$');
546        let re = regex::Regex::new(&rx).expect("internal: glob regex compile failed");
547        Self { re }
548    }
549    pub fn matches(&self, p: &Path) -> bool {
550        let s = p.to_string_lossy();
551        self.re.is_match(&s)
552    }
553}
554
555// --- regex statics ---
556
557use once_cell::sync::Lazy;
558
/// `/* ... */` block comment; `(?s)` lets `.` span newlines and the lazy
/// `.*?` stops at the first `*/`.
static BLOCK_COMMENT_RE: Lazy<regex::Regex> =
    Lazy::new(|| regex::Regex::new(r"(?s)/\*.*?\*/").unwrap());
/// `//` up to the end of the line, unless the `//` is directly preceded by
/// `:` (presumably so `scheme://` URLs survive). Group 1 captures the
/// character in front of the `//` (or the start of the text) so the caller
/// can write it back with `$1`.
static LINE_COMMENT_RE: Lazy<regex::Regex> =
    Lazy::new(|| regex::Regex::new(r"(^|[^:])//[^\n]*").unwrap());

/// TS: the word `from` followed by a quoted string; group 1 is the
/// specifier. Either quote character is accepted on either side.
static TS_FROM_RE: Lazy<regex::Regex> =
    Lazy::new(|| regex::Regex::new(r#"\bfrom\s+['"]([^'"]+)['"]"#).unwrap());
/// TS: side-effect import, `import '<spec>'`; group 1 is the specifier.
static TS_SIDE_EFFECT_RE: Lazy<regex::Regex> =
    Lazy::new(|| regex::Regex::new(r#"\bimport\s+['"]([^'"]+)['"]"#).unwrap());
/// TS: call form, `import('<spec>')` or `require('<spec>')` with a single
/// string-literal argument; group 1 is the specifier.
static TS_CALL_RE: Lazy<regex::Regex> = Lazy::new(|| {
    regex::Regex::new(r#"\b(?:import|require)\s*\(\s*['"]([^'"]+)['"]\s*\)"#).unwrap()
});

/// Go: single-line `import "path"` at the start of a line, with an
/// optional identifier alias (which includes `_`) before the path; group 1
/// is the path. The alias pattern does not cover dot imports
/// (`import . "path"`).
static GO_SINGLE_IMPORT_RE: Lazy<regex::Regex> =
    Lazy::new(|| regex::Regex::new(r#"(?m)^\s*import\s+(?:[A-Za-z_]\w*\s+)?"([^"]+)""#).unwrap());
/// Go: `import ( ... )` block; group 1 is the block body, matched lazily
/// up to the first `)`.
static GO_BLOCK_IMPORT_RE: Lazy<regex::Regex> =
    Lazy::new(|| regex::Regex::new(r#"(?s)import\s*\(\s*(.*?)\s*\)"#).unwrap());
/// Go: one quoted path at the start of a line inside an import block, with
/// an optional identifier alias; group 1 is the path.
static GO_BLOCK_PATH_RE: Lazy<regex::Regex> =
    Lazy::new(|| regex::Regex::new(r#"(?m)^\s*(?:[A-Za-z_]\w*\s+)?"([^"]+)""#).unwrap());

/// Rust: out-of-line module declaration at the start of a line; group 1 is
/// the module name. Inline modules (`mod x { ... }`) do not match because
/// the pattern requires the terminating `;`.
static RUST_MOD_RE: Lazy<regex::Regex> = Lazy::new(|| {
    // Matches: `mod x;` · `pub mod x;` · `pub(crate) mod x;` · `pub(in path) mod x;`.
    regex::Regex::new(r"(?m)^\s*(?:pub(?:\s*\([^)]*\))?\s+)?mod\s+([A-Za-z_]\w*)\s*;").unwrap()
});
/// Rust: `include!("<path>")` with a single string-literal argument;
/// group 1 is the path.
static RUST_INCLUDE_RE: Lazy<regex::Regex> =
    Lazy::new(|| regex::Regex::new(r#"\binclude!\s*\(\s*"([^"]+)"\s*\)"#).unwrap());
/// Rust: `#[path = "<path>"]` attribute; group 1 is the path.
static RUST_PATH_ATTR_RE: Lazy<regex::Regex> =
    Lazy::new(|| regex::Regex::new(r#"#\[\s*path\s*=\s*"([^"]+)"\s*\]"#).unwrap());