cargo-impact 0.5.0

Blast-radius analysis and selective test execution for Rust workspaces
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
//! Macro-expansion-backed trait-impl detection.
//!
//! Shells out to `cargo expand` (requires `cargo install cargo-expand`
//! on the user side — we don't bundle it) to reveal trait impls
//! synthesized by derive and attribute macros. The syn-only
//! `derive.rs` analyzer only flags derives of traits defined in
//! changed files; this module catches impls that macros expand to at
//! compile time even when the trait itself lives in an external crate.
//!
//! Motivating cases
//! ----------------
//! * `#[derive(Serialize, Deserialize)]` on a changed struct — the
//!   generated `impl Serialize for S` block appears only after
//!   expansion. A downstream consumer calling `serde_json::to_string`
//!   on that struct would compile against the expanded impl, and
//!   changing the struct's fields changes that impl's behavior.
//! * `#[tokio::main]` / `#[tracing::instrument]` — these wrap the
//!   user's fn body with additional tokens that are invisible to
//!   syn-only walkers. Their bodies include references that syn-only
//!   analyzers can't reach.
//! * `#[derive(clap::Parser)]` / `#[derive(thiserror::Error)]` — similar
//!   story: impls of `clap::Parser` / `std::error::Error` get synthesized.
//!
//! Scope in this release
//! ---------------------
//! Emits two classes of finding from the expanded `syn::File`
//! (cargo-expand merges a whole crate into one stream):
//!
//! 1. **Expanded trait impls** — `impl Trait for T` blocks where
//!    `Trait` is in `changed_traits`. Catches derives and attribute
//!    macros (`#[derive(Serialize)]`, `#[derive(clap::Parser)]`,
//!    `#[derive(thiserror::Error)]`) that synthesize impls the
//!    syn-only walker never sees.
//! 2. **Expanded test references** — `#[test]` / `#[tokio::test]` /
//!    `#[rstest]` fns whose post-expansion body tokens reference a
//!    name in `changed_symbols`. Catches the `sqlx::query!(...)`
//!    case: raw source has a literal string, but the expansion
//!    names the referenced struct. Dedup against raw-source
//!    `TestReference` findings happens in `dedup.rs` so we don't
//!    double-count tests the syn-only walker already caught.
//!
//! Still deferred: full source-map back to the unexpanded file for
//! jump-to-definition (expansion loses line anchors). Binary-only
//! crates are handled by retrying without `--lib` when the `--lib`
//! attempt reports that the package has no library target.
//!
//! Graceful degradation
//! --------------------
//! The gate is `--macro-expand`. If the flag is off, this module is a
//! no-op. If the flag is on but `cargo-expand` isn't on PATH, the tool
//! fails to spawn, or expansion takes longer than
//! [`MACRO_EXPAND_TIMEOUT`], we log a stderr notice and return an
//! empty finding list — consistent with the project-wide "never fail
//! the whole run because an optional tool is missing" policy.

use crate::finding::{Finding, FindingKind, Location, Tier};
use crate::tests_scan::{is_test_fn, tokens_contain_ident};
use anyhow::{Context, Result};
use quote::ToTokens;
use std::collections::BTreeSet;
use std::io::Read;
use std::path::Path;
use std::process::{Command, Stdio};
use std::thread::JoinHandle;
use std::time::Duration;
use syn::visit::Visit;
use syn::{ItemFn, ItemImpl, Path as SynPath, Type, TypePath};

/// File name of the expansion tool's executable. Used for the `PATH`
/// lookup in `is_installed` and interpolated into the "not found"
/// notice; the actual invocation goes through `cargo expand`.
const TOOL_BIN: &str = "cargo-expand";

/// Wall-clock budget for a single `cargo expand` invocation. Cold
/// builds on a mid-sized crate can hit 30s; we allow 90s for headroom
/// but kill runs that stall past that so a misbehaving expansion
/// doesn't hang the whole pipeline.
///
/// Enforced by the 100ms poll loop in `spawn_cargo_expand`, so the
/// kill can land up to one poll interval after the budget elapses.
/// The budget is per spawn: the binary-only retry in
/// `run_cargo_expand` starts a fresh clock.
const MACRO_EXPAND_TIMEOUT: Duration = Duration::from_secs(90);

/// Entry point for the macro-expansion analyzer.
///
/// Expands the crate at `root` with `cargo expand` and reports trait
/// impls whose trait is named in `changed_traits` together with test
/// fns whose expanded body mentions a name in `changed_symbols`.
///
/// Returns `Ok` with an empty list (never an error) when:
/// * `enabled` is false,
/// * both input sets are empty,
/// * `cargo-expand` is not found on `PATH` (a notice goes to stderr), or
/// * the expansion itself fails (the error goes to stderr).
///
/// Findings carry blank IDs; assigning IDs and deduplicating against
/// other analyzers is left to the orchestrator.
pub fn run(
    root: &Path,
    changed_traits: &BTreeSet<String>,
    changed_symbols: &BTreeSet<String>,
    enabled: bool,
) -> Result<Vec<Finding>> {
    let nothing_to_match = changed_traits.is_empty() && changed_symbols.is_empty();
    if !enabled || nothing_to_match {
        return Ok(Vec::new());
    }

    if !is_installed() {
        eprintln!(
            "cargo-impact: --macro-expand requested but `{TOOL_BIN}` not found on PATH. \
             Install it via `cargo install cargo-expand`; skipping."
        );
        return Ok(Vec::new());
    }

    // An expansion failure degrades to "no findings" rather than
    // failing the whole run: the tool is optional.
    let findings = match run_cargo_expand(root) {
        Ok(expanded) => find_in_expanded(&expanded, changed_traits, changed_symbols),
        Err(e) => {
            eprintln!("cargo-impact: cargo-expand invocation failed: {e:#}; skipping.");
            Vec::new()
        }
    };
    Ok(findings)
}

/// Turn expanded source text into findings.
///
/// Parses `expanded` with syn, walks it once, and emits — in this
/// order — one `TraitImpl` finding per impl whose trait is in
/// `changed_traits`, then one `TestReference` finding per test fn
/// whose body mentions a name in `changed_symbols`. Every finding is
/// `Tier::Likely` with confidence 0.75, a blank ID, and the sentinel
/// file path `<expanded>`.
///
/// Input that does not parse yields an empty list plus a stderr
/// notice. Separated from `run` so tests can feed it any
/// syn-parseable string without spawning cargo-expand.
pub(crate) fn find_in_expanded(
    expanded: &str,
    changed_traits: &BTreeSet<String>,
    changed_symbols: &BTreeSet<String>,
) -> Vec<Finding> {
    let ast = match syn::parse_file(expanded) {
        Ok(ast) => ast,
        Err(_) => {
            eprintln!(
                "cargo-impact: cargo-expand output didn't parse as a syn::File; skipping. \
                 This is usually a stability bug in the expansion; report with the expanded \
                 output attached."
            );
            return Vec::new();
        }
    };

    let mut visitor = ExpandedVisitor {
        changed_traits,
        changed_symbols,
        impl_hits: Vec::new(),
        test_hits: Vec::new(),
    };
    visitor.visit_file(&ast);
    let ExpandedVisitor {
        impl_hits,
        test_hits,
        ..
    } = visitor;

    let impl_findings = impl_hits.into_iter().map(|(trait_name, impl_for)| {
        let evidence = format!(
            "`impl {trait_name} for {impl_for}` — revealed by macro expansion (syn-only \
             analysis doesn't see impls synthesized by derive/attribute macros like \
             serde, tokio, clap, thiserror)"
        );
        // A synthesized impl has no stable source location; the
        // `<expanded>` sentinel tells consumers not to jump-to-file.
        let impl_site = Location {
            file: std::path::PathBuf::from("<expanded>"),
            symbol: format!("impl {trait_name} for {impl_for}"),
        };
        let kind = FindingKind::TraitImpl {
            trait_name,
            impl_for,
            impl_site,
        };
        Finding::new("", Tier::Likely, 0.75, kind, evidence)
    });

    let test_findings = test_hits.into_iter().map(|(test_name, matched)| {
        let matched_vec: Vec<String> = matched.into_iter().collect();
        let evidence = format!(
            "test body references {} after macro expansion (syn-only source walk missed \
             it — likely a fn-like macro like `sqlx::query!` or `include_str!` that \
             expands to code naming the changed symbol)",
            matched_vec.join(", ")
        );
        // Build the suggestion before `test_name` moves into the location.
        let action = format!("cargo nextest run -E 'test({test_name})'");
        let kind = FindingKind::TestReference {
            test: Location {
                file: std::path::PathBuf::from("<expanded>"),
                symbol: test_name,
            },
            matched_symbols: matched_vec,
        };
        Finding::new("", Tier::Likely, 0.75, kind, evidence).with_suggested_action(action)
    });

    impl_findings.chain(test_findings).collect()
}

/// Whether a file named `TOOL_BIN` exists in some `PATH` directory.
fn is_installed() -> bool {
    let located = which(TOOL_BIN);
    located.is_some()
}

/// Minimal `which`: return the first `PATH` entry that contains a
/// regular file called `name`.
///
/// Returns `None` when `PATH` is unset or no directory has a match.
/// On Windows the candidate is also tried with its extension set to
/// `exe`. Only file existence is checked, not the executable bit.
fn which(name: &str) -> Option<std::path::PathBuf> {
    let search_path = std::env::var_os("PATH")?;
    std::env::split_paths(&search_path).find_map(|dir| {
        let plain = dir.join(name);
        if plain.is_file() {
            return Some(plain);
        }
        #[cfg(windows)]
        {
            let exe = plain.with_extension("exe");
            if exe.is_file() {
                return Some(exe);
            }
        }
        None
    })
}

/// Expand the crate at `root`, preferring its library target.
///
/// The first attempt passes `--lib`, since in lib+bin crates the
/// traits, types and derive impls typically live on the library side.
/// Only when that attempt's error text matches `is_no_library_error`
/// (a binary-only crate) is a second attempt made without `--lib`,
/// letting cargo's default target selection pick the bin. Any other
/// first-attempt error is returned as-is with no retry.
fn run_cargo_expand(root: &Path) -> Result<String> {
    let lib_err = match spawn_cargo_expand(root, &["--lib"]) {
        Ok(expanded) => return Ok(expanded),
        Err(err) => err,
    };

    if !is_no_library_error(&lib_err.to_string()) {
        return Err(lib_err);
    }

    eprintln!(
        "cargo-impact: `cargo expand --lib` found no library target; \
         retrying without --lib for binary-only crate."
    );
    spawn_cargo_expand(root, &[])
}

/// Report whether `stderr` looks like a "package has no library
/// target" failure, which is the cue to fall back to binary expansion.
///
/// Wording differs between cargo / cargo-expand versions, so the
/// check is a case-insensitive search for any of a few stable
/// substrings rather than an exact message match.
fn is_no_library_error(stderr: &str) -> bool {
    const NEEDLES: [&str; 3] = [
        "no library targets",
        "no lib target",
        "does not have a library",
    ];
    let lowered = stderr.to_lowercase();
    NEEDLES.iter().any(|needle| lowered.contains(needle))
}

fn spawn_cargo_expand(root: &Path, extra_args: &[&str]) -> Result<String> {
    let mut cmd = Command::new("cargo");
    cmd.arg("expand")
        .args(extra_args)
        .arg("--color=never")
        .arg("--ugly") // no rustfmt — faster, syn doesn't care
        .current_dir(root)
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped());

    let mut child = cmd.spawn()?;
    let stdout = child
        .stdout
        .take()
        .context("cargo expand child missing stdout pipe")?;
    let stderr = child
        .stderr
        .take()
        .context("cargo expand child missing stderr pipe")?;
    let stdout_reader = read_pipe(stdout);
    let stderr_reader = read_pipe(stderr);
    let start = std::time::Instant::now();

    // Poll with a wall-clock budget. Can't use `wait_timeout` without
    // pulling a new dep; the poll loop keeps us std-only. The stdout/stderr
    // reader threads are required: cargo-expand output can exceed the OS pipe
    // buffer, and waiting for process exit before reading would deadlock.
    loop {
        if let Some(status) = child.try_wait()? {
            let stdout = join_pipe(stdout_reader)?;
            let stderr = join_pipe(stderr_reader)?;
            if !status.success() {
                let stderr = String::from_utf8_lossy(&stderr);
                anyhow::bail!("cargo expand exited with status {status:?}; stderr:\n{stderr}");
            }
            return Ok(String::from_utf8_lossy(&stdout).into_owned());
        }
        if start.elapsed() > MACRO_EXPAND_TIMEOUT {
            let _ = child.kill();
            let _ = child.wait();
            let _ = join_pipe(stdout_reader);
            let _ = join_pipe(stderr_reader);
            anyhow::bail!(
                "cargo expand did not finish within {:?}",
                MACRO_EXPAND_TIMEOUT
            );
        }
        std::thread::sleep(Duration::from_millis(100));
    }
}

/// Drain `pipe` to EOF on a freshly spawned thread.
///
/// The returned handle yields every byte read, or the I/O error that
/// interrupted the read. Reading concurrently with the child process
/// keeps it from blocking on a full pipe buffer.
fn read_pipe<R>(mut pipe: R) -> JoinHandle<std::io::Result<Vec<u8>>>
where
    R: Read + Send + 'static,
{
    std::thread::spawn(move || {
        let mut bytes = Vec::new();
        pipe.read_to_end(&mut bytes).map(|_| bytes)
    })
}

fn join_pipe(handle: JoinHandle<std::io::Result<Vec<u8>>>) -> Result<Vec<u8>> {
    handle
        .join()
        .map_err(|_| anyhow::anyhow!("cargo expand pipe reader panicked"))?
        .context("reading cargo expand output")
}

// ---------------------------------------------------------------------------
// Visitor — merges trait-impl and test-ref detection over the single
// expanded stream. Kept local (rather than composed from traits.rs +
// tests_scan.rs visitors) because `cargo expand` merges the whole
// crate into one file; we can't cheaply map hits back to per-file
// paths, so we don't try.
// ---------------------------------------------------------------------------

/// Single-pass syn visitor over the expanded crate. Borrows the two
/// name sets to match against and accumulates raw hits, which
/// `find_in_expanded` converts into findings afterwards.
struct ExpandedVisitor<'a> {
    /// Trait names to look for; compared against the last path
    /// segment of each `impl Trait for T` header.
    changed_traits: &'a BTreeSet<String>,
    /// Symbol names searched for in the stringified body of test fns.
    changed_symbols: &'a BTreeSet<String>,
    /// `(trait_name, impl_for)` for every matching impl, in visit order.
    impl_hits: Vec<(String, String)>,
    /// `(test fn name, matched symbols)` for every test fn with at
    /// least one match, in visit order.
    test_hits: Vec<(String, BTreeSet<String>)>,
}

impl<'ast> Visit<'ast> for ExpandedVisitor<'_> {
    /// Record `impl Trait for T` blocks whose trait's last path
    /// segment is one of the changed traits, then keep descending so
    /// items nested inside the impl are still visited.
    fn visit_item_impl(&mut self, node: &'ast ItemImpl) {
        let changed_trait = node
            .trait_
            .as_ref()
            .and_then(|(_, trait_path, _)| last_ident(trait_path))
            .filter(|name| self.changed_traits.contains(name));

        if let Some(trait_name) = changed_trait {
            let impl_for = type_to_string(&node.self_ty);
            self.impl_hits.push((trait_name, impl_for));
        }
        syn::visit::visit_item_impl(self, node);
    }

    /// Record test fns whose stringified body mentions at least one
    /// changed symbol, then keep descending into the fn.
    fn visit_item_fn(&mut self, f: &'ast ItemFn) {
        if !self.changed_symbols.is_empty() && is_test_fn(&f.attrs) {
            let body_tokens = f.block.to_token_stream().to_string();
            let mut matched = BTreeSet::new();
            for sym in self.changed_symbols.iter() {
                if tokens_contain_ident(&body_tokens, sym) {
                    matched.insert(sym.clone());
                }
            }
            if !matched.is_empty() {
                let test_name = f.sig.ident.to_string();
                self.test_hits.push((test_name, matched));
            }
        }
        syn::visit::visit_item_fn(self, f);
    }
}

/// Name of the final segment of `path` (e.g. `Serialize` for
/// `::serde::Serialize`), or `None` when the path has no segments.
fn last_ident(path: &SynPath) -> Option<String> {
    let segment = path.segments.last()?;
    Some(segment.ident.to_string())
}

/// Short display name for an impl's self type.
///
/// A plain path type without a qualified-self prefix is reduced to
/// the identifier of its last segment (generic arguments dropped);
/// every other type falls back to its full token-stream rendering.
fn type_to_string(ty: &Type) -> String {
    let bare_name = match ty {
        Type::Path(TypePath { qself: None, path }) => {
            path.segments.last().map(|seg| seg.ident.to_string())
        }
        _ => None,
    };
    bare_name.unwrap_or_else(|| ty.to_token_stream().to_string())
}

// Unit tests. Everything here drives `find_in_expanded`, `run`'s
// short-circuit paths, or the small helpers directly with in-memory
// fixtures; no test spawns `cargo expand`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned name set from string literals, for use as a
    /// `changed_traits` / `changed_symbols` argument.
    fn changed(names: &[&str]) -> BTreeSet<String> {
        names.iter().map(|s| (*s).to_string()).collect()
    }

    #[test]
    fn empty_changed_set_returns_no_findings() {
        let src = "impl Serialize for S {}";
        let hits = find_in_expanded(src, &BTreeSet::new(), &BTreeSet::new());
        assert!(hits.is_empty());
    }

    #[test]
    fn matches_derived_impl_on_changed_trait() {
        let src = "struct S; impl Greeter for S { fn hi(&self) {} }";
        let hits = find_in_expanded(src, &changed(&["Greeter"]), &BTreeSet::new());
        assert_eq!(hits.len(), 1);
        let FindingKind::TraitImpl {
            trait_name,
            impl_for,
            ..
        } = &hits[0].kind
        else {
            panic!("wrong kind");
        };
        assert_eq!(trait_name, "Greeter");
        assert_eq!(impl_for, "S");
    }

    #[test]
    fn evidence_calls_out_macro_expansion_source() {
        let src = "impl Greeter for S { fn hi(&self) {} }";
        let hits = find_in_expanded(src, &changed(&["Greeter"]), &BTreeSet::new());
        assert!(
            hits[0].evidence.contains("revealed by macro expansion"),
            "evidence should mark the finding as expansion-derived: {}",
            hits[0].evidence
        );
    }

    #[test]
    fn ignores_impls_on_unchanged_traits() {
        let src = "impl Unrelated for S { }";
        let hits = find_in_expanded(src, &changed(&["Greeter"]), &BTreeSet::new());
        assert!(hits.is_empty());
    }

    #[test]
    fn matches_impl_via_last_path_segment() {
        // Expanded output often carries fully-qualified paths;
        // match on the trailing segment only, same as traits.rs.
        let src = "impl ::serde::Serialize for S { }";
        let hits = find_in_expanded(src, &changed(&["Serialize"]), &BTreeSet::new());
        assert_eq!(hits.len(), 1);
    }

    #[test]
    fn multiple_matches_in_one_stream_all_emitted() {
        // Two impls of `A` plus one of `B`: one finding per impl block,
        // not one per distinct trait.
        let src = "
            impl A for X { }
            impl A for Y { }
            impl B for Z { }
        ";
        let hits = find_in_expanded(src, &changed(&["A", "B"]), &BTreeSet::new());
        assert_eq!(hits.len(), 3);
    }

    #[test]
    fn unparseable_input_returns_empty_without_panicking() {
        let hits = find_in_expanded(
            "this is {{ not syn parseable",
            &changed(&["X"]),
            &changed(&["Y"]),
        );
        assert!(hits.is_empty());
    }

    #[test]
    fn disabled_flag_short_circuits_before_calling_cargo() {
        // If the tool weren't installed this would still return Ok(empty)
        // because `enabled = false` short-circuits. Proves the flag gate.
        let findings = run(
            Path::new("/nonexistent"),
            &changed(&["X"]),
            &BTreeSet::new(),
            false,
        )
        .unwrap();
        assert!(findings.is_empty());
    }

    #[test]
    fn empty_inputs_short_circuit_before_spawning() {
        // `enabled` is true here; the empty name sets are what stop
        // `run` before it looks for or spawns the tool.
        let findings = run(
            Path::new("/nonexistent"),
            &BTreeSet::new(),
            &BTreeSet::new(),
            true,
        )
        .unwrap();
        assert!(findings.is_empty());
    }

    // --- Expanded test-reference findings ---

    #[test]
    fn detects_test_referencing_changed_symbol_in_expanded_body() {
        // Simulates a test that, after cargo expand, references `User`
        // — the kind of code `sqlx::query!("SELECT * FROM users")`
        // expands into. The raw source wouldn't have carried this
        // reference.
        let src = r#"
            #[test]
            fn query_test() {
                let _: User = User::default();
            }
        "#;
        let hits = find_in_expanded(src, &BTreeSet::new(), &changed(&["User"]));
        assert_eq!(hits.len(), 1);
        let FindingKind::TestReference {
            test,
            matched_symbols,
        } = &hits[0].kind
        else {
            panic!("wrong kind: {:?}", hits[0].kind);
        };
        assert_eq!(test.symbol, "query_test");
        assert_eq!(test.file, std::path::PathBuf::from("<expanded>"));
        assert_eq!(matched_symbols, &vec!["User".to_string()]);
        assert_eq!(hits[0].tier, Tier::Likely);
        assert!((hits[0].confidence - 0.75).abs() < f64::EPSILON);
    }

    #[test]
    fn expanded_test_ref_evidence_calls_out_expansion_source() {
        let src = "#[test] fn t() { User::new(); }";
        let hits = find_in_expanded(src, &BTreeSet::new(), &changed(&["User"]));
        assert!(
            hits[0].evidence.contains("after macro expansion"),
            "evidence should mark the finding as expansion-derived: {}",
            hits[0].evidence
        );
    }

    #[test]
    fn expanded_test_ref_emits_nextest_filter_suggestion() {
        let src = "#[test] fn login_case() { login(); }";
        let hits = find_in_expanded(src, &BTreeSet::new(), &changed(&["login"]));
        assert!(
            hits[0]
                .suggested_action
                .as_deref()
                .is_some_and(|s| s.contains("test(login_case)")),
            "expected nextest filter suggestion, got {:?}",
            hits[0].suggested_action
        );
    }

    #[test]
    fn non_test_fns_are_not_emitted_as_test_refs() {
        let src = "fn helper() { let _ = User::default(); }";
        let hits = find_in_expanded(src, &BTreeSet::new(), &changed(&["User"]));
        assert!(hits.is_empty());
    }

    #[test]
    fn expanded_test_ref_respects_word_boundaries() {
        // `user_profile` must not match the changed symbol `user`.
        let src = "#[test] fn t() { let user_profile = 1; let _ = user_profile; }";
        let hits = find_in_expanded(src, &BTreeSet::new(), &changed(&["user"]));
        assert!(hits.is_empty(), "unexpected hits: {hits:?}");
    }

    // --- Binary-only crate fallback ---

    #[test]
    fn is_no_library_error_matches_known_phrasings() {
        // cargo-expand's message (current phrasing).
        assert!(is_no_library_error(
            "error: no library targets found in package `foo`"
        ));
        // cargo's own variant used in some versions.
        assert!(is_no_library_error(
            "error: no lib target found in package `foo`"
        ));
        // Another phrasing surfaced in older cargo/cargo-expand combos.
        assert!(is_no_library_error(
            "error: the package `foo` does not have a library"
        ));
    }

    #[test]
    fn is_no_library_error_is_case_insensitive() {
        assert!(is_no_library_error(
            "ERROR: No Library Targets Found in package `foo`"
        ));
    }

    #[test]
    fn is_no_library_error_rejects_unrelated_errors() {
        assert!(!is_no_library_error(
            "error[E0382]: borrow of moved value `x`"
        ));
        assert!(!is_no_library_error("error: could not compile `foo`"));
        assert!(!is_no_library_error(""));
    }

    #[test]
    fn pipe_reader_collects_output_without_waiting_for_process_exit() {
        // An in-memory `Cursor` stands in for the child's pipe, so no
        // process is involved: this checks that the bytes read on the
        // reader thread come back intact through `join_pipe`.
        let handle = read_pipe(std::io::Cursor::new(b"expanded output".to_vec()));
        let out = join_pipe(handle).unwrap();
        assert_eq!(out, b"expanded output");
    }

    #[test]
    fn impl_and_test_findings_emitted_together_from_same_stream() {
        // One fixture that triggers both finding classes: `Greeter` is
        // a changed trait and `User` is a changed symbol used in a test.
        let src = r#"
            struct User;
            impl Greeter for User { fn hi(&self) {} }
            #[test]
            fn uses_user() {
                let _ = User;
            }
        "#;
        let hits = find_in_expanded(src, &changed(&["Greeter"]), &changed(&["User"]));
        assert_eq!(hits.len(), 2);
        let kinds: Vec<_> = hits
            .iter()
            .map(|h| match &h.kind {
                FindingKind::TraitImpl { .. } => "impl",
                FindingKind::TestReference { .. } => "test",
                _ => "other",
            })
            .collect();
        assert!(kinds.contains(&"impl"));
        assert!(kinds.contains(&"test"));
    }
}