Skip to main content

tatara_rust_survey/
lib.rs

1//! `tatara-rust-survey` — adoption multiplier for the macro farm.
2//!
3//! Walks Rust source via `syn::parse_file`, visits every `impl` block,
4//! and classifies each method body against the catalog of published
5//! pleme-io derives. Reports a typed [`RefactorCandidate`] punch list
6//! the operator can act on — either by hand or via a future
7//! `survey-apply` mode that uses `syn::visit_mut` to rewrite the file.
8//!
9//! This is the **discovery** half of the structured adoption pipeline:
10//!
11//! ```text
12//! survey  →  apply  →  validate
13//!   │         │          │
14//!   │         │          └── cargo test on the modified crate;
15//!   │         │              landed only when green
16//!   │         └── syn::visit_mut transform; produces a diff
17//!   └── identify candidates; produce typed RefactorCandidate values
18//! ```
19//!
20//! The first detectors covered the four densest patterns from the
21//! 2026-05-28 fleet survey: **GetterAll**, **SetterAll**, **WithBuilder**,
22//! **IsVariant**; [`MatchedPattern`] lists every pattern recognized today.
23//! Each detector inspects a `syn::ImplItem::Fn` (or an associated const) and
24//! matches only the canonical derive shape exactly. New detectors plug in
25//! as one more enum variant + one more pattern-matching function.
26
27use proc_macro2::Span;
28use serde::Serialize;
29use std::path::{Path, PathBuf};
30use syn::visit::Visit;
31
32pub mod apply;
33pub mod cargo_deps;
34pub mod detector;
35pub mod fleet;
36pub mod pipeline;
37pub mod returns;
38pub use apply::{apply_to_source, ApplyError};
39pub use cargo_deps::{inject_deps, CargoDepsError, DepSource, InjectOutcome};
40pub use detector::{detectors, Detector};
41pub use fleet::{
42    survey_fleet, survey_fleet_apply, survey_fleet_validate, CandidateValidation,
43    CrateSurveyEntry, FleetApplyEntry, FleetApplyOpts, FleetApplyReport, FleetSurveyReport,
44    FleetValidateReport, ValidateOutcome,
45};
46pub use pipeline::{
47    apply_all_to_source, survey_apply_validate, FileOutcome, PipelineError, PipelineOpts,
48    PipelineOutcome,
49};
50pub use returns::{
51    fleet_returns, first_party_frontier_2026_06, Decision, FleetReturnsReport, FleetVerdict,
52    FrontierEstimate, LiftCostModel, PatternEconomics, Readiness,
53};
54
55/// One opportunity to replace a hand-written impl with a farm derive.
56/// `pattern` and `target_type` say what was matched and on which type;
57/// `derive_crate` and `derive_trait` name the dependency to add and the
58/// trait to put inside `#[derive(…)]`.
59#[derive(Clone, Debug, Serialize)]
60pub struct RefactorCandidate {
61    pub file: PathBuf,
62    /// Intended as the 1-based line of the `impl` keyword. NOTE(review): the visitor in this file currently always stores `1`.
63    pub line: usize,
64    /// The pleme-io published derive crate the operator would consume.
65    pub derive_crate: &'static str,
66    /// The trait identifier the user would write inside `#[derive(…)]`.
67    pub derive_trait: &'static str,
68    /// Which farm pattern this site matches.
69    pub pattern: MatchedPattern,
70    /// The struct/enum name the impl block targets.
71    pub target_type: String,
72    /// How many lines of impl-block boilerplate adopting this derive
73    /// would delete (approximate — the visitor uses a flat 5 lines per matched item).
74    pub estimated_loc_saved: usize,
75}
76
/// Identifier for each hand-written impl shape the survey recognizes.
/// Each variant's doc shows the canonical shape and the derive that
/// replaces it. Serialized in kebab-case (see the `serde` attribute below).
77#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize)]
78#[serde(rename_all = "kebab-case")]
79pub enum MatchedPattern {
80    /// `pub fn <field>(&self) -> &<T> { &self.<field> }` per field.
81    /// Adopt: `pleme-getter-derive` → `#[derive(GetterAll)]`.
82    GetterAll,
83    /// `pub fn set_<field>(&mut self, v: <T>) { self.<field> = v; }` per field.
84    /// Adopt: `pleme-setter-derive` → `#[derive(SetterAll)]`.
85    SetterAll,
86    /// `pub fn with_<field>(mut self, v: <T>) -> Self { self.<field> = v; self }` per field.
87    /// Adopt: `pleme-builder-derive` → `#[derive(WithBuilder)]`.
88    WithBuilder,
89    /// `pub fn is_<variant>(&self) -> bool { matches!(self, Self::<variant>(..) | Self::<variant> { .. } | Self::<variant>) }` per enum variant.
90    /// Adopt: `pleme-isvariant-derive` → `#[derive(IsVariant)]`.
91    IsVariant,
92    /// `pub fn <field>_mut(&mut self) -> &mut <T> { &mut self.<field> }` per field.
93    /// Adopt: `pleme-asmut-derive` → `#[derive(AsMutAll)]`.
94    AsMutAll,
95    /// `pub fn into_<field>(self) -> <T> { self.<field> }` per field — consuming getter.
96    /// Adopt: `pleme-owned-derive` → `#[derive(OwnedAll)]`.
97    OwnedAll,
98    /// `pub fn replace_<field>(&mut self, v: <T>) -> <T> { std::mem::replace(&mut self.<field>, v) }`.
99    /// Adopt: `pleme-replace-derive` → `#[derive(ReplaceAll)]`.
100    ReplaceAll,
101    /// `pub fn take_<field>(&mut self) -> <T> where <T>: Default { std::mem::take(&mut self.<field>) }`.
102    /// Adopt: `pleme-take-derive` → `#[derive(TakeAll)]`.
103    TakeAll,
104    /// `pub fn reset_<field>(&mut self) where <T>: Default { self.<field> = <T>::default() }`.
105    /// Adopt: `pleme-reset-derive` → `#[derive(ResetAll)]`.
106    ResetAll,
107    /// `pub fn swap_<field>(&mut self, other: &mut Self) { std::mem::swap(...) }`.
108    /// Adopt: `pleme-swap-derive` → `#[derive(SwapAll)]`.
109    SwapAll,
110    /// `pub const COUNT: usize = N` on an enum impl block.
111    /// Adopt: `pleme-variantcount-derive` → `#[derive(VariantCount)]`.
112    VariantCountConst,
113    /// `pub const ALL: &'static [Self] = &[Self::A, Self::B, ...]` on a unit-variant enum.
114    /// Adopt: `pleme-allvariants-derive` → `#[derive(AllVariants)]`.
115    AllVariantsConst,
116}
117
118impl MatchedPattern {
119    /// Derive crate name (kebab-case) for this pattern. Routed
120    /// through the [`detector`] registry so MatchedPattern stays a
121    /// pure identifier — the per-pattern data lives on the detector
122    /// that owns it. Panics if a pattern variant has no detector
123    /// (registry-invariant: every variant MUST have one).
124    pub fn derive_crate(self) -> &'static str {
125        detector::detectors()
126            .iter()
127            .find(|d| d.pattern() == self)
128            .map(|d| d.derive_crate())
129            .expect("every MatchedPattern variant has a Detector in the registry")
130    }
131    /// Trait identifier (PascalCase) for this pattern. Same routing
132    /// as [`Self::derive_crate`].
133    pub fn derive_trait(self) -> &'static str {
134        detector::detectors()
135            .iter()
136            .find(|d| d.pattern() == self)
137            .map(|d| d.derive_trait())
138            .expect("every MatchedPattern variant has a Detector in the registry")
139    }
140}
141
/// Errors returned by the survey entry points in this file.
142#[derive(Debug, thiserror::Error)]
143pub enum SurveyError {
    /// An I/O operation failed; converted from `std::io::Error` via `#[from]`.
144    #[error("io: {0}")]
145    Io(#[from] std::io::Error),
    /// `syn` could not parse the file at `path`; `err` is the parser's error.
146    #[error("syn parse failed for {path}: {err}")]
147    Parse {
148        path: PathBuf,
149        err: syn::Error,
150    },
151}
152
153/// Walk a single `.rs` file and return every refactor candidate found.
154pub fn survey_file(path: &Path) -> Result<Vec<RefactorCandidate>, SurveyError> {
155    let src = std::fs::read_to_string(path)?;
156    let file: syn::File = syn::parse_file(&src).map_err(|err| SurveyError::Parse {
157        path: path.to_path_buf(),
158        err,
159    })?;
160    let mut v = SurveyVisitor::new(path.to_path_buf());
161    v.visit_file(&file);
162    Ok(v.candidates)
163}
164
165/// Walk every `.rs` file under `root` recursively. Skips `target/`,
166/// `.git/`, and hidden directories.
167pub fn survey_tree(root: &Path) -> Result<Vec<RefactorCandidate>, SurveyError> {
168    let mut out = vec![];
169    visit_rs_files(root, &mut |p| {
170        match survey_file(p) {
171            Ok(mut cands) => out.append(&mut cands),
172            Err(SurveyError::Parse { .. }) => {
173                // Parse errors on individual files (e.g. malformed
174                // generated code) shouldn't abort the whole survey.
175            }
176            Err(other) => return Err(other),
177        }
178        Ok(())
179    })?;
180    Ok(out)
181}
182
183fn visit_rs_files(
184    root: &Path,
185    f: &mut dyn FnMut(&Path) -> Result<(), SurveyError>,
186) -> Result<(), SurveyError> {
187    for entry in std::fs::read_dir(root)? {
188        let entry = entry?;
189        let path = entry.path();
190        let name = entry.file_name();
191        let name = name.to_string_lossy();
192        if name.starts_with('.') || name == "target" {
193            continue;
194        }
195        let file_type = entry.file_type()?;
196        if file_type.is_dir() {
197            visit_rs_files(&path, f)?;
198        } else if path.extension().is_some_and(|e| e == "rs") {
199            f(&path)?;
200        }
201    }
202    Ok(())
203}
204
205// ─────────────────────────────────────────────────────────────────────
206// SurveyVisitor — syn::Visit walker
207// ─────────────────────────────────────────────────────────────────────
208
209struct SurveyVisitor {
210    file: PathBuf,
211    candidates: Vec<RefactorCandidate>,
212}
213
214impl SurveyVisitor {
215    fn new(file: PathBuf) -> Self {
216        Self { file, candidates: vec![] }
217    }
218}
219
220impl<'ast> Visit<'ast> for SurveyVisitor {
221    fn visit_item_impl(&mut self, i: &'ast syn::ItemImpl) {
222        // Only inherent impls (no `for Trait`) are candidates for
223        // GetterAll / SetterAll / WithBuilder / IsVariant — those
224        // are typed-fleet derives that emit inherent impls.
225        if i.trait_.is_some() {
226            return;
227        }
228
229        // Extract the BASE identifier — for `impl<T> Foo<T> { ... }`
230        // we want `target_type == "Foo"` so it matches `Item::Struct`
231        // whose `ident` is the unadorned name. `type_to_string` would
232        // produce `"Foo < T >"` here (token spaces) and break the
233        // downstream apply lookup.
234        let Some(target_type) = type_base_ident(&i.self_ty) else {
235            return; // weird shape (Trait object, tuple, etc.) — skip
236        };
237        // proc-macro2's Span::start() is gated behind the "span-locations"
238        // feature; under stable rustc without that feature we just
239        // report line 1. Operators get the file path either way.
240        let line = 1;
241
242        // Walk the impl items and classify each one against every
243        // detector. ImplItem::Fn → matches/matches_assoc_const?
244        // ImplItem::Const → matches_assoc_const only. Each detector
245        // controls its own min_count (per-field/per-variant default
246        // to ≥2; whole-impl assoc-const patterns override to ≥1).
247        let mut counts: std::collections::HashMap<MatchedPattern, usize> =
248            std::collections::HashMap::new();
249        for item in &i.items {
250            match item {
251                syn::ImplItem::Fn(f) => {
252                    if let Some(pat) = classify_fn(f) {
253                        *counts.entry(pat).or_default() += 1;
254                    }
255                }
256                syn::ImplItem::Const(c) => {
257                    for d in detector::detectors() {
258                        if d.matches_assoc_const(c) {
259                            *counts.entry(d.pattern()).or_default() += 1;
260                            break; // first-match-wins; assoc-const detectors are disjoint
261                        }
262                    }
263                }
264                _ => {}
265            }
266        }
267
268        // Per-detector min_count gates emission. Look up the detector
269        // by pattern to read its threshold.
270        let det_for = |p: MatchedPattern| -> Option<&'static dyn detector::Detector> {
271            detector::detectors().iter().find(|d| d.pattern() == p).copied()
272        };
273        for (pat, count) in counts {
274            let min = det_for(pat).map(|d| d.min_count()).unwrap_or(2);
275            if count >= min {
276                self.candidates.push(RefactorCandidate {
277                    file: self.file.clone(),
278                    line,
279                    derive_crate: pat.derive_crate(),
280                    derive_trait: pat.derive_trait(),
281                    pattern: pat,
282                    target_type: target_type.clone(),
283                    // Rough estimate: 5 LOC per impl item (open + body + close).
284                    estimated_loc_saved: count * 5,
285                });
286            }
287        }
288    }
289}
290
291// ─────────────────────────────────────────────────────────────────────
292// Per-fn classifiers
293// ─────────────────────────────────────────────────────────────────────
294
295/// Classify a single `impl` method against the farm derive shapes.
296/// Returns `None` if the fn doesn't match any [`Detector`] in the
297/// registry.
298///
299/// First-match-wins: the registry is ordered, but the canonical four
300/// detectors are disjoint by fn-name prefix + body shape, so order
301/// doesn't currently matter. Future detectors must preserve that
302/// invariant or land registry-order tests asserting precedence.
303pub(crate) fn classify_fn(f: &syn::ImplItemFn) -> Option<MatchedPattern> {
304    detector::detectors()
305        .iter()
306        .find(|d| d.matches(f))
307        .map(|d| d.pattern())
308}
309
310pub(crate) fn type_to_string(t: &syn::Type) -> String {
311    use quote::ToTokens;
312    let _ = Span::call_site();
313    let mut buf = proc_macro2::TokenStream::new();
314    t.to_tokens(&mut buf);
315    buf.to_string()
316}
317
318/// Return the FINAL path segment's identifier as a String — strips
319/// generic arguments + module path. For `Foo<T>` returns `"Foo"`;
320/// for `a::b::Foo<T, U>` returns `"Foo"`; for `&[u8]`, tuples, trait
321/// objects, etc., returns `None`. This is what `Item::Struct.ident`
322/// compares against — the bare struct name, not the token-rendered
323/// generic spelling.
324pub(crate) fn type_base_ident(t: &syn::Type) -> Option<String> {
325    let syn::Type::Path(tp) = t else {
326        return None;
327    };
328    tp.path.segments.last().map(|seg| seg.ident.to_string())
329}
330
// Unit tests: each one writes a small fixture file and runs `survey_file` on it.
331#[cfg(test)]
332mod tests {
333    use super::*;
334
    /// Writes `body` to `<temp_dir>/tatara-survey-<name>-<pid>/lib.rs` and
    /// returns that path. The directory is created if missing and is not
    /// removed afterwards.
335    fn tmp_file(name: &str, body: &str) -> PathBuf {
336        let tmp = std::env::temp_dir().join(format!(
337            "tatara-survey-{}-{}",
338            name,
339            std::process::id()
340        ));
341        std::fs::create_dir_all(&tmp).unwrap();
342        let path = tmp.join("lib.rs");
343        std::fs::write(&path, body).unwrap();
344        path
345    }
346
347    #[test]
348    fn detects_getter_all_pattern() {
349        let path = tmp_file(
350            "getter",
351            r#"
352pub struct Foo { pub a: i32, pub b: String }
353
354impl Foo {
355    pub fn a(&self) -> &i32 { &self.a }
356    pub fn b(&self) -> &String { &self.b }
357}
358"#,
359        );
360        let cands = survey_file(&path).unwrap();
361        assert_eq!(cands.len(), 1);
362        assert_eq!(cands[0].pattern, MatchedPattern::GetterAll);
363        assert_eq!(cands[0].derive_crate, "pleme-getter-derive");
364        assert_eq!(cands[0].target_type, "Foo");
365    }
366
367    #[test]
368    fn detects_setter_all_pattern() {
369        let path = tmp_file(
370            "setter",
371            r#"
372pub struct Foo { pub a: i32, pub b: String }
373
374impl Foo {
375    pub fn set_a(&mut self, v: i32) { self.a = v; }
376    pub fn set_b(&mut self, v: String) { self.b = v; }
377}
378"#,
379        );
380        let cands = survey_file(&path).unwrap();
381        assert!(cands.iter().any(|c| c.pattern == MatchedPattern::SetterAll));
382    }
383
384    #[test]
385    fn detects_with_builder_pattern() {
386        let path = tmp_file(
387            "builder",
388            r#"
389pub struct Foo { pub a: i32, pub b: String }
390
391impl Foo {
392    pub fn with_a(mut self, v: i32) -> Self { self.a = v; self }
393    pub fn with_b(mut self, v: String) -> Self { self.b = v; self }
394}
395"#,
396        );
397        let cands = survey_file(&path).unwrap();
398        assert!(cands.iter().any(|c| c.pattern == MatchedPattern::WithBuilder));
399    }
400
401    #[test]
402    fn detects_isvariant_pattern() {
403        let path = tmp_file(
404            "isvariant",
405            r#"
406pub enum State { A, B(i32), C { x: u8 } }
407
408impl State {
409    pub fn is_a(&self) -> bool { matches!(self, Self::A) }
410    pub fn is_b(&self) -> bool { matches!(self, Self::B(_)) }
411    pub fn is_c(&self) -> bool { matches!(self, Self::C { .. }) }
412}
413"#,
414        );
415        let cands = survey_file(&path).unwrap();
416        assert!(cands.iter().any(|c| c.pattern == MatchedPattern::IsVariant));
417    }
418
419    #[test]
420    fn skips_non_inherent_impls() {
421        let path = tmp_file(
422            "trait_impl",
423            r#"
424pub struct Foo;
425
426impl std::fmt::Display for Foo {
427    fn fmt(&self, _: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Ok(()) }
428}
429"#,
430        );
431        let cands = survey_file(&path).unwrap();
432        assert!(cands.is_empty(), "trait impls aren't farm-derive targets");
433    }
434
435    #[test]
436    fn generic_impl_target_type_strips_to_base_ident() {
437        // Regression for real-fleet drift surfaced 2026-05-28:
438        // `impl<T> Attested<T> { ... }` previously produced
439        // target_type == "Attested < T >" (TokenStream::to_string
440        // spaces around generics), breaking the downstream apply
441        // match against Item::Struct.ident which is the bare name.
442        let path = tmp_file(
443            "generic-target",
444            r#"
445pub struct Attested<T> { pub inner: T, pub sig: String }
446
447impl<T> Attested<T> {
448    pub fn inner(&self) -> &T { &self.inner }
449    pub fn sig(&self) -> &String { &self.sig }
450}
451"#,
452        );
453        let cands = survey_file(&path).unwrap();
454        assert!(!cands.is_empty(), "must surface candidates on generic struct");
455        for c in &cands {
456            assert_eq!(
457                c.target_type, "Attested",
458                "target_type must strip generics + module path"
459            );
460        }
461        // Apply must succeed on the same source — proves the
462        // end-to-end roundtrip works for generic types.
463        let src = std::fs::read_to_string(&path).unwrap();
464        let out = apply_to_source(&src, &cands[0]).unwrap();
465        assert!(out.contains("#[derive(GetterAll)]"));
466        assert!(out.contains("pub struct Attested<T>"));
467    }
468
469    #[test]
470    fn skips_singleton_patterns() {
471        // One getter alone isn't worth a derive — only flag at ≥2.
472        let path = tmp_file(
473            "singleton",
474            r#"
475pub struct Foo { pub a: i32 }
476
477impl Foo {
478    pub fn a(&self) -> &i32 { &self.a }
479}
480"#,
481        );
482        let cands = survey_file(&path).unwrap();
483        assert!(cands.is_empty(), "single match doesn't justify a derive");
484    }
485
486    #[test]
487    fn classifies_invalidating_setter_as_non_setter() {
488        // mado's invalidating-setters have an extra `self.last_seqno = 0;`
489        // statement; canonical SetterAll has only the assign. The detector
490        // should NOT flag invalidating-setters as plain SetterAll.
        // Note: the fixture assigns `self.fg`, a field `R` does not
        // declare. `survey_file` only parses the text with `syn`, so the
        // test does not depend on the fixture type-checking.
491        let path = tmp_file(
492            "invalidating",
493            r#"
494pub struct R { pub bg: [f32; 4], pub last_seqno: u64 }
495
496impl R {
497    pub fn set_bg(&mut self, v: [f32; 4]) { self.bg = v; self.last_seqno = 0; }
498    pub fn set_fg(&mut self, v: [f32; 4]) { self.fg = v; self.last_seqno = 0; }
499}
500"#,
501        );
502        let cands = survey_file(&path).unwrap();
503        // The bodies have 2 stmts each — SetterAll requires exactly 1.
504        // Should NOT match.
505        assert!(
506            !cands.iter().any(|c| c.pattern == MatchedPattern::SetterAll),
507            "invalidating-setters must not be confused with plain SetterAll"
508        );
509    }
510
511    #[test]
512    fn estimated_loc_saved_scales_with_field_count() {
513        let path = tmp_file(
514            "many",
515            r#"
516pub struct Foo { pub a: i32, pub b: i32, pub c: i32, pub d: i32 }
517
518impl Foo {
519    pub fn a(&self) -> &i32 { &self.a }
520    pub fn b(&self) -> &i32 { &self.b }
521    pub fn c(&self) -> &i32 { &self.c }
522    pub fn d(&self) -> &i32 { &self.d }
523}
524"#,
525        );
526        let cands = survey_file(&path).unwrap();
527        let getter = cands
528            .iter()
529            .find(|c| c.pattern == MatchedPattern::GetterAll)
530            .unwrap();
531        // 4 fields × 5 lines = 20.
532        assert_eq!(getter.estimated_loc_saved, 20);
533    }
534}