Skip to main content

layer_conform_core/
pipeline.rs

1//! Pure orchestrator: rules + extracted functions → deviations.
2//!
3//! No I/O, no parsing — the caller pre-extracts every function and supplies
4//! them indexed by file path. This keeps the core testable with hand-built
5//! `FunctionRef`s and lets language adapters parallelize file parsing.
6
7use std::collections::HashMap;
8use std::path::Path;
9
10use compact_str::CompactString;
11
12use crate::deviation::{diff_sets, pick_best, Deviation, Differences, GoldenMatch};
13use crate::rule::{GoldenSelector, Rule};
14use crate::similarity::{aggregate, jaccard_sorted, SimilarityScore, Weights};
15use crate::tsed;
16use crate::FunctionRef;
17
/// Fallback similarity threshold used when neither a CLI override nor the
/// rule's own `threshold` is set (see `detect_deviations`).
const DEFAULT_THRESHOLD: f64 = 0.7;
19
/// Errors raised while setting up a rule, before any scoring happens.
#[derive(Debug, thiserror::Error)]
pub enum PipelineError {
    /// A rule's golden selector names a `file:symbol` pair that is absent
    /// from the pre-extracted function map supplied by the caller.
    #[error("rule `{rule_id}`: golden `{file}:{symbol}` not found in extracted files")]
    GoldenNotFound { rule_id: String, file: String, symbol: String },
}
25
/// All functions extracted from one source file, keyed by relative path string.
/// Populated by a language adapter up front; the pipeline itself never parses.
pub type ExtractedFiles = HashMap<String, Vec<FunctionRef>>;
28
29/// Run the deviation pipeline.
30///
31/// `threshold_override` (e.g. `--threshold` from CLI) wins over each rule's
32/// `threshold`, which wins over the built-in default of 0.7.
33pub fn detect_deviations(
34    rules: &[Rule],
35    files: &ExtractedFiles,
36    threshold_override: Option<f64>,
37) -> Result<Vec<Deviation>, PipelineError> {
38    let weights = Weights::default();
39    let mut out = Vec::new();
40    for rule in rules {
41        if rule.disabled {
42            continue;
43        }
44        let goldens = resolve_goldens(rule, files)?;
45        let threshold = threshold_override
46            .or(rule.threshold)
47            .unwrap_or(DEFAULT_THRESHOLD);
48
49        for (file_path, funcs) in files {
50            if !rule.matches(Path::new(file_path)) {
51                continue;
52            }
53            for func in funcs {
54                if func.ignore.is_some() {
55                    continue;
56                }
57                if is_golden(&goldens, file_path, &func.symbol) {
58                    continue;
59                }
60                let matches = score_against_all(func, &goldens, weights);
61                let (best, sorted) = pick_best(matches);
62                if best.similarity.overall < threshold {
63                    out.push(build_deviation(rule, file_path, func, best, sorted, &goldens));
64                }
65            }
66        }
67    }
68    Ok(out)
69}
70
71fn resolve_goldens<'f>(
72    rule: &Rule,
73    files: &'f ExtractedFiles,
74) -> Result<Vec<(GoldenSelector, &'f FunctionRef)>, PipelineError> {
75    let mut goldens = Vec::with_capacity(rule.goldens.len());
76    for g in &rule.goldens {
77        let funcs = files.get(&g.file).ok_or_else(|| PipelineError::GoldenNotFound {
78            rule_id: rule.id.clone(),
79            file: g.file.clone(),
80            symbol: g.symbol.clone(),
81        })?;
82        let func = funcs
83            .iter()
84            .find(|f| f.symbol.as_str() == g.symbol)
85            .ok_or_else(|| PipelineError::GoldenNotFound {
86                rule_id: rule.id.clone(),
87                file: g.file.clone(),
88                symbol: g.symbol.clone(),
89            })?;
90        goldens.push((g.clone(), func));
91    }
92    Ok(goldens)
93}
94
95fn is_golden(
96    goldens: &[(GoldenSelector, &FunctionRef)],
97    file_path: &str,
98    symbol: &CompactString,
99) -> bool {
100    goldens
101        .iter()
102        .any(|(g, _)| g.file == file_path && g.symbol == symbol.as_str())
103}
104
105fn score_against_all(
106    func: &FunctionRef,
107    goldens: &[(GoldenSelector, &FunctionRef)],
108    weights: Weights,
109) -> Vec<GoldenMatch> {
110    goldens
111        .iter()
112        .map(|(sel, golden_func)| GoldenMatch {
113            golden: sel.clone(),
114            similarity: score_pair(func, golden_func, weights),
115        })
116        .collect()
117}
118
119/// Score two functions on the 4 axes (shape / calls / imports / signature) and
120/// return the aggregated `SimilarityScore`. Pub so command-level explainers
121/// (`why`) can reuse the exact same scoring as `detect_deviations`.
122pub fn score_pair(actual: &FunctionRef, golden: &FunctionRef, weights: Weights) -> SimilarityScore {
123    let shape = tsed::tsed(&actual.tree, &golden.tree);
124    let calls = jaccard_sorted(&actual.calls, &golden.calls);
125    let imports = jaccard_sorted(&actual.imports, &golden.imports);
126    let signature = if actual.signature == golden.signature { 1.0 } else { 0.0 };
127    aggregate(shape, calls, imports, signature, weights)
128}
129
130fn build_deviation(
131    rule: &Rule,
132    file_path: &str,
133    func: &FunctionRef,
134    best: GoldenMatch,
135    sorted: Vec<GoldenMatch>,
136    goldens: &[(GoldenSelector, &FunctionRef)],
137) -> Deviation {
138    let golden_func = goldens
139        .iter()
140        .find(|(g, _)| *g == best.golden)
141        .map(|(_, f)| *f)
142        .expect("matched golden must be in resolved set");
143    let (missing_calls, extra_calls) = diff_sets(&golden_func.calls, &func.calls);
144    let (missing_imports, extra_imports) = diff_sets(&golden_func.imports, &func.imports);
145    Deviation {
146        rule_id: rule.id.clone(),
147        file: file_path.to_string(),
148        symbol: func.symbol.clone(),
149        matched_golden: best.golden.clone(),
150        all_golden_scores: sorted,
151        similarity: best.similarity,
152        differences: Differences {
153            missing_calls,
154            extra_calls,
155            missing_imports,
156            extra_imports,
157        },
158    }
159}