Skip to main content

lex_vcs/
compute_diff.rs

1//! AST-level structural diff between two sets of `FnDecl`s.
2//!
3//! Moved from `lex-cli/src/diff.rs` so both the CLI (`lex diff`
4//! command) and the HTTP API (`lex serve`) can compute a [`DiffReport`]
5//! without introducing a circular dependency. `lex-vcs` is the right
6//! home because it already owns `DiffReport` and `diff_to_ops`.
7
8use crate::diff_report::{
9    AddRemove, BodyPatch, DiffReport, EffectChanges, Modified, Renamed,
10};
11use lex_ast::{stage_canonical_hash_hex, CExpr, Effect, EffectArg, FnDecl, Stage, TypeExpr};
12use std::collections::{BTreeMap, BTreeSet};
13
14/// Compute a structural diff between two named fn-decl maps.
15///
16/// `body_patches` controls whether body-level expression diffs are
17/// emitted inside each `Modified` entry. Pass `true` for rich output
18/// (CLI / review); `false` for a signature-only diff (faster).
19pub fn compute_diff(
20    a: &BTreeMap<String, FnDecl>,
21    b: &BTreeMap<String, FnDecl>,
22    body_patches: bool,
23) -> DiffReport {
24    let mut report = DiffReport::default();
25    let names_a: BTreeSet<&String> = a.keys().collect();
26    let names_b: BTreeSet<&String> = b.keys().collect();
27
28    let only_a: Vec<&String> = names_a.difference(&names_b).copied().collect();
29    let only_b: Vec<&String> = names_b.difference(&names_a).copied().collect();
30
31    // Detect renames: for each name only-in-A, check if any only-in-B
32    // has a body whose canonical-AST hash matches (modulo the fn
33    // name itself). sig_id over the FnDecl with name normalized
34    // serves as the structural-identity key.
35    let mut renamed_pairs: Vec<(String, String)> = Vec::new();
36    let mut consumed_a: BTreeSet<String> = BTreeSet::new();
37    let mut consumed_b: BTreeSet<String> = BTreeSet::new();
38    for &an in &only_a {
39        let fa = &a[an];
40        let fa_norm_id = body_hash(fa);
41        for &bn in &only_b {
42            if consumed_b.contains(bn) { continue; }
43            let fb = &b[bn];
44            if body_hash(fb) == fa_norm_id {
45                renamed_pairs.push((an.clone(), bn.clone()));
46                consumed_a.insert(an.clone());
47                consumed_b.insert(bn.clone());
48                break;
49            }
50        }
51    }
52
53    for &n in &only_a {
54        if consumed_a.contains(n) { continue; }
55        let fd = &a[n];
56        report.removed.push(AddRemove {
57            name: n.clone(),
58            signature: render_signature(fd),
59        });
60    }
61    for &n in &only_b {
62        if consumed_b.contains(n) { continue; }
63        let fd = &b[n];
64        report.added.push(AddRemove {
65            name: n.clone(),
66            signature: render_signature(fd),
67        });
68    }
69    for (an, bn) in &renamed_pairs {
70        let fd = &b[bn];
71        report.renamed.push(Renamed {
72            from: an.clone(),
73            to: bn.clone(),
74            signature: render_signature(fd),
75        });
76    }
77
78    // Modified: same name on both sides; compare bodies.
79    for n in names_a.intersection(&names_b) {
80        let fa = &a[*n];
81        let fb = &b[*n];
82        let sig_a = render_signature(fa);
83        let sig_b = render_signature(fb);
84        if body_hash(fa) == body_hash(fb) && sig_a == sig_b { continue; }
85
86        let patches = if body_patches {
87            let mut patches = Vec::new();
88            diff_expr(&fa.body, &fb.body, "body", &mut patches, 4);
89            patches
90        } else { Vec::new() };
91
92        let effect_changes = effect_diff(&fa.effects, &fb.effects);
93        report.modified.push(Modified {
94            name: (*n).clone(),
95            signature_before: sig_a.clone(),
96            signature_after: sig_b.clone(),
97            signature_changed: sig_a != sig_b,
98            effect_changes,
99            body_patches: patches,
100        });
101    }
102    report
103}
104
105/// Hash of the function's structural identity, used for rename
106/// detection. Excludes the function's name (so `fn foo -> Int { 1 }`
107/// and `fn bar -> Int { 1 }` share a hash) but includes everything
108/// else: params, effects, return type, body.
109fn body_hash(fd: &FnDecl) -> String {
110    let mut anon = fd.clone();
111    anon.name = String::new();
112    let stage = Stage::FnDecl(anon);
113    stage_canonical_hash_hex(&stage)
114}
115
116/// Walk two CExprs in parallel; record the first divergence at each
117/// child position. `depth` caps recursion so a tiny per-fn diff
118/// doesn't degenerate into hundreds of micro-changes.
119fn diff_expr(a: &CExpr, b: &CExpr, path: &str, out: &mut Vec<BodyPatch>, depth: u32) {
120    if depth == 0 { return; }
121    let kind_a = node_kind(a);
122    let kind_b = node_kind(b);
123    if kind_a != kind_b {
124        out.push(BodyPatch {
125            op: "Replace".into(), node_path: path.into(),
126            from_kind: kind_a.into(), to_kind: kind_b.into(),
127        });
128        return;
129    }
130    // Same kind: recurse into structurally-equivalent children.
131    match (a, b) {
132        (CExpr::Literal { value: la }, CExpr::Literal { value: lb }) => {
133            if la != lb {
134                out.push(BodyPatch {
135                    op: "Replace".into(), node_path: path.into(),
136                    from_kind: "Literal".into(), to_kind: "Literal".into(),
137                });
138            }
139        }
140        (CExpr::Var { name: na }, CExpr::Var { name: nb }) => {
141            if na != nb {
142                out.push(BodyPatch {
143                    op: "Replace".into(), node_path: path.into(),
144                    from_kind: format!("Var({na})"), to_kind: format!("Var({nb})"),
145                });
146            }
147        }
148        (CExpr::Call { callee: ca, args: aa },
149         CExpr::Call { callee: cb, args: ab }) => {
150            diff_expr(ca, cb, &format!("{path}.callee"), out, depth - 1);
151            diff_args(aa, ab, &format!("{path}.args"), out, depth);
152        }
153        (CExpr::Let { name: na, value: va, body: ba, .. },
154         CExpr::Let { name: nb, value: vb, body: bb, .. }) => {
155            if na != nb {
156                out.push(BodyPatch {
157                    op: "Replace".into(),
158                    node_path: format!("{path}.name"),
159                    from_kind: format!("Let({na})"),
160                    to_kind:   format!("Let({nb})"),
161                });
162            }
163            diff_expr(va, vb, &format!("{path}.value"), out, depth - 1);
164            diff_expr(ba, bb, &format!("{path}.body"),  out, depth - 1);
165        }
166        (CExpr::Match { scrutinee: sa, arms: ams },
167         CExpr::Match { scrutinee: sb, arms: bms }) => {
168            diff_expr(sa, sb, &format!("{path}.scrutinee"), out, depth - 1);
169            let n = ams.len().max(bms.len());
170            for i in 0..n {
171                let p = format!("{path}.arms[{i}]");
172                match (ams.get(i), bms.get(i)) {
173                    (Some(a), Some(b)) =>
174                        diff_expr(&a.body, &b.body, &p, out, depth - 1),
175                    (Some(_), None) => out.push(BodyPatch {
176                        op: "Deleted".into(), node_path: p,
177                        from_kind: "MatchArm".into(), to_kind: "(removed)".into(),
178                    }),
179                    (None, Some(_)) => out.push(BodyPatch {
180                        op: "Inserted".into(), node_path: p,
181                        from_kind: "(none)".into(), to_kind: "MatchArm".into(),
182                    }),
183                    (None, None) => break,
184                }
185            }
186        }
187        (CExpr::Block { statements: sa, result: ra },
188         CExpr::Block { statements: sb, result: rb }) => {
189            diff_args(sa, sb, &format!("{path}.statements"), out, depth);
190            diff_expr(ra, rb, &format!("{path}.result"), out, depth - 1);
191        }
192        (CExpr::FieldAccess { value: va, field: fa },
193         CExpr::FieldAccess { value: vb, field: fb }) => {
194            diff_expr(va, vb, &format!("{path}.value"), out, depth - 1);
195            if fa != fb {
196                out.push(BodyPatch {
197                    op: "Replace".into(), node_path: format!("{path}.field"),
198                    from_kind: format!("Field({fa})"), to_kind: format!("Field({fb})"),
199                });
200            }
201        }
202        (CExpr::Lambda { body: ba, .. }, CExpr::Lambda { body: bb, .. }) => {
203            diff_expr(ba, bb, &format!("{path}.body"), out, depth - 1);
204        }
205        // For shapes we don't unfold further, mark the node itself
206        // as edited (same kind, content differs) — finer detail can
207        // come in a follow-up.
208        _ => {
209            out.push(BodyPatch {
210                op: "Replace".into(), node_path: path.into(),
211                from_kind: kind_a.into(), to_kind: kind_b.into(),
212            });
213        }
214    }
215}
216
217fn diff_args(a: &[CExpr], b: &[CExpr], path: &str, out: &mut Vec<BodyPatch>, depth: u32) {
218    let n = a.len().max(b.len());
219    for i in 0..n {
220        let p = format!("{path}[{i}]");
221        match (a.get(i), b.get(i)) {
222            (Some(x), Some(y)) => diff_expr(x, y, &p, out, depth - 1),
223            (Some(x), None) => out.push(BodyPatch {
224                op: "Deleted".into(), node_path: p,
225                from_kind: node_kind(x).into(), to_kind: "(removed)".into(),
226            }),
227            (None, Some(y)) => out.push(BodyPatch {
228                op: "Inserted".into(), node_path: p,
229                from_kind: "(none)".into(), to_kind: node_kind(y).into(),
230            }),
231            (None, None) => break,
232        }
233    }
234}
235
236fn node_kind(e: &CExpr) -> &'static str {
237    match e {
238        CExpr::Literal { .. }     => "Literal",
239        CExpr::Var { .. }         => "Var",
240        CExpr::Call { .. }        => "Call",
241        CExpr::Let { .. }         => "Let",
242        CExpr::Match { .. }       => "Match",
243        CExpr::Block { .. }       => "Block",
244        CExpr::Constructor { .. } => "Constructor",
245        CExpr::RecordLit { .. }   => "RecordLit",
246        CExpr::TupleLit { .. }    => "TupleLit",
247        CExpr::ListLit { .. }     => "ListLit",
248        CExpr::FieldAccess { .. } => "FieldAccess",
249        CExpr::Lambda { .. }      => "Lambda",
250        CExpr::BinOp { .. }       => "BinOp",
251        CExpr::UnaryOp { .. }     => "UnaryOp",
252        CExpr::Return { .. }      => "Return",
253    }
254}
255
256pub fn render_signature(fd: &FnDecl) -> String {
257    let params: Vec<String> = fd.params.iter()
258        .map(|p| format!("{} :: {}", p.name, render_type(&p.ty))).collect();
259    let eff = if fd.effects.is_empty() { String::new() } else {
260        let labels: Vec<String> = fd.effects.iter().map(effect_label).collect();
261        format!("[{}] ", labels.join(", "))
262    };
263    format!("fn {}({}) -> {}{}", fd.name, params.join(", "),
264        eff, render_type(&fd.return_type))
265}
266
267/// Render an effect with its arg if present: `fs_read("/tmp")`,
268/// `net("api.example.com")`, or just `io`. Used by both signature
269/// rendering and effect-diff so the same string identifies the
270/// same effect in either context.
271pub fn effect_label(e: &Effect) -> String {
272    match &e.arg {
273        Some(EffectArg::Str { value })   => format!("{}({:?})", e.name, value),
274        Some(EffectArg::Int { value })   => format!("{}({})",   e.name, value),
275        Some(EffectArg::Ident { value }) => format!("{}({})",   e.name, value),
276        None => e.name.clone(),
277    }
278}
279
280/// Set-style diff over two effect lists. Order-insensitive within
281/// the lists; ordering of the output is sorted-by-label so the
282/// JSON is stable.
283fn effect_diff(a: &[Effect], b: &[Effect]) -> EffectChanges {
284    let labels_a: BTreeSet<String> = a.iter().map(effect_label).collect();
285    let labels_b: BTreeSet<String> = b.iter().map(effect_label).collect();
286    let added:   Vec<String> = labels_b.difference(&labels_a).cloned().collect();
287    let removed: Vec<String> = labels_a.difference(&labels_b).cloned().collect();
288    EffectChanges {
289        before:  labels_a.into_iter().collect(),
290        after:   labels_b.into_iter().collect(),
291        added,
292        removed,
293    }
294}
295
296fn render_type(t: &TypeExpr) -> String {
297    match t {
298        TypeExpr::Named { name, args } => {
299            if args.is_empty() { name.clone() }
300            else {
301                let parts: Vec<String> = args.iter().map(render_type).collect();
302                format!("{name}[{}]", parts.join(", "))
303            }
304        }
305        TypeExpr::Tuple { items } => {
306            let parts: Vec<String> = items.iter().map(render_type).collect();
307            format!("({})", parts.join(", "))
308        }
309        TypeExpr::Record { fields } => {
310            let parts: Vec<String> = fields.iter()
311                .map(|f| format!("{} :: {}", f.name, render_type(&f.ty))).collect();
312            format!("{{ {} }}", parts.join(", "))
313        }
314        TypeExpr::Function { params, effects, ret } => {
315            let parts: Vec<String> = params.iter().map(render_type).collect();
316            let eff = if effects.is_empty() { String::new() } else {
317                let names: Vec<&str> = effects.iter().map(|e| e.name.as_str()).collect();
318                format!("[{}] ", names.join(", "))
319            };
320            format!("({}) -> {}{}", parts.join(", "), eff, render_type(ret))
321        }
322        TypeExpr::Union { variants } => variants.iter().map(|v| match &v.payload {
323            Some(p) => format!("{}({})", v.name, render_type(p)),
324            None => v.name.clone(),
325        }).collect::<Vec<_>>().join(" | "),
326    }
327}