Skip to main content

lean_ctx/core/
structural_diff.rs

1//! Structural diff using tree-sitter chunk identities (named declarations).
2//!
3//! Compares structural chunks from [`super::chunks_ts`] between two sources.
4
5use serde::Serialize;
6
7use super::bm25_index::ChunkKind;
8
9/// Added / removed / modified structural symbol (declaration identified by name + start line).
10#[derive(Debug, Clone, PartialEq, Serialize)]
11pub struct StructuralSymbolDiff {
12    pub change: StructuralChangeKind,
13    pub name: String,
14    pub symbol_kind: ChunkKind,
15    /// 1-based start line in the **new** source (`Modified`, `Added`) or **old** (`Removed`).
16    pub line: usize,
17}
18
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
20#[serde(rename_all = "lowercase")]
21pub enum StructuralChangeKind {
22    Added,
23    Removed,
24    Modified,
25}
26
/// Identity of a structural chunk inside one source: `(symbol_name, start_line)`.
#[cfg(feature = "tree-sitter")]
type ChunkKey = (String, usize);

/// Extracts the structural chunks of `source` and indexes them by [`ChunkKey`].
///
/// Each key maps to the chunk's `(content, kind)`. When several chunks share a key the
/// last one extracted wins. Returns `None` when chunk extraction returns `None` for
/// `extension`.
#[cfg(feature = "tree-sitter")]
fn chunk_index(
    source: &str,
    extension: &str,
) -> Option<std::collections::HashMap<ChunkKey, (String, ChunkKind)>> {
    let extracted = super::chunks_ts::extract_chunks_ts("", source, extension)?;
    Some(
        extracted
            .into_iter()
            .map(|chunk| {
                let key: ChunkKey = (chunk.symbol_name, chunk.start_line);
                (key, (chunk.content, chunk.kind))
            })
            .collect(),
    )
}
44
45#[cfg(feature = "tree-sitter")]
46fn chunk_order(source: &str, extension: &str) -> Option<Vec<ChunkKey>> {
47    Some(
48        super::chunks_ts::extract_chunks_ts("", source, extension)?
49            .into_iter()
50            .map(|c| (c.symbol_name, c.start_line))
51            .collect(),
52    )
53}
54
55/// Compare two sources and report structural declaration changes for `extension` (e.g. `"rs"`).
56///
57/// Identity is `(symbol_name, start_line)` within each version; body text inequality ⇒ `Modified`.
58/// Returns an empty list when tree-sitter is disabled or the language is unsupported.
59pub fn structural_symbol_diff(
60    old_source: &str,
61    new_source: &str,
62    extension: &str,
63) -> Vec<StructuralSymbolDiff> {
64    #[cfg(feature = "tree-sitter")]
65    {
66        structural_symbol_diff_impl(old_source, new_source, extension)
67    }
68    #[cfg(not(feature = "tree-sitter"))]
69    {
70        let _ = (old_source, new_source, extension);
71        Vec::new()
72    }
73}
74
75#[cfg(feature = "tree-sitter")]
76fn structural_symbol_diff_impl(
77    old_source: &str,
78    new_source: &str,
79    extension: &str,
80) -> Vec<StructuralSymbolDiff> {
81    let Some(old_map) = chunk_index(old_source, extension) else {
82        return Vec::new();
83    };
84    let Some(new_map) = chunk_index(new_source, extension) else {
85        return Vec::new();
86    };
87    let Some(new_order) = chunk_order(new_source, extension) else {
88        return Vec::new();
89    };
90    let Some(old_order) = chunk_order(old_source, extension) else {
91        return Vec::new();
92    };
93
94    let mut out = Vec::new();
95
96    for key in &new_order {
97        let Some((body_new, kind_new)) = new_map.get(key) else {
98            continue;
99        };
100        match old_map.get(key) {
101            None => {
102                out.push(StructuralSymbolDiff {
103                    change: StructuralChangeKind::Added,
104                    name: key.0.clone(),
105                    symbol_kind: kind_new.clone(),
106                    line: key.1,
107                });
108            }
109            Some((body_old, _)) => {
110                if body_old != body_new {
111                    out.push(StructuralSymbolDiff {
112                        change: StructuralChangeKind::Modified,
113                        name: key.0.clone(),
114                        symbol_kind: kind_new.clone(),
115                        line: key.1,
116                    });
117                }
118            }
119        }
120    }
121
122    for key in &old_order {
123        if !new_map.contains_key(key) {
124            let Some((_, kind_old)) = old_map.get(key) else {
125                continue;
126            };
127            out.push(StructuralSymbolDiff {
128                change: StructuralChangeKind::Removed,
129                name: key.0.clone(),
130                symbol_kind: kind_old.clone(),
131                line: key.1,
132            });
133        }
134    }
135
136    out.sort_by(|a, b| a.line.cmp(&b.line).then(a.name.cmp(&b.name)));
137    out
138}
139
#[cfg(test)]
mod tests {
    use super::*;

    /// With tree-sitter enabled, a changed body is `Modified`, a new function is `Added`,
    /// and a rename shows up as `Removed` + `Added`.
    #[cfg(feature = "tree-sitter")]
    #[test]
    fn structural_diff_detects_added_removed_modified_rust() {
        let old = r"
pub fn a() { 1 }
pub fn b() { 2 }
";
        let new = r"
pub fn a() { 99 }
pub fn b() { 2 }
pub fn c() { 3 }
";
        let d = structural_symbol_diff(old, new, "rs");
        // Flatten to (kind, name) pairs so membership checks read naturally.
        let kinds: Vec<(StructuralChangeKind, &str)> = d
            .iter()
            .map(|entry| (entry.change, entry.name.as_str()))
            .collect();
        let modified_a = kinds.contains(&(StructuralChangeKind::Modified, "a"));
        assert!(modified_a, "{kinds:?}");
        let added_c = kinds.contains(&(StructuralChangeKind::Added, "c"));
        assert!(added_c, "{kinds:?}");

        // A rename changes the identity, so it is seen as one removal plus one addition.
        let old2 = r"pub fn only() {}";
        let new2 = r"pub fn renamed() {}";
        let d2 = structural_symbol_diff(old2, new2, "rs");
        let reported = |change: StructuralChangeKind, name: &str| {
            d2.iter()
                .any(|entry| entry.change == change && entry.name == name)
        };
        assert!(reported(StructuralChangeKind::Removed, "only"));
        assert!(reported(StructuralChangeKind::Added, "renamed"));
    }

    /// Without tree-sitter the public entry point always reports no changes.
    #[cfg(not(feature = "tree-sitter"))]
    #[test]
    fn structural_diff_disabled_returns_empty() {
        let diff = structural_symbol_diff("a", "b", "rs");
        assert!(diff.is_empty());
    }
}