//! cgx_engine/diff.rs
//!
//! Graph snapshots at a git commit, snapshot-to-snapshot diffs, and change-impact reports.

1use std::path::Path;
2
3use anyhow::Context;
4
5use crate::parser::{EdgeDef, NodeDef, ParserRegistry};
6use crate::walker::{Language, SourceFile};
7
8#[derive(Debug, Clone)]
9pub struct GraphSnapshot {
10    pub nodes: Vec<NodeDef>,
11    pub edges: Vec<EdgeDef>,
12    pub commit: String,
13}
14
15#[derive(Debug, Clone)]
16pub struct GraphDiff {
17    pub added_nodes: Vec<NodeDef>,
18    pub removed_nodes: Vec<NodeDef>,
19    pub added_edges: Vec<EdgeDef>,
20    pub removed_edges: Vec<EdgeDef>,
21    pub modified_nodes: Vec<(NodeDef, NodeDef)>,
22}
23
24/// Take a graph snapshot by parsing the source tree at a specific git commit.
25pub fn snapshot_at_commit(repo_path: &Path, commit_spec: &str) -> anyhow::Result<GraphSnapshot> {
26    let repo = git2::Repository::open(repo_path).context("Failed to open git repository")?;
27
28    let obj = repo
29        .revparse_single(commit_spec)
30        .context(format!("Invalid commit reference: {}", commit_spec))?;
31    let commit = obj
32        .peel_to_commit()
33        .context("Reference does not resolve to a commit")?;
34    let tree = commit.tree()?;
35    let commit_sha = commit.id().to_string();
36
37    let mut files: Vec<SourceFile> = Vec::new();
38    walk_tree(&repo, &tree, Path::new(""), &mut files)?;
39
40    let registry = ParserRegistry::new();
41    let results = registry.parse_all(&files);
42
43    let mut nodes: Vec<NodeDef> = Vec::new();
44    let mut edges: Vec<EdgeDef> = Vec::new();
45
46    for result in &results {
47        nodes.extend(result.nodes.clone());
48        edges.extend(result.edges.clone());
49    }
50
51    // Add file nodes
52    let lang_map = crate::resolver::build_language_map(&nodes);
53    let file_paths: std::collections::HashSet<String> =
54        files.iter().map(|f| f.relative_path.clone()).collect();
55    let file_nodes = crate::resolver::create_file_nodes(&file_paths, &lang_map);
56    nodes.extend(file_nodes);
57
58    Ok(GraphSnapshot {
59        nodes,
60        edges,
61        commit: commit_sha,
62    })
63}
64
65fn walk_tree(
66    repo: &git2::Repository,
67    tree: &git2::Tree,
68    prefix: &Path,
69    files: &mut Vec<SourceFile>,
70) -> anyhow::Result<()> {
71    for entry in tree.iter() {
72        let name = entry.name().unwrap_or("unknown");
73        let path = prefix.join(name);
74
75        match entry.kind() {
76            Some(git2::ObjectType::Blob) => {
77                let relative = path.to_string_lossy().to_string();
78                if let Some(lang) = detect_language(&relative) {
79                    let blob = entry.to_object(repo)?;
80                    let blob = blob.peel_to_blob()?;
81                    if let Ok(content) = std::str::from_utf8(blob.content()) {
82                        if content.len() < 2_000_000 && !is_binary(content) {
83                            files.push(SourceFile {
84                                path: repo.workdir().unwrap_or(Path::new(".")).join(&path),
85                                relative_path: relative,
86                                language: lang,
87                                content: content.to_string(),
88                                size_bytes: content.len() as u64,
89                            });
90                        }
91                    }
92                }
93            }
94            Some(git2::ObjectType::Tree) => {
95                let subtree = entry.to_object(repo)?.peel_to_tree()?;
96                walk_tree(repo, &subtree, &path, files)?;
97            }
98            _ => {}
99        }
100    }
101    Ok(())
102}
103
104fn detect_language(path: &str) -> Option<Language> {
105    let lower = path.to_lowercase();
106    if lower.ends_with(".ts") || lower.ends_with(".tsx") {
107        Some(Language::TypeScript)
108    } else if lower.ends_with(".js") || lower.ends_with(".jsx") || lower.ends_with(".mjs") {
109        Some(Language::JavaScript)
110    } else if lower.ends_with(".py") {
111        Some(Language::Python)
112    } else if lower.ends_with(".rs") {
113        Some(Language::Rust)
114    } else {
115        None
116    }
117}
118
/// Report whether `content` looks binary: true when a NUL byte occurs within its first
/// 8192 bytes.
fn is_binary(content: &str) -> bool {
    let bytes = content.as_bytes();
    let probe = &bytes[..bytes.len().min(8192)];
    probe.contains(&0)
}
122
123/// Compute the diff between two graph snapshots.
124pub fn diff_graphs(before: &GraphSnapshot, after: &GraphSnapshot) -> GraphDiff {
125    let before_nodes: std::collections::HashMap<&str, &NodeDef> =
126        before.nodes.iter().map(|n| (n.id.as_str(), n)).collect();
127    let after_nodes: std::collections::HashMap<&str, &NodeDef> =
128        after.nodes.iter().map(|n| (n.id.as_str(), n)).collect();
129
130    let mut added_nodes = Vec::new();
131    let mut removed_nodes = Vec::new();
132    let mut modified_nodes = Vec::new();
133
134    for (id, node) in &after_nodes {
135        if let Some(before) = before_nodes.get(id) {
136            // Check if modified
137            if before.name != node.name
138                || before.path != node.path
139                || before.line_start != node.line_start
140                || before.line_end != node.line_end
141                || before.kind != node.kind
142            {
143                modified_nodes.push(((**before).clone(), (**node).clone()));
144            }
145        } else {
146            added_nodes.push((**node).clone());
147        }
148    }
149
150    for (id, node) in &before_nodes {
151        if !after_nodes.contains_key(id) {
152            removed_nodes.push((**node).clone());
153        }
154    }
155
156    let mut added_edges = Vec::new();
157    let mut removed_edges = Vec::new();
158
159    let before_edge_ids: std::collections::HashSet<String> =
160        before.edges.iter().map(id_from_edge).collect();
161    let after_edge_ids: std::collections::HashSet<String> =
162        after.edges.iter().map(id_from_edge).collect();
163
164    for edge in &after.edges {
165        let id = id_from_edge(edge);
166        if !before_edge_ids.contains(id.as_str()) {
167            added_edges.push(edge.clone());
168        }
169    }
170
171    for edge in &before.edges {
172        let id = id_from_edge(edge);
173        if !after_edge_ids.contains(id.as_str()) {
174            removed_edges.push(edge.clone());
175        }
176    }
177
178    GraphDiff {
179        added_nodes,
180        removed_nodes,
181        added_edges,
182        removed_edges,
183        modified_nodes,
184    }
185}
186
187fn id_from_edge(e: &EdgeDef) -> String {
188    format!("{}|{}|{}", e.src, e.kind.as_str(), e.dst)
189}
190
191/// Find files changed in the last N days and compute impact.
192pub fn compute_impact(repo_path: &Path, since_days: u32) -> anyhow::Result<ImpactReport> {
193    let repo = git2::Repository::open(repo_path).context("Failed to open git repository")?;
194
195    // Get files changed since N days ago
196    let cutoff = chrono::Utc::now() - chrono::Duration::days(since_days as i64);
197    let cutoff_epoch = cutoff.timestamp();
198
199    let mut changed_files: std::collections::HashSet<String> = std::collections::HashSet::new();
200    let mut revwalk = repo.revwalk()?;
201    revwalk.push_head()?;
202
203    for oid in revwalk {
204        let oid = oid?;
205        let commit = repo.find_commit(oid)?;
206        let commit_time = commit.time().seconds();
207
208        if commit_time < cutoff_epoch {
209            break;
210        }
211
212        if commit.parent_count() == 0 {
213            let tree = commit.tree()?;
214            let diff = repo.diff_tree_to_tree(None, Some(&tree), None)?;
215            diff.foreach(
216                &mut |delta, _| {
217                    if let Some(path) = delta.new_file().path() {
218                        changed_files.insert(path.to_string_lossy().to_string());
219                    }
220                    true
221                },
222                None,
223                None,
224                None,
225            )?;
226        } else {
227            for i in 0..commit.parent_count() {
228                let parent = commit.parent(i)?;
229                let parent_tree = parent.tree()?;
230                let tree = commit.tree()?;
231                let diff = repo.diff_tree_to_tree(Some(&parent_tree), Some(&tree), None)?;
232                diff.foreach(
233                    &mut |delta, _| {
234                        if let Some(path) = delta.new_file().path() {
235                            changed_files.insert(path.to_string_lossy().to_string());
236                        }
237                        true
238                    },
239                    None,
240                    None,
241                    None,
242                )?;
243            }
244        }
245    }
246
247    // Load the graph from DuckDB
248    let db = crate::GraphDb::open(repo_path)?;
249    let all_nodes = db.get_all_nodes()?;
250    let all_edges = db.get_all_edges()?;
251
252    // Find nodes in changed files
253    let changed_nodes: Vec<&crate::Node> = all_nodes
254        .iter()
255        .filter(|n| changed_files.contains(&n.path))
256        .collect();
257
258    // Build reverse adjacency: what depends on what
259    let mut rev_adj: std::collections::HashMap<&str, Vec<&str>> = std::collections::HashMap::new();
260    for e in &all_edges {
261        rev_adj
262            .entry(e.dst.as_str())
263            .or_default()
264            .push(e.src.as_str());
265    }
266
267    let mut downstream = std::collections::HashSet::new();
268    let mut dq: Vec<&str> = changed_nodes.iter().map(|n| n.id.as_str()).collect();
269    let mut seen = std::collections::HashSet::new();
270
271    while let Some(current) = dq.pop() {
272        if let Some(dependents) = rev_adj.get(current) {
273            for &dep in dependents {
274                if seen.insert(dep) {
275                    downstream.insert(dep);
276                    dq.push(dep);
277                }
278            }
279        }
280    }
281
282    // Count affected
283    let total_affected = downstream.len() + changed_nodes.len();
284
285    let node_map: std::collections::HashMap<&str, &crate::Node> =
286        all_nodes.iter().map(|n| (n.id.as_str(), n)).collect();
287
288    let affected_nodes: Vec<&crate::Node> = downstream
289        .iter()
290        .filter_map(|id| node_map.get(id))
291        .copied()
292        .collect();
293
294    Ok(ImpactReport {
295        changed_files: changed_files.into_iter().collect(),
296        changed_nodes: changed_nodes.into_iter().cloned().collect(),
297        impacted_nodes: affected_nodes.into_iter().cloned().collect(),
298        total_impacted: total_affected,
299    })
300}
301
302#[derive(Debug, Clone)]
303pub struct ImpactReport {
304    pub changed_files: Vec<String>,
305    pub changed_nodes: Vec<crate::Node>,
306    pub impacted_nodes: Vec<crate::Node>,
307    pub total_impacted: usize,
308}