Skip to main content

cgx_engine/
diff.rs

1use std::path::Path;
2
3use anyhow::Context;
4
5use crate::parser::{EdgeDef, NodeDef, ParserRegistry};
6use crate::walker::{Language, SourceFile};
7
8#[derive(Debug, Clone)]
9pub struct GraphSnapshot {
10    pub nodes: Vec<NodeDef>,
11    pub edges: Vec<EdgeDef>,
12    pub commit: String,
13}
14
15#[derive(Debug, Clone)]
16pub struct GraphDiff {
17    pub added_nodes: Vec<NodeDef>,
18    pub removed_nodes: Vec<NodeDef>,
19    pub added_edges: Vec<EdgeDef>,
20    pub removed_edges: Vec<EdgeDef>,
21    pub modified_nodes: Vec<(NodeDef, NodeDef)>,
22}
23
24/// Take a graph snapshot by parsing the source tree at a specific git commit.
25pub fn snapshot_at_commit(repo_path: &Path, commit_spec: &str) -> anyhow::Result<GraphSnapshot> {
26    let repo = git2::Repository::open(repo_path)
27        .context("Failed to open git repository")?;
28
29    let obj = repo.revparse_single(commit_spec)
30        .context(format!("Invalid commit reference: {}", commit_spec))?;
31    let commit = obj.peel_to_commit()
32        .context("Reference does not resolve to a commit")?;
33    let tree = commit.tree()?;
34    let commit_sha = commit.id().to_string();
35
36    let mut files: Vec<SourceFile> = Vec::new();
37    walk_tree(&repo, &tree, Path::new(""), &mut files)?;
38
39    let registry = ParserRegistry::new();
40    let results = registry.parse_all(&files);
41
42    let mut nodes: Vec<NodeDef> = Vec::new();
43    let mut edges: Vec<EdgeDef> = Vec::new();
44
45    for result in &results {
46        nodes.extend(result.nodes.clone());
47        edges.extend(result.edges.clone());
48    }
49
50    // Add file nodes
51    let lang_map = crate::resolver::build_language_map(&nodes);
52    let file_paths: std::collections::HashSet<String> = files
53        .iter()
54        .map(|f| f.relative_path.clone())
55        .collect();
56    let file_nodes = crate::resolver::create_file_nodes(&file_paths, &lang_map);
57    nodes.extend(file_nodes);
58
59    Ok(GraphSnapshot {
60        nodes,
61        edges,
62        commit: commit_sha,
63    })
64}
65
66fn walk_tree(
67    repo: &git2::Repository,
68    tree: &git2::Tree,
69    prefix: &Path,
70    files: &mut Vec<SourceFile>,
71) -> anyhow::Result<()> {
72    for entry in tree.iter() {
73        let name = entry.name().unwrap_or("unknown");
74        let path = prefix.join(name);
75
76        match entry.kind() {
77            Some(git2::ObjectType::Blob) => {
78                let relative = path.to_string_lossy().to_string();
79                if let Some(lang) = detect_language(&relative) {
80                    let blob = entry.to_object(repo)?;
81                    let blob = blob.peel_to_blob()?;
82                    if let Ok(content) = std::str::from_utf8(blob.content()) {
83                        if content.len() < 2_000_000 && !is_binary(content) {
84                            files.push(SourceFile {
85                                path: repo.workdir().unwrap_or(Path::new(".")).join(&path),
86                                relative_path: relative,
87                                language: lang,
88                                content: content.to_string(),
89                                size_bytes: content.len() as u64,
90                            });
91                        }
92                    }
93                }
94            }
95            Some(git2::ObjectType::Tree) => {
96                let subtree = entry.to_object(repo)?.peel_to_tree()?;
97                walk_tree(repo, &subtree, &path, files)?;
98            }
99            _ => {}
100        }
101    }
102    Ok(())
103}
104
105fn detect_language(path: &str) -> Option<Language> {
106    let lower = path.to_lowercase();
107    if lower.ends_with(".ts") || lower.ends_with(".tsx") { Some(Language::TypeScript) }
108    else if lower.ends_with(".js") || lower.ends_with(".jsx") || lower.ends_with(".mjs") { Some(Language::JavaScript) }
109    else if lower.ends_with(".py") { Some(Language::Python) }
110    else if lower.ends_with(".rs") { Some(Language::Rust) }
111    else { None }
112}
113
/// Heuristically decide whether `content` is binary data.
///
/// Only the first 8192 bytes are inspected; the content counts as binary when
/// any of them is a NUL byte.
fn is_binary(content: &str) -> bool {
    const SNIFF_LEN: usize = 8192;

    let bytes = content.as_bytes();
    let window = &bytes[..bytes.len().min(SNIFF_LEN)];
    window.contains(&0)
}
117
118/// Compute the diff between two graph snapshots.
119pub fn diff_graphs(before: &GraphSnapshot, after: &GraphSnapshot) -> GraphDiff {
120    let before_nodes: std::collections::HashMap<&str, &NodeDef> =
121        before.nodes.iter().map(|n| (n.id.as_str(), n)).collect();
122    let after_nodes: std::collections::HashMap<&str, &NodeDef> =
123        after.nodes.iter().map(|n| (n.id.as_str(), n)).collect();
124
125    let mut added_nodes = Vec::new();
126    let mut removed_nodes = Vec::new();
127    let mut modified_nodes = Vec::new();
128
129    for (id, node) in &after_nodes {
130        if let Some(before) = before_nodes.get(id) {
131            // Check if modified
132            if before.name != node.name
133                || before.path != node.path
134                || before.line_start != node.line_start
135                || before.line_end != node.line_end
136                || before.kind != node.kind
137            {
138                modified_nodes.push(((**before).clone(), (**node).clone()));
139            }
140        } else {
141            added_nodes.push((**node).clone());
142        }
143    }
144
145    for (id, node) in &before_nodes {
146        if !after_nodes.contains_key(id) {
147            removed_nodes.push((**node).clone());
148        }
149    }
150
151    let mut added_edges = Vec::new();
152    let mut removed_edges = Vec::new();
153
154    let before_edge_ids: std::collections::HashSet<String> =
155        before.edges.iter().map(id_from_edge).collect();
156    let after_edge_ids: std::collections::HashSet<String> =
157        after.edges.iter().map(id_from_edge).collect();
158
159    for edge in &after.edges {
160        let id = id_from_edge(edge);
161        if !before_edge_ids.contains(id.as_str()) {
162            added_edges.push(edge.clone());
163        }
164    }
165
166    for edge in &before.edges {
167        let id = id_from_edge(edge);
168        if !after_edge_ids.contains(id.as_str()) {
169            removed_edges.push(edge.clone());
170        }
171    }
172
173    GraphDiff {
174        added_nodes,
175        removed_nodes,
176        added_edges,
177        removed_edges,
178        modified_nodes,
179    }
180}
181
182fn id_from_edge(e: &EdgeDef) -> String {
183    format!("{}|{}|{}", e.src, e.kind.as_str(), e.dst)
184}
185
186/// Find files changed in the last N days and compute impact.
187pub fn compute_impact(
188    repo_path: &Path,
189    since_days: u32,
190) -> anyhow::Result<ImpactReport> {
191    let repo = git2::Repository::open(repo_path)
192        .context("Failed to open git repository")?;
193
194    // Get files changed since N days ago
195    let cutoff = chrono::Utc::now() - chrono::Duration::days(since_days as i64);
196    let cutoff_epoch = cutoff.timestamp();
197
198    let mut changed_files: std::collections::HashSet<String> = std::collections::HashSet::new();
199    let mut revwalk = repo.revwalk()?;
200    revwalk.push_head()?;
201
202    for oid in revwalk {
203        let oid = oid?;
204        let commit = repo.find_commit(oid)?;
205        let commit_time = commit.time().seconds();
206
207        if commit_time < cutoff_epoch {
208            break;
209        }
210
211        if commit.parent_count() == 0 {
212            let tree = commit.tree()?;
213            let diff = repo.diff_tree_to_tree(None, Some(&tree), None)?;
214            diff.foreach(
215                &mut |delta, _| {
216                    if let Some(path) = delta.new_file().path() {
217                        changed_files.insert(path.to_string_lossy().to_string());
218                    }
219                    true
220                },
221                None, None, None,
222            )?;
223        } else {
224            for i in 0..commit.parent_count() {
225                let parent = commit.parent(i)?;
226                let parent_tree = parent.tree()?;
227                let tree = commit.tree()?;
228                let diff = repo.diff_tree_to_tree(Some(&parent_tree), Some(&tree), None)?;
229                diff.foreach(
230                    &mut |delta, _| {
231                        if let Some(path) = delta.new_file().path() {
232                            changed_files.insert(path.to_string_lossy().to_string());
233                        }
234                        true
235                    },
236                    None, None, None,
237                )?;
238            }
239        }
240    }
241
242    // Load the graph from DuckDB
243    let db = crate::GraphDb::open(repo_path)?;
244    let all_nodes = db.get_all_nodes()?;
245    let all_edges = db.get_all_edges()?;
246
247    // Find nodes in changed files
248    let changed_nodes: Vec<&crate::Node> = all_nodes
249        .iter()
250        .filter(|n| changed_files.contains(&n.path))
251        .collect();
252
253    // Build reverse adjacency: what depends on what
254    let mut rev_adj: std::collections::HashMap<&str, Vec<&str>> = std::collections::HashMap::new();
255    for e in &all_edges {
256        rev_adj.entry(e.dst.as_str()).or_default().push(e.src.as_str());
257    }
258
259    let mut downstream = std::collections::HashSet::new();
260    let mut dq: Vec<&str> = changed_nodes.iter().map(|n| n.id.as_str()).collect();
261    let mut seen = std::collections::HashSet::new();
262
263    while let Some(current) = dq.pop() {
264        if let Some(dependents) = rev_adj.get(current) {
265            for &dep in dependents {
266                if seen.insert(dep) {
267                    downstream.insert(dep);
268                    dq.push(dep);
269                }
270            }
271        }
272    }
273
274    // Count affected
275    let total_affected = downstream.len() + changed_nodes.len();
276
277    let node_map: std::collections::HashMap<&str, &crate::Node> =
278        all_nodes.iter().map(|n| (n.id.as_str(), n)).collect();
279
280    let affected_nodes: Vec<&crate::Node> = downstream
281        .iter()
282        .filter_map(|id| node_map.get(id))
283        .copied()
284        .collect();
285
286    Ok(ImpactReport {
287        changed_files: changed_files.into_iter().collect(),
288        changed_nodes: changed_nodes.into_iter().cloned().collect(),
289        impacted_nodes: affected_nodes.into_iter().cloned().collect(),
290        total_impacted: total_affected,
291    })
292}
293
294#[derive(Debug, Clone)]
295pub struct ImpactReport {
296    pub changed_files: Vec<String>,
297    pub changed_nodes: Vec<crate::Node>,
298    pub impacted_nodes: Vec<crate::Node>,
299    pub total_impacted: usize,
300}