1use std::path::Path;
2
3use anyhow::Context;
4
5use crate::parser::{EdgeDef, NodeDef, ParserRegistry};
6use crate::walker::{Language, SourceFile};
7
/// The code graph extracted from a repository tree at a single commit.
#[derive(Debug, Clone)]
pub struct GraphSnapshot {
    /// Node definitions collected for the snapshot.
    pub nodes: Vec<NodeDef>,
    /// Edge definitions collected for the snapshot.
    pub edges: Vec<EdgeDef>,
    /// Identifier of the commit the snapshot was taken from; `snapshot_at_commit`
    /// stores the resolved commit id rendered as a string here.
    pub commit: String,
}
14
/// The difference between two [`GraphSnapshot`]s, as produced by `diff_graphs`.
#[derive(Debug, Clone)]
pub struct GraphDiff {
    /// Nodes whose id appears only in the newer snapshot.
    pub added_nodes: Vec<NodeDef>,
    /// Nodes whose id appears only in the older snapshot.
    pub removed_nodes: Vec<NodeDef>,
    /// Edges whose `src|kind|dst` key appears only in the newer snapshot.
    pub added_edges: Vec<EdgeDef>,
    /// Edges whose `src|kind|dst` key appears only in the older snapshot.
    pub removed_edges: Vec<EdgeDef>,
    /// `(before, after)` pairs for nodes present in both snapshots whose name,
    /// path, line range or kind differ.
    pub modified_nodes: Vec<(NodeDef, NodeDef)>,
}
23
24pub fn snapshot_at_commit(repo_path: &Path, commit_spec: &str) -> anyhow::Result<GraphSnapshot> {
26 let repo = git2::Repository::open(repo_path).context("Failed to open git repository")?;
27
28 let obj = repo
29 .revparse_single(commit_spec)
30 .context(format!("Invalid commit reference: {}", commit_spec))?;
31 let commit = obj
32 .peel_to_commit()
33 .context("Reference does not resolve to a commit")?;
34 let tree = commit.tree()?;
35 let commit_sha = commit.id().to_string();
36
37 let mut files: Vec<SourceFile> = Vec::new();
38 walk_tree(&repo, &tree, Path::new(""), &mut files)?;
39
40 let registry = ParserRegistry::new();
41 let results = registry.parse_all(&files);
42
43 let mut nodes: Vec<NodeDef> = Vec::new();
44 let mut edges: Vec<EdgeDef> = Vec::new();
45
46 for result in &results {
47 nodes.extend(result.nodes.clone());
48 edges.extend(result.edges.clone());
49 }
50
51 let lang_map = crate::resolver::build_language_map(&nodes);
53 let file_paths: std::collections::HashSet<String> =
54 files.iter().map(|f| f.relative_path.clone()).collect();
55 let file_nodes = crate::resolver::create_file_nodes(&file_paths, &lang_map);
56 nodes.extend(file_nodes);
57
58 Ok(GraphSnapshot {
59 nodes,
60 edges,
61 commit: commit_sha,
62 })
63}
64
65fn walk_tree(
66 repo: &git2::Repository,
67 tree: &git2::Tree,
68 prefix: &Path,
69 files: &mut Vec<SourceFile>,
70) -> anyhow::Result<()> {
71 for entry in tree.iter() {
72 let name = entry.name().unwrap_or("unknown");
73 let path = prefix.join(name);
74
75 match entry.kind() {
76 Some(git2::ObjectType::Blob) => {
77 let relative = path.to_string_lossy().to_string();
78 if let Some(lang) = detect_language(&relative) {
79 let blob = entry.to_object(repo)?;
80 let blob = blob.peel_to_blob()?;
81 if let Ok(content) = std::str::from_utf8(blob.content()) {
82 if content.len() < 2_000_000 && !is_binary(content) {
83 files.push(SourceFile {
84 path: repo.workdir().unwrap_or(Path::new(".")).join(&path),
85 relative_path: relative,
86 language: lang,
87 content: content.to_string(),
88 size_bytes: content.len() as u64,
89 });
90 }
91 }
92 }
93 }
94 Some(git2::ObjectType::Tree) => {
95 let subtree = entry.to_object(repo)?.peel_to_tree()?;
96 walk_tree(repo, &subtree, &path, files)?;
97 }
98 _ => {}
99 }
100 }
101 Ok(())
102}
103
104fn detect_language(path: &str) -> Option<Language> {
105 let lower = path.to_lowercase();
106 if lower.ends_with(".ts") || lower.ends_with(".tsx") {
107 Some(Language::TypeScript)
108 } else if lower.ends_with(".js") || lower.ends_with(".jsx") || lower.ends_with(".mjs") {
109 Some(Language::JavaScript)
110 } else if lower.ends_with(".py") {
111 Some(Language::Python)
112 } else if lower.ends_with(".rs") {
113 Some(Language::Rust)
114 } else {
115 None
116 }
117}
118
/// Heuristically reports whether `content` looks like binary data.
///
/// Only the first 8192 bytes are inspected; a NUL byte anywhere in that window
/// marks the content as binary.
fn is_binary(content: &str) -> bool {
    const SNIFF_LEN: usize = 8192;

    let bytes = content.as_bytes();
    let window = &bytes[..bytes.len().min(SNIFF_LEN)];
    window.contains(&0)
}
122
123pub fn diff_graphs(before: &GraphSnapshot, after: &GraphSnapshot) -> GraphDiff {
125 let before_nodes: std::collections::HashMap<&str, &NodeDef> =
126 before.nodes.iter().map(|n| (n.id.as_str(), n)).collect();
127 let after_nodes: std::collections::HashMap<&str, &NodeDef> =
128 after.nodes.iter().map(|n| (n.id.as_str(), n)).collect();
129
130 let mut added_nodes = Vec::new();
131 let mut removed_nodes = Vec::new();
132 let mut modified_nodes = Vec::new();
133
134 for (id, node) in &after_nodes {
135 if let Some(before) = before_nodes.get(id) {
136 if before.name != node.name
138 || before.path != node.path
139 || before.line_start != node.line_start
140 || before.line_end != node.line_end
141 || before.kind != node.kind
142 {
143 modified_nodes.push(((**before).clone(), (**node).clone()));
144 }
145 } else {
146 added_nodes.push((**node).clone());
147 }
148 }
149
150 for (id, node) in &before_nodes {
151 if !after_nodes.contains_key(id) {
152 removed_nodes.push((**node).clone());
153 }
154 }
155
156 let mut added_edges = Vec::new();
157 let mut removed_edges = Vec::new();
158
159 let before_edge_ids: std::collections::HashSet<String> =
160 before.edges.iter().map(id_from_edge).collect();
161 let after_edge_ids: std::collections::HashSet<String> =
162 after.edges.iter().map(id_from_edge).collect();
163
164 for edge in &after.edges {
165 let id = id_from_edge(edge);
166 if !before_edge_ids.contains(id.as_str()) {
167 added_edges.push(edge.clone());
168 }
169 }
170
171 for edge in &before.edges {
172 let id = id_from_edge(edge);
173 if !after_edge_ids.contains(id.as_str()) {
174 removed_edges.push(edge.clone());
175 }
176 }
177
178 GraphDiff {
179 added_nodes,
180 removed_nodes,
181 added_edges,
182 removed_edges,
183 modified_nodes,
184 }
185}
186
187fn id_from_edge(e: &EdgeDef) -> String {
188 format!("{}|{}|{}", e.src, e.kind.as_str(), e.dst)
189}
190
191pub fn compute_impact(repo_path: &Path, since_days: u32) -> anyhow::Result<ImpactReport> {
193 let repo = git2::Repository::open(repo_path).context("Failed to open git repository")?;
194
195 let cutoff = chrono::Utc::now() - chrono::Duration::days(since_days as i64);
197 let cutoff_epoch = cutoff.timestamp();
198
199 let mut changed_files: std::collections::HashSet<String> = std::collections::HashSet::new();
200 let mut revwalk = repo.revwalk()?;
201 revwalk.push_head()?;
202
203 for oid in revwalk {
204 let oid = oid?;
205 let commit = repo.find_commit(oid)?;
206 let commit_time = commit.time().seconds();
207
208 if commit_time < cutoff_epoch {
209 break;
210 }
211
212 if commit.parent_count() == 0 {
213 let tree = commit.tree()?;
214 let diff = repo.diff_tree_to_tree(None, Some(&tree), None)?;
215 diff.foreach(
216 &mut |delta, _| {
217 if let Some(path) = delta.new_file().path() {
218 changed_files.insert(path.to_string_lossy().to_string());
219 }
220 true
221 },
222 None,
223 None,
224 None,
225 )?;
226 } else {
227 for i in 0..commit.parent_count() {
228 let parent = commit.parent(i)?;
229 let parent_tree = parent.tree()?;
230 let tree = commit.tree()?;
231 let diff = repo.diff_tree_to_tree(Some(&parent_tree), Some(&tree), None)?;
232 diff.foreach(
233 &mut |delta, _| {
234 if let Some(path) = delta.new_file().path() {
235 changed_files.insert(path.to_string_lossy().to_string());
236 }
237 true
238 },
239 None,
240 None,
241 None,
242 )?;
243 }
244 }
245 }
246
247 let db = crate::GraphDb::open(repo_path)?;
249 let all_nodes = db.get_all_nodes()?;
250 let all_edges = db.get_all_edges()?;
251
252 let changed_nodes: Vec<&crate::Node> = all_nodes
254 .iter()
255 .filter(|n| changed_files.contains(&n.path))
256 .collect();
257
258 let mut rev_adj: std::collections::HashMap<&str, Vec<&str>> = std::collections::HashMap::new();
260 for e in &all_edges {
261 rev_adj
262 .entry(e.dst.as_str())
263 .or_default()
264 .push(e.src.as_str());
265 }
266
267 let mut downstream = std::collections::HashSet::new();
268 let mut dq: Vec<&str> = changed_nodes.iter().map(|n| n.id.as_str()).collect();
269 let mut seen = std::collections::HashSet::new();
270
271 while let Some(current) = dq.pop() {
272 if let Some(dependents) = rev_adj.get(current) {
273 for &dep in dependents {
274 if seen.insert(dep) {
275 downstream.insert(dep);
276 dq.push(dep);
277 }
278 }
279 }
280 }
281
282 let total_affected = downstream.len() + changed_nodes.len();
284
285 let node_map: std::collections::HashMap<&str, &crate::Node> =
286 all_nodes.iter().map(|n| (n.id.as_str(), n)).collect();
287
288 let affected_nodes: Vec<&crate::Node> = downstream
289 .iter()
290 .filter_map(|id| node_map.get(id))
291 .copied()
292 .collect();
293
294 Ok(ImpactReport {
295 changed_files: changed_files.into_iter().collect(),
296 changed_nodes: changed_nodes.into_iter().cloned().collect(),
297 impacted_nodes: affected_nodes.into_iter().cloned().collect(),
298 total_impacted: total_affected,
299 })
300}
301
/// Result of `compute_impact`: what recent commits touched and what depends on it.
#[derive(Debug, Clone)]
pub struct ImpactReport {
    /// Repository paths touched by the commits that fell inside the time window.
    pub changed_files: Vec<String>,
    /// Graph nodes whose `path` is one of `changed_files`.
    pub changed_nodes: Vec<crate::Node>,
    /// Graph nodes reached from the changed nodes by following edges backwards
    /// (from an edge's `dst` to its `src`), transitively.
    pub impacted_nodes: Vec<crate::Node>,
    /// Overall number of affected nodes (changed plus impacted) as counted by
    /// `compute_impact`.
    pub total_impacted: usize,
}