1use std::path::Path;
2
3use anyhow::Context;
4
5use crate::parser::{EdgeDef, NodeDef, ParserRegistry};
6use crate::walker::{Language, SourceFile};
7
8#[derive(Debug, Clone)]
9pub struct GraphSnapshot {
10 pub nodes: Vec<NodeDef>,
11 pub edges: Vec<EdgeDef>,
12 pub commit: String,
13}
14
15#[derive(Debug, Clone)]
16pub struct GraphDiff {
17 pub added_nodes: Vec<NodeDef>,
18 pub removed_nodes: Vec<NodeDef>,
19 pub added_edges: Vec<EdgeDef>,
20 pub removed_edges: Vec<EdgeDef>,
21 pub modified_nodes: Vec<(NodeDef, NodeDef)>,
22}
23
24pub fn snapshot_at_commit(repo_path: &Path, commit_spec: &str) -> anyhow::Result<GraphSnapshot> {
26 let repo = git2::Repository::open(repo_path)
27 .context("Failed to open git repository")?;
28
29 let obj = repo.revparse_single(commit_spec)
30 .context(format!("Invalid commit reference: {}", commit_spec))?;
31 let commit = obj.peel_to_commit()
32 .context("Reference does not resolve to a commit")?;
33 let tree = commit.tree()?;
34 let commit_sha = commit.id().to_string();
35
36 let mut files: Vec<SourceFile> = Vec::new();
37 walk_tree(&repo, &tree, Path::new(""), &mut files)?;
38
39 let registry = ParserRegistry::new();
40 let results = registry.parse_all(&files);
41
42 let mut nodes: Vec<NodeDef> = Vec::new();
43 let mut edges: Vec<EdgeDef> = Vec::new();
44
45 for result in &results {
46 nodes.extend(result.nodes.clone());
47 edges.extend(result.edges.clone());
48 }
49
50 let lang_map = crate::resolver::build_language_map(&nodes);
52 let file_paths: std::collections::HashSet<String> = files
53 .iter()
54 .map(|f| f.relative_path.clone())
55 .collect();
56 let file_nodes = crate::resolver::create_file_nodes(&file_paths, &lang_map);
57 nodes.extend(file_nodes);
58
59 Ok(GraphSnapshot {
60 nodes,
61 edges,
62 commit: commit_sha,
63 })
64}
65
66fn walk_tree(
67 repo: &git2::Repository,
68 tree: &git2::Tree,
69 prefix: &Path,
70 files: &mut Vec<SourceFile>,
71) -> anyhow::Result<()> {
72 for entry in tree.iter() {
73 let name = entry.name().unwrap_or("unknown");
74 let path = prefix.join(name);
75
76 match entry.kind() {
77 Some(git2::ObjectType::Blob) => {
78 let relative = path.to_string_lossy().to_string();
79 if let Some(lang) = detect_language(&relative) {
80 let blob = entry.to_object(repo)?;
81 let blob = blob.peel_to_blob()?;
82 if let Ok(content) = std::str::from_utf8(blob.content()) {
83 if content.len() < 2_000_000 && !is_binary(content) {
84 files.push(SourceFile {
85 path: repo.workdir().unwrap_or(Path::new(".")).join(&path),
86 relative_path: relative,
87 language: lang,
88 content: content.to_string(),
89 size_bytes: content.len() as u64,
90 });
91 }
92 }
93 }
94 }
95 Some(git2::ObjectType::Tree) => {
96 let subtree = entry.to_object(repo)?.peel_to_tree()?;
97 walk_tree(repo, &subtree, &path, files)?;
98 }
99 _ => {}
100 }
101 }
102 Ok(())
103}
104
105fn detect_language(path: &str) -> Option<Language> {
106 let lower = path.to_lowercase();
107 if lower.ends_with(".ts") || lower.ends_with(".tsx") { Some(Language::TypeScript) }
108 else if lower.ends_with(".js") || lower.ends_with(".jsx") || lower.ends_with(".mjs") { Some(Language::JavaScript) }
109 else if lower.ends_with(".py") { Some(Language::Python) }
110 else if lower.ends_with(".rs") { Some(Language::Rust) }
111 else { None }
112}
113
/// Reports whether `content` looks like binary data.
///
/// Only the first 8192 bytes are inspected; the content is considered binary
/// when that prefix contains a NUL byte.
fn is_binary(content: &str) -> bool {
    let bytes = content.as_bytes();
    let probe_len = bytes.len().min(8192);
    bytes[..probe_len].contains(&0)
}
117
118pub fn diff_graphs(before: &GraphSnapshot, after: &GraphSnapshot) -> GraphDiff {
120 let before_nodes: std::collections::HashMap<&str, &NodeDef> =
121 before.nodes.iter().map(|n| (n.id.as_str(), n)).collect();
122 let after_nodes: std::collections::HashMap<&str, &NodeDef> =
123 after.nodes.iter().map(|n| (n.id.as_str(), n)).collect();
124
125 let mut added_nodes = Vec::new();
126 let mut removed_nodes = Vec::new();
127 let mut modified_nodes = Vec::new();
128
129 for (id, node) in &after_nodes {
130 if let Some(before) = before_nodes.get(id) {
131 if before.name != node.name
133 || before.path != node.path
134 || before.line_start != node.line_start
135 || before.line_end != node.line_end
136 || before.kind != node.kind
137 {
138 modified_nodes.push(((**before).clone(), (**node).clone()));
139 }
140 } else {
141 added_nodes.push((**node).clone());
142 }
143 }
144
145 for (id, node) in &before_nodes {
146 if !after_nodes.contains_key(id) {
147 removed_nodes.push((**node).clone());
148 }
149 }
150
151 let mut added_edges = Vec::new();
152 let mut removed_edges = Vec::new();
153
154 let before_edge_ids: std::collections::HashSet<String> =
155 before.edges.iter().map(id_from_edge).collect();
156 let after_edge_ids: std::collections::HashSet<String> =
157 after.edges.iter().map(id_from_edge).collect();
158
159 for edge in &after.edges {
160 let id = id_from_edge(edge);
161 if !before_edge_ids.contains(id.as_str()) {
162 added_edges.push(edge.clone());
163 }
164 }
165
166 for edge in &before.edges {
167 let id = id_from_edge(edge);
168 if !after_edge_ids.contains(id.as_str()) {
169 removed_edges.push(edge.clone());
170 }
171 }
172
173 GraphDiff {
174 added_nodes,
175 removed_nodes,
176 added_edges,
177 removed_edges,
178 modified_nodes,
179 }
180}
181
182fn id_from_edge(e: &EdgeDef) -> String {
183 format!("{}|{}|{}", e.src, e.kind.as_str(), e.dst)
184}
185
186pub fn compute_impact(
188 repo_path: &Path,
189 since_days: u32,
190) -> anyhow::Result<ImpactReport> {
191 let repo = git2::Repository::open(repo_path)
192 .context("Failed to open git repository")?;
193
194 let cutoff = chrono::Utc::now() - chrono::Duration::days(since_days as i64);
196 let cutoff_epoch = cutoff.timestamp();
197
198 let mut changed_files: std::collections::HashSet<String> = std::collections::HashSet::new();
199 let mut revwalk = repo.revwalk()?;
200 revwalk.push_head()?;
201
202 for oid in revwalk {
203 let oid = oid?;
204 let commit = repo.find_commit(oid)?;
205 let commit_time = commit.time().seconds();
206
207 if commit_time < cutoff_epoch {
208 break;
209 }
210
211 if commit.parent_count() == 0 {
212 let tree = commit.tree()?;
213 let diff = repo.diff_tree_to_tree(None, Some(&tree), None)?;
214 diff.foreach(
215 &mut |delta, _| {
216 if let Some(path) = delta.new_file().path() {
217 changed_files.insert(path.to_string_lossy().to_string());
218 }
219 true
220 },
221 None, None, None,
222 )?;
223 } else {
224 for i in 0..commit.parent_count() {
225 let parent = commit.parent(i)?;
226 let parent_tree = parent.tree()?;
227 let tree = commit.tree()?;
228 let diff = repo.diff_tree_to_tree(Some(&parent_tree), Some(&tree), None)?;
229 diff.foreach(
230 &mut |delta, _| {
231 if let Some(path) = delta.new_file().path() {
232 changed_files.insert(path.to_string_lossy().to_string());
233 }
234 true
235 },
236 None, None, None,
237 )?;
238 }
239 }
240 }
241
242 let db = crate::GraphDb::open(repo_path)?;
244 let all_nodes = db.get_all_nodes()?;
245 let all_edges = db.get_all_edges()?;
246
247 let changed_nodes: Vec<&crate::Node> = all_nodes
249 .iter()
250 .filter(|n| changed_files.contains(&n.path))
251 .collect();
252
253 let mut rev_adj: std::collections::HashMap<&str, Vec<&str>> = std::collections::HashMap::new();
255 for e in &all_edges {
256 rev_adj.entry(e.dst.as_str()).or_default().push(e.src.as_str());
257 }
258
259 let mut downstream = std::collections::HashSet::new();
260 let mut dq: Vec<&str> = changed_nodes.iter().map(|n| n.id.as_str()).collect();
261 let mut seen = std::collections::HashSet::new();
262
263 while let Some(current) = dq.pop() {
264 if let Some(dependents) = rev_adj.get(current) {
265 for &dep in dependents {
266 if seen.insert(dep) {
267 downstream.insert(dep);
268 dq.push(dep);
269 }
270 }
271 }
272 }
273
274 let total_affected = downstream.len() + changed_nodes.len();
276
277 let node_map: std::collections::HashMap<&str, &crate::Node> =
278 all_nodes.iter().map(|n| (n.id.as_str(), n)).collect();
279
280 let affected_nodes: Vec<&crate::Node> = downstream
281 .iter()
282 .filter_map(|id| node_map.get(id))
283 .copied()
284 .collect();
285
286 Ok(ImpactReport {
287 changed_files: changed_files.into_iter().collect(),
288 changed_nodes: changed_nodes.into_iter().cloned().collect(),
289 impacted_nodes: affected_nodes.into_iter().cloned().collect(),
290 total_impacted: total_affected,
291 })
292}
293
294#[derive(Debug, Clone)]
295pub struct ImpactReport {
296 pub changed_files: Vec<String>,
297 pub changed_nodes: Vec<crate::Node>,
298 pub impacted_nodes: Vec<crate::Node>,
299 pub total_impacted: usize,
300}