1pub mod cluster;
2pub mod config;
3pub mod deadcode;
4pub mod diff;
5pub mod export;
6pub mod git;
7pub mod graph;
8pub mod parser;
9pub mod parsers;
10pub mod registry;
11pub mod resolver;
12pub mod skill;
13pub mod walker;
14
15pub use cluster::{detect_communities, run_clustering};
16pub use config::{
17 AnalyzeConfig, CgxConfig, ChatConfig, ExportConfig, IndexConfig, McpConfig, ProjectConfig,
18 ServeConfig, SkillConfig, WatchConfig,
19};
20pub use deadcode::{
21 detect_dead_code, mark_dead_candidates, Confidence, DeadCodeReport, DeadNode, DeadReason,
22};
23pub use diff::{
24 compute_impact, diff_graphs, snapshot_at_commit, GraphDiff, GraphSnapshot, ImpactReport,
25};
26pub use export::{export_dot, export_graphml, export_json, export_mermaid, export_svg};
27pub use git::{analyze_repo, GitAnalysis};
28pub use graph::{CommunityRow, Edge, GraphDb, Node, RepoStats, TagRow};
29pub use parser::{
30 CommentKind, CommentTag, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult,
31 ParserRegistry,
32};
33pub use registry::{Registry, RepoEntry};
34pub use resolver::resolve;
35pub use skill::{
36 build_skill_data, generate_agents_md, generate_skill, install_git_hooks, write_agents_md,
37 write_skill, CommunityInfo, SkillData,
38};
39pub use walker::{walk_repo, Language, SourceFile};
40
41use sha2::{Digest, Sha256};
42use std::collections::{HashMap, HashSet};
43use std::path::Path;
44
45pub fn analyze_repo_incremental(
48 repo_path: &Path,
49 db: &GraphDb,
50 quiet: bool,
51 no_git: bool,
52 no_cluster: bool,
53 verbose: bool,
54) -> anyhow::Result<bool> {
55 let _ = verbose;
56
57 let files = walk_repo(repo_path)?;
59 let mut current_hashes: HashMap<String, String> = HashMap::new();
60 for file in &files {
61 let mut hasher = Sha256::new();
62 hasher.update(file.content.as_bytes());
63 let hash = format!("{:x}", hasher.finalize());
64 current_hashes.insert(file.relative_path.clone(), hash);
65 }
66
67 let stored_hashes = db.get_file_hashes().unwrap_or_default();
69
70 let mut changed_paths: HashSet<String> = HashSet::new();
72 for (path, hash) in ¤t_hashes {
73 if stored_hashes.get(path) != Some(hash) {
74 changed_paths.insert(path.clone());
75 }
76 }
77
78 let mut deleted_paths: Vec<String> = Vec::new();
79 for path in stored_hashes.keys() {
80 if !current_hashes.contains_key(path) {
81 deleted_paths.push(path.clone());
82 changed_paths.insert(path.clone());
83 }
84 }
85
86 if changed_paths.is_empty() {
87 if !quiet {
88 println!(" No file changes detected. Index is up to date.");
89 }
90 return Ok(false);
91 }
92
93 if !quiet {
94 println!(
95 " Incremental: {} changed/new/deleted file(s)",
96 changed_paths.len()
97 );
98 }
99
100 let existing_nodes = db.get_all_nodes()?;
102 let existing_edges = db.get_all_edges()?;
103
104 let mut kept_nodes: Vec<crate::graph::Node> = existing_nodes
105 .into_iter()
106 .filter(|n| !changed_paths.contains(&n.path))
107 .collect();
108
109 let changed_files: Vec<_> = files
111 .into_iter()
112 .filter(|f| changed_paths.contains(&f.relative_path))
113 .collect();
114
115 if !quiet {
116 println!(" Re-parsing {} changed file(s)...", changed_files.len());
117 }
118
119 let registry = ParserRegistry::new();
120 let results = registry.parse_all(&changed_files);
121
122 let mut new_nodes: Vec<NodeDef> = Vec::new();
123 let mut new_edges: Vec<EdgeDef> = Vec::new();
124 let mut changed_file_paths: HashSet<String> = HashSet::new();
125 let mut lang_map: HashMap<String, &str> = changed_files
126 .iter()
127 .map(|f| {
128 let lang_str = match f.language {
129 walker::Language::TypeScript => "typescript",
130 walker::Language::JavaScript => "javascript",
131 walker::Language::Python => "python",
132 walker::Language::Rust => "rust",
133 walker::Language::Go => "go",
134 walker::Language::Java => "java",
135 walker::Language::CSharp => "csharp",
136 walker::Language::Php => "php",
137 walker::Language::Unknown => "unknown",
138 };
139 (f.relative_path.clone(), lang_str)
140 })
141 .collect();
142
143 for result in &results {
144 new_nodes.extend(result.nodes.clone());
145 new_edges.extend(result.edges.clone());
146 }
147 for file in &changed_files {
148 changed_file_paths.insert(file.relative_path.clone());
149 }
150
151 let parsed_lang_map = resolver::build_language_map(&new_nodes);
153 for (path, lang) in parsed_lang_map {
154 if lang != "unknown" {
155 lang_map.entry(path).or_insert(lang);
156 }
157 }
158 let file_nodes = resolver::create_file_nodes(&changed_file_paths, &lang_map);
159 new_nodes.extend(file_nodes);
160
161 let new_graph_nodes: Vec<crate::graph::Node> = new_nodes
163 .iter()
164 .map(|n| {
165 let lang = lang_map.get(&n.path).copied().unwrap_or("unknown");
166 crate::graph::Node::from_def(n, lang)
167 })
168 .collect();
169
170 let new_node_count = new_graph_nodes.len();
172 kept_nodes.extend(new_graph_nodes);
173
174 db.clear()?;
176 db.upsert_nodes(&kept_nodes)?;
177
178 let all_node_defs: Vec<NodeDef> = kept_nodes
180 .iter()
181 .map(|n| NodeDef {
182 id: n.id.clone(),
183 kind: match n.kind.as_str() {
184 "File" => NodeKind::File,
185 "Function" => NodeKind::Function,
186 "Class" => NodeKind::Class,
187 "Variable" => NodeKind::Variable,
188 "Type" => NodeKind::Type,
189 "Module" => NodeKind::Module,
190 "Author" => NodeKind::Author,
191 _ => NodeKind::Variable,
192 },
193 name: n.name.clone(),
194 path: n.path.clone(),
195 line_start: n.line_start,
196 line_end: n.line_end,
197 metadata: serde_json::Value::Null,
198 })
199 .collect();
200
201 let kept_edge_defs: Vec<EdgeDef> = existing_edges
203 .iter()
204 .filter(|e| {
205 let src_file = all_node_defs
207 .iter()
208 .find(|n| n.id == e.src)
209 .map(|n| n.path.clone());
210 let dst_file = all_node_defs
211 .iter()
212 .find(|n| n.id == e.dst)
213 .map(|n| n.path.clone());
214 match (src_file, dst_file) {
215 (Some(sp), Some(dp)) => {
216 !changed_paths.contains(&sp) && !changed_paths.contains(&dp)
217 }
218 _ => false,
219 }
220 })
221 .map(|e| EdgeDef {
222 src: e.src.clone(),
223 dst: e.dst.clone(),
224 kind: match e.kind.as_str() {
225 "CALLS" => EdgeKind::Calls,
226 "IMPORTS" => EdgeKind::Imports,
227 "INHERITS" => EdgeKind::Inherits,
228 "EXPORTS" => EdgeKind::Exports,
229 "CO_CHANGES" => EdgeKind::CoChanges,
230 "OWNS" => EdgeKind::Owns,
231 "DEPENDS_ON" => EdgeKind::DependsOn,
232 _ => EdgeKind::Calls,
233 },
234 weight: e.weight,
235 confidence: e.confidence,
236 })
237 .collect();
238
239 let mut all_edge_defs = kept_edge_defs;
240 all_edge_defs.extend(new_edges);
241
242 let resolved_edges = resolve(&all_node_defs, &all_edge_defs, repo_path)?;
244 let resolved_count = resolved_edges.len();
245
246 let graph_edges: Vec<crate::graph::Edge> = all_edge_defs
247 .iter()
248 .map(crate::graph::Edge::from_def)
249 .collect();
250 let resolved_graph_edges: Vec<crate::graph::Edge> = resolved_edges
251 .iter()
252 .map(crate::graph::Edge::from_def)
253 .collect();
254
255 db.upsert_edges(&graph_edges)?;
256 db.upsert_edges(&resolved_graph_edges)?;
257
258 if !no_git {
260 let all_file_paths: Vec<String> = kept_nodes
261 .iter()
262 .filter(|n| n.kind == "File")
263 .map(|n| n.path.clone())
264 .collect();
265 let git_analysis = analyze_repo(repo_path, &all_file_paths)?;
266
267 let max_churn = git_analysis
268 .file_churn
269 .values()
270 .copied()
271 .fold(0.0, f64::max);
272 for (path, churn) in &git_analysis.file_churn {
273 let normalized = if max_churn > 0.0 {
274 churn / max_churn
275 } else {
276 0.0
277 };
278 let _ = db.upsert_node_scores(&format!("file:{}", path), normalized, 0.0);
279 }
280
281 let mut author_nodes = Vec::new();
282 let mut own_edges = Vec::new();
283 for (author, files) in &git_analysis.file_owners {
284 let author_id = format!("author:{}", author);
285 author_nodes.push(crate::graph::Node {
286 id: author_id.clone(),
287 kind: "Author".to_string(),
288 name: author.clone(),
289 path: String::new(),
290 line_start: 0,
291 line_end: 0,
292 language: String::new(),
293 churn: 0.0,
294 coupling: 0.0,
295 community: 0,
296 in_degree: 0,
297 out_degree: 0,
298 exported: false,
299 is_dead_candidate: false,
300 dead_reason: None,
301 });
302 for (file_path, _email, _percent) in files.iter().take(5) {
303 own_edges.push(crate::graph::Edge {
304 id: format!("owns:{}:{}", author_id, file_path),
305 src: author_id.clone(),
306 dst: format!("file:{}", file_path),
307 kind: "OWNS".to_string(),
308 weight: 1.0,
309 confidence: 1.0,
310 });
311 }
312 }
313 db.upsert_nodes(&author_nodes)?;
314 db.upsert_edges(&own_edges)?;
315
316 let mut cochange_edges = Vec::new();
317 for (a, b, weight) in &git_analysis.co_changes {
318 cochange_edges.push(crate::graph::Edge {
319 id: format!("cochange:{}:{}", a, b),
320 src: format!("file:{}", a),
321 dst: format!("file:{}", b),
322 kind: "CO_CHANGES".to_string(),
323 weight: *weight,
324 confidence: 1.0,
325 });
326 }
327 db.upsert_edges(&cochange_edges)?;
328 }
329
330 if !no_cluster {
332 let _ = run_clustering(db)?;
333 }
334
335 db.update_in_out_degrees()?;
337 db.compute_coupling()?;
338
339 let changed_paths_vec: Vec<String> = changed_paths.iter().cloned().collect();
341 db.delete_tags_for_paths(&changed_paths_vec)?;
342 let new_tag_rows: Vec<crate::graph::TagRow> = results
343 .iter()
344 .zip(changed_files.iter())
345 .flat_map(|(result, file)| {
346 result
347 .comment_tags
348 .iter()
349 .map(move |t| crate::graph::TagRow {
350 id: format!("tag:{}:{}:{}", file.relative_path, t.line, t.tag_type),
351 file_path: file.relative_path.clone(),
352 line: t.line,
353 tag_type: t.tag_type.clone(),
354 text: t.text.clone(),
355 comment_type: t.comment_kind.as_str().to_string(),
356 })
357 })
358 .collect();
359 db.upsert_tags(&new_tag_rows)?;
360
361 for (path, hash) in ¤t_hashes {
363 db.set_file_hash(path, hash)?;
364 }
365 if !deleted_paths.is_empty() {
366 db.remove_file_hashes(&deleted_paths)?;
367 }
368
369 if !quiet {
370 println!(" Incremental re-index complete.");
371 println!(
372 " Kept {} unchanged nodes.",
373 kept_nodes.len() - new_node_count
374 );
375 println!(" Added {} new/changed nodes.", new_node_count);
376 if !deleted_paths.is_empty() {
377 println!(" Removed {} deleted files.", deleted_paths.len());
378 }
379 println!(" Resolved {} cross-file edges.", resolved_count);
380 }
381
382 Ok(true)
383}