1pub mod cluster;
2pub mod config;
3pub mod deadcode;
4pub mod deps;
5pub mod diff;
6pub mod dupes;
7pub mod export;
8pub mod git;
9pub mod graph;
10pub mod parser;
11pub mod parsers;
12pub mod registry;
13pub mod resolver;
14pub mod rules;
15pub mod skill;
16pub mod timeline;
17pub mod walker;
18
19pub use cluster::{detect_communities, run_clustering};
20pub use config::{
21 AnalyzeConfig, CgxConfig, ChatConfig, ExportConfig, IndexConfig, McpConfig, ProjectConfig,
22 ServeConfig, SkillConfig, WatchConfig,
23};
24pub use deadcode::{
25 detect_dead_code, mark_dead_candidates, Confidence, DeadCodeReport, DeadNode, DeadReason,
26};
27pub use deps::{audit_dependencies, parse_manifests, DependencyReport};
28pub use diff::{
29 compute_impact, diff_graphs, snapshot_at_commit, GraphDiff, GraphSnapshot, ImpactReport,
30};
31pub use dupes::{detect_clones, CloneKind, ClonePair};
32pub use export::{export_dot, export_graphml, export_json, export_mermaid, export_svg};
33pub use git::{analyze_repo, GitAnalysis};
34pub use graph::{
35 CloneRow, CommunityRow, DocsCoverage, Edge, GraphDb, Node, RepoStats, SnapshotEntry, TagRow,
36 TestCoverageSummary,
37};
38pub use parser::{
39 CommentKind, CommentTag, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult,
40 ParserRegistry,
41};
42pub use registry::{Registry, RepoEntry};
43pub use resolver::{is_test_path, resolve};
44pub use rules::{run_rules, Rule, RuleResult, RuleViolation, RulesConfig};
45pub use skill::{
46 build_skill_data, generate_agents_md, generate_skill, install_git_hooks, write_agents_md,
47 write_skill, CommunityInfo, SkillData,
48};
49pub use timeline::build_timeline;
50pub use walker::{walk_repo, Language, SourceFile};
51
52use sha2::{Digest, Sha256};
53use std::collections::{HashMap, HashSet};
54use std::path::Path;
55
56pub fn analyze_repo_incremental(
59 repo_path: &Path,
60 db: &GraphDb,
61 quiet: bool,
62 no_git: bool,
63 no_cluster: bool,
64 verbose: bool,
65) -> anyhow::Result<bool> {
66 let _ = verbose;
67
68 let files = walk_repo(repo_path)?;
70 let mut current_hashes: HashMap<String, String> = HashMap::new();
71 for file in &files {
72 let mut hasher = Sha256::new();
73 hasher.update(file.content.as_bytes());
74 let hash = format!("{:x}", hasher.finalize());
75 current_hashes.insert(file.relative_path.clone(), hash);
76 }
77
78 let stored_hashes = db.get_file_hashes().unwrap_or_default();
80
81 let mut changed_paths: HashSet<String> = HashSet::new();
83 for (path, hash) in ¤t_hashes {
84 if stored_hashes.get(path) != Some(hash) {
85 changed_paths.insert(path.clone());
86 }
87 }
88
89 let mut deleted_paths: Vec<String> = Vec::new();
90 for path in stored_hashes.keys() {
91 if !current_hashes.contains_key(path) {
92 deleted_paths.push(path.clone());
93 changed_paths.insert(path.clone());
94 }
95 }
96
97 if changed_paths.is_empty() {
98 if !quiet {
99 println!(" No file changes detected. Index is up to date.");
100 }
101 return Ok(false);
102 }
103
104 if !quiet {
105 println!(
106 " Incremental: {} changed/new/deleted file(s)",
107 changed_paths.len()
108 );
109 }
110
111 let existing_nodes = db.get_all_nodes()?;
113 let existing_edges = db.get_all_edges()?;
114
115 let mut kept_nodes: Vec<crate::graph::Node> = existing_nodes
116 .into_iter()
117 .filter(|n| !changed_paths.contains(&n.path))
118 .collect();
119
120 let changed_files: Vec<_> = files
122 .into_iter()
123 .filter(|f| changed_paths.contains(&f.relative_path))
124 .collect();
125
126 if !quiet {
127 println!(" Re-parsing {} changed file(s)...", changed_files.len());
128 }
129
130 let registry = ParserRegistry::new();
131 let results = registry.parse_all(&changed_files);
132
133 let mut new_nodes: Vec<NodeDef> = Vec::new();
134 let mut new_edges: Vec<EdgeDef> = Vec::new();
135 let mut changed_file_paths: HashSet<String> = HashSet::new();
136 let mut lang_map: HashMap<String, &str> = changed_files
137 .iter()
138 .map(|f| {
139 let lang_str = match f.language {
140 walker::Language::TypeScript => "typescript",
141 walker::Language::JavaScript => "javascript",
142 walker::Language::Python => "python",
143 walker::Language::Rust => "rust",
144 walker::Language::Go => "go",
145 walker::Language::Java => "java",
146 walker::Language::CSharp => "csharp",
147 walker::Language::Php => "php",
148 walker::Language::Unknown => "unknown",
149 };
150 (f.relative_path.clone(), lang_str)
151 })
152 .collect();
153
154 for result in &results {
155 new_nodes.extend(result.nodes.clone());
156 new_edges.extend(result.edges.clone());
157 }
158 for file in &changed_files {
159 changed_file_paths.insert(file.relative_path.clone());
160 }
161
162 let parsed_lang_map = resolver::build_language_map(&new_nodes);
164 for (path, lang) in parsed_lang_map {
165 if lang != "unknown" {
166 lang_map.entry(path).or_insert(lang);
167 }
168 }
169 let file_nodes = resolver::create_file_nodes(&changed_file_paths, &lang_map);
170 new_nodes.extend(file_nodes);
171
172 let new_graph_nodes: Vec<crate::graph::Node> = new_nodes
174 .iter()
175 .map(|n| {
176 let lang = lang_map.get(&n.path).copied().unwrap_or("unknown");
177 crate::graph::Node::from_def(n, lang)
178 })
179 .collect();
180
181 let new_node_count = new_graph_nodes.len();
183 kept_nodes.extend(new_graph_nodes);
184
185 db.clear()?;
187 db.upsert_nodes(&kept_nodes)?;
188
189 for result in &results {
191 for node_def in &result.nodes {
192 if let Some(doc) = node_def
193 .metadata
194 .get("doc_comment")
195 .and_then(|v| v.as_str())
196 {
197 if !doc.is_empty() {
198 let _ = db.update_node_doc_comment(&node_def.id, doc);
199 }
200 }
201 }
202 }
203
204 let all_node_defs: Vec<NodeDef> = kept_nodes
206 .iter()
207 .map(|n| NodeDef {
208 id: n.id.clone(),
209 kind: match n.kind.as_str() {
210 "File" => NodeKind::File,
211 "Function" => NodeKind::Function,
212 "Class" => NodeKind::Class,
213 "Variable" => NodeKind::Variable,
214 "Type" => NodeKind::Type,
215 "Module" => NodeKind::Module,
216 "Author" => NodeKind::Author,
217 _ => NodeKind::Variable,
218 },
219 name: n.name.clone(),
220 path: n.path.clone(),
221 line_start: n.line_start,
222 line_end: n.line_end,
223 metadata: serde_json::Value::Null,
224 })
225 .collect();
226
227 let kept_edge_defs: Vec<EdgeDef> = existing_edges
229 .iter()
230 .filter(|e| {
231 let src_file = all_node_defs
233 .iter()
234 .find(|n| n.id == e.src)
235 .map(|n| n.path.clone());
236 let dst_file = all_node_defs
237 .iter()
238 .find(|n| n.id == e.dst)
239 .map(|n| n.path.clone());
240 match (src_file, dst_file) {
241 (Some(sp), Some(dp)) => {
242 !changed_paths.contains(&sp) && !changed_paths.contains(&dp)
243 }
244 _ => false,
245 }
246 })
247 .map(|e| EdgeDef {
248 src: e.src.clone(),
249 dst: e.dst.clone(),
250 kind: match e.kind.as_str() {
251 "CALLS" => EdgeKind::Calls,
252 "IMPORTS" => EdgeKind::Imports,
253 "INHERITS" => EdgeKind::Inherits,
254 "EXPORTS" => EdgeKind::Exports,
255 "CO_CHANGES" => EdgeKind::CoChanges,
256 "OWNS" => EdgeKind::Owns,
257 "DEPENDS_ON" => EdgeKind::DependsOn,
258 _ => EdgeKind::Calls,
259 },
260 weight: e.weight,
261 confidence: e.confidence,
262 })
263 .collect();
264
265 let mut all_edge_defs = kept_edge_defs;
266 all_edge_defs.extend(new_edges);
267
268 let resolved_edges = resolve(&all_node_defs, &all_edge_defs, repo_path)?;
270 let resolved_count = resolved_edges.len();
271
272 let resolved_graph_edges: Vec<crate::graph::Edge> = resolved_edges
276 .iter()
277 .map(crate::graph::Edge::from_def)
278 .collect();
279 db.upsert_edges(&resolved_graph_edges)?;
280
281 if !no_git {
283 let all_file_paths: Vec<String> = kept_nodes
284 .iter()
285 .filter(|n| n.kind == "File")
286 .map(|n| n.path.clone())
287 .collect();
288 let git_analysis = analyze_repo(repo_path, &all_file_paths)?;
289
290 let max_churn = git_analysis
291 .file_churn
292 .values()
293 .copied()
294 .fold(0.0, f64::max);
295 for (path, churn) in &git_analysis.file_churn {
296 let normalized = if max_churn > 0.0 {
297 churn / max_churn
298 } else {
299 0.0
300 };
301 let _ = db.upsert_node_scores(&format!("file:{}", path), normalized, 0.0);
302 }
303
304 let mut author_nodes = Vec::new();
305 let mut own_edges = Vec::new();
306 for (author, files) in &git_analysis.file_owners {
307 let author_id = format!("author:{}", author);
308 author_nodes.push(crate::graph::Node {
309 id: author_id.clone(),
310 kind: "Author".to_string(),
311 name: author.clone(),
312 path: String::new(),
313 line_start: 0,
314 line_end: 0,
315 language: String::new(),
316 churn: 0.0,
317 coupling: 0.0,
318 community: 0,
319 in_degree: 0,
320 out_degree: 0,
321 exported: false,
322 is_dead_candidate: false,
323 dead_reason: None,
324 complexity: 0.0,
325 is_test_file: false,
326 test_count: 0,
327 is_tested: false,
328 });
329 for (file_path, _email, _percent) in files.iter().take(5) {
330 own_edges.push(crate::graph::Edge {
331 id: format!("owns:{}:{}", author_id, file_path),
332 src: author_id.clone(),
333 dst: format!("file:{}", file_path),
334 kind: "OWNS".to_string(),
335 weight: 1.0,
336 confidence: 1.0,
337 });
338 }
339 }
340 db.upsert_nodes(&author_nodes)?;
341 db.upsert_edges(&own_edges)?;
342
343 let mut cochange_edges = Vec::new();
344 for (a, b, weight) in &git_analysis.co_changes {
345 cochange_edges.push(crate::graph::Edge {
346 id: format!("cochange:{}:{}", a, b),
347 src: format!("file:{}", a),
348 dst: format!("file:{}", b),
349 kind: "CO_CHANGES".to_string(),
350 weight: *weight,
351 confidence: 1.0,
352 });
353 }
354 db.upsert_edges(&cochange_edges)?;
355 }
356
357 if !no_cluster {
359 let _ = run_clustering(db)?;
360 }
361
362 db.update_in_out_degrees()?;
364 db.compute_coupling()?;
365
366 let test_file_paths: Vec<String> = kept_nodes
368 .iter()
369 .filter(|n| n.kind == "File" && crate::resolver::is_test_path(&n.path))
370 .map(|n| n.path.clone())
371 .collect();
372 let test_node_paths: Vec<String> = kept_nodes
374 .iter()
375 .filter(|n| crate::resolver::is_test_path(&n.path))
376 .map(|n| n.path.clone())
377 .collect();
378 let all_test_paths: std::collections::HashSet<String> =
379 test_file_paths.into_iter().chain(test_node_paths).collect();
380 db.mark_test_files(&all_test_paths.into_iter().collect::<Vec<_>>())?;
381 db.update_test_coverage()?;
382
383 let changed_paths_vec: Vec<String> = changed_paths.iter().cloned().collect();
385 db.delete_tags_for_paths(&changed_paths_vec)?;
386 let new_tag_rows: Vec<crate::graph::TagRow> = results
387 .iter()
388 .zip(changed_files.iter())
389 .flat_map(|(result, file)| {
390 result
391 .comment_tags
392 .iter()
393 .map(move |t| crate::graph::TagRow {
394 id: format!("tag:{}:{}:{}", file.relative_path, t.line, t.tag_type),
395 file_path: file.relative_path.clone(),
396 line: t.line,
397 tag_type: t.tag_type.clone(),
398 text: t.text.clone(),
399 comment_type: t.comment_kind.as_str().to_string(),
400 })
401 })
402 .collect();
403 db.upsert_tags(&new_tag_rows)?;
404
405 for (path, hash) in ¤t_hashes {
407 db.set_file_hash(path, hash)?;
408 }
409 if !deleted_paths.is_empty() {
410 db.remove_file_hashes(&deleted_paths)?;
411 }
412
413 if !quiet {
414 println!(" Incremental re-index complete.");
415 println!(
416 " Kept {} unchanged nodes.",
417 kept_nodes.len() - new_node_count
418 );
419 println!(" Added {} new/changed nodes.", new_node_count);
420 if !deleted_paths.is_empty() {
421 println!(" Removed {} deleted files.", deleted_paths.len());
422 }
423 println!(" Resolved {} cross-file edges.", resolved_count);
424 }
425
426 Ok(true)
427}