1pub mod bisect;
2pub mod cluster;
3pub mod config;
4pub mod deadcode;
5pub mod deps;
6pub mod diff;
7pub mod docs;
8pub mod dupes;
9pub mod export;
10pub mod git;
11pub mod graph;
12pub mod parser;
13pub mod parsers;
14pub mod registry;
15pub mod resolver;
16pub mod rules;
17pub mod skill;
18pub mod timeline;
19pub mod walker;
20
21pub use cluster::{detect_communities, run_clustering};
22pub use config::{
23 AnalyzeConfig, CgxConfig, ChatConfig, DocsConfig, ExportConfig, IndexConfig, McpConfig,
24 ProjectConfig, ServeConfig, SkillConfig, WatchConfig,
25};
26pub use deadcode::{
27 detect_dead_code, mark_dead_candidates, Confidence, DeadCodeReport, DeadNode, DeadReason,
28};
29pub use deps::{audit_dependencies, parse_manifests, DependencyReport};
30pub use diff::{
31 compute_impact, diff_graphs, snapshot_at_commit, GraphDiff, GraphSnapshot, ImpactReport,
32};
33pub use dupes::{detect_clones, CloneKind, ClonePair};
34pub use export::{export_dot, export_graphml, export_json, export_mermaid, export_svg};
35pub use git::{analyze_repo, GitAnalysis};
36pub use graph::{
37 ApiScope, CloneRow, CommunityRow, CrossClusterEdge, DocsCoverage, Edge, EntryPoint,
38 FileSummary, GraphDb, Node, PublicSymbol, RepoStats, SnapshotEntry, TagRow,
39 TestCoverageSummary,
40};
41pub use parser::{
42 CommentKind, CommentTag, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult,
43 ParserRegistry,
44};
45pub use registry::{Registry, RepoEntry};
46pub use resolver::{is_test_path, resolve};
47pub use rules::{run_rules, Rule, RuleResult, RuleViolation, RulesConfig};
48pub use skill::{
49 build_skill_data, generate_agents_md, generate_skill, install_git_hooks, write_agents_md,
50 write_skill, CommunityInfo, SkillData,
51};
52pub use timeline::build_timeline;
53pub use walker::{walk_repo, Language, SourceFile};
54
55use sha2::{Digest, Sha256};
56use std::collections::{HashMap, HashSet};
57use std::path::Path;
58
59pub fn analyze_repo_incremental(
62 repo_path: &Path,
63 db: &GraphDb,
64 quiet: bool,
65 no_git: bool,
66 no_cluster: bool,
67 verbose: bool,
68) -> anyhow::Result<bool> {
69 let _ = verbose;
70
71 let files = walk_repo(repo_path)?;
73 let mut current_hashes: HashMap<String, String> = HashMap::new();
74 for file in &files {
75 let mut hasher = Sha256::new();
76 hasher.update(file.content.as_bytes());
77 let hash = format!("{:x}", hasher.finalize());
78 current_hashes.insert(file.relative_path.clone(), hash);
79 }
80
81 let stored_hashes = db.get_file_hashes().unwrap_or_default();
83
84 let mut changed_paths: HashSet<String> = HashSet::new();
86 for (path, hash) in ¤t_hashes {
87 if stored_hashes.get(path) != Some(hash) {
88 changed_paths.insert(path.clone());
89 }
90 }
91
92 let mut deleted_paths: Vec<String> = Vec::new();
93 for path in stored_hashes.keys() {
94 if !current_hashes.contains_key(path) {
95 deleted_paths.push(path.clone());
96 changed_paths.insert(path.clone());
97 }
98 }
99
100 if changed_paths.is_empty() {
101 if !quiet {
102 println!(" No file changes detected. Index is up to date.");
103 }
104 return Ok(false);
105 }
106
107 if !quiet {
108 println!(
109 " Incremental: {} changed/new/deleted file(s)",
110 changed_paths.len()
111 );
112 }
113
114 let existing_nodes = db.get_all_nodes()?;
116 let existing_edges = db.get_all_edges()?;
117
118 let mut kept_nodes: Vec<crate::graph::Node> = existing_nodes
119 .into_iter()
120 .filter(|n| !changed_paths.contains(&n.path))
121 .collect();
122
123 let changed_files: Vec<_> = files
125 .into_iter()
126 .filter(|f| changed_paths.contains(&f.relative_path))
127 .collect();
128
129 if !quiet {
130 println!(" Re-parsing {} changed file(s)...", changed_files.len());
131 }
132
133 let registry = ParserRegistry::new();
134 let results = registry.parse_all(&changed_files);
135
136 let mut new_nodes: Vec<NodeDef> = Vec::new();
137 let mut new_edges: Vec<EdgeDef> = Vec::new();
138 let mut changed_file_paths: HashSet<String> = HashSet::new();
139 let mut lang_map: HashMap<String, &str> = changed_files
140 .iter()
141 .map(|f| {
142 let lang_str = match f.language {
143 walker::Language::TypeScript => "typescript",
144 walker::Language::JavaScript => "javascript",
145 walker::Language::Python => "python",
146 walker::Language::Rust => "rust",
147 walker::Language::Go => "go",
148 walker::Language::Java => "java",
149 walker::Language::CSharp => "csharp",
150 walker::Language::Php => "php",
151 walker::Language::Unknown => "unknown",
152 };
153 (f.relative_path.clone(), lang_str)
154 })
155 .collect();
156
157 for result in &results {
158 new_nodes.extend(result.nodes.clone());
159 new_edges.extend(result.edges.clone());
160 }
161 for file in &changed_files {
162 changed_file_paths.insert(file.relative_path.clone());
163 }
164
165 let parsed_lang_map = resolver::build_language_map(&new_nodes);
167 for (path, lang) in parsed_lang_map {
168 if lang != "unknown" {
169 lang_map.entry(path).or_insert(lang);
170 }
171 }
172 let file_nodes = resolver::create_file_nodes(&changed_file_paths, &lang_map);
173 new_nodes.extend(file_nodes);
174
175 let new_graph_nodes: Vec<crate::graph::Node> = new_nodes
177 .iter()
178 .map(|n| {
179 let lang = lang_map.get(&n.path).copied().unwrap_or("unknown");
180 crate::graph::Node::from_def(n, lang)
181 })
182 .collect();
183
184 let new_node_count = new_graph_nodes.len();
186 kept_nodes.extend(new_graph_nodes);
187
188 db.clear()?;
190 db.upsert_nodes(&kept_nodes)?;
191
192 for result in &results {
194 for node_def in &result.nodes {
195 if let Some(doc) = node_def
196 .metadata
197 .get("doc_comment")
198 .and_then(|v| v.as_str())
199 {
200 if !doc.is_empty() {
201 let _ = db.update_node_doc_comment(&node_def.id, doc);
202 }
203 }
204 }
205 }
206
207 let all_node_defs: Vec<NodeDef> = kept_nodes
209 .iter()
210 .map(|n| NodeDef {
211 id: n.id.clone(),
212 kind: match n.kind.as_str() {
213 "File" => NodeKind::File,
214 "Function" => NodeKind::Function,
215 "Class" => NodeKind::Class,
216 "Variable" => NodeKind::Variable,
217 "Type" => NodeKind::Type,
218 "Module" => NodeKind::Module,
219 "Author" => NodeKind::Author,
220 _ => NodeKind::Variable,
221 },
222 name: n.name.clone(),
223 path: n.path.clone(),
224 line_start: n.line_start,
225 line_end: n.line_end,
226 metadata: serde_json::Value::Null,
227 })
228 .collect();
229
230 let kept_edge_defs: Vec<EdgeDef> = existing_edges
232 .iter()
233 .filter(|e| {
234 let src_file = all_node_defs
236 .iter()
237 .find(|n| n.id == e.src)
238 .map(|n| n.path.clone());
239 let dst_file = all_node_defs
240 .iter()
241 .find(|n| n.id == e.dst)
242 .map(|n| n.path.clone());
243 match (src_file, dst_file) {
244 (Some(sp), Some(dp)) => {
245 !changed_paths.contains(&sp) && !changed_paths.contains(&dp)
246 }
247 _ => false,
248 }
249 })
250 .map(|e| EdgeDef {
251 src: e.src.clone(),
252 dst: e.dst.clone(),
253 kind: match e.kind.as_str() {
254 "CALLS" => EdgeKind::Calls,
255 "IMPORTS" => EdgeKind::Imports,
256 "INHERITS" => EdgeKind::Inherits,
257 "EXPORTS" => EdgeKind::Exports,
258 "CO_CHANGES" => EdgeKind::CoChanges,
259 "OWNS" => EdgeKind::Owns,
260 "DEPENDS_ON" => EdgeKind::DependsOn,
261 _ => EdgeKind::Calls,
262 },
263 weight: e.weight,
264 confidence: e.confidence,
265 })
266 .collect();
267
268 let mut all_edge_defs = kept_edge_defs;
269 all_edge_defs.extend(new_edges);
270
271 let resolved_edges = resolve(&all_node_defs, &all_edge_defs, repo_path)?;
273 let resolved_count = resolved_edges.len();
274
275 let resolved_graph_edges: Vec<crate::graph::Edge> = resolved_edges
279 .iter()
280 .map(crate::graph::Edge::from_def)
281 .collect();
282 db.upsert_edges(&resolved_graph_edges)?;
283
284 if !no_git {
286 let all_file_paths: Vec<String> = kept_nodes
287 .iter()
288 .filter(|n| n.kind == "File")
289 .map(|n| n.path.clone())
290 .collect();
291 let git_analysis = analyze_repo(repo_path, &all_file_paths)?;
292
293 let max_churn = git_analysis
294 .file_churn
295 .values()
296 .copied()
297 .fold(0.0, f64::max);
298 for (path, churn) in &git_analysis.file_churn {
299 let normalized = if max_churn > 0.0 {
300 churn / max_churn
301 } else {
302 0.0
303 };
304 let _ = db.upsert_node_scores(&format!("file:{}", path), normalized, 0.0);
305 }
306
307 let mut author_nodes = Vec::new();
308 let mut own_edges = Vec::new();
309 for (author, files) in &git_analysis.file_owners {
310 let author_id = format!("author:{}", author);
311 author_nodes.push(crate::graph::Node {
312 id: author_id.clone(),
313 kind: "Author".to_string(),
314 name: author.clone(),
315 path: String::new(),
316 line_start: 0,
317 line_end: 0,
318 language: String::new(),
319 churn: 0.0,
320 coupling: 0.0,
321 community: 0,
322 in_degree: 0,
323 out_degree: 0,
324 exported: false,
325 is_dead_candidate: false,
326 dead_reason: None,
327 complexity: 0.0,
328 is_test_file: false,
329 test_count: 0,
330 is_tested: false,
331 });
332 for (file_path, _email, _percent) in files.iter().take(5) {
333 own_edges.push(crate::graph::Edge {
334 id: format!("owns:{}:{}", author_id, file_path),
335 src: author_id.clone(),
336 dst: format!("file:{}", file_path),
337 kind: "OWNS".to_string(),
338 weight: 1.0,
339 confidence: 1.0,
340 });
341 }
342 }
343 db.upsert_nodes(&author_nodes)?;
344 db.upsert_edges(&own_edges)?;
345
346 let mut cochange_edges = Vec::new();
347 for (a, b, weight) in &git_analysis.co_changes {
348 cochange_edges.push(crate::graph::Edge {
349 id: format!("cochange:{}:{}", a, b),
350 src: format!("file:{}", a),
351 dst: format!("file:{}", b),
352 kind: "CO_CHANGES".to_string(),
353 weight: *weight,
354 confidence: 1.0,
355 });
356 }
357 db.upsert_edges(&cochange_edges)?;
358 }
359
360 if !no_cluster {
362 let _ = run_clustering(db)?;
363 }
364
365 db.update_in_out_degrees()?;
367 db.compute_coupling()?;
368
369 let test_file_paths: Vec<String> = kept_nodes
371 .iter()
372 .filter(|n| n.kind == "File" && crate::resolver::is_test_path(&n.path))
373 .map(|n| n.path.clone())
374 .collect();
375 let test_node_paths: Vec<String> = kept_nodes
377 .iter()
378 .filter(|n| crate::resolver::is_test_path(&n.path))
379 .map(|n| n.path.clone())
380 .collect();
381 let all_test_paths: std::collections::HashSet<String> =
382 test_file_paths.into_iter().chain(test_node_paths).collect();
383 db.mark_test_files(&all_test_paths.into_iter().collect::<Vec<_>>())?;
384 db.update_test_coverage()?;
385
386 let changed_paths_vec: Vec<String> = changed_paths.iter().cloned().collect();
388 db.delete_tags_for_paths(&changed_paths_vec)?;
389 let new_tag_rows: Vec<crate::graph::TagRow> = results
390 .iter()
391 .zip(changed_files.iter())
392 .flat_map(|(result, file)| {
393 result
394 .comment_tags
395 .iter()
396 .map(move |t| crate::graph::TagRow {
397 id: format!("tag:{}:{}:{}", file.relative_path, t.line, t.tag_type),
398 file_path: file.relative_path.clone(),
399 line: t.line,
400 tag_type: t.tag_type.clone(),
401 text: t.text.clone(),
402 comment_type: t.comment_kind.as_str().to_string(),
403 })
404 })
405 .collect();
406 db.upsert_tags(&new_tag_rows)?;
407
408 for (path, hash) in ¤t_hashes {
410 db.set_file_hash(path, hash)?;
411 }
412 if !deleted_paths.is_empty() {
413 db.remove_file_hashes(&deleted_paths)?;
414 }
415
416 if !quiet {
417 println!(" Incremental re-index complete.");
418 println!(
419 " Kept {} unchanged nodes.",
420 kept_nodes.len() - new_node_count
421 );
422 println!(" Added {} new/changed nodes.", new_node_count);
423 if !deleted_paths.is_empty() {
424 println!(" Removed {} deleted files.", deleted_paths.len());
425 }
426 println!(" Resolved {} cross-file edges.", resolved_count);
427 }
428
429 Ok(true)
430}