1pub mod cluster;
2pub mod config;
3pub mod diff;
4pub mod export;
5pub mod git;
6pub mod graph;
7pub mod parser;
8pub mod parsers;
9pub mod registry;
10pub mod resolver;
11pub mod skill;
12pub mod walker;
13
14pub use cluster::{detect_communities, run_clustering};
15pub use config::{
16 AnalyzeConfig, CgxConfig, ChatConfig, ExportConfig, IndexConfig, McpConfig, ProjectConfig,
17 ServeConfig, SkillConfig, WatchConfig,
18};
19pub use diff::{
20 compute_impact, diff_graphs, snapshot_at_commit, GraphDiff, GraphSnapshot, ImpactReport,
21};
22pub use export::{export_dot, export_graphml, export_json, export_mermaid, export_svg};
23pub use git::{analyze_repo, GitAnalysis};
24pub use graph::{CommunityRow, Edge, GraphDb, Node, RepoStats, TagRow};
25pub use parser::{
26 CommentKind, CommentTag, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult,
27 ParserRegistry,
28};
29pub use registry::{Registry, RepoEntry};
30pub use resolver::resolve;
31pub use skill::{
32 build_skill_data, generate_agents_md, generate_skill, install_git_hooks, write_agents_md,
33 write_skill, CommunityInfo, SkillData,
34};
35pub use walker::{walk_repo, Language, SourceFile};
36
37use sha2::{Digest, Sha256};
38use std::collections::{HashMap, HashSet};
39use std::path::Path;
40
41pub fn analyze_repo_incremental(
44 repo_path: &Path,
45 db: &GraphDb,
46 quiet: bool,
47 no_git: bool,
48 no_cluster: bool,
49 verbose: bool,
50) -> anyhow::Result<bool> {
51 let _ = verbose;
52
53 let files = walk_repo(repo_path)?;
55 let mut current_hashes: HashMap<String, String> = HashMap::new();
56 for file in &files {
57 let mut hasher = Sha256::new();
58 hasher.update(file.content.as_bytes());
59 let hash = format!("{:x}", hasher.finalize());
60 current_hashes.insert(file.relative_path.clone(), hash);
61 }
62
63 let stored_hashes = db.get_file_hashes().unwrap_or_default();
65
66 let mut changed_paths: HashSet<String> = HashSet::new();
68 for (path, hash) in ¤t_hashes {
69 if stored_hashes.get(path) != Some(hash) {
70 changed_paths.insert(path.clone());
71 }
72 }
73
74 let mut deleted_paths: Vec<String> = Vec::new();
75 for path in stored_hashes.keys() {
76 if !current_hashes.contains_key(path) {
77 deleted_paths.push(path.clone());
78 changed_paths.insert(path.clone());
79 }
80 }
81
82 if changed_paths.is_empty() {
83 if !quiet {
84 println!(" No file changes detected. Index is up to date.");
85 }
86 return Ok(false);
87 }
88
89 if !quiet {
90 println!(
91 " Incremental: {} changed/new/deleted file(s)",
92 changed_paths.len()
93 );
94 }
95
96 let existing_nodes = db.get_all_nodes()?;
98 let existing_edges = db.get_all_edges()?;
99
100 let mut kept_nodes: Vec<crate::graph::Node> = existing_nodes
101 .into_iter()
102 .filter(|n| !changed_paths.contains(&n.path))
103 .collect();
104
105 let changed_files: Vec<_> = files
107 .into_iter()
108 .filter(|f| changed_paths.contains(&f.relative_path))
109 .collect();
110
111 if !quiet {
112 println!(" Re-parsing {} changed file(s)...", changed_files.len());
113 }
114
115 let registry = ParserRegistry::new();
116 let results = registry.parse_all(&changed_files);
117
118 let mut new_nodes: Vec<NodeDef> = Vec::new();
119 let mut new_edges: Vec<EdgeDef> = Vec::new();
120 let mut changed_file_paths: HashSet<String> = HashSet::new();
121 let mut lang_map: HashMap<String, &str> = changed_files
122 .iter()
123 .map(|f| {
124 let lang_str = match f.language {
125 walker::Language::TypeScript => "typescript",
126 walker::Language::JavaScript => "javascript",
127 walker::Language::Python => "python",
128 walker::Language::Rust => "rust",
129 walker::Language::Go => "go",
130 walker::Language::Java => "java",
131 walker::Language::CSharp => "csharp",
132 walker::Language::Php => "php",
133 walker::Language::Unknown => "unknown",
134 };
135 (f.relative_path.clone(), lang_str)
136 })
137 .collect();
138
139 for result in &results {
140 new_nodes.extend(result.nodes.clone());
141 new_edges.extend(result.edges.clone());
142 }
143 for file in &changed_files {
144 changed_file_paths.insert(file.relative_path.clone());
145 }
146
147 let parsed_lang_map = resolver::build_language_map(&new_nodes);
149 for (path, lang) in parsed_lang_map {
150 if lang != "unknown" {
151 lang_map.entry(path).or_insert(lang);
152 }
153 }
154 let file_nodes = resolver::create_file_nodes(&changed_file_paths, &lang_map);
155 new_nodes.extend(file_nodes);
156
157 let new_graph_nodes: Vec<crate::graph::Node> = new_nodes
159 .iter()
160 .map(|n| {
161 let lang = lang_map.get(&n.path).copied().unwrap_or("unknown");
162 crate::graph::Node::from_def(n, lang)
163 })
164 .collect();
165
166 let new_node_count = new_graph_nodes.len();
168 kept_nodes.extend(new_graph_nodes);
169
170 db.clear()?;
172 db.upsert_nodes(&kept_nodes)?;
173
174 let all_node_defs: Vec<NodeDef> = kept_nodes
176 .iter()
177 .map(|n| NodeDef {
178 id: n.id.clone(),
179 kind: match n.kind.as_str() {
180 "File" => NodeKind::File,
181 "Function" => NodeKind::Function,
182 "Class" => NodeKind::Class,
183 "Variable" => NodeKind::Variable,
184 "Type" => NodeKind::Type,
185 "Module" => NodeKind::Module,
186 "Author" => NodeKind::Author,
187 _ => NodeKind::Variable,
188 },
189 name: n.name.clone(),
190 path: n.path.clone(),
191 line_start: n.line_start,
192 line_end: n.line_end,
193 metadata: serde_json::Value::Null,
194 })
195 .collect();
196
197 let kept_edge_defs: Vec<EdgeDef> = existing_edges
199 .iter()
200 .filter(|e| {
201 let src_file = all_node_defs
203 .iter()
204 .find(|n| n.id == e.src)
205 .map(|n| n.path.clone());
206 let dst_file = all_node_defs
207 .iter()
208 .find(|n| n.id == e.dst)
209 .map(|n| n.path.clone());
210 match (src_file, dst_file) {
211 (Some(sp), Some(dp)) => {
212 !changed_paths.contains(&sp) && !changed_paths.contains(&dp)
213 }
214 _ => false,
215 }
216 })
217 .map(|e| EdgeDef {
218 src: e.src.clone(),
219 dst: e.dst.clone(),
220 kind: match e.kind.as_str() {
221 "CALLS" => EdgeKind::Calls,
222 "IMPORTS" => EdgeKind::Imports,
223 "INHERITS" => EdgeKind::Inherits,
224 "EXPORTS" => EdgeKind::Exports,
225 "CO_CHANGES" => EdgeKind::CoChanges,
226 "OWNS" => EdgeKind::Owns,
227 "DEPENDS_ON" => EdgeKind::DependsOn,
228 _ => EdgeKind::Calls,
229 },
230 weight: e.weight,
231 confidence: e.confidence,
232 })
233 .collect();
234
235 let mut all_edge_defs = kept_edge_defs;
236 all_edge_defs.extend(new_edges);
237
238 let resolved_edges = resolve(&all_node_defs, &all_edge_defs, repo_path)?;
240 let resolved_count = resolved_edges.len();
241
242 let graph_edges: Vec<crate::graph::Edge> = all_edge_defs
243 .iter()
244 .map(crate::graph::Edge::from_def)
245 .collect();
246 let resolved_graph_edges: Vec<crate::graph::Edge> = resolved_edges
247 .iter()
248 .map(crate::graph::Edge::from_def)
249 .collect();
250
251 db.upsert_edges(&graph_edges)?;
252 db.upsert_edges(&resolved_graph_edges)?;
253
254 if !no_git {
256 let all_file_paths: Vec<String> = kept_nodes
257 .iter()
258 .filter(|n| n.kind == "File")
259 .map(|n| n.path.clone())
260 .collect();
261 let git_analysis = analyze_repo(repo_path, &all_file_paths)?;
262
263 let max_churn = git_analysis
264 .file_churn
265 .values()
266 .copied()
267 .fold(0.0, f64::max);
268 for (path, churn) in &git_analysis.file_churn {
269 let normalized = if max_churn > 0.0 {
270 churn / max_churn
271 } else {
272 0.0
273 };
274 let _ = db.upsert_node_scores(&format!("file:{}", path), normalized, 0.0);
275 }
276
277 let mut author_nodes = Vec::new();
278 let mut own_edges = Vec::new();
279 for (author, files) in &git_analysis.file_owners {
280 let author_id = format!("author:{}", author);
281 author_nodes.push(crate::graph::Node {
282 id: author_id.clone(),
283 kind: "Author".to_string(),
284 name: author.clone(),
285 path: String::new(),
286 line_start: 0,
287 line_end: 0,
288 language: String::new(),
289 churn: 0.0,
290 coupling: 0.0,
291 community: 0,
292 in_degree: 0,
293 out_degree: 0,
294 });
295 for (file_path, _email, _percent) in files.iter().take(5) {
296 own_edges.push(crate::graph::Edge {
297 id: format!("owns:{}:{}", author_id, file_path),
298 src: author_id.clone(),
299 dst: format!("file:{}", file_path),
300 kind: "OWNS".to_string(),
301 weight: 1.0,
302 confidence: 1.0,
303 });
304 }
305 }
306 db.upsert_nodes(&author_nodes)?;
307 db.upsert_edges(&own_edges)?;
308
309 let mut cochange_edges = Vec::new();
310 for (a, b, weight) in &git_analysis.co_changes {
311 cochange_edges.push(crate::graph::Edge {
312 id: format!("cochange:{}:{}", a, b),
313 src: format!("file:{}", a),
314 dst: format!("file:{}", b),
315 kind: "CO_CHANGES".to_string(),
316 weight: *weight,
317 confidence: 1.0,
318 });
319 }
320 db.upsert_edges(&cochange_edges)?;
321 }
322
323 if !no_cluster {
325 let _ = run_clustering(db)?;
326 }
327
328 db.update_in_out_degrees()?;
330 db.compute_coupling()?;
331
332 let changed_paths_vec: Vec<String> = changed_paths.iter().cloned().collect();
334 db.delete_tags_for_paths(&changed_paths_vec)?;
335 let new_tag_rows: Vec<crate::graph::TagRow> = results
336 .iter()
337 .zip(changed_files.iter())
338 .flat_map(|(result, file)| {
339 result
340 .comment_tags
341 .iter()
342 .map(move |t| crate::graph::TagRow {
343 id: format!("tag:{}:{}:{}", file.relative_path, t.line, t.tag_type),
344 file_path: file.relative_path.clone(),
345 line: t.line,
346 tag_type: t.tag_type.clone(),
347 text: t.text.clone(),
348 comment_type: t.comment_kind.as_str().to_string(),
349 })
350 })
351 .collect();
352 db.upsert_tags(&new_tag_rows)?;
353
354 for (path, hash) in ¤t_hashes {
356 db.set_file_hash(path, hash)?;
357 }
358 if !deleted_paths.is_empty() {
359 db.remove_file_hashes(&deleted_paths)?;
360 }
361
362 if !quiet {
363 println!(" Incremental re-index complete.");
364 println!(
365 " Kept {} unchanged nodes.",
366 kept_nodes.len() - new_node_count
367 );
368 println!(" Added {} new/changed nodes.", new_node_count);
369 if !deleted_paths.is_empty() {
370 println!(" Removed {} deleted files.", deleted_paths.len());
371 }
372 println!(" Resolved {} cross-file edges.", resolved_count);
373 }
374
375 Ok(true)
376}