1pub mod walker;
2pub mod parser;
3pub mod parsers;
4pub mod graph;
5pub mod registry;
6pub mod resolver;
7pub mod git;
8pub mod cluster;
9pub mod export;
10pub mod skill;
11pub mod diff;
12pub mod config;
13
14pub use walker::{walk_repo, Language, SourceFile};
15pub use parser::{
16 EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult, ParserRegistry,
17};
18pub use graph::{CommunityRow, Edge, GraphDb, Node, RepoStats};
19pub use registry::{Registry, RepoEntry};
20pub use resolver::resolve;
21pub use git::{analyze_repo, GitAnalysis};
22pub use cluster::{detect_communities, run_clustering};
23pub use export::{export_dot, export_graphml, export_json, export_mermaid, export_svg};
24pub use skill::{
25 build_skill_data, generate_skill, generate_agents_md, write_skill, write_agents_md,
26 install_git_hooks, CommunityInfo, SkillData,
27};
28pub use diff::{
29 snapshot_at_commit, diff_graphs, compute_impact, GraphDiff, GraphSnapshot, ImpactReport,
30};
31pub use config::{CgxConfig, AnalyzeConfig, ChatConfig, IndexConfig, McpConfig, ProjectConfig, ServeConfig, SkillConfig, WatchConfig, ExportConfig};
32
33use std::collections::{HashMap, HashSet};
34use std::path::Path;
35use sha2::{Digest, Sha256};
36
37pub fn analyze_repo_incremental(
40 repo_path: &Path,
41 db: &GraphDb,
42 quiet: bool,
43 no_git: bool,
44 no_cluster: bool,
45 verbose: bool,
46) -> anyhow::Result<bool> {
47 let _ = verbose;
48
49 let files = walk_repo(repo_path)?;
51 let mut current_hashes: HashMap<String, String> = HashMap::new();
52 for file in &files {
53 let mut hasher = Sha256::new();
54 hasher.update(file.content.as_bytes());
55 let hash = format!("{:x}", hasher.finalize());
56 current_hashes.insert(file.relative_path.clone(), hash);
57 }
58
59 let stored_hashes = db.get_file_hashes().unwrap_or_default();
61
62 let mut changed_paths: HashSet<String> = HashSet::new();
64 for (path, hash) in ¤t_hashes {
65 if stored_hashes.get(path) != Some(hash) {
66 changed_paths.insert(path.clone());
67 }
68 }
69
70 let mut deleted_paths: Vec<String> = Vec::new();
71 for path in stored_hashes.keys() {
72 if !current_hashes.contains_key(path) {
73 deleted_paths.push(path.clone());
74 changed_paths.insert(path.clone());
75 }
76 }
77
78 if changed_paths.is_empty() {
79 if !quiet {
80 println!(" No file changes detected. Index is up to date.");
81 }
82 return Ok(false);
83 }
84
85 if !quiet {
86 println!(" Incremental: {} changed/new/deleted file(s)", changed_paths.len());
87 }
88
89 let existing_nodes = db.get_all_nodes()?;
91 let existing_edges = db.get_all_edges()?;
92
93 let mut kept_nodes: Vec<crate::graph::Node> = existing_nodes
94 .into_iter()
95 .filter(|n| !changed_paths.contains(&n.path))
96 .collect();
97
98 let changed_files: Vec<_> = files
100 .into_iter()
101 .filter(|f| changed_paths.contains(&f.relative_path))
102 .collect();
103
104 if !quiet {
105 println!(" Re-parsing {} changed file(s)...", changed_files.len());
106 }
107
108 let registry = ParserRegistry::new();
109 let results = registry.parse_all(&changed_files);
110
111 let mut new_nodes: Vec<NodeDef> = Vec::new();
112 let mut new_edges: Vec<EdgeDef> = Vec::new();
113 let mut changed_file_paths: HashSet<String> = HashSet::new();
114 let mut lang_map: HashMap<String, &str> = changed_files
115 .iter()
116 .map(|f| {
117 let lang_str = match f.language {
118 walker::Language::TypeScript => "typescript",
119 walker::Language::JavaScript => "javascript",
120 walker::Language::Python => "python",
121 walker::Language::Rust => "rust",
122 walker::Language::Go => "go",
123 walker::Language::Java => "java",
124 walker::Language::CSharp => "csharp",
125 walker::Language::Php => "php",
126 walker::Language::Unknown => "unknown",
127 };
128 (f.relative_path.clone(), lang_str)
129 })
130 .collect();
131
132 for result in &results {
133 new_nodes.extend(result.nodes.clone());
134 new_edges.extend(result.edges.clone());
135 }
136 for file in &changed_files {
137 changed_file_paths.insert(file.relative_path.clone());
138 }
139
140 let parsed_lang_map = resolver::build_language_map(&new_nodes);
142 for (path, lang) in parsed_lang_map {
143 if lang != "unknown" {
144 lang_map.entry(path).or_insert(lang);
145 }
146 }
147 let file_nodes = resolver::create_file_nodes(&changed_file_paths, &lang_map);
148 new_nodes.extend(file_nodes);
149
150 let new_graph_nodes: Vec<crate::graph::Node> = new_nodes
152 .iter()
153 .map(|n| {
154 let lang = lang_map.get(&n.path).copied().unwrap_or("unknown");
155 crate::graph::Node::from_def(n, lang)
156 })
157 .collect();
158
159 let new_node_count = new_graph_nodes.len();
161 kept_nodes.extend(new_graph_nodes);
162
163 db.clear()?;
165 db.upsert_nodes(&kept_nodes)?;
166
167 let all_node_defs: Vec<NodeDef> = kept_nodes
169 .iter()
170 .map(|n| NodeDef {
171 id: n.id.clone(),
172 kind: match n.kind.as_str() {
173 "File" => NodeKind::File,
174 "Function" => NodeKind::Function,
175 "Class" => NodeKind::Class,
176 "Variable" => NodeKind::Variable,
177 "Type" => NodeKind::Type,
178 "Module" => NodeKind::Module,
179 "Author" => NodeKind::Author,
180 _ => NodeKind::Variable,
181 },
182 name: n.name.clone(),
183 path: n.path.clone(),
184 line_start: n.line_start,
185 line_end: n.line_end,
186 metadata: serde_json::Value::Null,
187 })
188 .collect();
189
190 let kept_edge_defs: Vec<EdgeDef> = existing_edges
192 .iter()
193 .filter(|e| {
194 let src_file = all_node_defs.iter().find(|n| n.id == e.src).map(|n| n.path.clone());
196 let dst_file = all_node_defs.iter().find(|n| n.id == e.dst).map(|n| n.path.clone());
197 match (src_file, dst_file) {
198 (Some(sp), Some(dp)) => !changed_paths.contains(&sp) && !changed_paths.contains(&dp),
199 _ => false,
200 }
201 })
202 .map(|e| EdgeDef {
203 src: e.src.clone(),
204 dst: e.dst.clone(),
205 kind: match e.kind.as_str() {
206 "CALLS" => EdgeKind::Calls,
207 "IMPORTS" => EdgeKind::Imports,
208 "INHERITS" => EdgeKind::Inherits,
209 "EXPORTS" => EdgeKind::Exports,
210 "CO_CHANGES" => EdgeKind::CoChanges,
211 "OWNS" => EdgeKind::Owns,
212 "DEPENDS_ON" => EdgeKind::DependsOn,
213 _ => EdgeKind::Calls,
214 },
215 weight: e.weight,
216 confidence: e.confidence,
217 })
218 .collect();
219
220 let mut all_edge_defs = kept_edge_defs;
221 all_edge_defs.extend(new_edges);
222
223 let resolved_edges = resolve(&all_node_defs, &all_edge_defs, repo_path)?;
225 let resolved_count = resolved_edges.len();
226
227 let graph_edges: Vec<crate::graph::Edge> = all_edge_defs
228 .iter()
229 .map(crate::graph::Edge::from_def)
230 .collect();
231 let resolved_graph_edges: Vec<crate::graph::Edge> = resolved_edges
232 .iter()
233 .map(crate::graph::Edge::from_def)
234 .collect();
235
236 db.upsert_edges(&graph_edges)?;
237 db.upsert_edges(&resolved_graph_edges)?;
238
239 if !no_git {
241 let all_file_paths: Vec<String> = kept_nodes
242 .iter()
243 .filter(|n| n.kind == "File")
244 .map(|n| n.path.clone())
245 .collect();
246 let git_analysis = analyze_repo(repo_path, &all_file_paths)?;
247
248 let max_churn = git_analysis.file_churn.values().copied().fold(0.0, f64::max);
249 for (path, churn) in &git_analysis.file_churn {
250 let normalized = if max_churn > 0.0 { churn / max_churn } else { 0.0 };
251 let _ = db.upsert_node_scores(&format!("file:{}", path), normalized, 0.0);
252 }
253
254 let mut author_nodes = Vec::new();
255 let mut own_edges = Vec::new();
256 for (author, files) in &git_analysis.file_owners {
257 let author_id = format!("author:{}", author);
258 author_nodes.push(crate::graph::Node {
259 id: author_id.clone(),
260 kind: "Author".to_string(),
261 name: author.clone(),
262 path: String::new(),
263 line_start: 0,
264 line_end: 0,
265 language: String::new(),
266 churn: 0.0,
267 coupling: 0.0,
268 community: 0,
269 in_degree: 0,
270 out_degree: 0,
271 });
272 for (file_path, _email, _percent) in files.iter().take(5) {
273 own_edges.push(crate::graph::Edge {
274 id: format!("owns:{}:{}", author_id, file_path),
275 src: author_id.clone(),
276 dst: format!("file:{}", file_path),
277 kind: "OWNS".to_string(),
278 weight: 1.0,
279 confidence: 1.0,
280 });
281 }
282 }
283 db.upsert_nodes(&author_nodes)?;
284 db.upsert_edges(&own_edges)?;
285
286 let mut cochange_edges = Vec::new();
287 for (a, b, weight) in &git_analysis.co_changes {
288 cochange_edges.push(crate::graph::Edge {
289 id: format!("cochange:{}:{}", a, b),
290 src: format!("file:{}", a),
291 dst: format!("file:{}", b),
292 kind: "CO_CHANGES".to_string(),
293 weight: *weight,
294 confidence: 1.0,
295 });
296 }
297 db.upsert_edges(&cochange_edges)?;
298 }
299
300 if !no_cluster {
302 let _ = run_clustering(db)?;
303 }
304
305 db.update_in_out_degrees()?;
307 db.compute_coupling()?;
308
309 for (path, hash) in ¤t_hashes {
311 db.set_file_hash(path, hash)?;
312 }
313 if !deleted_paths.is_empty() {
314 db.remove_file_hashes(&deleted_paths)?;
315 }
316
317 if !quiet {
318 println!(" Incremental re-index complete.");
319 println!(" Kept {} unchanged nodes.", kept_nodes.len() - new_node_count);
320 println!(" Added {} new/changed nodes.", new_node_count);
321 if !deleted_paths.is_empty() {
322 println!(" Removed {} deleted files.", deleted_paths.len());
323 }
324 println!(" Resolved {} cross-file edges.", resolved_count);
325 }
326
327 Ok(true)
328}