Skip to main content

cgx_engine/
lib.rs

1pub mod cluster;
2pub mod config;
3pub mod diff;
4pub mod export;
5pub mod git;
6pub mod graph;
7pub mod parser;
8pub mod parsers;
9pub mod registry;
10pub mod resolver;
11pub mod skill;
12pub mod walker;
13
14pub use cluster::{detect_communities, run_clustering};
15pub use config::{
16    AnalyzeConfig, CgxConfig, ChatConfig, ExportConfig, IndexConfig, McpConfig, ProjectConfig,
17    ServeConfig, SkillConfig, WatchConfig,
18};
19pub use diff::{
20    compute_impact, diff_graphs, snapshot_at_commit, GraphDiff, GraphSnapshot, ImpactReport,
21};
22pub use export::{export_dot, export_graphml, export_json, export_mermaid, export_svg};
23pub use git::{analyze_repo, GitAnalysis};
24pub use graph::{CommunityRow, Edge, GraphDb, Node, RepoStats, TagRow};
25pub use parser::{
26    CommentKind, CommentTag, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult,
27    ParserRegistry,
28};
29pub use registry::{Registry, RepoEntry};
30pub use resolver::resolve;
31pub use skill::{
32    build_skill_data, generate_agents_md, generate_skill, install_git_hooks, write_agents_md,
33    write_skill, CommunityInfo, SkillData,
34};
35pub use walker::{walk_repo, Language, SourceFile};
36
37use sha2::{Digest, Sha256};
38use std::collections::{HashMap, HashSet};
39use std::path::Path;
40
41/// Incremental repository analysis — only re-parses changed files.
42/// Returns true if analysis was performed, false if no changes detected.
43pub fn analyze_repo_incremental(
44    repo_path: &Path,
45    db: &GraphDb,
46    quiet: bool,
47    no_git: bool,
48    no_cluster: bool,
49    verbose: bool,
50) -> anyhow::Result<bool> {
51    let _ = verbose;
52
53    // 1. Walk all files and compute hashes
54    let files = walk_repo(repo_path)?;
55    let mut current_hashes: HashMap<String, String> = HashMap::new();
56    for file in &files {
57        let mut hasher = Sha256::new();
58        hasher.update(file.content.as_bytes());
59        let hash = format!("{:x}", hasher.finalize());
60        current_hashes.insert(file.relative_path.clone(), hash);
61    }
62
63    // 2. Load stored hashes
64    let stored_hashes = db.get_file_hashes().unwrap_or_default();
65
66    // 3. Determine changes
67    let mut changed_paths: HashSet<String> = HashSet::new();
68    for (path, hash) in &current_hashes {
69        if stored_hashes.get(path) != Some(hash) {
70            changed_paths.insert(path.clone());
71        }
72    }
73
74    let mut deleted_paths: Vec<String> = Vec::new();
75    for path in stored_hashes.keys() {
76        if !current_hashes.contains_key(path) {
77            deleted_paths.push(path.clone());
78            changed_paths.insert(path.clone());
79        }
80    }
81
82    if changed_paths.is_empty() {
83        if !quiet {
84            println!("  No file changes detected. Index is up to date.");
85        }
86        return Ok(false);
87    }
88
89    if !quiet {
90        println!(
91            "  Incremental: {} changed/new/deleted file(s)",
92            changed_paths.len()
93        );
94    }
95
96    // 4. Load existing nodes and filter out changed/deleted ones
97    let existing_nodes = db.get_all_nodes()?;
98    let existing_edges = db.get_all_edges()?;
99
100    let mut kept_nodes: Vec<crate::graph::Node> = existing_nodes
101        .into_iter()
102        .filter(|n| !changed_paths.contains(&n.path))
103        .collect();
104
105    // 5. Parse only changed/new files
106    let changed_files: Vec<_> = files
107        .into_iter()
108        .filter(|f| changed_paths.contains(&f.relative_path))
109        .collect();
110
111    if !quiet {
112        println!("  Re-parsing {} changed file(s)...", changed_files.len());
113    }
114
115    let registry = ParserRegistry::new();
116    let results = registry.parse_all(&changed_files);
117
118    let mut new_nodes: Vec<NodeDef> = Vec::new();
119    let mut new_edges: Vec<EdgeDef> = Vec::new();
120    let mut changed_file_paths: HashSet<String> = HashSet::new();
121    let mut lang_map: HashMap<String, &str> = changed_files
122        .iter()
123        .map(|f| {
124            let lang_str = match f.language {
125                walker::Language::TypeScript => "typescript",
126                walker::Language::JavaScript => "javascript",
127                walker::Language::Python => "python",
128                walker::Language::Rust => "rust",
129                walker::Language::Go => "go",
130                walker::Language::Java => "java",
131                walker::Language::CSharp => "csharp",
132                walker::Language::Php => "php",
133                walker::Language::Unknown => "unknown",
134            };
135            (f.relative_path.clone(), lang_str)
136        })
137        .collect();
138
139    for result in &results {
140        new_nodes.extend(result.nodes.clone());
141        new_edges.extend(result.edges.clone());
142    }
143    for file in &changed_files {
144        changed_file_paths.insert(file.relative_path.clone());
145    }
146
147    // Add file nodes for changed files
148    let parsed_lang_map = resolver::build_language_map(&new_nodes);
149    for (path, lang) in parsed_lang_map {
150        if lang != "unknown" {
151            lang_map.entry(path).or_insert(lang);
152        }
153    }
154    let file_nodes = resolver::create_file_nodes(&changed_file_paths, &lang_map);
155    new_nodes.extend(file_nodes);
156
157    // Convert new nodes to GraphDb format
158    let new_graph_nodes: Vec<crate::graph::Node> = new_nodes
159        .iter()
160        .map(|n| {
161            let lang = lang_map.get(&n.path).copied().unwrap_or("unknown");
162            crate::graph::Node::from_def(n, lang)
163        })
164        .collect();
165
166    // 6. Merge kept + new nodes
167    let new_node_count = new_graph_nodes.len();
168    kept_nodes.extend(new_graph_nodes);
169
170    // 7. Clear and re-insert all nodes
171    db.clear()?;
172    db.upsert_nodes(&kept_nodes)?;
173
174    // Re-convert kept nodes back to NodeDef for resolution
175    let all_node_defs: Vec<NodeDef> = kept_nodes
176        .iter()
177        .map(|n| NodeDef {
178            id: n.id.clone(),
179            kind: match n.kind.as_str() {
180                "File" => NodeKind::File,
181                "Function" => NodeKind::Function,
182                "Class" => NodeKind::Class,
183                "Variable" => NodeKind::Variable,
184                "Type" => NodeKind::Type,
185                "Module" => NodeKind::Module,
186                "Author" => NodeKind::Author,
187                _ => NodeKind::Variable,
188            },
189            name: n.name.clone(),
190            path: n.path.clone(),
191            line_start: n.line_start,
192            line_end: n.line_end,
193            metadata: serde_json::Value::Null,
194        })
195        .collect();
196
197    // Convert new edges + existing edges to EdgeDef
198    let kept_edge_defs: Vec<EdgeDef> = existing_edges
199        .iter()
200        .filter(|e| {
201            // Keep edges that don't reference changed/deleted file nodes
202            let src_file = all_node_defs
203                .iter()
204                .find(|n| n.id == e.src)
205                .map(|n| n.path.clone());
206            let dst_file = all_node_defs
207                .iter()
208                .find(|n| n.id == e.dst)
209                .map(|n| n.path.clone());
210            match (src_file, dst_file) {
211                (Some(sp), Some(dp)) => {
212                    !changed_paths.contains(&sp) && !changed_paths.contains(&dp)
213                }
214                _ => false,
215            }
216        })
217        .map(|e| EdgeDef {
218            src: e.src.clone(),
219            dst: e.dst.clone(),
220            kind: match e.kind.as_str() {
221                "CALLS" => EdgeKind::Calls,
222                "IMPORTS" => EdgeKind::Imports,
223                "INHERITS" => EdgeKind::Inherits,
224                "EXPORTS" => EdgeKind::Exports,
225                "CO_CHANGES" => EdgeKind::CoChanges,
226                "OWNS" => EdgeKind::Owns,
227                "DEPENDS_ON" => EdgeKind::DependsOn,
228                _ => EdgeKind::Calls,
229            },
230            weight: e.weight,
231            confidence: e.confidence,
232        })
233        .collect();
234
235    let mut all_edge_defs = kept_edge_defs;
236    all_edge_defs.extend(new_edges);
237
238    // 8. Resolve cross-file symbols
239    let resolved_edges = resolve(&all_node_defs, &all_edge_defs, repo_path)?;
240    let resolved_count = resolved_edges.len();
241
242    let graph_edges: Vec<crate::graph::Edge> = all_edge_defs
243        .iter()
244        .map(crate::graph::Edge::from_def)
245        .collect();
246    let resolved_graph_edges: Vec<crate::graph::Edge> = resolved_edges
247        .iter()
248        .map(crate::graph::Edge::from_def)
249        .collect();
250
251    db.upsert_edges(&graph_edges)?;
252    db.upsert_edges(&resolved_graph_edges)?;
253
254    // 9. Git layer
255    if !no_git {
256        let all_file_paths: Vec<String> = kept_nodes
257            .iter()
258            .filter(|n| n.kind == "File")
259            .map(|n| n.path.clone())
260            .collect();
261        let git_analysis = analyze_repo(repo_path, &all_file_paths)?;
262
263        let max_churn = git_analysis
264            .file_churn
265            .values()
266            .copied()
267            .fold(0.0, f64::max);
268        for (path, churn) in &git_analysis.file_churn {
269            let normalized = if max_churn > 0.0 {
270                churn / max_churn
271            } else {
272                0.0
273            };
274            let _ = db.upsert_node_scores(&format!("file:{}", path), normalized, 0.0);
275        }
276
277        let mut author_nodes = Vec::new();
278        let mut own_edges = Vec::new();
279        for (author, files) in &git_analysis.file_owners {
280            let author_id = format!("author:{}", author);
281            author_nodes.push(crate::graph::Node {
282                id: author_id.clone(),
283                kind: "Author".to_string(),
284                name: author.clone(),
285                path: String::new(),
286                line_start: 0,
287                line_end: 0,
288                language: String::new(),
289                churn: 0.0,
290                coupling: 0.0,
291                community: 0,
292                in_degree: 0,
293                out_degree: 0,
294            });
295            for (file_path, _email, _percent) in files.iter().take(5) {
296                own_edges.push(crate::graph::Edge {
297                    id: format!("owns:{}:{}", author_id, file_path),
298                    src: author_id.clone(),
299                    dst: format!("file:{}", file_path),
300                    kind: "OWNS".to_string(),
301                    weight: 1.0,
302                    confidence: 1.0,
303                });
304            }
305        }
306        db.upsert_nodes(&author_nodes)?;
307        db.upsert_edges(&own_edges)?;
308
309        let mut cochange_edges = Vec::new();
310        for (a, b, weight) in &git_analysis.co_changes {
311            cochange_edges.push(crate::graph::Edge {
312                id: format!("cochange:{}:{}", a, b),
313                src: format!("file:{}", a),
314                dst: format!("file:{}", b),
315                kind: "CO_CHANGES".to_string(),
316                weight: *weight,
317                confidence: 1.0,
318            });
319        }
320        db.upsert_edges(&cochange_edges)?;
321    }
322
323    // 10. Clustering
324    if !no_cluster {
325        let _ = run_clustering(db)?;
326    }
327
328    // 11. Update degrees and coupling
329    db.update_in_out_degrees()?;
330    db.compute_coupling()?;
331
332    // 12. Update tags for changed/deleted files
333    let changed_paths_vec: Vec<String> = changed_paths.iter().cloned().collect();
334    db.delete_tags_for_paths(&changed_paths_vec)?;
335    let new_tag_rows: Vec<crate::graph::TagRow> = results
336        .iter()
337        .zip(changed_files.iter())
338        .flat_map(|(result, file)| {
339            result
340                .comment_tags
341                .iter()
342                .map(move |t| crate::graph::TagRow {
343                    id: format!("tag:{}:{}:{}", file.relative_path, t.line, t.tag_type),
344                    file_path: file.relative_path.clone(),
345                    line: t.line,
346                    tag_type: t.tag_type.clone(),
347                    text: t.text.clone(),
348                    comment_type: t.comment_kind.as_str().to_string(),
349                })
350        })
351        .collect();
352    db.upsert_tags(&new_tag_rows)?;
353
354    // 13. Store new file hashes
355    for (path, hash) in &current_hashes {
356        db.set_file_hash(path, hash)?;
357    }
358    if !deleted_paths.is_empty() {
359        db.remove_file_hashes(&deleted_paths)?;
360    }
361
362    if !quiet {
363        println!("  Incremental re-index complete.");
364        println!(
365            "  Kept {} unchanged nodes.",
366            kept_nodes.len() - new_node_count
367        );
368        println!("  Added {} new/changed nodes.", new_node_count);
369        if !deleted_paths.is_empty() {
370            println!("  Removed {} deleted files.", deleted_paths.len());
371        }
372        println!("  Resolved {} cross-file edges.", resolved_count);
373    }
374
375    Ok(true)
376}