Skip to main content

cgx_engine/
lib.rs

1pub mod cluster;
2pub mod config;
3pub mod deadcode;
4pub mod diff;
5pub mod export;
6pub mod git;
7pub mod graph;
8pub mod parser;
9pub mod parsers;
10pub mod registry;
11pub mod resolver;
12pub mod skill;
13pub mod walker;
14
15pub use cluster::{detect_communities, run_clustering};
16pub use config::{
17    AnalyzeConfig, CgxConfig, ChatConfig, ExportConfig, IndexConfig, McpConfig, ProjectConfig,
18    ServeConfig, SkillConfig, WatchConfig,
19};
20pub use deadcode::{
21    detect_dead_code, mark_dead_candidates, Confidence, DeadCodeReport, DeadNode, DeadReason,
22};
23pub use diff::{
24    compute_impact, diff_graphs, snapshot_at_commit, GraphDiff, GraphSnapshot, ImpactReport,
25};
26pub use export::{export_dot, export_graphml, export_json, export_mermaid, export_svg};
27pub use git::{analyze_repo, GitAnalysis};
28pub use graph::{CommunityRow, Edge, GraphDb, Node, RepoStats, TagRow};
29pub use parser::{
30    CommentKind, CommentTag, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult,
31    ParserRegistry,
32};
33pub use registry::{Registry, RepoEntry};
34pub use resolver::resolve;
35pub use skill::{
36    build_skill_data, generate_agents_md, generate_skill, install_git_hooks, write_agents_md,
37    write_skill, CommunityInfo, SkillData,
38};
39pub use walker::{walk_repo, Language, SourceFile};
40
41use sha2::{Digest, Sha256};
42use std::collections::{HashMap, HashSet};
43use std::path::Path;
44
45/// Incremental repository analysis — only re-parses changed files.
46/// Returns true if analysis was performed, false if no changes detected.
47pub fn analyze_repo_incremental(
48    repo_path: &Path,
49    db: &GraphDb,
50    quiet: bool,
51    no_git: bool,
52    no_cluster: bool,
53    verbose: bool,
54) -> anyhow::Result<bool> {
55    let _ = verbose;
56
57    // 1. Walk all files and compute hashes
58    let files = walk_repo(repo_path)?;
59    let mut current_hashes: HashMap<String, String> = HashMap::new();
60    for file in &files {
61        let mut hasher = Sha256::new();
62        hasher.update(file.content.as_bytes());
63        let hash = format!("{:x}", hasher.finalize());
64        current_hashes.insert(file.relative_path.clone(), hash);
65    }
66
67    // 2. Load stored hashes
68    let stored_hashes = db.get_file_hashes().unwrap_or_default();
69
70    // 3. Determine changes
71    let mut changed_paths: HashSet<String> = HashSet::new();
72    for (path, hash) in &current_hashes {
73        if stored_hashes.get(path) != Some(hash) {
74            changed_paths.insert(path.clone());
75        }
76    }
77
78    let mut deleted_paths: Vec<String> = Vec::new();
79    for path in stored_hashes.keys() {
80        if !current_hashes.contains_key(path) {
81            deleted_paths.push(path.clone());
82            changed_paths.insert(path.clone());
83        }
84    }
85
86    if changed_paths.is_empty() {
87        if !quiet {
88            println!("  No file changes detected. Index is up to date.");
89        }
90        return Ok(false);
91    }
92
93    if !quiet {
94        println!(
95            "  Incremental: {} changed/new/deleted file(s)",
96            changed_paths.len()
97        );
98    }
99
100    // 4. Load existing nodes and filter out changed/deleted ones
101    let existing_nodes = db.get_all_nodes()?;
102    let existing_edges = db.get_all_edges()?;
103
104    let mut kept_nodes: Vec<crate::graph::Node> = existing_nodes
105        .into_iter()
106        .filter(|n| !changed_paths.contains(&n.path))
107        .collect();
108
109    // 5. Parse only changed/new files
110    let changed_files: Vec<_> = files
111        .into_iter()
112        .filter(|f| changed_paths.contains(&f.relative_path))
113        .collect();
114
115    if !quiet {
116        println!("  Re-parsing {} changed file(s)...", changed_files.len());
117    }
118
119    let registry = ParserRegistry::new();
120    let results = registry.parse_all(&changed_files);
121
122    let mut new_nodes: Vec<NodeDef> = Vec::new();
123    let mut new_edges: Vec<EdgeDef> = Vec::new();
124    let mut changed_file_paths: HashSet<String> = HashSet::new();
125    let mut lang_map: HashMap<String, &str> = changed_files
126        .iter()
127        .map(|f| {
128            let lang_str = match f.language {
129                walker::Language::TypeScript => "typescript",
130                walker::Language::JavaScript => "javascript",
131                walker::Language::Python => "python",
132                walker::Language::Rust => "rust",
133                walker::Language::Go => "go",
134                walker::Language::Java => "java",
135                walker::Language::CSharp => "csharp",
136                walker::Language::Php => "php",
137                walker::Language::Unknown => "unknown",
138            };
139            (f.relative_path.clone(), lang_str)
140        })
141        .collect();
142
143    for result in &results {
144        new_nodes.extend(result.nodes.clone());
145        new_edges.extend(result.edges.clone());
146    }
147    for file in &changed_files {
148        changed_file_paths.insert(file.relative_path.clone());
149    }
150
151    // Add file nodes for changed files
152    let parsed_lang_map = resolver::build_language_map(&new_nodes);
153    for (path, lang) in parsed_lang_map {
154        if lang != "unknown" {
155            lang_map.entry(path).or_insert(lang);
156        }
157    }
158    let file_nodes = resolver::create_file_nodes(&changed_file_paths, &lang_map);
159    new_nodes.extend(file_nodes);
160
161    // Convert new nodes to GraphDb format
162    let new_graph_nodes: Vec<crate::graph::Node> = new_nodes
163        .iter()
164        .map(|n| {
165            let lang = lang_map.get(&n.path).copied().unwrap_or("unknown");
166            crate::graph::Node::from_def(n, lang)
167        })
168        .collect();
169
170    // 6. Merge kept + new nodes
171    let new_node_count = new_graph_nodes.len();
172    kept_nodes.extend(new_graph_nodes);
173
174    // 7. Clear and re-insert all nodes
175    db.clear()?;
176    db.upsert_nodes(&kept_nodes)?;
177
178    // Re-convert kept nodes back to NodeDef for resolution
179    let all_node_defs: Vec<NodeDef> = kept_nodes
180        .iter()
181        .map(|n| NodeDef {
182            id: n.id.clone(),
183            kind: match n.kind.as_str() {
184                "File" => NodeKind::File,
185                "Function" => NodeKind::Function,
186                "Class" => NodeKind::Class,
187                "Variable" => NodeKind::Variable,
188                "Type" => NodeKind::Type,
189                "Module" => NodeKind::Module,
190                "Author" => NodeKind::Author,
191                _ => NodeKind::Variable,
192            },
193            name: n.name.clone(),
194            path: n.path.clone(),
195            line_start: n.line_start,
196            line_end: n.line_end,
197            metadata: serde_json::Value::Null,
198        })
199        .collect();
200
201    // Convert new edges + existing edges to EdgeDef
202    let kept_edge_defs: Vec<EdgeDef> = existing_edges
203        .iter()
204        .filter(|e| {
205            // Keep edges that don't reference changed/deleted file nodes
206            let src_file = all_node_defs
207                .iter()
208                .find(|n| n.id == e.src)
209                .map(|n| n.path.clone());
210            let dst_file = all_node_defs
211                .iter()
212                .find(|n| n.id == e.dst)
213                .map(|n| n.path.clone());
214            match (src_file, dst_file) {
215                (Some(sp), Some(dp)) => {
216                    !changed_paths.contains(&sp) && !changed_paths.contains(&dp)
217                }
218                _ => false,
219            }
220        })
221        .map(|e| EdgeDef {
222            src: e.src.clone(),
223            dst: e.dst.clone(),
224            kind: match e.kind.as_str() {
225                "CALLS" => EdgeKind::Calls,
226                "IMPORTS" => EdgeKind::Imports,
227                "INHERITS" => EdgeKind::Inherits,
228                "EXPORTS" => EdgeKind::Exports,
229                "CO_CHANGES" => EdgeKind::CoChanges,
230                "OWNS" => EdgeKind::Owns,
231                "DEPENDS_ON" => EdgeKind::DependsOn,
232                _ => EdgeKind::Calls,
233            },
234            weight: e.weight,
235            confidence: e.confidence,
236        })
237        .collect();
238
239    let mut all_edge_defs = kept_edge_defs;
240    all_edge_defs.extend(new_edges);
241
242    // 8. Resolve cross-file symbols
243    let resolved_edges = resolve(&all_node_defs, &all_edge_defs, repo_path)?;
244    let resolved_count = resolved_edges.len();
245
246    let graph_edges: Vec<crate::graph::Edge> = all_edge_defs
247        .iter()
248        .map(crate::graph::Edge::from_def)
249        .collect();
250    let resolved_graph_edges: Vec<crate::graph::Edge> = resolved_edges
251        .iter()
252        .map(crate::graph::Edge::from_def)
253        .collect();
254
255    db.upsert_edges(&graph_edges)?;
256    db.upsert_edges(&resolved_graph_edges)?;
257
258    // 9. Git layer
259    if !no_git {
260        let all_file_paths: Vec<String> = kept_nodes
261            .iter()
262            .filter(|n| n.kind == "File")
263            .map(|n| n.path.clone())
264            .collect();
265        let git_analysis = analyze_repo(repo_path, &all_file_paths)?;
266
267        let max_churn = git_analysis
268            .file_churn
269            .values()
270            .copied()
271            .fold(0.0, f64::max);
272        for (path, churn) in &git_analysis.file_churn {
273            let normalized = if max_churn > 0.0 {
274                churn / max_churn
275            } else {
276                0.0
277            };
278            let _ = db.upsert_node_scores(&format!("file:{}", path), normalized, 0.0);
279        }
280
281        let mut author_nodes = Vec::new();
282        let mut own_edges = Vec::new();
283        for (author, files) in &git_analysis.file_owners {
284            let author_id = format!("author:{}", author);
285            author_nodes.push(crate::graph::Node {
286                id: author_id.clone(),
287                kind: "Author".to_string(),
288                name: author.clone(),
289                path: String::new(),
290                line_start: 0,
291                line_end: 0,
292                language: String::new(),
293                churn: 0.0,
294                coupling: 0.0,
295                community: 0,
296                in_degree: 0,
297                out_degree: 0,
298                exported: false,
299                is_dead_candidate: false,
300                dead_reason: None,
301            });
302            for (file_path, _email, _percent) in files.iter().take(5) {
303                own_edges.push(crate::graph::Edge {
304                    id: format!("owns:{}:{}", author_id, file_path),
305                    src: author_id.clone(),
306                    dst: format!("file:{}", file_path),
307                    kind: "OWNS".to_string(),
308                    weight: 1.0,
309                    confidence: 1.0,
310                });
311            }
312        }
313        db.upsert_nodes(&author_nodes)?;
314        db.upsert_edges(&own_edges)?;
315
316        let mut cochange_edges = Vec::new();
317        for (a, b, weight) in &git_analysis.co_changes {
318            cochange_edges.push(crate::graph::Edge {
319                id: format!("cochange:{}:{}", a, b),
320                src: format!("file:{}", a),
321                dst: format!("file:{}", b),
322                kind: "CO_CHANGES".to_string(),
323                weight: *weight,
324                confidence: 1.0,
325            });
326        }
327        db.upsert_edges(&cochange_edges)?;
328    }
329
330    // 10. Clustering
331    if !no_cluster {
332        let _ = run_clustering(db)?;
333    }
334
335    // 11. Update degrees and coupling
336    db.update_in_out_degrees()?;
337    db.compute_coupling()?;
338
339    // 12. Update tags for changed/deleted files
340    let changed_paths_vec: Vec<String> = changed_paths.iter().cloned().collect();
341    db.delete_tags_for_paths(&changed_paths_vec)?;
342    let new_tag_rows: Vec<crate::graph::TagRow> = results
343        .iter()
344        .zip(changed_files.iter())
345        .flat_map(|(result, file)| {
346            result
347                .comment_tags
348                .iter()
349                .map(move |t| crate::graph::TagRow {
350                    id: format!("tag:{}:{}:{}", file.relative_path, t.line, t.tag_type),
351                    file_path: file.relative_path.clone(),
352                    line: t.line,
353                    tag_type: t.tag_type.clone(),
354                    text: t.text.clone(),
355                    comment_type: t.comment_kind.as_str().to_string(),
356                })
357        })
358        .collect();
359    db.upsert_tags(&new_tag_rows)?;
360
361    // 13. Store new file hashes
362    for (path, hash) in &current_hashes {
363        db.set_file_hash(path, hash)?;
364    }
365    if !deleted_paths.is_empty() {
366        db.remove_file_hashes(&deleted_paths)?;
367    }
368
369    if !quiet {
370        println!("  Incremental re-index complete.");
371        println!(
372            "  Kept {} unchanged nodes.",
373            kept_nodes.len() - new_node_count
374        );
375        println!("  Added {} new/changed nodes.", new_node_count);
376        if !deleted_paths.is_empty() {
377            println!("  Removed {} deleted files.", deleted_paths.len());
378        }
379        println!("  Resolved {} cross-file edges.", resolved_count);
380    }
381
382    Ok(true)
383}