Skip to main content

cgx_engine/
lib.rs

1pub mod cluster;
2pub mod config;
3pub mod diff;
4pub mod export;
5pub mod git;
6pub mod graph;
7pub mod parser;
8pub mod parsers;
9pub mod registry;
10pub mod resolver;
11pub mod skill;
12pub mod walker;
13
14pub use cluster::{detect_communities, run_clustering};
15pub use config::{
16    AnalyzeConfig, CgxConfig, ChatConfig, ExportConfig, IndexConfig, McpConfig, ProjectConfig,
17    ServeConfig, SkillConfig, WatchConfig,
18};
19pub use diff::{
20    compute_impact, diff_graphs, snapshot_at_commit, GraphDiff, GraphSnapshot, ImpactReport,
21};
22pub use export::{export_dot, export_graphml, export_json, export_mermaid, export_svg};
23pub use git::{analyze_repo, GitAnalysis};
24pub use graph::{CommunityRow, Edge, GraphDb, Node, RepoStats};
25pub use parser::{
26    EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult, ParserRegistry,
27};
28pub use registry::{Registry, RepoEntry};
29pub use resolver::resolve;
30pub use skill::{
31    build_skill_data, generate_agents_md, generate_skill, install_git_hooks, write_agents_md,
32    write_skill, CommunityInfo, SkillData,
33};
34pub use walker::{walk_repo, Language, SourceFile};
35
36use sha2::{Digest, Sha256};
37use std::collections::{HashMap, HashSet};
38use std::path::Path;
39
40/// Incremental repository analysis — only re-parses changed files.
41/// Returns true if analysis was performed, false if no changes detected.
42pub fn analyze_repo_incremental(
43    repo_path: &Path,
44    db: &GraphDb,
45    quiet: bool,
46    no_git: bool,
47    no_cluster: bool,
48    verbose: bool,
49) -> anyhow::Result<bool> {
50    let _ = verbose;
51
52    // 1. Walk all files and compute hashes
53    let files = walk_repo(repo_path)?;
54    let mut current_hashes: HashMap<String, String> = HashMap::new();
55    for file in &files {
56        let mut hasher = Sha256::new();
57        hasher.update(file.content.as_bytes());
58        let hash = format!("{:x}", hasher.finalize());
59        current_hashes.insert(file.relative_path.clone(), hash);
60    }
61
62    // 2. Load stored hashes
63    let stored_hashes = db.get_file_hashes().unwrap_or_default();
64
65    // 3. Determine changes
66    let mut changed_paths: HashSet<String> = HashSet::new();
67    for (path, hash) in &current_hashes {
68        if stored_hashes.get(path) != Some(hash) {
69            changed_paths.insert(path.clone());
70        }
71    }
72
73    let mut deleted_paths: Vec<String> = Vec::new();
74    for path in stored_hashes.keys() {
75        if !current_hashes.contains_key(path) {
76            deleted_paths.push(path.clone());
77            changed_paths.insert(path.clone());
78        }
79    }
80
81    if changed_paths.is_empty() {
82        if !quiet {
83            println!("  No file changes detected. Index is up to date.");
84        }
85        return Ok(false);
86    }
87
88    if !quiet {
89        println!(
90            "  Incremental: {} changed/new/deleted file(s)",
91            changed_paths.len()
92        );
93    }
94
95    // 4. Load existing nodes and filter out changed/deleted ones
96    let existing_nodes = db.get_all_nodes()?;
97    let existing_edges = db.get_all_edges()?;
98
99    let mut kept_nodes: Vec<crate::graph::Node> = existing_nodes
100        .into_iter()
101        .filter(|n| !changed_paths.contains(&n.path))
102        .collect();
103
104    // 5. Parse only changed/new files
105    let changed_files: Vec<_> = files
106        .into_iter()
107        .filter(|f| changed_paths.contains(&f.relative_path))
108        .collect();
109
110    if !quiet {
111        println!("  Re-parsing {} changed file(s)...", changed_files.len());
112    }
113
114    let registry = ParserRegistry::new();
115    let results = registry.parse_all(&changed_files);
116
117    let mut new_nodes: Vec<NodeDef> = Vec::new();
118    let mut new_edges: Vec<EdgeDef> = Vec::new();
119    let mut changed_file_paths: HashSet<String> = HashSet::new();
120    let mut lang_map: HashMap<String, &str> = changed_files
121        .iter()
122        .map(|f| {
123            let lang_str = match f.language {
124                walker::Language::TypeScript => "typescript",
125                walker::Language::JavaScript => "javascript",
126                walker::Language::Python => "python",
127                walker::Language::Rust => "rust",
128                walker::Language::Go => "go",
129                walker::Language::Java => "java",
130                walker::Language::CSharp => "csharp",
131                walker::Language::Php => "php",
132                walker::Language::Unknown => "unknown",
133            };
134            (f.relative_path.clone(), lang_str)
135        })
136        .collect();
137
138    for result in &results {
139        new_nodes.extend(result.nodes.clone());
140        new_edges.extend(result.edges.clone());
141    }
142    for file in &changed_files {
143        changed_file_paths.insert(file.relative_path.clone());
144    }
145
146    // Add file nodes for changed files
147    let parsed_lang_map = resolver::build_language_map(&new_nodes);
148    for (path, lang) in parsed_lang_map {
149        if lang != "unknown" {
150            lang_map.entry(path).or_insert(lang);
151        }
152    }
153    let file_nodes = resolver::create_file_nodes(&changed_file_paths, &lang_map);
154    new_nodes.extend(file_nodes);
155
156    // Convert new nodes to GraphDb format
157    let new_graph_nodes: Vec<crate::graph::Node> = new_nodes
158        .iter()
159        .map(|n| {
160            let lang = lang_map.get(&n.path).copied().unwrap_or("unknown");
161            crate::graph::Node::from_def(n, lang)
162        })
163        .collect();
164
165    // 6. Merge kept + new nodes
166    let new_node_count = new_graph_nodes.len();
167    kept_nodes.extend(new_graph_nodes);
168
169    // 7. Clear and re-insert all nodes
170    db.clear()?;
171    db.upsert_nodes(&kept_nodes)?;
172
173    // Re-convert kept nodes back to NodeDef for resolution
174    let all_node_defs: Vec<NodeDef> = kept_nodes
175        .iter()
176        .map(|n| NodeDef {
177            id: n.id.clone(),
178            kind: match n.kind.as_str() {
179                "File" => NodeKind::File,
180                "Function" => NodeKind::Function,
181                "Class" => NodeKind::Class,
182                "Variable" => NodeKind::Variable,
183                "Type" => NodeKind::Type,
184                "Module" => NodeKind::Module,
185                "Author" => NodeKind::Author,
186                _ => NodeKind::Variable,
187            },
188            name: n.name.clone(),
189            path: n.path.clone(),
190            line_start: n.line_start,
191            line_end: n.line_end,
192            metadata: serde_json::Value::Null,
193        })
194        .collect();
195
196    // Convert new edges + existing edges to EdgeDef
197    let kept_edge_defs: Vec<EdgeDef> = existing_edges
198        .iter()
199        .filter(|e| {
200            // Keep edges that don't reference changed/deleted file nodes
201            let src_file = all_node_defs
202                .iter()
203                .find(|n| n.id == e.src)
204                .map(|n| n.path.clone());
205            let dst_file = all_node_defs
206                .iter()
207                .find(|n| n.id == e.dst)
208                .map(|n| n.path.clone());
209            match (src_file, dst_file) {
210                (Some(sp), Some(dp)) => {
211                    !changed_paths.contains(&sp) && !changed_paths.contains(&dp)
212                }
213                _ => false,
214            }
215        })
216        .map(|e| EdgeDef {
217            src: e.src.clone(),
218            dst: e.dst.clone(),
219            kind: match e.kind.as_str() {
220                "CALLS" => EdgeKind::Calls,
221                "IMPORTS" => EdgeKind::Imports,
222                "INHERITS" => EdgeKind::Inherits,
223                "EXPORTS" => EdgeKind::Exports,
224                "CO_CHANGES" => EdgeKind::CoChanges,
225                "OWNS" => EdgeKind::Owns,
226                "DEPENDS_ON" => EdgeKind::DependsOn,
227                _ => EdgeKind::Calls,
228            },
229            weight: e.weight,
230            confidence: e.confidence,
231        })
232        .collect();
233
234    let mut all_edge_defs = kept_edge_defs;
235    all_edge_defs.extend(new_edges);
236
237    // 8. Resolve cross-file symbols
238    let resolved_edges = resolve(&all_node_defs, &all_edge_defs, repo_path)?;
239    let resolved_count = resolved_edges.len();
240
241    let graph_edges: Vec<crate::graph::Edge> = all_edge_defs
242        .iter()
243        .map(crate::graph::Edge::from_def)
244        .collect();
245    let resolved_graph_edges: Vec<crate::graph::Edge> = resolved_edges
246        .iter()
247        .map(crate::graph::Edge::from_def)
248        .collect();
249
250    db.upsert_edges(&graph_edges)?;
251    db.upsert_edges(&resolved_graph_edges)?;
252
253    // 9. Git layer
254    if !no_git {
255        let all_file_paths: Vec<String> = kept_nodes
256            .iter()
257            .filter(|n| n.kind == "File")
258            .map(|n| n.path.clone())
259            .collect();
260        let git_analysis = analyze_repo(repo_path, &all_file_paths)?;
261
262        let max_churn = git_analysis
263            .file_churn
264            .values()
265            .copied()
266            .fold(0.0, f64::max);
267        for (path, churn) in &git_analysis.file_churn {
268            let normalized = if max_churn > 0.0 {
269                churn / max_churn
270            } else {
271                0.0
272            };
273            let _ = db.upsert_node_scores(&format!("file:{}", path), normalized, 0.0);
274        }
275
276        let mut author_nodes = Vec::new();
277        let mut own_edges = Vec::new();
278        for (author, files) in &git_analysis.file_owners {
279            let author_id = format!("author:{}", author);
280            author_nodes.push(crate::graph::Node {
281                id: author_id.clone(),
282                kind: "Author".to_string(),
283                name: author.clone(),
284                path: String::new(),
285                line_start: 0,
286                line_end: 0,
287                language: String::new(),
288                churn: 0.0,
289                coupling: 0.0,
290                community: 0,
291                in_degree: 0,
292                out_degree: 0,
293            });
294            for (file_path, _email, _percent) in files.iter().take(5) {
295                own_edges.push(crate::graph::Edge {
296                    id: format!("owns:{}:{}", author_id, file_path),
297                    src: author_id.clone(),
298                    dst: format!("file:{}", file_path),
299                    kind: "OWNS".to_string(),
300                    weight: 1.0,
301                    confidence: 1.0,
302                });
303            }
304        }
305        db.upsert_nodes(&author_nodes)?;
306        db.upsert_edges(&own_edges)?;
307
308        let mut cochange_edges = Vec::new();
309        for (a, b, weight) in &git_analysis.co_changes {
310            cochange_edges.push(crate::graph::Edge {
311                id: format!("cochange:{}:{}", a, b),
312                src: format!("file:{}", a),
313                dst: format!("file:{}", b),
314                kind: "CO_CHANGES".to_string(),
315                weight: *weight,
316                confidence: 1.0,
317            });
318        }
319        db.upsert_edges(&cochange_edges)?;
320    }
321
322    // 10. Clustering
323    if !no_cluster {
324        let _ = run_clustering(db)?;
325    }
326
327    // 11. Update degrees and coupling
328    db.update_in_out_degrees()?;
329    db.compute_coupling()?;
330
331    // 12. Store new file hashes
332    for (path, hash) in &current_hashes {
333        db.set_file_hash(path, hash)?;
334    }
335    if !deleted_paths.is_empty() {
336        db.remove_file_hashes(&deleted_paths)?;
337    }
338
339    if !quiet {
340        println!("  Incremental re-index complete.");
341        println!(
342            "  Kept {} unchanged nodes.",
343            kept_nodes.len() - new_node_count
344        );
345        println!("  Added {} new/changed nodes.", new_node_count);
346        if !deleted_paths.is_empty() {
347            println!("  Removed {} deleted files.", deleted_paths.len());
348        }
349        println!("  Resolved {} cross-file edges.", resolved_count);
350    }
351
352    Ok(true)
353}