//! cgx_engine — library crate root: module declarations, public re-exports,
//! and the incremental repository analysis entry point.
1pub mod cluster;
2pub mod config;
3pub mod deadcode;
4pub mod deps;
5pub mod diff;
6pub mod dupes;
7pub mod export;
8pub mod git;
9pub mod graph;
10pub mod parser;
11pub mod parsers;
12pub mod registry;
13pub mod resolver;
14pub mod rules;
15pub mod skill;
16pub mod timeline;
17pub mod walker;
18
19pub use cluster::{detect_communities, run_clustering};
20pub use config::{
21    AnalyzeConfig, CgxConfig, ChatConfig, ExportConfig, IndexConfig, McpConfig, ProjectConfig,
22    ServeConfig, SkillConfig, WatchConfig,
23};
24pub use deadcode::{
25    detect_dead_code, mark_dead_candidates, Confidence, DeadCodeReport, DeadNode, DeadReason,
26};
27pub use deps::{audit_dependencies, parse_manifests, DependencyReport};
28pub use diff::{
29    compute_impact, diff_graphs, snapshot_at_commit, GraphDiff, GraphSnapshot, ImpactReport,
30};
31pub use dupes::{detect_clones, CloneKind, ClonePair};
32pub use export::{export_dot, export_graphml, export_json, export_mermaid, export_svg};
33pub use git::{analyze_repo, GitAnalysis};
34pub use graph::{
35    CloneRow, CommunityRow, DocsCoverage, Edge, GraphDb, Node, RepoStats, SnapshotEntry, TagRow,
36    TestCoverageSummary,
37};
38pub use parser::{
39    CommentKind, CommentTag, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult,
40    ParserRegistry,
41};
42pub use registry::{Registry, RepoEntry};
43pub use resolver::{is_test_path, resolve};
44pub use rules::{run_rules, Rule, RuleResult, RuleViolation, RulesConfig};
45pub use skill::{
46    build_skill_data, generate_agents_md, generate_skill, install_git_hooks, write_agents_md,
47    write_skill, CommunityInfo, SkillData,
48};
49pub use timeline::build_timeline;
50pub use walker::{walk_repo, Language, SourceFile};
51
52use sha2::{Digest, Sha256};
53use std::collections::{HashMap, HashSet};
54use std::path::Path;
55
56/// Incremental repository analysis — only re-parses changed files.
57/// Returns true if analysis was performed, false if no changes detected.
58pub fn analyze_repo_incremental(
59    repo_path: &Path,
60    db: &GraphDb,
61    quiet: bool,
62    no_git: bool,
63    no_cluster: bool,
64    verbose: bool,
65) -> anyhow::Result<bool> {
66    let _ = verbose;
67
68    // 1. Walk all files and compute hashes
69    let files = walk_repo(repo_path)?;
70    let mut current_hashes: HashMap<String, String> = HashMap::new();
71    for file in &files {
72        let mut hasher = Sha256::new();
73        hasher.update(file.content.as_bytes());
74        let hash = format!("{:x}", hasher.finalize());
75        current_hashes.insert(file.relative_path.clone(), hash);
76    }
77
78    // 2. Load stored hashes
79    let stored_hashes = db.get_file_hashes().unwrap_or_default();
80
81    // 3. Determine changes
82    let mut changed_paths: HashSet<String> = HashSet::new();
83    for (path, hash) in &current_hashes {
84        if stored_hashes.get(path) != Some(hash) {
85            changed_paths.insert(path.clone());
86        }
87    }
88
89    let mut deleted_paths: Vec<String> = Vec::new();
90    for path in stored_hashes.keys() {
91        if !current_hashes.contains_key(path) {
92            deleted_paths.push(path.clone());
93            changed_paths.insert(path.clone());
94        }
95    }
96
97    if changed_paths.is_empty() {
98        if !quiet {
99            println!("  No file changes detected. Index is up to date.");
100        }
101        return Ok(false);
102    }
103
104    if !quiet {
105        println!(
106            "  Incremental: {} changed/new/deleted file(s)",
107            changed_paths.len()
108        );
109    }
110
111    // 4. Load existing nodes and filter out changed/deleted ones
112    let existing_nodes = db.get_all_nodes()?;
113    let existing_edges = db.get_all_edges()?;
114
115    let mut kept_nodes: Vec<crate::graph::Node> = existing_nodes
116        .into_iter()
117        .filter(|n| !changed_paths.contains(&n.path))
118        .collect();
119
120    // 5. Parse only changed/new files
121    let changed_files: Vec<_> = files
122        .into_iter()
123        .filter(|f| changed_paths.contains(&f.relative_path))
124        .collect();
125
126    if !quiet {
127        println!("  Re-parsing {} changed file(s)...", changed_files.len());
128    }
129
130    let registry = ParserRegistry::new();
131    let results = registry.parse_all(&changed_files);
132
133    let mut new_nodes: Vec<NodeDef> = Vec::new();
134    let mut new_edges: Vec<EdgeDef> = Vec::new();
135    let mut changed_file_paths: HashSet<String> = HashSet::new();
136    let mut lang_map: HashMap<String, &str> = changed_files
137        .iter()
138        .map(|f| {
139            let lang_str = match f.language {
140                walker::Language::TypeScript => "typescript",
141                walker::Language::JavaScript => "javascript",
142                walker::Language::Python => "python",
143                walker::Language::Rust => "rust",
144                walker::Language::Go => "go",
145                walker::Language::Java => "java",
146                walker::Language::CSharp => "csharp",
147                walker::Language::Php => "php",
148                walker::Language::Unknown => "unknown",
149            };
150            (f.relative_path.clone(), lang_str)
151        })
152        .collect();
153
154    for result in &results {
155        new_nodes.extend(result.nodes.clone());
156        new_edges.extend(result.edges.clone());
157    }
158    for file in &changed_files {
159        changed_file_paths.insert(file.relative_path.clone());
160    }
161
162    // Add file nodes for changed files
163    let parsed_lang_map = resolver::build_language_map(&new_nodes);
164    for (path, lang) in parsed_lang_map {
165        if lang != "unknown" {
166            lang_map.entry(path).or_insert(lang);
167        }
168    }
169    let file_nodes = resolver::create_file_nodes(&changed_file_paths, &lang_map);
170    new_nodes.extend(file_nodes);
171
172    // Convert new nodes to GraphDb format
173    let new_graph_nodes: Vec<crate::graph::Node> = new_nodes
174        .iter()
175        .map(|n| {
176            let lang = lang_map.get(&n.path).copied().unwrap_or("unknown");
177            crate::graph::Node::from_def(n, lang)
178        })
179        .collect();
180
181    // 6. Merge kept + new nodes
182    let new_node_count = new_graph_nodes.len();
183    kept_nodes.extend(new_graph_nodes);
184
185    // 7. Clear and re-insert all nodes
186    db.clear()?;
187    db.upsert_nodes(&kept_nodes)?;
188
189    // Update doc_comment for nodes that have it in metadata (from changed files)
190    for result in &results {
191        for node_def in &result.nodes {
192            if let Some(doc) = node_def
193                .metadata
194                .get("doc_comment")
195                .and_then(|v| v.as_str())
196            {
197                if !doc.is_empty() {
198                    let _ = db.update_node_doc_comment(&node_def.id, doc);
199                }
200            }
201        }
202    }
203
204    // Re-convert kept nodes back to NodeDef for resolution
205    let all_node_defs: Vec<NodeDef> = kept_nodes
206        .iter()
207        .map(|n| NodeDef {
208            id: n.id.clone(),
209            kind: match n.kind.as_str() {
210                "File" => NodeKind::File,
211                "Function" => NodeKind::Function,
212                "Class" => NodeKind::Class,
213                "Variable" => NodeKind::Variable,
214                "Type" => NodeKind::Type,
215                "Module" => NodeKind::Module,
216                "Author" => NodeKind::Author,
217                _ => NodeKind::Variable,
218            },
219            name: n.name.clone(),
220            path: n.path.clone(),
221            line_start: n.line_start,
222            line_end: n.line_end,
223            metadata: serde_json::Value::Null,
224        })
225        .collect();
226
227    // Convert new edges + existing edges to EdgeDef
228    let kept_edge_defs: Vec<EdgeDef> = existing_edges
229        .iter()
230        .filter(|e| {
231            // Keep edges that don't reference changed/deleted file nodes
232            let src_file = all_node_defs
233                .iter()
234                .find(|n| n.id == e.src)
235                .map(|n| n.path.clone());
236            let dst_file = all_node_defs
237                .iter()
238                .find(|n| n.id == e.dst)
239                .map(|n| n.path.clone());
240            match (src_file, dst_file) {
241                (Some(sp), Some(dp)) => {
242                    !changed_paths.contains(&sp) && !changed_paths.contains(&dp)
243                }
244                _ => false,
245            }
246        })
247        .map(|e| EdgeDef {
248            src: e.src.clone(),
249            dst: e.dst.clone(),
250            kind: match e.kind.as_str() {
251                "CALLS" => EdgeKind::Calls,
252                "IMPORTS" => EdgeKind::Imports,
253                "INHERITS" => EdgeKind::Inherits,
254                "EXPORTS" => EdgeKind::Exports,
255                "CO_CHANGES" => EdgeKind::CoChanges,
256                "OWNS" => EdgeKind::Owns,
257                "DEPENDS_ON" => EdgeKind::DependsOn,
258                _ => EdgeKind::Calls,
259            },
260            weight: e.weight,
261            confidence: e.confidence,
262        })
263        .collect();
264
265    let mut all_edge_defs = kept_edge_defs;
266    all_edge_defs.extend(new_edges);
267
268    // 8. Resolve cross-file symbols
269    let resolved_edges = resolve(&all_node_defs, &all_edge_defs, repo_path)?;
270    let resolved_count = resolved_edges.len();
271
272    // Only upsert the resolved edge set — this matches the full-analyze flow and
273    // avoids feeding DuckDB the same edge id twice in close succession, which
274    // hits its INSERT OR REPLACE / ART-index bulk-delete limitation.
275    let resolved_graph_edges: Vec<crate::graph::Edge> = resolved_edges
276        .iter()
277        .map(crate::graph::Edge::from_def)
278        .collect();
279    db.upsert_edges(&resolved_graph_edges)?;
280
281    // 9. Git layer
282    if !no_git {
283        let all_file_paths: Vec<String> = kept_nodes
284            .iter()
285            .filter(|n| n.kind == "File")
286            .map(|n| n.path.clone())
287            .collect();
288        let git_analysis = analyze_repo(repo_path, &all_file_paths)?;
289
290        let max_churn = git_analysis
291            .file_churn
292            .values()
293            .copied()
294            .fold(0.0, f64::max);
295        for (path, churn) in &git_analysis.file_churn {
296            let normalized = if max_churn > 0.0 {
297                churn / max_churn
298            } else {
299                0.0
300            };
301            let _ = db.upsert_node_scores(&format!("file:{}", path), normalized, 0.0);
302        }
303
304        let mut author_nodes = Vec::new();
305        let mut own_edges = Vec::new();
306        for (author, files) in &git_analysis.file_owners {
307            let author_id = format!("author:{}", author);
308            author_nodes.push(crate::graph::Node {
309                id: author_id.clone(),
310                kind: "Author".to_string(),
311                name: author.clone(),
312                path: String::new(),
313                line_start: 0,
314                line_end: 0,
315                language: String::new(),
316                churn: 0.0,
317                coupling: 0.0,
318                community: 0,
319                in_degree: 0,
320                out_degree: 0,
321                exported: false,
322                is_dead_candidate: false,
323                dead_reason: None,
324                complexity: 0.0,
325                is_test_file: false,
326                test_count: 0,
327                is_tested: false,
328            });
329            for (file_path, _email, _percent) in files.iter().take(5) {
330                own_edges.push(crate::graph::Edge {
331                    id: format!("owns:{}:{}", author_id, file_path),
332                    src: author_id.clone(),
333                    dst: format!("file:{}", file_path),
334                    kind: "OWNS".to_string(),
335                    weight: 1.0,
336                    confidence: 1.0,
337                });
338            }
339        }
340        db.upsert_nodes(&author_nodes)?;
341        db.upsert_edges(&own_edges)?;
342
343        let mut cochange_edges = Vec::new();
344        for (a, b, weight) in &git_analysis.co_changes {
345            cochange_edges.push(crate::graph::Edge {
346                id: format!("cochange:{}:{}", a, b),
347                src: format!("file:{}", a),
348                dst: format!("file:{}", b),
349                kind: "CO_CHANGES".to_string(),
350                weight: *weight,
351                confidence: 1.0,
352            });
353        }
354        db.upsert_edges(&cochange_edges)?;
355    }
356
357    // 10. Clustering
358    if !no_cluster {
359        let _ = run_clustering(db)?;
360    }
361
362    // 11. Update degrees and coupling
363    db.update_in_out_degrees()?;
364    db.compute_coupling()?;
365
366    // 11b. Mark test files and update test coverage
367    let test_file_paths: Vec<String> = kept_nodes
368        .iter()
369        .filter(|n| n.kind == "File" && crate::resolver::is_test_path(&n.path))
370        .map(|n| n.path.clone())
371        .collect();
372    // Also mark function/class nodes from test paths
373    let test_node_paths: Vec<String> = kept_nodes
374        .iter()
375        .filter(|n| crate::resolver::is_test_path(&n.path))
376        .map(|n| n.path.clone())
377        .collect();
378    let all_test_paths: std::collections::HashSet<String> =
379        test_file_paths.into_iter().chain(test_node_paths).collect();
380    db.mark_test_files(&all_test_paths.into_iter().collect::<Vec<_>>())?;
381    db.update_test_coverage()?;
382
383    // 12. Update tags for changed/deleted files
384    let changed_paths_vec: Vec<String> = changed_paths.iter().cloned().collect();
385    db.delete_tags_for_paths(&changed_paths_vec)?;
386    let new_tag_rows: Vec<crate::graph::TagRow> = results
387        .iter()
388        .zip(changed_files.iter())
389        .flat_map(|(result, file)| {
390            result
391                .comment_tags
392                .iter()
393                .map(move |t| crate::graph::TagRow {
394                    id: format!("tag:{}:{}:{}", file.relative_path, t.line, t.tag_type),
395                    file_path: file.relative_path.clone(),
396                    line: t.line,
397                    tag_type: t.tag_type.clone(),
398                    text: t.text.clone(),
399                    comment_type: t.comment_kind.as_str().to_string(),
400                })
401        })
402        .collect();
403    db.upsert_tags(&new_tag_rows)?;
404
405    // 13. Store new file hashes
406    for (path, hash) in &current_hashes {
407        db.set_file_hash(path, hash)?;
408    }
409    if !deleted_paths.is_empty() {
410        db.remove_file_hashes(&deleted_paths)?;
411    }
412
413    if !quiet {
414        println!("  Incremental re-index complete.");
415        println!(
416            "  Kept {} unchanged nodes.",
417            kept_nodes.len() - new_node_count
418        );
419        println!("  Added {} new/changed nodes.", new_node_count);
420        if !deleted_paths.is_empty() {
421            println!("  Removed {} deleted files.", deleted_paths.len());
422        }
423        println!("  Resolved {} cross-file edges.", resolved_count);
424    }
425
426    Ok(true)
427}