Skip to main content

cgx_engine/
lib.rs

1pub mod bisect;
2pub mod cluster;
3pub mod config;
4pub mod deadcode;
5pub mod deps;
6pub mod diff;
7pub mod docs;
8pub mod dupes;
9pub mod export;
10pub mod git;
11pub mod graph;
12pub mod parser;
13pub mod parsers;
14pub mod registry;
15pub mod resolver;
16pub mod rules;
17pub mod skill;
18pub mod timeline;
19pub mod walker;
20
21pub use cluster::{detect_communities, run_clustering};
22pub use config::{
23    AnalyzeConfig, CgxConfig, ChatConfig, DocsConfig, ExportConfig, IndexConfig, McpConfig,
24    ProjectConfig, ServeConfig, SkillConfig, WatchConfig,
25};
26pub use deadcode::{
27    detect_dead_code, mark_dead_candidates, Confidence, DeadCodeReport, DeadNode, DeadReason,
28};
29pub use deps::{audit_dependencies, parse_manifests, DependencyReport};
30pub use diff::{
31    compute_impact, diff_graphs, snapshot_at_commit, GraphDiff, GraphSnapshot, ImpactReport,
32};
33pub use dupes::{detect_clones, CloneKind, ClonePair};
34pub use export::{export_dot, export_graphml, export_json, export_mermaid, export_svg};
35pub use git::{analyze_repo, GitAnalysis};
36pub use graph::{
37    ApiScope, CloneRow, CommunityRow, CrossClusterEdge, DocsCoverage, Edge, EntryPoint,
38    FileSummary, GraphDb, Node, PublicSymbol, RepoStats, SnapshotEntry, TagRow,
39    TestCoverageSummary,
40};
41pub use parser::{
42    CommentKind, CommentTag, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult,
43    ParserRegistry,
44};
45pub use registry::{Registry, RepoEntry};
46pub use resolver::{is_test_path, resolve};
47pub use rules::{run_rules, Rule, RuleResult, RuleViolation, RulesConfig};
48pub use skill::{
49    build_skill_data, generate_agents_md, generate_skill, install_git_hooks, write_agents_md,
50    write_skill, CommunityInfo, SkillData,
51};
52pub use timeline::build_timeline;
53pub use walker::{walk_repo, Language, SourceFile};
54
55use sha2::{Digest, Sha256};
56use std::collections::{HashMap, HashSet};
57use std::path::Path;
58
59/// Incremental repository analysis — only re-parses changed files.
60/// Returns true if analysis was performed, false if no changes detected.
61pub fn analyze_repo_incremental(
62    repo_path: &Path,
63    db: &GraphDb,
64    quiet: bool,
65    no_git: bool,
66    no_cluster: bool,
67    verbose: bool,
68) -> anyhow::Result<bool> {
69    let _ = verbose;
70
71    // 1. Walk all files and compute hashes
72    let files = walk_repo(repo_path)?;
73    let mut current_hashes: HashMap<String, String> = HashMap::new();
74    for file in &files {
75        let mut hasher = Sha256::new();
76        hasher.update(file.content.as_bytes());
77        let hash = format!("{:x}", hasher.finalize());
78        current_hashes.insert(file.relative_path.clone(), hash);
79    }
80
81    // 2. Load stored hashes
82    let stored_hashes = db.get_file_hashes().unwrap_or_default();
83
84    // 3. Determine changes
85    let mut changed_paths: HashSet<String> = HashSet::new();
86    for (path, hash) in &current_hashes {
87        if stored_hashes.get(path) != Some(hash) {
88            changed_paths.insert(path.clone());
89        }
90    }
91
92    let mut deleted_paths: Vec<String> = Vec::new();
93    for path in stored_hashes.keys() {
94        if !current_hashes.contains_key(path) {
95            deleted_paths.push(path.clone());
96            changed_paths.insert(path.clone());
97        }
98    }
99
100    if changed_paths.is_empty() {
101        if !quiet {
102            println!("  No file changes detected. Index is up to date.");
103        }
104        return Ok(false);
105    }
106
107    if !quiet {
108        println!(
109            "  Incremental: {} changed/new/deleted file(s)",
110            changed_paths.len()
111        );
112    }
113
114    // 4. Load existing nodes and filter out changed/deleted ones
115    let existing_nodes = db.get_all_nodes()?;
116    let existing_edges = db.get_all_edges()?;
117
118    let mut kept_nodes: Vec<crate::graph::Node> = existing_nodes
119        .into_iter()
120        .filter(|n| !changed_paths.contains(&n.path))
121        .collect();
122
123    // 5. Parse only changed/new files
124    let changed_files: Vec<_> = files
125        .into_iter()
126        .filter(|f| changed_paths.contains(&f.relative_path))
127        .collect();
128
129    if !quiet {
130        println!("  Re-parsing {} changed file(s)...", changed_files.len());
131    }
132
133    let registry = ParserRegistry::new();
134    let results = registry.parse_all(&changed_files);
135
136    let mut new_nodes: Vec<NodeDef> = Vec::new();
137    let mut new_edges: Vec<EdgeDef> = Vec::new();
138    let mut changed_file_paths: HashSet<String> = HashSet::new();
139    let mut lang_map: HashMap<String, &str> = changed_files
140        .iter()
141        .map(|f| {
142            let lang_str = match f.language {
143                walker::Language::TypeScript => "typescript",
144                walker::Language::JavaScript => "javascript",
145                walker::Language::Python => "python",
146                walker::Language::Rust => "rust",
147                walker::Language::Go => "go",
148                walker::Language::Java => "java",
149                walker::Language::CSharp => "csharp",
150                walker::Language::Php => "php",
151                walker::Language::Unknown => "unknown",
152            };
153            (f.relative_path.clone(), lang_str)
154        })
155        .collect();
156
157    for result in &results {
158        new_nodes.extend(result.nodes.clone());
159        new_edges.extend(result.edges.clone());
160    }
161    for file in &changed_files {
162        changed_file_paths.insert(file.relative_path.clone());
163    }
164
165    // Add file nodes for changed files
166    let parsed_lang_map = resolver::build_language_map(&new_nodes);
167    for (path, lang) in parsed_lang_map {
168        if lang != "unknown" {
169            lang_map.entry(path).or_insert(lang);
170        }
171    }
172    let file_nodes = resolver::create_file_nodes(&changed_file_paths, &lang_map);
173    new_nodes.extend(file_nodes);
174
175    // Convert new nodes to GraphDb format
176    let new_graph_nodes: Vec<crate::graph::Node> = new_nodes
177        .iter()
178        .map(|n| {
179            let lang = lang_map.get(&n.path).copied().unwrap_or("unknown");
180            crate::graph::Node::from_def(n, lang)
181        })
182        .collect();
183
184    // 6. Merge kept + new nodes
185    let new_node_count = new_graph_nodes.len();
186    kept_nodes.extend(new_graph_nodes);
187
188    // 7. Clear and re-insert all nodes
189    db.clear()?;
190    db.upsert_nodes(&kept_nodes)?;
191
192    // Update doc_comment for nodes that have it in metadata (from changed files)
193    for result in &results {
194        for node_def in &result.nodes {
195            if let Some(doc) = node_def
196                .metadata
197                .get("doc_comment")
198                .and_then(|v| v.as_str())
199            {
200                if !doc.is_empty() {
201                    let _ = db.update_node_doc_comment(&node_def.id, doc);
202                }
203            }
204        }
205    }
206
207    // Re-convert kept nodes back to NodeDef for resolution
208    let all_node_defs: Vec<NodeDef> = kept_nodes
209        .iter()
210        .map(|n| NodeDef {
211            id: n.id.clone(),
212            kind: match n.kind.as_str() {
213                "File" => NodeKind::File,
214                "Function" => NodeKind::Function,
215                "Class" => NodeKind::Class,
216                "Variable" => NodeKind::Variable,
217                "Type" => NodeKind::Type,
218                "Module" => NodeKind::Module,
219                "Author" => NodeKind::Author,
220                _ => NodeKind::Variable,
221            },
222            name: n.name.clone(),
223            path: n.path.clone(),
224            line_start: n.line_start,
225            line_end: n.line_end,
226            metadata: serde_json::Value::Null,
227        })
228        .collect();
229
230    // Convert new edges + existing edges to EdgeDef
231    let kept_edge_defs: Vec<EdgeDef> = existing_edges
232        .iter()
233        .filter(|e| {
234            // Keep edges that don't reference changed/deleted file nodes
235            let src_file = all_node_defs
236                .iter()
237                .find(|n| n.id == e.src)
238                .map(|n| n.path.clone());
239            let dst_file = all_node_defs
240                .iter()
241                .find(|n| n.id == e.dst)
242                .map(|n| n.path.clone());
243            match (src_file, dst_file) {
244                (Some(sp), Some(dp)) => {
245                    !changed_paths.contains(&sp) && !changed_paths.contains(&dp)
246                }
247                _ => false,
248            }
249        })
250        .map(|e| EdgeDef {
251            src: e.src.clone(),
252            dst: e.dst.clone(),
253            kind: match e.kind.as_str() {
254                "CALLS" => EdgeKind::Calls,
255                "IMPORTS" => EdgeKind::Imports,
256                "INHERITS" => EdgeKind::Inherits,
257                "EXPORTS" => EdgeKind::Exports,
258                "CO_CHANGES" => EdgeKind::CoChanges,
259                "OWNS" => EdgeKind::Owns,
260                "DEPENDS_ON" => EdgeKind::DependsOn,
261                _ => EdgeKind::Calls,
262            },
263            weight: e.weight,
264            confidence: e.confidence,
265        })
266        .collect();
267
268    let mut all_edge_defs = kept_edge_defs;
269    all_edge_defs.extend(new_edges);
270
271    // 8. Resolve cross-file symbols
272    let resolved_edges = resolve(&all_node_defs, &all_edge_defs, repo_path)?;
273    let resolved_count = resolved_edges.len();
274
275    // Only upsert the resolved edge set — this matches the full-analyze flow and
276    // avoids feeding DuckDB the same edge id twice in close succession, which
277    // hits its INSERT OR REPLACE / ART-index bulk-delete limitation.
278    let resolved_graph_edges: Vec<crate::graph::Edge> = resolved_edges
279        .iter()
280        .map(crate::graph::Edge::from_def)
281        .collect();
282    db.upsert_edges(&resolved_graph_edges)?;
283
284    // 9. Git layer
285    if !no_git {
286        let all_file_paths: Vec<String> = kept_nodes
287            .iter()
288            .filter(|n| n.kind == "File")
289            .map(|n| n.path.clone())
290            .collect();
291        let git_analysis = analyze_repo(repo_path, &all_file_paths)?;
292
293        let max_churn = git_analysis
294            .file_churn
295            .values()
296            .copied()
297            .fold(0.0, f64::max);
298        for (path, churn) in &git_analysis.file_churn {
299            let normalized = if max_churn > 0.0 {
300                churn / max_churn
301            } else {
302                0.0
303            };
304            let _ = db.upsert_node_scores(&format!("file:{}", path), normalized, 0.0);
305        }
306
307        let mut author_nodes = Vec::new();
308        let mut own_edges = Vec::new();
309        for (author, files) in &git_analysis.file_owners {
310            let author_id = format!("author:{}", author);
311            author_nodes.push(crate::graph::Node {
312                id: author_id.clone(),
313                kind: "Author".to_string(),
314                name: author.clone(),
315                path: String::new(),
316                line_start: 0,
317                line_end: 0,
318                language: String::new(),
319                churn: 0.0,
320                coupling: 0.0,
321                community: 0,
322                in_degree: 0,
323                out_degree: 0,
324                exported: false,
325                is_dead_candidate: false,
326                dead_reason: None,
327                complexity: 0.0,
328                is_test_file: false,
329                test_count: 0,
330                is_tested: false,
331            });
332            for (file_path, _email, _percent) in files.iter().take(5) {
333                own_edges.push(crate::graph::Edge {
334                    id: format!("owns:{}:{}", author_id, file_path),
335                    src: author_id.clone(),
336                    dst: format!("file:{}", file_path),
337                    kind: "OWNS".to_string(),
338                    weight: 1.0,
339                    confidence: 1.0,
340                });
341            }
342        }
343        db.upsert_nodes(&author_nodes)?;
344        db.upsert_edges(&own_edges)?;
345
346        let mut cochange_edges = Vec::new();
347        for (a, b, weight) in &git_analysis.co_changes {
348            cochange_edges.push(crate::graph::Edge {
349                id: format!("cochange:{}:{}", a, b),
350                src: format!("file:{}", a),
351                dst: format!("file:{}", b),
352                kind: "CO_CHANGES".to_string(),
353                weight: *weight,
354                confidence: 1.0,
355            });
356        }
357        db.upsert_edges(&cochange_edges)?;
358    }
359
360    // 10. Clustering
361    if !no_cluster {
362        let _ = run_clustering(db)?;
363    }
364
365    // 11. Update degrees and coupling
366    db.update_in_out_degrees()?;
367    db.compute_coupling()?;
368
369    // 11b. Mark test files and update test coverage
370    let test_file_paths: Vec<String> = kept_nodes
371        .iter()
372        .filter(|n| n.kind == "File" && crate::resolver::is_test_path(&n.path))
373        .map(|n| n.path.clone())
374        .collect();
375    // Also mark function/class nodes from test paths
376    let test_node_paths: Vec<String> = kept_nodes
377        .iter()
378        .filter(|n| crate::resolver::is_test_path(&n.path))
379        .map(|n| n.path.clone())
380        .collect();
381    let all_test_paths: std::collections::HashSet<String> =
382        test_file_paths.into_iter().chain(test_node_paths).collect();
383    db.mark_test_files(&all_test_paths.into_iter().collect::<Vec<_>>())?;
384    db.update_test_coverage()?;
385
386    // 12. Update tags for changed/deleted files
387    let changed_paths_vec: Vec<String> = changed_paths.iter().cloned().collect();
388    db.delete_tags_for_paths(&changed_paths_vec)?;
389    let new_tag_rows: Vec<crate::graph::TagRow> = results
390        .iter()
391        .zip(changed_files.iter())
392        .flat_map(|(result, file)| {
393            result
394                .comment_tags
395                .iter()
396                .map(move |t| crate::graph::TagRow {
397                    id: format!("tag:{}:{}:{}", file.relative_path, t.line, t.tag_type),
398                    file_path: file.relative_path.clone(),
399                    line: t.line,
400                    tag_type: t.tag_type.clone(),
401                    text: t.text.clone(),
402                    comment_type: t.comment_kind.as_str().to_string(),
403                })
404        })
405        .collect();
406    db.upsert_tags(&new_tag_rows)?;
407
408    // 13. Store new file hashes
409    for (path, hash) in &current_hashes {
410        db.set_file_hash(path, hash)?;
411    }
412    if !deleted_paths.is_empty() {
413        db.remove_file_hashes(&deleted_paths)?;
414    }
415
416    if !quiet {
417        println!("  Incremental re-index complete.");
418        println!(
419            "  Kept {} unchanged nodes.",
420            kept_nodes.len() - new_node_count
421        );
422        println!("  Added {} new/changed nodes.", new_node_count);
423        if !deleted_paths.is_empty() {
424            println!("  Removed {} deleted files.", deleted_paths.len());
425        }
426        println!("  Resolved {} cross-file edges.", resolved_count);
427    }
428
429    Ok(true)
430}