Skip to main content

cgx_engine/
skill.rs

1use std::path::Path;
2
3use crate::graph::{GraphDb, Node};
4
/// Snapshot of graph statistics used to fill the generated Markdown templates.
///
/// Built by `build_skill_data` and consumed by `generate_skill` and
/// `generate_agents_md`.
pub struct SkillData {
    /// RFC 3339 UTC timestamp captured when the snapshot was built.
    pub indexed_at: String,
    /// Total number of nodes reported by the graph database.
    pub node_count: u64,
    /// Number of nodes counted under the `"Function"` kind (0 when absent).
    pub function_count: u64,
    /// Number of nodes counted under the `"Class"` kind (0 when absent).
    pub class_count: u64,
    /// Number of nodes counted under the `"File"` kind (0 when absent).
    pub file_count: u64,
    /// Total number of edges reported by the graph database.
    pub edge_count: u64,
    /// Comma-separated `"<language> <percent>%"` entries, largest share first,
    /// or the literal `"none"` when no languages were reported.
    pub language_breakdown: String,
    /// Number of communities returned by the graph database.
    pub community_count: u32,
    /// At most the first five communities, in the order the database returned them.
    pub top_communities: Vec<CommunityInfo>,
    /// At most five `"File"` nodes with positive churn, ranked by
    /// `churn * coupling + in_degree * 0.01`, highest first.
    pub hotspots: Vec<Node>,
    /// At most five nodes with `in_degree == 0` (excluding `"File"` and
    /// `"Author"` kinds), ranked by `out_degree`, highest first.
    pub entry_points: Vec<Node>,
    /// At most five non-`"File"` nodes with the highest `in_degree`,
    /// deduplicated by node name.
    pub god_nodes: Vec<Node>,
}
19
/// Summary of one detected community (cluster) in the code graph.
///
/// Derives the common value-type traits so callers can log, copy and compare
/// summaries without hand-written impls.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommunityInfo {
    /// Identifier of the community as stored in the graph database.
    pub id: i64,
    /// Human-readable label of the community.
    pub label: String,
    /// Number of nodes that belong to the community.
    pub node_count: i64,
}
25
/// Markdown template rendered by `generate_skill` (written out as `CGX_SKILL.md`).
///
/// Placeholders use the exact form `{{ name }}` and are replaced by plain
/// string substitution. Expected placeholders: `indexed_at`, `node_count`,
/// `function_count`, `class_count`, `file_count`, `edge_count`,
/// `language_breakdown`, `community_count`, `top_communities_list`,
/// `hotspots_list`, `entry_points_list`, `god_nodes_list`.
const SKILL_TEMPLATE: &str = r#"# cgx — Codebase Knowledge Graph

> Auto-generated by cgx on {{ indexed_at }}. Do not edit manually.
> Re-run `cgx analyze` to refresh.

## When to Use cgx

Use cgx commands INSTEAD of reading source files when you need to:
- Find where any function, class, or variable is defined
- Understand what depends on a specific piece of code
- Know what will break before making a change
- Understand the architecture of an unfamiliar area
- Find who owns a file or module
- Find dead code or unused exports
- Trace a full call chain from entry point to implementation

**Rule: Never open a file speculatively. Query first. Open only if you need
the implementation body — not to find where something lives.**

## Trigger Patterns

Run cgx automatically when the user says or implies any of:
- "what calls X" / "who uses X" / "what depends on X"
- "show me the architecture" / "how does this work"
- "what breaks if I change X" / "blast radius of X"
- "find X in the codebase" / "where is X defined"
- "who owns X" / "who wrote X"
- "is X used anywhere" / "is X dead code"
- Starting a new task in an unfamiliar part of the codebase
- Before making any edit to a function with many callers

## Commands

```bash
# Always run first in a new session
cgx summary

# Find any symbol
cgx query find <name>
cgx query find <name> --kind=Function

# Dependencies of a node
cgx query deps <node-name>

# Blast radius — run BEFORE every edit
cgx query blast-radius <function-name>

# Trace a call path
cgx query chain "<A> -> <B>"

# High-risk files
cgx hotspots

# Code ownership
cgx query owners <path>

# Search by concept
cgx query search "<phrase>"

# Community / cluster
cgx query community <id-or-name>

# Dead code
cgx query dead-code
```

## Workflow: Starting a Task

1. `cgx summary`                        — orient yourself
2. `cgx query find <entry-point>`       — locate the relevant node
3. `cgx query blast-radius <node>`      — know the risk before touching it
4. Open only the specific files you need

## Workflow: Before Every Edit

1. `cgx query blast-radius <function>`  — what breaks?
2. `cgx query deps <function>`          — what does it depend on?
3. Make the change
4. `cgx query blast-radius <function>`  — verify ripple is as expected

## Token Budget

| Action                    | Approx tokens |
|---------------------------|---------------|
| `cgx summary`             | ~400          |
| `cgx query find X`        | ~200          |
| `cgx query blast-radius X`| ~300-800      |
| Opening one source file   | ~2,000-15,000 |

Prefer 3 cgx queries over opening 1 file speculatively.

## This Codebase

- **Indexed:** {{ indexed_at }}
- **Nodes:** {{ node_count }} ({{ function_count }} functions,
  {{ class_count }} classes, {{ file_count }} files)
- **Edges:** {{ edge_count }}
- **Languages:** {{ language_breakdown }}
- **Communities:** {{ community_count }}

### Top Communities
{{ top_communities_list }}

### Hotspots (highest risk — review carefully before editing)
{{ hotspots_list }}

### Entry Points (nothing imports these — safe starting points)
{{ entry_points_list }}

### Most Depended-On Nodes (god nodes — change with extreme care)
{{ god_nodes_list }}
"#;
138
/// Markdown template rendered by `generate_agents_md` (written out as `AGENTS.md`).
///
/// Placeholders use the exact form `{{ name }}` and are replaced by plain
/// string substitution. Expected placeholders: `indexed_at`, `node_count`,
/// `file_count`, `language_breakdown`, `community_count`,
/// `community_descriptions`, `hotspots_table`, `entry_points_list`,
/// `god_nodes_list`.
const AGENTS_TEMPLATE: &str = r#"# Codebase Architecture
> Auto-generated by cgx {{ indexed_at }}. Re-run `cgx analyze` to refresh.
> For the full skill context used by AI agents, see `CGX_SKILL.md`.

## Overview
- **Nodes:** {{ node_count }} across {{ file_count }} files
- **Languages:** {{ language_breakdown }}
- **Communities:** {{ community_count }} architectural clusters

## Module Map
The graph is partitioned into {{ community_count }} communities via Louvain clustering.
Each community is a cohesive module — edits inside one community rarely ripple outside it.
{{ community_descriptions }}

## Hotspots (High Risk — Review Before Editing)
Files ranked by churn × coupling score. Editing these is likely to break things.
{{ hotspots_table }}

## Entry Points
Files/functions with no inbound dependencies — safe places to start tracing.
{{ entry_points_list }}

## God Nodes (Most Depended-On)
These are used everywhere. Breaking them has maximum blast radius.
{{ god_nodes_list }}

## How to Use This Index

### With MCP (structured queries — recommended)
After `cgx setup` + editor restart, cgx tools are available directly in chat:
- `get_repo_summary` — full architectural overview
- `find_symbol <name>` — locate any function, class, or variable
- `get_neighbors <node_id>` — direct dependencies of a node
- `get_blast_radius <node_id>` — what breaks if this changes
- `get_call_chain <from> <to>` — trace a call path
- `get_hotspots` — riskiest files to edit
- `get_file_owners <path>` — git blame ownership
- `run_query <sql>` — raw SQL against the graph database

### With CLI (fallback)
```
cgx summary                     # architectural overview
cgx query find <name>           # locate a symbol
cgx query blast-radius <name>   # change impact analysis
cgx query deps <name>           # what does this depend on
cgx query chain "<A> -> <B>"    # trace call path
cgx query owners <path>         # file ownership
cgx query dead-code             # unused exports
```

## AI Integration
1. **Start every session** with `get_repo_summary` (MCP) or `cgx summary` (CLI) to orient yourself.
2. **Before editing** any hotspot file, call `get_blast_radius` — the risk score tells you how careful to be.
3. **To find a symbol**, use `find_symbol` instead of grepping source files — it's 10× faster.
4. **Community IDs** in node metadata tell you which module a node belongs to; use `get_community` to explore the whole cluster.
5. **God nodes** (above) are used by many callers — any change there needs tests across all callers.
"#;
196
197pub fn build_skill_data(db: &GraphDb) -> anyhow::Result<SkillData> {
198    let node_count = db.node_count()?;
199    let edge_count = db.edge_count()?;
200    let lang_breakdown = db.get_language_breakdown()?;
201    let communities = db.get_communities()?;
202    let counts_by_kind = db.get_node_counts_by_kind()?;
203
204    let function_count = counts_by_kind.get("Function").copied().unwrap_or(0);
205    let class_count = counts_by_kind.get("Class").copied().unwrap_or(0);
206    let file_count = counts_by_kind.get("File").copied().unwrap_or(0);
207
208    let language_breakdown = if lang_breakdown.is_empty() {
209        "none".to_string()
210    } else {
211        let mut entries: Vec<_> = lang_breakdown.iter().collect();
212        entries.sort_by(|a, b| b.1.partial_cmp(a.1).unwrap_or(std::cmp::Ordering::Equal));
213        entries
214            .iter()
215            .map(|(lang, pct)| format!("{} {:.0}%", lang, *pct * 100.0))
216            .collect::<Vec<_>>()
217            .join(", ")
218    };
219
220    let top_communities: Vec<CommunityInfo> = communities
221        .iter()
222        .take(5)
223        .map(|(id, label, count, _top_nodes)| CommunityInfo {
224            id: *id,
225            label: label.clone(),
226            node_count: *count,
227        })
228        .collect();
229
230    let all_nodes = db.get_all_nodes()?;
231
232    let mut file_nodes: Vec<&Node> = all_nodes
233        .iter()
234        .filter(|n| n.kind == "File" && n.churn > 0.0)
235        .collect();
236    file_nodes.sort_by(|a, b| {
237        let sa = a.churn * a.coupling + a.in_degree as f64 * 0.01;
238        let sb = b.churn * b.coupling + b.in_degree as f64 * 0.01;
239        sb.partial_cmp(&sa).unwrap_or(std::cmp::Ordering::Equal)
240    });
241    let hotspots: Vec<Node> = file_nodes.iter().take(5).map(|&n| n.clone()).collect();
242
243    let mut entry_nodes: Vec<&Node> = all_nodes
244        .iter()
245        .filter(|n| n.in_degree == 0 && n.kind != "File" && n.kind != "Author")
246        .collect();
247    entry_nodes.sort_by_key(|node| std::cmp::Reverse(node.out_degree));
248    let entry_points: Vec<Node> = entry_nodes.iter().take(5).map(|&n| n.clone()).collect();
249
250    let mut god_nodes: Vec<&Node> = all_nodes
251        .iter()
252        .filter(|n| n.in_degree > 0 && n.kind != "File")
253        .collect();
254    god_nodes.sort_by_key(|node| std::cmp::Reverse(node.in_degree));
255    // Deduplicate by name (same function defined in multiple files shows same name)
256    let mut seen_names = std::collections::HashSet::new();
257    let top_god_nodes: Vec<Node> = god_nodes
258        .iter()
259        .filter(|n| seen_names.insert(n.name.clone()))
260        .take(5)
261        .map(|&n| n.clone())
262        .collect();
263
264    Ok(SkillData {
265        indexed_at: chrono::Utc::now().to_rfc3339(),
266        node_count,
267        function_count,
268        class_count,
269        file_count,
270        edge_count,
271        language_breakdown,
272        community_count: communities.len() as u32,
273        top_communities,
274        hotspots,
275        entry_points,
276        god_nodes: top_god_nodes,
277    })
278}
279
280pub fn generate_skill(data: &SkillData) -> String {
281    let mut c = SKILL_TEMPLATE.to_string();
282
283    c = c.replace("{{ indexed_at }}", &data.indexed_at);
284    c = c.replace("{{ node_count }}", &data.node_count.to_string());
285    c = c.replace("{{ function_count }}", &data.function_count.to_string());
286    c = c.replace("{{ class_count }}", &data.class_count.to_string());
287    c = c.replace("{{ file_count }}", &data.file_count.to_string());
288    c = c.replace("{{ edge_count }}", &data.edge_count.to_string());
289    c = c.replace("{{ language_breakdown }}", &data.language_breakdown);
290    c = c.replace("{{ community_count }}", &data.community_count.to_string());
291
292    let communities_list = if data.top_communities.is_empty() {
293        "_(none detected)_\n".to_string()
294    } else {
295        data.top_communities
296            .iter()
297            .map(|ci| format!("- **#{}** — {} ({} nodes)", ci.id, ci.label, ci.node_count))
298            .collect::<Vec<_>>()
299            .join("\n")
300    };
301    c = c.replace("{{ top_communities_list }}", &communities_list);
302
303    let hotspots_list = if data.hotspots.is_empty() {
304        "_(none — no git history or low churn)_\n".to_string()
305    } else {
306        data.hotspots
307            .iter()
308            .map(|n| {
309                format!(
310                    "- `{}` — churn {:.2}, {} callers",
311                    n.path, n.churn, n.in_degree
312                )
313            })
314            .collect::<Vec<_>>()
315            .join("\n")
316    };
317    c = c.replace("{{ hotspots_list }}", &hotspots_list);
318
319    let entry_list = if data.entry_points.is_empty() {
320        "_(none detected)_\n".to_string()
321    } else {
322        data.entry_points
323            .iter()
324            .map(|n| format!("- `{}` ({})", n.name, n.kind))
325            .collect::<Vec<_>>()
326            .join("\n")
327    };
328    c = c.replace("{{ entry_points_list }}", &entry_list);
329
330    let god_list = if data.god_nodes.is_empty() {
331        "_(none detected)_\n".to_string()
332    } else {
333        data.god_nodes
334            .iter()
335            .map(|n| format!("- `{}` — {} callers", n.name, n.in_degree))
336            .collect::<Vec<_>>()
337            .join("\n")
338    };
339    c = c.replace("{{ god_nodes_list }}", &god_list);
340
341    if c.contains("{{") {
342        eprintln!("  Warning: CGX_SKILL.md contains unreplaced placeholder tokens");
343    }
344    c
345}
346
347pub fn generate_agents_md(data: &SkillData) -> String {
348    let mut c = AGENTS_TEMPLATE.to_string();
349
350    c = c.replace("{{ indexed_at }}", &data.indexed_at);
351    c = c.replace("{{ node_count }}", &data.node_count.to_string());
352    c = c.replace("{{ file_count }}", &data.file_count.to_string());
353    c = c.replace("{{ language_breakdown }}", &data.language_breakdown);
354    c = c.replace("{{ community_count }}", &data.community_count.to_string());
355
356    let community_descriptions = if data.top_communities.is_empty() {
357        "No architectural communities detected.\n".to_string()
358    } else {
359        data.top_communities
360            .iter()
361            .map(|ci| format!("- **#{} — {}** ({} nodes)", ci.id, ci.label, ci.node_count))
362            .collect::<Vec<_>>()
363            .join("\n")
364    };
365    c = c.replace("{{ community_descriptions }}", &community_descriptions);
366
367    let hotspots_table = if data.hotspots.is_empty() {
368        "No hotspots detected (no git history or low churn).\n".to_string()
369    } else {
370        let mut t = String::from("| File | Churn | Callers |\n|------|-------|--------|\n");
371        for n in &data.hotspots {
372            t.push_str(&format!(
373                "| `{}` | {:.2} | {} |\n",
374                n.path, n.churn, n.in_degree
375            ));
376        }
377        t
378    };
379    c = c.replace("{{ hotspots_table }}", &hotspots_table);
380
381    let entry_list = if data.entry_points.is_empty() {
382        "_(none detected)_\n".to_string()
383    } else {
384        data.entry_points
385            .iter()
386            .map(|n| format!("- `{}` ({})", n.name, n.kind))
387            .collect::<Vec<_>>()
388            .join("\n")
389    };
390    c = c.replace("{{ entry_points_list }}", &entry_list);
391
392    let god_list = if data.god_nodes.is_empty() {
393        "_(none detected)_\n".to_string()
394    } else {
395        data.god_nodes
396            .iter()
397            .map(|n| {
398                format!(
399                    "- `{}` ({}) — {} callers, in `{}`",
400                    n.name, n.kind, n.in_degree, n.path
401                )
402            })
403            .collect::<Vec<_>>()
404            .join("\n")
405    };
406    c = c.replace("{{ god_nodes_list }}", &god_list);
407
408    if c.contains("{{") {
409        eprintln!("  Warning: AGENTS.md contains unreplaced placeholder tokens");
410    }
411
412    c
413}
414
415pub fn write_skill(repo_root: &Path, data: &SkillData) -> anyhow::Result<()> {
416    std::fs::write(repo_root.join("CGX_SKILL.md"), generate_skill(data))?;
417    Ok(())
418}
419
420pub fn write_agents_md(repo_root: &Path, data: &SkillData) -> anyhow::Result<()> {
421    std::fs::write(repo_root.join("AGENTS.md"), generate_agents_md(data))?;
422    Ok(())
423}
424
425pub fn install_git_hooks(repo_root: &Path) -> anyhow::Result<(bool, bool)> {
426    let hooks_dir = repo_root.join(".git").join("hooks");
427    if !hooks_dir.exists() {
428        return Ok((false, false));
429    }
430    Ok((
431        install_one_hook(&hooks_dir.join("post-commit")),
432        install_one_hook(&hooks_dir.join("post-checkout")),
433    ))
434}
435
/// Writes (or refreshes) a cgx-managed git hook script at `path`.
///
/// An existing file is only overwritten when its second line contains the
/// `cgx-managed` marker; any other existing hook is left untouched and a
/// warning is printed. Returns `true` when the hook script was written and
/// `false` when it was skipped, unreadable, or could not be written.
///
/// The script invokes the currently running executable (falling back to
/// `cgx` on `PATH`). The executable path is single-quoted for POSIX `sh` so
/// that paths containing spaces, backslashes or other shell metacharacters
/// still produce a working hook.
fn install_one_hook(path: &Path) -> bool {
    if path.exists() {
        match std::fs::read_to_string(path) {
            Ok(existing) => {
                // Line 1 is the shebang; the marker lives on line 2.
                let managed = existing
                    .lines()
                    .nth(1)
                    .map_or(false, |line| line.contains("cgx-managed"));
                if !managed {
                    eprintln!(
                        "  Warning: {} exists but was not created by cgx. Skipping.",
                        path.display()
                    );
                    return false;
                }
            }
            // Unreadable (or non-UTF-8) hook: never clobber what we cannot inspect.
            Err(_) => return false,
        }
    }
    // Use the running binary's path so the hook works even if cgx isn't on PATH
    let bin = std::env::current_exe()
        .ok()
        .and_then(|p| p.to_str().map(|s| s.to_string()))
        .unwrap_or_else(|| "cgx".to_string());
    // Inside single quotes sh treats every character literally; an embedded
    // single quote is written as '\'' (close, escaped quote, reopen).
    let quoted_bin = format!("'{}'", bin.replace('\'', "'\\''"));
    let content = format!(
        "#!/bin/sh\n# cgx-managed\n{} analyze --incremental --quiet\n",
        quoted_bin
    );
    if std::fs::write(path, content).is_err() {
        return false;
    }
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        // Best effort: a failed chmod is deliberately not treated as a failed install.
        let _ = std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o755));
    }
    true
}