1use std::path::Path;
2
3use crate::graph::{GraphDb, Node};
4
/// Aggregated graph statistics and ranked node lists used to fill the
/// generated Markdown documents.
///
/// Built by `build_skill_data` and consumed by `generate_skill` and
/// `generate_agents_md`.
pub struct SkillData {
    /// RFC 3339 UTC timestamp captured when `build_skill_data` ran.
    pub indexed_at: String,
    /// Node total as reported by `db.node_count()`.
    pub node_count: u64,
    /// Count for the `"Function"` kind (0 when that kind is absent).
    pub function_count: u64,
    /// Count for the `"Class"` kind (0 when that kind is absent).
    pub class_count: u64,
    /// Count for the `"File"` kind (0 when that kind is absent).
    pub file_count: u64,
    /// Edge total as reported by `db.edge_count()`.
    pub edge_count: u64,
    /// Pre-formatted, comma-separated `"<language> <percent>%"` entries sorted
    /// by descending share, or `"none"` when the breakdown is empty.
    pub language_breakdown: String,
    /// Number of communities returned by `db.get_communities()`.
    pub community_count: u32,
    /// The first five communities, in the order `db.get_communities()` returns them.
    pub top_communities: Vec<CommunityInfo>,
    /// Up to five `File` nodes with positive churn, ranked by
    /// `churn * coupling + in_degree * 0.01` (highest first).
    pub hotspots: Vec<Node>,
    /// Up to five nodes with `in_degree == 0` that are neither `File` nor
    /// `Author` nodes, ranked by descending `out_degree`.
    pub entry_points: Vec<Node>,
    /// Up to five non-`File` nodes with `in_degree > 0`, ranked by descending
    /// `in_degree` and de-duplicated by name.
    pub god_nodes: Vec<Node>,
    /// First value of `db.get_dead_code_stats()`, or 0 if that query fails.
    /// Rendered in the skill template as the number of dead code candidates.
    pub dead_code_count: u64,
    /// Second value of `db.get_dead_code_stats()`, or 0 if that query fails.
    /// Rendered in the skill template as the "high confidence" subset.
    pub dead_code_high: u64,
}
21
/// Summary of one community, copied from an entry of `db.get_communities()`.
pub struct CommunityInfo {
    /// Community identifier (first element of the community tuple).
    pub id: i64,
    /// Community label (second element of the community tuple).
    pub label: String,
    /// Third element of the community tuple; rendered as "(N nodes)" in the
    /// generated documents.
    pub node_count: i64,
}
27
/// Markdown template for the generated `CGX_SKILL.md` file.
///
/// `generate_skill` fills it by plain string replacement of these tokens:
/// `{{ indexed_at }}`, `{{ node_count }}`, `{{ function_count }}`,
/// `{{ class_count }}`, `{{ file_count }}`, `{{ edge_count }}`,
/// `{{ language_breakdown }}`, `{{ community_count }}`,
/// `{{ dead_code_count }}`, `{{ dead_code_high }}`,
/// `{{ top_communities_list }}`, `{{ hotspots_list }}`,
/// `{{ entry_points_list }}` and `{{ god_nodes_list }}`.
///
/// Any `{{` left in the rendered text makes `generate_skill` print a warning,
/// so a new token added here needs a matching replacement there.
const SKILL_TEMPLATE: &str = r#"# cgx — Codebase Knowledge Graph

> Auto-generated by cgx on {{ indexed_at }}. Do not edit manually.
> Re-run `cgx analyze` to refresh.

## When to Use cgx

Use cgx commands INSTEAD of reading source files when you need to:
- Find where any function, class, or variable is defined
- Understand what depends on a specific piece of code
- Know what will break before making a change
- Understand the architecture of an unfamiliar area
- Find who owns a file or module
- Find dead code or unused exports
- Trace a full call chain from entry point to implementation

**Rule: Never open a file speculatively. Query first. Open only if you need
the implementation body — not to find where something lives.**

## Trigger Patterns — MANDATORY

You MUST run cgx (CLI or MCP) automatically — without waiting to be asked — whenever:
- "what calls X" / "who uses X" / "what depends on X"
- "show me the architecture" / "how does this work" / "explain X"
- "what breaks if I change X" / "blast radius of X"
- "find X in the codebase" / "where is X defined" / "where is X used"
- "who owns X" / "who wrote X" / "git blame X"
- "is X used anywhere" / "is X dead code" / "can I delete X"
- Starting ANY new task — run `cgx summary` before touching any file
- Before ANY edit — run `cgx query blast-radius` on the target function
- After any question about code structure, dependencies, or architecture

Never grep or open source files to answer structural questions. cgx is always faster and cheaper.

## Commands

If cgx is available as an MCP tool (preferred — no subprocess):
```
get_repo_summary # orient: nodes, communities, hotspots
find_symbol <name> # locate any function/class/type
get_blast_radius <node_id> # what breaks if this changes
get_neighbors <node_id> # direct dependencies
get_call_chain <from> <to> # trace a call path
get_hotspots # riskiest files to edit
get_file_owners <path> # git blame ownership
search_graph <query> # full-text search
run_query <sql> # raw SQL against the graph
```

If cgx is available as a CLI:
```bash
cgx summary # orient yourself
cgx query find <name> # locate a symbol
cgx query find <name> --kind=Function
cgx query blast-radius <function> # change impact (run BEFORE every edit)
cgx query deps <node-name> # what does this depend on
cgx query chain "<A> -> <B>" # trace call path
cgx hotspots # high-risk files
cgx query owners <path> # file ownership
cgx query search "<phrase>" # search by concept
cgx query community <id-or-name> # explore a cluster
cgx query dead-code # find unused exports
```

## Dead Code Commands
```bash
cgx query dead-code --summary
cgx query dead-code --safe-to-delete
cgx query dead-code --kind=exports --path=src/auth/
cgx query dead-code --kind=files
```

## Workflow: Starting a Task

1. `cgx summary` — orient yourself
2. `cgx query find <entry-point>` — locate the relevant node
3. `cgx query blast-radius <node>` — know the risk before touching it
4. Open only the specific files you need

## Workflow: Before Every Edit

1. `cgx query blast-radius <function>` — what breaks?
2. `cgx query deps <function>` — what does it depend on?
3. Make the change
4. `cgx query blast-radius <function>` — verify ripple is as expected

## Token Budget

| Action | Approx tokens |
|---------------------------|---------------|
| `cgx summary` | ~400 |
| `cgx query find X` | ~200 |
| `cgx query blast-radius X`| ~300-800 |
| Opening one source file | ~2,000-15,000 |

Prefer 3 cgx queries over opening 1 file speculatively.

## This Codebase

- **Indexed:** {{ indexed_at }}
- **Nodes:** {{ node_count }} ({{ function_count }} functions,
 {{ class_count }} classes, {{ file_count }} files)
- **Edges:** {{ edge_count }}
- **Languages:** {{ language_breakdown }}
- **Communities:** {{ community_count }}
- **Dead code candidates:** {{ dead_code_count }} ({{ dead_code_high }} high confidence · safe to investigate)

### Top Communities
{{ top_communities_list }}

### Hotspots (highest risk — review carefully before editing)
{{ hotspots_list }}

### Entry Points (nothing imports these — safe starting points)
{{ entry_points_list }}

### Most Depended-On Nodes (god nodes — change with extreme care)
{{ god_nodes_list }}
"#;
147
/// Markdown template for the generated `AGENTS.md` file.
///
/// `generate_agents_md` fills it by plain string replacement of these tokens:
/// `{{ indexed_at }}`, `{{ node_count }}`, `{{ file_count }}`,
/// `{{ language_breakdown }}`, `{{ community_count }}`,
/// `{{ community_descriptions }}`, `{{ hotspots_table }}`,
/// `{{ entry_points_list }}` and `{{ god_nodes_list }}`.
///
/// Any `{{` left in the rendered text makes `generate_agents_md` print a
/// warning, so a new token added here needs a matching replacement there.
const AGENTS_TEMPLATE: &str = r#"# Codebase Architecture
> Auto-generated by cgx {{ indexed_at }}. Re-run `cgx analyze` to refresh.
> For the full skill context used by AI agents, see `CGX_SKILL.md`.

## Overview
- **Nodes:** {{ node_count }} across {{ file_count }} files
- **Languages:** {{ language_breakdown }}
- **Communities:** {{ community_count }} architectural clusters

## Module Map
The graph is partitioned into {{ community_count }} communities via Louvain clustering.
Each community is a cohesive module — edits inside one community rarely ripple outside it.
{{ community_descriptions }}

## Hotspots (High Risk — Review Before Editing)
Files ranked by churn × coupling score. Editing these is likely to break things.
{{ hotspots_table }}

## Entry Points
Files/functions with no inbound dependencies — safe places to start tracing.
{{ entry_points_list }}

## God Nodes (Most Depended-On)
These are used everywhere. Breaking them has maximum blast radius.
{{ god_nodes_list }}

> Query this graph before opening any file. See `CGX_SKILL.md` for full command reference.
"#;
176
177pub fn build_skill_data(db: &GraphDb) -> anyhow::Result<SkillData> {
178 let node_count = db.node_count()?;
179 let edge_count = db.edge_count()?;
180 let lang_breakdown = db.get_language_breakdown()?;
181 let communities = db.get_communities()?;
182 let counts_by_kind = db.get_node_counts_by_kind()?;
183
184 let function_count = counts_by_kind.get("Function").copied().unwrap_or(0);
185 let class_count = counts_by_kind.get("Class").copied().unwrap_or(0);
186 let file_count = counts_by_kind.get("File").copied().unwrap_or(0);
187
188 let language_breakdown = if lang_breakdown.is_empty() {
189 "none".to_string()
190 } else {
191 let mut entries: Vec<_> = lang_breakdown.iter().collect();
192 entries.sort_by(|a, b| b.1.partial_cmp(a.1).unwrap_or(std::cmp::Ordering::Equal));
193 entries
194 .iter()
195 .map(|(lang, pct)| format!("{} {:.0}%", lang, *pct * 100.0))
196 .collect::<Vec<_>>()
197 .join(", ")
198 };
199
200 let top_communities: Vec<CommunityInfo> = communities
201 .iter()
202 .take(5)
203 .map(|(id, label, count, _top_nodes)| CommunityInfo {
204 id: *id,
205 label: label.clone(),
206 node_count: *count,
207 })
208 .collect();
209
210 let all_nodes = db.get_all_nodes()?;
211
212 let mut file_nodes: Vec<&Node> = all_nodes
213 .iter()
214 .filter(|n| n.kind == "File" && n.churn > 0.0)
215 .collect();
216 file_nodes.sort_by(|a, b| {
217 let sa = a.churn * a.coupling + a.in_degree as f64 * 0.01;
218 let sb = b.churn * b.coupling + b.in_degree as f64 * 0.01;
219 sb.partial_cmp(&sa).unwrap_or(std::cmp::Ordering::Equal)
220 });
221 let hotspots: Vec<Node> = file_nodes.iter().take(5).map(|&n| n.clone()).collect();
222
223 let mut entry_nodes: Vec<&Node> = all_nodes
224 .iter()
225 .filter(|n| n.in_degree == 0 && n.kind != "File" && n.kind != "Author")
226 .collect();
227 entry_nodes.sort_by_key(|node| std::cmp::Reverse(node.out_degree));
228 let entry_points: Vec<Node> = entry_nodes.iter().take(5).map(|&n| n.clone()).collect();
229
230 let mut god_nodes: Vec<&Node> = all_nodes
231 .iter()
232 .filter(|n| n.in_degree > 0 && n.kind != "File")
233 .collect();
234 god_nodes.sort_by_key(|node| std::cmp::Reverse(node.in_degree));
235 let mut seen_names = std::collections::HashSet::new();
237 let top_god_nodes: Vec<Node> = god_nodes
238 .iter()
239 .filter(|n| seen_names.insert(n.name.clone()))
240 .take(5)
241 .map(|&n| n.clone())
242 .collect();
243
244 let dead_code_count = db.get_dead_code_stats().map(|(t, _)| t as u64).unwrap_or(0);
245 let dead_code_high = db.get_dead_code_stats().map(|(_, h)| h as u64).unwrap_or(0);
246
247 Ok(SkillData {
248 indexed_at: chrono::Utc::now().to_rfc3339(),
249 node_count,
250 function_count,
251 class_count,
252 file_count,
253 edge_count,
254 language_breakdown,
255 community_count: communities.len() as u32,
256 top_communities,
257 hotspots,
258 entry_points,
259 god_nodes: top_god_nodes,
260 dead_code_count,
261 dead_code_high,
262 })
263}
264
265pub fn generate_skill(data: &SkillData) -> String {
266 let mut c = SKILL_TEMPLATE.to_string();
267
268 c = c.replace("{{ indexed_at }}", &data.indexed_at);
269 c = c.replace("{{ node_count }}", &data.node_count.to_string());
270 c = c.replace("{{ function_count }}", &data.function_count.to_string());
271 c = c.replace("{{ class_count }}", &data.class_count.to_string());
272 c = c.replace("{{ file_count }}", &data.file_count.to_string());
273 c = c.replace("{{ edge_count }}", &data.edge_count.to_string());
274 c = c.replace("{{ language_breakdown }}", &data.language_breakdown);
275 c = c.replace("{{ community_count }}", &data.community_count.to_string());
276 c = c.replace("{{ dead_code_count }}", &data.dead_code_count.to_string());
277 c = c.replace("{{ dead_code_high }}", &data.dead_code_high.to_string());
278
279 let communities_list = if data.top_communities.is_empty() {
280 "_(none detected)_\n".to_string()
281 } else {
282 data.top_communities
283 .iter()
284 .map(|ci| format!("- **#{}** — {} ({} nodes)", ci.id, ci.label, ci.node_count))
285 .collect::<Vec<_>>()
286 .join("\n")
287 };
288 c = c.replace("{{ top_communities_list }}", &communities_list);
289
290 let hotspots_list = if data.hotspots.is_empty() {
291 "_(none — no git history or low churn)_\n".to_string()
292 } else {
293 data.hotspots
294 .iter()
295 .map(|n| {
296 format!(
297 "- `{}` — churn {:.2}, {} callers",
298 n.path, n.churn, n.in_degree
299 )
300 })
301 .collect::<Vec<_>>()
302 .join("\n")
303 };
304 c = c.replace("{{ hotspots_list }}", &hotspots_list);
305
306 let entry_list = if data.entry_points.is_empty() {
307 "_(none detected)_\n".to_string()
308 } else {
309 data.entry_points
310 .iter()
311 .map(|n| format!("- `{}` ({})", n.name, n.kind))
312 .collect::<Vec<_>>()
313 .join("\n")
314 };
315 c = c.replace("{{ entry_points_list }}", &entry_list);
316
317 let god_list = if data.god_nodes.is_empty() {
318 "_(none detected)_\n".to_string()
319 } else {
320 data.god_nodes
321 .iter()
322 .map(|n| format!("- `{}` — {} callers", n.name, n.in_degree))
323 .collect::<Vec<_>>()
324 .join("\n")
325 };
326 c = c.replace("{{ god_nodes_list }}", &god_list);
327
328 if c.contains("{{") {
329 eprintln!(" Warning: CGX_SKILL.md contains unreplaced placeholder tokens");
330 }
331 c
332}
333
334pub fn generate_agents_md(data: &SkillData) -> String {
335 let mut c = AGENTS_TEMPLATE.to_string();
336
337 c = c.replace("{{ indexed_at }}", &data.indexed_at);
338 c = c.replace("{{ node_count }}", &data.node_count.to_string());
339 c = c.replace("{{ file_count }}", &data.file_count.to_string());
340 c = c.replace("{{ language_breakdown }}", &data.language_breakdown);
341 c = c.replace("{{ community_count }}", &data.community_count.to_string());
342
343 let community_descriptions = if data.top_communities.is_empty() {
344 "No architectural communities detected.\n".to_string()
345 } else {
346 data.top_communities
347 .iter()
348 .map(|ci| format!("- **#{} — {}** ({} nodes)", ci.id, ci.label, ci.node_count))
349 .collect::<Vec<_>>()
350 .join("\n")
351 };
352 c = c.replace("{{ community_descriptions }}", &community_descriptions);
353
354 let hotspots_table = if data.hotspots.is_empty() {
355 "No hotspots detected (no git history or low churn).\n".to_string()
356 } else {
357 let mut t = String::from("| File | Churn | Callers |\n|------|-------|--------|\n");
358 for n in &data.hotspots {
359 t.push_str(&format!(
360 "| `{}` | {:.2} | {} |\n",
361 n.path, n.churn, n.in_degree
362 ));
363 }
364 t
365 };
366 c = c.replace("{{ hotspots_table }}", &hotspots_table);
367
368 let entry_list = if data.entry_points.is_empty() {
369 "_(none detected)_\n".to_string()
370 } else {
371 data.entry_points
372 .iter()
373 .map(|n| format!("- `{}` ({})", n.name, n.kind))
374 .collect::<Vec<_>>()
375 .join("\n")
376 };
377 c = c.replace("{{ entry_points_list }}", &entry_list);
378
379 let god_list = if data.god_nodes.is_empty() {
380 "_(none detected)_\n".to_string()
381 } else {
382 data.god_nodes
383 .iter()
384 .map(|n| {
385 format!(
386 "- `{}` ({}) — {} callers, in `{}`",
387 n.name, n.kind, n.in_degree, n.path
388 )
389 })
390 .collect::<Vec<_>>()
391 .join("\n")
392 };
393 c = c.replace("{{ god_nodes_list }}", &god_list);
394
395 if c.contains("{{") {
396 eprintln!(" Warning: AGENTS.md contains unreplaced placeholder tokens");
397 }
398
399 c
400}
401
402pub fn write_skill(repo_root: &Path, data: &SkillData) -> anyhow::Result<()> {
403 std::fs::write(repo_root.join("CGX_SKILL.md"), generate_skill(data))?;
404 Ok(())
405}
406
407pub fn write_agents_md(repo_root: &Path, data: &SkillData) -> anyhow::Result<()> {
408 std::fs::write(repo_root.join("AGENTS.md"), generate_agents_md(data))?;
409 Ok(())
410}
411
412pub fn install_git_hooks(repo_root: &Path) -> anyhow::Result<(bool, bool)> {
413 let hooks_dir = repo_root.join(".git").join("hooks");
414 if !hooks_dir.exists() {
415 return Ok((false, false));
416 }
417 Ok((
418 install_one_hook(&hooks_dir.join("post-commit")),
419 install_one_hook(&hooks_dir.join("post-checkout")),
420 ))
421}
422
/// Writes the cgx-managed hook script to `path` and reports whether it was
/// installed.
///
/// An existing file is overwritten only when its second line contains the
/// `cgx-managed` marker. Any other existing hook is left untouched (with a
/// warning on stderr) and `false` is returned. `false` is also returned when
/// an existing file cannot be read as text or the script cannot be written.
///
/// The script runs the currently executing binary, falling back to `cgx` when
/// its path is unavailable or not valid UTF-8. The path is single-quoted so
/// that spaces or shell metacharacters in it do not break the hook.
fn install_one_hook(path: &Path) -> bool {
    // Quotes `raw` as a single POSIX `sh` word: wraps it in single quotes and
    // rewrites each embedded `'` as `'\''`.
    fn sh_single_quote(raw: &str) -> String {
        format!("'{}'", raw.replace('\'', r"'\''"))
    }

    if path.exists() {
        match std::fs::read_to_string(path) {
            Ok(existing) => {
                // Line 1 is the shebang; the marker is expected on line 2.
                let managed = existing
                    .lines()
                    .nth(1)
                    .map_or(false, |line| line.contains("cgx-managed"));
                if !managed {
                    eprintln!(
                        " Warning: {} exists but was not created by cgx. Skipping.",
                        path.display()
                    );
                    return false;
                }
            }
            Err(_) => return false,
        }
    }

    let bin = std::env::current_exe()
        .ok()
        .and_then(|p| p.to_str().map(|s| s.to_string()))
        .unwrap_or_else(|| "cgx".to_string());
    let content = format!(
        "#!/bin/sh\n# cgx-managed\n{} analyze --incremental --quiet\n",
        sh_single_quote(&bin)
    );
    if std::fs::write(path, content).is_err() {
        return false;
    }

    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        // Best effort: a failure to set the executable bits is ignored.
        let _ = std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o755));
    }
    true
}