1use std::path::Path;
2
3use crate::graph::{GraphDb, Node};
4
/// Aggregated facts about an indexed codebase, used to fill the placeholders
/// of the `CGX_SKILL.md` and `AGENTS.md` templates.
///
/// Instances are assembled by `build_skill_data`.
pub struct SkillData {
    /// RFC 3339 timestamp taken (in UTC) when the data was assembled.
    pub indexed_at: String,
    /// Total number of nodes reported by the graph database.
    pub node_count: u64,
    /// Number of nodes whose kind is `"Function"`.
    pub function_count: u64,
    /// Number of nodes whose kind is `"Class"`.
    pub class_count: u64,
    /// Number of nodes whose kind is `"File"`.
    pub file_count: u64,
    /// Total number of edges reported by the graph database.
    pub edge_count: u64,
    /// Comma-separated `"<language> <percent>%"` entries, largest share first,
    /// or the literal `"none"` when no language data is available.
    pub language_breakdown: String,
    /// Number of communities returned by the graph database.
    pub community_count: u32,
    /// At most the first five communities returned by the graph database.
    pub top_communities: Vec<CommunityInfo>,
    /// Up to five `File` nodes with positive churn, highest
    /// `churn * coupling + in_degree * 0.01` score first.
    pub hotspots: Vec<Node>,
    /// Up to five nodes (excluding `File` and `Author` kinds) with no inbound
    /// edges, highest out-degree first.
    pub entry_points: Vec<Node>,
    /// Up to five non-`File` nodes with distinct names, highest in-degree first.
    pub god_nodes: Vec<Node>,
}
19
/// Summary of a single community as listed in the generated documents.
pub struct CommunityInfo {
    /// Community identifier as returned by the graph database.
    pub id: i64,
    /// Label shown next to the identifier in the generated lists.
    pub label: String,
    /// Number of nodes that belong to the community.
    pub node_count: i64,
}
25
/// Markdown template for `CGX_SKILL.md`.
///
/// Every `{{ name }}` token is a placeholder that `generate_skill` replaces
/// with values taken from a `SkillData`; the rest of the text is emitted
/// verbatim.
const SKILL_TEMPLATE: &str = r#"# cgx — Codebase Knowledge Graph

> Auto-generated by cgx on {{ indexed_at }}. Do not edit manually.
> Re-run `cgx analyze` to refresh.

## When to Use cgx

Use cgx commands INSTEAD of reading source files when you need to:
- Find where any function, class, or variable is defined
- Understand what depends on a specific piece of code
- Know what will break before making a change
- Understand the architecture of an unfamiliar area
- Find who owns a file or module
- Find dead code or unused exports
- Trace a full call chain from entry point to implementation

**Rule: Never open a file speculatively. Query first. Open only if you need
the implementation body — not to find where something lives.**

## Trigger Patterns — MANDATORY

You MUST run cgx (CLI or MCP) automatically — without waiting to be asked — whenever:
- "what calls X" / "who uses X" / "what depends on X"
- "show me the architecture" / "how does this work" / "explain X"
- "what breaks if I change X" / "blast radius of X"
- "find X in the codebase" / "where is X defined" / "where is X used"
- "who owns X" / "who wrote X" / "git blame X"
- "is X used anywhere" / "is X dead code" / "can I delete X"
- Starting ANY new task — run `cgx summary` before touching any file
- Before ANY edit — run `cgx query blast-radius` on the target function
- After any question about code structure, dependencies, or architecture

Never grep or open source files to answer structural questions. cgx is always faster and cheaper.

## Commands

If cgx is available as an MCP tool (preferred — no subprocess):
```
get_repo_summary # orient: nodes, communities, hotspots
find_symbol <name> # locate any function/class/type
get_blast_radius <node_id> # what breaks if this changes
get_neighbors <node_id> # direct dependencies
get_call_chain <from> <to> # trace a call path
get_hotspots # riskiest files to edit
get_file_owners <path> # git blame ownership
search_graph <query> # full-text search
run_query <sql> # raw SQL against the graph
```

If cgx is available as a CLI:
```bash
cgx summary # orient yourself
cgx query find <name> # locate a symbol
cgx query find <name> --kind=Function
cgx query blast-radius <function> # change impact (run BEFORE every edit)
cgx query deps <node-name> # what does this depend on
cgx query chain "<A> -> <B>" # trace call path
cgx hotspots # high-risk files
cgx query owners <path> # file ownership
cgx query search "<phrase>" # search by concept
cgx query community <id-or-name> # explore a cluster
cgx query dead-code # find unused exports
```

## Workflow: Starting a Task

1. `cgx summary` — orient yourself
2. `cgx query find <entry-point>` — locate the relevant node
3. `cgx query blast-radius <node>` — know the risk before touching it
4. Open only the specific files you need

## Workflow: Before Every Edit

1. `cgx query blast-radius <function>` — what breaks?
2. `cgx query deps <function>` — what does it depend on?
3. Make the change
4. `cgx query blast-radius <function>` — verify ripple is as expected

## Token Budget

| Action | Approx tokens |
|---------------------------|---------------|
| `cgx summary` | ~400 |
| `cgx query find X` | ~200 |
| `cgx query blast-radius X`| ~300-800 |
| Opening one source file | ~2,000-15,000 |

Prefer 3 cgx queries over opening 1 file speculatively.

## This Codebase

- **Indexed:** {{ indexed_at }}
- **Nodes:** {{ node_count }} ({{ function_count }} functions,
 {{ class_count }} classes, {{ file_count }} files)
- **Edges:** {{ edge_count }}
- **Languages:** {{ language_breakdown }}
- **Communities:** {{ community_count }}

### Top Communities
{{ top_communities_list }}

### Hotspots (highest risk — review carefully before editing)
{{ hotspots_list }}

### Entry Points (nothing imports these — safe starting points)
{{ entry_points_list }}

### Most Depended-On Nodes (god nodes — change with extreme care)
{{ god_nodes_list }}
"#;
136
/// Markdown template for `AGENTS.md`.
///
/// Every `{{ name }}` token is a placeholder that `generate_agents_md`
/// replaces with values taken from a `SkillData`; the rest of the text is
/// emitted verbatim.
const AGENTS_TEMPLATE: &str = r#"# Codebase Architecture
> Auto-generated by cgx {{ indexed_at }}. Re-run `cgx analyze` to refresh.
> For the full skill context used by AI agents, see `CGX_SKILL.md`.

## Overview
- **Nodes:** {{ node_count }} across {{ file_count }} files
- **Languages:** {{ language_breakdown }}
- **Communities:** {{ community_count }} architectural clusters

## Module Map
The graph is partitioned into {{ community_count }} communities via Louvain clustering.
Each community is a cohesive module — edits inside one community rarely ripple outside it.
{{ community_descriptions }}

## Hotspots (High Risk — Review Before Editing)
Files ranked by churn × coupling score. Editing these is likely to break things.
{{ hotspots_table }}

## Entry Points
Files/functions with no inbound dependencies — safe places to start tracing.
{{ entry_points_list }}

## God Nodes (Most Depended-On)
These are used everywhere. Breaking them has maximum blast radius.
{{ god_nodes_list }}

> Query this graph before opening any file. See `CGX_SKILL.md` for full command reference.
"#;
165
166pub fn build_skill_data(db: &GraphDb) -> anyhow::Result<SkillData> {
167 let node_count = db.node_count()?;
168 let edge_count = db.edge_count()?;
169 let lang_breakdown = db.get_language_breakdown()?;
170 let communities = db.get_communities()?;
171 let counts_by_kind = db.get_node_counts_by_kind()?;
172
173 let function_count = counts_by_kind.get("Function").copied().unwrap_or(0);
174 let class_count = counts_by_kind.get("Class").copied().unwrap_or(0);
175 let file_count = counts_by_kind.get("File").copied().unwrap_or(0);
176
177 let language_breakdown = if lang_breakdown.is_empty() {
178 "none".to_string()
179 } else {
180 let mut entries: Vec<_> = lang_breakdown.iter().collect();
181 entries.sort_by(|a, b| b.1.partial_cmp(a.1).unwrap_or(std::cmp::Ordering::Equal));
182 entries
183 .iter()
184 .map(|(lang, pct)| format!("{} {:.0}%", lang, *pct * 100.0))
185 .collect::<Vec<_>>()
186 .join(", ")
187 };
188
189 let top_communities: Vec<CommunityInfo> = communities
190 .iter()
191 .take(5)
192 .map(|(id, label, count, _top_nodes)| CommunityInfo {
193 id: *id,
194 label: label.clone(),
195 node_count: *count,
196 })
197 .collect();
198
199 let all_nodes = db.get_all_nodes()?;
200
201 let mut file_nodes: Vec<&Node> = all_nodes
202 .iter()
203 .filter(|n| n.kind == "File" && n.churn > 0.0)
204 .collect();
205 file_nodes.sort_by(|a, b| {
206 let sa = a.churn * a.coupling + a.in_degree as f64 * 0.01;
207 let sb = b.churn * b.coupling + b.in_degree as f64 * 0.01;
208 sb.partial_cmp(&sa).unwrap_or(std::cmp::Ordering::Equal)
209 });
210 let hotspots: Vec<Node> = file_nodes.iter().take(5).map(|&n| n.clone()).collect();
211
212 let mut entry_nodes: Vec<&Node> = all_nodes
213 .iter()
214 .filter(|n| n.in_degree == 0 && n.kind != "File" && n.kind != "Author")
215 .collect();
216 entry_nodes.sort_by_key(|node| std::cmp::Reverse(node.out_degree));
217 let entry_points: Vec<Node> = entry_nodes.iter().take(5).map(|&n| n.clone()).collect();
218
219 let mut god_nodes: Vec<&Node> = all_nodes
220 .iter()
221 .filter(|n| n.in_degree > 0 && n.kind != "File")
222 .collect();
223 god_nodes.sort_by_key(|node| std::cmp::Reverse(node.in_degree));
224 let mut seen_names = std::collections::HashSet::new();
226 let top_god_nodes: Vec<Node> = god_nodes
227 .iter()
228 .filter(|n| seen_names.insert(n.name.clone()))
229 .take(5)
230 .map(|&n| n.clone())
231 .collect();
232
233 Ok(SkillData {
234 indexed_at: chrono::Utc::now().to_rfc3339(),
235 node_count,
236 function_count,
237 class_count,
238 file_count,
239 edge_count,
240 language_breakdown,
241 community_count: communities.len() as u32,
242 top_communities,
243 hotspots,
244 entry_points,
245 god_nodes: top_god_nodes,
246 })
247}
248
249pub fn generate_skill(data: &SkillData) -> String {
250 let mut c = SKILL_TEMPLATE.to_string();
251
252 c = c.replace("{{ indexed_at }}", &data.indexed_at);
253 c = c.replace("{{ node_count }}", &data.node_count.to_string());
254 c = c.replace("{{ function_count }}", &data.function_count.to_string());
255 c = c.replace("{{ class_count }}", &data.class_count.to_string());
256 c = c.replace("{{ file_count }}", &data.file_count.to_string());
257 c = c.replace("{{ edge_count }}", &data.edge_count.to_string());
258 c = c.replace("{{ language_breakdown }}", &data.language_breakdown);
259 c = c.replace("{{ community_count }}", &data.community_count.to_string());
260
261 let communities_list = if data.top_communities.is_empty() {
262 "_(none detected)_\n".to_string()
263 } else {
264 data.top_communities
265 .iter()
266 .map(|ci| format!("- **#{}** — {} ({} nodes)", ci.id, ci.label, ci.node_count))
267 .collect::<Vec<_>>()
268 .join("\n")
269 };
270 c = c.replace("{{ top_communities_list }}", &communities_list);
271
272 let hotspots_list = if data.hotspots.is_empty() {
273 "_(none — no git history or low churn)_\n".to_string()
274 } else {
275 data.hotspots
276 .iter()
277 .map(|n| {
278 format!(
279 "- `{}` — churn {:.2}, {} callers",
280 n.path, n.churn, n.in_degree
281 )
282 })
283 .collect::<Vec<_>>()
284 .join("\n")
285 };
286 c = c.replace("{{ hotspots_list }}", &hotspots_list);
287
288 let entry_list = if data.entry_points.is_empty() {
289 "_(none detected)_\n".to_string()
290 } else {
291 data.entry_points
292 .iter()
293 .map(|n| format!("- `{}` ({})", n.name, n.kind))
294 .collect::<Vec<_>>()
295 .join("\n")
296 };
297 c = c.replace("{{ entry_points_list }}", &entry_list);
298
299 let god_list = if data.god_nodes.is_empty() {
300 "_(none detected)_\n".to_string()
301 } else {
302 data.god_nodes
303 .iter()
304 .map(|n| format!("- `{}` — {} callers", n.name, n.in_degree))
305 .collect::<Vec<_>>()
306 .join("\n")
307 };
308 c = c.replace("{{ god_nodes_list }}", &god_list);
309
310 if c.contains("{{") {
311 eprintln!(" Warning: CGX_SKILL.md contains unreplaced placeholder tokens");
312 }
313 c
314}
315
316pub fn generate_agents_md(data: &SkillData) -> String {
317 let mut c = AGENTS_TEMPLATE.to_string();
318
319 c = c.replace("{{ indexed_at }}", &data.indexed_at);
320 c = c.replace("{{ node_count }}", &data.node_count.to_string());
321 c = c.replace("{{ file_count }}", &data.file_count.to_string());
322 c = c.replace("{{ language_breakdown }}", &data.language_breakdown);
323 c = c.replace("{{ community_count }}", &data.community_count.to_string());
324
325 let community_descriptions = if data.top_communities.is_empty() {
326 "No architectural communities detected.\n".to_string()
327 } else {
328 data.top_communities
329 .iter()
330 .map(|ci| format!("- **#{} — {}** ({} nodes)", ci.id, ci.label, ci.node_count))
331 .collect::<Vec<_>>()
332 .join("\n")
333 };
334 c = c.replace("{{ community_descriptions }}", &community_descriptions);
335
336 let hotspots_table = if data.hotspots.is_empty() {
337 "No hotspots detected (no git history or low churn).\n".to_string()
338 } else {
339 let mut t = String::from("| File | Churn | Callers |\n|------|-------|--------|\n");
340 for n in &data.hotspots {
341 t.push_str(&format!(
342 "| `{}` | {:.2} | {} |\n",
343 n.path, n.churn, n.in_degree
344 ));
345 }
346 t
347 };
348 c = c.replace("{{ hotspots_table }}", &hotspots_table);
349
350 let entry_list = if data.entry_points.is_empty() {
351 "_(none detected)_\n".to_string()
352 } else {
353 data.entry_points
354 .iter()
355 .map(|n| format!("- `{}` ({})", n.name, n.kind))
356 .collect::<Vec<_>>()
357 .join("\n")
358 };
359 c = c.replace("{{ entry_points_list }}", &entry_list);
360
361 let god_list = if data.god_nodes.is_empty() {
362 "_(none detected)_\n".to_string()
363 } else {
364 data.god_nodes
365 .iter()
366 .map(|n| {
367 format!(
368 "- `{}` ({}) — {} callers, in `{}`",
369 n.name, n.kind, n.in_degree, n.path
370 )
371 })
372 .collect::<Vec<_>>()
373 .join("\n")
374 };
375 c = c.replace("{{ god_nodes_list }}", &god_list);
376
377 if c.contains("{{") {
378 eprintln!(" Warning: AGENTS.md contains unreplaced placeholder tokens");
379 }
380
381 c
382}
383
384pub fn write_skill(repo_root: &Path, data: &SkillData) -> anyhow::Result<()> {
385 std::fs::write(repo_root.join("CGX_SKILL.md"), generate_skill(data))?;
386 Ok(())
387}
388
389pub fn write_agents_md(repo_root: &Path, data: &SkillData) -> anyhow::Result<()> {
390 std::fs::write(repo_root.join("AGENTS.md"), generate_agents_md(data))?;
391 Ok(())
392}
393
394pub fn install_git_hooks(repo_root: &Path) -> anyhow::Result<(bool, bool)> {
395 let hooks_dir = repo_root.join(".git").join("hooks");
396 if !hooks_dir.exists() {
397 return Ok((false, false));
398 }
399 Ok((
400 install_one_hook(&hooks_dir.join("post-commit")),
401 install_one_hook(&hooks_dir.join("post-checkout")),
402 ))
403}
404
/// Wraps `raw` in single quotes so a POSIX shell treats it as one literal
/// word; embedded single quotes are written as `'\''`.
fn shell_single_quote(raw: &str) -> String {
    format!("'{}'", raw.replace('\'', "'\\''"))
}

/// Writes the cgx hook script to `path` and reports whether it was written.
///
/// An existing file is only overwritten when its second line contains the
/// `cgx-managed` marker; a foreign hook is left untouched (with a warning on
/// stderr), and an unreadable one is skipped silently. The script invokes the
/// currently running executable, falling back to plain `cgx` when its path is
/// unavailable or not valid UTF-8. The path is single-quoted so that spaces,
/// backslashes or shell metacharacters in it cannot break the hook.
fn install_one_hook(path: &Path) -> bool {
    if path.exists() {
        match std::fs::read_to_string(path) {
            Ok(existing) => {
                // The marker lives on the line right after the shebang.
                let managed = existing
                    .lines()
                    .nth(1)
                    .map_or(false, |line| line.contains("cgx-managed"));
                if !managed {
                    eprintln!(
                        " Warning: {} exists but was not created by cgx. Skipping.",
                        path.display()
                    );
                    return false;
                }
            }
            Err(_) => return false,
        }
    }

    let bin = std::env::current_exe()
        .ok()
        .and_then(|p| p.to_str().map(shell_single_quote))
        .unwrap_or_else(|| "cgx".to_string());
    let content = format!(
        "#!/bin/sh\n# cgx-managed\n{} analyze --incremental --quiet\n",
        bin
    );
    if std::fs::write(path, content).is_err() {
        return false;
    }

    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        // Best effort: a failure to mark the hook executable is not reported.
        let _ = std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o755));
    }
    true
}