1use std::path::Path;
2
3use crate::graph::{GraphDb, Node};
4
/// Snapshot of graph statistics used to render the generated Markdown documents.
///
/// Populated by `build_skill_data` and consumed by `generate_skill` and
/// `generate_agents_md`.
pub struct SkillData {
    /// RFC 3339 timestamp (UTC) taken when the data was assembled.
    pub indexed_at: String,
    /// Total node count as reported by the graph database.
    pub node_count: u64,
    /// Number of nodes whose kind is `"Function"` (0 if absent).
    pub function_count: u64,
    /// Number of nodes whose kind is `"Class"` (0 if absent).
    pub class_count: u64,
    /// Number of nodes whose kind is `"File"` (0 if absent).
    pub file_count: u64,
    /// Total edge count as reported by the graph database.
    pub edge_count: u64,
    /// Comma-separated `"<language> <percent>%"` list, largest share first,
    /// or `"none"` when no language data exists.
    pub language_breakdown: String,
    /// Number of communities returned by the graph database.
    pub community_count: u32,
    /// At most the first five communities returned by the graph database.
    pub top_communities: Vec<CommunityInfo>,
    /// Up to five `File` nodes with positive churn, ranked by
    /// `churn * coupling + in_degree * 0.01`, highest first.
    pub hotspots: Vec<Node>,
    /// Up to five nodes with no inbound edges (excluding `File` and `Author`
    /// kinds), ordered by out-degree, highest first.
    pub entry_points: Vec<Node>,
    /// Up to five non-`File` nodes with inbound edges, ordered by in-degree
    /// (highest first) and de-duplicated by name.
    pub god_nodes: Vec<Node>,
}
19
/// Summary of one community as listed in the generated documents.
///
/// Built in `build_skill_data` from the tuples returned by the graph
/// database's community query.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommunityInfo {
    /// Community identifier, rendered as `#<id>`.
    pub id: i64,
    /// Label rendered next to the identifier.
    pub label: String,
    /// Number of nodes reported for this community.
    pub node_count: i64,
}
25
/// Markdown template for `CGX_SKILL.md`.
///
/// Every `{{ name }}` placeholder is substituted by `generate_skill`; the
/// rest of the text is emitted verbatim.
const SKILL_TEMPLATE: &str = r#"# cgx — Codebase Knowledge Graph

> Auto-generated by cgx on {{ indexed_at }}. Do not edit manually.
> Re-run `cgx analyze` to refresh.

## When to Use cgx

Use cgx commands INSTEAD of reading source files when you need to:
- Find where any function, class, or variable is defined
- Understand what depends on a specific piece of code
- Know what will break before making a change
- Understand the architecture of an unfamiliar area
- Find who owns a file or module
- Find dead code or unused exports
- Trace a full call chain from entry point to implementation

**Rule: Never open a file speculatively. Query first. Open only if you need
the implementation body — not to find where something lives.**

## Trigger Patterns

Run cgx automatically when the user says or implies any of:
- "what calls X" / "who uses X" / "what depends on X"
- "show me the architecture" / "how does this work"
- "what breaks if I change X" / "blast radius of X"
- "find X in the codebase" / "where is X defined"
- "who owns X" / "who wrote X"
- "is X used anywhere" / "is X dead code"
- Starting a new task in an unfamiliar part of the codebase
- Before making any edit to a function with many callers

## Commands

```bash
# Always run first in a new session
cgx summary

# Find any symbol
cgx query find <name>
cgx query find <name> --kind=Function

# Dependencies of a node
cgx query deps <node-name>

# Blast radius — run BEFORE every edit
cgx query blast-radius <function-name>

# Trace a call path
cgx query chain "<A> -> <B>"

# High-risk files
cgx hotspots

# Code ownership
cgx query owners <path>

# Search by concept
cgx query search "<phrase>"

# Community / cluster
cgx query community <id-or-name>

# Dead code
cgx query dead-code
```

## Workflow: Starting a Task

1. `cgx summary` — orient yourself
2. `cgx query find <entry-point>` — locate the relevant node
3. `cgx query blast-radius <node>` — know the risk before touching it
4. Open only the specific files you need

## Workflow: Before Every Edit

1. `cgx query blast-radius <function>` — what breaks?
2. `cgx query deps <function>` — what does it depend on?
3. Make the change
4. `cgx query blast-radius <function>` — verify ripple is as expected

## Token Budget

| Action | Approx tokens |
|---------------------------|---------------|
| `cgx summary` | ~400 |
| `cgx query find X` | ~200 |
| `cgx query blast-radius X`| ~300-800 |
| Opening one source file | ~2,000-15,000 |

Prefer 3 cgx queries over opening 1 file speculatively.

## This Codebase

- **Indexed:** {{ indexed_at }}
- **Nodes:** {{ node_count }} ({{ function_count }} functions,
 {{ class_count }} classes, {{ file_count }} files)
- **Edges:** {{ edge_count }}
- **Languages:** {{ language_breakdown }}
- **Communities:** {{ community_count }}

### Top Communities
{{ top_communities_list }}

### Hotspots (highest risk — review carefully before editing)
{{ hotspots_list }}

### Entry Points (nothing imports these — safe starting points)
{{ entry_points_list }}

### Most Depended-On Nodes (god nodes — change with extreme care)
{{ god_nodes_list }}
"#;
138
/// Markdown template for `AGENTS.md`.
///
/// Every `{{ name }}` placeholder is substituted by `generate_agents_md`;
/// the rest of the text is emitted verbatim.
const AGENTS_TEMPLATE: &str = r#"# Codebase Architecture

> Auto-generated by cgx {{ indexed_at }}

## Overview
{{ node_count }} nodes across {{ file_count }} files.
Primary languages: {{ language_breakdown }}.
{{ community_count }} architectural communities detected.

## Module Map
{{ community_descriptions }}

## Hotspots
These files change frequently and have many dependents.
Review carefully before editing.
{{ hotspots_table }}

## Entry Points
These files have no inbound imports — they are roots.
{{ entry_points_list }}

## AI Integration
This repo is indexed by cgx. Two integration modes are available:

**Skills (zero config):** Read `CGX_SKILL.md` for command reference.

**MCP (structured):** Run `cgx setup` to configure your editor,
then `cgx mcp` to start the server.
"#;
168
169pub fn build_skill_data(db: &GraphDb) -> anyhow::Result<SkillData> {
170 let node_count = db.node_count()?;
171 let edge_count = db.edge_count()?;
172 let lang_breakdown = db.get_language_breakdown()?;
173 let communities = db.get_communities()?;
174 let counts_by_kind = db.get_node_counts_by_kind()?;
175
176 let function_count = counts_by_kind.get("Function").copied().unwrap_or(0);
177 let class_count = counts_by_kind.get("Class").copied().unwrap_or(0);
178 let file_count = counts_by_kind.get("File").copied().unwrap_or(0);
179
180 let language_breakdown = if lang_breakdown.is_empty() {
181 "none".to_string()
182 } else {
183 let mut entries: Vec<_> = lang_breakdown.iter().collect();
184 entries.sort_by(|a, b| b.1.partial_cmp(a.1).unwrap_or(std::cmp::Ordering::Equal));
185 entries
186 .iter()
187 .map(|(lang, pct)| format!("{} {:.0}%", lang, *pct * 100.0))
188 .collect::<Vec<_>>()
189 .join(", ")
190 };
191
192 let top_communities: Vec<CommunityInfo> = communities
193 .iter()
194 .take(5)
195 .map(|(id, label, count, _top_nodes)| CommunityInfo {
196 id: *id,
197 label: label.clone(),
198 node_count: *count,
199 })
200 .collect();
201
202 let all_nodes = db.get_all_nodes()?;
203
204 let mut file_nodes: Vec<&Node> = all_nodes
205 .iter()
206 .filter(|n| n.kind == "File" && n.churn > 0.0)
207 .collect();
208 file_nodes.sort_by(|a, b| {
209 let sa = a.churn * a.coupling + a.in_degree as f64 * 0.01;
210 let sb = b.churn * b.coupling + b.in_degree as f64 * 0.01;
211 sb.partial_cmp(&sa).unwrap_or(std::cmp::Ordering::Equal)
212 });
213 let hotspots: Vec<Node> = file_nodes.iter().take(5).map(|&n| n.clone()).collect();
214
215 let mut entry_nodes: Vec<&Node> = all_nodes
216 .iter()
217 .filter(|n| n.in_degree == 0 && n.kind != "File" && n.kind != "Author")
218 .collect();
219 entry_nodes.sort_by(|a, b| b.out_degree.cmp(&a.out_degree));
220 let entry_points: Vec<Node> = entry_nodes.iter().take(5).map(|&n| n.clone()).collect();
221
222 let mut god_nodes: Vec<&Node> = all_nodes
223 .iter()
224 .filter(|n| n.in_degree > 0 && n.kind != "File")
225 .collect();
226 god_nodes.sort_by(|a, b| b.in_degree.cmp(&a.in_degree));
227 let mut seen_names = std::collections::HashSet::new();
229 let top_god_nodes: Vec<Node> = god_nodes.iter()
230 .filter(|n| seen_names.insert(n.name.clone()))
231 .take(5)
232 .map(|&n| n.clone())
233 .collect();
234
235 Ok(SkillData {
236 indexed_at: chrono::Utc::now().to_rfc3339(),
237 node_count,
238 function_count,
239 class_count,
240 file_count,
241 edge_count,
242 language_breakdown,
243 community_count: communities.len() as u32,
244 top_communities,
245 hotspots,
246 entry_points,
247 god_nodes: top_god_nodes,
248 })
249}
250
251pub fn generate_skill(data: &SkillData) -> String {
252 let mut c = SKILL_TEMPLATE.to_string();
253
254 c = c.replace("{{ indexed_at }}", &data.indexed_at);
255 c = c.replace("{{ node_count }}", &data.node_count.to_string());
256 c = c.replace("{{ function_count }}", &data.function_count.to_string());
257 c = c.replace("{{ class_count }}", &data.class_count.to_string());
258 c = c.replace("{{ file_count }}", &data.file_count.to_string());
259 c = c.replace("{{ edge_count }}", &data.edge_count.to_string());
260 c = c.replace("{{ language_breakdown }}", &data.language_breakdown);
261 c = c.replace("{{ community_count }}", &data.community_count.to_string());
262
263 let communities_list = if data.top_communities.is_empty() {
264 "_(none detected)_\n".to_string()
265 } else {
266 data.top_communities.iter()
267 .map(|ci| format!("- **#{}** — {} ({} nodes)", ci.id, ci.label, ci.node_count))
268 .collect::<Vec<_>>().join("\n")
269 };
270 c = c.replace("{{ top_communities_list }}", &communities_list);
271
272 let hotspots_list = if data.hotspots.is_empty() {
273 "_(none — no git history or low churn)_\n".to_string()
274 } else {
275 data.hotspots.iter()
276 .map(|n| format!("- `{}` — churn {:.2}, {} callers", n.path, n.churn, n.in_degree))
277 .collect::<Vec<_>>().join("\n")
278 };
279 c = c.replace("{{ hotspots_list }}", &hotspots_list);
280
281 let entry_list = if data.entry_points.is_empty() {
282 "_(none detected)_\n".to_string()
283 } else {
284 data.entry_points.iter()
285 .map(|n| format!("- `{}` ({})", n.name, n.kind))
286 .collect::<Vec<_>>().join("\n")
287 };
288 c = c.replace("{{ entry_points_list }}", &entry_list);
289
290 let god_list = if data.god_nodes.is_empty() {
291 "_(none detected)_\n".to_string()
292 } else {
293 data.god_nodes.iter()
294 .map(|n| format!("- `{}` — {} callers", n.name, n.in_degree))
295 .collect::<Vec<_>>().join("\n")
296 };
297 c = c.replace("{{ god_nodes_list }}", &god_list);
298
299 if c.contains("{{") {
300 eprintln!(" Warning: CGX_SKILL.md contains unreplaced placeholder tokens");
301 }
302 c
303}
304
305pub fn generate_agents_md(data: &SkillData) -> String {
306 let mut c = AGENTS_TEMPLATE.to_string();
307
308 c = c.replace("{{ indexed_at }}", &data.indexed_at);
309 c = c.replace("{{ node_count }}", &data.node_count.to_string());
310 c = c.replace("{{ file_count }}", &data.file_count.to_string());
311 c = c.replace("{{ language_breakdown }}", &data.language_breakdown);
312 c = c.replace("{{ community_count }}", &data.community_count.to_string());
313
314 let community_descriptions = if data.top_communities.is_empty() {
315 "No architectural communities detected.\n".to_string()
316 } else {
317 data.top_communities.iter()
318 .map(|ci| format!("- **#{} — {}** ({} nodes)", ci.id, ci.label, ci.node_count))
319 .collect::<Vec<_>>().join("\n")
320 };
321 c = c.replace("{{ community_descriptions }}", &community_descriptions);
322
323 let hotspots_table = if data.hotspots.is_empty() {
324 "No hotspots detected (no git history or low churn).\n".to_string()
325 } else {
326 let mut t = String::from("| File | Churn | Callers |\n|------|-------|--------|\n");
327 for n in &data.hotspots {
328 t.push_str(&format!("| `{}` | {:.2} | {} |\n", n.path, n.churn, n.in_degree));
329 }
330 t
331 };
332 c = c.replace("{{ hotspots_table }}", &hotspots_table);
333
334 let entry_list = if data.entry_points.is_empty() {
335 "_(none detected)_\n".to_string()
336 } else {
337 data.entry_points.iter()
338 .map(|n| format!("- `{}` ({})", n.name, n.kind))
339 .collect::<Vec<_>>().join("\n")
340 };
341 c = c.replace("{{ entry_points_list }}", &entry_list);
342
343 if c.contains("{{") {
344 eprintln!(" Warning: AGENTS.md contains unreplaced placeholder tokens");
345 }
346
347 c
348}
349
350pub fn write_skill(repo_root: &Path, data: &SkillData) -> anyhow::Result<()> {
351 std::fs::write(repo_root.join("CGX_SKILL.md"), generate_skill(data))?;
352 Ok(())
353}
354
355pub fn write_agents_md(repo_root: &Path, data: &SkillData) -> anyhow::Result<()> {
356 std::fs::write(repo_root.join("AGENTS.md"), generate_agents_md(data))?;
357 Ok(())
358}
359
360pub fn install_git_hooks(repo_root: &Path) -> anyhow::Result<(bool, bool)> {
361 let hooks_dir = repo_root.join(".git").join("hooks");
362 if !hooks_dir.exists() {
363 return Ok((false, false));
364 }
365 Ok((install_one_hook(&hooks_dir.join("post-commit")), install_one_hook(&hooks_dir.join("post-checkout"))))
366}
367
/// Quotes `s` as a single POSIX-shell word.
///
/// The text is wrapped in single quotes; each embedded single quote is
/// emitted as `'\''` (close quote, escaped quote, reopen quote).
fn shell_single_quote(s: &str) -> String {
    format!("'{}'", s.replace('\'', r"'\''"))
}

/// Writes a cgx-managed hook script to `path`.
///
/// An existing file is only overwritten when its second line contains the
/// `cgx-managed` marker; any other existing hook is left untouched and a
/// warning is printed. Returns `true` when the script was written and `false`
/// when it was skipped, could not be read, or could not be written.
fn install_one_hook(path: &Path) -> bool {
    if path.exists() {
        match std::fs::read_to_string(path) {
            Ok(existing) => {
                // The marker sits on the line right after the shebang.
                let managed = existing
                    .lines()
                    .nth(1)
                    .map_or(false, |line| line.contains("cgx-managed"));
                if !managed {
                    eprintln!(
                        " Warning: {} exists but was not created by cgx. Skipping.",
                        path.display()
                    );
                    return false;
                }
            }
            Err(_) => return false,
        }
    }

    // Prefer the absolute path of the running binary; fall back to a PATH
    // lookup when it is unavailable or not valid UTF-8.
    let bin = std::env::current_exe()
        .ok()
        .and_then(|p| p.to_str().map(str::to_owned))
        .unwrap_or_else(|| "cgx".to_string());

    // Quote the path so spaces or shell metacharacters in it cannot split or
    // alter the command when `sh` runs the hook.
    let content = format!(
        "#!/bin/sh\n# cgx-managed\n{} analyze --incremental --quiet\n",
        shell_single_quote(&bin)
    );
    if std::fs::write(path, content).is_err() {
        return false;
    }

    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        // Best effort: the script is already written; a chmod failure is ignored.
        let _ = std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o755));
    }
    true
}