1use anyhow::{Context, Result};
19use rusqlite::Connection;
20use serde::Deserialize;
21use std::collections::HashMap;
22
23use crate::cache::CacheManager;
24use crate::models::SearchResult;
25
/// Maximum number of representative ("anchor") symbol names kept per module
/// when building LLM evidence.
const ANCHOR_SYMBOLS_PER_MODULE: usize = 5;

/// Cap on how many modules are listed in the evidence context sent to the LLM.
const MAX_MODULES_IN_EVIDENCE: usize = 25;

/// A single named glossary entry, ready for Markdown rendering.
#[derive(Debug, Clone)]
pub struct Concept {
    /// Display name of the concept (rendered as a `###` heading).
    pub name: String,
    /// Free-text definition; rendered as a Markdown block quote.
    pub definition: String,
    /// Module paths (e.g. `src/pulse`) where the concept is implemented;
    /// rendered as `/wiki/<slug>/` links.
    pub related_modules: Vec<String>,
    /// Optional grouping category; `None` falls back to "Concepts" at render time.
    pub category: Option<String>,
}
46
/// Parsed, render-ready glossary content: an optional intro plus all concepts.
#[derive(Debug, Clone, Default)]
pub struct GlossaryData {
    /// Concepts to render, in the order the LLM produced them.
    pub concepts: Vec<Concept>,
    /// Optional introductory paragraph shown before the concept sections.
    pub intro: Option<String>,
}
54
/// Evidence summarizing one top-level module for the LLM prompt.
#[derive(Debug, Clone)]
pub struct ModuleEvidence {
    /// Module path, e.g. `src` or `src/pulse` (first two path segments).
    pub path: String,
    /// Number of indexed files that fall under this module.
    pub file_count: usize,
    /// Representative symbol names (type-like kinds first), capped at
    /// `ANCHOR_SYMBOLS_PER_MODULE`.
    pub anchor_symbols: Vec<String>,
}
65
/// Codebase-wide statistics extracted from the cache database, used to build
/// the context handed to the LLM for concept generation.
#[derive(Debug, Clone, Default)]
pub struct GlossaryEvidence {
    /// Total number of indexed files.
    pub total_files: usize,
    /// Sum of per-file line counts across all files.
    pub total_lines: usize,
    /// `(language, file count)` pairs, most common first (top 10).
    pub language_mix: Vec<(String, usize)>,
    /// Count of dependency edges that resolved to a known file.
    pub dependency_edges: usize,
    /// Paths of the most-imported files (dependency hotspots, top 8).
    pub hotspot_files: Vec<String>,
    /// Largest modules by file count, capped at `MAX_MODULES_IN_EVIDENCE`.
    pub modules: Vec<ModuleEvidence>,
}
76
/// JSON shape expected from the LLM's concepts response.
#[derive(Debug, Clone, Deserialize)]
pub struct ConceptsResponse {
    /// Optional introductory paragraph; absent key deserializes to `None`.
    #[serde(default)]
    pub intro: Option<String>,
    /// Concept entries; absent key deserializes to an empty list.
    #[serde(default)]
    pub concepts: Vec<RawConcept>,
}
86
/// One concept entry as deserialized from the LLM JSON.
/// Only `name` is required in the payload; all other fields default.
#[derive(Debug, Clone, Deserialize)]
pub struct RawConcept {
    pub name: String,
    #[serde(default)]
    pub definition: String,
    #[serde(default)]
    pub category: Option<String>,
    #[serde(default)]
    pub related_modules: Vec<String>,
}
97
98impl From<RawConcept> for Concept {
99 fn from(raw: RawConcept) -> Self {
100 Concept {
101 name: raw.name,
102 definition: raw.definition,
103 category: raw.category,
104 related_modules: raw.related_modules,
105 }
106 }
107}
108
109impl From<ConceptsResponse> for GlossaryData {
110 fn from(resp: ConceptsResponse) -> Self {
111 GlossaryData {
112 concepts: resp.concepts.into_iter().map(Into::into).collect(),
113 intro: resp.intro,
114 }
115 }
116}
117
/// Derive a file's "module" from its path: the first two `/`-separated
/// segments (`src/pulse/wiki.rs` -> `src/pulse`), just the directory for a
/// depth-two path (`src/models.rs` -> `src`), and the empty string for
/// top-level files with no directory component.
fn module_of(file_path: &str) -> String {
    let mut segments = file_path.split('/');
    // `split` always yields at least one segment, even for "".
    let first = segments.next().unwrap_or_default();
    let second = segments.next();
    let deeper = segments.next().is_some();
    match (second, deeper) {
        // No directory component at all (e.g. "README.md").
        (None, _) => String::new(),
        // Exactly "dir/file" — the module is the directory.
        (Some(_), false) => first.to_string(),
        // Three or more segments — keep the first two.
        (Some(sub), true) => format!("{}/{}", first, sub),
    }
}
131
/// Turn a module path into a URL-safe wiki slug by joining its segments
/// with hyphens (`src/pulse` -> `src-pulse`).
fn module_slug(module_path: &str) -> String {
    module_path.split('/').collect::<Vec<_>>().join("-")
}
136
/// Rank a symbol kind for anchor selection: lower is better.
/// Type-like kinds (0) beat callables (1), which beat member-like kinds (2);
/// anything unrecognized sorts last (3). Matching is case-insensitive.
fn anchor_priority(kind: &str) -> u8 {
    const TYPE_LIKE: [&str; 7] = [
        "struct", "class", "trait", "interface", "enum", "type", "typedef",
    ];
    const CALLABLE_LIKE: [&str; 4] = ["function", "method", "macro", "module"];
    const MEMBER_LIKE: [&str; 5] = ["constant", "property", "event", "attribute", "export"];

    let normalized = kind.to_lowercase();
    let k = normalized.as_str();
    if TYPE_LIKE.contains(&k) {
        0
    } else if CALLABLE_LIKE.contains(&k) {
        1
    } else if MEMBER_LIKE.contains(&k) {
        2
    } else {
        3
    }
}
151
/// Gather codebase statistics and per-module anchor symbols from the cache's
/// `meta.db`, as evidence for LLM concept generation.
///
/// Returns `Ok(None)` when the `symbols` table does not exist (the index has
/// not been built). Individual statistics queries degrade to empty/zero
/// values on failure rather than aborting; only opening the database or
/// preparing the symbols query can return `Err`.
pub fn collect_glossary_evidence(cache: &CacheManager) -> Result<Option<GlossaryEvidence>> {
    let db_path = cache.path().join("meta.db");
    let conn = Connection::open(&db_path).context("Failed to open meta.db")?;

    // Probe for the `symbols` table; its absence means indexing never ran.
    let has_symbols: bool = conn
        .query_row(
            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='symbols'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .map(|c| c > 0)
        .unwrap_or(false);

    if !has_symbols {
        return Ok(None);
    }

    // Headline counts; query failures fall back to 0.
    let total_files: usize = conn
        .query_row("SELECT COUNT(*) FROM files", [], |r| r.get(0))
        .unwrap_or(0);
    let total_lines: usize = conn
        .query_row("SELECT COALESCE(SUM(line_count), 0) FROM files", [], |r| {
            r.get(0)
        })
        .unwrap_or(0);

    // Top 10 languages by file count, most common first.
    let mut language_mix: Vec<(String, usize)> = Vec::new();
    if let Ok(mut stmt) = conn.prepare(
        "SELECT COALESCE(language, 'other'), COUNT(*) FROM files \
         GROUP BY language ORDER BY COUNT(*) DESC LIMIT 10",
    ) {
        if let Ok(rows) =
            stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, usize>(1)?)))
        {
            language_mix = rows.flatten().collect();
        }
    }

    // Only count dependency edges that resolved to a known file.
    let dependency_edges: usize = conn
        .query_row::<usize, _, _>(
            "SELECT COUNT(*) FROM file_dependencies WHERE resolved_file_id IS NOT NULL",
            [],
            |row| row.get(0),
        )
        .unwrap_or(0);

    // Most-imported files (top 8); only queried when any edges resolved.
    let mut hotspot_files: Vec<String> = Vec::new();
    if dependency_edges > 0 {
        if let Ok(mut stmt) = conn.prepare(
            "SELECT f.path, COUNT(DISTINCT fd.file_id) as dep_count \
             FROM file_dependencies fd JOIN files f ON fd.resolved_file_id = f.id \
             GROUP BY fd.resolved_file_id ORDER BY dep_count DESC LIMIT 8",
        ) {
            if let Ok(rows) = stmt.query_map([], |row| row.get::<_, String>(0)) {
                hotspot_files = rows.flatten().collect();
            }
        }
    }

    // Pull every file's symbol JSON blob so symbols can be bucketed by module.
    let mut stmt = conn.prepare(
        "SELECT s.symbols_json, f.path, f.line_count \
         FROM symbols s JOIN files f ON s.file_id = f.id",
    )?;
    let rows: Vec<(String, String, usize)> = stmt
        .query_map([], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, String>(1)?,
                // line_count may be NULL; treat as 0.
                row.get::<_, usize>(2).unwrap_or(0),
            ))
        })?
        .filter_map(|r| r.ok())
        .collect();

    // Per-module accumulator: file count plus (priority, name) anchor candidates.
    #[derive(Default)]
    struct ModuleBucket {
        file_count: usize,
        candidates: Vec<(u8, String)>,
    }

    let mut by_module: HashMap<String, ModuleBucket> = HashMap::new();

    for (symbols_json, file_path, _line_count) in rows {
        let module = module_of(&file_path);
        if module.is_empty() {
            // Top-level files (no directory component) belong to no module.
            continue;
        }
        let bucket = by_module.entry(module.clone()).or_default();
        bucket.file_count += 1;

        // Malformed symbol JSON is skipped; the file still counted above.
        let symbols: Vec<SearchResult> = match serde_json::from_str(&symbols_json) {
            Ok(s) => s,
            Err(_) => continue,
        };

        for sr in symbols {
            let Some(name) = sr.symbol else { continue };
            if name.len() < 3 {
                // Very short names make poor anchors.
                continue;
            }
            let kind_str = sr.kind.to_string();
            let kl = kind_str.to_lowercase();
            // Drop low-signal kinds entirely.
            if kl == "variable" || kl == "import" || kl == "export" || kl == "unknown" {
                continue;
            }
            let priority = anchor_priority(&kind_str);
            bucket.candidates.push((priority, name));
        }
    }

    // Per module: sort candidates (type-like kinds first, then alphabetical),
    // dedupe names, and keep at most ANCHOR_SYMBOLS_PER_MODULE anchors.
    let mut modules: Vec<ModuleEvidence> = by_module
        .into_iter()
        .map(|(path, mut bucket)| {
            bucket
                .candidates
                .sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
            let mut anchors: Vec<String> = Vec::new();
            let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
            for (_, name) in bucket.candidates {
                if seen.insert(name.clone()) {
                    anchors.push(name);
                    if anchors.len() >= ANCHOR_SYMBOLS_PER_MODULE {
                        break;
                    }
                }
            }
            ModuleEvidence {
                path,
                file_count: bucket.file_count,
                anchor_symbols: anchors,
            }
        })
        .collect();

    // Biggest modules first; ties broken by path for determinism.
    modules.sort_by(|a, b| b.file_count.cmp(&a.file_count).then_with(|| a.path.cmp(&b.path)));
    modules.truncate(MAX_MODULES_IN_EVIDENCE);

    Ok(Some(GlossaryEvidence {
        total_files,
        total_lines,
        language_mix,
        dependency_edges,
        hotspot_files,
        modules,
    }))
}
315
316pub fn build_concepts_context(evidence: &GlossaryEvidence, project_name: &str) -> String {
320 let mut ctx = String::new();
321
322 ctx.push_str(&format!("Project: {}\n", project_name));
323 ctx.push_str(&format!(
324 "Scale: {} files, {} lines, {} modules, {} dependency edges\n",
325 evidence.total_files,
326 evidence.total_lines,
327 evidence.modules.len(),
328 evidence.dependency_edges,
329 ));
330
331 if !evidence.language_mix.is_empty() {
332 let langs: Vec<String> = evidence
333 .language_mix
334 .iter()
335 .map(|(lang, count)| format!("{} ({})", lang, count))
336 .collect();
337 ctx.push_str(&format!("Languages: {}\n", langs.join(", ")));
338 }
339 ctx.push('\n');
340
341 ctx.push_str("Top-level modules (with anchor symbol names):\n");
342 for m in &evidence.modules {
343 if m.anchor_symbols.is_empty() {
344 ctx.push_str(&format!("- {} ({} files)\n", m.path, m.file_count));
345 } else {
346 ctx.push_str(&format!(
347 "- {} ({} files) — key symbols: {}\n",
348 m.path,
349 m.file_count,
350 m.anchor_symbols.join(", ")
351 ));
352 }
353 }
354 ctx.push('\n');
355
356 if !evidence.hotspot_files.is_empty() {
357 ctx.push_str("Dependency hotspots (most-imported files):\n");
358 for path in &evidence.hotspot_files {
359 ctx.push_str(&format!("- {}\n", path));
360 }
361 ctx.push('\n');
362 }
363
364 ctx
365}
366
367pub fn parse_concepts_response(raw: &str) -> Result<ConceptsResponse> {
374 let trimmed = raw.trim();
375
376 let cleaned: &str = if let Some(rest) = trimmed.strip_prefix("```json") {
378 rest.trim_start().trim_end_matches("```").trim()
379 } else if let Some(rest) = trimmed.strip_prefix("```") {
380 rest.trim_start().trim_end_matches("```").trim()
381 } else {
382 trimmed
383 };
384
385 let slice = if cleaned.starts_with('{') {
388 cleaned
389 } else if let (Some(start), Some(end)) = (cleaned.find('{'), cleaned.rfind('}')) {
390 &cleaned[start..=end]
391 } else {
392 cleaned
393 };
394
395 serde_json::from_str::<ConceptsResponse>(slice)
396 .context("Failed to parse concepts JSON response from LLM")
397}
398
399pub fn render_glossary_markdown(data: &GlossaryData) -> String {
403 if data.concepts.is_empty() {
404 return "*Concepts are generated by the LLM narration pipeline. \
405 Re-run `rfx pulse generate` with LLM enabled to populate this page.*\n"
406 .to_string();
407 }
408
409 let mut md = String::new();
410
411 if let Some(ref intro) = data.intro {
412 md.push_str(intro.trim());
413 md.push_str("\n\n");
414 }
415
416 let mut order: Vec<String> = Vec::new();
419 let mut grouped: HashMap<String, Vec<&Concept>> = HashMap::new();
420 for concept in &data.concepts {
421 let cat = concept
422 .category
423 .clone()
424 .unwrap_or_else(|| "Concepts".to_string());
425 if !grouped.contains_key(&cat) {
426 order.push(cat.clone());
427 }
428 grouped.entry(cat).or_default().push(concept);
429 }
430
431 md.push_str(&format!(
432 "**{}** core concepts across {} {}.\n\n",
433 data.concepts.len(),
434 order.len(),
435 if order.len() == 1 { "category" } else { "categories" },
436 ));
437
438 for cat in &order {
439 md.push_str(&format!("## {}\n\n", cat));
440 if let Some(items) = grouped.get(cat) {
441 for concept in items {
442 md.push_str(&format!("### {}\n\n", concept.name));
443
444 for line in concept.definition.trim().lines() {
446 md.push_str("> ");
447 md.push_str(line);
448 md.push('\n');
449 }
450 md.push('\n');
451
452 if !concept.related_modules.is_empty() {
453 let links: Vec<String> = concept
454 .related_modules
455 .iter()
456 .map(|m| {
457 format!("[`{}`](/wiki/{}/)", m.trim(), module_slug(m.trim()))
458 })
459 .collect();
460 md.push_str(&format!("*Implemented in {}*\n\n", links.join(", ")));
461 }
462 }
463 }
464 }
465
466 md
467}
468
469pub fn render_glossary_no_llm(evidence: &GlossaryEvidence) -> String {
473 let mut md = String::new();
474 md.push_str(
475 "*Concepts are generated by the LLM narration pipeline. \
476 Re-run `rfx pulse generate` with LLM enabled to populate this page.*\n\n",
477 );
478
479 if evidence.modules.is_empty() {
480 return md;
481 }
482
483 md.push_str("**Modules in this codebase:**\n\n");
484 for m in &evidence.modules {
485 md.push_str(&format!(
486 "- [`{}`](/wiki/{}/) ({} files)\n",
487 m.path,
488 module_slug(&m.path),
489 m.file_count
490 ));
491 }
492 md.push('\n');
493 md
494}
495
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cache::CacheManager;
    use tempfile::TempDir;

    // Builds an initialized but otherwise empty cache in a temp dir.
    // The TempDir is returned alongside so the directory outlives the test.
    fn empty_cache() -> (TempDir, CacheManager) {
        let tmp = TempDir::new().unwrap();
        let cache = CacheManager::new(tmp.path().to_str().unwrap());
        cache.init().unwrap();
        (tmp, cache)
    }

    #[test]
    fn test_module_of() {
        assert_eq!(module_of("src/models.rs"), "src");
        assert_eq!(module_of("src/pulse/wiki.rs"), "src/pulse");
        // Only the first two path segments form the module.
        assert_eq!(module_of("src/parsers/rust/mod.rs"), "src/parsers");
        // Top-level files have no module.
        assert_eq!(module_of("README.md"), "");
    }

    #[test]
    fn test_module_slug() {
        assert_eq!(module_slug("src"), "src");
        assert_eq!(module_slug("src/pulse"), "src-pulse");
        assert_eq!(module_slug("src/parsers/rust"), "src-parsers-rust");
    }

    #[test]
    fn test_anchor_priority_orders_types_first() {
        // Lower priority value sorts earlier: types < callables < members < unknown.
        assert!(anchor_priority("struct") < anchor_priority("function"));
        assert!(anchor_priority("trait") < anchor_priority("constant"));
        assert!(anchor_priority("enum") < anchor_priority("variable"));
    }

    #[test]
    fn test_collect_glossary_evidence_empty_cache() {
        // A fresh cache has no symbols table, so evidence collection yields None.
        let (_tmp, cache) = empty_cache();
        let result = collect_glossary_evidence(&cache).unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn test_build_concepts_context_includes_modules() {
        let evidence = GlossaryEvidence {
            total_files: 120,
            total_lines: 18_500,
            language_mix: vec![("rust".to_string(), 110), ("toml".to_string(), 10)],
            dependency_edges: 340,
            hotspot_files: vec!["src/models.rs".to_string()],
            modules: vec![
                ModuleEvidence {
                    path: "src".to_string(),
                    file_count: 42,
                    anchor_symbols: vec![
                        "Cli".to_string(),
                        "SearchResult".to_string(),
                        "run".to_string(),
                    ],
                },
                ModuleEvidence {
                    path: "src/pulse".to_string(),
                    file_count: 18,
                    anchor_symbols: vec![
                        "generate_site".to_string(),
                        "PulseReport".to_string(),
                    ],
                },
                ModuleEvidence {
                    path: "src/query".to_string(),
                    file_count: 9,
                    anchor_symbols: vec!["QueryEngine".to_string()],
                },
            ],
        };
        let ctx = build_concepts_context(&evidence, "Reflex");

        // Every section of the evidence should surface in the context text.
        assert!(ctx.contains("Project: Reflex"));
        assert!(ctx.contains("120 files"));
        assert!(ctx.contains("src (42 files)"));
        assert!(ctx.contains("src/pulse"));
        assert!(ctx.contains("src/query"));
        assert!(ctx.contains("SearchResult"));
        assert!(ctx.contains("QueryEngine"));
        assert!(ctx.contains("Languages: rust (110)"));
        assert!(ctx.contains("Dependency hotspots"));
    }

    #[test]
    fn test_parse_concepts_response_valid_json() {
        let raw = r#"{
            "intro": "Reflex catalogs search primitives and indexing building blocks.",
            "concepts": [
                {
                    "name": "Trigram Index",
                    "category": "Core Capabilities",
                    "definition": "A fast inverted index built from three-character substrings.",
                    "related_modules": ["src/index", "src/query"]
                },
                {
                    "name": "Symbol Cache",
                    "category": "Data Model",
                    "definition": "A persistent store of parsed language symbols keyed by content hash.",
                    "related_modules": ["src/cache"]
                }
            ]
        }"#;

        let parsed = parse_concepts_response(raw).expect("should parse");
        assert_eq!(parsed.concepts.len(), 2);
        assert_eq!(parsed.concepts[0].name, "Trigram Index");
        assert_eq!(parsed.concepts[0].related_modules, vec!["src/index", "src/query"]);
        assert!(parsed.intro.as_ref().unwrap().contains("search primitives"));
    }

    #[test]
    fn test_parse_concepts_response_strips_markdown_fence() {
        // LLMs often wrap output in a ```json fence; the parser must strip it.
        let raw = "```json\n{\"intro\":\"x\",\"concepts\":[]}\n```";
        let parsed = parse_concepts_response(raw).expect("should parse");
        assert_eq!(parsed.concepts.len(), 0);
        assert_eq!(parsed.intro.as_deref(), Some("x"));
    }

    #[test]
    fn test_parse_concepts_response_extracts_embedded_json() {
        // JSON surrounded by prose: the outermost {...} span is extracted.
        let raw = "Here is the output you requested:\n\
            {\"intro\":\"y\",\"concepts\":[{\"name\":\"X\",\"definition\":\"d\"}]}\n\
            Hope that helps!";
        let parsed = parse_concepts_response(raw).expect("should parse");
        assert_eq!(parsed.concepts.len(), 1);
        assert_eq!(parsed.concepts[0].name, "X");
    }

    #[test]
    fn test_parse_concepts_response_rejects_malformed() {
        let raw = "this is definitely not JSON at all";
        assert!(parse_concepts_response(raw).is_err());
    }

    #[test]
    fn test_render_with_concepts() {
        let data = GlossaryData {
            intro: Some(
                "Reflex catalogs the core pieces of a local code-search engine."
                    .to_string(),
            ),
            concepts: vec![
                Concept {
                    name: "Trigram Index".to_string(),
                    definition: "A fast inverted index built from three-character substrings."
                        .to_string(),
                    category: Some("Core Capabilities".to_string()),
                    related_modules: vec!["src/index".to_string(), "src/query".to_string()],
                },
                Concept {
                    name: "Symbol Cache".to_string(),
                    definition: "A persistent store of parsed language symbols.".to_string(),
                    category: Some("Data Model".to_string()),
                    related_modules: vec!["src/cache".to_string()],
                },
            ],
        };

        let md = render_glossary_markdown(&data);

        // Intro, category headings, concept headings, quoted definitions,
        // and module links must all be present.
        assert!(md.contains("Reflex catalogs"));
        assert!(md.contains("## Core Capabilities"));
        assert!(md.contains("## Data Model"));
        assert!(md.contains("### Trigram Index"));
        assert!(md.contains("### Symbol Cache"));
        assert!(md.contains("> A fast inverted index"));
        assert!(md.contains("[`src/index`](/wiki/src-index/)"));
        assert!(md.contains("[`src/query`](/wiki/src-query/)"));
        assert!(md.contains("Implemented in"));

        // Guard against regressions to older, noisier output formats.
        assert!(!md.contains("```rust"), "no signature code blocks");
        assert!(!md.contains(":1"), "no file:line markers (cheap check)");
        assert!(!md.contains("| Symbol | Kind"), "no flat table");
    }

    #[test]
    fn test_render_no_llm_fallback() {
        // Empty glossary data renders the "re-run with LLM" placeholder.
        let data = GlossaryData::default();
        let md = render_glossary_markdown(&data);
        assert!(md.contains("LLM narration pipeline"));
        assert!(md.contains("rfx pulse generate"));
    }

    #[test]
    fn test_render_no_llm_fallback_with_evidence_lists_modules() {
        let evidence = GlossaryEvidence {
            total_files: 10,
            total_lines: 500,
            language_mix: vec![],
            dependency_edges: 0,
            hotspot_files: vec![],
            modules: vec![
                ModuleEvidence {
                    path: "src".to_string(),
                    file_count: 5,
                    anchor_symbols: vec![],
                },
                ModuleEvidence {
                    path: "src/pulse".to_string(),
                    file_count: 3,
                    anchor_symbols: vec![],
                },
            ],
        };
        let md = render_glossary_no_llm(&evidence);
        // Placeholder notice plus a linked module list.
        assert!(md.contains("LLM narration pipeline"));
        assert!(md.contains("[`src`](/wiki/src/)"));
        assert!(md.contains("[`src/pulse`](/wiki/src-pulse/)"));
        assert!(md.contains("(5 files)"));
    }

    #[test]
    fn test_concepts_response_into_glossary_data() {
        let resp = ConceptsResponse {
            intro: Some("hi".to_string()),
            concepts: vec![RawConcept {
                name: "Concept".to_string(),
                definition: "def".to_string(),
                category: Some("Cat".to_string()),
                related_modules: vec!["src".to_string()],
            }],
        };
        let data: GlossaryData = resp.into();
        assert_eq!(data.concepts.len(), 1);
        assert_eq!(data.concepts[0].name, "Concept");
        assert_eq!(data.intro.as_deref(), Some("hi"));
    }
}