1use anyhow::{Context, Result};
19use rusqlite::Connection;
20use serde::Deserialize;
21use std::collections::HashMap;
22
23use crate::cache::CacheManager;
24use crate::models::SearchResult;
25
/// Upper bound on how many distinct anchor symbol names are kept per module.
const ANCHOR_SYMBOLS_PER_MODULE: usize = 5;

/// Upper bound on how many modules are retained in the collected evidence.
const MAX_MODULES_IN_EVIDENCE: usize = 25;
32
/// One glossary entry: a named concept together with its explanation.
#[derive(Clone, Debug)]
pub struct Concept {
    /// Display name; rendered as the entry's heading.
    pub name: String,
    /// Explanatory text; rendered as a block quote below the heading.
    pub definition: String,
    /// Module paths that are rendered as wiki links under the definition.
    pub related_modules: Vec<String>,
    /// Optional grouping label; entries without one are grouped under "Concepts".
    pub category: Option<String>,
}
46
47#[derive(Debug, Clone, Default)]
49pub struct GlossaryData {
50 pub concepts: Vec<Concept>,
51 pub intro: Option<String>,
53}
54
/// Summary of a single module as seen in the index.
#[derive(Clone, Debug)]
pub struct ModuleEvidence {
    /// Module path made of `/`-separated directory components (e.g. `src/pulse`).
    pub path: String,
    /// Number of files with stored symbol data that fall under this module.
    pub file_count: usize,
    /// A short list of distinct symbol names that characterise the module.
    pub anchor_symbols: Vec<String>,
}
65
66#[derive(Debug, Clone, Default)]
68pub struct GlossaryEvidence {
69 pub total_files: usize,
70 pub total_lines: usize,
71 pub language_mix: Vec<(String, usize)>,
72 pub dependency_edges: usize,
73 pub hotspot_files: Vec<String>,
74 pub modules: Vec<ModuleEvidence>,
75}
76
77#[derive(Debug, Clone, Deserialize)]
80pub struct ConceptsResponse {
81 #[serde(default)]
82 pub intro: Option<String>,
83 #[serde(default)]
84 pub concepts: Vec<RawConcept>,
85}
86
87#[derive(Debug, Clone, Deserialize)]
88pub struct RawConcept {
89 pub name: String,
90 #[serde(default)]
91 pub definition: String,
92 #[serde(default)]
93 pub category: Option<String>,
94 #[serde(default)]
95 pub related_modules: Vec<String>,
96}
97
98impl From<RawConcept> for Concept {
99 fn from(raw: RawConcept) -> Self {
100 Concept {
101 name: raw.name,
102 definition: raw.definition,
103 category: raw.category,
104 related_modules: raw.related_modules,
105 }
106 }
107}
108
109impl From<ConceptsResponse> for GlossaryData {
110 fn from(resp: ConceptsResponse) -> Self {
111 GlossaryData {
112 concepts: resp.concepts.into_iter().map(Into::into).collect(),
113 intro: resp.intro,
114 }
115 }
116}
117
/// Derives the module a file belongs to from its `/`-separated path.
///
/// * no `/` in the path (a top-level file): empty string
/// * exactly one `/`: the part before it
/// * two or more `/`: the first two components joined with `/`
fn module_of(file_path: &str) -> String {
    let mut segments = file_path.split('/');
    // `split` always yields at least one item, so the fallback is never used.
    let first = segments.next().unwrap_or_default();

    let Some(second) = segments.next() else {
        return String::new();
    };

    match segments.next() {
        // `first/second` where `second` is the file name itself.
        None => first.to_string(),
        // Anything deeper collapses onto its first two components.
        Some(_) => format!("{}/{}", first, second),
    }
}
131
/// Builds the URL slug for a module path by turning every `/` into `-`.
fn module_slug(module_path: &str) -> String {
    module_path
        .chars()
        .map(|ch| if ch == '/' { '-' } else { ch })
        .collect()
}
136
/// Ranks a symbol kind for anchor selection; lower values are preferred.
///
/// The comparison is case-insensitive. Type-like kinds rank 0, callable or
/// container kinds rank 1, value-like kinds rank 2 and everything else 3.
fn anchor_priority(kind: &str) -> u8 {
    const TYPE_LIKE: [&str; 7] = [
        "struct",
        "class",
        "trait",
        "interface",
        "enum",
        "type",
        "typedef",
    ];
    const CALLABLE_LIKE: [&str; 4] = ["function", "method", "macro", "module"];
    const VALUE_LIKE: [&str; 5] = ["constant", "property", "event", "attribute", "export"];

    let lowered = kind.to_lowercase();
    let lowered = lowered.as_str();

    if TYPE_LIKE.contains(&lowered) {
        0
    } else if CALLABLE_LIKE.contains(&lowered) {
        1
    } else if VALUE_LIKE.contains(&lowered) {
        2
    } else {
        3
    }
}
151
152pub fn collect_glossary_evidence(cache: &CacheManager) -> Result<Option<GlossaryEvidence>> {
159 let db_path = cache.path().join("meta.db");
160 let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
161
162 let has_symbols: bool = conn
163 .query_row(
164 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='symbols'",
165 [],
166 |row| row.get::<_, i64>(0),
167 )
168 .map(|c| c > 0)
169 .unwrap_or(false);
170
171 if !has_symbols {
172 return Ok(None);
173 }
174
175 let total_files: usize = conn
176 .query_row("SELECT COUNT(*) FROM files", [], |r| r.get(0))
177 .unwrap_or(0);
178 let total_lines: usize = conn
179 .query_row("SELECT COALESCE(SUM(line_count), 0) FROM files", [], |r| {
180 r.get(0)
181 })
182 .unwrap_or(0);
183
184 let mut language_mix: Vec<(String, usize)> = Vec::new();
186 if let Ok(mut stmt) = conn.prepare(
187 "SELECT COALESCE(language, 'other'), COUNT(*) FROM files \
188 GROUP BY language ORDER BY COUNT(*) DESC LIMIT 10",
189 ) {
190 if let Ok(rows) = stmt.query_map([], |row| {
191 Ok((row.get::<_, String>(0)?, row.get::<_, usize>(1)?))
192 }) {
193 language_mix = rows.flatten().collect();
194 }
195 }
196
197 let dependency_edges: usize = conn
199 .query_row::<usize, _, _>(
200 "SELECT COUNT(*) FROM file_dependencies WHERE resolved_file_id IS NOT NULL",
201 [],
202 |row| row.get(0),
203 )
204 .unwrap_or(0);
205
206 let mut hotspot_files: Vec<String> = Vec::new();
208 if dependency_edges > 0 {
209 if let Ok(mut stmt) = conn.prepare(
210 "SELECT f.path, COUNT(DISTINCT fd.file_id) as dep_count \
211 FROM file_dependencies fd JOIN files f ON fd.resolved_file_id = f.id \
212 GROUP BY fd.resolved_file_id ORDER BY dep_count DESC LIMIT 8",
213 ) {
214 if let Ok(rows) = stmt.query_map([], |row| row.get::<_, String>(0)) {
215 hotspot_files = rows.flatten().collect();
216 }
217 }
218 }
219
220 let mut stmt = conn.prepare(
224 "SELECT s.symbols_json, f.path, f.line_count \
225 FROM symbols s JOIN files f ON s.file_id = f.id",
226 )?;
227 let rows: Vec<(String, String, usize)> = stmt
228 .query_map([], |row| {
229 Ok((
230 row.get::<_, String>(0)?,
231 row.get::<_, String>(1)?,
232 row.get::<_, usize>(2).unwrap_or(0),
233 ))
234 })?
235 .filter_map(|r| r.ok())
236 .collect();
237
238 #[derive(Default)]
239 struct ModuleBucket {
240 file_count: usize,
241 candidates: Vec<(u8, String)>,
243 }
244
245 let mut by_module: HashMap<String, ModuleBucket> = HashMap::new();
246
247 for (symbols_json, file_path, _line_count) in rows {
248 let module = module_of(&file_path);
249 if module.is_empty() {
250 continue;
251 }
252 let bucket = by_module.entry(module.clone()).or_default();
253 bucket.file_count += 1;
254
255 let symbols: Vec<SearchResult> = match serde_json::from_str(&symbols_json) {
256 Ok(s) => s,
257 Err(_) => continue,
258 };
259
260 for sr in symbols {
261 let Some(name) = sr.symbol else { continue };
262 if name.len() < 3 {
263 continue;
264 }
265 let kind_str = sr.kind.to_string();
266 let kl = kind_str.to_lowercase();
268 if kl == "variable" || kl == "import" || kl == "export" || kl == "unknown" {
269 continue;
270 }
271 let priority = anchor_priority(&kind_str);
272 bucket.candidates.push((priority, name));
273 }
274 }
275
276 let mut modules: Vec<ModuleEvidence> = by_module
279 .into_iter()
280 .map(|(path, mut bucket)| {
281 bucket
282 .candidates
283 .sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
284 let mut anchors: Vec<String> = Vec::new();
285 let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
286 for (_, name) in bucket.candidates {
287 if seen.insert(name.clone()) {
288 anchors.push(name);
289 if anchors.len() >= ANCHOR_SYMBOLS_PER_MODULE {
290 break;
291 }
292 }
293 }
294 ModuleEvidence {
295 path,
296 file_count: bucket.file_count,
297 anchor_symbols: anchors,
298 }
299 })
300 .collect();
301
302 modules.sort_by(|a, b| {
304 b.file_count
305 .cmp(&a.file_count)
306 .then_with(|| a.path.cmp(&b.path))
307 });
308 modules.truncate(MAX_MODULES_IN_EVIDENCE);
309
310 Ok(Some(GlossaryEvidence {
311 total_files,
312 total_lines,
313 language_mix,
314 dependency_edges,
315 hotspot_files,
316 modules,
317 }))
318}
319
320pub fn build_concepts_context(evidence: &GlossaryEvidence, project_name: &str) -> String {
324 let mut ctx = String::new();
325
326 ctx.push_str(&format!("Project: {}\n", project_name));
327 ctx.push_str(&format!(
328 "Scale: {} files, {} lines, {} modules, {} dependency edges\n",
329 evidence.total_files,
330 evidence.total_lines,
331 evidence.modules.len(),
332 evidence.dependency_edges,
333 ));
334
335 if !evidence.language_mix.is_empty() {
336 let langs: Vec<String> = evidence
337 .language_mix
338 .iter()
339 .map(|(lang, count)| format!("{} ({})", lang, count))
340 .collect();
341 ctx.push_str(&format!("Languages: {}\n", langs.join(", ")));
342 }
343 ctx.push('\n');
344
345 ctx.push_str("Top-level modules (with anchor symbol names):\n");
346 for m in &evidence.modules {
347 if m.anchor_symbols.is_empty() {
348 ctx.push_str(&format!("- {} ({} files)\n", m.path, m.file_count));
349 } else {
350 ctx.push_str(&format!(
351 "- {} ({} files) — key symbols: {}\n",
352 m.path,
353 m.file_count,
354 m.anchor_symbols.join(", ")
355 ));
356 }
357 }
358 ctx.push('\n');
359
360 if !evidence.hotspot_files.is_empty() {
361 ctx.push_str("Dependency hotspots (most-imported files):\n");
362 for path in &evidence.hotspot_files {
363 ctx.push_str(&format!("- {}\n", path));
364 }
365 ctx.push('\n');
366 }
367
368 ctx
369}
370
371pub fn parse_concepts_response(raw: &str) -> Result<ConceptsResponse> {
378 let trimmed = raw.trim();
379
380 let cleaned: &str = if let Some(rest) = trimmed.strip_prefix("```json") {
382 rest.trim_start().trim_end_matches("```").trim()
383 } else if let Some(rest) = trimmed.strip_prefix("```") {
384 rest.trim_start().trim_end_matches("```").trim()
385 } else {
386 trimmed
387 };
388
389 let slice = if cleaned.starts_with('{') {
392 cleaned
393 } else if let (Some(start), Some(end)) = (cleaned.find('{'), cleaned.rfind('}')) {
394 &cleaned[start..=end]
395 } else {
396 cleaned
397 };
398
399 serde_json::from_str::<ConceptsResponse>(slice)
400 .context("Failed to parse concepts JSON response from LLM")
401}
402
403pub fn render_glossary_markdown(data: &GlossaryData) -> String {
407 if data.concepts.is_empty() {
408 return "*Concepts are generated by the LLM narration pipeline. \
409 Re-run `rfx pulse generate` with LLM enabled to populate this page.*\n"
410 .to_string();
411 }
412
413 let mut md = String::new();
414
415 if let Some(ref intro) = data.intro {
416 md.push_str(intro.trim());
417 md.push_str("\n\n");
418 }
419
420 let mut order: Vec<String> = Vec::new();
423 let mut grouped: HashMap<String, Vec<&Concept>> = HashMap::new();
424 for concept in &data.concepts {
425 let cat = concept
426 .category
427 .clone()
428 .unwrap_or_else(|| "Concepts".to_string());
429 if !grouped.contains_key(&cat) {
430 order.push(cat.clone());
431 }
432 grouped.entry(cat).or_default().push(concept);
433 }
434
435 md.push_str(&format!(
436 "**{}** core concepts across {} {}.\n\n",
437 data.concepts.len(),
438 order.len(),
439 if order.len() == 1 {
440 "category"
441 } else {
442 "categories"
443 },
444 ));
445
446 for cat in &order {
447 md.push_str(&format!("## {}\n\n", cat));
448 if let Some(items) = grouped.get(cat) {
449 for concept in items {
450 md.push_str(&format!("### {}\n\n", concept.name));
451
452 for line in concept.definition.trim().lines() {
454 md.push_str("> ");
455 md.push_str(line);
456 md.push('\n');
457 }
458 md.push('\n');
459
460 if !concept.related_modules.is_empty() {
461 let links: Vec<String> = concept
462 .related_modules
463 .iter()
464 .map(|m| format!("[`{}`](/wiki/{}/)", m.trim(), module_slug(m.trim())))
465 .collect();
466 md.push_str(&format!("*Implemented in {}*\n\n", links.join(", ")));
467 }
468 }
469 }
470 }
471
472 md
473}
474
475pub fn render_glossary_no_llm(evidence: &GlossaryEvidence) -> String {
479 let mut md = String::new();
480 md.push_str(
481 "*Concepts are generated by the LLM narration pipeline. \
482 Re-run `rfx pulse generate` with LLM enabled to populate this page.*\n\n",
483 );
484
485 if evidence.modules.is_empty() {
486 return md;
487 }
488
489 md.push_str("**Modules in this codebase:**\n\n");
490 for m in &evidence.modules {
491 md.push_str(&format!(
492 "- [`{}`](/wiki/{}/) ({} files)\n",
493 m.path,
494 module_slug(&m.path),
495 m.file_count
496 ));
497 }
498 md.push('\n');
499 md
500}
501
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cache::CacheManager;
    use tempfile::TempDir;

    /// Creates an initialised cache inside a fresh temporary directory.
    ///
    /// The `TempDir` handle is returned together with the cache so the caller
    /// can keep it alive for the duration of the test.
    fn empty_cache() -> (TempDir, CacheManager) {
        let dir = TempDir::new().unwrap();
        let manager = CacheManager::new(dir.path().to_str().unwrap());
        manager.init().unwrap();
        (dir, manager)
    }

    /// Shorthand for building a `ModuleEvidence` fixture.
    fn module_fixture(path: &str, file_count: usize, anchors: &[&str]) -> ModuleEvidence {
        ModuleEvidence {
            path: path.to_string(),
            file_count,
            anchor_symbols: anchors.iter().map(|name| name.to_string()).collect(),
        }
    }

    /// Shorthand for building a `Concept` fixture with a category.
    fn concept_fixture(
        name: &str,
        definition: &str,
        category: &str,
        modules: &[&str],
    ) -> Concept {
        Concept {
            name: name.to_string(),
            definition: definition.to_string(),
            category: Some(category.to_string()),
            related_modules: modules.iter().map(|m| m.to_string()).collect(),
        }
    }

    // ---- path helpers ----------------------------------------------------

    #[test]
    fn test_module_of() {
        let cases = [
            ("src/models.rs", "src"),
            ("src/pulse/wiki.rs", "src/pulse"),
            ("src/parsers/rust/mod.rs", "src/parsers"),
            ("README.md", ""),
        ];
        for (input, expected) in cases {
            assert_eq!(module_of(input), expected);
        }
    }

    #[test]
    fn test_module_slug() {
        let cases = [
            ("src", "src"),
            ("src/pulse", "src-pulse"),
            ("src/parsers/rust", "src-parsers-rust"),
        ];
        for (input, expected) in cases {
            assert_eq!(module_slug(input), expected);
        }
    }

    #[test]
    fn test_anchor_priority_orders_types_first() {
        assert!(anchor_priority("struct") < anchor_priority("function"));
        assert!(anchor_priority("trait") < anchor_priority("constant"));
        assert!(anchor_priority("enum") < anchor_priority("variable"));
    }

    // ---- evidence collection ----------------------------------------------

    #[test]
    fn test_collect_glossary_evidence_empty_cache() {
        let (_dir, cache) = empty_cache();
        let evidence = collect_glossary_evidence(&cache).unwrap();
        assert!(evidence.is_none());
    }

    #[test]
    fn test_build_concepts_context_includes_modules() {
        let evidence = GlossaryEvidence {
            total_files: 120,
            total_lines: 18_500,
            language_mix: vec![("rust".to_string(), 110), ("toml".to_string(), 10)],
            dependency_edges: 340,
            hotspot_files: vec!["src/models.rs".to_string()],
            modules: vec![
                module_fixture("src", 42, &["Cli", "SearchResult", "run"]),
                module_fixture("src/pulse", 18, &["generate_site", "PulseReport"]),
                module_fixture("src/query", 9, &["QueryEngine"]),
            ],
        };

        let ctx = build_concepts_context(&evidence, "Reflex");

        let expected_fragments = [
            "Project: Reflex",
            "120 files",
            "src (42 files)",
            "src/pulse",
            "src/query",
            "SearchResult",
            "QueryEngine",
            "Languages: rust (110)",
            "Dependency hotspots",
        ];
        for fragment in expected_fragments {
            assert!(ctx.contains(fragment));
        }
    }

    // ---- response parsing --------------------------------------------------

    #[test]
    fn test_parse_concepts_response_valid_json() {
        let raw = r#"{
            "intro": "Reflex catalogs search primitives and indexing building blocks.",
            "concepts": [
                {
                    "name": "Trigram Index",
                    "category": "Core Capabilities",
                    "definition": "A fast inverted index built from three-character substrings.",
                    "related_modules": ["src/index", "src/query"]
                },
                {
                    "name": "Symbol Cache",
                    "category": "Data Model",
                    "definition": "A persistent store of parsed language symbols keyed by content hash.",
                    "related_modules": ["src/cache"]
                }
            ]
        }"#;

        let parsed = parse_concepts_response(raw).expect("should parse");

        assert_eq!(parsed.concepts.len(), 2);
        assert_eq!(parsed.concepts[0].name, "Trigram Index");
        assert_eq!(
            parsed.concepts[0].related_modules,
            vec!["src/index", "src/query"]
        );
        assert!(parsed.intro.as_ref().unwrap().contains("search primitives"));
    }

    #[test]
    fn test_parse_concepts_response_strips_markdown_fence() {
        let raw = "```json\n{\"intro\":\"x\",\"concepts\":[]}\n```";
        let parsed = parse_concepts_response(raw).expect("should parse");
        assert_eq!(parsed.concepts.len(), 0);
        assert_eq!(parsed.intro.as_deref(), Some("x"));
    }

    #[test]
    fn test_parse_concepts_response_extracts_embedded_json() {
        let raw = concat!(
            "Here is the output you requested:\n",
            "{\"intro\":\"y\",\"concepts\":[{\"name\":\"X\",\"definition\":\"d\"}]}\n",
            "Hope that helps!",
        );
        let parsed = parse_concepts_response(raw).expect("should parse");
        assert_eq!(parsed.concepts.len(), 1);
        assert_eq!(parsed.concepts[0].name, "X");
    }

    #[test]
    fn test_parse_concepts_response_rejects_malformed() {
        let raw = "this is definitely not JSON at all";
        assert!(parse_concepts_response(raw).is_err());
    }

    // ---- rendering ---------------------------------------------------------

    #[test]
    fn test_render_with_concepts() {
        let data = GlossaryData {
            intro: Some(
                "Reflex catalogs the core pieces of a local code-search engine.".to_string(),
            ),
            concepts: vec![
                concept_fixture(
                    "Trigram Index",
                    "A fast inverted index built from three-character substrings.",
                    "Core Capabilities",
                    &["src/index", "src/query"],
                ),
                concept_fixture(
                    "Symbol Cache",
                    "A persistent store of parsed language symbols.",
                    "Data Model",
                    &["src/cache"],
                ),
            ],
        };

        let md = render_glossary_markdown(&data);

        let expected_fragments = [
            "Reflex catalogs",
            "## Core Capabilities",
            "## Data Model",
            "### Trigram Index",
            "### Symbol Cache",
            "> A fast inverted index",
            "[`src/index`](/wiki/src-index/)",
            "[`src/query`](/wiki/src-query/)",
            "Implemented in",
        ];
        for fragment in expected_fragments {
            assert!(md.contains(fragment));
        }

        // The page must stay narrative: no code dumps, locations or tables.
        assert!(!md.contains("```rust"), "no signature code blocks");
        assert!(!md.contains(":1"), "no file:line markers (cheap check)");
        assert!(!md.contains("| Symbol | Kind"), "no flat table");
    }

    #[test]
    fn test_render_no_llm_fallback() {
        let md = render_glossary_markdown(&GlossaryData::default());
        assert!(md.contains("LLM narration pipeline"));
        assert!(md.contains("rfx pulse generate"));
    }

    #[test]
    fn test_render_no_llm_fallback_with_evidence_lists_modules() {
        let evidence = GlossaryEvidence {
            total_files: 10,
            total_lines: 500,
            language_mix: vec![],
            dependency_edges: 0,
            hotspot_files: vec![],
            modules: vec![
                module_fixture("src", 5, &[]),
                module_fixture("src/pulse", 3, &[]),
            ],
        };

        let md = render_glossary_no_llm(&evidence);

        assert!(md.contains("LLM narration pipeline"));
        assert!(md.contains("[`src`](/wiki/src/)"));
        assert!(md.contains("[`src/pulse`](/wiki/src-pulse/)"));
        assert!(md.contains("(5 files)"));
    }

    // ---- conversions -------------------------------------------------------

    #[test]
    fn test_concepts_response_into_glossary_data() {
        let resp = ConceptsResponse {
            intro: Some("hi".to_string()),
            concepts: vec![RawConcept {
                name: "Concept".to_string(),
                definition: "def".to_string(),
                category: Some("Cat".to_string()),
                related_modules: vec!["src".to_string()],
            }],
        };

        let data = GlossaryData::from(resp);

        assert_eq!(data.concepts.len(), 1);
        assert_eq!(data.concepts[0].name, "Concept");
        assert_eq!(data.intro.as_deref(), Some("hi"));
    }
}