Skip to main content

reflex/pulse/
narrate.rs

1//! LLM narration helpers for Pulse
2//!
3//! Provides centralized LLM calling for digest and wiki surfaces.
4//! Handles provider setup, caching, content gating, and async bridging.
5
6use anyhow::Result;
7use std::path::Path;
8use std::sync::Arc;
9
10use crate::semantic::config;
11use crate::semantic::providers::{self, LlmProvider};
12
13use super::llm_cache::LlmCache;
14
/// System prompt for changelog narration.
///
/// Asks the model to fold raw commit activity into 3-8 product-level entries
/// and to answer with a JSON object containing an `entries` array of
/// `title` / `description` pairs.
///
/// The text deliberately ends with a `COMMIT DATA:` header: the narration
/// functions in this file build the final prompt by concatenating the system
/// prompt and the context with no separator, so the header must stay last.
const CHANGELOG_SYSTEM_PROMPT: &str = "\
You are a technical writer creating a product-level changelog from recent development activity.
Your audience is developers and stakeholders who want to understand what changed, why, and what it impacts — NOT the raw commit details.

Guidelines:
- Group related commits into 3-8 high-level changelog entries.
- Each entry needs a clear title (what changed) and a 2-4 sentence description (why it matters, what it impacts).
- Include an approximate date or date range in parentheses after each entry's title, like \"Added search (Apr 10–12)\".
- Write at a product/feature level, not code level. Say \"Added search to documentation\" not \"Integrated pagefind library into site.rs\".
- Focus on user-visible impact and system-level consequences.
- Do NOT include commit hashes, file paths, or diff statistics in your output.
- Do NOT speculate beyond what the commit messages and file changes reveal.

Output VALID JSON:
{
  \"entries\": [
    {
      \"title\": \"Short descriptive title (Apr 10–12)\",
      \"description\": \"2-4 sentences explaining what changed, why, and what it impacts.\"
    }
  ]
}

COMMIT DATA:
";
41
/// System prompt for a wiki module summary.
///
/// Restricts the model to facts found in the supplied context and asks for a
/// 4-8 sentence overview centred on the module's purpose and architectural
/// role rather than a file listing.
///
/// Ends with a `STRUCTURAL CONTEXT:` header; the context is appended directly
/// after the prompt, so the header must remain the final line.
const WIKI_SYSTEM_PROMPT: &str = "\
You are a technical writer creating a module overview for a codebase wiki.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- NEVER start with 'The X module consists of...', 'This module contains...', or any variant.
- Your first sentence MUST state what the module DOES or what PURPOSE it serves — infer this from file names, symbol names, and its dependency position.
- Focus on PURPOSE, RESPONSIBILITIES, and ARCHITECTURAL ROLE — not on listing individual files or classes.
- Describe the module's architectural role: Is it a hub (many dependents)? A leaf (few dependents)? A bridge between subsystems?
- Explain how this module fits into the larger system — what it provides to modules that depend on it, and what it consumes from its own dependencies.
- If the module has high fan-in (many dependents), note that changes to it have wide blast radius.
- If the module has significantly more or fewer files/lines than average for the codebase, note that.
- Note complexity: file count, line count, symbol density.
- Do NOT enumerate specific file names, class names, or function names unless they represent a truly central abstraction that defines the module's identity (e.g., a primary entry point or the single core type). When in doubt, describe WHAT it does rather than naming the file that does it.
- Vary your sentence structure. Do NOT repeat patterns across modules.
- Write 4-8 sentences. Be specific about what the module does and its scale, not about which files it contains.
- Do NOT speculate about design intent or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
64
/// System prompt for project overview narration.
///
/// Asks for a 3-4 paragraph overview (what the software does, how its modules
/// relate, and its scale), limited to facts in the supplied context.
///
/// Ends with a `STRUCTURAL CONTEXT:` header; the context is appended directly
/// after the prompt, so the header must remain the final line.
const PROJECT_OVERVIEW_SYSTEM_PROMPT: &str = "\
You are a technical writer creating a project overview for auto-generated codebase documentation.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- NEVER start with 'This project consists of...' or 'The codebase is...'
- Your first sentence MUST describe what this software DOES — its purpose and primary function. Use evidence from module names and symbol names to infer the specific domain (e.g., 'code search' from TrigramIndex, QueryEngine, ParserFactory).
- Paragraph 1: What it does and how (infer from module names, key symbols, languages used).
- Paragraph 2: Architecture — how the major modules relate. Which modules are central hubs? What are the natural boundaries? Describe the data flow direction — which modules produce data and which consume it.
- Paragraph 3: Scale and notable patterns — file/line counts, language mix, dependency health (cycles, hotspots).
- Write exactly 3-4 paragraphs. Be specific: use module names, file counts, and dependency numbers.
- Do NOT speculate or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
82
/// System prompt for the architecture narrative.
///
/// Asks for 3-5 paragraphs describing the dependency graph: the most
/// connected module first, then data flow, layering, subsystem boundaries,
/// concerning patterns, and peripheral modules, all limited to facts in the
/// supplied context.
///
/// Ends with a `STRUCTURAL CONTEXT:` header; the context is appended directly
/// after the prompt, so the header must remain the final line.
const ARCHITECTURE_NARRATIVE_SYSTEM_PROMPT: &str = "\
You are a technical writer narrating the architecture of a codebase based on its dependency graph.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- NEVER start with 'The architecture consists of...' or 'This codebase is organized...'
- Lead with the most connected module and explain WHY it's central (what it provides to others).
- Describe data flow: which modules are producers (depended-on) vs consumers (depend on many).
- Identify if the codebase follows a layered pattern (e.g., parsers → models → query engine → CLI) and describe the information flow between layers.
- Identify natural boundaries: groups of tightly-coupled modules that form subsystems.
- Call out concerning patterns: circular dependencies, extreme fan-in hotspots, isolated modules.
- Note peripheral modules: what sits at the edges and what role they serve.
- Write 3-5 paragraphs. Every claim must reference specific module names and dependency counts.
- Do NOT speculate about design intent or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
102
/// System prompt for the onboarding ("Getting Started") guide.
///
/// Asks for 4-5 plain-language paragraphs covering what the project does, how
/// the code is organized, where to start reading, and which conventions a
/// newcomer should know, all limited to facts in the supplied context.
///
/// Ends with a `STRUCTURAL CONTEXT:` header; the context is appended directly
/// after the prompt, so the header must remain the final line.
const ONBOARD_SYSTEM_PROMPT: &str = "\
You are a technical writer creating a \"Getting Started\" guide for a developer's first day on this codebase.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- Write 4-5 paragraphs in plain language that a new team member could follow.
- Paragraph 1: What this project does — its purpose and primary function, in one or two sentences a non-developer could understand.
- Paragraph 2: How the code is organized — the major directories/modules, what each is responsible for.
- Paragraph 3: Where to start reading — which entry points to look at first, and why.
- Paragraph 4: Key patterns and conventions — recurring design patterns, naming conventions, or architectural idioms a newcomer should know.
- Use specific file and module names from the context.
- Do NOT speculate or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
120
/// System prompt for timeline (recent activity) narration.
///
/// Asks for 3-5 concise paragraphs on recent development activity: the most
/// active area, stable versus evolving modules, high-churn files, and
/// contributor patterns, all limited to facts in the supplied context.
///
/// Ends with a `STRUCTURAL CONTEXT:` header; the context is appended directly
/// after the prompt, so the header must remain the final line.
const TIMELINE_SYSTEM_PROMPT: &str = "\
You are a technical writer summarizing recent development activity for a codebase.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- Lead with the most active area of the codebase and explain what's happening there.
- Identify stable modules (few recent changes) vs evolving modules (many recent changes).
- Flag high-churn files that may warrant attention — files changing very frequently could indicate active development or instability.
- Note contributor patterns — is this a solo project or a team effort? Who owns which areas?
- Write 3-5 concise paragraphs with specific numbers, file names, and module names.
- Do NOT speculate about intent or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
137
/// System prompt for product-concept glossary generation.
///
/// The LLM receives structural evidence (module paths, anchor symbol names,
/// scale stats) and returns a single JSON document containing an `intro`
/// paragraph plus 10-15 high-level product concepts, each with a `name`,
/// `category`, plain-language `definition`, and `related_modules` list.
/// The response is parsed in `glossary::parse_concepts_response`.
///
/// Unlike the other prompts in this file, this one ends with a
/// `STRUCTURAL EVIDENCE:` header (not `STRUCTURAL CONTEXT:`); the evidence is
/// appended directly after it, so the header must remain the final line.
const CONCEPTS_SYSTEM_PROMPT: &str = "\
You are documenting a software product's core vocabulary for a non-technical reader.

From the structural evidence below, identify 10-15 HIGH-LEVEL product concepts that someone needs to understand to know what this product DOES and how it works. Concepts are NOUN PHRASES describing capabilities, data ideas, or workflows — NOT specific class names, function names, or file names.

GOOD concept examples: 'Trigram Index', 'Symbol Cache', 'AST Query', 'Dependency Graph', 'LLM Narration', 'Runtime Symbol Detection'
BAD concept examples: 'SearchResult struct', 'QueryEngine class', 'extract_symbols function'

Rules:
- Each definition must be 1-3 sentences in plain language a product person could understand.
- Do NOT start definitions with 'This is a...', 'Represents a...', 'A struct that...'
- Group concepts into 2-4 categories of your choice (e.g. 'Core Capabilities', 'Data Model', 'Workflows', 'Developer Tools').
- Anchor each concept to 1-3 module paths from the evidence — these become wiki links.
- Write exactly ONE intro paragraph (2-3 sentences) describing what kind of vocabulary this page catalogs for this specific product.

Output VALID JSON MATCHING THIS SCHEMA EXACTLY — no markdown fences, no commentary before or after:
{
  \"intro\": \"...\",
  \"concepts\": [
    {
      \"name\": \"Concept Name\",
      \"category\": \"Category Name\",
      \"definition\": \"1-3 sentence plain-language definition.\",
      \"related_modules\": [\"src/foo\", \"src/bar\"]
    }
  ]
}

STRUCTURAL EVIDENCE:
";
174
/// Minimum word count to attempt narration.
///
/// Sections below this threshold are too brief to produce useful summaries
/// and are skipped without calling the LLM. Words are counted with
/// `str::split_whitespace`, so every whitespace-separated token (including
/// markdown table pipes and file paths) counts as one word.
const MIN_CONTENT_WORDS: usize = 15;
178
179/// Create an LLM provider using the user's ~/.reflex/config.toml (same config as `rfx ask`)
180///
181/// If the configured provider has no API key, auto-detects from available keys.
182/// This handles CI environments where users may set provider-specific secrets
183/// (e.g. `OPENROUTER_API_KEY`) without also setting `REFLEX_PROVIDER`.
184pub fn create_pulse_provider() -> Result<Box<dyn LlmProvider>> {
185    let semantic_config = config::load_config(Path::new("."))?;
186
187    // Try the configured provider first
188    let (provider, api_key) = match config::get_api_key(&semantic_config.provider) {
189        Ok(key) => (semantic_config.provider.clone(), key),
190        Err(configured_err) => {
191            // Auto-detect: try other providers before giving up
192            let fallbacks: &[&str] = &["openrouter", "anthropic", "openai"];
193            let mut found = None;
194            for &candidate in fallbacks {
195                if candidate == semantic_config.provider {
196                    continue;
197                }
198                if let Ok(key) = config::get_api_key(candidate) {
199                    eprintln!(
200                        "Note: no API key for configured provider '{}', using auto-detected '{}'",
201                        semantic_config.provider, candidate
202                    );
203                    found = Some((candidate.to_string(), key));
204                    break;
205                }
206            }
207            found.ok_or(configured_err)?
208        }
209    };
210
211    let model = config::resolve_model_for(&provider, semantic_config.model.as_deref(), None);
212
213    let options = config::get_provider_options(&provider);
214
215    providers::create_provider(&provider, api_key, model, options)
216}
217
218/// Narrate a structural context block using LLM.
219///
220/// Returns `None` if:
221/// - Content is too brief (fewer than MIN_CONTENT_WORDS words)
222/// - LLM call fails (degrades gracefully, logs warning)
223/// - Cache hit returns previously generated narration
224///
225/// Checks `LlmCache` first; stores response on success.
226pub fn narrate_section(
227    provider: &dyn LlmProvider,
228    system_prompt: &str,
229    structural_context: &str,
230    cache: &LlmCache,
231    snapshot_id: &str,
232    cache_key_suffix: &str,
233) -> Option<String> {
234    // Check minimum content length
235    let word_count = structural_context.split_whitespace().count();
236    if word_count < MIN_CONTENT_WORDS {
237        eprintln!("  Skipping: {} (too brief, {} words)", cache_key_suffix, word_count);
238        return None;
239    }
240
241    // Check cache
242    let cache_key = LlmCache::compute_key(snapshot_id, cache_key_suffix, structural_context);
243    match cache.get(&cache_key) {
244        Ok(Some(cached)) => {
245            log::debug!("LLM cache hit for '{}'", cache_key_suffix);
246            eprintln!("  Narrating: {} (cached)", cache_key_suffix);
247            return Some(cached.response);
248        }
249        Ok(None) => {}
250        Err(e) => {
251            log::warn!("Failed to read LLM cache: {}", e);
252        }
253    }
254
255    // Build prompt
256    let prompt = format!("{}{}", system_prompt, structural_context);
257
258    eprintln!("  Narrating: {}...", cache_key_suffix);
259
260    // Call LLM with retry (sync bridge over async)
261    let result = call_llm_sync(provider, &prompt);
262
263    match result {
264        Ok(response) => {
265            let response = postprocess_narration(&response);
266
267            // Cache the response
268            let context_hash = blake3::hash(structural_context.as_bytes()).to_hex().to_string();
269            if let Err(e) = cache.put(&cache_key, &context_hash, &response) {
270                log::warn!("Failed to write LLM cache: {}", e);
271            }
272
273            Some(response)
274        }
275        Err(e) => {
276            log::warn!("LLM narration failed for '{}': {}", cache_key_suffix, e);
277            None
278        }
279    }
280}
281
/// A single narration request for batch dispatch via `narrate_batch`.
///
/// The fields mirror the per-call arguments of `narrate_section`.
#[derive(Debug, Clone)]
pub struct NarrationTask {
    /// Prompt text placed directly in front of `structural_context`.
    pub system_prompt: &'static str,
    /// The context to narrate; also used for the minimum-word gate and as
    /// part of the cache key.
    pub structural_context: String,
    /// Snapshot identifier, used as part of the cache key.
    pub snapshot_id: String,
    /// Label for this task: part of the cache key, shown in progress output,
    /// and echoed back in the matching `NarrationResult`.
    pub cache_key_suffix: String,
}
289
/// Outcome of one `NarrationTask` processed by `narrate_batch`.
#[derive(Debug, Clone)]
pub struct NarrationResult {
    /// The `cache_key_suffix` of the task this result belongs to.
    pub cache_key_suffix: String,
    /// The narration text, or `None` when the task was skipped as too brief,
    /// the LLM call failed, or the task did not complete.
    pub response: Option<String>,
}
295
296/// Narrate multiple sections concurrently using a single tokio runtime.
297///
298/// Pre-filters cache hits and too-brief content. Remaining tasks are dispatched
299/// concurrently with a semaphore bound. Results are returned in order.
300pub fn narrate_batch(
301    provider: Arc<dyn LlmProvider>,
302    tasks: Vec<NarrationTask>,
303    cache: &LlmCache,
304    concurrency: usize,
305) -> Vec<NarrationResult> {
306    let total = tasks.len();
307    if total == 0 {
308        return Vec::new();
309    }
310
311    // Pre-filter: resolve cache hits and too-brief content synchronously
312    let mut results: Vec<NarrationResult> = Vec::with_capacity(total);
313    let mut pending: Vec<(usize, NarrationTask, String)> = Vec::new(); // (result_index, task, cache_key)
314
315    for task in tasks {
316        let word_count = task.structural_context.split_whitespace().count();
317        if word_count < MIN_CONTENT_WORDS {
318            eprintln!("  Skipping: {} (too brief, {} words)", task.cache_key_suffix, word_count);
319            results.push(NarrationResult {
320                cache_key_suffix: task.cache_key_suffix,
321                response: None,
322            });
323            continue;
324        }
325
326        let cache_key = LlmCache::compute_key(
327            &task.snapshot_id,
328            &task.cache_key_suffix,
329            &task.structural_context,
330        );
331        match cache.get(&cache_key) {
332            Ok(Some(cached)) => {
333                eprintln!("  Narrating: {} (cached)", task.cache_key_suffix);
334                results.push(NarrationResult {
335                    cache_key_suffix: task.cache_key_suffix,
336                    response: Some(cached.response),
337                });
338            }
339            _ => {
340                let idx = results.len();
341                results.push(NarrationResult {
342                    cache_key_suffix: task.cache_key_suffix.clone(),
343                    response: None,
344                });
345                pending.push((idx, task, cache_key));
346            }
347        }
348    }
349
350    if pending.is_empty() {
351        return results;
352    }
353
354    let pending_count = pending.len();
355    let effective_concurrency = if concurrency == 0 { pending_count } else { concurrency };
356    eprintln!(
357        "  Dispatching {} LLM calls ({} concurrent)...",
358        pending_count, effective_concurrency
359    );
360
361    // Clone cache_dir for use inside async tasks
362    let cache_dir = cache.cache_dir().to_path_buf();
363
364    // Single tokio runtime for all concurrent LLM calls
365    let rt = match tokio::runtime::Runtime::new() {
366        Ok(rt) => rt,
367        Err(e) => {
368            log::warn!("Failed to create tokio runtime for batch narration: {}", e);
369            return results;
370        }
371    };
372
373    let async_results = rt.block_on(async {
374        let semaphore = Arc::new(tokio::sync::Semaphore::new(effective_concurrency));
375        let mut join_set = tokio::task::JoinSet::new();
376
377        for (idx, task, cache_key) in pending {
378            let provider = Arc::clone(&provider);
379            let sem = Arc::clone(&semaphore);
380            let cache_dir = cache_dir.clone();
381
382            join_set.spawn(async move {
383                let _permit = sem.acquire().await.expect("semaphore closed");
384                let start = std::time::Instant::now();
385                eprintln!("  Narrating: {}...", task.cache_key_suffix);
386
387                let prompt = format!("{}{}", task.system_prompt, task.structural_context);
388                let result = call_llm_async(&*provider, &prompt).await;
389
390                let response = match result {
391                    Ok(raw) => {
392                        let response = postprocess_narration(&raw);
393
394                        // Write to cache (file-based, unique key per task — no conflicts)
395                        let task_cache = LlmCache::from_dir(cache_dir);
396                        let context_hash = blake3::hash(task.structural_context.as_bytes())
397                            .to_hex()
398                            .to_string();
399                        if let Err(e) = task_cache.put(&cache_key, &context_hash, &response) {
400                            log::warn!("Failed to write LLM cache for '{}': {}", task.cache_key_suffix, e);
401                        }
402
403                        eprintln!(
404                            "  Narrating: {} (done, {:.1}s)",
405                            task.cache_key_suffix,
406                            start.elapsed().as_secs_f64()
407                        );
408                        Some(response)
409                    }
410                    Err(e) => {
411                        log::warn!("LLM narration failed for '{}': {}", task.cache_key_suffix, e);
412                        eprintln!(
413                            "  Narrating: {} (failed, {:.1}s)",
414                            task.cache_key_suffix,
415                            start.elapsed().as_secs_f64()
416                        );
417                        None
418                    }
419                };
420
421                (idx, task.cache_key_suffix, response)
422            });
423        }
424
425        let mut async_results = Vec::new();
426        while let Some(result) = join_set.join_next().await {
427            match result {
428                Ok(r) => async_results.push(r),
429                Err(e) => log::warn!("Narration task panicked: {}", e),
430            }
431        }
432        async_results
433    });
434
435    // Distribute results back
436    for (idx, cache_key_suffix, response) in async_results {
437        results[idx] = NarrationResult {
438            cache_key_suffix,
439            response,
440        };
441    }
442
443    results
444}
445
446/// Async LLM call with retry logic (native async, no per-call Runtime)
447async fn call_llm_async(provider: &dyn LlmProvider, prompt: &str) -> Result<String> {
448    let max_retries = 2;
449    let mut last_error = None;
450
451    for attempt in 0..=max_retries {
452        if attempt > 0 {
453            log::debug!("Retrying LLM narration (attempt {}/{})", attempt + 1, max_retries + 1);
454            tokio::time::sleep(tokio::time::Duration::from_millis(500 * attempt as u64)).await;
455        }
456
457        match provider.complete(prompt, false).await {
458            Ok(response) => return Ok(response),
459            Err(e) => {
460                log::debug!("LLM call attempt {} failed: {}", attempt + 1, e);
461                last_error = Some(e);
462            }
463        }
464    }
465
466    Err(last_error.unwrap_or_else(|| anyhow::anyhow!("LLM call failed")))
467}
468
/// Returns the system prompt for changelog narration.
///
/// Public accessor for the private `CHANGELOG_SYSTEM_PROMPT` constant. The
/// text ends with a `COMMIT DATA:` header, so the commit data is expected to
/// be appended directly after it.
pub fn changelog_system_prompt() -> &'static str {
    CHANGELOG_SYSTEM_PROMPT
}
473
/// Returns the system prompt for wiki module summaries.
///
/// Public accessor for the private `WIKI_SYSTEM_PROMPT` constant. The text
/// ends with a `STRUCTURAL CONTEXT:` header, so the context is expected to be
/// appended directly after it.
pub fn wiki_system_prompt() -> &'static str {
    WIKI_SYSTEM_PROMPT
}
478
/// Returns the system prompt for project overview narration.
///
/// Public accessor for the private `PROJECT_OVERVIEW_SYSTEM_PROMPT` constant.
/// The text ends with a `STRUCTURAL CONTEXT:` header, so the context is
/// expected to be appended directly after it.
pub fn project_overview_system_prompt() -> &'static str {
    PROJECT_OVERVIEW_SYSTEM_PROMPT
}
483
/// Returns the system prompt for the architecture narrative.
///
/// Public accessor for the private `ARCHITECTURE_NARRATIVE_SYSTEM_PROMPT`
/// constant. The text ends with a `STRUCTURAL CONTEXT:` header, so the
/// context is expected to be appended directly after it.
pub fn architecture_narrative_system_prompt() -> &'static str {
    ARCHITECTURE_NARRATIVE_SYSTEM_PROMPT
}
488
/// Returns the system prompt for the onboarding guide.
///
/// Public accessor for the private `ONBOARD_SYSTEM_PROMPT` constant. The text
/// ends with a `STRUCTURAL CONTEXT:` header, so the context is expected to be
/// appended directly after it.
pub fn onboard_system_prompt() -> &'static str {
    ONBOARD_SYSTEM_PROMPT
}
493
/// Returns the system prompt for timeline narration.
///
/// Public accessor for the private `TIMELINE_SYSTEM_PROMPT` constant. The
/// text ends with a `STRUCTURAL CONTEXT:` header, so the context is expected
/// to be appended directly after it.
pub fn timeline_system_prompt() -> &'static str {
    TIMELINE_SYSTEM_PROMPT
}
498
/// Returns the system prompt for product-concept glossary generation.
///
/// Public accessor for the private `CONCEPTS_SYSTEM_PROMPT` constant. The
/// text ends with a `STRUCTURAL EVIDENCE:` header, so the evidence is
/// expected to be appended directly after it.
pub fn concepts_system_prompt() -> &'static str {
    CONCEPTS_SYSTEM_PROMPT
}
503
/// Known compound words / proper nouns that must NOT be split by the
/// camelCase-splitting regex in `postprocess_narration`.
///
/// These are common technical terms found in codebases. Despite the name,
/// the list works as a protect-list: every term found in the text is swapped
/// for a placeholder before the split runs and restored afterwards. Matching
/// is an exact, case-sensitive substring match.
const CAMEL_CASE_BLOCKLIST: &[&str] = &[
    "TypeScript", "JavaScript", "CoffeeScript", "ActionScript",
    "PostgreSQL", "MySQL", "MariaDB", "MongoDB", "CouchDB", "GraphQL",
    "GitHub", "GitLab", "BitBucket", "WordPress", "PostCSS",
    "IntelliJ", "WebSocket", "WebAssembly", "DevOps", "DevTools",
    "DataFrame", "NumPy", "PyTorch", "TensorFlow", "FastAPI",
    "NextJS", "NestJS", "NodeJS", "ExpressJS", "AngularJS",
    "iPhone", "iPad", "macOS", "iOS", "FreeBSD", "OpenBSD",
    "CodePen", "CodeSandbox", "JetBrains", "PhpStorm", "AppKit",
    "SwiftUI", "UIKit", "CoreData", "MapReduce",
    "CloudFormation", "CloudFront", "CloudWatch",
    "RedHat", "OpenShift", "OpenStack",
    "SourceMap", "AutoComplete", "IntelliSense",
];
520
521/// Post-process LLM narration output to fix common formatting issues.
522fn postprocess_narration(text: &str) -> String {
523    let mut result = text.trim().to_string();
524
525    // Fix missing spaces after periods (e.g., "module.The" → "module. The" but not "config.toml")
526    // Only insert space when followed by an uppercase letter (sentence boundary)
527    let re = regex::Regex::new(r"([a-z])\.([A-Z])").unwrap();
528    result = re.replace_all(&result, "$1. $2").to_string();
529
530    // Fix missing spaces between lowercase and uppercase (e.g., "moduledrives" → "module drives")
531    // Protect known compound words with placeholders before applying the regex
532    let mut placeholders: Vec<(&str, String)> = Vec::new();
533    for (i, term) in CAMEL_CASE_BLOCKLIST.iter().enumerate() {
534        if result.contains(*term) {
535            let placeholder = format!("\x00KEEP{}\x00", i);
536            result = result.replace(*term, &placeholder);
537            placeholders.push((term, placeholder));
538        }
539    }
540
541    // Apply camelCase splitting only to non-code segments (outside backticks)
542    let re = regex::Regex::new(r"([a-z]{3,})([A-Z][a-z]{2,})").unwrap();
543    let parts: Vec<&str> = result.split('`').collect();
544    let mut assembled = String::new();
545    for (i, part) in parts.iter().enumerate() {
546        if i % 2 == 0 {
547            // Outside backticks — apply fix
548            assembled.push_str(&re.replace_all(part, "$1 $2"));
549        } else {
550            // Inside backticks — preserve as-is
551            assembled.push('`');
552            assembled.push_str(part);
553            assembled.push('`');
554        }
555    }
556    result = assembled;
557
558    // Restore protected compound words
559    for (term, placeholder) in &placeholders {
560        result = result.replace(placeholder, term);
561    }
562
563    // Fix double spaces
564    while result.contains("  ") {
565        result = result.replace("  ", " ");
566    }
567
568    result
569}
570
571/// Synchronous LLM call with retry logic.
572/// Uses tokio runtime to bridge async provider calls.
573fn call_llm_sync(provider: &dyn LlmProvider, prompt: &str) -> Result<String> {
574    let rt = tokio::runtime::Runtime::new()?;
575    rt.block_on(async {
576        let mut last_error = None;
577        let max_retries = 2;
578
579        for attempt in 0..=max_retries {
580            if attempt > 0 {
581                log::debug!("Retrying LLM narration (attempt {}/{})", attempt + 1, max_retries + 1);
582                tokio::time::sleep(tokio::time::Duration::from_millis(500 * attempt as u64)).await;
583            }
584
585            match provider.complete(prompt, false).await {
586                Ok(response) => return Ok(response),
587                Err(e) => {
588                    log::debug!("LLM call attempt {} failed: {}", attempt + 1, e);
589                    last_error = Some(e);
590                }
591            }
592        }
593
594        Err(last_error.unwrap_or_else(|| anyhow::anyhow!("LLM call failed")))
595    })
596}
597
// Unit tests for the parts of this module that need no LLM provider: the
// minimum-word gate, the prompt accessors, and narration post-processing.
#[cfg(test)]
mod tests {
    use super::*;

    // The word-count tests below replicate the gate used by the narration
    // functions (`split_whitespace().count()` compared to MIN_CONTENT_WORDS)
    // on representative inputs.

    #[test]
    fn test_word_count_sufficient() {
        // 15+ words should pass the gate
        let text = "src/parsers/rust.rs has 250 lines and contains extract_symbols fn_name and other important functions used for parsing code";
        let count = text.split_whitespace().count();
        assert!(count >= MIN_CONTENT_WORDS, "Word count {} should be >= {}", count, MIN_CONTENT_WORDS);
    }

    #[test]
    fn test_word_count_too_brief() {
        // < 15 words should be rejected
        let text = "No data available yet.";
        let count = text.split_whitespace().count();
        assert!(count < MIN_CONTENT_WORDS, "Word count {} should be < {}", count, MIN_CONTENT_WORDS);
    }

    #[test]
    fn test_word_count_empty() {
        // Empty input has zero words and must fall below the gate
        let count = "".split_whitespace().count();
        assert!(count < MIN_CONTENT_WORDS);
    }

    #[test]
    fn test_word_count_wiki_structural() {
        // Typical wiki page with markdown table + file list should pass
        // (table pipes and dashes count as whitespace-separated tokens)
        let text = "| Language | Files | Lines |\n| --- | --- | --- |\n| Rust | 45 | 12,500 |\n\n**Files:** src/main.rs src/lib.rs src/query/mod.rs src/parsers/rust.rs";
        let count = text.split_whitespace().count();
        assert!(count >= MIN_CONTENT_WORDS, "Wiki structural word count {} should be >= {}", count, MIN_CONTENT_WORDS);
    }

    #[test]
    fn test_word_count_digest_bootstrap() {
        // Typical digest with structural data should pass
        let text = "Branch: feature/pulse Commit: abc1234 Files: 120 Edges: 340 Modules: src tests build.rs config.toml main.rs lib.rs";
        let count = text.split_whitespace().count();
        assert!(count >= MIN_CONTENT_WORDS, "Digest bootstrap word count {} should be >= {}", count, MIN_CONTENT_WORDS);
    }

    #[test]
    fn test_changelog_system_prompt() {
        // The prompt must carry the header the commit data is appended under
        assert!(changelog_system_prompt().contains("COMMIT DATA"));
    }

    #[test]
    fn test_wiki_system_prompt() {
        // The prompt must carry the header the context is appended under
        assert!(wiki_system_prompt().contains("STRUCTURAL CONTEXT"));
    }

    #[test]
    fn test_postprocess_preserves_proper_nouns() {
        // Terms on the protect-list must survive the camelCase split
        let input = "The TypeScript module handles JavaScript compilation.";
        let result = postprocess_narration(input);
        assert!(result.contains("TypeScript"), "Should preserve TypeScript, got: {}", result);
        assert!(result.contains("JavaScript"), "Should preserve JavaScript, got: {}", result);
    }

    #[test]
    fn test_postprocess_splits_run_on_words() {
        // A lowercase run followed by a capitalised word ("parseModule")
        // should be split into "parse Module"
        let input = "The parseModule drives the query engine.";
        let result = postprocess_narration(input);
        assert!(result.contains("parse Module"), "Should split run-on camelCase: {}", result);
    }

    #[test]
    fn test_postprocess_preserves_backtick_code() {
        // Text inside backticks is code and must not be split
        let input = "Uses `TypeScript` and `parseModule` for processing.";
        let result = postprocess_narration(input);
        assert!(result.contains("`TypeScript`"), "Should preserve code: {}", result);
        assert!(result.contains("`parseModule`"), "Should preserve code: {}", result);
    }

    #[test]
    fn test_postprocess_fixes_missing_sentence_space() {
        // A period between a lowercase and an uppercase letter gets a space
        let input = "First sentence.Second sentence starts here.";
        let result = postprocess_narration(input);
        assert!(result.contains(". S"), "Should add space after period: {}", result);
    }

    #[test]
    fn test_postprocess_fixes_double_spaces() {
        // Runs of spaces collapse to a single space
        let input = "Too  many  spaces  here.";
        let result = postprocess_narration(input);
        assert!(!result.contains("  "), "Should remove double spaces: {}", result);
    }
}