// reflex/pulse/narrate.rs

1//! LLM narration helpers for Pulse
2//!
3//! Provides centralized LLM calling for digest and wiki surfaces.
4//! Handles provider setup, caching, content gating, and async bridging.
5
6use anyhow::Result;
7use std::path::Path;
8use std::sync::Arc;
9
10use crate::semantic::config;
11use crate::semantic::providers::{self, LlmProvider};
12
13use super::llm_cache::LlmCache;
14
/// System prompt for changelog narration.
///
/// Asks the model to group commits into 3-8 product-level entries and to reply
/// with a JSON object containing an `entries` array of `title` /
/// `description` pairs. The text ends with a `COMMIT DATA:` header; the
/// narration functions in this file concatenate the prompt and the context
/// with no separator, so the context lands directly under that header.
const CHANGELOG_SYSTEM_PROMPT: &str = "\
You are a technical writer creating a product-level changelog from recent development activity.
Your audience is developers and stakeholders who want to understand what changed, why, and what it impacts — NOT the raw commit details.

Guidelines:
- Group related commits into 3-8 high-level changelog entries.
- Each entry needs a clear title (what changed) and a 2-4 sentence description (why it matters, what it impacts).
- Include an approximate date or date range in parentheses after each entry's title, like \"Added search (Apr 10–12)\".
- Write at a product/feature level, not code level. Say \"Added search to documentation\" not \"Integrated pagefind library into site.rs\".
- Focus on user-visible impact and system-level consequences.
- Do NOT include commit hashes, file paths, or diff statistics in your output.
- Do NOT speculate beyond what the commit messages and file changes reveal.

Output VALID JSON:
{
  \"entries\": [
    {
      \"title\": \"Short descriptive title (Apr 10–12)\",
      \"description\": \"2-4 sentences explaining what changed, why, and what it impacts.\"
    }
  ]
}

COMMIT DATA:
";
41
/// System prompt for a wiki module summary.
///
/// Asks for a 4-8 sentence overview focused on the module's purpose and
/// architectural role, restricted to facts in the supplied context. The text
/// ends with a `STRUCTURAL CONTEXT:` header; the narration functions in this
/// file concatenate the prompt and the context with no separator.
const WIKI_SYSTEM_PROMPT: &str = "\
You are a technical writer creating a module overview for a codebase wiki.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- NEVER start with 'The X module consists of...', 'This module contains...', or any variant.
- Your first sentence MUST state what the module DOES or what PURPOSE it serves — infer this from file names, symbol names, and its dependency position.
- Focus on PURPOSE, RESPONSIBILITIES, and ARCHITECTURAL ROLE — not on listing individual files or classes.
- Describe the module's architectural role: Is it a hub (many dependents)? A leaf (few dependents)? A bridge between subsystems?
- Explain how this module fits into the larger system — what it provides to modules that depend on it, and what it consumes from its own dependencies.
- If the module has high fan-in (many dependents), note that changes to it have wide blast radius.
- If the module has significantly more or fewer files/lines than average for the codebase, note that.
- Note complexity: file count, line count, symbol density.
- Do NOT enumerate specific file names, class names, or function names unless they represent a truly central abstraction that defines the module's identity (e.g., a primary entry point or the single core type). When in doubt, describe WHAT it does rather than naming the file that does it.
- Vary your sentence structure. Do NOT repeat patterns across modules.
- Write 4-8 sentences. Be specific about what the module does and its scale, not about which files it contains.
- Do NOT speculate about design intent or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
64
/// System prompt for project overview narration.
///
/// Asks for a 3-4 paragraph overview (purpose, architecture, scale) restricted
/// to facts in the supplied context. The text ends with a
/// `STRUCTURAL CONTEXT:` header; the narration functions in this file
/// concatenate the prompt and the context with no separator.
const PROJECT_OVERVIEW_SYSTEM_PROMPT: &str = "\
You are a technical writer creating a project overview for auto-generated codebase documentation.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- NEVER start with 'This project consists of...' or 'The codebase is...'
- Your first sentence MUST describe what this software DOES — its purpose and primary function. Use evidence from module names and symbol names to infer the specific domain (e.g., 'code search' from TrigramIndex, QueryEngine, ParserFactory).
- Paragraph 1: What it does and how (infer from module names, key symbols, languages used).
- Paragraph 2: Architecture — how the major modules relate. Which modules are central hubs? What are the natural boundaries? Describe the data flow direction — which modules produce data and which consume it.
- Paragraph 3: Scale and notable patterns — file/line counts, language mix, dependency health (cycles, hotspots).
- Write exactly 3-4 paragraphs. Be specific: use module names, file counts, and dependency numbers.
- Do NOT speculate or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
82
/// System prompt for the architecture narrative.
///
/// Asks for 3-5 paragraphs describing the dependency graph (central modules,
/// data flow, layering, boundaries, concerning patterns), restricted to facts
/// in the supplied context. The text ends with a `STRUCTURAL CONTEXT:` header;
/// the narration functions in this file concatenate the prompt and the context
/// with no separator.
const ARCHITECTURE_NARRATIVE_SYSTEM_PROMPT: &str = "\
You are a technical writer narrating the architecture of a codebase based on its dependency graph.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- NEVER start with 'The architecture consists of...' or 'This codebase is organized...'
- Lead with the most connected module and explain WHY it's central (what it provides to others).
- Describe data flow: which modules are producers (depended-on) vs consumers (depend on many).
- Identify if the codebase follows a layered pattern (e.g., parsers → models → query engine → CLI) and describe the information flow between layers.
- Identify natural boundaries: groups of tightly-coupled modules that form subsystems.
- Call out concerning patterns: circular dependencies, extreme fan-in hotspots, isolated modules.
- Note peripheral modules: what sits at the edges and what role they serve.
- Write 3-5 paragraphs. Every claim must reference specific module names and dependency counts.
- Do NOT speculate about design intent or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
102
/// System prompt for the onboarding ("Getting Started") guide.
///
/// Asks for 4-5 plain-language paragraphs covering purpose, code organization,
/// where to start reading, and key conventions, restricted to facts in the
/// supplied context. The text ends with a `STRUCTURAL CONTEXT:` header; the
/// narration functions in this file concatenate the prompt and the context
/// with no separator.
const ONBOARD_SYSTEM_PROMPT: &str = "\
You are a technical writer creating a \"Getting Started\" guide for a developer's first day on this codebase.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- Write 4-5 paragraphs in plain language that a new team member could follow.
- Paragraph 1: What this project does — its purpose and primary function, in one or two sentences a non-developer could understand.
- Paragraph 2: How the code is organized — the major directories/modules, what each is responsible for.
- Paragraph 3: Where to start reading — which entry points to look at first, and why.
- Paragraph 4: Key patterns and conventions — recurring design patterns, naming conventions, or architectural idioms a newcomer should know.
- Use specific file and module names from the context.
- Do NOT speculate or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
120
/// System prompt for timeline (recent activity) narration.
///
/// Asks for 3-5 concise paragraphs on active areas, stable vs evolving
/// modules, high-churn files, and contributor patterns, restricted to facts in
/// the supplied context. The text ends with a `STRUCTURAL CONTEXT:` header;
/// the narration functions in this file concatenate the prompt and the context
/// with no separator.
const TIMELINE_SYSTEM_PROMPT: &str = "\
You are a technical writer summarizing recent development activity for a codebase.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- Lead with the most active area of the codebase and explain what's happening there.
- Identify stable modules (few recent changes) vs evolving modules (many recent changes).
- Flag high-churn files that may warrant attention — files changing very frequently could indicate active development or instability.
- Note contributor patterns — is this a solo project or a team effort? Who owns which areas?
- Write 3-5 concise paragraphs with specific numbers, file names, and module names.
- Do NOT speculate about intent or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
137
/// System prompt for product-concept glossary generation.
///
/// The LLM receives structural evidence (module paths, anchor symbol names,
/// scale stats) and returns a single JSON document containing an `intro`
/// paragraph plus 10-15 high-level product `concepts`, each with a `name`,
/// `category`, plain-language `definition`, and `related_modules`. The
/// response is parsed in `glossary::parse_concepts_response`.
///
/// The text ends with a `STRUCTURAL EVIDENCE:` header so the evidence can be
/// appended directly after it.
const CONCEPTS_SYSTEM_PROMPT: &str = "\
You are documenting a software product's core vocabulary for a non-technical reader.

From the structural evidence below, identify 10-15 HIGH-LEVEL product concepts that someone needs to understand to know what this product DOES and how it works. Concepts are NOUN PHRASES describing capabilities, data ideas, or workflows — NOT specific class names, function names, or file names.

GOOD concept examples: 'Trigram Index', 'Symbol Cache', 'AST Query', 'Dependency Graph', 'LLM Narration', 'Runtime Symbol Detection'
BAD concept examples: 'SearchResult struct', 'QueryEngine class', 'extract_symbols function'

Rules:
- Each definition must be 1-3 sentences in plain language a product person could understand.
- Do NOT start definitions with 'This is a...', 'Represents a...', 'A struct that...'
- Group concepts into 2-4 categories of your choice (e.g. 'Core Capabilities', 'Data Model', 'Workflows', 'Developer Tools').
- Anchor each concept to 1-3 module paths from the evidence — these become wiki links.
- Write exactly ONE intro paragraph (2-3 sentences) describing what kind of vocabulary this page catalogs for this specific product.

Output VALID JSON MATCHING THIS SCHEMA EXACTLY — no markdown fences, no commentary before or after:
{
  \"intro\": \"...\",
  \"concepts\": [
    {
      \"name\": \"Concept Name\",
      \"category\": \"Category Name\",
      \"definition\": \"1-3 sentence plain-language definition.\",
      \"related_modules\": [\"src/foo\", \"src/bar\"]
    }
  ]
}

STRUCTURAL EVIDENCE:
";
174
/// Minimum word count to attempt narration.
///
/// `narrate_section` and `narrate_batch` count whitespace-separated words in
/// the structural context and skip the LLM call (yielding no narration) when
/// the count is below this threshold, since such sections are too brief to
/// produce useful summaries.
const MIN_CONTENT_WORDS: usize = 15;
178
179/// Create an LLM provider using the user's ~/.reflex/config.toml (same config as `rfx ask`)
180///
181/// If the configured provider has no API key, auto-detects from available keys.
182/// This handles CI environments where users may set provider-specific secrets
183/// (e.g. `OPENROUTER_API_KEY`) without also setting `REFLEX_PROVIDER`.
184pub fn create_pulse_provider() -> Result<Box<dyn LlmProvider>> {
185    let semantic_config = config::load_config(Path::new("."))?;
186
187    // Try the configured provider first
188    let (provider, api_key) = match config::get_api_key(&semantic_config.provider) {
189        Ok(key) => (semantic_config.provider.clone(), key),
190        Err(configured_err) => {
191            // Auto-detect: try other providers before giving up
192            let fallbacks: &[&str] = &["openrouter", "anthropic", "openai"];
193            let mut found = None;
194            for &candidate in fallbacks {
195                if candidate == semantic_config.provider {
196                    continue;
197                }
198                if let Ok(key) = config::get_api_key(candidate) {
199                    eprintln!(
200                        "Note: no API key for configured provider '{}', using auto-detected '{}'",
201                        semantic_config.provider, candidate
202                    );
203                    found = Some((candidate.to_string(), key));
204                    break;
205                }
206            }
207            found.ok_or(configured_err)?
208        }
209    };
210
211    let model = config::resolve_model_for(&provider, semantic_config.model.as_deref(), None);
212
213    let options = config::get_provider_options(&provider);
214
215    providers::create_provider(
216        &provider,
217        api_key,
218        model,
219        options,
220        semantic_config.timeout_seconds,
221    )
222}
223
224/// Narrate a structural context block using LLM.
225///
226/// Returns `None` if:
227/// - Content is too brief (fewer than MIN_CONTENT_WORDS words)
228/// - LLM call fails (degrades gracefully, logs warning)
229/// - Cache hit returns previously generated narration
230///
231/// Checks `LlmCache` first; stores response on success.
232pub fn narrate_section(
233    provider: &dyn LlmProvider,
234    system_prompt: &str,
235    structural_context: &str,
236    cache: &LlmCache,
237    snapshot_id: &str,
238    cache_key_suffix: &str,
239) -> Option<String> {
240    // Check minimum content length
241    let word_count = structural_context.split_whitespace().count();
242    if word_count < MIN_CONTENT_WORDS {
243        eprintln!(
244            "  Skipping: {} (too brief, {} words)",
245            cache_key_suffix, word_count
246        );
247        return None;
248    }
249
250    // Check cache
251    let cache_key = LlmCache::compute_key(snapshot_id, cache_key_suffix, structural_context);
252    match cache.get(&cache_key) {
253        Ok(Some(cached)) => {
254            log::debug!("LLM cache hit for '{}'", cache_key_suffix);
255            eprintln!("  Narrating: {} (cached)", cache_key_suffix);
256            return Some(cached.response);
257        }
258        Ok(None) => {}
259        Err(e) => {
260            log::warn!("Failed to read LLM cache: {}", e);
261        }
262    }
263
264    // Build prompt
265    let prompt = format!("{}{}", system_prompt, structural_context);
266
267    eprintln!("  Narrating: {}...", cache_key_suffix);
268
269    // Call LLM with retry (sync bridge over async)
270    let result = call_llm_sync(provider, &prompt);
271
272    match result {
273        Ok(response) => {
274            let response = postprocess_narration(&response);
275
276            // Cache the response
277            let context_hash = blake3::hash(structural_context.as_bytes())
278                .to_hex()
279                .to_string();
280            if let Err(e) = cache.put(&cache_key, &context_hash, &response) {
281                log::warn!("Failed to write LLM cache: {}", e);
282            }
283
284            Some(response)
285        }
286        Err(e) => {
287            log::warn!("LLM narration failed for '{}': {}", cache_key_suffix, e);
288            None
289        }
290    }
291}
292
/// A narration task for batch dispatch via `narrate_batch`.
#[derive(Debug, Clone)]
pub struct NarrationTask {
    /// Prompt text placed immediately before the structural context.
    pub system_prompt: &'static str,
    /// The context to narrate; also used for the word-count gate and cache key.
    pub structural_context: String,
    /// Snapshot identifier that participates in the cache key.
    pub snapshot_id: String,
    /// Label for this task; part of the cache key and shown in progress output.
    pub cache_key_suffix: String,
}
300
/// Result of a narration task produced by `narrate_batch`.
#[derive(Debug, Clone)]
pub struct NarrationResult {
    /// The `cache_key_suffix` of the task this result belongs to.
    pub cache_key_suffix: String,
    /// The narration text, or `None` when the task was skipped or failed.
    pub response: Option<String>,
}
306
307/// Narrate multiple sections concurrently using a single tokio runtime.
308///
309/// Pre-filters cache hits and too-brief content. Remaining tasks are dispatched
310/// concurrently with a semaphore bound. Results are returned in order.
311pub fn narrate_batch(
312    provider: Arc<dyn LlmProvider>,
313    tasks: Vec<NarrationTask>,
314    cache: &LlmCache,
315    concurrency: usize,
316) -> Vec<NarrationResult> {
317    let total = tasks.len();
318    if total == 0 {
319        return Vec::new();
320    }
321
322    // Pre-filter: resolve cache hits and too-brief content synchronously
323    let mut results: Vec<NarrationResult> = Vec::with_capacity(total);
324    let mut pending: Vec<(usize, NarrationTask, String)> = Vec::new(); // (result_index, task, cache_key)
325
326    for task in tasks {
327        let word_count = task.structural_context.split_whitespace().count();
328        if word_count < MIN_CONTENT_WORDS {
329            eprintln!(
330                "  Skipping: {} (too brief, {} words)",
331                task.cache_key_suffix, word_count
332            );
333            results.push(NarrationResult {
334                cache_key_suffix: task.cache_key_suffix,
335                response: None,
336            });
337            continue;
338        }
339
340        let cache_key = LlmCache::compute_key(
341            &task.snapshot_id,
342            &task.cache_key_suffix,
343            &task.structural_context,
344        );
345        match cache.get(&cache_key) {
346            Ok(Some(cached)) => {
347                eprintln!("  Narrating: {} (cached)", task.cache_key_suffix);
348                results.push(NarrationResult {
349                    cache_key_suffix: task.cache_key_suffix,
350                    response: Some(cached.response),
351                });
352            }
353            _ => {
354                let idx = results.len();
355                results.push(NarrationResult {
356                    cache_key_suffix: task.cache_key_suffix.clone(),
357                    response: None,
358                });
359                pending.push((idx, task, cache_key));
360            }
361        }
362    }
363
364    if pending.is_empty() {
365        return results;
366    }
367
368    let pending_count = pending.len();
369    let effective_concurrency = if concurrency == 0 {
370        pending_count
371    } else {
372        concurrency
373    };
374    eprintln!(
375        "  Dispatching {} LLM calls ({} concurrent)...",
376        pending_count, effective_concurrency
377    );
378
379    // Clone cache_dir for use inside async tasks
380    let cache_dir = cache.cache_dir().to_path_buf();
381
382    // Single tokio runtime for all concurrent LLM calls
383    let rt = match tokio::runtime::Runtime::new() {
384        Ok(rt) => rt,
385        Err(e) => {
386            log::warn!("Failed to create tokio runtime for batch narration: {}", e);
387            return results;
388        }
389    };
390
391    let async_results = rt.block_on(async {
392        let semaphore = Arc::new(tokio::sync::Semaphore::new(effective_concurrency));
393        let mut join_set = tokio::task::JoinSet::new();
394
395        for (idx, task, cache_key) in pending {
396            let provider = Arc::clone(&provider);
397            let sem = Arc::clone(&semaphore);
398            let cache_dir = cache_dir.clone();
399
400            join_set.spawn(async move {
401                let _permit = sem.acquire().await.expect("semaphore closed");
402                let start = std::time::Instant::now();
403                eprintln!("  Narrating: {}...", task.cache_key_suffix);
404
405                let prompt = format!("{}{}", task.system_prompt, task.structural_context);
406                let result = call_llm_async(&*provider, &prompt).await;
407
408                let response = match result {
409                    Ok(raw) => {
410                        let response = postprocess_narration(&raw);
411
412                        // Write to cache (file-based, unique key per task — no conflicts)
413                        let task_cache = LlmCache::from_dir(cache_dir);
414                        let context_hash = blake3::hash(task.structural_context.as_bytes())
415                            .to_hex()
416                            .to_string();
417                        if let Err(e) = task_cache.put(&cache_key, &context_hash, &response) {
418                            log::warn!(
419                                "Failed to write LLM cache for '{}': {}",
420                                task.cache_key_suffix,
421                                e
422                            );
423                        }
424
425                        eprintln!(
426                            "  Narrating: {} (done, {:.1}s)",
427                            task.cache_key_suffix,
428                            start.elapsed().as_secs_f64()
429                        );
430                        Some(response)
431                    }
432                    Err(e) => {
433                        log::warn!(
434                            "LLM narration failed for '{}': {}",
435                            task.cache_key_suffix,
436                            e
437                        );
438                        eprintln!(
439                            "  Narrating: {} (failed, {:.1}s)",
440                            task.cache_key_suffix,
441                            start.elapsed().as_secs_f64()
442                        );
443                        None
444                    }
445                };
446
447                (idx, task.cache_key_suffix, response)
448            });
449        }
450
451        let mut async_results = Vec::new();
452        while let Some(result) = join_set.join_next().await {
453            match result {
454                Ok(r) => async_results.push(r),
455                Err(e) => log::warn!("Narration task panicked: {}", e),
456            }
457        }
458        async_results
459    });
460
461    // Distribute results back
462    for (idx, cache_key_suffix, response) in async_results {
463        results[idx] = NarrationResult {
464            cache_key_suffix,
465            response,
466        };
467    }
468
469    results
470}
471
472/// Async LLM call with retry logic (native async, no per-call Runtime)
473async fn call_llm_async(provider: &dyn LlmProvider, prompt: &str) -> Result<String> {
474    let max_retries = 2;
475    let mut last_error = None;
476
477    for attempt in 0..=max_retries {
478        if attempt > 0 {
479            log::debug!(
480                "Retrying LLM narration (attempt {}/{})",
481                attempt + 1,
482                max_retries + 1
483            );
484            tokio::time::sleep(tokio::time::Duration::from_millis(500 * attempt as u64)).await;
485        }
486
487        match provider.complete(prompt, false).await {
488            Ok(response) => return Ok(response),
489            Err(e) => {
490                log::debug!("LLM call attempt {} failed: {}", attempt + 1, e);
491                last_error = Some(e);
492            }
493        }
494    }
495
496    Err(last_error.unwrap_or_else(|| anyhow::anyhow!("LLM call failed")))
497}
498
/// Get the system prompt for changelog narration.
///
/// Public accessor for the private `CHANGELOG_SYSTEM_PROMPT` constant.
pub fn changelog_system_prompt() -> &'static str {
    CHANGELOG_SYSTEM_PROMPT
}
503
/// Get the system prompt for wiki module-summary narration.
///
/// Public accessor for the private `WIKI_SYSTEM_PROMPT` constant.
pub fn wiki_system_prompt() -> &'static str {
    WIKI_SYSTEM_PROMPT
}
508
/// Get the system prompt for project overview narration.
///
/// Public accessor for the private `PROJECT_OVERVIEW_SYSTEM_PROMPT` constant.
pub fn project_overview_system_prompt() -> &'static str {
    PROJECT_OVERVIEW_SYSTEM_PROMPT
}
513
/// Get the system prompt for architecture narrative narration.
///
/// Public accessor for the private `ARCHITECTURE_NARRATIVE_SYSTEM_PROMPT`
/// constant.
pub fn architecture_narrative_system_prompt() -> &'static str {
    ARCHITECTURE_NARRATIVE_SYSTEM_PROMPT
}
518
/// Get the system prompt for onboard guide narration.
///
/// Public accessor for the private `ONBOARD_SYSTEM_PROMPT` constant.
pub fn onboard_system_prompt() -> &'static str {
    ONBOARD_SYSTEM_PROMPT
}
523
/// Get the system prompt for timeline narration.
///
/// Public accessor for the private `TIMELINE_SYSTEM_PROMPT` constant.
pub fn timeline_system_prompt() -> &'static str {
    TIMELINE_SYSTEM_PROMPT
}
528
/// Get the system prompt for product-concept glossary generation.
///
/// Public accessor for the private `CONCEPTS_SYSTEM_PROMPT` constant.
pub fn concepts_system_prompt() -> &'static str {
    CONCEPTS_SYSTEM_PROMPT
}
533
/// Known compound words / proper nouns that should NOT be split by the
/// camelCase regex. These are common technical terms found in codebases.
///
/// Despite the name, this is a list of *protected* terms:
/// `postprocess_narration` swaps each occurrence for a placeholder before it
/// runs the word-splitting regex and restores the term afterwards. Matching is
/// by exact, case-sensitive substring.
const CAMEL_CASE_BLOCKLIST: &[&str] = &[
    "TypeScript",
    "JavaScript",
    "CoffeeScript",
    "ActionScript",
    "PostgreSQL",
    "MySQL",
    "MariaDB",
    "MongoDB",
    "CouchDB",
    "GraphQL",
    "GitHub",
    "GitLab",
    "BitBucket",
    "WordPress",
    "PostCSS",
    "IntelliJ",
    "WebSocket",
    "WebAssembly",
    "DevOps",
    "DevTools",
    "DataFrame",
    "NumPy",
    "PyTorch",
    "TensorFlow",
    "FastAPI",
    "NextJS",
    "NestJS",
    "NodeJS",
    "ExpressJS",
    "AngularJS",
    "iPhone",
    "iPad",
    "macOS",
    "iOS",
    "FreeBSD",
    "OpenBSD",
    "CodePen",
    "CodeSandbox",
    "JetBrains",
    "PhpStorm",
    "AppKit",
    "SwiftUI",
    "UIKit",
    "CoreData",
    "MapReduce",
    "CloudFormation",
    "CloudFront",
    "CloudWatch",
    "RedHat",
    "OpenShift",
    "OpenStack",
    "SourceMap",
    "AutoComplete",
    "IntelliSense",
];
592
593/// Post-process LLM narration output to fix common formatting issues.
594fn postprocess_narration(text: &str) -> String {
595    let mut result = text.trim().to_string();
596
597    // Fix missing spaces after periods (e.g., "module.The" → "module. The" but not "config.toml")
598    // Only insert space when followed by an uppercase letter (sentence boundary)
599    let re = regex::Regex::new(r"([a-z])\.([A-Z])").unwrap();
600    result = re.replace_all(&result, "$1. $2").to_string();
601
602    // Fix missing spaces between lowercase and uppercase (e.g., "moduledrives" → "module drives")
603    // Protect known compound words with placeholders before applying the regex
604    let mut placeholders: Vec<(&str, String)> = Vec::new();
605    for (i, term) in CAMEL_CASE_BLOCKLIST.iter().enumerate() {
606        if result.contains(*term) {
607            let placeholder = format!("\x00KEEP{}\x00", i);
608            result = result.replace(*term, &placeholder);
609            placeholders.push((term, placeholder));
610        }
611    }
612
613    // Apply camelCase splitting only to non-code segments (outside backticks)
614    let re = regex::Regex::new(r"([a-z]{3,})([A-Z][a-z]{2,})").unwrap();
615    let parts: Vec<&str> = result.split('`').collect();
616    let mut assembled = String::new();
617    for (i, part) in parts.iter().enumerate() {
618        if i % 2 == 0 {
619            // Outside backticks — apply fix
620            assembled.push_str(&re.replace_all(part, "$1 $2"));
621        } else {
622            // Inside backticks — preserve as-is
623            assembled.push('`');
624            assembled.push_str(part);
625            assembled.push('`');
626        }
627    }
628    result = assembled;
629
630    // Restore protected compound words
631    for (term, placeholder) in &placeholders {
632        result = result.replace(placeholder, term);
633    }
634
635    // Fix double spaces
636    while result.contains("  ") {
637        result = result.replace("  ", " ");
638    }
639
640    result
641}
642
643/// Synchronous LLM call with retry logic.
644/// Uses tokio runtime to bridge async provider calls.
645fn call_llm_sync(provider: &dyn LlmProvider, prompt: &str) -> Result<String> {
646    let rt = tokio::runtime::Runtime::new()?;
647    rt.block_on(async {
648        let mut last_error = None;
649        let max_retries = 2;
650
651        for attempt in 0..=max_retries {
652            if attempt > 0 {
653                log::debug!(
654                    "Retrying LLM narration (attempt {}/{})",
655                    attempt + 1,
656                    max_retries + 1
657                );
658                tokio::time::sleep(tokio::time::Duration::from_millis(500 * attempt as u64)).await;
659            }
660
661            match provider.complete(prompt, false).await {
662                Ok(response) => return Ok(response),
663                Err(e) => {
664                    log::debug!("LLM call attempt {} failed: {}", attempt + 1, e);
665                    last_error = Some(e);
666                }
667            }
668        }
669
670        Err(last_error.unwrap_or_else(|| anyhow::anyhow!("LLM call failed")))
671    })
672}
673
#[cfg(test)]
mod tests {
    use super::*;

    /// Word count as computed by the narration gate (`split_whitespace`).
    fn words_in(text: &str) -> usize {
        text.split_whitespace().count()
    }

    #[test]
    fn test_word_count_sufficient() {
        // A context with at least MIN_CONTENT_WORDS words clears the gate.
        let sample = "src/parsers/rust.rs has 250 lines and contains extract_symbols fn_name and other important functions used for parsing code";
        let n = words_in(sample);
        assert!(
            n >= MIN_CONTENT_WORDS,
            "Word count {} should be >= {}",
            n,
            MIN_CONTENT_WORDS
        );
    }

    #[test]
    fn test_word_count_too_brief() {
        // A handful of words falls below the gate.
        let n = words_in("No data available yet.");
        assert!(
            n < MIN_CONTENT_WORDS,
            "Word count {} should be < {}",
            n,
            MIN_CONTENT_WORDS
        );
    }

    #[test]
    fn test_word_count_empty() {
        // The empty string has no words at all.
        assert!(words_in("") < MIN_CONTENT_WORDS);
    }

    #[test]
    fn test_word_count_wiki_structural() {
        // A typical wiki page (markdown table plus file list) clears the gate.
        let sample = "| Language | Files | Lines |\n| --- | --- | --- |\n| Rust | 45 | 12,500 |\n\n**Files:** src/main.rs src/lib.rs src/query/mod.rs src/parsers/rust.rs";
        let n = words_in(sample);
        assert!(
            n >= MIN_CONTENT_WORDS,
            "Wiki structural word count {} should be >= {}",
            n,
            MIN_CONTENT_WORDS
        );
    }

    #[test]
    fn test_word_count_digest_bootstrap() {
        // A typical digest carrying structural data clears the gate.
        let sample = "Branch: feature/pulse Commit: abc1234 Files: 120 Edges: 340 Modules: src tests build.rs config.toml main.rs lib.rs";
        let n = words_in(sample);
        assert!(
            n >= MIN_CONTENT_WORDS,
            "Digest bootstrap word count {} should be >= {}",
            n,
            MIN_CONTENT_WORDS
        );
    }

    #[test]
    fn test_changelog_system_prompt() {
        let prompt = changelog_system_prompt();
        assert!(prompt.contains("COMMIT DATA"));
    }

    #[test]
    fn test_wiki_system_prompt() {
        let prompt = wiki_system_prompt();
        assert!(prompt.contains("STRUCTURAL CONTEXT"));
    }

    #[test]
    fn test_postprocess_preserves_proper_nouns() {
        // Protected compound words must come through unsplit.
        let cleaned =
            postprocess_narration("The TypeScript module handles JavaScript compilation.");
        assert!(
            cleaned.contains("TypeScript"),
            "Should preserve TypeScript, got: {}",
            cleaned
        );
        assert!(
            cleaned.contains("JavaScript"),
            "Should preserve JavaScript, got: {}",
            cleaned
        );
    }

    #[test]
    fn test_postprocess_splits_run_on_words() {
        // A lowercase run fused to a capitalized word ("parseModule") is split in two.
        let cleaned = postprocess_narration("The parseModule drives the query engine.");
        assert!(
            cleaned.contains("parse Module"),
            "Should split run-on camelCase: {}",
            cleaned
        );
    }

    #[test]
    fn test_postprocess_preserves_backtick_code() {
        // Text inside backtick code spans is left exactly as written.
        let cleaned = postprocess_narration("Uses `TypeScript` and `parseModule` for processing.");
        for code_span in ["`TypeScript`", "`parseModule`"] {
            assert!(
                cleaned.contains(code_span),
                "Should preserve code: {}",
                cleaned
            );
        }
    }

    #[test]
    fn test_postprocess_fixes_missing_sentence_space() {
        // A period directly followed by an uppercase letter gains a space.
        let cleaned = postprocess_narration("First sentence.Second sentence starts here.");
        assert!(
            cleaned.contains(". S"),
            "Should add space after period: {}",
            cleaned
        );
    }

    #[test]
    fn test_postprocess_fixes_double_spaces() {
        // Runs of spaces collapse to a single space.
        let cleaned = postprocess_narration("Too  many  spaces  here.");
        assert!(
            !cleaned.contains("  "),
            "Should remove double spaces: {}",
            cleaned
        );
    }
}
811}