1use anyhow::Result;
7use std::path::Path;
8use std::sync::Arc;
9
10use crate::semantic::config;
11use crate::semantic::providers::{self, LlmProvider};
12
13use super::llm_cache::LlmCache;
14
/// Prompt preamble for changelog generation.
///
/// Tells the model to group commits into 3-8 product-level entries and to
/// answer with a JSON object shaped like
/// `{"entries": [{"title": ..., "description": ...}]}`.
/// The text ends with a `COMMIT DATA:` header; the narration helpers in this
/// file concatenate the context directly after the prompt.
const CHANGELOG_SYSTEM_PROMPT: &str = "\
You are a technical writer creating a product-level changelog from recent development activity.
Your audience is developers and stakeholders who want to understand what changed, why, and what it impacts — NOT the raw commit details.

Guidelines:
- Group related commits into 3-8 high-level changelog entries.
- Each entry needs a clear title (what changed) and a 2-4 sentence description (why it matters, what it impacts).
- Include an approximate date or date range in parentheses after each entry's title, like \"Added search (Apr 10–12)\".
- Write at a product/feature level, not code level. Say \"Added search to documentation\" not \"Integrated pagefind library into site.rs\".
- Focus on user-visible impact and system-level consequences.
- Do NOT include commit hashes, file paths, or diff statistics in your output.
- Do NOT speculate beyond what the commit messages and file changes reveal.

Output VALID JSON:
{
 \"entries\": [
 {
 \"title\": \"Short descriptive title (Apr 10–12)\",
 \"description\": \"2-4 sentences explaining what changed, why, and what it impacts.\"
 }
 ]
}

COMMIT DATA:
";
41
/// Prompt preamble for a per-module wiki overview.
///
/// Asks for 4-8 sentences of prose about the module's purpose and
/// architectural role, restricted to facts in the supplied context.
/// Ends with a `STRUCTURAL CONTEXT:` header; the narration helpers in this
/// file concatenate the context directly after the prompt.
const WIKI_SYSTEM_PROMPT: &str = "\
You are a technical writer creating a module overview for a codebase wiki.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- NEVER start with 'The X module consists of...', 'This module contains...', or any variant.
- Your first sentence MUST state what the module DOES or what PURPOSE it serves — infer this from file names, symbol names, and its dependency position.
- Focus on PURPOSE, RESPONSIBILITIES, and ARCHITECTURAL ROLE — not on listing individual files or classes.
- Describe the module's architectural role: Is it a hub (many dependents)? A leaf (few dependents)? A bridge between subsystems?
- Explain how this module fits into the larger system — what it provides to modules that depend on it, and what it consumes from its own dependencies.
- If the module has high fan-in (many dependents), note that changes to it have wide blast radius.
- If the module has significantly more or fewer files/lines than average for the codebase, note that.
- Note complexity: file count, line count, symbol density.
- Do NOT enumerate specific file names, class names, or function names unless they represent a truly central abstraction that defines the module's identity (e.g., a primary entry point or the single core type). When in doubt, describe WHAT it does rather than naming the file that does it.
- Vary your sentence structure. Do NOT repeat patterns across modules.
- Write 4-8 sentences. Be specific about what the module does and its scale, not about which files it contains.
- Do NOT speculate about design intent or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
64
/// Prompt preamble for the whole-project overview.
///
/// Asks for 3-4 paragraphs covering what the software does, its
/// architecture, and its scale, restricted to facts in the supplied context.
/// Ends with a `STRUCTURAL CONTEXT:` header; the narration helpers in this
/// file concatenate the context directly after the prompt.
const PROJECT_OVERVIEW_SYSTEM_PROMPT: &str = "\
You are a technical writer creating a project overview for auto-generated codebase documentation.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- NEVER start with 'This project consists of...' or 'The codebase is...'
- Your first sentence MUST describe what this software DOES — its purpose and primary function. Use evidence from module names and symbol names to infer the specific domain (e.g., 'code search' from TrigramIndex, QueryEngine, ParserFactory).
- Paragraph 1: What it does and how (infer from module names, key symbols, languages used).
- Paragraph 2: Architecture — how the major modules relate. Which modules are central hubs? What are the natural boundaries? Describe the data flow direction — which modules produce data and which consume it.
- Paragraph 3: Scale and notable patterns — file/line counts, language mix, dependency health (cycles, hotspots).
- Write exactly 3-4 paragraphs. Be specific: use module names, file counts, and dependency numbers.
- Do NOT speculate or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
82
/// Prompt preamble for the architecture narrative.
///
/// Asks for 3-5 paragraphs describing the dependency graph: the most
/// connected module, data flow, layering, subsystem boundaries and
/// concerning patterns, restricted to facts in the supplied context.
/// Ends with a `STRUCTURAL CONTEXT:` header; the narration helpers in this
/// file concatenate the context directly after the prompt.
const ARCHITECTURE_NARRATIVE_SYSTEM_PROMPT: &str = "\
You are a technical writer narrating the architecture of a codebase based on its dependency graph.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- NEVER start with 'The architecture consists of...' or 'This codebase is organized...'
- Lead with the most connected module and explain WHY it's central (what it provides to others).
- Describe data flow: which modules are producers (depended-on) vs consumers (depend on many).
- Identify if the codebase follows a layered pattern (e.g., parsers → models → query engine → CLI) and describe the information flow between layers.
- Identify natural boundaries: groups of tightly-coupled modules that form subsystems.
- Call out concerning patterns: circular dependencies, extreme fan-in hotspots, isolated modules.
- Note peripheral modules: what sits at the edges and what role they serve.
- Write 3-5 paragraphs. Every claim must reference specific module names and dependency counts.
- Do NOT speculate about design intent or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
102
/// Prompt preamble for the "Getting Started" onboarding guide.
///
/// Asks for 4-5 plain-language paragraphs (purpose, code organization,
/// where to start reading, key conventions), restricted to facts in the
/// supplied context.
/// Ends with a `STRUCTURAL CONTEXT:` header; the narration helpers in this
/// file concatenate the context directly after the prompt.
const ONBOARD_SYSTEM_PROMPT: &str = "\
You are a technical writer creating a \"Getting Started\" guide for a developer's first day on this codebase.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- Write 4-5 paragraphs in plain language that a new team member could follow.
- Paragraph 1: What this project does — its purpose and primary function, in one or two sentences a non-developer could understand.
- Paragraph 2: How the code is organized — the major directories/modules, what each is responsible for.
- Paragraph 3: Where to start reading — which entry points to look at first, and why.
- Paragraph 4: Key patterns and conventions — recurring design patterns, naming conventions, or architectural idioms a newcomer should know.
- Use specific file and module names from the context.
- Do NOT speculate or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
120
/// Prompt preamble for the recent-activity timeline summary.
///
/// Asks for 3-5 concise paragraphs on the most active areas, stable versus
/// evolving modules, high-churn files and contributor patterns, restricted
/// to facts in the supplied context.
/// Ends with a `STRUCTURAL CONTEXT:` header; the narration helpers in this
/// file concatenate the context directly after the prompt.
const TIMELINE_SYSTEM_PROMPT: &str = "\
You are a technical writer summarizing recent development activity for a codebase.
You may ONLY describe facts present in the STRUCTURAL CONTEXT below.

CRITICAL RULES:
- Lead with the most active area of the codebase and explain what's happening there.
- Identify stable modules (few recent changes) vs evolving modules (many recent changes).
- Flag high-churn files that may warrant attention — files changing very frequently could indicate active development or instability.
- Note contributor patterns — is this a solo project or a team effort? Who owns which areas?
- Write 3-5 concise paragraphs with specific numbers, file names, and module names.
- Do NOT speculate about intent or add information not in the context.
- NEVER leave missing spaces between words. Proofread your output.

STRUCTURAL CONTEXT:
";
137
/// Prompt preamble for the product-concepts glossary.
///
/// Asks the model for 10-15 high-level concepts grouped into 2-4 categories
/// and requires a JSON reply with an `intro` string and a `concepts` array
/// whose items carry `name`, `category`, `definition` and `related_modules`.
/// Ends with a `STRUCTURAL EVIDENCE:` header (not `STRUCTURAL CONTEXT:` as in
/// the other prompts); the narration helpers in this file concatenate the
/// context directly after the prompt.
const CONCEPTS_SYSTEM_PROMPT: &str = "\
You are documenting a software product's core vocabulary for a non-technical reader.

From the structural evidence below, identify 10-15 HIGH-LEVEL product concepts that someone needs to understand to know what this product DOES and how it works. Concepts are NOUN PHRASES describing capabilities, data ideas, or workflows — NOT specific class names, function names, or file names.

GOOD concept examples: 'Trigram Index', 'Symbol Cache', 'AST Query', 'Dependency Graph', 'LLM Narration', 'Runtime Symbol Detection'
BAD concept examples: 'SearchResult struct', 'QueryEngine class', 'extract_symbols function'

Rules:
- Each definition must be 1-3 sentences in plain language a product person could understand.
- Do NOT start definitions with 'This is a...', 'Represents a...', 'A struct that...'
- Group concepts into 2-4 categories of your choice (e.g. 'Core Capabilities', 'Data Model', 'Workflows', 'Developer Tools').
- Anchor each concept to 1-3 module paths from the evidence — these become wiki links.
- Write exactly ONE intro paragraph (2-3 sentences) describing what kind of vocabulary this page catalogs for this specific product.

Output VALID JSON MATCHING THIS SCHEMA EXACTLY — no markdown fences, no commentary before or after:
{
 \"intro\": \"...\",
 \"concepts\": [
 {
 \"name\": \"Concept Name\",
 \"category\": \"Category Name\",
 \"definition\": \"1-3 sentence plain-language definition.\",
 \"related_modules\": [\"src/foo\", \"src/bar\"]
 }
 ]
}

STRUCTURAL EVIDENCE:
";
174
/// Minimum number of whitespace-separated words a structural context must
/// contain before it is sent to the LLM; shorter contexts are skipped and
/// yield no narration.
const MIN_CONTENT_WORDS: usize = 15;
178
179pub fn create_pulse_provider() -> Result<Box<dyn LlmProvider>> {
185 let semantic_config = config::load_config(Path::new("."))?;
186
187 let (provider, api_key) = match config::get_api_key(&semantic_config.provider) {
189 Ok(key) => (semantic_config.provider.clone(), key),
190 Err(configured_err) => {
191 let fallbacks: &[&str] = &["openrouter", "anthropic", "openai"];
193 let mut found = None;
194 for &candidate in fallbacks {
195 if candidate == semantic_config.provider {
196 continue;
197 }
198 if let Ok(key) = config::get_api_key(candidate) {
199 eprintln!(
200 "Note: no API key for configured provider '{}', using auto-detected '{}'",
201 semantic_config.provider, candidate
202 );
203 found = Some((candidate.to_string(), key));
204 break;
205 }
206 }
207 found.ok_or(configured_err)?
208 }
209 };
210
211 let model = config::resolve_model_for(&provider, semantic_config.model.as_deref(), None);
212
213 let options = config::get_provider_options(&provider);
214
215 providers::create_provider(
216 &provider,
217 api_key,
218 model,
219 options,
220 semantic_config.timeout_seconds,
221 )
222}
223
224pub fn narrate_section(
233 provider: &dyn LlmProvider,
234 system_prompt: &str,
235 structural_context: &str,
236 cache: &LlmCache,
237 snapshot_id: &str,
238 cache_key_suffix: &str,
239) -> Option<String> {
240 let word_count = structural_context.split_whitespace().count();
242 if word_count < MIN_CONTENT_WORDS {
243 eprintln!(
244 " Skipping: {} (too brief, {} words)",
245 cache_key_suffix, word_count
246 );
247 return None;
248 }
249
250 let cache_key = LlmCache::compute_key(snapshot_id, cache_key_suffix, structural_context);
252 match cache.get(&cache_key) {
253 Ok(Some(cached)) => {
254 log::debug!("LLM cache hit for '{}'", cache_key_suffix);
255 eprintln!(" Narrating: {} (cached)", cache_key_suffix);
256 return Some(cached.response);
257 }
258 Ok(None) => {}
259 Err(e) => {
260 log::warn!("Failed to read LLM cache: {}", e);
261 }
262 }
263
264 let prompt = format!("{}{}", system_prompt, structural_context);
266
267 eprintln!(" Narrating: {}...", cache_key_suffix);
268
269 let result = call_llm_sync(provider, &prompt);
271
272 match result {
273 Ok(response) => {
274 let response = postprocess_narration(&response);
275
276 let context_hash = blake3::hash(structural_context.as_bytes())
278 .to_hex()
279 .to_string();
280 if let Err(e) = cache.put(&cache_key, &context_hash, &response) {
281 log::warn!("Failed to write LLM cache: {}", e);
282 }
283
284 Some(response)
285 }
286 Err(e) => {
287 log::warn!("LLM narration failed for '{}': {}", cache_key_suffix, e);
288 None
289 }
290 }
291}
292
/// One unit of work for `narrate_batch`.
pub struct NarrationTask {
    /// Prompt preamble; the structural context is appended directly after it.
    pub system_prompt: &'static str,
    /// Context text appended to the prompt; also hashed for the cache entry.
    pub structural_context: String,
    /// Snapshot identifier, used as one input of the cache key.
    pub snapshot_id: String,
    /// Label for this task: part of the cache key and shown in progress output.
    pub cache_key_suffix: String,
}
300
/// Outcome of one `NarrationTask` processed by `narrate_batch`.
pub struct NarrationResult {
    /// The `cache_key_suffix` of the task this result belongs to.
    pub cache_key_suffix: String,
    /// Narration text, or `None` when the task was skipped or the LLM call failed.
    pub response: Option<String>,
}
306
307pub fn narrate_batch(
312 provider: Arc<dyn LlmProvider>,
313 tasks: Vec<NarrationTask>,
314 cache: &LlmCache,
315 concurrency: usize,
316) -> Vec<NarrationResult> {
317 let total = tasks.len();
318 if total == 0 {
319 return Vec::new();
320 }
321
322 let mut results: Vec<NarrationResult> = Vec::with_capacity(total);
324 let mut pending: Vec<(usize, NarrationTask, String)> = Vec::new(); for task in tasks {
327 let word_count = task.structural_context.split_whitespace().count();
328 if word_count < MIN_CONTENT_WORDS {
329 eprintln!(
330 " Skipping: {} (too brief, {} words)",
331 task.cache_key_suffix, word_count
332 );
333 results.push(NarrationResult {
334 cache_key_suffix: task.cache_key_suffix,
335 response: None,
336 });
337 continue;
338 }
339
340 let cache_key = LlmCache::compute_key(
341 &task.snapshot_id,
342 &task.cache_key_suffix,
343 &task.structural_context,
344 );
345 match cache.get(&cache_key) {
346 Ok(Some(cached)) => {
347 eprintln!(" Narrating: {} (cached)", task.cache_key_suffix);
348 results.push(NarrationResult {
349 cache_key_suffix: task.cache_key_suffix,
350 response: Some(cached.response),
351 });
352 }
353 _ => {
354 let idx = results.len();
355 results.push(NarrationResult {
356 cache_key_suffix: task.cache_key_suffix.clone(),
357 response: None,
358 });
359 pending.push((idx, task, cache_key));
360 }
361 }
362 }
363
364 if pending.is_empty() {
365 return results;
366 }
367
368 let pending_count = pending.len();
369 let effective_concurrency = if concurrency == 0 {
370 pending_count
371 } else {
372 concurrency
373 };
374 eprintln!(
375 " Dispatching {} LLM calls ({} concurrent)...",
376 pending_count, effective_concurrency
377 );
378
379 let cache_dir = cache.cache_dir().to_path_buf();
381
382 let rt = match tokio::runtime::Runtime::new() {
384 Ok(rt) => rt,
385 Err(e) => {
386 log::warn!("Failed to create tokio runtime for batch narration: {}", e);
387 return results;
388 }
389 };
390
391 let async_results = rt.block_on(async {
392 let semaphore = Arc::new(tokio::sync::Semaphore::new(effective_concurrency));
393 let mut join_set = tokio::task::JoinSet::new();
394
395 for (idx, task, cache_key) in pending {
396 let provider = Arc::clone(&provider);
397 let sem = Arc::clone(&semaphore);
398 let cache_dir = cache_dir.clone();
399
400 join_set.spawn(async move {
401 let _permit = sem.acquire().await.expect("semaphore closed");
402 let start = std::time::Instant::now();
403 eprintln!(" Narrating: {}...", task.cache_key_suffix);
404
405 let prompt = format!("{}{}", task.system_prompt, task.structural_context);
406 let result = call_llm_async(&*provider, &prompt).await;
407
408 let response = match result {
409 Ok(raw) => {
410 let response = postprocess_narration(&raw);
411
412 let task_cache = LlmCache::from_dir(cache_dir);
414 let context_hash = blake3::hash(task.structural_context.as_bytes())
415 .to_hex()
416 .to_string();
417 if let Err(e) = task_cache.put(&cache_key, &context_hash, &response) {
418 log::warn!(
419 "Failed to write LLM cache for '{}': {}",
420 task.cache_key_suffix,
421 e
422 );
423 }
424
425 eprintln!(
426 " Narrating: {} (done, {:.1}s)",
427 task.cache_key_suffix,
428 start.elapsed().as_secs_f64()
429 );
430 Some(response)
431 }
432 Err(e) => {
433 log::warn!(
434 "LLM narration failed for '{}': {}",
435 task.cache_key_suffix,
436 e
437 );
438 eprintln!(
439 " Narrating: {} (failed, {:.1}s)",
440 task.cache_key_suffix,
441 start.elapsed().as_secs_f64()
442 );
443 None
444 }
445 };
446
447 (idx, task.cache_key_suffix, response)
448 });
449 }
450
451 let mut async_results = Vec::new();
452 while let Some(result) = join_set.join_next().await {
453 match result {
454 Ok(r) => async_results.push(r),
455 Err(e) => log::warn!("Narration task panicked: {}", e),
456 }
457 }
458 async_results
459 });
460
461 for (idx, cache_key_suffix, response) in async_results {
463 results[idx] = NarrationResult {
464 cache_key_suffix,
465 response,
466 };
467 }
468
469 results
470}
471
472async fn call_llm_async(provider: &dyn LlmProvider, prompt: &str) -> Result<String> {
474 let max_retries = 2;
475 let mut last_error = None;
476
477 for attempt in 0..=max_retries {
478 if attempt > 0 {
479 log::debug!(
480 "Retrying LLM narration (attempt {}/{})",
481 attempt + 1,
482 max_retries + 1
483 );
484 tokio::time::sleep(tokio::time::Duration::from_millis(500 * attempt as u64)).await;
485 }
486
487 match provider.complete(prompt, false).await {
488 Ok(response) => return Ok(response),
489 Err(e) => {
490 log::debug!("LLM call attempt {} failed: {}", attempt + 1, e);
491 last_error = Some(e);
492 }
493 }
494 }
495
496 Err(last_error.unwrap_or_else(|| anyhow::anyhow!("LLM call failed")))
497}
498
/// Returns the prompt preamble used for changelog generation.
pub fn changelog_system_prompt() -> &'static str {
    CHANGELOG_SYSTEM_PROMPT
}
503
/// Returns the prompt preamble used for per-module wiki overviews.
pub fn wiki_system_prompt() -> &'static str {
    WIKI_SYSTEM_PROMPT
}
508
/// Returns the prompt preamble used for the whole-project overview.
pub fn project_overview_system_prompt() -> &'static str {
    PROJECT_OVERVIEW_SYSTEM_PROMPT
}
513
/// Returns the prompt preamble used for the architecture narrative.
pub fn architecture_narrative_system_prompt() -> &'static str {
    ARCHITECTURE_NARRATIVE_SYSTEM_PROMPT
}
518
/// Returns the prompt preamble used for the onboarding ("Getting Started") guide.
pub fn onboard_system_prompt() -> &'static str {
    ONBOARD_SYSTEM_PROMPT
}
523
/// Returns the prompt preamble used for the recent-activity timeline summary.
pub fn timeline_system_prompt() -> &'static str {
    TIMELINE_SYSTEM_PROMPT
}
528
/// Returns the prompt preamble used for the product-concepts glossary.
pub fn concepts_system_prompt() -> &'static str {
    CONCEPTS_SYSTEM_PROMPT
}
533
/// Mixed-case proper nouns that `postprocess_narration` must leave intact.
///
/// Each term found in the text is swapped for a placeholder before the
/// run-on-word splitter runs and is restored afterwards, so that e.g.
/// `TypeScript` is not rewritten to `Type Script`.
const CAMEL_CASE_BLOCKLIST: &[&str] = &[
    "TypeScript",
    "JavaScript",
    "CoffeeScript",
    "ActionScript",
    "PostgreSQL",
    "MySQL",
    "MariaDB",
    "MongoDB",
    "CouchDB",
    "GraphQL",
    "GitHub",
    "GitLab",
    "BitBucket",
    "WordPress",
    "PostCSS",
    "IntelliJ",
    "WebSocket",
    "WebAssembly",
    "DevOps",
    "DevTools",
    "DataFrame",
    "NumPy",
    "PyTorch",
    "TensorFlow",
    "FastAPI",
    "NextJS",
    "NestJS",
    "NodeJS",
    "ExpressJS",
    "AngularJS",
    "iPhone",
    "iPad",
    "macOS",
    "iOS",
    "FreeBSD",
    "OpenBSD",
    "CodePen",
    "CodeSandbox",
    "JetBrains",
    "PhpStorm",
    "AppKit",
    "SwiftUI",
    "UIKit",
    "CoreData",
    "MapReduce",
    "CloudFormation",
    "CloudFront",
    "CloudWatch",
    "RedHat",
    "OpenShift",
    "OpenStack",
    "SourceMap",
    "AutoComplete",
    "IntelliSense",
];
592
593fn postprocess_narration(text: &str) -> String {
595 let mut result = text.trim().to_string();
596
597 let re = regex::Regex::new(r"([a-z])\.([A-Z])").unwrap();
600 result = re.replace_all(&result, "$1. $2").to_string();
601
602 let mut placeholders: Vec<(&str, String)> = Vec::new();
605 for (i, term) in CAMEL_CASE_BLOCKLIST.iter().enumerate() {
606 if result.contains(*term) {
607 let placeholder = format!("\x00KEEP{}\x00", i);
608 result = result.replace(*term, &placeholder);
609 placeholders.push((term, placeholder));
610 }
611 }
612
613 let re = regex::Regex::new(r"([a-z]{3,})([A-Z][a-z]{2,})").unwrap();
615 let parts: Vec<&str> = result.split('`').collect();
616 let mut assembled = String::new();
617 for (i, part) in parts.iter().enumerate() {
618 if i % 2 == 0 {
619 assembled.push_str(&re.replace_all(part, "$1 $2"));
621 } else {
622 assembled.push('`');
624 assembled.push_str(part);
625 assembled.push('`');
626 }
627 }
628 result = assembled;
629
630 for (term, placeholder) in &placeholders {
632 result = result.replace(placeholder, term);
633 }
634
635 while result.contains(" ") {
637 result = result.replace(" ", " ");
638 }
639
640 result
641}
642
643fn call_llm_sync(provider: &dyn LlmProvider, prompt: &str) -> Result<String> {
646 let rt = tokio::runtime::Runtime::new()?;
647 rt.block_on(async {
648 let mut last_error = None;
649 let max_retries = 2;
650
651 for attempt in 0..=max_retries {
652 if attempt > 0 {
653 log::debug!(
654 "Retrying LLM narration (attempt {}/{})",
655 attempt + 1,
656 max_retries + 1
657 );
658 tokio::time::sleep(tokio::time::Duration::from_millis(500 * attempt as u64)).await;
659 }
660
661 match provider.complete(prompt, false).await {
662 Ok(response) => return Ok(response),
663 Err(e) => {
664 log::debug!("LLM call attempt {} failed: {}", attempt + 1, e);
665 last_error = Some(e);
666 }
667 }
668 }
669
670 Err(last_error.unwrap_or_else(|| anyhow::anyhow!("LLM call failed")))
671 })
672}
673
#[cfg(test)]
mod tests {
    use super::*;

    // --- MIN_CONTENT_WORDS threshold -------------------------------------

    /// A realistic one-line structural context clears the word threshold.
    #[test]
    fn test_word_count_sufficient() {
        let text = "src/parsers/rust.rs has 250 lines and contains extract_symbols fn_name and other important functions used for parsing code";
        let count = text.split_whitespace().count();
        assert!(
            count >= MIN_CONTENT_WORDS,
            "Word count {} should be >= {}",
            count,
            MIN_CONTENT_WORDS
        );
    }

    /// A short placeholder sentence falls below the threshold.
    #[test]
    fn test_word_count_too_brief() {
        let text = "No data available yet.";
        let count = text.split_whitespace().count();
        assert!(
            count < MIN_CONTENT_WORDS,
            "Word count {} should be < {}",
            count,
            MIN_CONTENT_WORDS
        );
    }

    /// Empty input counts zero words and is therefore below the threshold.
    #[test]
    fn test_word_count_empty() {
        let count = "".split_whitespace().count();
        assert!(count < MIN_CONTENT_WORDS);
    }

    /// Markdown-table style wiki context clears the threshold.
    #[test]
    fn test_word_count_wiki_structural() {
        let text = "| Language | Files | Lines |\n| --- | --- | --- |\n| Rust | 45 | 12,500 |\n\n**Files:** src/main.rs src/lib.rs src/query/mod.rs src/parsers/rust.rs";
        let count = text.split_whitespace().count();
        assert!(
            count >= MIN_CONTENT_WORDS,
            "Wiki structural word count {} should be >= {}",
            count,
            MIN_CONTENT_WORDS
        );
    }

    /// Key/value style digest context clears the threshold.
    #[test]
    fn test_word_count_digest_bootstrap() {
        let text = "Branch: feature/pulse Commit: abc1234 Files: 120 Edges: 340 Modules: src tests build.rs config.toml main.rs lib.rs";
        let count = text.split_whitespace().count();
        assert!(
            count >= MIN_CONTENT_WORDS,
            "Digest bootstrap word count {} should be >= {}",
            count,
            MIN_CONTENT_WORDS
        );
    }

    // --- prompt accessors --------------------------------------------------

    #[test]
    fn test_changelog_system_prompt() {
        assert!(changelog_system_prompt().contains("COMMIT DATA"));
    }

    #[test]
    fn test_wiki_system_prompt() {
        assert!(wiki_system_prompt().contains("STRUCTURAL CONTEXT"));
    }

    // --- postprocess_narration ---------------------------------------------

    /// Blocklisted proper nouns must not be split at the inner capital.
    #[test]
    fn test_postprocess_preserves_proper_nouns() {
        let input = "The TypeScript module handles JavaScript compilation.";
        let result = postprocess_narration(input);
        assert!(
            result.contains("TypeScript"),
            "Should preserve TypeScript, got: {}",
            result
        );
        assert!(
            result.contains("JavaScript"),
            "Should preserve JavaScript, got: {}",
            result
        );
    }

    /// Non-blocklisted run-on words are split at the lowercase/capital boundary.
    #[test]
    fn test_postprocess_splits_run_on_words() {
        let input = "The parseModule drives the query engine.";
        let result = postprocess_narration(input);
        assert!(
            result.contains("parse Module"),
            "Should split run-on camelCase: {}",
            result
        );
    }

    /// Text inside backticks is code and must come through untouched.
    #[test]
    fn test_postprocess_preserves_backtick_code() {
        let input = "Uses `TypeScript` and `parseModule` for processing.";
        let result = postprocess_narration(input);
        assert!(
            result.contains("`TypeScript`"),
            "Should preserve code: {}",
            result
        );
        assert!(
            result.contains("`parseModule`"),
            "Should preserve code: {}",
            result
        );
    }

    /// A period glued to the next sentence gets a space inserted after it.
    #[test]
    fn test_postprocess_fixes_missing_sentence_space() {
        let input = "First sentence.Second sentence starts here.";
        let result = postprocess_narration(input);
        assert!(
            result.contains(". S"),
            "Should add space after period: {}",
            result
        );
    }

    /// Runs of two or more spaces collapse to one. The input must actually
    /// contain such runs and the assertion must look for a double space,
    /// otherwise the check is meaningless for any multi-word sentence.
    #[test]
    fn test_postprocess_fixes_double_spaces() {
        let input = "Too  many   spaces  here.";
        let result = postprocess_narration(input);
        assert!(
            !result.contains("  "),
            "Should remove double spaces: {}",
            result
        );
    }
}