trusty-analyze 0.1.6

Sidecar code-analysis daemon for trusty-search: complexity, smells, quality, facts
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
//! LLM-backed deep analysis pass: turns a deterministic [`ReviewReport`] into a
//! prose narrative plus framework-aware recommendations.
//!
//! Why: the deterministic review pipeline produces structured data (grades,
//! smells, complexity numbers) that a reviewer still has to interpret. An LLM
//! "explain" pass adds the natural-language synthesis a human reviewer wants:
//! what the change is doing, why the worst files are worst, and which
//! framework-specific concerns apply. Keeping the LLM call *out of* the
//! deterministic [`crate::core::review`] pipeline preserves the `ReviewReport`
//! as a clean, reproducible artifact — the narrative lives on a separate
//! [`DeepAnalysisReport`] so callers can opt into the slower, non-deterministic
//! path without changing the existing review contract.
//!
//! What: [`explain_report`] takes a [`ReviewReport`] + a list of detected
//! framework strings + a [`ChatProvider`], builds a grounded prompt, drains the
//! provider's streaming response into a `String`, and returns the prose
//! narrative. [`deep_analysis`] wraps that into a full [`DeepAnalysisReport`]
//! (narrative + frameworks + recommendations + model id + the underlying
//! deterministic report).
//!
//! Test: see `mod tests` — covers prompt construction, streaming accumulation
//! against a stub provider, the stream-error path, and JSON round-tripping of
//! [`DeepAnalysisReport`].

use serde::{Deserialize, Serialize};
use trusty_common::chat::{ChatEvent, ChatProvider, ToolDef};
use trusty_common::ChatMessage;

use crate::core::review::ReviewReport;
use crate::types::complexity::ComplexityGrade;

/// Default OpenRouter model used by [`deep_analysis`] when the caller doesn't
/// override and `TRUSTY_LLM_MODEL` is unset.
///
/// [`resolve_model`] falls back to this value last, after the explicit
/// argument and the [`ENV_MODEL`] environment variable.
pub const DEFAULT_MODEL: &str = "openai/gpt-4o-mini";

/// Env var holding the OpenRouter API key.
///
/// Read by [`resolve_api_key`] when no explicit key is supplied.
pub const ENV_API_KEY: &str = "OPENROUTER_API_KEY";

/// Env var overriding the default model.
///
/// Read by [`resolve_model`] when no explicit model is supplied.
pub const ENV_MODEL: &str = "TRUSTY_LLM_MODEL";

/// Cap on the number of files / smells / recommendations we list in the prompt.
/// Keeps the prompt under ~1500 tokens on a typical fast model so the call
/// stays cheap.
///
/// This one bounds the "Worst files" section of [`build_explain_prompt`].
const MAX_FILES_IN_PROMPT: usize = 3;
/// Bounds the "Top smells" section of [`build_explain_prompt`].
const MAX_SMELLS_IN_PROMPT: usize = 5;
/// Bounds the "Existing static recommendations" section of
/// [`build_explain_prompt`].
const MAX_RECS_IN_PROMPT: usize = 5;

/// LLM-augmented deep analysis report.
///
/// Why: keeps the LLM-generated narrative separate from the deterministic
/// [`ReviewReport`] so the two artifacts can be cached, transported, and
/// reasoned about independently. The deterministic report stays a clean
/// fixed-point input; the narrative + framework-aware recommendations are
/// non-deterministic LLM outputs that live on their own wrapper struct.
/// What: `index_id` echoes the request; `narrative` is the LLM prose;
/// `frameworks` is the list passed in (echoed for traceability);
/// `recommendations` is the (best-effort) parsed list of LLM follow-ups;
/// `model_used` is the actual model id; `based_on` is the deterministic input.
/// Test: `deep_report_round_trips_json` confirms the serde shape.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct DeepAnalysisReport {
    /// Index identifier copied verbatim from the `index_id` argument of
    /// [`deep_analysis`].
    pub index_id: String,
    /// Prose returned by [`explain_report`] (the concatenated stream deltas).
    pub narrative: String,
    /// Framework names exactly as the caller supplied them.
    pub frameworks: Vec<String>,
    /// Bullet / numbered lines mined from `narrative`; empty when the
    /// narrative contains no list markers.
    pub recommendations: Vec<String>,
    /// Model id the provider was constructed with (see [`resolve_model`]).
    pub model_used: String,
    /// The deterministic report the narrative was generated from.
    pub based_on: ReviewReport,
}

/// Errors returned by [`deep_analysis`] / [`explain_report`].
///
/// Why: keeps the failure surface typed so callers (CLI / HTTP / MCP) can
/// distinguish configuration problems (missing API key) from runtime ones
/// (chat transport failure). The chat-stream surface itself is anyhow-friendly
/// inside [`explain_report`]; this enum wraps the orchestrator entry point.
/// Test: `missing_api_key_returns_typed_error`.
#[derive(Debug, thiserror::Error)]
pub enum DeepAnalysisError {
    /// `OPENROUTER_API_KEY` is not set and no key was passed explicitly.
    ///
    /// Produced by [`resolve_api_key`]; an empty explicit key or an empty
    /// environment value is treated the same as an absent one.
    #[error("OPENROUTER_API_KEY is not set; deep analysis requires an OpenRouter API key")]
    MissingApiKey,
    /// The chat call to OpenRouter failed (network, auth, rate limit, ...).
    ///
    /// The payload is the `{:#}` rendering of the `anyhow::Error` returned by
    /// [`explain_report`], i.e. the error message followed by its cause chain.
    #[error("openrouter chat failed: {0}")]
    Chat(String),
}

/// Generate a prose explanation of `report` using `provider`.
///
/// Why: turns a structured `ReviewReport` into a paragraph a human can read
/// and act on. Calls out the highest-leverage smells, ties them to the worst
/// files, and (when `frameworks` is non-empty) frames the advice in terms of
/// the project's detected frameworks. Taking the provider as a `dyn` lets
/// tests inject a stub without hitting the network.
/// What: builds a grounded prompt with [`build_explain_prompt`], sends it as a
/// single `user` message via the provider's streaming chat API, and
/// accumulates `Delta` events into a `String`. Returns an error if the
/// provider returns an error event or its stream task fails.
/// Test: `explain_report_collects_stream_deltas` and
/// `explain_report_surfaces_stream_error`.
pub async fn explain_report(
    report: &ReviewReport,
    frameworks: &[String],
    provider: &dyn ChatProvider,
) -> anyhow::Result<String> {
    let prompt = build_explain_prompt(report, frameworks);
    let messages = vec![
        ChatMessage {
            role: "system".to_string(),
            content: SYSTEM_PROMPT.to_string(),
            tool_call_id: None,
            tool_calls: None,
        },
        ChatMessage {
            role: "user".to_string(),
            content: prompt,
            tool_call_id: None,
            tool_calls: None,
        },
    ];

    // Buffered channel keeps the provider task from blocking on a slow reader.
    let (tx, mut rx) = tokio::sync::mpsc::channel::<ChatEvent>(32);
    let stream_fut = provider.chat_stream(messages, Vec::<ToolDef>::new(), tx);

    let drain = async {
        let mut out = String::new();
        let mut stream_error: Option<String> = None;
        while let Some(event) = rx.recv().await {
            match event {
                ChatEvent::Delta(s) => out.push_str(&s),
                ChatEvent::ToolCall(_) => {
                    // Tools aren't used for explain; ignore spurious calls.
                }
                ChatEvent::Done => break,
                ChatEvent::Error(msg) => {
                    stream_error = Some(msg);
                    break;
                }
            }
        }
        if let Some(msg) = stream_error {
            anyhow::bail!("chat provider stream error: {msg}");
        }
        Ok::<String, anyhow::Error>(out)
    };

    let (stream_res, narrative) = tokio::join!(stream_fut, drain);
    stream_res?;
    narrative
}

/// System prompt used for every explain call.
///
/// Sent by [`explain_report`] as the `system` message that precedes the
/// report-specific `user` prompt. It fixes the output shape (2-3 short prose
/// paragraphs) and tells the model not to invent metrics that are absent from
/// the input.
const SYSTEM_PROMPT: &str = "You are a code review assistant. Given structured \
metrics from a pull-request review (complexity grade, code smells, recommendations, \
and detected frameworks), explain the findings in 2-3 short paragraphs of plain prose. \
Focus on why these issues matter and the highest-priority next steps the developer \
should take. Be specific: reference the file paths, smell categories, and frameworks \
provided. Do not invent metrics that aren't in the input.";

/// Assemble the `user` prompt text for an explain call.
///
/// Why: building the prompt deterministically, with no network involved,
/// keeps the LLM grounded in the exact numbers from the report and makes this
/// step unit-testable on its own.
/// What: emits, in order: the headline metrics (overall grade, smell count,
/// changed lines, file count); the detected frameworks (only when
/// `frameworks` is non-empty); the report summary; up to
/// [`MAX_FILES_IN_PROMPT`] files ordered worst grade first; the first
/// [`MAX_SMELLS_IN_PROMPT`] smells in file order; the first
/// [`MAX_RECS_IN_PROMPT`] static recommendations in file order; and the
/// closing instruction. Sections with no entries are omitted. A report with
/// no files and an overall grade of `A` gets an extra note asking the model to
/// say that nothing review-worthy was found.
/// Test: `build_explain_prompt_*` tests.
pub fn build_explain_prompt(report: &ReviewReport, frameworks: &[String]) -> String {
    // Headline metrics, one per line.
    let mut prompt = format!(
        "Overall grade: {}\nSmell count: {}\nChanged lines: {}\nFiles reviewed: {}\n",
        report.overall_grade,
        report.smell_count,
        report.changed_lines,
        report.files.len(),
    );

    if !frameworks.is_empty() {
        prompt.push_str(&format!("Detected frameworks: {}\n", frameworks.join(", ")));
    }

    prompt.push_str(&format!("\nSummary: {}\n\n", report.summary));

    // Rank files by descending grade. The sort is stable, so files sharing a
    // grade keep their order from the report.
    let mut ranked: Vec<&_> = report.files.iter().collect();
    ranked.sort_by(|lhs, rhs| rhs.grade.cmp(&lhs.grade));
    ranked.truncate(MAX_FILES_IN_PROMPT);

    if !ranked.is_empty() {
        prompt.push_str("Worst files (worst first):\n");
        let listing: String = ranked
            .iter()
            .map(|file| {
                format!(
                    "- {} (grade {}, cyclomatic {}, cognitive {}, {} smell(s))\n",
                    file.path,
                    file.grade,
                    file.complexity.cyclomatic,
                    file.complexity.cognitive,
                    file.smells.len(),
                )
            })
            .collect();
        prompt.push_str(&listing);
        prompt.push('\n');
    }

    // First few smells in report order, each paired with its file path.
    let smells: Vec<(&str, &_)> = report
        .files
        .iter()
        .flat_map(|file| {
            file.smells
                .iter()
                .map(move |smell| (file.path.as_str(), smell))
        })
        .take(MAX_SMELLS_IN_PROMPT)
        .collect();
    if !smells.is_empty() {
        prompt.push_str("Top smells:\n");
        for (path, smell) in &smells {
            prompt.push_str(&format!(
                "- {} at {}:{} (severity {})\n",
                smell.category, path, smell.line, smell.severity,
            ));
        }
        prompt.push('\n');
    }

    // Pass the existing static recommendations along so the model builds on
    // them instead of inventing its own from scratch.
    let mut recs = report
        .files
        .iter()
        .flat_map(|file| file.recommendations.iter())
        .take(MAX_RECS_IN_PROMPT)
        .peekable();
    if recs.peek().is_some() {
        prompt.push_str("Existing static recommendations:\n");
        for rec in recs {
            prompt.push_str(&format!("- {rec}\n"));
        }
        prompt.push('\n');
    }

    prompt.push_str(
        "Write 2-3 short paragraphs explaining why these findings matter and the prioritised \
         next steps for the developer. Reference specific files, smells, and (where relevant) \
         the detected frameworks.",
    );

    // An empty grade-A report would otherwise leave the model with nothing to
    // talk about; tell it what to say.
    let nothing_to_review = report.files.is_empty() && report.overall_grade == ComplexityGrade::A;
    if nothing_to_review {
        prompt.push_str(
            "\n\nNote: this diff is empty or has no measurable findings; \
             explain briefly that no review-worthy issues were detected.",
        );
    }

    prompt
}

/// Collect the list items found in an LLM narrative.
///
/// Why: the model is asked for prose, but consumers such as the CLI and
/// dashboards also want a separate "what to do" list. Mining the narrative for
/// list lines avoids demanding strict JSON from the model and then coping with
/// malformed output. The result is best-effort and is empty for a narrative
/// that contains no list lines.
/// What: trims each line, asks [`strip_bullet_marker`] whether it starts with
/// a list marker, trims the remainder, and keeps it when it is non-empty.
/// Items are returned in the order they appear.
/// Test: `extract_recommendations_picks_bullets`.
fn extract_recommendations(narrative: &str) -> Vec<String> {
    narrative
        .lines()
        .filter_map(|raw| strip_bullet_marker(raw.trim()))
        .map(str::trim)
        .filter(|item| !item.is_empty())
        .map(str::to_owned)
        .collect()
}

/// Strip a leading list marker from `line`, if it has one.
///
/// Recognised markers are `- `, `* `, `• ` (each followed by one space) and
/// the numbered form `N. ` (one or more ASCII digits, a dot, one space).
///
/// `line` is expected to be already trimmed; leading whitespace is not
/// skipped here.
///
/// Returns the text after the marker, or `None` when the line does not start
/// with a marker. The remainder is returned as-is (it may be empty or carry
/// further whitespace).
fn strip_bullet_marker(line: &str) -> Option<&str> {
    // The bullet glyph is written as an escape so it cannot be silently lost
    // to an encoding mishap. An empty marker here would match every line.
    const SYMBOL_MARKERS: [&str; 3] = ["- ", "* ", "\u{2022} "];

    if let Some(rest) = SYMBOL_MARKERS
        .iter()
        .find_map(|marker| line.strip_prefix(*marker))
    {
        return Some(rest);
    }

    // `1.` / `12.` style: everything before the first '.' must be digits, and
    // the dot must be followed by a space (so `1.5 ms` is not a list item).
    let (number, rest) = line.split_once('.')?;
    if number.is_empty() || !number.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    rest.strip_prefix(' ')
}

/// Pick the OpenRouter API key: explicit argument first, then the environment.
///
/// Why: binaries usually read the env var once at startup and thread the
/// value through, while tests want to hand in a key without touching the
/// process environment.
/// What: returns `explicit` when it is present and non-empty; otherwise the
/// value of [`ENV_API_KEY`] when that is set and non-empty.
///
/// # Errors
/// Returns [`DeepAnalysisError::MissingApiKey`] when neither source yields a
/// non-empty key.
///
/// Test: covered transitively by `missing_api_key_returns_typed_error`.
pub fn resolve_api_key(explicit: Option<&str>) -> Result<String, DeepAnalysisError> {
    match explicit {
        Some(key) if !key.is_empty() => Ok(key.to_string()),
        // Absent or empty override: fall back to the environment.
        _ => match std::env::var(ENV_API_KEY) {
            Ok(key) if !key.is_empty() => Ok(key),
            _ => Err(DeepAnalysisError::MissingApiKey),
        },
    }
}

/// Pick the model id, in priority order: `explicit`, then [`ENV_MODEL`], then
/// [`DEFAULT_MODEL`].
///
/// An empty string from either of the first two sources counts as "not
/// provided" and falls through to the next one, so the result is never empty.
pub fn resolve_model(explicit: Option<&str>) -> String {
    match explicit {
        Some(model) if !model.is_empty() => model.to_string(),
        // Absent or empty override: consult the environment, then the default.
        _ => match std::env::var(ENV_MODEL) {
            Ok(model) if !model.is_empty() => model,
            _ => DEFAULT_MODEL.to_string(),
        },
    }
}

/// Run the complete deep-analysis pass and return a [`DeepAnalysisReport`].
///
/// Why: this is the one orchestration entry point shared by the HTTP, MCP,
/// and CLI layers, so every transport produces the same report and the
/// key / model resolution rules live in a single place.
/// What: resolves the API key ([`resolve_api_key`]) and model
/// ([`resolve_model`]), builds a [`trusty_common::chat::OpenRouterProvider`]
/// from them, obtains the narrative from [`explain_report`], mines the
/// narrative for list items, and assembles the report. `index_id`,
/// `frameworks`, and `report` are carried into the result unchanged.
///
/// # Errors
/// [`DeepAnalysisError::MissingApiKey`] when no key can be resolved (checked
/// before any provider is built); [`DeepAnalysisError::Chat`] carrying the
/// `{:#}` rendering of the underlying error when the explain call fails.
///
/// Test: `missing_api_key_returns_typed_error` covers the no-key path; the
/// happy path requires `OPENROUTER_API_KEY` and is exercised by integration
/// tests in the CLI/HTTP layers.
pub async fn deep_analysis(
    index_id: &str,
    report: ReviewReport,
    frameworks: Vec<String>,
    api_key: Option<&str>,
    model: Option<&str>,
) -> Result<DeepAnalysisReport, DeepAnalysisError> {
    // Configuration problems surface first, as a typed error.
    let key = resolve_api_key(api_key)?;
    let model_used = resolve_model(model);
    let provider = trusty_common::chat::OpenRouterProvider::new(key, &model_used);

    let narrative = match explain_report(&report, &frameworks, &provider).await {
        Ok(text) => text,
        // `{:#}` keeps the whole cause chain in the message.
        Err(err) => return Err(DeepAnalysisError::Chat(format!("{err:#}"))),
    };
    let recommendations = extract_recommendations(&narrative);

    Ok(DeepAnalysisReport {
        index_id: index_id.to_string(),
        narrative,
        frameworks,
        recommendations,
        model_used,
        based_on: report,
    })
}

/// Format a [`DeepAnalysisReport`] as plain text for a terminal.
///
/// Why: the CLI `deep --format text` mode needs output a person can scan,
/// parallel to [`crate::core::render_review_text`].
/// What: writes a banner, then the index id, model, and frameworks (or
/// `none detected`), the trimmed narrative, the recommendations as an indented
/// list (section omitted when there are none), and finally the summary line
/// of the underlying deterministic report.
/// Test: `render_text_includes_narrative_and_frameworks`.
pub fn render_text(report: &DeepAnalysisReport) -> String {
    let frameworks = if report.frameworks.is_empty() {
        "none detected".to_string()
    } else {
        report.frameworks.join(", ")
    };

    let mut text = format!(
        "=== Deep Analysis ===\nindex: {}\nmodel: {}\nframeworks: {}\n\n--- Narrative ---\n{}\n",
        report.index_id,
        report.model_used,
        frameworks,
        report.narrative.trim(),
    );

    if !report.recommendations.is_empty() {
        text.push_str("\n--- Recommendations ---\n");
        for rec in &report.recommendations {
            text.push_str("  - ");
            text.push_str(rec);
            text.push('\n');
        }
    }

    text.push_str(&format!(
        "\n--- Based On ---\n{}\n",
        report.based_on.summary
    ));
    text
}

// Unit tests for the deep-analysis module. Everything here runs without
// network access: the chat provider is replaced by in-process stubs.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::review::{FileReview, ReviewComplexity, ReviewSource, SmellHit};
    use async_trait::async_trait;
    use tokio::sync::mpsc::Sender;

    /// Stub provider: replays a fixed string as a single `Delta` then `Done`.
    struct StubProvider {
        text: String,
    }

    #[async_trait]
    impl ChatProvider for StubProvider {
        fn name(&self) -> &str {
            "stub"
        }
        fn model(&self) -> &str {
            "stub-model"
        }
        async fn chat_stream(
            &self,
            _messages: Vec<ChatMessage>,
            _tools: Vec<ToolDef>,
            tx: Sender<ChatEvent>,
        ) -> anyhow::Result<()> {
            // Send failures are ignored: the stub has nothing useful to do
            // if the receiver has already gone away.
            tx.send(ChatEvent::Delta(self.text.clone())).await.ok();
            tx.send(ChatEvent::Done).await.ok();
            Ok(())
        }
    }

    /// Stub provider that errors mid-stream.
    ///
    /// It reports the failure as an `Error` event while the call itself
    /// returns `Ok(())`, so the test exercises the event path rather than the
    /// provider-return-value path.
    struct ErrorProvider;

    #[async_trait]
    impl ChatProvider for ErrorProvider {
        fn name(&self) -> &str {
            "stub-err"
        }
        fn model(&self) -> &str {
            "stub-err"
        }
        async fn chat_stream(
            &self,
            _messages: Vec<ChatMessage>,
            _tools: Vec<ToolDef>,
            tx: Sender<ChatEvent>,
        ) -> anyhow::Result<()> {
            tx.send(ChatEvent::Error("boom".into())).await.ok();
            Ok(())
        }
    }

    /// Two-file fixture: one grade-D file with a single smell and a single
    /// recommendation, and one clean grade-A file.
    fn sample_report() -> ReviewReport {
        ReviewReport {
            files: vec![
                FileReview {
                    path: "src/big.rs".to_string(),
                    grade: ComplexityGrade::D,
                    complexity: ReviewComplexity {
                        cyclomatic: 32,
                        cognitive: 50,
                    },
                    smells: vec![SmellHit {
                        category: "long_method".into(),
                        line: 12,
                        severity: "medium".into(),
                    }],
                    recommendations: vec!["Split into helpers".into()],
                    source: ReviewSource::NewFile,
                },
                FileReview {
                    path: "src/small.rs".to_string(),
                    grade: ComplexityGrade::A,
                    complexity: ReviewComplexity {
                        cyclomatic: 1,
                        cognitive: 1,
                    },
                    smells: vec![],
                    recommendations: vec![],
                    source: ReviewSource::NewFile,
                },
            ],
            overall_grade: ComplexityGrade::D,
            changed_lines: 80,
            smell_count: 1,
            summary: "2 files analyzed".into(),
        }
    }

    /// The header lines carry the report's headline numbers.
    #[test]
    fn build_explain_prompt_includes_top_level_metrics() {
        let report = sample_report();
        let prompt = build_explain_prompt(&report, &[]);
        assert!(prompt.contains("Overall grade: D"), "got:\n{prompt}");
        assert!(prompt.contains("Smell count: 1"));
        assert!(prompt.contains("Changed lines: 80"));
        assert!(prompt.contains("Files reviewed: 2"));
    }

    /// The grade-D file must appear before the grade-A file, if the latter
    /// is listed at all.
    #[test]
    fn build_explain_prompt_lists_worst_files_first() {
        let report = sample_report();
        let prompt = build_explain_prompt(&report, &[]);
        let big_idx = prompt.find("src/big.rs").expect("big.rs listed");
        let small_idx = prompt.find("src/small.rs");
        if let Some(small_idx) = small_idx {
            assert!(big_idx < small_idx, "worst file must come first");
        }
    }

    /// Supplied framework names are echoed into the prompt.
    #[test]
    fn build_explain_prompt_mentions_detected_frameworks() {
        let report = sample_report();
        let frameworks = vec!["Next.js".to_string(), "React".to_string()];
        let prompt = build_explain_prompt(&report, &frameworks);
        assert!(prompt.contains("Detected frameworks:"));
        assert!(prompt.contains("Next.js"));
        assert!(prompt.contains("React"));
    }

    /// No frameworks means no "Detected frameworks" line.
    #[test]
    fn build_explain_prompt_omits_frameworks_when_empty() {
        let prompt = build_explain_prompt(&sample_report(), &[]);
        assert!(!prompt.contains("Detected frameworks:"));
    }

    /// Smell category, `path:line` location, and static recommendation text
    /// all reach the prompt.
    #[test]
    fn build_explain_prompt_includes_smells_and_recommendations() {
        let report = sample_report();
        let prompt = build_explain_prompt(&report, &[]);
        assert!(prompt.contains("long_method"));
        assert!(prompt.contains("src/big.rs:12"));
        assert!(prompt.contains("Split into helpers"));
    }

    /// An empty grade-A report gets the "nothing to review" note.
    #[test]
    fn build_explain_prompt_handles_empty_report() {
        let report = ReviewReport {
            files: vec![],
            overall_grade: ComplexityGrade::A,
            changed_lines: 0,
            smell_count: 0,
            summary: "0 files".into(),
        };
        let prompt = build_explain_prompt(&report, &[]);
        assert!(prompt.contains("no review-worthy issues"));
    }

    /// The narrative is exactly the text the stub streamed.
    #[tokio::test]
    async fn explain_report_collects_stream_deltas() {
        let provider = StubProvider {
            text: "This change is mostly fine.".to_string(),
        };
        let narrative = explain_report(&sample_report(), &[], &provider)
            .await
            .unwrap();
        assert_eq!(narrative, "This change is mostly fine.");
    }

    /// An `Error` event becomes an `Err` whose message includes the payload.
    #[tokio::test]
    async fn explain_report_surfaces_stream_error() {
        let provider = ErrorProvider;
        let err = explain_report(&sample_report(), &[], &provider)
            .await
            .expect_err("error event should propagate");
        assert!(err.to_string().contains("boom"), "got: {err}");
    }

    /// With no explicit model and the env var cleared, the default is used.
    #[test]
    fn resolve_model_defaults_when_no_override() {
        let prev = std::env::var(ENV_MODEL).ok();
        // SAFETY: tests serially set this env var; no concurrent access.
        // NOTE(review): the default test harness runs tests on several
        // threads — confirm nothing else touches this variable concurrently.
        unsafe { std::env::remove_var(ENV_MODEL) };
        assert_eq!(resolve_model(None), DEFAULT_MODEL);
        // SAFETY: restoring previous value preserves test isolation.
        if let Some(v) = prev {
            unsafe { std::env::set_var(ENV_MODEL, v) };
        }
    }

    /// An explicit model is returned verbatim.
    #[test]
    fn resolve_model_prefers_explicit() {
        assert_eq!(resolve_model(Some("explicit/model")), "explicit/model");
    }

    /// Without a key from either source, the orchestrator fails with the
    /// typed `MissingApiKey` error.
    #[tokio::test]
    async fn missing_api_key_returns_typed_error() {
        let prev = std::env::var(ENV_API_KEY).ok();
        // SAFETY: serial test access; no other thread touches this var.
        // NOTE(review): the default test harness runs tests on several
        // threads — confirm nothing else touches this variable concurrently.
        unsafe { std::env::remove_var(ENV_API_KEY) };
        let err = deep_analysis("idx", sample_report(), vec![], None, None)
            .await
            .expect_err("missing key should error");
        assert!(matches!(err, DeepAnalysisError::MissingApiKey));
        // SAFETY: restoring previous env state.
        if let Some(v) = prev {
            unsafe { std::env::set_var(ENV_API_KEY, v) };
        }
    }

    /// `-`, `*`, and `N.` lines are extracted; plain sentences are not.
    #[test]
    fn extract_recommendations_picks_bullets() {
        let prose = "Here is the situation.\n\n- Refactor src/big.rs\n* Add tests\n1. Document the helper\nLast sentence.\n";
        let recs = extract_recommendations(prose);
        assert_eq!(
            recs,
            vec![
                "Refactor src/big.rs".to_string(),
                "Add tests".to_string(),
                "Document the helper".to_string(),
            ]
        );
    }

    /// Pure prose yields an empty list.
    #[test]
    fn extract_recommendations_empty_when_no_bullets() {
        assert!(extract_recommendations("just prose, nothing else").is_empty());
    }

    /// Only all-digit prefixes count as numbered markers.
    #[test]
    fn strip_bullet_marker_rejects_non_numeric_prefix() {
        assert!(strip_bullet_marker("foo bar").is_none());
        assert!(strip_bullet_marker("a. not a number").is_none());
        assert_eq!(strip_bullet_marker("12. nice"), Some("nice"));
    }

    /// Serialising to JSON and back yields an equal report.
    #[test]
    fn deep_report_round_trips_json() {
        let r = DeepAnalysisReport {
            index_id: "idx".into(),
            narrative: "n".into(),
            frameworks: vec!["React".into()],
            recommendations: vec!["r1".into()],
            model_used: "m".into(),
            based_on: sample_report(),
        };
        let json = serde_json::to_string(&r).unwrap();
        let back: DeepAnalysisReport = serde_json::from_str(&json).unwrap();
        assert_eq!(r, back);
    }

    /// The text rendering contains every populated section.
    #[test]
    fn render_text_includes_narrative_and_frameworks() {
        let r = DeepAnalysisReport {
            index_id: "idx".into(),
            narrative: "the narrative".into(),
            frameworks: vec!["Next.js".into(), "React".into()],
            recommendations: vec!["use server components".into()],
            model_used: "openai/gpt-4o-mini".into(),
            based_on: sample_report(),
        };
        let text = render_text(&r);
        assert!(text.contains("=== Deep Analysis ==="));
        assert!(text.contains("the narrative"));
        assert!(text.contains("Next.js, React"));
        assert!(text.contains("use server components"));
        assert!(text.contains("idx"));
    }

    /// An empty framework list renders as "none detected".
    #[test]
    fn render_text_handles_empty_frameworks() {
        let r = DeepAnalysisReport {
            index_id: "idx".into(),
            narrative: "n".into(),
            frameworks: vec![],
            recommendations: vec![],
            model_used: "m".into(),
            based_on: sample_report(),
        };
        let text = render_text(&r);
        assert!(text.contains("frameworks: none detected"));
    }
}