gobby-wiki 0.2.0

use std::path::{Path, PathBuf};

use gobby_core::ai_context::AiContext;
use gobby_core::config::{AiCapability, AiRouting};

#[cfg(feature = "ai")]
use crate::ai::clients::ProductionTranscriptionClient;
use crate::ingest::{
    IngestResult, index_after_ingest, markdown_metadata, markdown_title, path_to_string,
    write_asset, write_raw_markdown,
};
use crate::sources::{CompileStatus, IngestionMethod, SourceDraft, SourceKind, SourceManifest};
use crate::store::WikiIndexStore;
use crate::transcribe::{
    TranscriptionDegradation, TranscriptionEndpoint, TranscriptionMarkdownInput,
    TranscriptionRequest, write_audio_transcript_markdown,
};
use crate::{ScopeIdentity, WikiError};

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AudioSnapshot {
    pub location: String,
    pub file_name: String,
    pub fetched_at: String,
    pub bytes: Vec<u8>,
    pub mime_type: Option<String>,
    pub duration_seconds: Option<u32>,
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AudioIngestResult {
    pub record: crate::sources::SourceRecord,
    pub raw_path: PathBuf,
    pub asset_path: PathBuf,
    pub transcript_path: PathBuf,
    pub transcription_degradation: Option<TranscriptionDegradation>,
}

pub fn ingest_audio(
    vault_root: &Path,
    store: &mut impl WikiIndexStore,
    scope: ScopeIdentity,
    snapshot: AudioSnapshot,
    ai_context: &AiContext,
) -> Result<AudioIngestResult, WikiError> {
    ingest_audio_with_transcription(
        vault_root,
        store,
        scope,
        snapshot,
        production_transcription_endpoint(ai_context, false),
    )
}

pub fn production_transcription_endpoint(
    context: &AiContext,
    translate: bool,
) -> TranscriptionEndpoint<'static> {
    let capability = if translate {
        AiCapability::AudioTranslate
    } else {
        AiCapability::AudioTranscribe
    };
    let route = resolved_transcription_route(context, capability);
    if translate {
        let transcribe_route = resolved_transcription_route(context, AiCapability::AudioTranscribe);
        let text_route = resolved_transcription_route(context, AiCapability::TextGenerate);
        if route_available(route)
            || (route_available(transcribe_route) && route_available(text_route))
        {
            available_production_transcription_endpoint(context, route, translate)
        } else {
            TranscriptionEndpoint::Unavailable(transcription_degradation(route, translate))
        }
    } else if matches!(route, AiRouting::Daemon | AiRouting::Direct) {
        available_production_transcription_endpoint(context, route, translate)
    } else {
        TranscriptionEndpoint::Unavailable(transcription_degradation(route, translate))
    }
}

fn route_available(route: AiRouting) -> bool {
    matches!(route, AiRouting::Daemon | AiRouting::Direct)
}

#[cfg(feature = "ai")]
fn resolved_transcription_route(context: &AiContext, capability: AiCapability) -> AiRouting {
    gobby_core::ai::effective_route(context, capability)
}

#[cfg(not(feature = "ai"))]
fn resolved_transcription_route(context: &AiContext, capability: AiCapability) -> AiRouting {
    context.binding(capability).routing
}

#[cfg(feature = "ai")]
fn available_production_transcription_endpoint(
    context: &AiContext,
    _route: AiRouting,
    translate: bool,
) -> TranscriptionEndpoint<'static> {
    let client = Box::new(ProductionTranscriptionClient::new(context.clone()));
    if translate {
        TranscriptionEndpoint::Translating {
            client,
            target_lang: context
                .binding(AiCapability::AudioTranslate)
                .target_lang
                .clone(),
            language_hint: context
                .binding(AiCapability::AudioTranscribe)
                .language
                .clone(),
        }
    } else {
        TranscriptionEndpoint::Available(client)
    }
}

#[cfg(not(feature = "ai"))]
fn available_production_transcription_endpoint(
    _context: &AiContext,
    route: AiRouting,
    translate: bool,
) -> TranscriptionEndpoint<'static> {
    TranscriptionEndpoint::Unavailable(transcription_degradation(route, translate))
}

pub fn ingest_audio_with_transcription(
    vault_root: &Path,
    store: &mut impl WikiIndexStore,
    scope: ScopeIdentity,
    snapshot: AudioSnapshot,
    endpoint: TranscriptionEndpoint<'_>,
) -> Result<AudioIngestResult, WikiError> {
    let result =
        ingest_audio_with_transcription_without_index(vault_root, scope, snapshot, endpoint)?;
    index_after_ingest(vault_root, store)?;
    Ok(result)
}

pub(crate) fn ingest_audio_with_transcription_without_index(
    vault_root: &Path,
    scope: ScopeIdentity,
    snapshot: AudioSnapshot,
    endpoint: TranscriptionEndpoint<'_>,
) -> Result<AudioIngestResult, WikiError> {
    let title = markdown_title(&snapshot.file_name);
    let content_hash = gobby_core::indexing::content_hash(&snapshot.bytes);
    let draft = SourceDraft {
        location: snapshot.location.clone(),
        kind: SourceKind::Audio,
        fetched_at: snapshot.fetched_at.clone(),
        content: Vec::new(),
        title: Some(title),
        citation: Some(snapshot.location.clone()),
        license: None,
        ingestion_method: IngestionMethod::Manual,
        compile_status: CompileStatus::Pending,
    };
    let record = SourceManifest::register_with_content_hash(vault_root, draft, content_hash)?;
    let asset_path = write_asset(vault_root, &record, &snapshot.file_name, &snapshot.bytes)?;
    let raw_markdown = render_raw_audio_markdown(&snapshot, &record.content_hash, &asset_path);
    let raw_path = write_raw_markdown(vault_root, &record, &raw_markdown)?;
    let request = TranscriptionRequest {
        file_name: &snapshot.file_name,
        mime_type: snapshot.mime_type.as_deref(),
        asset_path: &asset_path,
        bytes: &snapshot.bytes,
    };
    let transcript = write_audio_transcript_markdown(
        vault_root,
        &scope,
        &record,
        request,
        transcribe_for_markdown(&request, endpoint),
    )?;

    Ok(AudioIngestResult {
        record,
        raw_path,
        asset_path,
        transcript_path: transcript.path,
        transcription_degradation: transcript.degradation,
    })
}

pub(crate) fn transcribe_for_markdown(
    request: &TranscriptionRequest<'_>,
    endpoint: TranscriptionEndpoint<'_>,
) -> TranscriptionMarkdownInput {
    match endpoint {
        TranscriptionEndpoint::Available(client) => {
            transcription_result_for_markdown(request, client.as_ref())
        }
        TranscriptionEndpoint::Unavailable(degradation) => {
            TranscriptionMarkdownInput::Degraded(degradation)
        }
        TranscriptionEndpoint::Translating {
            client,
            target_lang,
            language_hint,
        } => translate_for_markdown(
            request,
            client.as_ref(),
            target_lang.as_deref(),
            language_hint.as_deref(),
        ),
    }
}

fn transcription_result_for_markdown(
    request: &TranscriptionRequest<'_>,
    client: &dyn crate::transcribe::TranscriptionClient,
) -> TranscriptionMarkdownInput {
    let result = transcribe_available(request, client);
    transcription_result_to_markdown(result, "transcription_error", "Transcription failed")
}

#[cfg(feature = "ai")]
fn transcribe_available(
    request: &TranscriptionRequest<'_>,
    client: &dyn crate::transcribe::TranscriptionClient,
) -> Result<crate::transcribe::TranscriptionOutput, WikiError> {
    crate::ai::chunk::transcribe_audio_request(
        request,
        client,
        crate::ai::chunk::ChunkTranscriptionMode::Transcribe,
    )
}

#[cfg(not(feature = "ai"))]
fn transcribe_available(
    request: &TranscriptionRequest<'_>,
    client: &dyn crate::transcribe::TranscriptionClient,
) -> Result<crate::transcribe::TranscriptionOutput, WikiError> {
    client.transcribe(request)
}

#[cfg(feature = "ai")]
fn translate_for_markdown(
    request: &TranscriptionRequest<'_>,
    client: &dyn crate::transcribe::TranscriptionClient,
    target_lang: Option<&str>,
    language_hint: Option<&str>,
) -> TranscriptionMarkdownInput {
    let result = if crate::ai::chunk::requires_chunking(request.bytes.len()) {
        let target_lang = target_lang.unwrap_or("en");
        let mode = if is_english_target(target_lang) {
            crate::ai::chunk::ChunkTranscriptionMode::TranslateToEnglish { language_hint }
        } else {
            crate::ai::chunk::ChunkTranscriptionMode::TranslateSegments {
                target_lang,
                language_hint,
            }
        };
        crate::ai::chunk::transcribe_audio_request(request, client, mode)
    } else {
        crate::ai::translate::translate_audio(request, client, target_lang, language_hint)
    };
    transcription_result_to_markdown(result, "translation_error", "Translation failed")
}

#[cfg(not(feature = "ai"))]
fn translate_for_markdown(
    _request: &TranscriptionRequest<'_>,
    _client: &dyn crate::transcribe::TranscriptionClient,
    _target_lang: Option<&str>,
    _language_hint: Option<&str>,
) -> TranscriptionMarkdownInput {
    TranscriptionMarkdownInput::Degraded(TranscriptionDegradation {
        reason: "translation_unavailable".to_string(),
        fallback: "Translation requires the ai feature.".to_string(),
    })
}

fn transcription_result_to_markdown(
    result: Result<crate::transcribe::TranscriptionOutput, WikiError>,
    reason: &str,
    prefix: &str,
) -> TranscriptionMarkdownInput {
    match result {
        Ok(output) => TranscriptionMarkdownInput::Transcribed(output),
        Err(error) => TranscriptionMarkdownInput::Degraded(TranscriptionDegradation {
            reason: reason.to_string(),
            fallback: format!(
                "{prefix}: {error}; keep raw audio assets and require supplied transcripts."
            ),
        }),
    }
}

#[cfg(feature = "ai")]
fn is_english_target(target_lang: &str) -> bool {
    target_lang
        .trim()
        .split(['-', '_'])
        .next()
        .unwrap_or("")
        .eq_ignore_ascii_case("en")
}

fn transcription_degradation(routing: AiRouting, translate: bool) -> TranscriptionDegradation {
    let action = if translate {
        "translation"
    } else {
        "transcription"
    };
    let reason = match routing {
        AiRouting::Off => "disabled",
        AiRouting::Auto | AiRouting::Daemon | AiRouting::Direct => "missing_endpoint",
    };
    TranscriptionDegradation {
        reason: reason.to_string(),
        fallback: format!("Keep raw audio assets and skip daemon {action}."),
    }
}

impl From<AudioIngestResult> for IngestResult {
    fn from(result: AudioIngestResult) -> Self {
        Self {
            record: result.record,
            raw_path: result.raw_path,
            asset_path: Some(result.asset_path),
        }
    }
}

fn render_raw_audio_markdown(
    snapshot: &AudioSnapshot,
    source_hash: &str,
    asset_path: &Path,
) -> String {
    let asset_path = path_to_string(asset_path);
    let mut fields = vec![
        ("source_kind", "audio".to_string()),
        ("source_location", snapshot.location.clone()),
        ("fetched_at", snapshot.fetched_at.clone()),
        ("source_hash", source_hash.to_string()),
        ("source_asset", asset_path.clone()),
    ];
    if let Some(mime_type) = &snapshot.mime_type {
        fields.push(("audio_mime_type", mime_type.clone()));
    }
    if let Some(duration_seconds) = snapshot.duration_seconds {
        fields.push(("audio_duration_seconds", duration_seconds.to_string()));
    }

    let mut markdown = markdown_metadata(&fields);
    markdown.push_str("# ");
    markdown.push_str(&markdown_title(&snapshot.file_name));
    markdown.push_str("\n\n");
    markdown.push_str("Original audio stored under `");
    markdown.push_str(&asset_path);
    markdown.push_str("`.\n");
    markdown
}

#[cfg(test)]
mod tests {
    #[cfg(feature = "ai")]
    use std::cell::RefCell;
    #[cfg(feature = "ai")]
    use std::rc::Rc;

    use gobby_core::ai_context::{AiBindings, AiContext, AiLimiter};
    use gobby_core::config::{AiRouting, AiTuning, CapabilityBinding};
    use gobby_core::indexing::content_hash;

    use super::*;
    use crate::sources::{SourceKind, SourceManifest};
    use crate::store::{MemoryWikiStore, WikiDocumentKind};
    use crate::transcribe::{
        TranscriptSegment, TranscriptionClient, TranscriptionOutput, TranscriptionRequest,
    };

    fn sample_snapshot() -> AudioSnapshot {
        AudioSnapshot {
            location: "/tmp/interview.wav".to_string(),
            file_name: "interview.wav".to_string(),
            fetched_at: "2026-05-29T21:15:00Z".to_string(),
            bytes: b"RIFF....WAVEaudio-bytes".to_vec(),
            mime_type: Some("audio/wav".to_string()),
            duration_seconds: Some(12),
        }
    }

    #[cfg(feature = "ai")]
    fn long_snapshot() -> AudioSnapshot {
        AudioSnapshot {
            bytes: vec![b'a'; crate::ai::chunk::MAX_AUDIO_UPLOAD_BYTES + 1],
            duration_seconds: Some(1_200),
            ..sample_snapshot()
        }
    }

    struct FakeTranscriptionClient;

    impl TranscriptionClient for FakeTranscriptionClient {
        fn transcribe(
            &self,
            _request: &TranscriptionRequest<'_>,
        ) -> Result<TranscriptionOutput, WikiError> {
            Ok(TranscriptionOutput {
                segments: vec![TranscriptSegment {
                    start_ms: 2_000,
                    end_ms: 4_000,
                    text: "Scope searchable hydrophone transcript phrase.".to_string(),
                }],
                language: Some("en".to_string()),
                model: Some("fake-stt".to_string()),
                source_language: Some("en".to_string()),
                task: Some("transcribe".to_string()),
                target_language: None,
                translated: false,
                translation_degraded: false,
                partial: false,
                completed_ranges: Vec::new(),
                missing_ranges: Vec::new(),
            })
        }
    }

    #[cfg(feature = "ai")]
    struct ScriptedTranscriptionClient {
        transcriptions: RefCell<Vec<Result<TranscriptionOutput, WikiError>>>,
        english: RefCell<Vec<Result<TranscriptionOutput, WikiError>>>,
        translations: RefCell<Vec<Vec<String>>>,
        calls: Rc<RefCell<Vec<&'static str>>>,
    }

    #[cfg(feature = "ai")]
    impl ScriptedTranscriptionClient {
        fn new(transcriptions: Vec<TranscriptionOutput>) -> Self {
            Self {
                transcriptions: RefCell::new(transcriptions.into_iter().map(Ok).collect()),
                english: RefCell::new(Vec::new()),
                translations: RefCell::new(Vec::new()),
                calls: Rc::new(RefCell::new(Vec::new())),
            }
        }

        fn with_english(english: Vec<TranscriptionOutput>) -> Self {
            Self {
                transcriptions: RefCell::new(Vec::new()),
                english: RefCell::new(english.into_iter().map(Ok).collect()),
                translations: RefCell::new(Vec::new()),
                calls: Rc::new(RefCell::new(Vec::new())),
            }
        }

        fn calls(&self) -> Rc<RefCell<Vec<&'static str>>> {
            Rc::clone(&self.calls)
        }
    }

    #[cfg(feature = "ai")]
    impl TranscriptionClient for ScriptedTranscriptionClient {
        fn transcribe(
            &self,
            _request: &TranscriptionRequest<'_>,
        ) -> Result<TranscriptionOutput, WikiError> {
            self.calls.borrow_mut().push("transcribe");
            self.transcriptions.borrow_mut().remove(0)
        }

        fn translate_to_english(
            &self,
            _request: &TranscriptionRequest<'_>,
            _language_hint: Option<&str>,
        ) -> Result<TranscriptionOutput, WikiError> {
            self.calls.borrow_mut().push("translate_to_english");
            self.english.borrow_mut().remove(0)
        }

        fn translate_segments(
            &self,
            segments: &[TranscriptSegment],
            _source_lang: &str,
            _target_lang: &str,
        ) -> Result<Vec<String>, WikiError> {
            self.calls.borrow_mut().push("translate_segments");
            let mut translations = self.translations.borrow_mut();
            if translations.is_empty() {
                return Ok(segments
                    .iter()
                    .map(|segment| format!("translated {}", segment.text))
                    .collect());
            }
            Ok(translations.remove(0))
        }
    }

    fn test_context(routing: AiRouting, api_base: Option<String>) -> AiContext {
        let binding = CapabilityBinding {
            routing,
            transport: None,
            api_base,
            api_key: None,
            model: Some("whisper-1".to_string()),
            provider: None,
            task: None,
            language: None,
            target_lang: None,
        };
        AiContext {
            bindings: AiBindings {
                embed: binding.clone(),
                audio_transcribe: binding.clone(),
                audio_translate: binding.clone(),
                vision_extract: binding.clone(),
                text_generate: binding,
            },
            tuning: AiTuning {
                max_concurrency: 1,
                keep_alive: None,
            },
            limiter: AiLimiter::new(1),
            project_id: None,
        }
    }

    #[cfg(feature = "ai")]
    fn spawn_transcription_server(
        response: &'static str,
    ) -> (String, gobby_core::test_http::RequestHandle) {
        gobby_core::test_http::spawn_json_response(response).expect("spawn test server")
    }

    #[cfg(feature = "ai")]
    fn test_chunk(start_ms: u64, end_ms: u64) -> crate::ai::chunk::AudioChunk {
        crate::ai::chunk::AudioChunk {
            start_ms,
            end_ms,
            file_name: format!("chunk-{start_ms}.wav"),
            path: PathBuf::from(format!("chunk-{start_ms}.wav")),
            bytes: vec![b'w', b'a', b'v'],
        }
    }

    #[cfg(feature = "ai")]
    fn transcript_output(
        source_lang: &str,
        translated: bool,
        task: &str,
        segments: &[(u64, u64, &str)],
    ) -> TranscriptionOutput {
        TranscriptionOutput {
            segments: segments
                .iter()
                .map(|(start_ms, end_ms, text)| TranscriptSegment {
                    start_ms: *start_ms,
                    end_ms: *end_ms,
                    text: (*text).to_string(),
                })
                .collect(),
            language: Some(if translated { "en" } else { source_lang }.to_string()),
            model: Some("fake-stt".to_string()),
            source_language: Some(source_lang.to_string()),
            task: Some(task.to_string()),
            target_language: translated.then(|| "en".to_string()),
            translated,
            translation_degraded: false,
            partial: false,
            completed_ranges: Vec::new(),
            missing_ranges: Vec::new(),
        }
    }

    #[cfg(feature = "ai")]
    #[test]
    fn english_target_uses_primary_language_subtag() {
        assert!(is_english_target("en"));
        assert!(is_english_target("EN-us"));
        assert!(is_english_target("en_US"));
        assert!(!is_english_target("eng"));
        assert!(!is_english_target("fr-en"));
    }

    #[cfg(feature = "ai")]
    #[test]
    fn production_transcription_writes_fields() {
        let response = r#"{"text":"Production routed transcript.","source_language":"es","language":"en","model":"whisper-prod","task":"transcribe","translated":false,"segments":[{"start":0.0,"end":1.25,"text":"Production routed transcript."}]}"#;
        let (api_base, request) = spawn_transcription_server(response);
        let context = test_context(AiRouting::Direct, Some(api_base));
        let temp = tempfile::tempdir().expect("tempdir");
        let mut store = MemoryWikiStore::default();

        let result = ingest_audio(
            temp.path(),
            &mut store,
            ScopeIdentity::topic("field-work"),
            sample_snapshot(),
            &context,
        )
        .expect("ingest audio with production transcript");

        let request = request.join().expect("request").expect("request ok");
        assert!(request.starts_with("POST /v1/audio/transcriptions HTTP/1.1"));
        assert!(result.transcription_degradation.is_none());

        let markdown =
            std::fs::read_to_string(temp.path().join(&result.transcript_path)).expect("markdown");
        assert!(markdown.contains("transcription_status: transcribed"));
        assert!(markdown.contains("transcription_language: en"));
        assert!(markdown.contains("transcription_source_language: es"));
        assert!(markdown.contains("transcription_model: whisper-prod"));
        assert!(markdown.contains("transcription_task: transcribe"));
        assert!(markdown.contains("translated: false"));
        assert!(markdown.contains("[00:00:00] Production routed transcript."));
    }

    #[cfg(feature = "ai")]
    #[test]
    fn production_path_applies_translation() {
        let response = r#"{"text":"Translated transcript.","source_language":"es","language":"es","model":"whisper-prod","task":"translate","translated":true,"segments":[{"start":0.0,"end":1.25,"text":"Translated transcript."}]}"#;
        let (api_base, request) = spawn_transcription_server(response);
        let context = test_context(AiRouting::Direct, Some(api_base));
        let temp = tempfile::tempdir().expect("tempdir");
        let mut store = MemoryWikiStore::default();

        let result = ingest_audio_with_transcription(
            temp.path(),
            &mut store,
            ScopeIdentity::topic("field-work"),
            sample_snapshot(),
            production_transcription_endpoint(&context, true),
        )
        .expect("ingest translated audio");

        let request = request.join().expect("request").expect("request ok");
        assert!(request.starts_with("POST /v1/audio/translations HTTP/1.1"));
        assert!(result.transcription_degradation.is_none());

        let markdown =
            std::fs::read_to_string(temp.path().join(&result.transcript_path)).expect("markdown");
        assert!(markdown.contains("transcription_status: transcribed"));
        assert!(markdown.contains("transcription_language: en"));
        assert!(markdown.contains("transcription_source_language: es"));
        assert!(markdown.contains("transcription_target_language: en"));
        assert!(markdown.contains("transcription_task: translate"));
        assert!(markdown.contains("translated: true"));
        assert!(markdown.contains("[00:00:00] Translated transcript."));
    }

    #[cfg(feature = "ai")]
    #[test]
    fn production_path_chunks_long_audio() {
        let _chunks = crate::ai::chunk::install_test_chunks(vec![
            test_chunk(0, 10_000),
            test_chunk(9_000, 19_000),
        ]);
        let client = ScriptedTranscriptionClient::new(vec![
            transcript_output("en", false, "transcribe", &[(0, 1_000, "first chunk")]),
            transcript_output("en", false, "transcribe", &[(0, 1_000, "second chunk")]),
        ]);
        let temp = tempfile::tempdir().expect("tempdir");
        let mut store = MemoryWikiStore::default();

        let result = ingest_audio_with_transcription(
            temp.path(),
            &mut store,
            ScopeIdentity::topic("field-work"),
            long_snapshot(),
            TranscriptionEndpoint::Available(Box::new(client)),
        )
        .expect("ingest long audio");

        let markdown =
            std::fs::read_to_string(temp.path().join(&result.transcript_path)).expect("markdown");
        assert!(markdown.contains("transcription_completed_ranges: 0-10000,9000-19000"));
        assert!(markdown.contains("[00:00:00] first chunk"));
        assert!(markdown.contains("[00:00:09] second chunk"));
    }

    #[cfg(feature = "ai")]
    #[test]
    fn long_media_chunks_then_translates() {
        let _chunks = crate::ai::chunk::install_test_chunks(vec![
            test_chunk(0, 10_000),
            test_chunk(9_000, 19_000),
        ]);
        let client = ScriptedTranscriptionClient::new(vec![
            transcript_output("es", false, "transcribe", &[(0, 1_000, "hola")]),
            transcript_output("es", false, "transcribe", &[(0, 1_000, "mundo")]),
        ]);
        client
            .translations
            .borrow_mut()
            .push(vec!["bonjour".to_string(), "monde".to_string()]);
        let temp = tempfile::tempdir().expect("tempdir");
        let mut store = MemoryWikiStore::default();

        let result = ingest_audio_with_transcription(
            temp.path(),
            &mut store,
            ScopeIdentity::topic("field-work"),
            long_snapshot(),
            TranscriptionEndpoint::Translating {
                client: Box::new(client),
                target_lang: Some("fr".to_string()),
                language_hint: None,
            },
        )
        .expect("ingest translated long audio");

        let markdown =
            std::fs::read_to_string(temp.path().join(&result.transcript_path)).expect("markdown");
        assert!(markdown.contains("transcription_source_language: es"));
        assert!(markdown.contains("transcription_target_language: fr"));
        assert!(markdown.contains("transcription_task: translate"));
        assert!(markdown.contains("translated: true"));
        assert!(markdown.contains("[00:00:00] bonjour"));
        assert!(markdown.contains("[00:00:09] monde"));
    }

    #[cfg(feature = "ai")]
    #[test]
    fn long_english_translation_per_chunk() {
        let _chunks = crate::ai::chunk::install_test_chunks(vec![
            test_chunk(0, 10_000),
            test_chunk(9_000, 19_000),
        ]);
        let client = ScriptedTranscriptionClient::with_english(vec![
            transcript_output("es", true, "translate", &[(0, 1_000, "hello")]),
            transcript_output("es", true, "translate", &[(0, 1_000, "world")]),
        ]);
        let calls = client.calls();
        let temp = tempfile::tempdir().expect("tempdir");
        let mut store = MemoryWikiStore::default();

        let result = ingest_audio_with_transcription(
            temp.path(),
            &mut store,
            ScopeIdentity::topic("field-work"),
            long_snapshot(),
            TranscriptionEndpoint::Translating {
                client: Box::new(client),
                target_lang: Some("en".to_string()),
                language_hint: Some("es".to_string()),
            },
        )
        .expect("ingest English translated long audio");

        let markdown =
            std::fs::read_to_string(temp.path().join(&result.transcript_path)).expect("markdown");
        assert!(markdown.contains("transcription_source_language: es"));
        assert!(markdown.contains("transcription_target_language: en"));
        assert!(markdown.contains("transcription_task: translate"));
        assert!(markdown.contains("translated: true"));
        assert!(markdown.contains("[00:00:00] hello"));
        assert!(markdown.contains("[00:00:09] world"));
        assert_eq!(
            calls.borrow().as_slice(),
            &["translate_to_english", "translate_to_english"]
        );
    }

    #[test]
    fn off_routing_degrades() {
        let context = test_context(AiRouting::Off, None);
        let temp = tempfile::tempdir().expect("tempdir");
        let snapshot = sample_snapshot();
        let mut store = MemoryWikiStore::default();

        let result = ingest_audio(
            temp.path(),
            &mut store,
            ScopeIdentity::topic("field-work"),
            snapshot.clone(),
            &context,
        )
        .expect("ingest degraded audio");

        assert_eq!(
            std::fs::read(temp.path().join(&result.asset_path)).expect("asset bytes"),
            snapshot.bytes
        );
        assert_eq!(
            result
                .transcription_degradation
                .as_ref()
                .map(|degradation| degradation.reason.as_str()),
            Some("disabled")
        );
        let markdown =
            std::fs::read_to_string(temp.path().join(&result.transcript_path)).expect("markdown");
        assert!(markdown.contains("transcription_status: unavailable"));
        assert!(markdown.contains("transcription_degradation: disabled"));
        assert!(markdown.contains("Keep raw audio assets"));
    }

    #[test]
    fn stores_original_audio() {
        let temp = tempfile::tempdir().expect("tempdir");
        let snapshot = sample_snapshot();
        let expected_hash = content_hash(&snapshot.bytes);
        let mut store = MemoryWikiStore::default();
        let context = test_context(AiRouting::Off, None);

        let result = ingest_audio(
            temp.path(),
            &mut store,
            ScopeIdentity::topic("field-work"),
            snapshot.clone(),
            &context,
        )
        .expect("ingest audio");

        assert_eq!(
            result.asset_path.parent(),
            Some(PathBuf::from("raw/assets").as_path())
        );
        assert_eq!(
            std::fs::read(temp.path().join(&result.asset_path)).expect("asset bytes"),
            snapshot.bytes
        );
        let raw =
            std::fs::read_to_string(temp.path().join(&result.raw_path)).expect("raw markdown");
        assert!(raw.contains("source_kind: audio"));
        assert!(raw.contains("source_asset: raw/assets/"));
        assert!(raw.contains("audio_mime_type: audio/wav"));
        assert!(raw.contains("audio_duration_seconds: 12"));

        let manifest = SourceManifest::read(temp.path()).expect("read source manifest");
        assert_eq!(manifest.entries.len(), 1);
        assert_eq!(manifest.entries[0].kind, SourceKind::Audio);
        assert_eq!(manifest.entries[0].content_hash, expected_hash);
    }

    #[test]
    fn transcript_chunks_are_scope_searchable() {
        let temp = tempfile::tempdir().expect("tempdir");
        let mut store = MemoryWikiStore::default();

        let result = ingest_audio_with_transcription(
            temp.path(),
            &mut store,
            ScopeIdentity::project("project-123"),
            sample_snapshot(),
            TranscriptionEndpoint::Available(Box::new(FakeTranscriptionClient)),
        )
        .expect("ingest audio with transcript");

        let document = store
            .documents
            .get(&result.transcript_path)
            .expect("transcript document indexed");
        assert_eq!(document.kind, WikiDocumentKind::SourceNote);
        assert!(document.body.contains("scope_kind: project"));
        assert!(document.body.contains("scope_id: project-123"));
        assert!(
            document
                .body
                .contains("Scope searchable hydrophone transcript phrase.")
        );
        assert!(store.sources.contains_key(&result.transcript_path));
        let chunks = store
            .chunks
            .get(&result.transcript_path)
            .expect("transcript chunks indexed");
        assert!(chunks.iter().any(|chunk| {
            chunk
                .content
                .contains("Scope searchable hydrophone transcript phrase.")
        }));
    }
}