gobby-wiki 0.7.0

Gobby wiki CLI shell
#[cfg(feature = "ai")]
use std::cell::RefCell;
use std::io::Write;
#[cfg(feature = "ai")]
use std::rc::Rc;
use std::time::Duration;

use gobby_core::indexing::{content_hash, file_content_hash};

use super::*;
use crate::sources::{CompileStatus, IngestionMethod, SourceKind, SourceManifest};
use crate::store::{MemoryWikiStore, WikiDocumentKind};
use crate::transcribe::{
    TranscriptionClient, TranscriptionEndpoint, TranscriptionOutput, TranscriptionRequest,
};
use crate::vision::{VisionClient, VisionEndpoint, VisionExtraction, VisionRequest};

mod degradation;
mod frame_assets;
mod processing;
mod storage;
#[cfg(feature = "ai")]
mod translation;

fn sample_snapshot() -> VideoSnapshot {
    VideoSnapshot {
        location: "/tmp/lecture.mp4".to_string(),
        file_name: "lecture.mp4".to_string(),
        fetched_at: "2026-05-29T21:30:00Z".to_string(),
        bytes: b"\0\0\0\x18ftypmp42video-bytes".to_vec(),
        mime_type: Some("video/mp4".to_string()),
        duration_seconds: Some(8),
        frame_interval_seconds: Some(4),
        frame_samples: Vec::new(),
        frame_image_paths: Vec::new(),
        frame_descriptions: vec![
            VideoFrameDescription {
                timestamp: "00:00:00".to_string(),
                source_reference: "raw/assets/lecture.mp4#t=00:00:00".to_string(),
                description: "Speaker stands beside a field recorder.".to_string(),
            },
            VideoFrameDescription {
                timestamp: "00:00:04".to_string(),
                source_reference: "raw/assets/lecture.mp4#t=00:00:04".to_string(),
                description: "Slide shows synchronized audio and frames.".to_string(),
            },
        ],
        transcript_segments: vec![
            TranscriptSegment {
                start_ms: 1_000,
                end_ms: 3_000,
                text: "We start by recording the scene.".to_string(),
            },
            TranscriptSegment {
                start_ms: 5_000,
                end_ms: 7_000,
                text: "Each transcript segment lines up with sampled frames.".to_string(),
            },
        ],
        transcription: None,
    }
}

struct FakeVideoMediaExtractor {
    audio_bytes: Vec<u8>,
    frames: Vec<(u64, Vec<u8>)>,
    fail_audio: Option<&'static str>,
    fail_frames: Option<&'static str>,
}

impl VideoMediaExtractor for FakeVideoMediaExtractor {
    fn extract_audio(&self, _video: &Path) -> Result<tempfile::NamedTempFile, WikiError> {
        if let Some(detail) = self.fail_audio {
            return Err(WikiError::Config {
                detail: detail.to_string(),
            });
        }
        temp_file_with_bytes(".wav", &self.audio_bytes)
    }

    fn sample_frame_images(
        &self,
        _video: &Path,
        _interval: Duration,
    ) -> Result<Vec<(u64, tempfile::NamedTempFile)>, WikiError> {
        if let Some(detail) = self.fail_frames {
            return Err(WikiError::Config {
                detail: detail.to_string(),
            });
        }
        self.frames
            .iter()
            .map(|(start_ms, bytes)| Ok((*start_ms, temp_file_with_bytes(".jpg", bytes)?)))
            .collect()
    }
}

fn temp_file_with_bytes(suffix: &str, bytes: &[u8]) -> Result<tempfile::NamedTempFile, WikiError> {
    let mut file = tempfile::Builder::new()
        .suffix(suffix)
        .tempfile()
        .map_err(|source| WikiError::Io {
            action: "create test media file",
            path: None,
            source,
        })?;
    file.write_all(bytes).map_err(|source| WikiError::Io {
        action: "write test media file",
        path: Some(file.path().to_path_buf()),
        source,
    })?;
    file.flush().map_err(|source| WikiError::Io {
        action: "flush test media file",
        path: Some(file.path().to_path_buf()),
        source,
    })?;
    Ok(file)
}

struct FakeTranscriptionClient;

impl TranscriptionClient for FakeTranscriptionClient {
    fn transcribe(
        &self,
        _request: &TranscriptionRequest<'_>,
    ) -> Result<TranscriptionOutput, WikiError> {
        Ok(transcript_output(
            "en",
            false,
            "transcribe",
            &[(
                1_000,
                2_000,
                "Audio-first transcript from extracted video audio.",
            )],
        ))
    }
}

struct FailingTranscriptionClient;

impl TranscriptionClient for FailingTranscriptionClient {
    fn transcribe(
        &self,
        _request: &TranscriptionRequest<'_>,
    ) -> Result<TranscriptionOutput, WikiError> {
        Err(WikiError::Config {
            detail: "stt provider failed".to_string(),
        })
    }
}

struct FakeVisionClient;

impl VisionClient for FakeVisionClient {
    fn extract(&self, request: &VisionRequest<'_>) -> Result<VisionExtraction, WikiError> {
        Ok(VisionExtraction {
            description: format!(
                "frame {} has {} bytes",
                request.file_name,
                request.bytes.len()
            ),
            ocr_text: None,
            metadata: Vec::new(),
        })
    }
}

struct FailingVisionClient;

impl VisionClient for FailingVisionClient {
    fn extract(&self, _request: &VisionRequest<'_>) -> Result<VisionExtraction, WikiError> {
        Err(WikiError::Config {
            detail: "vision provider failed".to_string(),
        })
    }
}

fn transcript_output(
    source_lang: &str,
    translated: bool,
    task: &str,
    segments: &[(u64, u64, &str)],
) -> TranscriptionOutput {
    TranscriptionOutput {
        segments: segments
            .iter()
            .map(|(start_ms, end_ms, text)| TranscriptSegment {
                start_ms: *start_ms,
                end_ms: *end_ms,
                text: (*text).to_string(),
            })
            .collect(),
        language: Some(if translated { "en" } else { source_lang }.to_string()),
        model: Some("fake-stt".to_string()),
        source_language: Some(source_lang.to_string()),
        task: Some(task.to_string()),
        target_language: translated.then(|| "en".to_string()),
        translated,
        translation_degraded: false,
        partial: false,
        completed_ranges: Vec::new(),
        missing_ranges: Vec::new(),
    }
}

#[cfg(feature = "ai")]
struct ScriptedTranscriptionClient {
    english: RefCell<Vec<Result<TranscriptionOutput, WikiError>>>,
    calls: Rc<RefCell<Vec<&'static str>>>,
}

#[cfg(feature = "ai")]
impl TranscriptionClient for ScriptedTranscriptionClient {
    fn transcribe(
        &self,
        _request: &TranscriptionRequest<'_>,
    ) -> Result<TranscriptionOutput, WikiError> {
        Err(WikiError::Config {
            detail: "unexpected transcribe fallback".to_string(),
        })
    }

    fn translate_to_english(
        &self,
        _request: &TranscriptionRequest<'_>,
        _language_hint: Option<&str>,
    ) -> Result<TranscriptionOutput, WikiError> {
        self.calls.borrow_mut().push("translate_to_english");
        self.english.borrow_mut().remove(0)
    }
}

#[cfg(feature = "ai")]
struct ScriptedChunkTranscriptionClient {
    outputs: RefCell<Vec<Result<TranscriptionOutput, WikiError>>>,
}

#[cfg(feature = "ai")]
impl TranscriptionClient for ScriptedChunkTranscriptionClient {
    fn transcribe(
        &self,
        _request: &TranscriptionRequest<'_>,
    ) -> Result<TranscriptionOutput, WikiError> {
        self.outputs.borrow_mut().remove(0)
    }
}

fn ingest_with_media(
    vault_root: &Path,
    media: FakeVideoMediaExtractor,
    transcription_endpoint: TranscriptionEndpoint<'_>,
    vision_endpoint: VisionEndpoint<'_>,
    file_name: &str,
) -> Result<VideoIngestResult, WikiError> {
    let source_path = vault_root.join(format!("{file_name}.source"));
    std::fs::write(&source_path, b"video bytes").expect("write source video");
    let mut store = MemoryWikiStore::default();
    ingest_video_file_with_processing(
        vault_root,
        &mut store,
        ScopeIdentity::topic("field-work"),
        VideoFileSnapshot {
            location: format!("/tmp/{file_name}"),
            file_name: file_name.to_string(),
            fetched_at: "2026-05-29T21:30:00Z".to_string(),
            path: source_path,
            mime_type: Some("video/mp4".to_string()),
            duration_seconds: Some(20),
            frame_interval_seconds: Some(4),
            frame_samples: Vec::new(),
            frame_descriptions: Vec::new(),
            frame_image_paths: Vec::new(),
            transcript_segments: Vec::new(),
            transcription: None,
        },
        transcription_endpoint,
        vision_endpoint,
        &media,
    )
}

fn read_derived(vault_root: &Path, result: &VideoIngestResult) -> String {
    std::fs::read_to_string(vault_root.join(&result.derived_path)).expect("read derived video")
}

fn assert_asset_preserved(vault_root: &Path, result: &VideoIngestResult, expected: &[u8]) {
    assert_eq!(
        std::fs::read(vault_root.join(&result.asset_path)).expect("read video asset"),
        expected
    );
}