gobby-wiki 0.7.0

Gobby wiki CLI shell
use super::*;
use std::path::{Path, PathBuf};

use super::alignment::align_transcript_and_frames;
use crate::ScopeIdentity;
use crate::sources::{
    CompileStatus, IngestionMethod, SourceDraft, SourceKind, SourceManifest, SourceRecord,
};
use crate::transcribe::{TranscriptSegment, TranscriptionDegradation, TranscriptionOutput};

/// Sampling a 7-second asset every 3 seconds is expected to yield frames at
/// 0s, 3s and 6s, each one tied back to the asset it was sampled from.
#[test]
fn frame_sampling_records_timestamps() {
    let video = PathBuf::from("raw/assets/lecture.mp4");
    let plan = FrameSamplingPlan {
        duration_seconds: Some(7),
        interval_seconds: 3,
    };

    let frames = sample_frames(&video, plan);

    // Compare the whole ordered list so a missing or extra frame fails as well.
    let timestamps: Vec<&str> = frames
        .iter()
        .map(|frame| frame.timestamp.as_str())
        .collect();
    assert_eq!(timestamps, vec!["00:00:00", "00:00:03", "00:00:06"]);

    // Every sample must point back at the asset that was passed in.
    for frame in frames.iter() {
        assert!(frame.source_asset == video);
    }

    // The reference for the second frame is the asset path plus a `#t=` fragment.
    assert_eq!(
        frames[1].source_reference,
        "raw/assets/lecture.mp4#t=00:00:03"
    );
}

/// An interval of zero seconds must produce no frame samples at all, even when
/// the asset has a known duration.
#[test]
fn zero_frame_interval_disables_sampling() {
    let video = PathBuf::from("raw/assets/lecture.mp4");
    let disabled_plan = FrameSamplingPlan {
        duration_seconds: Some(7),
        interval_seconds: 0,
    };

    let frames = sample_frames(&video, disabled_plan);

    assert!(frames.is_empty());
}

/// Two frames (0s and 5s) and two transcript segments (starting at 2s and 6s)
/// are expected to align into two entries, each pairing a frame with the
/// segment that starts after it.
#[test]
fn aligns_transcript_and_frames() {
    let frames = vec![
        VideoFrameDescription {
            timestamp: String::from("00:00:00"),
            source_reference: String::from("raw/assets/lecture.mp4#t=00:00:00"),
            description: String::from("Wide shot of the speaker at the podium."),
        },
        VideoFrameDescription {
            timestamp: String::from("00:00:05"),
            source_reference: String::from("raw/assets/lecture.mp4#t=00:00:05"),
            description: String::from("Slide with the alignment diagram."),
        },
    ];
    let segments = vec![
        TranscriptSegment {
            start_ms: 2_000,
            end_ms: 3_500,
            text: String::from("The speaker introduces the data collection setup."),
        },
        TranscriptSegment {
            start_ms: 6_000,
            end_ms: 7_500,
            text: String::from("The diagram shows transcript and frame alignment."),
        },
    ];

    let aligned = align_transcript_and_frames(&segments, &frames);

    assert_eq!(aligned.len(), 2);

    // Entry N must carry frame N's timestamp and lead with frame N and segment N.
    let expected_timestamps = ["00:00:00", "00:00:05"];
    for (index, expected_timestamp) in expected_timestamps.iter().copied().enumerate() {
        let entry = &aligned[index];
        assert_eq!(entry.timestamp, expected_timestamp);
        assert_eq!(entry.frame_descriptions[0], frames[index]);
        assert_eq!(entry.transcript_segments[0], segments[index]);
    }
}

/// Alignment with millisecond-precision starts: a segment starting at 2 450 ms
/// is expected under the 00:00:00 frame, and one starting at exactly 5 000 ms
/// under the 00:00:05 frame.
#[test]
fn aligns_on_numeric_start_ms() {
    let frames = vec![
        VideoFrameDescription {
            timestamp: String::from("00:00:00"),
            source_reference: String::from("raw/assets/lecture.mp4#t=00:00:00"),
            description: String::from("Wide shot of the speaker at the podium."),
        },
        VideoFrameDescription {
            timestamp: String::from("00:00:05"),
            source_reference: String::from("raw/assets/lecture.mp4#t=00:00:05"),
            description: String::from("Slide with the alignment diagram."),
        },
    ];
    let segments = vec![
        // Starts before the second frame and ends just short of it.
        TranscriptSegment {
            start_ms: 2_450,
            end_ms: 4_950,
            text: String::from("The speaker introduces the data collection setup."),
        },
        // Starts exactly on the second frame's timestamp.
        TranscriptSegment {
            start_ms: 5_000,
            end_ms: 7_250,
            text: String::from("The diagram shows transcript and frame alignment."),
        },
    ];

    let aligned = align_transcript_and_frames(&segments, &frames);

    assert_eq!(aligned.len(), 2);

    // Entry N must carry frame N's timestamp and lead with frame N and segment N.
    let expected_timestamps = ["00:00:00", "00:00:05"];
    for (index, expected_timestamp) in expected_timestamps.iter().copied().enumerate() {
        let entry = &aligned[index];
        assert_eq!(entry.timestamp, expected_timestamp);
        assert_eq!(entry.frame_descriptions[0], frames[index]);
        assert_eq!(entry.transcript_segments[0], segments[index]);
    }
}

/// Exercises both halves of a partially failed video ingest:
///
/// 1. frame extraction failed but a transcript exists, and
/// 2. transcription failed but frame descriptions exist.
///
/// In each case the written Markdown must record the degradation and still
/// contain the content from the half that succeeded.
#[test]
fn partial_failure_matrix() {
    let workspace = tempfile::tempdir().expect("tempdir");
    let root = workspace.path();
    let source_record = record_for(root);
    let scope = ScopeIdentity::topic("field-work");
    let note_path = PathBuf::from("raw/source-note.md");
    let video_path = PathBuf::from("raw/assets/lecture.mp4");

    // --- Scenario 1: frames failed, transcript survived -------------------
    let frame_failure = [VideoMediaDegradation {
        kind: String::from("frames"),
        reason: String::from("extraction_failed"),
        message: String::from("ffmpeg frame sampling failed"),
    }];
    let segments = vec![TranscriptSegment {
        start_ms: 1_000,
        end_ms: 2_000,
        text: String::from("Transcript survives frame extraction failure."),
    }];
    let transcription = transcription_output(&segments);

    let transcript_only = write_video_derived_markdown(
        root,
        &scope,
        &source_record,
        VideoMarkdownRequest {
            file_name: "lecture.mp4",
            mime_type: Some("video/mp4"),
            asset_path: &video_path,
            raw_path: &note_path,
            duration_seconds: Some(8),
            media_metadata: Some(VideoMediaMetadata {
                file_size_bytes: 12,
                duration_seconds: Some(8),
            }),
            media_degradations: &frame_failure,
            transcription_degradation: None,
            frame_interval_seconds: 4,
            frame_samples: &[],
            frame_image_paths: &[],
            frame_descriptions: &[],
            transcript_segments: &segments,
            transcription: Some(&transcription),
        },
    )
    .expect("write transcript-only degradation doc");

    let transcript_only_doc =
        std::fs::read_to_string(root.join(transcript_only.path)).expect("read doc");
    for expected in [
        "media_degradation: \"frames:extraction_failed\"",
        "Transcript survives frame extraction failure.",
        "No frame samples recorded.",
    ] {
        assert!(
            transcript_only_doc.contains(expected),
            "transcript-only document is missing {expected:?}"
        );
    }

    // --- Scenario 2: transcription failed, frames survived ----------------
    let stt_failure = TranscriptionDegradation {
        reason: gobby_core::degradation::ModalityDegradationReason::TranscriptionError,
        fallback: String::from("STT provider failed after frames were extracted."),
    };
    let surviving_samples = [VideoFrameSample {
        timestamp_seconds: 4,
        timestamp: String::from("00:00:04"),
        source_asset: video_path.clone(),
        source_reference: String::from("raw/assets/lecture.mp4#t=00:00:04"),
    }];
    let surviving_descriptions = vec![VideoFrameDescription {
        timestamp: String::from("00:00:04"),
        source_reference: String::from("raw/assets/lecture.mp4#t=00:00:04"),
        description: String::from("Frame survives STT failure."),
    }];

    let frame_timeline = write_video_derived_markdown(
        root,
        &scope,
        &source_record,
        VideoMarkdownRequest {
            file_name: "lecture.mp4",
            mime_type: Some("video/mp4"),
            asset_path: &video_path,
            raw_path: &note_path,
            duration_seconds: Some(8),
            media_metadata: Some(VideoMediaMetadata {
                file_size_bytes: 12,
                duration_seconds: Some(8),
            }),
            media_degradations: &[],
            transcription_degradation: Some(&stt_failure),
            frame_interval_seconds: 4,
            frame_samples: &surviving_samples,
            frame_image_paths: &[],
            frame_descriptions: &surviving_descriptions,
            transcript_segments: &[],
            transcription: None,
        },
    )
    .expect("write frame-only degradation doc");

    let frame_timeline_doc =
        std::fs::read_to_string(root.join(frame_timeline.path)).expect("read doc");
    for expected in [
        "transcription_degradation: transcription_error",
        "STT provider failed after frames were extracted.",
        "Frame survives STT failure.",
    ] {
        assert!(
            frame_timeline_doc.contains(expected),
            "frame-only document is missing {expected:?}"
        );
    }
}

/// When neither frames nor a transcript are available, the written document
/// must still carry the media metadata (file size and duration) together with
/// the recorded degradation and its message.
#[test]
fn degradation_metadata_has_size_and_duration() {
    let workspace = tempfile::tempdir().expect("tempdir");
    let root = workspace.path();
    let source_record = record_for(root);
    let scope = ScopeIdentity::topic("field-work");
    let note_path = PathBuf::from("raw/source-note.md");
    let video_path = PathBuf::from("raw/assets/lecture.mp4");
    let ffmpeg_missing = [VideoMediaDegradation {
        kind: String::from("media"),
        reason: String::from("ffmpeg_unavailable"),
        message: String::from("ffmpeg was not found"),
    }];

    let written = write_video_derived_markdown(
        root,
        &scope,
        &source_record,
        VideoMarkdownRequest {
            file_name: "lecture.mp4",
            mime_type: Some("video/mp4"),
            asset_path: &video_path,
            raw_path: &note_path,
            duration_seconds: Some(13),
            media_metadata: Some(VideoMediaMetadata {
                file_size_bytes: 42,
                duration_seconds: Some(13),
            }),
            media_degradations: &ffmpeg_missing,
            transcription_degradation: None,
            frame_interval_seconds: 5,
            frame_samples: &[],
            frame_image_paths: &[],
            frame_descriptions: &[],
            transcript_segments: &[],
            transcription: None,
        },
    )
    .expect("write degradation metadata doc");

    let document = std::fs::read_to_string(root.join(written.path)).expect("read doc");

    // NOTE(review): "duration_seconds: 13" is a substring of
    // "video_duration_seconds: 13", so that check cannot fail on its own once
    // the first one passes. Consider anchoring it to the start of a line if a
    // separate `duration_seconds` field is meant to be verified.
    for expected in [
        "video_duration_seconds: 13",
        "file_size_bytes: 42",
        "duration_seconds: 13",
        "media_degradation: \"media:ffmpeg_unavailable\"",
        "ffmpeg was not found",
    ] {
        assert!(
            document.contains(expected),
            "document is missing {expected:?}"
        );
    }
}

/// Test fixture: registers a manually ingested, not-yet-compiled video source
/// through `SourceManifest::register`, passing `temp` as the first argument,
/// and returns the resulting record.
///
/// # Panics
///
/// Panics with "register video source" if registration returns an error.
fn record_for(temp: &Path) -> SourceRecord {
    // The same path serves as both the source location and its citation.
    let location = "/tmp/lecture.mp4";

    let draft = SourceDraft {
        location: String::from(location),
        kind: SourceKind::Video,
        fetched_at: String::from("2026-05-29T21:30:00Z"),
        content: b"video-bytes".to_vec(),
        title: Some(String::from("lecture.mp4")),
        citation: Some(String::from(location)),
        license: None,
        ingestion_method: IngestionMethod::Manual,
        compile_status: CompileStatus::Pending,
    };

    SourceManifest::register(temp, draft).expect("register video source")
}

/// Test fixture: wraps a copy of `segments` in a complete, untranslated
/// English `TranscriptionOutput` attributed to the `fake-stt` model, with no
/// completed or missing ranges recorded.
fn transcription_output(segments: &[TranscriptSegment]) -> TranscriptionOutput {
    // Detected language and source language are both English in this fixture.
    let english = || Some(String::from("en"));

    TranscriptionOutput {
        segments: Vec::from(segments),
        language: english(),
        model: Some(String::from("fake-stt")),
        source_language: english(),
        task: Some(String::from("transcribe")),
        target_language: None,
        translated: false,
        translation_degraded: false,
        partial: false,
        completed_ranges: vec![],
        missing_ranges: vec![],
    }
}