captube 0.1.0

Turn a YouTube slide-lecture video into a PDF of its unique slides, using ffmpeg keyframes + perceptual dedup.
use anyhow::{Context, Result, bail};
use rayon::join;
use std::path::{Path, PathBuf};
use std::process::Command;

/// A keyframe candidate: the dumped JPEG plus its source timestamp.
/// The timestamp is needed later so we can re-extract a clean frame via
/// seek (some keyframes decode corruptly under `-skip_frame nokey`).
#[derive(Clone, Debug)]
pub struct Candidate {
    /// Location of the JPEG that the keyframe dump wrote for this frame.
    pub path: PathBuf,
    /// Presentation time of the keyframe in seconds, as reported by
    /// ffprobe's `pts_time`. It is the value handed to ffmpeg's `-ss` when
    /// the frame has to be captured again.
    pub pts: f32,
}

/// Dump every keyframe from `video` plus read their presentation times.
///
/// Modern H.264 encoders place keyframes at scene boundaries, so decoding
/// only keyframes (`-skip_frame nokey`) gives us one candidate per slide
/// (plus occasional duplicates for slides that outlast a single GOP).
/// That is ~20x faster than full-video decode.
///
/// `_threshold` and `_fps` are kept for API compatibility with the older
/// scene-filter pipeline.
pub fn extract_scene_frames(
    video: &Path,
    workdir: &Path,
    _threshold: f32,
    _fps: f32,
    max_width: u32,
) -> Result<Vec<Candidate>> {
    let frames_dir = workdir.join("frames");
    std::fs::create_dir_all(&frames_dir)?;

    // Listing keyframe pts via ffprobe and dumping keyframe jpegs via
    // ffmpeg are independent decodes of roughly equal cost — run them on
    // two cores concurrently.
    let pattern = frames_dir.join("frame_%05d.jpg");
    let (timestamps_res, dump_res) = join(
        || keyframe_timestamps(video),
        || {
            let status = Command::new("ffmpeg")
                .arg("-hide_banner")
                .arg("-loglevel")
                .arg("error")
                .arg("-y")
                .arg("-skip_frame")
                .arg("nokey")
                .arg("-i")
                .arg(video)
                .arg("-vf")
                .arg(format!("scale={}:-2:flags=area", max_width))
                .arg("-fps_mode")
                .arg("passthrough")
                .arg("-q:v")
                .arg("3")
                .arg(&pattern)
                .status()
                .context("invoke ffmpeg for keyframe dump")?;
            if !status.success() {
                bail!("ffmpeg keyframe dump exited with status {status}");
            }
            Ok(())
        },
    );
    let timestamps = timestamps_res?;
    dump_res?;

    let mut paths: Vec<PathBuf> = std::fs::read_dir(&frames_dir)?
        .filter_map(|e| e.ok())
        .map(|e| e.path())
        .filter(|p| p.extension().and_then(|s| s.to_str()) == Some("jpg"))
        .collect();
    paths.sort();

    if paths.len() != timestamps.len() {
        bail!(
            "keyframe dump produced {} files but ffprobe reported {} keyframes",
            paths.len(),
            timestamps.len()
        );
    }
    Ok(paths
        .into_iter()
        .zip(timestamps)
        .map(|(path, pts)| Candidate { path, pts })
        .collect())
}

/// Grab the single frame found at `pts` seconds into `video` and write it
/// to `out`, scaled to `max_width` pixels wide.
///
/// `-ss` is placed before `-i`, so ffmpeg seeks on the input side, which is
/// fast. Any existing file at `out` is overwritten (`-y`).
///
/// # Errors
///
/// Fails if ffmpeg cannot be started or exits with a non-success status.
pub fn capture_at(video: &Path, pts: f32, max_width: u32, out: &Path) -> Result<()> {
    let seek_to = format!("{pts:.3}");
    let scale_filter = format!("scale={max_width}:-2:flags=area");

    let mut ffmpeg = Command::new("ffmpeg");
    ffmpeg
        .args(["-hide_banner", "-loglevel", "error", "-y"])
        .args(["-ss", seek_to.as_str()])
        .arg("-i")
        .arg(video)
        .args(["-frames:v", "1"])
        .args(["-vf", scale_filter.as_str()])
        .args(["-q:v", "3"])
        .arg(out);

    let exit = ffmpeg
        .status()
        .context("invoke ffmpeg for single-frame capture")?;
    if exit.success() {
        Ok(())
    } else {
        bail!("ffmpeg seek-capture exited with status {exit}")
    }
}

/// Ask ffprobe for the presentation time, in seconds, of every keyframe in
/// the first video stream of `video`, in the order ffprobe prints them.
///
/// Lines whose timestamp is missing or non-numeric (for example `N/A`) are
/// skipped; `extract_scene_frames` compares the resulting count against the
/// number of dumped frames, so such a gap surfaces there as an error.
///
/// # Errors
///
/// Fails if ffprobe cannot be started or exits with a non-success status;
/// in the latter case ffprobe's stderr is included in the message.
fn keyframe_timestamps(video: &Path) -> Result<Vec<f32>> {
    let output = Command::new("ffprobe")
        .arg("-v")
        .arg("error")
        .arg("-select_streams")
        .arg("v:0")
        .arg("-skip_frame")
        .arg("nokey")
        .arg("-show_frames")
        .arg("-show_entries")
        .arg("frame=pts_time")
        .arg("-of")
        .arg("csv=p=0")
        .arg(video)
        .output()
        .context("invoke ffprobe for keyframe timestamps")?;
    if !output.status.success() {
        bail!(
            "ffprobe failed: {}",
            String::from_utf8_lossy(&output.stderr)
        );
    }
    let stdout = String::from_utf8_lossy(&output.stdout);
    Ok(stdout.lines().filter_map(parse_pts_line).collect())
}

/// Extracts the timestamp from one line of ffprobe CSV output.
///
/// Only the first comma-separated field is parsed. Parsing the whole line
/// would silently drop a frame whenever `pts_time` is followed by a trailing
/// comma or extra fields (some ffprobe builds appear to emit these for
/// frames carrying side data), shifting every later timestamp by one.
fn parse_pts_line(line: &str) -> Option<f32> {
    let field = line.split(',').next()?.trim();
    field.parse::<f32>().ok()
}