//! captube 0.1.0
//!
//! Turn a YouTube slide-lecture video into a PDF of its unique slides, using ffmpeg keyframes + perceptual dedup.
mod dedup;
mod download;
mod extract;
mod pdf;

use anyhow::{Context, Result};
use clap::Parser;
use rayon::prelude::*;
use std::path::PathBuf;

// Command-line arguments, parsed in `main` via `Args::parse()`.
//
// NOTE: with clap's derive API the `///` doc comments on this struct and on
// its fields are used as the `--help` text, so treat them as user-facing
// strings rather than internal documentation. Maintainer notes therefore use
// plain `//` comments.
//
// None of the numeric options declare a value parser or range, so
// out-of-range values (e.g. a negative fps) are not rejected at parse time;
// they are handed to the downstream modules as-is.
/// Convert a YouTube (slide-based) lecture into a PDF of scene captures.
#[derive(Parser, Debug)]
#[command(version, about)]
struct Args {
    /// YouTube video URL.
    // Positional argument; passed to `download::fetch_video`.
    url: String,

    /// Output PDF path.
    // Passed to `pdf::build_pdf` as the destination.
    #[arg(short, long, default_value = "output.pdf")]
    output: PathBuf,

    /// Scene-change threshold (0.0-1.0). Higher = fewer cuts.
    /// A value around 0.3 is typically robust against mouse-cursor motion.
    // Forwarded to `extract::extract_scene_frames`; the documented 0.0-1.0
    // range is not enforced here.
    #[arg(long, default_value_t = 0.30)]
    scene_threshold: f32,

    /// Sampling fps used when scanning the video for scene changes.
    /// Lower values speed up detection and further suppress tiny motion.
    // Forwarded to `extract::extract_scene_frames`.
    #[arg(long, default_value_t = 2.0)]
    fps: f32,

    /// Maximum width (px) of frames embedded into the PDF.
    // Used both for the initial scene extraction and for the later
    // per-keyframe re-capture (`extract::capture_at`).
    #[arg(long, default_value_t = 1280)]
    max_width: u32,

    /// Mean per-pixel difference (0-255) required between consecutive kept
    /// frames. Frames closer than this to the previous kept frame are
    /// treated as the same slide (e.g. cursor-only changes are collapsed).
    /// Raise for fewer, tighter pages; lower to keep subtler slide
    /// variations.
    // Only applies to the first dedup pass (`dedup::dedup_candidates`); the
    // second pass over re-captured frames in `main` uses its own fixed
    // threshold.
    #[arg(long, default_value_t = 20.0)]
    dedup_threshold: f32,

    /// Keep the intermediate working directory for inspection.
    #[arg(long)]
    keep_workdir: bool,

    /// Print per-frame dedup decisions.
    // Passed as the verbosity flag to both dedup passes.
    #[arg(short, long)]
    verbose: bool,
}

fn main() -> Result<()> {
    let args = Args::parse();

    let tmp = tempfile::Builder::new()
        .prefix("captube-")
        .tempdir()
        .context("failed to create temp dir")?;
    let work = tmp.path().to_path_buf();
    eprintln!("[captube] workdir: {}", work.display());

    let t0 = std::time::Instant::now();
    let video = download::fetch_video(&args.url, &work)
        .context("failed to download video")?;
    eprintln!("[captube] downloaded: {} ({:.1}s)", video.display(), t0.elapsed().as_secs_f32());

    let t1 = std::time::Instant::now();
    let candidates = extract::extract_scene_frames(
        &video,
        &work,
        args.scene_threshold,
        args.fps,
        args.max_width,
    )
    .context("failed to extract scene frames")?;
    eprintln!(
        "[captube] extracted {} candidate keyframes ({:.1}s)",
        candidates.len(),
        t1.elapsed().as_secs_f32()
    );

    if candidates.is_empty() {
        anyhow::bail!("no frames were extracted; try lowering --scene-threshold");
    }

    let t2 = std::time::Instant::now();
    let kept = dedup::dedup_candidates(&candidates, args.dedup_threshold, args.verbose)
        .context("failed to dedup candidates")?;
    eprintln!(
        "[captube] {} unique keyframes ({:.1}s)",
        kept.len(),
        t2.elapsed().as_secs_f32()
    );

    // Re-extract each kept keyframe via seek at `pts + settle`:
    //   (a) some keyframes decode corruptly under `-skip_frame nokey`
    //       (yellow/green-tinted ghost frames at cross-fade boundaries),
    //   (b) if the keyframe itself sits during a transition, the settle
    //       delay moves us to the stable post-transition frame.
    const SETTLE: f32 = 0.8;
    let settled_dir = work.join("settled");
    std::fs::create_dir_all(&settled_dir)?;
    let t3 = std::time::Instant::now();
    let settled: Vec<PathBuf> = kept
        .par_iter()
        .enumerate()
        .map(|(i, c)| {
            let out = settled_dir.join(format!("settled_{i:05}.jpg"));
            match extract::capture_at(&video, c.pts + SETTLE, args.max_width, &out) {
                Ok(()) if out.exists() => Ok::<PathBuf, anyhow::Error>(out),
                // Seek past end-of-video silently produces no file — fall
                // back to the original keyframe so the final PDF still
                // includes this slide.
                _ => Ok(c.path.clone()),
            }
        })
        .collect::<Result<Vec<_>>>()
        .context("failed to re-extract settled frames")?;
    eprintln!(
        "[captube] re-extracted {} settled frames ({:.1}s)",
        settled.len(),
        t3.elapsed().as_secs_f32()
    );

    // Some settled frames from distinct keyframes may converge onto the
    // same stable slide (e.g. a transition keyframe and the subsequent
    // stable-slide keyframe both settle onto the new slide). A small-
    // threshold pass collapses those.
    let t4 = std::time::Instant::now();
    let frames = dedup::dedup_paths(&settled, 5.0, args.verbose)
        .context("failed to dedup settled frames")?;
    eprintln!(
        "[captube] {} frames after settle-dedup ({:.1}s)",
        frames.len(),
        t4.elapsed().as_secs_f32()
    );

    let t5 = std::time::Instant::now();
    pdf::build_pdf(&frames, &args.output).context("failed to build pdf")?;
    eprintln!(
        "[captube] wrote pdf: {} ({:.1}s)",
        args.output.display(),
        t5.elapsed().as_secs_f32()
    );

    if args.keep_workdir {
        let kept = tmp.keep();
        eprintln!("[captube] kept workdir: {}", kept.display());
    }
    Ok(())
}