captube 0.1.0

Turn a YouTube slide-lecture video into a PDF of its unique slides, using ffmpeg keyframes + perceptual dedup.
use crate::extract::Candidate;
use anyhow::{Context, Result};
use image::imageops::FilterType;
use rayon::prelude::*;
use std::path::{Path, PathBuf};

/// Drop frames that are nearly identical to the previously kept frame.
///
/// The fingerprint is a 256x256 grayscale thumbnail; two frames are judged
/// "the same scene" if their mean absolute per-pixel difference is below
/// `threshold` (on a 0-255 scale). This collapses cases where ffmpeg
/// emitted multiple keyframes within a single slide, and — with a higher
/// threshold — also collapses cursor-only motion.
pub fn dedup_candidates(
    candidates: &[Candidate],
    threshold: f32,
    verbose: bool,
) -> Result<Vec<Candidate>> {
    let fingerprints = fingerprints_par(candidates.iter().map(|c| c.path.as_path()))?;
    Ok(run_dedup(
        candidates,
        &fingerprints,
        threshold,
        verbose,
        |c| c.path.as_path(),
    ))
}

/// Path-only variant of the dedup pass: returns the subset of `paths` whose
/// image differs from the previously kept image by more than `threshold`
/// (mean absolute per-pixel difference on a 0-255 scale). Input order is
/// preserved and the first path is always kept.
///
/// With `verbose` set, one `[dedup]` line per path is written to stderr.
///
/// # Errors
///
/// Fails if any image cannot be fingerprinted.
pub fn dedup_paths(paths: &[PathBuf], threshold: f32, verbose: bool) -> Result<Vec<PathBuf>> {
    let fingerprints = fingerprints_par(paths.iter().map(PathBuf::as_path))?;
    let survivors = run_dedup(paths, &fingerprints, threshold, verbose, PathBuf::as_path);
    Ok(survivors)
}

fn fingerprints_par<'a>(paths: impl Iterator<Item = &'a Path>) -> Result<Vec<Vec<u8>>> {
    const THUMB: u32 = 256;
    let paths: Vec<&Path> = paths.collect();
    paths
        .par_iter()
        .map(|p| {
            fingerprint(p, THUMB).with_context(|| format!("fingerprint {}", p.display()))
        })
        .collect()
}

/// Core dedup pass shared by the public entry points.
///
/// `items` and `fingerprints` are walked in lockstep (stopping at the shorter
/// of the two). The first item is always kept; each later item is kept only
/// when the mean absolute difference between its fingerprint and the
/// fingerprint of the most recently *kept* item is strictly greater than
/// `threshold`. Kept items are cloned into the returned vector in input order.
///
/// When `verbose` is set, a line with the file name (obtained through
/// `path_of`), the measured difference and the keep/drop verdict is written
/// to stderr for every item.
fn run_dedup<T: Clone>(
    items: &[T],
    fingerprints: &[Vec<u8>],
    threshold: f32,
    verbose: bool,
    path_of: impl Fn(&T) -> &Path,
) -> Vec<T> {
    let mut survivors: Vec<T> = Vec::new();
    // Fingerprint of the most recently kept item; `None` until the first keep.
    let mut reference: Option<&Vec<u8>> = None;

    for (entry, current) in items.iter().zip(fingerprints) {
        // With nothing kept yet there is no distance to measure: report
        // infinity and keep the item unconditionally.
        let distance = reference.map_or(f32::INFINITY, |prev| mean_abs_diff(prev, current));
        let is_new = reference.is_none() || distance > threshold;

        if verbose {
            let verdict = if is_new { "keep" } else { "drop" };
            let label = path_of(entry)
                .file_name()
                .and_then(|s| s.to_str())
                .unwrap_or("?");
            eprintln!("[dedup] {} diff={:.2} {}", label, distance, verdict);
        }

        if !is_new {
            continue;
        }
        survivors.push(entry.clone());
        reference = Some(current);
    }

    survivors
}

/// Load the image at `path` and reduce it to a `size` x `size` 8-bit
/// grayscale thumbnail, returned as its raw pixel buffer.
///
/// The image is resized to exactly `size` x `size` (aspect ratio is not
/// preserved) with a triangle filter before conversion to luma.
///
/// # Errors
///
/// Fails if the image cannot be opened or decoded.
fn fingerprint(path: &Path, size: u32) -> Result<Vec<u8>> {
    let decoded = image::open(path)?;
    let thumbnail = decoded.resize_exact(size, size, FilterType::Triangle);
    Ok(thumbnail.to_luma8().into_raw())
}

/// Mean absolute element-wise difference between two byte buffers, on the
/// same 0-255 scale as the bytes themselves.
///
/// Both slices are expected to have the same length (checked in debug
/// builds only). Two empty buffers are treated as identical and yield `0.0`
/// rather than the NaN that a 0/0 division would produce.
fn mean_abs_diff(a: &[u8], b: &[u8]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    if a.is_empty() {
        return 0.0;
    }
    // Accumulate in u64: a u32 sum overflows once the buffers exceed roughly
    // 16.8 million maximally different bytes.
    let total: u64 = a
        .iter()
        .zip(b)
        .map(|(&x, &y)| u64::from((i32::from(x) - i32::from(y)).unsigned_abs()))
        .sum();
    // Divide in f64 so large sums are not rounded before the division, then
    // narrow to the f32 the callers compare against their threshold.
    (total as f64 / a.len() as f64) as f32
}