#![allow(clippy::too_many_arguments)]
mod cost;
mod frame;
mod mc;
mod me;
mod pred;
mod refs;
mod util;
mod y4m;
use crate::cost::{estimate_inter_costs, estimate_intra_costs};
use ::y4m::Decoder;
use std::collections::{BTreeMap, BTreeSet};
use std::io::Read;
use v_frame::frame::Frame;
use v_frame::pixel::{CastFromPrimitive, ChromaSampling, Pixel};
use v_frame::plane::Plane;
pub use v_frame;
/// Runtime options controlling how scene-change detection is performed.
pub struct DetectionOptions {
    /// When `true`, use the fast analysis path: compare summed absolute
    /// per-pixel deltas on the luma plane instead of estimating
    /// intra/inter coding costs (see `SceneChangeDetector::has_scenecut`).
    pub fast_analysis: bool,
    /// NOTE(review): not referenced anywhere in this file — presumably
    /// consumed by another module; verify before documenting semantics.
    pub ignore_flashes: bool,
    /// Minimum distance (in frames) from the previous keyframe before a
    /// new scenecut may be placed. `None` means no minimum.
    pub min_scenecut_distance: Option<usize>,
    /// Maximum distance (in frames) from the previous keyframe; once
    /// reached, a keyframe is forced. `None` means no maximum.
    pub max_scenecut_distance: Option<usize>,
    /// Number of future frames buffered and inspected to filter out
    /// short scene flashes. Must be at least 1.
    pub lookahead_distance: usize,
}
impl Default for DetectionOptions {
fn default() -> Self {
DetectionOptions {
fast_analysis: false,
ignore_flashes: false,
lookahead_distance: 5,
min_scenecut_distance: None,
max_scenecut_distance: None,
}
}
}
/// Results returned by [`detect_scene_changes`].
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
pub struct DetectionResults {
    /// 0-indexed frame numbers at which a new scene begins
    /// (frame 0 is always included).
    pub scene_changes: Vec<usize>,
    /// Total number of frames read from the input.
    pub frame_count: usize,
}
/// Callback invoked once per analyzed frame with
/// `(frames_analyzed, keyframes_found_so_far)`.
pub type ProgressCallback = Box<dyn Fn(usize, usize)>;
/// Runs scene-change detection over an entire y4m stream.
///
/// Frames are decoded into a lookahead queue; each iteration analyzes one
/// frame against its predecessor (plus lookahead frames for flash
/// filtering). If supplied, `progress_callback` is invoked after each
/// analyzed frame with the running frame and keyframe counts.
///
/// # Panics
///
/// Panics if `opts.lookahead_distance` is zero.
pub fn detect_scene_changes<R: Read, T: Pixel>(
    dec: &mut Decoder<R>,
    opts: DetectionOptions,
    progress_callback: Option<ProgressCallback>,
) -> DetectionResults {
    assert!(opts.lookahead_distance >= 1);
    let video_details = y4m::get_video_details(dec);
    let mut detector =
        SceneChangeDetector::new(video_details.bit_depth, video_details.chroma_sampling, &opts);
    let mut frame_queue = BTreeMap::new();
    let mut keyframes = BTreeSet::new();
    let mut frameno = 0;
    loop {
        // The next frame to decode is one past the newest queued frame.
        let mut next_input_frameno = frame_queue.keys().last().map_or(0, |&key| key + 1);
        // Keep the queue filled far enough ahead for lookahead analysis;
        // stop early once the decoder runs out of frames.
        while next_input_frameno < frameno + opts.lookahead_distance {
            match y4m::read_video_frame::<R, T>(dec, &video_details) {
                Ok(frame) => {
                    frame_queue.insert(next_input_frameno, frame);
                    next_input_frameno += 1;
                }
                Err(_) => break,
            }
        }
        // frame_set[0] is the previous frame, frame_set[1] the current one,
        // followed by up to `lookahead_distance - 1` future frames.
        let frame_set = frame_queue
            .values()
            .take(opts.lookahead_distance + 1)
            .collect::<Vec<_>>();
        // We need at least a previous and a current frame to compare.
        if frame_set.len() < 2 {
            break;
        }
        detector.analyze_next_frame(&frame_set, frameno, &mut keyframes);
        // The frame before the one just analyzed is no longer needed.
        if frameno > 0 {
            frame_queue.remove(&(frameno - 1));
        }
        frameno += 1;
        if let Some(progress_fn) = progress_callback.as_ref() {
            progress_fn(frameno, keyframes.len());
        }
    }
    DetectionResults {
        scene_changes: keyframes.into_iter().collect(),
        frame_count: frameno,
    }
}
/// Stateful detector that decides, frame by frame, whether a scenecut
/// (keyframe) should be placed. Create via [`SceneChangeDetector::new`]
/// and drive with [`SceneChangeDetector::analyze_next_frame`].
pub struct SceneChangeDetector<'a> {
    // Per-pixel delta threshold used only by the fast analysis path,
    // scaled with bit depth (see `new` and `has_scenecut`).
    threshold: usize,
    // Detection options; borrowed for the detector's lifetime.
    opts: &'a DetectionOptions,
    // Frame numbers identified as scene flashes; never chosen as keyframes.
    excluded_frames: BTreeSet<usize>,
    // Chroma sampling of the input, forwarded to inter-cost estimation.
    chroma_sampling: ChromaSampling,
    // Bit depth of the input, forwarded to cost estimation.
    bit_depth: usize,
}
impl<'a> SceneChangeDetector<'a> {
    /// Creates a detector for input with the given bit depth and chroma
    /// sampling, configured by `opts`.
    pub fn new(
        bit_depth: usize,
        chroma_sampling: ChromaSampling,
        opts: &'a DetectionOptions,
    ) -> Self {
        // Baseline per-pixel delta threshold for 8-bit content; scaled
        // linearly with bit depth below. Used by the fast path only.
        const BASE_THRESHOLD: usize = 12;
        Self {
            threshold: BASE_THRESHOLD * bit_depth / 8,
            opts,
            excluded_frames: BTreeSet::new(),
            chroma_sampling,
            bit_depth,
        }
    }

    /// Analyzes the next frame and inserts `input_frameno` into `keyframes`
    /// if it begins a new scene.
    ///
    /// `frame_set` must hold the previous frame at index 0 and the current
    /// frame at index 1, followed by any available lookahead frames.
    pub fn analyze_next_frame<T: Pixel>(
        &mut self,
        frame_set: &[&Frame<T>],
        input_frameno: usize,
        keyframes: &mut BTreeSet<usize>,
    ) {
        // The first frame is always a keyframe.
        if input_frameno == 0 {
            keyframes.insert(input_frameno);
            return;
        }
        let previous_keyframe = *keyframes.iter().last().unwrap();
        let distance = input_frameno - previous_keyframe;
        // Enforce the configured minimum keyframe interval.
        if distance < self.opts.min_scenecut_distance.unwrap_or(0) {
            return;
        }
        // Force a keyframe once the maximum interval is reached.
        if distance
            >= self
                .opts
                .max_scenecut_distance
                .unwrap_or(usize::max_value())
        {
            keyframes.insert(input_frameno);
            return;
        }
        self.exclude_scene_flashes(frame_set, input_frameno, previous_keyframe);
        if self.is_key_frame(frame_set[0], frame_set[1], input_frameno, previous_keyframe) {
            keyframes.insert(input_frameno);
        }
    }

    /// Returns `true` if `current_frame` should be a keyframe: it must not
    /// have been excluded as a scene flash, and a scenecut must be detected
    /// between `previous_frame` and `current_frame`.
    fn is_key_frame<T: Pixel>(
        &self,
        previous_frame: &Frame<T>,
        current_frame: &Frame<T>,
        current_frameno: usize,
        previous_keyframe: usize,
    ) -> bool {
        // BUGFIX: the original contained the mojibake `¤t_frameno`
        // (a mangled `&current_frameno`), which does not compile.
        if self.excluded_frames.contains(&current_frameno) {
            return false;
        }
        self.has_scenecut(
            previous_frame,
            current_frame,
            current_frameno,
            previous_keyframe,
        )
    }

    /// Marks frames in `excluded_frames` that look like short scene
    /// flashes, so they are not chosen as keyframes.
    ///
    /// When fewer than `lookahead_distance` future frames are available
    /// (end of the stream), all frames in the window are conservatively
    /// excluded.
    fn exclude_scene_flashes<T: Pixel>(
        &mut self,
        frame_subset: &[&Frame<T>],
        frameno: usize,
        previous_keyframe: usize,
    ) {
        let lookahead_distance = self.opts.lookahead_distance;
        if frame_subset.len() - 1 < lookahead_distance {
            for frame in frameno..=(frameno + self.opts.lookahead_distance) {
                self.excluded_frames.insert(frame);
            }
            return;
        }
        // If the frame before the window still matches some frame within
        // the window (no scenecut between them), everything up to that
        // frame is a flash: the content returns to the prior scene.
        for j in (1..=lookahead_distance).rev() {
            if !self.has_scenecut(
                frame_subset[0],
                frame_subset[j],
                frameno - 1 + j,
                previous_keyframe,
            ) {
                for i in 0..=j {
                    let frameno = frameno + i - 1;
                    self.excluded_frames.insert(frameno);
                }
                break;
            }
        }
        // If a frame inside the window still cuts against the last frame
        // of the window, the real scenecut is at the window's end, so the
        // intermediate frame is excluded as a flash.
        for i in 1..lookahead_distance {
            if self.has_scenecut(
                frame_subset[i],
                frame_subset[lookahead_distance],
                frameno - 1 + lookahead_distance,
                previous_keyframe,
            ) {
                let frameno = frameno + i - 1;
                self.excluded_frames.insert(frameno);
            }
        }
    }

    /// Decides whether a scenecut occurs between `frame1` and `frame2`.
    ///
    /// Fast path: the summed absolute luma delta must reach
    /// `threshold * pixel_count`. Slow path: the average estimated inter
    /// cost must exceed the average intra cost scaled by a bias that grows
    /// with the distance from the previous keyframe.
    fn has_scenecut<T: Pixel>(
        &self,
        frame1: &Frame<T>,
        frame2: &Frame<T>,
        frameno: usize,
        previous_keyframe: usize,
    ) -> bool {
        if self.opts.fast_analysis {
            let len = frame2.planes[0].cfg.width * frame2.planes[0].cfg.height;
            let delta = self.delta_in_planes(&frame1.planes[0], &frame2.planes[0]);
            delta >= self.threshold as u64 * len as u64
        } else {
            let intra_costs = estimate_intra_costs(frame2, self.bit_depth);
            let intra_cost = intra_costs.iter().map(|&cost| cost as u64).sum::<u64>() as f64
                / intra_costs.len() as f64;
            let inter_costs =
                estimate_inter_costs(frame2, frame1, self.bit_depth, self.chroma_sampling);
            let inter_cost = inter_costs.iter().map(|&cost| cost as u64).sum::<u64>() as f64
                / inter_costs.len() as f64;
            const THRESH_MAX: f64 = 0.4;
            const THRESH_MIN: f64 = THRESH_MAX * 0.25;
            let distance_from_keyframe = frameno - previous_keyframe;
            let min_keyint = self.opts.min_scenecut_distance.unwrap_or(1);
            let max_keyint = self.opts.max_scenecut_distance;
            // The bias ramps from THRESH_MIN/4 right after a keyframe up to
            // THRESH_MAX at the maximum interval, making cuts progressively
            // easier to trigger the longer it has been since a keyframe.
            let bias = match max_keyint {
                Some(max_keyint) => {
                    if distance_from_keyframe <= min_keyint / 4 {
                        THRESH_MIN / 4.0
                    } else if distance_from_keyframe <= min_keyint {
                        THRESH_MIN * distance_from_keyframe as f64 / min_keyint as f64
                    } else {
                        THRESH_MIN
                            + (THRESH_MAX - THRESH_MIN)
                                * (distance_from_keyframe - min_keyint) as f64
                                / (max_keyint - min_keyint) as f64
                    }
                }
                None => THRESH_MAX,
            };
            let threshold = intra_cost * (1.0 - bias);
            inter_cost > threshold
        }
    }

    /// Sums the absolute per-pixel differences between two planes of
    /// identical dimensions.
    fn delta_in_planes<T: Pixel>(&self, plane1: &Plane<T>, plane2: &Plane<T>) -> u64 {
        let mut delta = 0;
        let lines = plane1.rows_iter().zip(plane2.rows_iter());
        for (l1, l2) in lines {
            let delta_line = l1
                .iter()
                .zip(l2.iter())
                .map(|(&p1, &p2)| (i16::cast_from(p1) - i16::cast_from(p2)).abs() as u64)
                .sum::<u64>();
            delta += delta_line;
        }
        delta
    }
}