use std::ffi::CStr;
use std::time::Duration;
use ffmpeg_next::{
Error as FfmpegError, Packet, codec::context::Context as CodecContext,
filter::Graph as FilterGraph, frame::Video as VideoFrame,
};
use ffmpeg_sys_next::AVPixelFormat;
use crate::{error::UnbundleError, metadata::VideoMetadata, unbundle::MediaFile};
/// A single detected scene change in a video stream.
#[derive(Debug, Clone)]
pub struct SceneChange {
    /// Presentation time of the frame where the scene change occurs.
    pub timestamp: Duration,
    /// Frame index derived from the frame PTS and the stream frame rate.
    pub frame_number: u64,
    /// Detection confidence: the `scdet` filter score in full mode, or a
    /// fixed 100.0 in keyframe mode.
    pub score: f64,
}
/// Strategy used when scanning for scene changes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SceneDetectionMode {
    /// Choose automatically: keyframe mode for long streams (> 6000 frames)
    /// with no duration cap, full mode otherwise.
    #[default]
    Auto,
    /// Decode every frame and score it with FFmpeg's `scdet` filter.
    Full,
    /// Treat each keyframe packet (after the first) as a scene change
    /// without decoding — fast but approximate.
    Keyframes,
}
/// Configuration for scene detection.
#[derive(Debug, Clone)]
pub struct SceneDetectionOptions {
    /// Minimum `scdet` score for a frame to count as a scene change.
    pub threshold: f64,
    /// Detection strategy; see [`SceneDetectionMode`].
    pub mode: SceneDetectionMode,
    /// Stop scanning once frames pass this presentation time.
    pub max_duration: Option<Duration>,
    /// Stop scanning after this many scene changes have been collected.
    pub max_scene_changes: Option<usize>,
}
impl Default for SceneDetectionOptions {
fn default() -> Self {
Self {
threshold: 10.0,
mode: SceneDetectionMode::Auto,
max_duration: None,
max_scene_changes: None,
}
}
}
impl SceneDetectionOptions {
    /// Create options with all defaults (equivalent to [`Default::default`]).
    pub fn new() -> Self {
        Self::default()
    }

    /// Set the minimum scene-change score.
    pub fn threshold(self, threshold: f64) -> Self {
        Self { threshold, ..self }
    }

    /// Builder-style alias for [`Self::threshold`].
    pub fn with_threshold(self, threshold: f64) -> Self {
        self.threshold(threshold)
    }

    /// Set the detection strategy.
    pub fn mode(self, mode: SceneDetectionMode) -> Self {
        Self { mode, ..self }
    }

    /// Builder-style alias for [`Self::mode`].
    pub fn with_mode(self, mode: SceneDetectionMode) -> Self {
        self.mode(mode)
    }

    /// Limit scanning to the given presentation-time window.
    pub fn max_duration(self, duration: Duration) -> Self {
        Self {
            max_duration: Some(duration),
            ..self
        }
    }

    /// Builder-style alias for [`Self::max_duration`].
    pub fn with_max_duration(self, duration: Duration) -> Self {
        self.max_duration(duration)
    }

    /// Limit the number of scene changes collected.
    pub fn max_scene_changes(self, max_changes: usize) -> Self {
        Self {
            max_scene_changes: Some(max_changes),
            ..self
        }
    }

    /// Builder-style alias for [`Self::max_scene_changes`].
    pub fn with_max_scene_changes(self, max_changes: usize) -> Self {
        self.max_scene_changes(max_changes)
    }
}
/// Detect scene changes in the selected video stream of `unbundler`.
///
/// In `Auto` mode, long streams (> 6000 frames) with no `max_duration`
/// cap fall back to cheap keyframe-only detection; otherwise every frame
/// is decoded, downscaled, and scored by FFmpeg's `scdet` filter, and
/// frames whose score reaches `config.threshold` are reported.
///
/// # Errors
///
/// * [`UnbundleError::NoVideoStream`] — no video stream index could be resolved.
/// * [`UnbundleError::Cancelled`] — `cancel_check` returned `true`.
/// * [`UnbundleError::VideoDecodeError`] — decoder or filter-graph failure.
pub(crate) fn detect_scenes_impl(
    unbundler: &mut MediaFile,
    video_metadata: &VideoMetadata,
    config: &SceneDetectionOptions,
    cancel_check: Option<&dyn Fn() -> bool>,
    stream_index: Option<usize>,
) -> Result<Vec<SceneChange>, UnbundleError> {
    // Resolve `Auto` into a concrete strategy.
    let selected_mode = match config.mode {
        SceneDetectionMode::Auto => {
            if video_metadata.frame_count > 6_000 && config.max_duration.is_none() {
                SceneDetectionMode::Keyframes
            } else {
                SceneDetectionMode::Full
            }
        }
        mode => mode,
    };
    if selected_mode == SceneDetectionMode::Keyframes {
        return detect_scenes_from_keyframes(
            unbundler,
            video_metadata,
            config,
            cancel_check,
            stream_index,
        );
    }
    let video_stream_index = stream_index
        .or(unbundler.video_stream_index)
        .ok_or(UnbundleError::NoVideoStream)?;
    log::debug!(
        "Detecting scenes (stream={}, threshold={})",
        video_stream_index,
        config.threshold
    );
    let stream = unbundler
        .input_context
        .stream(video_stream_index)
        .ok_or(UnbundleError::NoVideoStream)?;
    let time_base = stream.time_base();
    let codec_parameters = stream.parameters();
    let decoder_context = CodecContext::from_parameters(codec_parameters)?;
    let mut decoder = decoder_context.decoder().video()?;
    let frames_per_second = video_metadata.frames_per_second;
    // Optional PTS cap derived from `max_duration`, in stream time-base units.
    let max_timestamp = config
        .max_duration
        .map(|duration| crate::conversion::duration_to_stream_timestamp(duration, time_base));
    let mut scenes = Vec::new();
    let mut decoded_frame = VideoFrame::empty();
    let mut filtered_frame = VideoFrame::empty();
    // Probe for the first decodable frame so the buffer filter can be
    // configured with the frame's actual pixel format / colorspace rather
    // than whatever the codec parameters advertise.
    let mut actual_pixel_format: Option<i32> = None;
    'probe: for (stream, packet) in unbundler.input_context.packets() {
        if let Some(check) = cancel_check
            && check()
        {
            return Err(UnbundleError::Cancelled);
        }
        if stream.index() != video_stream_index {
            continue;
        }
        decoder
            .send_packet(&packet)
            .map_err(|e| UnbundleError::VideoDecodeError(e.to_string()))?;
        if decoder.receive_frame(&mut decoded_frame).is_ok() {
            actual_pixel_format = Some(AVPixelFormat::from(decoded_frame.format()) as i32);
            break 'probe;
        }
    }
    let pixel_format = actual_pixel_format.unwrap_or(AVPixelFormat::from(decoder.format()) as i32);
    let (color_space, color_range) = if actual_pixel_format.is_some() {
        // SAFETY: a frame was just decoded into `decoded_frame`, so the
        // underlying AVFrame pointer is valid and its fields initialized.
        unsafe {
            let pointer = decoded_frame.as_ptr();
            ((*pointer).colorspace as i32, (*pointer).color_range as i32)
        }
    } else {
        // AVCOL_SPC_UNSPECIFIED (2) / AVCOL_RANGE_UNSPECIFIED (0) fallbacks.
        (2, 0)
    };
    // Build the filter graph: buffer (source) -> scale/format/scdet -> buffersink.
    let mut graph = FilterGraph::new();
    let buffer_args = format!(
        "video_size={}x{}:pix_fmt={}:time_base={}/{}:pixel_aspect=1/1:colorspace={}:range={}",
        decoder.width(),
        decoder.height(),
        pixel_format,
        time_base.numerator(),
        time_base.denominator(),
        color_space,
        color_range,
    );
    graph
        .add(
            &ffmpeg_next::filter::find("buffer").ok_or_else(|| {
                UnbundleError::VideoDecodeError("FFmpeg 'buffer' filter not found".to_string())
            })?,
            "in",
            &buffer_args,
        )
        .map_err(|e| {
            UnbundleError::VideoDecodeError(format!("Failed to add buffer filter: {e}"))
        })?;
    graph
        .add(
            &ffmpeg_next::filter::find("buffersink").ok_or_else(|| {
                UnbundleError::VideoDecodeError("FFmpeg 'buffersink' filter not found".to_string())
            })?,
            "out",
            "",
        )
        .map_err(|e| {
            UnbundleError::VideoDecodeError(format!("Failed to add buffersink filter: {e}"))
        })?;
    // Downscale to width 320 before scdet to keep scoring cheap.
    let scdet_spec = format!(
        "scale=320:-1,format=pix_fmts=yuv420p,scdet=threshold={}",
        config.threshold
    );
    graph
        .output("in", 0)
        .map_err(|e| UnbundleError::VideoDecodeError(format!("Filter graph output error: {e}")))?
        .input("out", 0)
        .map_err(|e| UnbundleError::VideoDecodeError(format!("Filter graph input error: {e}")))?
        .parse(&scdet_spec)
        .map_err(|e| UnbundleError::VideoDecodeError(format!("Filter graph parse error: {e}")))?;
    graph
        .validate()
        .map_err(|e| UnbundleError::VideoDecodeError(format!("Filter graph validation: {e}")))?;
    // True once `config.max_scene_changes` has been satisfied.
    let max_reached = |scenes: &[SceneChange]| {
        config
            .max_scene_changes
            .is_some_and(|max_changes| scenes.len() >= max_changes)
    };
    // Push one decoded frame through the graph and harvest any scene
    // changes the scdet filter reports via frame metadata.
    let mut feed_and_collect = |graph: &mut FilterGraph,
                                frame: &VideoFrame,
                                scenes: &mut Vec<SceneChange>|
     -> Result<(), UnbundleError> {
        graph
            .get("in")
            .ok_or_else(|| UnbundleError::VideoDecodeError("Filter 'in' not found".to_string()))?
            .source()
            .add(frame)
            .map_err(|e| UnbundleError::VideoDecodeError(format!("Failed to feed filter: {e}")))?;
        while graph
            .get("out")
            .ok_or_else(|| UnbundleError::VideoDecodeError("Filter 'out' not found".to_string()))?
            .sink()
            .frame(&mut filtered_frame)
            .is_ok()
        {
            let score = read_scdet_score(&filtered_frame);
            if let Some(score) = score.filter(|&s| s >= config.threshold) {
                let pts = filtered_frame.pts().unwrap_or(0);
                let timestamp =
                    Duration::from_secs_f64(crate::conversion::pts_to_seconds(pts, time_base));
                let frame_number =
                    crate::conversion::pts_to_frame_number(pts, time_base, frames_per_second);
                scenes.push(SceneChange {
                    timestamp,
                    frame_number,
                    score,
                });
                if max_reached(scenes.as_slice()) {
                    return Ok(());
                }
            }
        }
        Ok(())
    };
    // Flush the probe frame (and any siblings buffered behind it) first.
    if actual_pixel_format.is_some() {
        feed_and_collect(&mut graph, &decoded_frame, &mut scenes)?;
        if max_reached(scenes.as_slice()) {
            return Ok(scenes);
        }
        while decoder.receive_frame(&mut decoded_frame).is_ok() {
            feed_and_collect(&mut graph, &decoded_frame, &mut scenes)?;
            if max_reached(scenes.as_slice()) {
                return Ok(scenes);
            }
        }
    }
    // Main decode loop over the remaining packets.
    for (stream, packet) in unbundler.input_context.packets() {
        if let Some(check) = cancel_check {
            if check() {
                return Err(UnbundleError::Cancelled);
            }
        }
        if stream.index() != video_stream_index {
            continue;
        }
        // Stop reading once packets pass the optional duration cap.
        if let Some(max_pts) = max_timestamp
            && packet.pts().is_some_and(|pts| pts > max_pts)
        {
            break;
        }
        decoder
            .send_packet(&packet)
            .map_err(|e| UnbundleError::VideoDecodeError(e.to_string()))?;
        while decoder.receive_frame(&mut decoded_frame).is_ok() {
            if let Some(max_pts) = max_timestamp
                && decoded_frame.pts().is_some_and(|pts| pts > max_pts)
            {
                return Ok(scenes);
            }
            feed_and_collect(&mut graph, &decoded_frame, &mut scenes)?;
            // Stop decoding as soon as the requested number of scene
            // changes has been collected; previously decoding continued to
            // EOF and later calls could push results past the cap.
            if max_reached(scenes.as_slice()) {
                return Ok(scenes);
            }
        }
    }
    // Drain the decoder (EOF flush); errors here are best-effort only.
    let _ = decoder.send_eof();
    while decoder.receive_frame(&mut decoded_frame).is_ok() {
        if let Some(max_pts) = max_timestamp
            && decoded_frame.pts().is_some_and(|pts| pts > max_pts)
        {
            break;
        }
        let _ = feed_and_collect(&mut graph, &decoded_frame, &mut scenes);
        if max_reached(scenes.as_slice()) {
            return Ok(scenes);
        }
    }
    // Drain frames still buffered inside the filter graph.
    while graph
        .get("out")
        .map(|mut f| f.sink().frame(&mut filtered_frame).is_ok())
        .unwrap_or(false)
    {
        let score = read_scdet_score(&filtered_frame);
        if let Some(score) = score.filter(|&s| s >= config.threshold) {
            let pts = filtered_frame.pts().unwrap_or(0);
            let timestamp =
                Duration::from_secs_f64(crate::conversion::pts_to_seconds(pts, time_base));
            let frame_number =
                crate::conversion::pts_to_frame_number(pts, time_base, frames_per_second);
            scenes.push(SceneChange {
                timestamp,
                frame_number,
                score,
            });
            if max_reached(scenes.as_slice()) {
                break;
            }
        }
    }
    Ok(scenes)
}
/// Approximate scene detection without decoding: every keyframe packet
/// in the selected video stream — except the first, which marks the
/// start of the video rather than a change — is reported as a scene
/// change with a fixed score of 100.0.
fn detect_scenes_from_keyframes(
    unbundler: &mut MediaFile,
    video_metadata: &VideoMetadata,
    config: &SceneDetectionOptions,
    cancel_check: Option<&dyn Fn() -> bool>,
    stream_index: Option<usize>,
) -> Result<Vec<SceneChange>, UnbundleError> {
    let video_stream_index = stream_index
        .or(unbundler.video_stream_index)
        .ok_or(UnbundleError::NoVideoStream)?;
    log::debug!(
        "Detecting scenes from keyframes (stream={}, max_duration={:?}, max_scene_changes={:?})",
        video_stream_index,
        config.max_duration,
        config.max_scene_changes,
    );
    let time_base = unbundler
        .input_context
        .stream(video_stream_index)
        .ok_or(UnbundleError::NoVideoStream)?
        .time_base();
    // Optional PTS cap derived from `max_duration`, in stream time-base units.
    let max_stream_timestamp = config
        .max_duration
        .map(|duration| crate::conversion::duration_to_stream_timestamp(duration, time_base));
    let mut scenes = Vec::new();
    // Counts packets seen on the video stream; used only to skip the
    // very first keyframe.
    let mut video_packet_number: u64 = 0;
    let mut packet = Packet::empty();
    loop {
        if let Some(check) = cancel_check
            && check()
        {
            return Err(UnbundleError::Cancelled);
        }
        match packet.read(&mut unbundler.input_context) {
            Ok(()) => {
                if packet.stream() as usize != video_stream_index {
                    continue;
                }
                // Stop once packets pass the optional duration cap.
                if let Some(max_pts) = max_stream_timestamp
                    && packet.pts().is_some_and(|pts| pts > max_pts)
                {
                    break;
                }
                if packet.is_key() {
                    if video_packet_number > 0 {
                        let pts = packet.pts().unwrap_or(0);
                        // Clamp negative PTS (e.g. edit-list lead-in) to zero.
                        let timestamp = Duration::from_secs_f64(
                            crate::conversion::pts_to_seconds(pts, time_base).max(0.0),
                        );
                        let frame_number = crate::conversion::pts_to_frame_number(
                            pts,
                            time_base,
                            video_metadata.frames_per_second,
                        );
                        scenes.push(SceneChange {
                            timestamp,
                            frame_number,
                            // No scdet score is available in this mode.
                            score: 100.0,
                        });
                        if config
                            .max_scene_changes
                            .is_some_and(|max| scenes.len() >= max)
                        {
                            break;
                        }
                    }
                }
                video_packet_number += 1;
            }
            Err(FfmpegError::Eof) => break,
            Err(error) => return Err(UnbundleError::from(error)),
        }
    }
    Ok(scenes)
}
/// Read the `lavfi.scd.score` metadata entry that FFmpeg's `scdet`
/// filter attaches to frames, returning it as an `f64` if present and
/// parseable; `None` otherwise.
fn read_scdet_score(frame: &VideoFrame) -> Option<f64> {
    // SAFETY: `frame` wraps a live AVFrame; every raw pointer obtained
    // below (frame, metadata dict, dict entry, entry value) is
    // null-checked before being dereferenced, and the value C string is
    // owned by the frame's metadata dict, which outlives this function.
    unsafe {
        let frame_ptr = frame.as_ptr();
        if frame_ptr.is_null() {
            return None;
        }
        let metadata = (*frame_ptr).metadata;
        if metadata.is_null() {
            return None;
        }
        let key = c"lavfi.scd.score";
        let entry = ffmpeg_sys_next::av_dict_get(metadata, key.as_ptr(), std::ptr::null(), 0);
        if entry.is_null() {
            return None;
        }
        let value_ptr = (*entry).value;
        if value_ptr.is_null() {
            return None;
        }
        let value_cstr = CStr::from_ptr(value_ptr);
        value_cstr.to_str().ok()?.parse::<f64>().ok()
    }
}