use crate::error::{Result, VisionError};
use crate::optical_flow_dense::warp_image;
use scirs2_core::ndarray::{Array2, Array3};
use std::collections::VecDeque;
/// A single video frame: a 3-D block of `f64` samples plus a timestamp and
/// the spatial size recorded when the frame was constructed.
#[derive(Debug, Clone)]
pub struct VideoFrame {
/// Sample data. `VideoFrame::new` reads axis 0 as the height and axis 1 as
/// the width; `channels()` reports the length of axis 2.
pub data: Array3<f64>,
/// Timestamp supplied by the caller; this type does not fix its unit.
pub timestamp: f64,
/// Length of axis 1 of `data`, captured by `VideoFrame::new`.
pub width: usize,
/// Length of axis 0 of `data`, captured by `VideoFrame::new`.
pub height: usize,
}
impl VideoFrame {
    /// Wraps `data` in a frame, recording the lengths of axis 0 and axis 1 as
    /// `height` and `width` respectively.
    ///
    /// The current implementation never fails; the `Result` return type is
    /// part of the existing signature.
    pub fn new(data: Array3<f64>, timestamp: f64) -> Result<Self> {
        let (height, width, _) = data.dim();
        Ok(Self {
            data,
            timestamp,
            width,
            height,
        })
    }

    /// Number of channels, i.e. the length of axis 2 of `data`.
    pub fn channels(&self) -> usize {
        let (_, _, depth) = self.data.dim();
        depth
    }

    /// Returns an owned copy of channel `ch` as a 2-D array.
    ///
    /// # Errors
    ///
    /// Returns `VisionError::InvalidParameter` when `ch` is not smaller than
    /// `self.channels()`.
    pub fn channel(&self, ch: usize) -> Result<Array2<f64>> {
        let available = self.channels();
        if ch < available {
            let plane = self.data.slice(scirs2_core::ndarray::s![.., .., ch]);
            return Ok(plane.to_owned());
        }
        Err(VisionError::InvalidParameter(format!(
            "channel index {ch} out of range (frame has {} channels)",
            available
        )))
    }

    /// Collapses the channel axis into a single plane using an unweighted
    /// mean: every channel contributes with weight `1 / channels`.
    ///
    /// A frame with zero channels yields an all-zero plane.
    pub fn to_grayscale(&self) -> Array2<f64> {
        let (rows, cols, depth) = self.data.dim();
        let share = 1.0 / depth as f64;
        // Channels are accumulated in ascending order starting from 0.0 so
        // the floating-point result is the same as a channel-major sweep.
        Array2::from_shape_fn((rows, cols), |(r, col)| {
            (0..depth).fold(0.0_f64, |acc, ch| acc + self.data[[r, col, ch]] * share)
        })
    }
}
/// A bounded queue of video frames in insertion order.
#[derive(Debug, Clone)]
pub struct FrameBuffer {
/// Stored frames; `push` appends at the back and evicts from the front.
pub frames: VecDeque<VideoFrame>,
/// Number of frames at which `push` starts evicting; `FrameBuffer::new`
/// rejects a value of 0.
pub capacity: usize,
}
impl FrameBuffer {
    /// Creates an empty buffer that retains at most `capacity` frames.
    ///
    /// # Errors
    ///
    /// Returns `VisionError::InvalidParameter` when `capacity` is 0.
    pub fn new(capacity: usize) -> Result<Self> {
        if capacity == 0 {
            return Err(VisionError::InvalidParameter(
                "FrameBuffer: capacity must be at least 1".into(),
            ));
        }
        Ok(Self {
            frames: VecDeque::with_capacity(capacity),
            capacity,
        })
    }

    /// Appends `frame`, first evicting the oldest frames so that the buffer
    /// never holds more than `capacity` frames afterwards.
    pub fn push(&mut self, frame: VideoFrame) {
        // `frames` and `capacity` are public, so the buffer may already hold
        // more frames than `capacity` (for example after a caller lowered
        // `capacity`). Evicting on `>=` in a loop restores the bound instead
        // of letting the queue grow forever. `max(1)` guarantees the loop
        // terminates and the new frame is kept even if `capacity` was set to 0.
        let limit = self.capacity.max(1);
        while self.frames.len() >= limit {
            self.frames.pop_front();
        }
        self.frames.push_back(frame);
    }

    /// Number of frames currently stored.
    pub fn len(&self) -> usize {
        self.frames.len()
    }

    /// `true` when no frame is stored.
    pub fn is_empty(&self) -> bool {
        self.frames.is_empty()
    }

    /// `true` when the buffer holds at least `capacity` frames, i.e. the next
    /// `push` will evict.
    pub fn is_full(&self) -> bool {
        self.frames.len() >= self.capacity
    }

    /// The most recently pushed frame, or `None` when the buffer is empty.
    pub fn latest(&self) -> Option<&VideoFrame> {
        self.frames.back()
    }

    /// Iterates over the stored frames from oldest to newest.
    pub fn iter(&self) -> impl Iterator<Item = &VideoFrame> {
        self.frames.iter()
    }
}
/// Computes the per-element median over the last `window` frames.
///
/// `window` is clamped to `1..=frames.len()`, so a value of 0 behaves like 1
/// and an oversized value uses every frame. For an even number of samples the
/// median is the mean of the two middle values.
///
/// Samples are ordered with `f64::total_cmp`, which is a total order even when
/// NaN is present, so sorting is always well defined.
///
/// # Errors
///
/// * `VisionError::InvalidParameter` when `frames` is empty.
/// * `VisionError::DimensionMismatch` when any frame (including frames outside
///   the window) has a shape different from `frames[0]`.
pub fn temporal_median_filter(frames: &[Array3<f64>], window: usize) -> Result<Array3<f64>> {
    if frames.is_empty() {
        return Err(VisionError::InvalidParameter(
            "temporal_median_filter: frames slice must not be empty".into(),
        ));
    }
    let window = window.min(frames.len()).max(1);
    let ref_shape = frames[0].dim();
    for (i, f) in frames.iter().enumerate() {
        if f.dim() != ref_shape {
            return Err(VisionError::DimensionMismatch(format!(
                "temporal_median_filter: frame {i} shape {:?} != reference {:?}",
                f.dim(),
                ref_shape
            )));
        }
    }

    let (h, w, c) = ref_shape;
    // Only the trailing `window` frames take part in the median.
    let start = frames.len().saturating_sub(window);
    let window_frames = &frames[start..];
    let n = window_frames.len();
    let mid = n / 2;

    let mut output = Array3::<f64>::zeros((h, w, c));
    // One scratch buffer reused for every element instead of a fresh
    // allocation per pixel and channel.
    let mut samples: Vec<f64> = Vec::with_capacity(n);
    for row in 0..h {
        for col in 0..w {
            for ch in 0..c {
                samples.clear();
                samples.extend(window_frames.iter().map(|f| f[[row, col, ch]]));
                // A comparator that maps incomparable pairs to `Equal` is not
                // a total order when NaN is present, which `sort_by` is
                // allowed to reject by panicking; `total_cmp` avoids that.
                samples.sort_unstable_by(f64::total_cmp);
                output[[row, col, ch]] = if n % 2 == 1 {
                    samples[mid]
                } else {
                    (samples[mid - 1] + samples[mid]) * 0.5
                };
            }
        }
    }
    Ok(output)
}
/// Configuration and per-pixel state for `background_subtraction_mog`.
#[derive(Debug, Clone)]
pub struct MogBackground {
/// Number of Gaussian components kept per pixel (the length of the third
/// axis of the state arrays).
pub n_gaussians: usize,
/// Update rate used when blending a new observation into the model.
pub learning_rate: f64,
/// Match threshold: a pixel matches a component when its squared distance
/// from the mean, divided by the variance, is below `threshold` squared.
pub threshold: f64,
// Per-pixel component means; `None` until the first frame is processed.
means: Option<Array3<f64>>,
// Per-pixel component variances; `None` until the first frame is processed.
variances: Option<Array3<f64>>,
// Per-pixel component weights; `None` until the first frame is processed.
weights: Option<Array3<f64>>,
}
impl MogBackground {
    /// Creates an uninitialised model; the per-pixel state is allocated when
    /// the first frame is processed.
    ///
    /// # Errors
    ///
    /// Returns `VisionError::InvalidParameter` when `n_gaussians` is 0 or when
    /// `learning_rate` is outside `(0, 1]` (NaN is rejected as well).
    pub fn new(n_gaussians: usize, learning_rate: f64, threshold: f64) -> Result<Self> {
        if n_gaussians == 0 {
            return Err(VisionError::InvalidParameter(
                "MogBackground: n_gaussians must be at least 1".into(),
            ));
        }
        // The documented range excludes 0: a zero rate freezes the model so it
        // can never adapt. Written as a positive test so NaN fails it too.
        let rate_in_range = learning_rate > 0.0 && learning_rate <= 1.0;
        if !rate_in_range {
            return Err(VisionError::InvalidParameter(
                "MogBackground: learning_rate must be in (0, 1]".into(),
            ));
        }
        Ok(Self {
            n_gaussians,
            learning_rate,
            threshold,
            means: None,
            variances: None,
            weights: None,
        })
    }
}
impl Default for MogBackground {
    /// Uninitialised model with 3 components per pixel, a learning rate of
    /// 0.005 and a match threshold of 2.5.
    fn default() -> Self {
        let (n_gaussians, learning_rate, threshold) = (3, 0.005, 2.5);
        Self {
            // Per-pixel state is left unset; it is not allocated here.
            means: None,
            variances: None,
            weights: None,
            n_gaussians,
            learning_rate,
            threshold,
        }
    }
}
pub fn background_subtraction_mog(
frame: &Array2<f64>,
background_model: &mut MogBackground,
) -> Result<Array2<bool>> {
let (rows, cols) = frame.dim();
let k = background_model.n_gaussians;
let alpha = background_model.learning_rate;
let thr = background_model.threshold;
if background_model.means.is_none() {
let init_weight = 1.0 / k as f64;
let mut means = Array3::<f64>::zeros((rows, cols, k));
let variances = Array3::<f64>::from_elem((rows, cols, k), 0.01);
let weights = Array3::<f64>::from_elem((rows, cols, k), init_weight);
for r in 0..rows {
for c in 0..cols {
for ki in 0..k {
means[[r, c, ki]] = frame[[r, c]];
}
}
}
background_model.means = Some(means);
background_model.variances = Some(variances);
background_model.weights = Some(weights);
}
let means = background_model.means.as_mut().expect("means initialised");
let variances = background_model
.variances
.as_mut()
.expect("variances initialised");
let weights = background_model
.weights
.as_mut()
.expect("weights initialised");
let mut fg_mask = Array2::<bool>::from_elem((rows, cols), false);
for r in 0..rows {
for c in 0..cols {
let pixel = frame[[r, c]];
let mut matched = false;
let mut best_ki = 0usize;
for ki in 0..k {
let diff = pixel - means[[r, c, ki]];
let var = variances[[r, c, ki]];
if var > 1e-12 && diff * diff / var < thr * thr {
let rho = alpha / weights[[r, c, ki]].max(1e-12);
means[[r, c, ki]] += rho * diff;
variances[[r, c, ki]] = (1.0 - rho) * var + rho * diff * diff;
weights[[r, c, ki]] = (1.0 - alpha) * weights[[r, c, ki]] + alpha;
matched = true;
best_ki = ki;
break;
}
}
if !matched {
let mut min_w = weights[[r, c, 0]];
let mut min_ki = 0;
for ki in 1..k {
if weights[[r, c, ki]] < min_w {
min_w = weights[[r, c, ki]];
min_ki = ki;
}
}
means[[r, c, min_ki]] = pixel;
variances[[r, c, min_ki]] = 0.01;
weights[[r, c, min_ki]] = alpha;
best_ki = min_ki;
}
let mut w_sum = 0.0_f64;
for ki in 0..k {
w_sum += weights[[r, c, ki]];
}
if w_sum > 1e-12 {
for ki in 0..k {
weights[[r, c, ki]] /= w_sum;
}
}
let is_bg = matched && weights[[r, c, best_ki]] > 1.0 / k as f64;
fg_mask[[r, c]] = !is_bg;
}
}
Ok(fg_mask)
}
/// Synthesises an intermediate frame at fraction `t` between `frame1` and
/// `frame2` using the flow field `flow = (u, v)`.
///
/// Each channel of `frame1` is passed to `warp_image` with the flow scaled by
/// `t`, each channel of `frame2` with the flow scaled by `-(1 - t)`, and the
/// two results are blended as `(1 - t) * warped1 + t * warped2`.
///
/// # Errors
///
/// * `VisionError::DimensionMismatch` when the two frames differ in shape or
///   when either flow component does not match the frames' first two axes.
/// * `VisionError::InvalidParameter` when `t` is outside `[0, 1]`.
/// * Any error returned by `warp_image`.
pub fn frame_interpolation(
    frame1: &Array3<f64>,
    frame2: &Array3<f64>,
    t: f64,
    flow: (&Array2<f64>, &Array2<f64>),
) -> Result<Array3<f64>> {
    let dims = frame1.dim();
    if dims != frame2.dim() {
        return Err(VisionError::DimensionMismatch(
            "frame_interpolation: frame1 and frame2 must have identical shapes".into(),
        ));
    }

    let (rows, cols, channels) = dims;
    let (u, v) = flow;
    let plane = (rows, cols);
    let flow_fits = u.dim() == plane && v.dim() == plane;
    if !flow_fits {
        return Err(VisionError::DimensionMismatch(
            "frame_interpolation: flow field spatial shape must match frame spatial shape".into(),
        ));
    }

    if !(0.0..=1.0).contains(&t) {
        return Err(VisionError::InvalidParameter(
            "frame_interpolation: t must be in [0, 1]".into(),
        ));
    }

    // Blend weight of the first frame; also the fraction of the flow applied
    // (negated) when warping the second frame.
    let remaining = 1.0 - t;
    let toward = |x: f64| x * t;
    let away = |x: f64| -x * remaining;

    let u_fwd = u.mapv(toward);
    let v_fwd = v.mapv(toward);
    let u_bwd = u.mapv(away);
    let v_bwd = v.mapv(away);

    let mut blended = Array3::<f64>::zeros((rows, cols, channels));
    for ch in 0..channels {
        let plane1 = frame1
            .slice(scirs2_core::ndarray::s![.., .., ch])
            .to_owned();
        let plane2 = frame2
            .slice(scirs2_core::ndarray::s![.., .., ch])
            .to_owned();
        let from_first = warp_image(&plane1, &u_fwd, &v_fwd)?;
        let from_second = warp_image(&plane2, &u_bwd, &v_bwd)?;
        for y in 0..rows {
            for x in 0..cols {
                blended[[y, x, ch]] = remaining * from_first[[y, x]] + t * from_second[[y, x]];
            }
        }
    }
    Ok(blended)
}
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::{Array2, Array3};

    /// Builds an `h x w` frame with three channels, every sample set to `val`.
    fn rgb_frame(h: usize, w: usize, val: f64) -> Array3<f64> {
        Array3::from_elem((h, w, 3), val)
    }

    /// Pushing five frames into a three-slot buffer keeps only the last three.
    #[test]
    fn frame_buffer_circular_eviction() {
        let mut buf = FrameBuffer::new(3).expect("FrameBuffer::new failed");
        for idx in 0..5u32 {
            let stamp = idx as f64;
            let frame =
                VideoFrame::new(rgb_frame(4, 4, stamp / 10.0), stamp).expect("VideoFrame::new failed");
            buf.push(frame);
        }
        assert_eq!(buf.len(), 3);
        // Frames 0 and 1 were evicted, so the oldest survivor is frame 2.
        let oldest = &buf.frames[0];
        assert!((oldest.timestamp - 2.0).abs() < 1e-9);
    }

    /// The median of a single frame is that frame.
    #[test]
    fn temporal_median_filter_single_frame() {
        let only = rgb_frame(4, 4, 0.7);
        let out =
            temporal_median_filter(std::slice::from_ref(&only), 1).expect("median filter failed");
        for &v in out.iter() {
            assert!((v - 0.7).abs() < 1e-10);
        }
    }

    /// With three constant frames the middle value wins everywhere.
    #[test]
    fn temporal_median_filter_three_frames() {
        let stack = [rgb_frame(2, 2, 0.2), rgb_frame(2, 2, 0.5), rgb_frame(2, 2, 0.8)];
        let out = temporal_median_filter(&stack, 3).expect("median filter failed");
        for &v in out.iter() {
            assert!((v - 0.5).abs() < 1e-10, "expected 0.5, got {v}");
        }
    }

    /// The first call initialises the model and returns a mask of frame size.
    #[test]
    fn mog_background_first_frame_all_foreground_zero() {
        let mut model = MogBackground::new(3, 0.005, 2.5).expect("MogBackground::new failed");
        let frame = Array2::from_elem((4, 4), 0.5_f64);
        let mask = background_subtraction_mog(&frame, &mut model).expect("mog failed");
        assert_eq!(mask.dim(), (4, 4));
    }

    /// A constant scene is classified as background after repeated updates.
    #[test]
    fn mog_background_converges_to_background() {
        let mut model = MogBackground::new(3, 0.1, 2.5).expect("MogBackground::new failed");
        let frame = Array2::from_elem((4, 4), 0.5_f64);
        // Warm-up passes; only the final mask is inspected.
        for _ in 0..50 {
            let _ = background_subtraction_mog(&frame, &mut model);
        }
        let mask = background_subtraction_mog(&frame, &mut model).expect("mog failed");
        for &is_fg in mask.iter() {
            assert!(!is_fg, "expected background, got foreground");
        }
    }

    /// With zero flow and `t = 0` the output equals the first frame.
    #[test]
    fn frame_interpolation_at_zero_returns_frame1() {
        let (f1, f2) = (rgb_frame(4, 4, 0.2), rgb_frame(4, 4, 0.8));
        let u = Array2::zeros((4, 4));
        let v = Array2::zeros((4, 4));
        let out = frame_interpolation(&f1, &f2, 0.0, (&u, &v)).expect("interpolation failed");
        for &val in out.iter() {
            assert!((val - 0.2).abs() < 1e-10, "expected 0.2, got {val}");
        }
    }

    /// With zero flow and `t = 1` the output equals the second frame.
    #[test]
    fn frame_interpolation_at_one_returns_frame2() {
        let (f1, f2) = (rgb_frame(4, 4, 0.2), rgb_frame(4, 4, 0.8));
        let u = Array2::zeros((4, 4));
        let v = Array2::zeros((4, 4));
        let out = frame_interpolation(&f1, &f2, 1.0, (&u, &v)).expect("interpolation failed");
        for &val in out.iter() {
            assert!((val - 0.8).abs() < 1e-10, "expected 0.8, got {val}");
        }
    }
}