use crate::error::{AccelError, AccelResult};
use crate::traits::{HardwareAccel, ScaleFilter};
use oximedia_core::PixelFormat;
use rayon::prelude::*;
/// CPU-only acceleration backend.
///
/// The type is a zero-sized, stateless handle: all work happens in the
/// methods of its [`HardwareAccel`] implementation.
pub struct CpuAccel;

impl CpuAccel {
    /// Creates a CPU backend handle.
    #[must_use]
    pub fn new() -> Self {
        CpuAccel
    }
}

impl Default for CpuAccel {
    /// Produces the same value as [`CpuAccel::new`].
    fn default() -> Self {
        CpuAccel
    }
}
impl HardwareAccel for CpuAccel {
fn scale_image(
&self,
input: &[u8],
src_width: u32,
src_height: u32,
dst_width: u32,
dst_height: u32,
format: PixelFormat,
filter: ScaleFilter,
) -> AccelResult<Vec<u8>> {
let channels = match format {
PixelFormat::Rgb24 => 3,
PixelFormat::Rgba32 => 4,
PixelFormat::Gray8 => 1,
_ => {
return Err(AccelError::InvalidFormat(format!(
"Unsupported format: {format:?}"
)))
}
};
let expected_size = (src_width * src_height * channels) as usize;
if input.len() != expected_size {
return Err(AccelError::BufferSizeMismatch {
expected: expected_size,
actual: input.len(),
});
}
let output_size = (dst_width * dst_height * channels) as usize;
let mut output = vec![0u8; output_size];
match filter {
ScaleFilter::Nearest => {
scale_nearest(
input,
&mut output,
src_width,
src_height,
dst_width,
dst_height,
channels,
);
}
ScaleFilter::Bilinear => {
scale_bilinear(
input,
&mut output,
src_width,
src_height,
dst_width,
dst_height,
channels,
);
}
ScaleFilter::Bicubic => {
scale_bicubic(
input,
&mut output,
src_width,
src_height,
dst_width,
dst_height,
channels,
);
}
ScaleFilter::Lanczos => {
scale_lanczos(
input,
&mut output,
src_width,
src_height,
dst_width,
dst_height,
channels,
);
}
}
Ok(output)
}
fn convert_color(
&self,
input: &[u8],
width: u32,
height: u32,
src_format: PixelFormat,
dst_format: PixelFormat,
) -> AccelResult<Vec<u8>> {
match (src_format, dst_format) {
(PixelFormat::Rgb24, PixelFormat::Yuv420p) => rgb_to_yuv420p(input, width, height),
(PixelFormat::Yuv420p, PixelFormat::Rgb24) => yuv420p_to_rgb(input, width, height),
_ => Err(AccelError::Unsupported(format!(
"Color conversion from {src_format:?} to {dst_format:?} not implemented"
))),
}
}
fn motion_estimation(
&self,
reference: &[u8],
current: &[u8],
width: u32,
height: u32,
block_size: u32,
) -> AccelResult<Vec<(i16, i16)>> {
let expected_size = (width * height) as usize;
if reference.len() != expected_size || current.len() != expected_size {
return Err(AccelError::BufferSizeMismatch {
expected: expected_size,
actual: reference.len().min(current.len()),
});
}
Ok(block_motion_estimation(
reference, current, width, height, block_size,
))
}
}
/// Nearest-neighbour resampling of an interleaved 8-bit image.
///
/// Every destination pixel copies the source pixel at
/// `(x * src_width / dst_width, y * src_height / dst_height)` using integer
/// division. `output` is processed in rows of `dst_width * channels` bytes;
/// an empty destination is a no-op.
///
/// # Panics
///
/// Panics if `input` is too short for the source coordinates that are read.
#[allow(clippy::cast_possible_truncation)]
fn scale_nearest(
    input: &[u8],
    output: &mut [u8],
    src_width: u32,
    src_height: u32,
    dst_width: u32,
    dst_height: u32,
    channels: u32,
) {
    let bytes_per_pixel = channels as usize;
    let dst_stride = dst_width as usize * bytes_per_pixel;
    if dst_stride == 0 || dst_height == 0 {
        // rayon rejects a zero chunk size, and there is nothing to write.
        return;
    }
    let src_stride = src_width as usize * bytes_per_pixel;

    // The column mapping is identical for every row, so compute it once.
    // 64-bit intermediates keep `x * src_width` from overflowing `u32` for
    // wide images.
    let column_offsets: Vec<usize> = (0..u64::from(dst_width))
        .map(|x| (x * u64::from(src_width) / u64::from(dst_width)) as usize * bytes_per_pixel)
        .collect();

    output
        .par_chunks_exact_mut(dst_stride)
        .enumerate()
        .for_each(|(y, dst_row)| {
            // Same 64-bit widening for the row mapping.
            let src_y = (y as u64 * u64::from(src_height) / u64::from(dst_height)) as usize;
            let src_row = &input[src_y * src_stride..][..src_stride];
            for (dst_pixel, &offset) in dst_row
                .chunks_exact_mut(bytes_per_pixel)
                .zip(&column_offsets)
            {
                dst_pixel.copy_from_slice(&src_row[offset..offset + bytes_per_pixel]);
            }
        });
}
/// Bilinear resampling of an interleaved 8-bit image.
///
/// Destination coordinates are mapped to the source with the ratios
/// `(src_width - 1) / dst_width` and `(src_height - 1) / dst_height`. The
/// four surrounding source pixels are blended per channel, clamped to
/// `0..=255` and truncated to `u8`. Rows of `output` are processed in
/// parallel.
#[allow(clippy::cast_possible_truncation)]
#[allow(clippy::cast_sign_loss)]
#[allow(clippy::cast_precision_loss)]
fn scale_bilinear(
    input: &[u8],
    output: &mut [u8],
    src_width: u32,
    src_height: u32,
    dst_width: u32,
    dst_height: u32,
    channels: u32,
) {
    /// Linear blend of `a` and `b` with weight `t` on `b`.
    fn lerp(a: f32, b: f32, t: f32) -> f32 {
        a * (1.0 - t) + b * t
    }

    let step_x = (src_width - 1) as f32 / dst_width as f32;
    let step_y = (src_height - 1) as f32 / dst_height as f32;
    let dst_stride = (dst_width * channels) as usize;
    let bytes_per_pixel = channels as usize;

    // Byte offset of the first channel of source pixel (sx, sy).
    let byte_offset = move |sx: u32, sy: u32| ((sy * src_width + sx) * channels) as usize;

    output
        .par_chunks_exact_mut(dst_stride)
        .enumerate()
        .for_each(|(dst_y, dst_row)| {
            let pos_y = dst_y as f32 * step_y;
            let top = pos_y.floor() as u32;
            let bottom = (top + 1).min(src_height - 1);
            let weight_y = pos_y - top as f32;

            for (dst_x, dst_pixel) in (0..dst_width).zip(dst_row.chunks_exact_mut(bytes_per_pixel))
            {
                let pos_x = dst_x as f32 * step_x;
                let left = pos_x.floor() as u32;
                let right = (left + 1).min(src_width - 1);
                let weight_x = pos_x - left as f32;

                let top_left = byte_offset(left, top);
                let bottom_left = byte_offset(left, bottom);
                let top_right = byte_offset(right, top);
                let bottom_right = byte_offset(right, bottom);

                for (c, out) in dst_pixel.iter_mut().enumerate() {
                    let upper = lerp(
                        f32::from(input[top_left + c]),
                        f32::from(input[top_right + c]),
                        weight_x,
                    );
                    let lower = lerp(
                        f32::from(input[bottom_left + c]),
                        f32::from(input[bottom_right + c]),
                        weight_x,
                    );
                    *out = lerp(upper, lower, weight_y).clamp(0.0, 255.0) as u8;
                }
            }
        });
}
/// Cubic convolution weight for a sample at signed distance `t`.
///
/// Evaluates the piecewise cubic `1.5|t|^3 - 2.5|t|^2 + 1` for `|t| <= 1`,
/// `-0.5|t|^3 + 2.5|t|^2 - 4|t| + 2` for `1 < |t| <= 2`, and `0` beyond that
/// (the Keys kernel with `a = -0.5`). A NaN distance yields `0`.
#[inline]
fn cubic_weight(t: f32) -> f32 {
    match t.abs() {
        d if d <= 1.0 => (1.5 * d - 2.5) * d * d + 1.0,
        d if d <= 2.0 => ((-0.5 * d + 2.5) * d - 4.0) * d + 2.0,
        _ => 0.0,
    }
}
/// Bicubic resampling of an interleaved 8-bit image.
///
/// Uses pixel-centre alignment (`(dst + 0.5) * ratio - 0.5`) and a 4x4
/// neighbourhood weighted by [`cubic_weight`]. Taps that fall outside the
/// source are clamped to the nearest edge pixel. The weighted sum is
/// normalised by the total weight (unless that is nearly zero), clamped to
/// `0..=255` and truncated to `u8`. Rows of `output` are processed in
/// parallel.
fn scale_bicubic(
    input: &[u8],
    output: &mut [u8],
    src_width: u32,
    src_height: u32,
    dst_width: u32,
    dst_height: u32,
    channels: u32,
) {
    /// Tap offsets relative to the floor of the source coordinate.
    const TAPS: [i32; 4] = [-1, 0, 1, 2];

    let x_ratio = src_width as f32 / dst_width as f32;
    let y_ratio = src_height as f32 / dst_height as f32;
    let bytes_per_pixel = channels as usize;

    output
        .par_chunks_exact_mut((dst_width * channels) as usize)
        .enumerate()
        .for_each(|(y, row)| {
            let src_y = (y as f32 + 0.5) * y_ratio - 0.5;
            let y0 = src_y.floor() as i32;
            // (clamped source row, vertical weight) for each tap of this row.
            let row_taps = TAPS.map(|k| {
                let tap = y0 + k;
                (
                    tap.clamp(0, src_height as i32 - 1) as u32,
                    cubic_weight(src_y - tap as f32),
                )
            });

            for (x, pixel) in (0..dst_width).zip(row.chunks_exact_mut(bytes_per_pixel)) {
                let src_x = (x as f32 + 0.5) * x_ratio - 0.5;
                let x0 = src_x.floor() as i32;
                // (clamped source column, horizontal weight) for each tap.
                let col_taps = TAPS.map(|k| {
                    let tap = x0 + k;
                    (
                        tap.clamp(0, src_width as i32 - 1) as u32,
                        cubic_weight(src_x - tap as f32),
                    )
                });

                for (c, out) in pixel.iter_mut().enumerate() {
                    let mut sum = 0.0f32;
                    let mut weight_sum = 0.0f32;
                    for &(sy, wy) in &row_taps {
                        for &(sx, wx) in &col_taps {
                            let w = wx * wy;
                            let src_idx = ((sy * src_width + sx) * channels) as usize + c;
                            sum += f32::from(input[src_idx]) * w;
                            weight_sum += w;
                        }
                    }
                    // Skip normalisation when the weights cancel out.
                    let value = if weight_sum.abs() > 1e-6 {
                        sum / weight_sum
                    } else {
                        sum
                    };
                    *out = value.clamp(0.0, 255.0) as u8;
                }
            }
        });
}
/// Lanczos window `sinc(x) * sinc(x / a)` with `sinc(v) = sin(pi v) / (pi v)`.
///
/// Returns `1.0` when `|x| < 1e-6` (avoiding the division by zero at the
/// origin) and `0.0` when `|x| >= a`.
#[inline]
fn lanczos_kernel(x: f32, a: f32) -> f32 {
    let distance = x.abs();
    if distance < 1e-6 {
        1.0
    } else if distance >= a {
        0.0
    } else {
        // Unnormalised sinc; the caller passes an argument already scaled by pi.
        let sinc = |v: f32| v.sin() / v;
        let pi_x = std::f32::consts::PI * x;
        sinc(pi_x) * sinc(pi_x / a)
    }
}
/// Lanczos-3 resampling of an interleaved 8-bit image.
///
/// Uses pixel-centre alignment (`(dst + 0.5) * ratio - 0.5`) and a 6x6
/// neighbourhood weighted by [`lanczos_kernel`] with `a = 3`. Taps that fall
/// outside the source are clamped to the nearest edge pixel. The weighted sum
/// is normalised by the total weight (unless that is nearly zero), clamped to
/// `0..=255` and truncated to `u8`. Rows of `output` are processed in
/// parallel.
fn scale_lanczos(
    input: &[u8],
    output: &mut [u8],
    src_width: u32,
    src_height: u32,
    dst_width: u32,
    dst_height: u32,
    channels: u32,
) {
    /// Kernel support parameter `a`.
    const LOBES: f32 = 3.0;
    /// Tap offsets `(1 - a)..=a` relative to the floor of the source coordinate.
    const TAPS: [i32; 6] = [-2, -1, 0, 1, 2, 3];

    let x_ratio = src_width as f32 / dst_width as f32;
    let y_ratio = src_height as f32 / dst_height as f32;
    let bytes_per_pixel = channels as usize;

    output
        .par_chunks_exact_mut((dst_width * channels) as usize)
        .enumerate()
        .for_each(|(y, row)| {
            let src_y = (y as f32 + 0.5) * y_ratio - 0.5;
            let y0 = src_y.floor() as i32;
            // (clamped source row, vertical weight) for each tap of this row.
            let row_taps = TAPS.map(|k| {
                let tap = y0 + k;
                (
                    tap.clamp(0, src_height as i32 - 1) as u32,
                    lanczos_kernel(src_y - tap as f32, LOBES),
                )
            });

            for (x, pixel) in (0..dst_width).zip(row.chunks_exact_mut(bytes_per_pixel)) {
                let src_x = (x as f32 + 0.5) * x_ratio - 0.5;
                let x0 = src_x.floor() as i32;
                // (clamped source column, horizontal weight) for each tap.
                let col_taps = TAPS.map(|k| {
                    let tap = x0 + k;
                    (
                        tap.clamp(0, src_width as i32 - 1) as u32,
                        lanczos_kernel(src_x - tap as f32, LOBES),
                    )
                });

                for (c, out) in pixel.iter_mut().enumerate() {
                    let mut sum = 0.0f32;
                    let mut weight_sum = 0.0f32;
                    for &(sy, wy) in &row_taps {
                        for &(sx, wx) in &col_taps {
                            let w = wx * wy;
                            let src_idx = ((sy * src_width + sx) * channels) as usize + c;
                            sum += f32::from(input[src_idx]) * w;
                            weight_sum += w;
                        }
                    }
                    // Skip normalisation when the weights cancel out.
                    let value = if weight_sum.abs() > 1e-6 {
                        sum / weight_sum
                    } else {
                        sum
                    };
                    *out = value.clamp(0.0, 255.0) as u8;
                }
            }
        });
}
#[allow(clippy::cast_possible_truncation)]
#[allow(clippy::cast_sign_loss)]
#[allow(clippy::cast_precision_loss)]
fn rgb_to_yuv420p(input: &[u8], width: u32, height: u32) -> AccelResult<Vec<u8>> {
let expected_size = (width * height * 3) as usize;
if input.len() != expected_size {
return Err(AccelError::BufferSizeMismatch {
expected: expected_size,
actual: input.len(),
});
}
let y_size = (width * height) as usize;
let uv_size = (width * height / 4) as usize;
let mut output = vec![0u8; y_size + uv_size * 2];
let (y_plane, uv_planes) = output.split_at_mut(y_size);
let (u_plane, v_plane) = uv_planes.split_at_mut(uv_size);
y_plane.par_iter_mut().enumerate().for_each(|(i, y)| {
let rgb_idx = i * 3;
let r = f32::from(input[rgb_idx]);
let g = f32::from(input[rgb_idx + 1]);
let b = f32::from(input[rgb_idx + 2]);
*y = (0.299 * r + 0.587 * g + 0.114 * b).clamp(0.0, 255.0) as u8;
});
u_plane
.par_iter_mut()
.zip(v_plane.par_iter_mut())
.enumerate()
.for_each(|(i, (u, v))| {
let uv_x = (i as u32 % (width / 2)) * 2;
let uv_y = (i as u32 / (width / 2)) * 2;
let rgb_idx = ((uv_y * width + uv_x) * 3) as usize;
let r = f32::from(input[rgb_idx]);
let g = f32::from(input[rgb_idx + 1]);
let b = f32::from(input[rgb_idx + 2]);
*u = (-0.169 * r - 0.331 * g + 0.500 * b + 128.0).clamp(0.0, 255.0) as u8;
*v = (0.500 * r - 0.419 * g - 0.081 * b + 128.0).clamp(0.0, 255.0) as u8;
});
Ok(output)
}
#[allow(clippy::cast_possible_truncation)]
#[allow(clippy::cast_sign_loss)]
#[allow(clippy::cast_precision_loss)]
fn yuv420p_to_rgb(input: &[u8], width: u32, height: u32) -> AccelResult<Vec<u8>> {
let y_size = (width * height) as usize;
let uv_size = (width * height / 4) as usize;
let expected_size = y_size + uv_size * 2;
if input.len() != expected_size {
return Err(AccelError::BufferSizeMismatch {
expected: expected_size,
actual: input.len(),
});
}
let y_plane = &input[..y_size];
let u_plane = &input[y_size..y_size + uv_size];
let v_plane = &input[y_size + uv_size..];
let output_size = (width * height * 3) as usize;
let mut output = vec![0u8; output_size];
output
.par_chunks_exact_mut(3)
.enumerate()
.for_each(|(i, pixel)| {
let pixel_x = i as u32 % width;
let pixel_y = i as u32 / width;
let uv_idx = ((pixel_y / 2) * (width / 2) + (pixel_x / 2)) as usize;
let y_val = f32::from(y_plane[i]);
let u_val = f32::from(u_plane[uv_idx]) - 128.0;
let v_val = f32::from(v_plane[uv_idx]) - 128.0;
let red = (y_val + 1.402 * v_val).clamp(0.0, 255.0) as u8;
let green = (y_val - 0.344 * u_val - 0.714 * v_val).clamp(0.0, 255.0) as u8;
let blue = (y_val + 1.772 * u_val).clamp(0.0, 255.0) as u8;
pixel[0] = red;
pixel[1] = green;
pixel[2] = blue;
});
Ok(output)
}
/// Exhaustive block-matching motion search over a +/-8 pixel window.
///
/// The frame is divided into `block_size` x `block_size` blocks; blocks on
/// the right and bottom edges are clipped to the frame. For each block, every
/// displacement `(dx, dy)` whose full `block_size` square lies inside the
/// reference frame is scored by the sum of absolute differences (SAD)
/// against the current block. The first displacement with the lowest SAD
/// wins, scanning `dy` then `dx` in ascending order. Blocks with no valid
/// candidate get `(0, 0)`.
///
/// Returns one `(dx, dy)` vector per block in row-major block order, or an
/// empty vector when `block_size` is zero.
///
/// # Panics
///
/// Panics if `reference` or `current` is shorter than `width * height`.
#[allow(clippy::cast_possible_truncation)]
#[allow(clippy::cast_possible_wrap)]
#[allow(clippy::cast_sign_loss)]
fn block_motion_estimation(
    reference: &[u8],
    current: &[u8],
    width: u32,
    height: u32,
    block_size: u32,
) -> Vec<(i16, i16)> {
    /// Maximum displacement searched in each direction, in pixels.
    const SEARCH_RANGE: i32 = 8;

    if block_size == 0 {
        // A zero block size would divide by zero when sizing the block grid.
        return Vec::new();
    }

    let blocks_wide = width.div_ceil(block_size);
    let blocks_high = height.div_ceil(block_size);
    let stride = width as usize;
    let block_extent = block_size as i32;
    let (frame_w, frame_h) = (width as i32, height as i32);

    (0..blocks_high)
        .into_par_iter()
        .flat_map(|block_y| {
            (0..blocks_wide).into_par_iter().map(move |block_x| {
                let cur_x = block_x * block_size;
                let cur_y = block_y * block_size;
                // Edge blocks are clipped to the frame; only the pixels that
                // exist in the current frame contribute to the SAD.
                let cols = block_size.min(width - cur_x) as usize;
                let rows = block_size.min(height - cur_y) as usize;

                let mut best_sad = u32::MAX;
                let mut best = (0i16, 0i16);

                for dy in -SEARCH_RANGE..=SEARCH_RANGE {
                    let ref_y = cur_y as i32 + dy;
                    if ref_y < 0 || ref_y + block_extent > frame_h {
                        continue;
                    }
                    for dx in -SEARCH_RANGE..=SEARCH_RANGE {
                        let ref_x = cur_x as i32 + dx;
                        if ref_x < 0 || ref_x + block_extent > frame_w {
                            continue;
                        }

                        let mut sad = 0u32;
                        for row in 0..rows {
                            let ref_start = (ref_y as usize + row) * stride + ref_x as usize;
                            let cur_start = (cur_y as usize + row) * stride + cur_x as usize;
                            let ref_row = &reference[ref_start..ref_start + cols];
                            let cur_row = &current[cur_start..cur_start + cols];
                            for (&r, &c) in ref_row.iter().zip(cur_row) {
                                sad += u32::from(r.abs_diff(c));
                            }
                        }

                        // Strict comparison keeps the first best candidate.
                        if sad < best_sad {
                            best_sad = sad;
                            best = (dx as i16, dy as i16);
                        }
                    }
                }
                best
            })
        })
        .collect()
}