#![allow(clippy::cast_possible_truncation)]
#![allow(clippy::cast_sign_loss)]
#![allow(clippy::cast_precision_loss)]
#![allow(clippy::cast_possible_wrap)]
use crate::{GpuError, Result};
use rayon::prelude::*;
/// Common interface implemented by every image-processing backend in this module.
///
/// This file provides two implementations: [`CpuAccelerator`] and
/// [`WgpuAccelerator`]. Implementors must be `Send + Sync`.
pub trait GpuAccelerator: Send + Sync {
/// Returns a human-readable name identifying the backend.
fn name(&self) -> &str;
/// Returns `true` when the backend reports itself as a GPU implementation.
fn is_gpu(&self) -> bool;
/// Converts the pixels in `input` from RGB to YUV and writes them to `output`.
///
/// The CPU backend in this file expects both buffers to hold `width * height`
/// four-byte pixels and copies the fourth byte of each pixel through unchanged.
///
/// # Errors
///
/// Returns an error when the backend rejects the buffers or dimensions.
fn rgb_to_yuv(&self, input: &[u8], output: &mut [u8], width: u32, height: u32) -> Result<()>;
/// Converts the pixels in `input` from YUV back to RGB and writes them to `output`.
///
/// Buffer expectations mirror [`GpuAccelerator::rgb_to_yuv`].
///
/// # Errors
///
/// Returns an error when the backend rejects the buffers or dimensions.
fn yuv_to_rgb(&self, input: &[u8], output: &mut [u8], width: u32, height: u32) -> Result<()>;
/// Resizes a `src_width` x `src_height` image into a `dst_width` x `dst_height`
/// image using bilinear interpolation.
///
/// # Errors
///
/// Returns an error when the backend rejects the buffers or dimensions.
#[allow(clippy::too_many_arguments)]
fn scale_bilinear(
&self,
input: &[u8],
src_width: u32,
src_height: u32,
output: &mut [u8],
dst_width: u32,
dst_height: u32,
) -> Result<()>;
/// Blurs `input` into `output` with a Gaussian kernel.
///
/// In the CPU backend `sigma` is the standard deviation of the kernel, measured
/// in pixels.
///
/// # Errors
///
/// Returns an error when the backend rejects the buffers or dimensions.
fn gaussian_blur(
&self,
input: &[u8],
output: &mut [u8],
width: u32,
height: u32,
sigma: f32,
) -> Result<()>;
/// Writes an edge map of `input` to `output` (the CPU backend uses a Sobel
/// gradient magnitude).
///
/// # Errors
///
/// Returns an error when the backend rejects the buffers or dimensions.
fn edge_detect(&self, input: &[u8], output: &mut [u8], width: u32, height: u32) -> Result<()>;
/// Sharpens `input` into `output`.
///
/// In the CPU backend `amount` scales the difference between the original
/// image and a blurred copy that is added back to the original.
///
/// # Errors
///
/// Returns an error when the backend rejects the buffers or dimensions.
fn sharpen(
&self,
input: &[u8],
output: &mut [u8],
width: u32,
height: u32,
amount: f32,
) -> Result<()>;
/// Computes a 2-D discrete cosine transform of `input` into `output`.
///
/// The CPU backend expects `width * height` samples in each buffer and rejects
/// dimensions that are not multiples of 8.
///
/// # Errors
///
/// Returns an error when the backend rejects the buffers or dimensions.
fn dct_2d(&self, input: &[f32], output: &mut [f32], width: u32, height: u32) -> Result<()>;
/// Computes the inverse of [`GpuAccelerator::dct_2d`].
///
/// # Errors
///
/// Returns an error when the backend rejects the buffers or dimensions.
fn idct_2d(&self, input: &[f32], output: &mut [f32], width: u32, height: u32) -> Result<()>;
/// Writes the per-byte absolute difference between images `a` and `b` to `output`.
///
/// # Errors
///
/// Returns an error when the backend rejects the buffers or dimensions.
fn pixel_diff(
&self,
a: &[u8],
b: &[u8],
output: &mut [u8],
width: u32,
height: u32,
) -> Result<()>;
/// Returns the mean squared error between images `a` and `b`.
///
/// # Errors
///
/// Returns an error when the backend rejects the buffers or dimensions.
fn mse(&self, a: &[u8], b: &[u8], width: u32, height: u32) -> Result<f64>;
}
/// Verifies that `buf` holds exactly one four-byte pixel per image position.
///
/// `label` names the buffer at the call site; the error variants used here have no
/// field for it, so it is intentionally unused.
///
/// # Errors
///
/// * [`GpuError::InvalidDimensions`] if `width * height * 4` does not fit in `usize`.
/// * [`GpuError::InvalidBufferSize`] if `buf.len()` differs from that product.
fn check_rgba_buf(buf: &[u8], width: u32, height: u32, label: &str) -> Result<()> {
    let _ = label;
    // `u32 * u32 * 4` can exceed `usize` (even on 64-bit targets). Unchecked
    // multiplication would wrap in release builds and let a too-small buffer pass.
    let byte_count = (width as usize)
        .checked_mul(height as usize)
        .and_then(|pixels| pixels.checked_mul(4));
    let expected = match byte_count {
        Some(n) => n,
        None => return Err(GpuError::InvalidDimensions { width, height }),
    };
    if buf.len() != expected {
        return Err(GpuError::InvalidBufferSize {
            expected,
            actual: buf.len(),
        });
    }
    Ok(())
}
/// Verifies that `buf` holds exactly one `f32` sample per image position.
///
/// # Errors
///
/// * [`GpuError::InvalidDimensions`] if `width * height` does not fit in `usize`
///   (possible on 32-bit targets).
/// * [`GpuError::InvalidBufferSize`] if `buf.len()` differs from that product.
fn check_f32_buf(buf: &[f32], width: u32, height: u32) -> Result<()> {
    // Checked multiplication: a wrapped product would accept a mismatched buffer.
    let expected = match (width as usize).checked_mul(height as usize) {
        Some(n) => n,
        None => return Err(GpuError::InvalidDimensions { width, height }),
    };
    if buf.len() != expected {
        return Err(GpuError::InvalidBufferSize {
            expected,
            actual: buf.len(),
        });
    }
    Ok(())
}
/// CPU implementation of [`GpuAccelerator`] built on rayon parallel iterators.
pub struct CpuAccelerator {
/// Value of `rayon::current_num_threads()` captured when the accelerator was created.
num_threads: usize,
}
impl CpuAccelerator {
/// Creates a CPU accelerator, recording rayon's current thread count.
#[must_use]
pub fn new() -> Self {
    let num_threads = rayon::current_num_threads();
    Self { num_threads }
}
/// Returns the thread count that was recorded when this accelerator was constructed.
#[must_use]
pub fn num_threads(&self) -> usize {
self.num_threads
}
/// Converts four-byte RGBA pixels to `[Y, U, V, A]` using the 0.299 / 0.587 / 0.114
/// luma weights.
///
/// U and V are shifted by 0.5 so they fit the unsigned 8-bit range; the fourth byte
/// of each pixel is copied unchanged. Pixels are processed in parallel, and only
/// whole four-byte pixels present in both buffers are converted.
fn rgb_to_yuv_impl(input: &[u8], output: &mut [u8]) {
    const KR: f32 = 0.299;
    const KG: f32 = 0.587;
    const KB: f32 = 0.114;

    // Quantize with round-to-nearest. A bare `as u8` truncates, so a value such as
    // 127.99999 (float error on an exact grey) would come out one level too dark.
    fn quantize(normalized: f32) -> u8 {
        (normalized.clamp(0.0, 1.0) * 255.0).round() as u8
    }

    output
        .par_chunks_exact_mut(4)
        .zip(input.par_chunks_exact(4))
        .for_each(|(out, inp)| {
            let r = f32::from(inp[0]) / 255.0;
            let g = f32::from(inp[1]) / 255.0;
            let b = f32::from(inp[2]) / 255.0;
            let y = KR * r + KG * g + KB * b;
            let u = (b - y) / (2.0 * (1.0 - KB)) + 0.5;
            let v = (r - y) / (2.0 * (1.0 - KR)) + 0.5;
            out[0] = quantize(y);
            out[1] = quantize(u);
            out[2] = quantize(v);
            out[3] = inp[3];
        });
}
/// Converts four-byte `[Y, U, V, A]` pixels back to RGBA using the 0.299 / 0.587 /
/// 0.114 luma weights.
///
/// U and V are expected with a 0.5 offset (128 ≈ neutral); the fourth byte of each
/// pixel is copied unchanged. Pixels are processed in parallel, and only whole
/// four-byte pixels present in both buffers are converted.
fn yuv_to_rgb_impl(input: &[u8], output: &mut [u8]) {
    const KR: f32 = 0.299;
    const KG: f32 = 0.587;
    const KB: f32 = 0.114;

    // Quantize with round-to-nearest. A bare `as u8` truncates and biases every
    // channel downwards by up to one level.
    fn quantize(normalized: f32) -> u8 {
        (normalized.clamp(0.0, 1.0) * 255.0).round() as u8
    }

    output
        .par_chunks_exact_mut(4)
        .zip(input.par_chunks_exact(4))
        .for_each(|(out, inp)| {
            let y = f32::from(inp[0]) / 255.0;
            let u = f32::from(inp[1]) / 255.0 - 0.5;
            let v = f32::from(inp[2]) / 255.0 - 0.5;
            let r = y + 2.0 * (1.0 - KR) * v;
            let b = y + 2.0 * (1.0 - KB) * u;
            // Green is recovered from the luma equation once red and blue are known.
            let g = (y - KR * r - KB * b) / KG;
            out[0] = quantize(r);
            out[1] = quantize(g);
            out[2] = quantize(b);
            out[3] = inp[3];
        });
}
/// Resamples a four-byte-per-pixel image to `dst_w` x `dst_h` with bilinear
/// interpolation.
///
/// Each destination pixel centre is mapped back into source space (the `+ 0.5` /
/// `- 0.5` terms), the four surrounding source pixels are fetched with edge
/// clamping, and all four channels are blended. Destination pixels are computed in
/// parallel.
fn scale_bilinear_impl(
    input: &[u8],
    src_w: usize,
    src_h: usize,
    output: &mut [u8],
    dst_w: usize,
    dst_h: usize,
) {
    let scale_x = src_w as f32 / dst_w as f32;
    let scale_y = src_h as f32 / dst_h as f32;
    output
        .par_chunks_exact_mut(4)
        .enumerate()
        .for_each(|(pixel_index, dst_pixel)| {
            let col = pixel_index % dst_w;
            let row = pixel_index / dst_w;
            if row >= dst_h {
                // Pixels past the declared destination height are left untouched.
                return;
            }

            // Continuous source-space coordinates of this destination pixel's centre.
            let sample_x = (col as f32 + 0.5) * scale_x - 0.5;
            let sample_y = (row as f32 + 0.5) * scale_y - 0.5;

            // Integer neighbours, clamped so edge pixels are replicated.
            let left = (sample_x.floor().max(0.0) as usize).min(src_w - 1);
            let top = (sample_y.floor().max(0.0) as usize).min(src_h - 1);
            let right = (left + 1).min(src_w - 1);
            let bottom = (top + 1).min(src_h - 1);

            // Blend weights; negative fractions (left/top of the first pixel) become 0.
            let wx = sample_x.fract().max(0.0);
            let wy = sample_y.fract().max(0.0);

            let top_left = (top * src_w + left) * 4;
            let top_right = (top * src_w + right) * 4;
            let bottom_left = (bottom * src_w + left) * 4;
            let bottom_right = (bottom * src_w + right) * 4;

            for (channel, slot) in dst_pixel.iter_mut().enumerate() {
                let p00 = f32::from(input[top_left + channel]);
                let p10 = f32::from(input[top_right + channel]);
                let p01 = f32::from(input[bottom_left + channel]);
                let p11 = f32::from(input[bottom_right + channel]);
                let upper = p00 * (1.0 - wx) + p10 * wx;
                let lower = p01 * (1.0 - wx) + p11 * wx;
                *slot = (upper * (1.0 - wy) + lower * wy)
                    .round()
                    .clamp(0.0, 255.0) as u8;
            }
        });
}
/// Applies a separable Gaussian blur to a four-byte-per-pixel image.
///
/// A 1-D kernel of radius `ceil(3 * sigma)` is built and normalised, then applied
/// horizontally into a temporary buffer and vertically into `output`. Samples that
/// fall outside the image are clamped to the nearest edge pixel. All four channels
/// are blurred, and both passes run in parallel.
///
/// A `sigma` that is zero, negative, NaN or infinite describes no usable kernel; in
/// that case `input` is copied to `output` unchanged instead of producing NaN
/// weights (which previously turned the whole image black) or a wrapped kernel size.
///
/// Note that the kernel length grows linearly with `sigma`, so extremely large
/// values still imply a correspondingly large allocation.
fn gaussian_blur_impl(
    input: &[u8],
    output: &mut [u8],
    width: usize,
    height: usize,
    sigma: f32,
) {
    if sigma.is_nan() || sigma.is_infinite() || sigma <= 0.0 {
        let n = input.len().min(output.len());
        output[..n].copy_from_slice(&input[..n]);
        return;
    }

    let radius = (3.0 * sigma).ceil() as i32;
    let two_sigma_sq = 2.0 * sigma * sigma;

    // Build the unnormalised kernel. The centre tap is exactly 1 by definition;
    // spelling that out avoids `0.0 / 0.0` when `two_sigma_sq` underflows to zero for
    // a tiny sigma. Offsets are squared in `f32` so a large radius cannot overflow
    // `i32`.
    let mut sum = 0.0f32;
    let mut kernel: Vec<f32> = (-radius..=radius)
        .map(|offset| {
            let weight = if offset == 0 {
                1.0
            } else {
                let d = offset as f32;
                (-(d * d) / two_sigma_sq).exp()
            };
            sum += weight;
            weight
        })
        .collect();
    for weight in &mut kernel {
        *weight /= sum;
    }

    let max_x = width as i32 - 1;
    let max_y = height as i32 - 1;

    // Horizontal pass: input -> temp.
    let mut temp = vec![0u8; input.len()];
    temp.par_chunks_exact_mut(4)
        .enumerate()
        .for_each(|(i, out)| {
            let px = i % width;
            let py = i / width;
            if py >= height {
                return;
            }
            for (c, slot) in out.iter_mut().enumerate() {
                let mut acc = 0.0f32;
                for (offset, &weight) in (-radius..=radius).zip(kernel.iter()) {
                    let sx = (px as i32 + offset).clamp(0, max_x) as usize;
                    acc += f32::from(input[(py * width + sx) * 4 + c]) * weight;
                }
                *slot = acc.round().clamp(0.0, 255.0) as u8;
            }
        });

    // Vertical pass: temp -> output.
    output
        .par_chunks_exact_mut(4)
        .enumerate()
        .for_each(|(i, out)| {
            let px = i % width;
            let py = i / width;
            if py >= height {
                return;
            }
            for (c, slot) in out.iter_mut().enumerate() {
                let mut acc = 0.0f32;
                for (offset, &weight) in (-radius..=radius).zip(kernel.iter()) {
                    let sy = (py as i32 + offset).clamp(0, max_y) as usize;
                    acc += f32::from(temp[(sy * width + px) * 4 + c]) * weight;
                }
                *slot = acc.round().clamp(0.0, 255.0) as u8;
            }
        });
}
/// Writes a Sobel gradient-magnitude image of `input` to `output`.
///
/// A luma plane is computed first (0.299 / 0.587 / 0.114 weights). For each interior
/// pixel the horizontal and vertical 3x3 Sobel responses are combined into a
/// magnitude that is clamped to 0..=255 and written to the first three channels; the
/// fourth channel is copied from `input`. Pixels on the image border are set to
/// zero in all four channels.
fn sobel_impl(input: &[u8], output: &mut [u8], width: usize, height: usize) {
    let luma: Vec<f32> = input
        .par_chunks_exact(4)
        .map(|rgba| {
            let (r, g, b) = (
                f32::from(rgba[0]),
                f32::from(rgba[1]),
                f32::from(rgba[2]),
            );
            0.299 * r + 0.587 * g + 0.114 * b
        })
        .collect();

    output
        .par_chunks_exact_mut(4)
        .enumerate()
        .for_each(|(idx, dst)| {
            let col = idx % width;
            let row = idx / width;
            let on_border = col == 0 || col + 1 == width || row == 0 || row + 1 == height;
            if on_border {
                dst.fill(0);
                return;
            }

            let at = |r: usize, c: usize| luma[r * width + c];
            let (up, down) = (row - 1, row + 1);
            let (left, right) = (col - 1, col + 1);

            // Horizontal gradient: right column minus left column, middle row doubled.
            let gx = -at(up, left) - 2.0 * at(row, left) - at(down, left)
                + at(up, right)
                + 2.0 * at(row, right)
                + at(down, right);
            // Vertical gradient: bottom row minus top row, middle column doubled.
            let gy = -at(up, left) - 2.0 * at(up, col) - at(up, right)
                + at(down, left)
                + 2.0 * at(down, col)
                + at(down, right);

            let magnitude = (gx * gx + gy * gy).sqrt().clamp(0.0, 255.0) as u8;
            dst[..3].fill(magnitude);
            dst[3] = input[idx * 4 + 3];
        });
}
/// Sharpens an image by adding back the difference between it and a blurred copy.
///
/// The blurred copy is produced by `gaussian_blur_impl` with sigma 1.0. For the first
/// three channels the result is `orig + amount * (orig - blurred)`, rounded and
/// clamped to 0..=255; the fourth channel is copied from `input`.
fn sharpen_impl(input: &[u8], output: &mut [u8], width: usize, height: usize, amount: f32) {
    let mut soft = vec![0u8; input.len()];
    Self::gaussian_blur_impl(input, &mut soft, width, height, 1.0);

    output
        .par_chunks_exact_mut(4)
        .zip(input.par_chunks_exact(4))
        .zip(soft.par_chunks_exact(4))
        .for_each(|((dst, src), blur)| {
            let colour_channels = dst.iter_mut().zip(src.iter()).zip(blur.iter()).take(3);
            for ((slot, &orig), &blurred) in colour_channels {
                let o = f32::from(orig);
                let b = f32::from(blurred);
                *slot = (o + amount * (o - b)).round().clamp(0.0, 255.0) as u8;
            }
            dst[3] = src[3];
        });
}
/// Computes the 1-D DCT of `data` into `out` by direct O(n²) summation.
///
/// Coefficient `k` is `scale(k) * Σ data[j] * cos(π k (2j + 1) / (2n))`, where the
/// scale is `sqrt(1/n)` for `k == 0` and `sqrt(2/n)` otherwise. `out` must be at
/// least as long as `data`; an empty `data` leaves `out` untouched.
fn dct1d(data: &[f32], out: &mut [f32]) {
    let len = data.len();
    let len_f = len as f32;
    let denom = 2.0 * len_f;
    for k in 0..len {
        let freq = std::f32::consts::PI * k as f32;
        let acc = data
            .iter()
            .enumerate()
            .fold(0.0f32, |acc, (j, &sample)| {
                let angle = freq * (2.0 * j as f32 + 1.0) / denom;
                acc + sample * angle.cos()
            });
        let norm = match k {
            0 => (1.0 / len_f).sqrt(),
            _ => (2.0 / len_f).sqrt(),
        };
        out[k] = acc * norm;
    }
}
/// Computes the 1-D inverse DCT of `data` into `out` by direct O(n²) summation.
///
/// Sample `j` is `data[0] / sqrt(n) + sqrt(2/n) * Σ_{k>=1} data[k] * cos(π k (2j + 1) / (2n))`,
/// which inverts the scaling used by `dct1d`. An empty `data` leaves `out`
/// untouched.
///
/// # Panics
///
/// Panics if `out` is shorter than `data`.
fn idct1d(data: &[f32], out: &mut [f32]) {
    let n = data.len();
    if n == 0 {
        return;
    }
    let nf = n as f32;
    // These terms do not depend on `j` or `k`; computing them once removes a square
    // root and a division from every iteration of the O(n²) inner loop.
    let dc = data[0] / nf.sqrt();
    let ac_scale = (2.0 / nf).sqrt();
    let denom = 2.0 * nf;

    for (j, slot) in out[..n].iter_mut().enumerate() {
        let odd = 2.0 * j as f32 + 1.0;
        let mut s = dc;
        for (k, &coeff) in data.iter().enumerate().skip(1) {
            let angle = std::f32::consts::PI * k as f32 * odd / denom;
            s += ac_scale * coeff * angle.cos();
        }
        *slot = s;
    }
}
}
/// The default CPU accelerator is the one produced by [`CpuAccelerator::new`].
impl Default for CpuAccelerator {
fn default() -> Self {
Self::new()
}
}
impl GpuAccelerator for CpuAccelerator {
/// Returns the fixed backend label `"CPU SIMD"`.
// NOTE(review): the code visible in this file uses rayon parallelism and no explicit
// SIMD intrinsics; the label is kept as-is because tests compare against it.
fn name(&self) -> &'static str {
"CPU SIMD"
}
/// Always `false`: this backend runs on the CPU.
fn is_gpu(&self) -> bool {
false
}
/// Converts RGBA pixels to YUVA on the CPU.
///
/// # Errors
///
/// Propagates the error from `check_rgba_buf` when `input` or `output` does not
/// hold `width * height` four-byte pixels; `input` is checked first.
fn rgb_to_yuv(&self, input: &[u8], output: &mut [u8], width: u32, height: u32) -> Result<()> {
    for (buf, label) in [(input, "input"), (&*output, "output")] {
        check_rgba_buf(buf, width, height, label)?;
    }
    Self::rgb_to_yuv_impl(input, output);
    Ok(())
}
/// Converts YUVA pixels back to RGBA on the CPU.
///
/// # Errors
///
/// Propagates the error from `check_rgba_buf` when `input` or `output` does not
/// hold `width * height` four-byte pixels; `input` is checked first.
fn yuv_to_rgb(&self, input: &[u8], output: &mut [u8], width: u32, height: u32) -> Result<()> {
    for (buf, label) in [(input, "input"), (&*output, "output")] {
        check_rgba_buf(buf, width, height, label)?;
    }
    Self::yuv_to_rgb_impl(input, output);
    Ok(())
}
#[allow(clippy::too_many_arguments)]
fn scale_bilinear(
&self,
input: &[u8],
src_width: u32,
src_height: u32,
output: &mut [u8],
dst_width: u32,
dst_height: u32,
) -> Result<()> {
check_rgba_buf(input, src_width, src_height, "input")?;
check_rgba_buf(output, dst_width, dst_height, "output")?;
Self::scale_bilinear_impl(
input,
src_width as usize,
src_height as usize,
output,
dst_width as usize,
dst_height as usize,
);
Ok(())
}
/// Applies a Gaussian blur with standard deviation `sigma` on the CPU.
///
/// # Errors
///
/// Propagates the error from `check_rgba_buf` when `input` or `output` does not
/// hold `width * height` four-byte pixels; `input` is checked first.
fn gaussian_blur(
    &self,
    input: &[u8],
    output: &mut [u8],
    width: u32,
    height: u32,
    sigma: f32,
) -> Result<()> {
    for (buf, label) in [(input, "input"), (&*output, "output")] {
        check_rgba_buf(buf, width, height, label)?;
    }
    let (w, h) = (width as usize, height as usize);
    Self::gaussian_blur_impl(input, output, w, h, sigma);
    Ok(())
}
/// Runs Sobel edge detection on the CPU.
///
/// # Errors
///
/// Propagates the error from `check_rgba_buf` when `input` or `output` does not
/// hold `width * height` four-byte pixels; `input` is checked first.
fn edge_detect(&self, input: &[u8], output: &mut [u8], width: u32, height: u32) -> Result<()> {
    for (buf, label) in [(input, "input"), (&*output, "output")] {
        check_rgba_buf(buf, width, height, label)?;
    }
    let (w, h) = (width as usize, height as usize);
    Self::sobel_impl(input, output, w, h);
    Ok(())
}
/// Sharpens the image on the CPU; `amount` scales the detail that is added back.
///
/// # Errors
///
/// Propagates the error from `check_rgba_buf` when `input` or `output` does not
/// hold `width * height` four-byte pixels; `input` is checked first.
fn sharpen(
    &self,
    input: &[u8],
    output: &mut [u8],
    width: u32,
    height: u32,
    amount: f32,
) -> Result<()> {
    for (buf, label) in [(input, "input"), (&*output, "output")] {
        check_rgba_buf(buf, width, height, label)?;
    }
    let (w, h) = (width as usize, height as usize);
    Self::sharpen_impl(input, output, w, h, amount);
    Ok(())
}
/// Computes a separable 2-D DCT over the whole `width` x `height` plane.
///
/// Rows are transformed first, then columns. The column pass reuses two scratch
/// buffers instead of allocating a pair of vectors for every column.
///
/// # Errors
///
/// * Propagates the error from `check_f32_buf` when `input` or `output` does not
///   hold `width * height` samples.
/// * [`GpuError::InvalidDimensions`] when `width` or `height` is not a multiple of 8.
fn dct_2d(&self, input: &[f32], output: &mut [f32], width: u32, height: u32) -> Result<()> {
    check_f32_buf(input, width, height)?;
    check_f32_buf(output, width, height)?;
    if width % 8 != 0 || height % 8 != 0 {
        return Err(GpuError::InvalidDimensions { width, height });
    }
    let w = width as usize;
    let h = height as usize;
    if w == 0 || h == 0 {
        // Empty plane: nothing to transform (and `chunks_exact(0)` would panic).
        return Ok(());
    }

    // Row pass: input -> row_pass.
    let mut row_pass = vec![0.0f32; input.len()];
    for (src, dst) in input.chunks_exact(w).zip(row_pass.chunks_exact_mut(w)) {
        Self::dct1d(src, dst);
    }

    // Column pass: row_pass -> output, gathering each column into a reusable buffer.
    let mut col_in = vec![0.0f32; h];
    let mut col_out = vec![0.0f32; h];
    for col in 0..w {
        for (slot, &v) in col_in.iter_mut().zip(row_pass.iter().skip(col).step_by(w)) {
            *slot = v;
        }
        Self::dct1d(&col_in, &mut col_out);
        for (dst, &v) in output.iter_mut().skip(col).step_by(w).zip(col_out.iter()) {
            *dst = v;
        }
    }
    Ok(())
}
/// Computes the separable 2-D inverse DCT over the whole `width` x `height` plane.
///
/// Columns are inverted first, then rows (the reverse of `dct_2d`). The column pass
/// reuses two scratch buffers instead of allocating a pair of vectors for every
/// column.
///
/// # Errors
///
/// * Propagates the error from `check_f32_buf` when `input` or `output` does not
///   hold `width * height` samples.
/// * [`GpuError::InvalidDimensions`] when `width` or `height` is not a multiple of 8.
fn idct_2d(&self, input: &[f32], output: &mut [f32], width: u32, height: u32) -> Result<()> {
    check_f32_buf(input, width, height)?;
    check_f32_buf(output, width, height)?;
    if width % 8 != 0 || height % 8 != 0 {
        return Err(GpuError::InvalidDimensions { width, height });
    }
    let w = width as usize;
    let h = height as usize;
    if w == 0 || h == 0 {
        // Empty plane: nothing to transform (and `chunks_exact(0)` would panic).
        return Ok(());
    }

    // Column pass: input -> col_pass, gathering each column into a reusable buffer.
    let mut col_pass = vec![0.0f32; input.len()];
    let mut col_in = vec![0.0f32; h];
    let mut col_out = vec![0.0f32; h];
    for col in 0..w {
        for (slot, &v) in col_in.iter_mut().zip(input.iter().skip(col).step_by(w)) {
            *slot = v;
        }
        Self::idct1d(&col_in, &mut col_out);
        for (dst, &v) in col_pass.iter_mut().skip(col).step_by(w).zip(col_out.iter()) {
            *dst = v;
        }
    }

    // Row pass: col_pass -> output.
    for (src, dst) in col_pass.chunks_exact(w).zip(output.chunks_exact_mut(w)) {
        Self::idct1d(src, dst);
    }
    Ok(())
}
/// Writes the per-byte absolute difference of `a` and `b` (all four channels) to
/// `output`, processing pixels in parallel.
///
/// # Errors
///
/// Propagates the error from `check_rgba_buf` when `a`, `b` or `output` (checked in
/// that order) does not hold `width * height` four-byte pixels.
fn pixel_diff(
    &self,
    a: &[u8],
    b: &[u8],
    output: &mut [u8],
    width: u32,
    height: u32,
) -> Result<()> {
    for (buf, label) in [(a, "a"), (b, "b"), (&*output, "output")] {
        check_rgba_buf(buf, width, height, label)?;
    }
    output
        .par_chunks_exact_mut(4)
        .zip(a.par_chunks_exact(4))
        .zip(b.par_chunks_exact(4))
        .for_each(|((dst, lhs), rhs)| {
            for ((slot, &x), &y) in dst.iter_mut().zip(lhs).zip(rhs) {
                *slot = x.abs_diff(y);
            }
        });
    Ok(())
}
fn mse(&self, a: &[u8], b: &[u8], width: u32, height: u32) -> Result<f64> {
check_rgba_buf(a, width, height, "a")?;
check_rgba_buf(b, width, height, "b")?;
let sum_sq: f64 = a
.par_chunks_exact(4)
.zip(b.par_chunks_exact(4))
.map(|(pa, pb)| {
(0..4)
.map(|c| {
let d = f64::from(pa[c]) - f64::from(pb[c]);
d * d
})
.sum::<f64>()
})
.sum();
let n = f64::from(width) * f64::from(height) * 4.0;
Ok(sum_sq / n)
}
}
/// [`GpuAccelerator`] implementation that forwards most operations to `crate::ops`
/// using a shared `crate::GpuDevice`.
pub struct WgpuAccelerator {
/// Device handle passed to every `crate::ops` call.
device: std::sync::Arc<crate::GpuDevice>,
/// CPU backend used for the operations this type does not run through `crate::ops`
/// (`pixel_diff` and `mse`).
cpu: CpuAccelerator,
/// Name returned by `name()`: the device's backend followed by `" GPU"`.
backend_name: String,
}
impl WgpuAccelerator {
    /// Creates an accelerator backed by `crate::GpuDevice::new(None)`.
    ///
    /// The backend name is derived from the device info, and a [`CpuAccelerator`]
    /// is created alongside for the operations that run on the CPU.
    ///
    /// # Errors
    ///
    /// Returns the error from `crate::GpuDevice::new` if the device cannot be created.
    pub fn new() -> Result<Self> {
        use std::sync::Arc;

        let raw_device = crate::GpuDevice::new(None)?;
        // Read the backend label before the device is moved into the `Arc`.
        let backend_name = format!("{} GPU", raw_device.info().backend);
        let cpu = CpuAccelerator::new();
        Ok(Self {
            device: Arc::new(raw_device),
            cpu,
            backend_name,
        })
    }

    /// Returns the shared handle to the underlying device.
    #[must_use]
    pub fn gpu_device(&self) -> &std::sync::Arc<crate::GpuDevice> {
        let Self { device, .. } = self;
        device
    }
}
impl GpuAccelerator for WgpuAccelerator {
fn name(&self) -> &str {
&self.backend_name
}
fn is_gpu(&self) -> bool {
true
}
fn rgb_to_yuv(&self, input: &[u8], output: &mut [u8], width: u32, height: u32) -> Result<()> {
crate::ops::ColorSpaceConversion::rgb_to_yuv(
&self.device,
input,
output,
width,
height,
crate::ops::ColorSpace::BT601,
)
}
fn yuv_to_rgb(&self, input: &[u8], output: &mut [u8], width: u32, height: u32) -> Result<()> {
crate::ops::ColorSpaceConversion::yuv_to_rgb(
&self.device,
input,
output,
width,
height,
crate::ops::ColorSpace::BT601,
)
}
#[allow(clippy::too_many_arguments)]
fn scale_bilinear(
&self,
input: &[u8],
src_width: u32,
src_height: u32,
output: &mut [u8],
dst_width: u32,
dst_height: u32,
) -> Result<()> {
crate::ops::ScaleOperation::scale(
&self.device,
input,
src_width,
src_height,
output,
dst_width,
dst_height,
crate::ops::ScaleFilter::Bilinear,
)
}
fn gaussian_blur(
&self,
input: &[u8],
output: &mut [u8],
width: u32,
height: u32,
sigma: f32,
) -> Result<()> {
crate::ops::FilterOperation::gaussian_blur(
&self.device,
input,
output,
width,
height,
sigma,
)
}
fn edge_detect(&self, input: &[u8], output: &mut [u8], width: u32, height: u32) -> Result<()> {
crate::ops::FilterOperation::edge_detect(&self.device, input, output, width, height)
}
fn sharpen(
&self,
input: &[u8],
output: &mut [u8],
width: u32,
height: u32,
amount: f32,
) -> Result<()> {
crate::ops::FilterOperation::sharpen(&self.device, input, output, width, height, amount)
}
fn dct_2d(&self, input: &[f32], output: &mut [f32], width: u32, height: u32) -> Result<()> {
crate::ops::TransformOperation::dct_2d(&self.device, input, output, width, height)
}
fn idct_2d(&self, input: &[f32], output: &mut [f32], width: u32, height: u32) -> Result<()> {
crate::ops::TransformOperation::idct_2d(&self.device, input, output, width, height)
}
fn pixel_diff(
&self,
a: &[u8],
b: &[u8],
output: &mut [u8],
width: u32,
height: u32,
) -> Result<()> {
self.cpu.pixel_diff(a, b, output, width, height)
}
fn mse(&self, a: &[u8], b: &[u8], width: u32, height: u32) -> Result<f64> {
self.cpu.mse(a, b, width, height)
}
}
/// Builder that chooses between [`WgpuAccelerator`] and [`CpuAccelerator`].
pub struct AcceleratorBuilder {
/// When `true` (the default), `build` attempts to create a GPU backend first.
prefer_gpu: bool,
/// When `true`, `build` returns the CPU backend without attempting a GPU backend.
force_cpu: bool,
}
impl AcceleratorBuilder {
    /// Creates a builder that prefers the GPU and does not force the CPU.
    #[must_use]
    pub fn new() -> Self {
        let (prefer_gpu, force_cpu) = (true, false);
        Self {
            prefer_gpu,
            force_cpu,
        }
    }

    /// Sets whether `build` should try the GPU backend first.
    #[must_use]
    pub fn prefer_gpu(self, value: bool) -> Self {
        Self {
            prefer_gpu: value,
            ..self
        }
    }

    /// Sets whether `build` must return the CPU backend.
    #[must_use]
    pub fn force_cpu(self, value: bool) -> Self {
        Self {
            force_cpu: value,
            ..self
        }
    }

    /// Builds the selected accelerator.
    ///
    /// The GPU backend is attempted only when it is preferred and the CPU is not
    /// forced. If GPU creation fails, the failure is discarded and the CPU backend
    /// is returned instead, so the paths visible here always return `Ok`.
    pub fn build(self) -> Result<Box<dyn GpuAccelerator>> {
        let try_gpu = self.prefer_gpu && !self.force_cpu;
        let accelerator: Box<dyn GpuAccelerator> = if try_gpu {
            match WgpuAccelerator::new() {
                Ok(gpu) => Box::new(gpu),
                // Best-effort fallback: any GPU initialisation error selects the CPU.
                Err(_) => Box::new(CpuAccelerator::new()),
            }
        } else {
            Box::new(CpuAccelerator::new())
        };
        Ok(accelerator)
    }

    /// Returns a concrete [`CpuAccelerator`] without going through the builder state.
    #[must_use]
    pub fn build_cpu() -> CpuAccelerator {
        CpuAccelerator::default()
    }
}
/// The default builder is the one produced by [`AcceleratorBuilder::new`].
impl Default for AcceleratorBuilder {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `w` x `h` four-byte-per-pixel image with every byte set to `fill`.
    fn make_rgba(w: usize, h: usize, fill: u8) -> Vec<u8> {
        vec![fill; w * h * 4]
    }

    #[test]
    fn test_cpu_accelerator_name() {
        let acc = CpuAccelerator::new();
        assert_eq!(acc.name(), "CPU SIMD");
        assert!(!acc.is_gpu());
    }

    #[test]
    fn test_cpu_rgb_to_yuv_roundtrip() {
        let grey = 128u8;
        let input = vec![grey, grey, grey, 255u8];
        let mut yuv = vec![0u8; 4];
        let mut rgb = vec![0u8; 4];
        let acc = CpuAccelerator::new();
        acc.rgb_to_yuv(&input, &mut yuv, 1, 1)
            .expect("RGB to YUV conversion should succeed");
        acc.yuv_to_rgb(&yuv, &mut rgb, 1, 1)
            .expect("YUV to RGB conversion should succeed");
        assert!(
            (rgb[0] as i32 - grey as i32).abs() <= 3,
            "R mismatch: {}",
            rgb[0]
        );
        assert!(
            (rgb[1] as i32 - grey as i32).abs() <= 3,
            "G mismatch: {}",
            rgb[1]
        );
        assert!(
            (rgb[2] as i32 - grey as i32).abs() <= 3,
            "B mismatch: {}",
            rgb[2]
        );
    }

    #[test]
    fn test_cpu_rgb_to_yuv_invalid_size() {
        let acc = CpuAccelerator::new();
        // Five bytes cannot be a 1x1 four-byte pixel.
        let input = vec![0u8; 5];
        let mut output = vec![0u8; 4];
        assert!(acc.rgb_to_yuv(&input, &mut output, 1, 1).is_err());
    }

    #[test]
    fn test_cpu_scale_bilinear_identity() {
        let w = 16usize;
        let h = 16usize;
        let input = make_rgba(w, h, 200);
        let mut output = make_rgba(w, h, 0);
        let acc = CpuAccelerator::new();
        acc.scale_bilinear(&input, w as u32, h as u32, &mut output, w as u32, h as u32)
            .expect("operation should succeed in test");
        for &v in &output {
            assert!(v >= 195, "pixel value {v} too low after identity scale");
        }
    }

    #[test]
    fn test_cpu_scale_bilinear_upsample() {
        let input = make_rgba(2, 2, 255);
        let mut output = make_rgba(4, 4, 0);
        let acc = CpuAccelerator::new();
        acc.scale_bilinear(&input, 2, 2, &mut output, 4, 4)
            .expect("bilinear scaling should succeed");
        for &v in &output {
            assert!(v >= 250, "upsampled pixel {v} not white");
        }
    }

    #[test]
    fn test_cpu_gaussian_blur_preserves_size() {
        let input = make_rgba(8, 8, 128);
        let mut output = make_rgba(8, 8, 0);
        let acc = CpuAccelerator::new();
        acc.gaussian_blur(&input, &mut output, 8, 8, 1.0)
            .expect("gaussian blur should succeed");
        assert_eq!(output.len(), input.len());
        // The kernel is normalised, so blurring a uniform image must leave it
        // uniform (allowing one level for rounding). Checking the length alone would
        // pass even if the blur wrote nothing.
        for &v in &output {
            assert!(
                (v as i32 - 128).abs() <= 1,
                "blur changed a flat image: got {v}"
            );
        }
    }

    #[test]
    fn test_cpu_edge_detect_flat_image() {
        let input = make_rgba(16, 16, 200);
        let mut output = make_rgba(16, 16, 0);
        let acc = CpuAccelerator::new();
        acc.edge_detect(&input, &mut output, 16, 16)
            .expect("edge detection should succeed");
        for row in 1..15usize {
            for col in 1..15usize {
                let idx = (row * 16 + col) * 4;
                assert!(
                    output[idx] < 10,
                    "interior edge pixel {} at ({row},{col}) is non-zero",
                    output[idx]
                );
            }
        }
    }

    #[test]
    fn test_cpu_sharpen_stable_flat() {
        let input = make_rgba(8, 8, 128);
        let mut output = make_rgba(8, 8, 0);
        let acc = CpuAccelerator::new();
        acc.sharpen(&input, &mut output, 8, 8, 1.0)
            .expect("sharpen should succeed");
        for (&o, &i) in output.iter().zip(input.iter()) {
            assert!(
                (o as i32 - i as i32).abs() <= 3,
                "sharpen changed flat pixel by more than 3"
            );
        }
    }

    #[test]
    fn test_cpu_dct_idct_roundtrip() {
        let w = 8u32;
        let h = 8u32;
        let input: Vec<f32> = (0..(w * h)).map(|i| i as f32).collect();
        let mut dct_out = vec![0.0f32; (w * h) as usize];
        let mut rec = vec![0.0f32; (w * h) as usize];
        let acc = CpuAccelerator::new();
        acc.dct_2d(&input, &mut dct_out, w, h)
            .expect("DCT should succeed");
        acc.idct_2d(&dct_out, &mut rec, w, h)
            .expect("IDCT should succeed");
        for (a, b) in input.iter().zip(rec.iter()) {
            assert!((a - b).abs() < 1e-3, "DCT round-trip error: {a} vs {b}");
        }
    }

    #[test]
    fn test_cpu_dct_invalid_dims() {
        let acc = CpuAccelerator::new();
        let input = vec![0.0f32; 10];
        let mut output = vec![0.0f32; 10];
        assert!(acc.dct_2d(&input, &mut output, 10, 1).is_err());
    }

    #[test]
    fn test_cpu_pixel_diff_self() {
        let img = make_rgba(4, 4, 100);
        let mut diff = make_rgba(4, 4, 255);
        let acc = CpuAccelerator::new();
        acc.pixel_diff(&img, &img, &mut diff, 4, 4)
            .expect("pixel diff should succeed");
        for &v in &diff {
            assert_eq!(v, 0, "self-diff should be zero");
        }
    }

    #[test]
    fn test_cpu_mse_identical() {
        let img = make_rgba(8, 8, 128);
        let acc = CpuAccelerator::new();
        let mse = acc
            .mse(&img, &img, 8, 8)
            .expect("MSE computation should succeed");
        assert!(
            mse.abs() < 1e-10,
            "MSE of identical images should be 0, got {mse}"
        );
    }

    #[test]
    fn test_cpu_mse_max_error() {
        let a = make_rgba(4, 4, 0);
        let b = make_rgba(4, 4, 255);
        let acc = CpuAccelerator::new();
        let mse = acc
            .mse(&a, &b, 4, 4)
            .expect("MSE computation should succeed");
        assert!(
            (mse - 65025.0).abs() < 1.0,
            "max MSE should be 65025, got {mse}"
        );
    }

    #[test]
    fn test_builder_force_cpu() {
        let acc = AcceleratorBuilder::new()
            .force_cpu(true)
            .build()
            .expect("accelerator build should succeed");
        assert_eq!(acc.name(), "CPU SIMD");
        assert!(!acc.is_gpu());
    }

    #[test]
    fn test_builder_build_cpu_static() {
        let acc = AcceleratorBuilder::build_cpu();
        assert_eq!(acc.name(), "CPU SIMD");
    }

    // Ignored by default: the default builder attempts to create a GPU device.
    #[test]
    #[ignore]
    fn test_builder_default_builds() {
        let acc = AcceleratorBuilder::new()
            .build()
            .expect("accelerator build should succeed");
        assert!(!acc.name().is_empty());
    }

    #[test]
    fn test_cpu_rgb_red_pixel() {
        let input = vec![255u8, 0, 0, 255];
        let mut yuv = vec![0u8; 4];
        let acc = CpuAccelerator::new();
        acc.rgb_to_yuv(&input, &mut yuv, 1, 1)
            .expect("RGB to YUV conversion should succeed");
        assert!(
            (yuv[0] as i32 - 76).abs() <= 2,
            "Y for red should be ~76, got {}",
            yuv[0]
        );
    }
}