oximedia_gpu/backend/
cpu.rs1use super::{Backend, BackendCapabilities, BackendType};
8use crate::Result;
9use rayon::prelude::*;
10
11pub struct CpuBackend {
13 capabilities: BackendCapabilities,
14 num_threads: usize,
15}
16
17impl CpuBackend {
18 pub fn new() -> Result<Self> {
20 let num_threads = rayon::current_num_threads();
21
22 let capabilities = BackendCapabilities {
23 backend_type: BackendType::CPU,
24 max_workgroup_size: (1, 1, 1), max_workgroup_invocations: 1,
26 max_buffer_size: usize::MAX as u64,
27 compute_shaders: false,
28 subgroups: false,
29 push_constants: false,
30 };
31
32 Ok(Self {
33 capabilities,
34 num_threads,
35 })
36 }
37
38 #[must_use]
40 pub fn num_threads(&self) -> usize {
41 self.num_threads
42 }
43
44 pub fn rgb_to_yuv_bt601(input: &[u8], output: &mut [u8], width: usize, height: usize) {
46 const KR: f32 = 0.299;
47 const KB: f32 = 0.114;
48 const KG: f32 = 0.587;
49
50 let pixels = width * height;
51 output
52 .par_chunks_exact_mut(4)
53 .zip(input.par_chunks_exact(4))
54 .take(pixels)
55 .for_each(|(out, inp)| {
56 let r = f32::from(inp[0]) / 255.0;
57 let g = f32::from(inp[1]) / 255.0;
58 let b = f32::from(inp[2]) / 255.0;
59 let a = inp[3];
60
61 let y = KR * r + KG * g + KB * b;
62 let u = (b - y) / (2.0 * (1.0 - KB)) + 0.5;
63 let v = (r - y) / (2.0 * (1.0 - KR)) + 0.5;
64
65 out[0] = (y.clamp(0.0, 1.0) * 255.0) as u8;
66 out[1] = (u.clamp(0.0, 1.0) * 255.0) as u8;
67 out[2] = (v.clamp(0.0, 1.0) * 255.0) as u8;
68 out[3] = a;
69 });
70 }
71
72 pub fn yuv_to_rgb_bt601(input: &[u8], output: &mut [u8], width: usize, height: usize) {
74 const KR: f32 = 0.299;
75 const KB: f32 = 0.114;
76 const KG: f32 = 0.587;
77
78 let pixels = width * height;
79 output
80 .par_chunks_exact_mut(4)
81 .zip(input.par_chunks_exact(4))
82 .take(pixels)
83 .for_each(|(out, inp)| {
84 let y = f32::from(inp[0]) / 255.0;
85 let u = f32::from(inp[1]) / 255.0 - 0.5;
86 let v = f32::from(inp[2]) / 255.0 - 0.5;
87 let a = inp[3];
88
89 let r = y + 2.0 * (1.0 - KR) * v;
90 let b = y + 2.0 * (1.0 - KB) * u;
91 let g = (y - KR * r - KB * b) / KG;
92
93 out[0] = (r.clamp(0.0, 1.0) * 255.0) as u8;
94 out[1] = (g.clamp(0.0, 1.0) * 255.0) as u8;
95 out[2] = (b.clamp(0.0, 1.0) * 255.0) as u8;
96 out[3] = a;
97 });
98 }
99
100 #[allow(clippy::too_many_arguments)]
102 pub fn resize_bilinear(
103 input: &[u8],
104 src_width: usize,
105 src_height: usize,
106 output: &mut [u8],
107 dst_width: usize,
108 dst_height: usize,
109 ) {
110 let x_ratio = src_width as f32 / dst_width as f32;
111 let y_ratio = src_height as f32 / dst_height as f32;
112
113 output
114 .par_chunks_exact_mut(4)
115 .enumerate()
116 .for_each(|(i, pixel)| {
117 let dst_x = i % dst_width;
118 let dst_y = i / dst_width;
119
120 if dst_y >= dst_height {
121 return;
122 }
123
124 let src_x = (dst_x as f32 + 0.5) * x_ratio - 0.5;
125 let src_y = (dst_y as f32 + 0.5) * y_ratio - 0.5;
126
127 let x0 = src_x.floor().max(0.0) as usize;
128 let y0 = src_y.floor().max(0.0) as usize;
129 let x1 = (x0 + 1).min(src_width - 1);
130 let y1 = (y0 + 1).min(src_height - 1);
131
132 let fx = src_x.fract();
133 let fy = src_y.fract();
134
135 for c in 0..4 {
136 let p00 = input[(y0 * src_width + x0) * 4 + c];
137 let p10 = input[(y0 * src_width + x1) * 4 + c];
138 let p01 = input[(y1 * src_width + x0) * 4 + c];
139 let p11 = input[(y1 * src_width + x1) * 4 + c];
140
141 let v0 = f32::from(p00) * (1.0 - fx) + f32::from(p10) * fx;
142 let v1 = f32::from(p01) * (1.0 - fx) + f32::from(p11) * fx;
143 let v = v0 * (1.0 - fy) + v1 * fy;
144
145 pixel[c] = v.round().clamp(0.0, 255.0) as u8;
146 }
147 });
148 }
149
150 pub fn gaussian_blur(input: &[u8], output: &mut [u8], width: usize, height: usize, sigma: f32) {
152 let kernel_radius = (3.0 * sigma).ceil() as i32;
153 let kernel_size = (2 * kernel_radius + 1) as usize;
154
155 let mut kernel = vec![0.0f32; kernel_size];
157 let mut sum = 0.0f32;
158 let two_sigma_sq = 2.0 * sigma * sigma;
159
160 for i in 0..kernel_size {
161 let x = i as i32 - kernel_radius;
162 let value = (-(x * x) as f32 / two_sigma_sq).exp();
163 kernel[i] = value;
164 sum += value;
165 }
166
167 for value in &mut kernel {
169 *value /= sum;
170 }
171
172 let mut temp = vec![0u8; input.len()];
174
175 temp.par_chunks_exact_mut(4)
177 .enumerate()
178 .for_each(|(i, pixel)| {
179 let x = i % width;
180 let y = i / width;
181
182 if y >= height {
183 return;
184 }
185
186 for c in 0..4 {
187 let mut value = 0.0f32;
188
189 for k in 0..kernel_size {
190 let offset = k as i32 - kernel_radius;
191 let sample_x = (x as i32 + offset).clamp(0, width as i32 - 1) as usize;
192 let idx = (y * width + sample_x) * 4 + c;
193 value += f32::from(input[idx]) * kernel[k];
194 }
195
196 pixel[c] = value.round().clamp(0.0, 255.0) as u8;
197 }
198 });
199
200 output
202 .par_chunks_exact_mut(4)
203 .enumerate()
204 .for_each(|(i, pixel)| {
205 let x = i % width;
206 let y = i / width;
207
208 if y >= height {
209 return;
210 }
211
212 for c in 0..4 {
213 let mut value = 0.0f32;
214
215 for k in 0..kernel_size {
216 let offset = k as i32 - kernel_radius;
217 let sample_y = (y as i32 + offset).clamp(0, height as i32 - 1) as usize;
218 let idx = (sample_y * width + x) * 4 + c;
219 value += f32::from(temp[idx]) * kernel[k];
220 }
221
222 pixel[c] = value.round().clamp(0.0, 255.0) as u8;
223 }
224 });
225 }
226
/// Reports whether SIMD acceleration is considered available on this CPU.
///
/// * `x86_64`: runtime detection; true when either SSE4.2 or AVX2 is present.
/// * `aarch64`: always true (presumably because NEON is baseline there).
/// * any other architecture: always false.
#[must_use]
pub fn has_simd() -> bool {
    // Exactly one of the three bindings survives conditional compilation.
    #[cfg(target_arch = "x86_64")]
    let available = is_x86_feature_detected!("sse4.2") || is_x86_feature_detected!("avx2");

    #[cfg(target_arch = "aarch64")]
    let available = true;

    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    let available = false;

    available
}
245}
246
247impl Backend for CpuBackend {
248 fn capabilities(&self) -> &BackendCapabilities {
249 &self.capabilities
250 }
251
252 fn is_available() -> bool {
253 true
255 }
256
257 fn initialize() -> Result<Self> {
258 Self::new()
259 }
260}
261
262impl Default for CpuBackend {
263 fn default() -> Self {
270 match Self::new() {
271 Ok(backend) => backend,
272 Err(e) => panic!("Failed to initialize CPU backend: {e}"),
273 }
274 }
275}
276
#[cfg(test)]
mod tests {
    use super::*;

    /// The CPU backend reports itself as available.
    #[test]
    fn test_cpu_backend_always_available() {
        let available = CpuBackend::is_available();
        assert!(available);
    }

    /// Construction succeeds, records at least one thread and tags itself as CPU.
    #[test]
    fn test_cpu_backend_creation() {
        let cpu = CpuBackend::new().expect("CPU backend creation should succeed");

        let threads = cpu.num_threads();
        assert!(threads > 0);

        let kind = cpu.capabilities().backend_type;
        assert_eq!(kind, BackendType::CPU);
    }

    /// Smoke test: detection runs and its result is printed for the test log.
    #[test]
    fn test_simd_detection() {
        let has_simd = CpuBackend::has_simd();
        println!("SIMD available: {has_simd}");
    }

    /// Pure red should land at a luma of roughly 0.299 * 255 (about 76).
    #[test]
    fn test_rgb_to_yuv_cpu() {
        let red_pixel = [255u8, 0, 0, 255];
        let mut yuv = [0u8; 4];

        CpuBackend::rgb_to_yuv_bt601(&red_pixel, &mut yuv, 1, 1);

        let luma = yuv[0];
        assert!((71..80).contains(&luma));
    }

    /// Upscaling a uniformly white 2x2 image to 4x4 must keep it bright.
    #[test]
    fn test_resize_bilinear_cpu() {
        let white_2x2 = [255u8; 2 * 2 * 4];
        let mut scaled_4x4 = [0u8; 4 * 4 * 4];

        CpuBackend::resize_bilinear(&white_2x2, 2, 2, &mut scaled_4x4, 4, 4);

        let first_channel = scaled_4x4[0];
        assert!(first_channel > 200);
    }
}