oximedia_gpu/lib.rs
1//! GPU compute pipeline using WGPU for `OxiMedia`
2//!
3//! This crate provides a cross-platform GPU acceleration layer using WGPU,
4//! supporting Vulkan, Metal, DirectX 12, and WebGPU backends.
5//!
6//! # Features
7//!
8//! - Color space conversions (RGB ↔ YUV with BT.601, BT.709, BT.2020)
9//! - Image scaling (nearest, bilinear, bicubic)
10//! - Convolution filters (blur, sharpen, edge detection)
11//! - Transform operations (DCT, FFT)
12//! - Automatic CPU fallback
13//! - Multi-GPU support
14//!
15//! # Example
16//!
17//! ```no_run
18//! use oximedia_gpu::GpuContext;
19//!
20//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
21//! let ctx = GpuContext::new()?;
22//!
23//! let input = vec![0u8; 1920 * 1080 * 4];
24//! let mut output = vec![0u8; 1920 * 1080 * 4];
25//!
26//! ctx.rgb_to_yuv(&input, &mut output)?;
27//! # Ok(())
28//! # }
29//! ```
30
31#![allow(clippy::cast_possible_truncation)]
32#![allow(clippy::cast_sign_loss)]
33#![allow(clippy::cast_precision_loss)]
34#![allow(clippy::cast_possible_wrap)]
35#![allow(clippy::missing_errors_doc)]
36#![allow(clippy::missing_panics_doc)]
37
38// Core modules
39pub mod buffer;
40pub mod device;
41pub mod ops;
42pub mod shader;
43
44// New comprehensive modules
45pub mod accelerator;
46pub mod backend;
47pub mod cache;
48pub mod compiler;
49pub mod compute;
50pub mod kernels;
51pub mod memory;
52pub mod queue;
53pub mod sync;
54
55// GPU compute operation modules
56pub mod histogram;
57pub mod motion_detect;
58pub mod pipeline;
59pub mod texture;
60pub mod video_process;
61
62// New kernel / pass / shader-param modules
63pub mod compute_pass;
64pub mod kernel;
65pub mod shader_params;
66
67// Wave-8 new modules
68pub mod compute_dispatch;
69pub mod memory_pool;
70pub mod shader_cache;
71
72// Wave-9 new modules
73pub mod gpu_buffer;
74pub mod gpu_fence;
75pub mod render_pass;
76
77// Wave-10 new modules
78pub mod command_buffer;
79pub mod resource_manager;
80pub mod sync_primitive;
81
82// Wave-11 new modules
83pub mod descriptor_set;
84pub mod gpu_stats;
85pub mod viewport;
86
87// Wave-12 new modules
88pub mod gpu_profiler;
89pub mod sampler;
90pub mod vertex_buffer;
91
92// Wave-13 new modules
93pub mod fence_pool;
94pub mod gpu_timer;
95pub mod upload_queue;
96
97// Wave-14 new modules
98pub mod buffer_copy;
99pub mod occupancy;
100pub mod workgroup;
101
102use std::sync::Arc;
103use thiserror::Error;
104
105// Accelerator exports
106pub use accelerator::{AcceleratorBuilder, CpuAccelerator, GpuAccelerator, WgpuAccelerator};
107
108// Core exports
109pub use buffer::{BufferType, GpuBuffer};
110pub use device::{GpuDevice, GpuDeviceInfo};
111pub use ops::{ColorSpaceConversion, FilterOperation, ScaleOperation, TransformOperation};
112
113// Backend exports
114pub use backend::{Backend, BackendCapabilities, BackendType, CpuBackend, VulkanBackend};
115
116// Cache exports
117pub use cache::{CacheStats, PipelineCache, ShaderCache};
118
119// Compiler exports
120pub use compiler::{
121 CompilationError, CompilationOptions, OptimizationLevel, ShaderCompiler, ShaderPreprocessor,
122};
123
124// Compute exports
125pub use compute::{
126 ComputeExecutor, ComputePassBuilder, ComputePipelineHandle, ComputePipelineManager,
127 DispatchHelper,
128};
129
130// Kernels exports
131pub use kernels::{
132 ColorConversionKernel, ConvolutionKernel, FilterKernel, ReduceKernel, ReduceOp, ResizeFilter,
133 ResizeKernel, TransformKernel, TransformType,
134};
135
136// Memory exports
137pub use memory::{ManagedBuffer, MemoryAllocator, MemoryPool, MemoryStats};
138
139// Queue exports
140pub use queue::{
141 AsyncSubmission, BatchSubmitter, CommandBufferBuilder, CommandQueue, QueueManager, QueueType,
142};
143
144// Sync exports
145pub use sync::{Barrier, Event, Fence, Semaphore};
146
147// Video processing exports
148pub use histogram::{ChannelHistogram, ImageHistogram};
149pub use motion_detect::{MotionAnalysis, MotionDetector, MotionRegion, Sensitivity};
150pub use pipeline::{GpuPipeline, PipelineMetrics, PipelineNode, PipelineStage};
151pub use texture::{TextureDescriptor, TextureFormat, TexturePool};
152pub use video_process::{FrameProcessConfig, FrameProcessResult, VideoFrameProcessor};
153
/// Error types for GPU operations
///
/// The `Display` text of each variant is generated from its `#[error(...)]`
/// attribute; tuple payloads are interpolated as `{0}` and named fields by
/// their field name.
#[derive(Debug, Error)]
pub enum GpuError {
    /// Device initialization failed
    ///
    /// The payload is appended to the message as the failure detail.
    #[error("Failed to initialize GPU device: {0}")]
    DeviceInit(String),

    /// Adapter selection failed
    #[error("No suitable GPU adapter found")]
    NoAdapter,

    /// Device request failed
    ///
    /// The payload is appended to the message as the failure detail.
    #[error("Failed to request GPU device: {0}")]
    DeviceRequest(String),

    /// Buffer creation failed
    ///
    /// The payload is appended to the message as the failure detail.
    #[error("Failed to create GPU buffer: {0}")]
    BufferCreation(String),

    /// Shader compilation failed
    ///
    /// The payload is appended to the message as the failure detail.
    #[error("Failed to compile shader: {0}")]
    ShaderCompilation(String),

    /// Pipeline creation failed
    ///
    /// The payload is appended to the message as the failure detail.
    #[error("Failed to create compute pipeline: {0}")]
    PipelineCreation(String),

    /// Command submission failed
    ///
    /// The payload is appended to the message as the failure detail.
    #[error("Failed to submit GPU commands: {0}")]
    CommandSubmission(String),

    /// Buffer mapping failed
    ///
    /// The payload is appended to the message as the failure detail.
    #[error("Failed to map GPU buffer: {0}")]
    BufferMapping(String),

    /// Invalid dimensions
    #[error("Invalid image dimensions: {width}x{height}")]
    InvalidDimensions {
        /// Width that was rejected (printed before the `x` in the message).
        width: u32,
        /// Height that was rejected (printed after the `x` in the message).
        height: u32,
    },

    /// Invalid buffer size
    ///
    /// Within this file the colour-conversion entry points report a
    /// length mismatch between input and output with `expected` set to the
    /// input length and `actual` set to the output length, and a length that
    /// is not a multiple of four with `expected` set to the length rounded
    /// down to a multiple of four.
    #[error("Invalid buffer size: expected {expected}, got {actual}")]
    InvalidBufferSize {
        /// Size the operation required.
        expected: usize,
        /// Size that was actually supplied.
        actual: usize,
    },

    /// Operation not supported
    ///
    /// The payload is appended to the message as the detail.
    #[error("Operation not supported: {0}")]
    NotSupported(String),

    /// Internal error
    ///
    /// The payload is appended to the message as the detail.
    #[error("Internal GPU error: {0}")]
    Internal(String),
}
205
/// Result alias used throughout this file: a standard `Result` whose error
/// type is fixed to [`GpuError`].
pub type Result<T> = std::result::Result<T, GpuError>;
207
/// GPU context for compute operations
///
/// This is the main entry point for GPU-accelerated operations.
/// It manages device selection, resource allocation, and command submission.
///
/// Every operation exposed on the context forwards to the `ops` module,
/// passing the wrapped device along.
pub struct GpuContext {
    /// Reference-counted handle to the device that all operations of this
    /// context are issued against.
    device: Arc<GpuDevice>,
}
215
216impl GpuContext {
217 /// Create a new GPU context with automatic device selection
218 ///
219 /// This will select the most suitable GPU device available on the system.
220 /// If no GPU is available, an error is returned.
221 ///
222 /// # Errors
223 ///
224 /// Returns an error if no suitable GPU device is found or if device
225 /// initialization fails.
226 pub fn new() -> Result<Self> {
227 let device = GpuDevice::new(None)?;
228 Ok(Self {
229 device: Arc::new(device),
230 })
231 }
232
233 /// Create a new GPU context with a specific device
234 ///
235 /// # Arguments
236 ///
237 /// * `device_index` - Index of the device to use (from `list_devices`)
238 ///
239 /// # Errors
240 ///
241 /// Returns an error if the device index is invalid or if device
242 /// initialization fails.
243 pub fn with_device(device_index: usize) -> Result<Self> {
244 let device = GpuDevice::new(Some(device_index))?;
245 Ok(Self {
246 device: Arc::new(device),
247 })
248 }
249
250 /// List available GPU devices
251 ///
252 /// Returns information about all GPU devices available on the system.
253 pub fn list_devices() -> Result<Vec<GpuDeviceInfo>> {
254 GpuDevice::list_devices()
255 }
256
257 /// Get information about the current device
258 #[must_use]
259 pub fn device_info(&self) -> &GpuDeviceInfo {
260 self.device.info()
261 }
262
263 /// Convert RGB to YUV (BT.601)
264 ///
265 /// # Arguments
266 ///
267 /// * `input` - Input RGB buffer (packed RGBA format)
268 /// * `output` - Output YUV buffer (packed YUVA format)
269 ///
270 /// # Errors
271 ///
272 /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
273 pub fn rgb_to_yuv(&self, input: &[u8], output: &mut [u8]) -> Result<()> {
274 if input.len() != output.len() {
275 return Err(GpuError::InvalidBufferSize {
276 expected: input.len(),
277 actual: output.len(),
278 });
279 }
280
281 if input.len() % 4 != 0 {
282 return Err(GpuError::InvalidBufferSize {
283 expected: (input.len() / 4) * 4,
284 actual: input.len(),
285 });
286 }
287
288 let width = ((input.len() / 4) as f32).sqrt() as u32;
289 let height = width;
290
291 ops::ColorSpaceConversion::rgb_to_yuv(
292 &self.device,
293 input,
294 output,
295 width,
296 height,
297 ops::ColorSpace::BT601,
298 )
299 }
300
301 /// Convert YUV to RGB (BT.601)
302 ///
303 /// # Arguments
304 ///
305 /// * `input` - Input YUV buffer (packed YUVA format)
306 /// * `output` - Output RGB buffer (packed RGBA format)
307 ///
308 /// # Errors
309 ///
310 /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
311 pub fn yuv_to_rgb(&self, input: &[u8], output: &mut [u8]) -> Result<()> {
312 if input.len() != output.len() {
313 return Err(GpuError::InvalidBufferSize {
314 expected: input.len(),
315 actual: output.len(),
316 });
317 }
318
319 if input.len() % 4 != 0 {
320 return Err(GpuError::InvalidBufferSize {
321 expected: (input.len() / 4) * 4,
322 actual: input.len(),
323 });
324 }
325
326 let width = ((input.len() / 4) as f32).sqrt() as u32;
327 let height = width;
328
329 ops::ColorSpaceConversion::yuv_to_rgb(
330 &self.device,
331 input,
332 output,
333 width,
334 height,
335 ops::ColorSpace::BT601,
336 )
337 }
338
339 /// Scale an image using bilinear interpolation
340 ///
341 /// # Arguments
342 ///
343 /// * `input` - Input image buffer (packed RGBA format)
344 /// * `src_width` - Source image width
345 /// * `src_height` - Source image height
346 /// * `output` - Output image buffer (packed RGBA format)
347 /// * `dst_width` - Destination image width
348 /// * `dst_height` - Destination image height
349 ///
350 /// # Errors
351 ///
352 /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
353 pub fn scale_bilinear(
354 &self,
355 input: &[u8],
356 src_width: u32,
357 src_height: u32,
358 output: &mut [u8],
359 dst_width: u32,
360 dst_height: u32,
361 ) -> Result<()> {
362 ops::ScaleOperation::scale(
363 &self.device,
364 input,
365 src_width,
366 src_height,
367 output,
368 dst_width,
369 dst_height,
370 ops::ScaleFilter::Bilinear,
371 )
372 }
373
374 /// Scale an image using bicubic interpolation
375 ///
376 /// # Arguments
377 ///
378 /// * `input` - Input image buffer (packed RGBA format)
379 /// * `src_width` - Source image width
380 /// * `src_height` - Source image height
381 /// * `output` - Output image buffer (packed RGBA format)
382 /// * `dst_width` - Destination image width
383 /// * `dst_height` - Destination image height
384 ///
385 /// # Errors
386 ///
387 /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
388 pub fn scale_bicubic(
389 &self,
390 input: &[u8],
391 src_width: u32,
392 src_height: u32,
393 output: &mut [u8],
394 dst_width: u32,
395 dst_height: u32,
396 ) -> Result<()> {
397 ops::ScaleOperation::scale(
398 &self.device,
399 input,
400 src_width,
401 src_height,
402 output,
403 dst_width,
404 dst_height,
405 ops::ScaleFilter::Bicubic,
406 )
407 }
408
409 /// Apply Gaussian blur
410 ///
411 /// # Arguments
412 ///
413 /// * `input` - Input image buffer (packed RGBA format)
414 /// * `output` - Output image buffer (packed RGBA format)
415 /// * `width` - Image width
416 /// * `height` - Image height
417 /// * `sigma` - Blur radius (standard deviation)
418 ///
419 /// # Errors
420 ///
421 /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
422 #[allow(clippy::too_many_arguments)]
423 pub fn gaussian_blur(
424 &self,
425 input: &[u8],
426 output: &mut [u8],
427 width: u32,
428 height: u32,
429 sigma: f32,
430 ) -> Result<()> {
431 ops::FilterOperation::gaussian_blur(&self.device, input, output, width, height, sigma)
432 }
433
434 /// Apply sharpening filter
435 ///
436 /// # Arguments
437 ///
438 /// * `input` - Input image buffer (packed RGBA format)
439 /// * `output` - Output image buffer (packed RGBA format)
440 /// * `width` - Image width
441 /// * `height` - Image height
442 /// * `amount` - Sharpening strength
443 ///
444 /// # Errors
445 ///
446 /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
447 #[allow(clippy::too_many_arguments)]
448 pub fn sharpen(
449 &self,
450 input: &[u8],
451 output: &mut [u8],
452 width: u32,
453 height: u32,
454 amount: f32,
455 ) -> Result<()> {
456 ops::FilterOperation::sharpen(&self.device, input, output, width, height, amount)
457 }
458
459 /// Detect edges using Sobel operator
460 ///
461 /// # Arguments
462 ///
463 /// * `input` - Input image buffer (packed RGBA format)
464 /// * `output` - Output image buffer (packed RGBA format)
465 /// * `width` - Image width
466 /// * `height` - Image height
467 ///
468 /// # Errors
469 ///
470 /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
471 pub fn edge_detect(
472 &self,
473 input: &[u8],
474 output: &mut [u8],
475 width: u32,
476 height: u32,
477 ) -> Result<()> {
478 ops::FilterOperation::edge_detect(&self.device, input, output, width, height)
479 }
480
481 /// Compute 2D DCT (Discrete Cosine Transform)
482 ///
483 /// # Arguments
484 ///
485 /// * `input` - Input data (f32 values)
486 /// * `output` - Output DCT coefficients
487 /// * `width` - Data width (must be multiple of 8)
488 /// * `height` - Data height (must be multiple of 8)
489 ///
490 /// # Errors
491 ///
492 /// Returns an error if dimensions are invalid or if the GPU operation fails.
493 pub fn dct_2d(&self, input: &[f32], output: &mut [f32], width: u32, height: u32) -> Result<()> {
494 ops::TransformOperation::dct_2d(&self.device, input, output, width, height)
495 }
496
497 /// Compute 2D IDCT (Inverse Discrete Cosine Transform)
498 ///
499 /// # Arguments
500 ///
501 /// * `input` - Input DCT coefficients
502 /// * `output` - Output reconstructed data
503 /// * `width` - Data width (must be multiple of 8)
504 /// * `height` - Data height (must be multiple of 8)
505 ///
506 /// # Errors
507 ///
508 /// Returns an error if dimensions are invalid or if the GPU operation fails.
509 pub fn idct_2d(
510 &self,
511 input: &[f32],
512 output: &mut [f32],
513 width: u32,
514 height: u32,
515 ) -> Result<()> {
516 ops::TransformOperation::idct_2d(&self.device, input, output, width, height)
517 }
518
519 /// Wait for all GPU operations to complete
520 ///
521 /// This is useful for synchronization and benchmarking.
522 pub fn wait(&self) {
523 self.device.wait();
524 }
525}
526
527impl Default for GpuContext {
528 fn default() -> Self {
529 Self::new().expect("Failed to create GPU context")
530 }
531}