Skip to main content

oximedia_gpu/
lib.rs

1//! GPU compute pipeline using WGPU for `OxiMedia`
2//!
3//! This crate provides a cross-platform GPU acceleration layer using WGPU,
4//! supporting Vulkan, Metal, DirectX 12, and WebGPU backends.
5//!
6//! # Features
7//!
8//! - Color space conversions (RGB ↔ YUV with BT.601, BT.709, BT.2020)
9//! - Image scaling (nearest, bilinear, bicubic)
10//! - Convolution filters (blur, sharpen, edge detection)
11//! - Transform operations (DCT, FFT)
12//! - Automatic CPU fallback
13//! - Multi-GPU support
14//!
15//! # Example
16//!
17//! ```no_run
18//! use oximedia_gpu::GpuContext;
19//!
20//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
21//! let ctx = GpuContext::new()?;
22//!
23//! let input = vec![0u8; 1920 * 1080 * 4];
24//! let mut output = vec![0u8; 1920 * 1080 * 4];
25//!
26//! ctx.rgb_to_yuv(&input, &mut output)?;
27//! # Ok(())
28//! # }
29//! ```
30
31#![allow(clippy::cast_possible_truncation)]
32#![allow(clippy::cast_sign_loss)]
33#![allow(clippy::cast_precision_loss)]
34#![allow(clippy::cast_possible_wrap)]
35#![allow(clippy::missing_errors_doc)]
36#![allow(clippy::missing_panics_doc)]
37
38// Core modules
39pub mod buffer;
40pub mod device;
41pub mod ops;
42pub mod shader;
43
44// New comprehensive modules
45pub mod accelerator;
46pub mod backend;
47pub mod cache;
48pub mod compiler;
49pub mod compute;
50pub mod kernels;
51pub mod memory;
52pub mod queue;
53pub mod sync;
54
55// GPU compute operation modules
56pub mod histogram;
57pub mod motion_detect;
58pub mod pipeline;
59pub mod texture;
60pub mod video_process;
61
62// New kernel / pass / shader-param modules
63pub mod compute_pass;
64pub mod kernel;
65pub mod shader_params;
66
67// Wave-8 new modules
68pub mod compute_dispatch;
69pub mod memory_pool;
70pub mod shader_cache;
71
72// Wave-9 new modules
73pub mod gpu_buffer;
74pub mod gpu_fence;
75pub mod render_pass;
76
77// Wave-10 new modules
78pub mod command_buffer;
79pub mod resource_manager;
80pub mod sync_primitive;
81
82// Wave-11 new modules
83pub mod descriptor_set;
84pub mod gpu_stats;
85pub mod viewport;
86
87// Wave-12 new modules
88pub mod gpu_profiler;
89pub mod sampler;
90pub mod vertex_buffer;
91
92// Wave-13 new modules
93pub mod fence_pool;
94pub mod gpu_timer;
95pub mod upload_queue;
96
97// Wave-14 new modules
98pub mod buffer_copy;
99pub mod occupancy;
100pub mod workgroup;
101
102use std::sync::Arc;
103use thiserror::Error;
104
105// Accelerator exports
106pub use accelerator::{AcceleratorBuilder, CpuAccelerator, GpuAccelerator, WgpuAccelerator};
107
108// Core exports
109pub use buffer::{BufferType, GpuBuffer};
110pub use device::{GpuDevice, GpuDeviceInfo};
111pub use ops::{ColorSpaceConversion, FilterOperation, ScaleOperation, TransformOperation};
112
113// Backend exports
114pub use backend::{Backend, BackendCapabilities, BackendType, CpuBackend, VulkanBackend};
115
116// Cache exports
117pub use cache::{CacheStats, PipelineCache, ShaderCache};
118
119// Compiler exports
120pub use compiler::{
121    CompilationError, CompilationOptions, OptimizationLevel, ShaderCompiler, ShaderPreprocessor,
122};
123
124// Compute exports
125pub use compute::{
126    ComputeExecutor, ComputePassBuilder, ComputePipelineHandle, ComputePipelineManager,
127    DispatchHelper,
128};
129
130// Kernels exports
131pub use kernels::{
132    ColorConversionKernel, ConvolutionKernel, FilterKernel, ReduceKernel, ReduceOp, ResizeFilter,
133    ResizeKernel, TransformKernel, TransformType,
134};
135
136// Memory exports
137pub use memory::{ManagedBuffer, MemoryAllocator, MemoryPool, MemoryStats};
138
139// Queue exports
140pub use queue::{
141    AsyncSubmission, BatchSubmitter, CommandBufferBuilder, CommandQueue, QueueManager, QueueType,
142};
143
144// Sync exports
145pub use sync::{Barrier, Event, Fence, Semaphore};
146
147// Video processing exports
148pub use histogram::{ChannelHistogram, ImageHistogram};
149pub use motion_detect::{MotionAnalysis, MotionDetector, MotionRegion, Sensitivity};
150pub use pipeline::{GpuPipeline, PipelineMetrics, PipelineNode, PipelineStage};
151pub use texture::{TextureDescriptor, TextureFormat, TexturePool};
152pub use video_process::{FrameProcessConfig, FrameProcessResult, VideoFrameProcessor};
153
154/// Error types for GPU operations
155#[derive(Debug, Error)]
156pub enum GpuError {
157    /// Device initialization failed
158    #[error("Failed to initialize GPU device: {0}")]
159    DeviceInit(String),
160
161    /// Adapter selection failed
162    #[error("No suitable GPU adapter found")]
163    NoAdapter,
164
165    /// Device request failed
166    #[error("Failed to request GPU device: {0}")]
167    DeviceRequest(String),
168
169    /// Buffer creation failed
170    #[error("Failed to create GPU buffer: {0}")]
171    BufferCreation(String),
172
173    /// Shader compilation failed
174    #[error("Failed to compile shader: {0}")]
175    ShaderCompilation(String),
176
177    /// Pipeline creation failed
178    #[error("Failed to create compute pipeline: {0}")]
179    PipelineCreation(String),
180
181    /// Command submission failed
182    #[error("Failed to submit GPU commands: {0}")]
183    CommandSubmission(String),
184
185    /// Buffer mapping failed
186    #[error("Failed to map GPU buffer: {0}")]
187    BufferMapping(String),
188
189    /// Invalid dimensions
190    #[error("Invalid image dimensions: {width}x{height}")]
191    InvalidDimensions { width: u32, height: u32 },
192
193    /// Invalid buffer size
194    #[error("Invalid buffer size: expected {expected}, got {actual}")]
195    InvalidBufferSize { expected: usize, actual: usize },
196
197    /// Operation not supported
198    #[error("Operation not supported: {0}")]
199    NotSupported(String),
200
201    /// Internal error
202    #[error("Internal GPU error: {0}")]
203    Internal(String),
204}
205
206pub type Result<T> = std::result::Result<T, GpuError>;
207
208/// GPU context for compute operations
209///
210/// This is the main entry point for GPU-accelerated operations.
211/// It manages device selection, resource allocation, and command submission.
212pub struct GpuContext {
213    device: Arc<GpuDevice>,
214}
215
216impl GpuContext {
217    /// Create a new GPU context with automatic device selection
218    ///
219    /// This will select the most suitable GPU device available on the system.
220    /// If no GPU is available, an error is returned.
221    ///
222    /// # Errors
223    ///
224    /// Returns an error if no suitable GPU device is found or if device
225    /// initialization fails.
226    pub fn new() -> Result<Self> {
227        let device = GpuDevice::new(None)?;
228        Ok(Self {
229            device: Arc::new(device),
230        })
231    }
232
233    /// Create a new GPU context with a specific device
234    ///
235    /// # Arguments
236    ///
237    /// * `device_index` - Index of the device to use (from `list_devices`)
238    ///
239    /// # Errors
240    ///
241    /// Returns an error if the device index is invalid or if device
242    /// initialization fails.
243    pub fn with_device(device_index: usize) -> Result<Self> {
244        let device = GpuDevice::new(Some(device_index))?;
245        Ok(Self {
246            device: Arc::new(device),
247        })
248    }
249
250    /// List available GPU devices
251    ///
252    /// Returns information about all GPU devices available on the system.
253    pub fn list_devices() -> Result<Vec<GpuDeviceInfo>> {
254        GpuDevice::list_devices()
255    }
256
257    /// Get information about the current device
258    #[must_use]
259    pub fn device_info(&self) -> &GpuDeviceInfo {
260        self.device.info()
261    }
262
263    /// Convert RGB to YUV (BT.601)
264    ///
265    /// # Arguments
266    ///
267    /// * `input` - Input RGB buffer (packed RGBA format)
268    /// * `output` - Output YUV buffer (packed YUVA format)
269    ///
270    /// # Errors
271    ///
272    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
273    pub fn rgb_to_yuv(&self, input: &[u8], output: &mut [u8]) -> Result<()> {
274        if input.len() != output.len() {
275            return Err(GpuError::InvalidBufferSize {
276                expected: input.len(),
277                actual: output.len(),
278            });
279        }
280
281        if input.len() % 4 != 0 {
282            return Err(GpuError::InvalidBufferSize {
283                expected: (input.len() / 4) * 4,
284                actual: input.len(),
285            });
286        }
287
288        let width = ((input.len() / 4) as f32).sqrt() as u32;
289        let height = width;
290
291        ops::ColorSpaceConversion::rgb_to_yuv(
292            &self.device,
293            input,
294            output,
295            width,
296            height,
297            ops::ColorSpace::BT601,
298        )
299    }
300
301    /// Convert YUV to RGB (BT.601)
302    ///
303    /// # Arguments
304    ///
305    /// * `input` - Input YUV buffer (packed YUVA format)
306    /// * `output` - Output RGB buffer (packed RGBA format)
307    ///
308    /// # Errors
309    ///
310    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
311    pub fn yuv_to_rgb(&self, input: &[u8], output: &mut [u8]) -> Result<()> {
312        if input.len() != output.len() {
313            return Err(GpuError::InvalidBufferSize {
314                expected: input.len(),
315                actual: output.len(),
316            });
317        }
318
319        if input.len() % 4 != 0 {
320            return Err(GpuError::InvalidBufferSize {
321                expected: (input.len() / 4) * 4,
322                actual: input.len(),
323            });
324        }
325
326        let width = ((input.len() / 4) as f32).sqrt() as u32;
327        let height = width;
328
329        ops::ColorSpaceConversion::yuv_to_rgb(
330            &self.device,
331            input,
332            output,
333            width,
334            height,
335            ops::ColorSpace::BT601,
336        )
337    }
338
339    /// Scale an image using bilinear interpolation
340    ///
341    /// # Arguments
342    ///
343    /// * `input` - Input image buffer (packed RGBA format)
344    /// * `src_width` - Source image width
345    /// * `src_height` - Source image height
346    /// * `output` - Output image buffer (packed RGBA format)
347    /// * `dst_width` - Destination image width
348    /// * `dst_height` - Destination image height
349    ///
350    /// # Errors
351    ///
352    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
353    pub fn scale_bilinear(
354        &self,
355        input: &[u8],
356        src_width: u32,
357        src_height: u32,
358        output: &mut [u8],
359        dst_width: u32,
360        dst_height: u32,
361    ) -> Result<()> {
362        ops::ScaleOperation::scale(
363            &self.device,
364            input,
365            src_width,
366            src_height,
367            output,
368            dst_width,
369            dst_height,
370            ops::ScaleFilter::Bilinear,
371        )
372    }
373
374    /// Scale an image using bicubic interpolation
375    ///
376    /// # Arguments
377    ///
378    /// * `input` - Input image buffer (packed RGBA format)
379    /// * `src_width` - Source image width
380    /// * `src_height` - Source image height
381    /// * `output` - Output image buffer (packed RGBA format)
382    /// * `dst_width` - Destination image width
383    /// * `dst_height` - Destination image height
384    ///
385    /// # Errors
386    ///
387    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
388    pub fn scale_bicubic(
389        &self,
390        input: &[u8],
391        src_width: u32,
392        src_height: u32,
393        output: &mut [u8],
394        dst_width: u32,
395        dst_height: u32,
396    ) -> Result<()> {
397        ops::ScaleOperation::scale(
398            &self.device,
399            input,
400            src_width,
401            src_height,
402            output,
403            dst_width,
404            dst_height,
405            ops::ScaleFilter::Bicubic,
406        )
407    }
408
409    /// Apply Gaussian blur
410    ///
411    /// # Arguments
412    ///
413    /// * `input` - Input image buffer (packed RGBA format)
414    /// * `output` - Output image buffer (packed RGBA format)
415    /// * `width` - Image width
416    /// * `height` - Image height
417    /// * `sigma` - Blur radius (standard deviation)
418    ///
419    /// # Errors
420    ///
421    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
422    #[allow(clippy::too_many_arguments)]
423    pub fn gaussian_blur(
424        &self,
425        input: &[u8],
426        output: &mut [u8],
427        width: u32,
428        height: u32,
429        sigma: f32,
430    ) -> Result<()> {
431        ops::FilterOperation::gaussian_blur(&self.device, input, output, width, height, sigma)
432    }
433
434    /// Apply sharpening filter
435    ///
436    /// # Arguments
437    ///
438    /// * `input` - Input image buffer (packed RGBA format)
439    /// * `output` - Output image buffer (packed RGBA format)
440    /// * `width` - Image width
441    /// * `height` - Image height
442    /// * `amount` - Sharpening strength
443    ///
444    /// # Errors
445    ///
446    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
447    #[allow(clippy::too_many_arguments)]
448    pub fn sharpen(
449        &self,
450        input: &[u8],
451        output: &mut [u8],
452        width: u32,
453        height: u32,
454        amount: f32,
455    ) -> Result<()> {
456        ops::FilterOperation::sharpen(&self.device, input, output, width, height, amount)
457    }
458
459    /// Detect edges using Sobel operator
460    ///
461    /// # Arguments
462    ///
463    /// * `input` - Input image buffer (packed RGBA format)
464    /// * `output` - Output image buffer (packed RGBA format)
465    /// * `width` - Image width
466    /// * `height` - Image height
467    ///
468    /// # Errors
469    ///
470    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
471    pub fn edge_detect(
472        &self,
473        input: &[u8],
474        output: &mut [u8],
475        width: u32,
476        height: u32,
477    ) -> Result<()> {
478        ops::FilterOperation::edge_detect(&self.device, input, output, width, height)
479    }
480
481    /// Compute 2D DCT (Discrete Cosine Transform)
482    ///
483    /// # Arguments
484    ///
485    /// * `input` - Input data (f32 values)
486    /// * `output` - Output DCT coefficients
487    /// * `width` - Data width (must be multiple of 8)
488    /// * `height` - Data height (must be multiple of 8)
489    ///
490    /// # Errors
491    ///
492    /// Returns an error if dimensions are invalid or if the GPU operation fails.
493    pub fn dct_2d(&self, input: &[f32], output: &mut [f32], width: u32, height: u32) -> Result<()> {
494        ops::TransformOperation::dct_2d(&self.device, input, output, width, height)
495    }
496
497    /// Compute 2D IDCT (Inverse Discrete Cosine Transform)
498    ///
499    /// # Arguments
500    ///
501    /// * `input` - Input DCT coefficients
502    /// * `output` - Output reconstructed data
503    /// * `width` - Data width (must be multiple of 8)
504    /// * `height` - Data height (must be multiple of 8)
505    ///
506    /// # Errors
507    ///
508    /// Returns an error if dimensions are invalid or if the GPU operation fails.
509    pub fn idct_2d(
510        &self,
511        input: &[f32],
512        output: &mut [f32],
513        width: u32,
514        height: u32,
515    ) -> Result<()> {
516        ops::TransformOperation::idct_2d(&self.device, input, output, width, height)
517    }
518
519    /// Wait for all GPU operations to complete
520    ///
521    /// This is useful for synchronization and benchmarking.
522    pub fn wait(&self) {
523        self.device.wait();
524    }
525}
526
527impl Default for GpuContext {
528    fn default() -> Self {
529        Self::new().expect("Failed to create GPU context")
530    }
531}