Skip to main content

oximedia_gpu/
lib.rs

1//! GPU compute pipeline using WGPU for `OxiMedia`
2//!
3//! This crate provides a cross-platform GPU acceleration layer using WGPU,
4//! supporting Vulkan, Metal, DirectX 12, and WebGPU backends.
5//!
6//! # Features
7//!
8//! - Color space conversions (RGB ↔ YUV with BT.601, BT.709, BT.2020)
9//! - Image scaling (nearest, bilinear, bicubic)
10//! - Convolution filters (blur, sharpen, edge detection)
11//! - Transform operations (DCT, FFT)
12//! - Automatic CPU fallback
13//! - Multi-GPU support
14//!
15//! # Example
16//!
17//! ```no_run
18//! use oximedia_gpu::GpuContext;
19//!
20//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
21//! let ctx = GpuContext::new()?;
22//!
23//! let input = vec![0u8; 1920 * 1080 * 4];
24//! let mut output = vec![0u8; 1920 * 1080 * 4];
25//!
26//! ctx.rgb_to_yuv(&input, &mut output)?;
27//! # Ok(())
28//! # }
29//! ```
30
31#![allow(clippy::cast_possible_truncation)]
32#![allow(clippy::cast_sign_loss)]
33#![allow(clippy::cast_precision_loss)]
34#![allow(clippy::cast_possible_wrap)]
35#![allow(clippy::missing_errors_doc)]
36#![allow(clippy::missing_panics_doc)]
37
38// Core modules
39pub mod buffer;
40pub mod device;
41pub mod ops;
42pub mod shader;
43
44// New comprehensive modules
45pub mod accelerator;
46pub mod backend;
47pub mod cache;
48pub mod compiler;
49pub mod compute;
50pub mod kernels;
51pub mod memory;
52pub mod queue;
53pub mod sync;
54
55// GPU compute operation modules
56pub mod histogram;
57pub mod motion_detect;
58pub mod pipeline;
59pub mod texture;
60pub mod video_process;
61
62// New kernel / pass / shader-param modules
63pub mod compute_pass;
64pub mod kernel;
65pub mod shader_params;
66
67// Wave-8 new modules
68pub mod compute_dispatch;
69pub mod memory_pool;
70pub mod shader_cache;
71
72// Wave-9 new modules
73pub mod gpu_buffer;
74pub mod gpu_fence;
75pub mod render_pass;
76
77// Wave-10 new modules
78pub mod command_buffer;
79pub mod resource_manager;
80pub mod sync_primitive;
81
82// Wave-11 new modules
83pub mod descriptor_set;
84pub mod gpu_stats;
85pub mod viewport;
86
87// Wave-12 new modules
88pub mod gpu_profiler;
89pub mod sampler;
90pub mod vertex_buffer;
91
92// Wave-13 new modules
93pub mod fence_pool;
94pub mod gpu_timer;
95pub mod upload_queue;
96
97// Wave-14 new modules
98pub mod buffer_copy;
99pub mod occupancy;
100pub mod workgroup;
101
102// Wave-15 new modules
103pub mod buffer_pool;
104pub mod compute_kernels;
105pub mod pipeline_stages;
106
107// Wave-16 new modules (0.1.2 enhancements)
108pub mod motion_estimation;
109pub mod multi_gpu;
110
111// Wave-17 new modules
112pub mod compute_shader;
113pub mod histogram_equalization;
114
115use std::sync::Arc;
116use thiserror::Error;
117
118// Accelerator exports
119pub use accelerator::{AcceleratorBuilder, CpuAccelerator, GpuAccelerator, WgpuAccelerator};
120
121// Core exports
122pub use buffer::{BufferType, GpuBuffer};
123pub use device::{GpuDevice, GpuDeviceInfo};
124pub use ops::quality_metrics::{
125    compute_ms_ssim, compute_psnr, compute_ssim, MsSsimResult, PsnrResult, SsimResult,
126};
127pub use ops::{
128    ChromaOps, ChromaSubsampling, ColorSpaceConversion, FilterOperation, ScaleOperation,
129    TransformOperation, YcbcrCoefficients,
130};
131
132// Backend exports
133pub use backend::{Backend, BackendCapabilities, BackendType, CpuBackend, VulkanBackend};
134
135// Cache exports
136pub use cache::{CacheStats, PipelineCache, ShaderCache};
137
138// Compiler exports
139pub use compiler::{
140    CompilationError, CompilationOptions, OptimizationLevel, ShaderCompiler, ShaderPreprocessor,
141};
142
143// Compute exports
144pub use compute::{
145    ComputeExecutor, ComputePassBuilder, ComputePipelineHandle, ComputePipelineManager,
146    DispatchHelper,
147};
148
149// Kernels exports
150pub use kernels::{
151    ColorConversionKernel, ConvolutionKernel, FilterKernel, ReduceKernel, ReduceOp, ResizeFilter,
152    ResizeKernel, TransformKernel, TransformType,
153};
154
155// Memory exports
156pub use memory::{ManagedBuffer, MemoryAllocator, MemoryPool, MemoryStats};
157
158// Queue exports
159pub use queue::{
160    AsyncSubmission, BatchSubmitter, CommandBufferBuilder, CommandQueue, QueueManager, QueueType,
161};
162
163// Sync exports
164pub use sync::{Barrier, Event, Fence, Semaphore};
165
166// Workgroup auto-tuner exports
167pub use workgroup::{DeviceLimits, WorkgroupAutoTuner};
168
169// Memory pool defragmentation exports
170pub use memory_pool::{CompactionPlan, DefragResult, MigrationEntry};
171
172// Video processing exports
173pub use histogram::{ChannelHistogram, ImageHistogram};
174pub use motion_detect::{MotionAnalysis, MotionDetector, MotionRegion, Sensitivity};
175pub use pipeline::{GpuPipeline, PipelineMetrics, PipelineNode, PipelineStage};
176pub use texture::{TextureDescriptor, TextureFormat, TexturePool};
177pub use video_process::{FrameProcessConfig, FrameProcessResult, VideoFrameProcessor};
178
179// Wave-17 exports
180pub use compute_shader::{ComputeShaderSimulator, ShaderKernel, ThreadGroupContext};
181pub use histogram_equalization::{ClaheConfig, EqualizationStats, HistogramEqualizer};
182
/// Error types for GPU operations
///
/// This is the error half of the [`Result`] alias returned by the fallible
/// `GpuContext` methods. Variants that wrap a `String` carry a free-form
/// description which is appended to the fixed message prefix via `{0}`.
#[derive(Debug, Error)]
pub enum GpuError {
    /// Device initialization failed
    #[error("Failed to initialize GPU device: {0}")]
    DeviceInit(String),

    /// Adapter selection failed (carries no additional detail)
    #[error("No suitable GPU adapter found")]
    NoAdapter,

    /// Device request failed
    #[error("Failed to request GPU device: {0}")]
    DeviceRequest(String),

    /// Buffer creation failed
    #[error("Failed to create GPU buffer: {0}")]
    BufferCreation(String),

    /// Shader compilation failed
    #[error("Failed to compile shader: {0}")]
    ShaderCompilation(String),

    /// Pipeline creation failed
    #[error("Failed to create compute pipeline: {0}")]
    PipelineCreation(String),

    /// Command submission failed
    #[error("Failed to submit GPU commands: {0}")]
    CommandSubmission(String),

    /// Buffer mapping failed
    #[error("Failed to map GPU buffer: {0}")]
    BufferMapping(String),

    /// Invalid dimensions
    ///
    /// `width` and `height` are the rejected values; both are interpolated
    /// into the message as `{width}x{height}`.
    #[error("Invalid image dimensions: {width}x{height}")]
    InvalidDimensions { width: u32, height: u32 },

    /// Invalid buffer size
    ///
    /// `expected` is the size the operation required and `actual` is the size
    /// it was given. `GpuContext::rgb_to_yuv` and `GpuContext::yuv_to_rgb`
    /// fill these with `u8` slice lengths; other producers live outside this
    /// file, so the unit used elsewhere should be checked at the call site.
    #[error("Invalid buffer size: expected {expected}, got {actual}")]
    InvalidBufferSize { expected: usize, actual: usize },

    /// Operation not supported
    #[error("Operation not supported: {0}")]
    NotSupported(String),

    /// Internal error
    #[error("Internal GPU error: {0}")]
    Internal(String),
}
234
/// Shorthand for [`std::result::Result`] with the error type fixed to [`GpuError`].
pub type Result<T> = std::result::Result<T, GpuError>;
236
/// GPU context for compute operations
///
/// This is the main entry point for GPU-accelerated operations.
/// It manages device selection, resource allocation, and command submission.
pub struct GpuContext {
    /// Reference-counted handle to the device; every method on this context
    /// forwards its work to this device.
    device: Arc<GpuDevice>,
}
244
245impl GpuContext {
246    /// Create a new GPU context with automatic device selection
247    ///
248    /// This will select the most suitable GPU device available on the system.
249    /// If no GPU is available, an error is returned.
250    ///
251    /// # Errors
252    ///
253    /// Returns an error if no suitable GPU device is found or if device
254    /// initialization fails.
255    pub fn new() -> Result<Self> {
256        let device = GpuDevice::new(None)?;
257        Ok(Self {
258            device: Arc::new(device),
259        })
260    }
261
262    /// Create a new GPU context with a specific device
263    ///
264    /// # Arguments
265    ///
266    /// * `device_index` - Index of the device to use (from `list_devices`)
267    ///
268    /// # Errors
269    ///
270    /// Returns an error if the device index is invalid or if device
271    /// initialization fails.
272    pub fn with_device(device_index: usize) -> Result<Self> {
273        let device = GpuDevice::new(Some(device_index))?;
274        Ok(Self {
275            device: Arc::new(device),
276        })
277    }
278
    /// List available GPU devices
    ///
    /// Returns information about all GPU devices available on the system.
    /// This is an associated function: no `GpuContext` needs to exist first.
    ///
    /// # Errors
    ///
    /// Returns whatever error [`GpuDevice::list_devices`] reports; the result
    /// is forwarded unchanged.
    pub fn list_devices() -> Result<Vec<GpuDeviceInfo>> {
        GpuDevice::list_devices()
    }
285
    /// Get information about the current device
    ///
    /// The returned reference is borrowed from the device held by this
    /// context, as provided by [`GpuDevice::info`].
    #[must_use]
    pub fn device_info(&self) -> &GpuDeviceInfo {
        self.device.info()
    }
291
292    /// Convert RGB to YUV (BT.601)
293    ///
294    /// # Arguments
295    ///
296    /// * `input` - Input RGB buffer (packed RGBA format)
297    /// * `output` - Output YUV buffer (packed YUVA format)
298    ///
299    /// # Errors
300    ///
301    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
302    pub fn rgb_to_yuv(&self, input: &[u8], output: &mut [u8]) -> Result<()> {
303        if input.len() != output.len() {
304            return Err(GpuError::InvalidBufferSize {
305                expected: input.len(),
306                actual: output.len(),
307            });
308        }
309
310        if input.len() % 4 != 0 {
311            return Err(GpuError::InvalidBufferSize {
312                expected: (input.len() / 4) * 4,
313                actual: input.len(),
314            });
315        }
316
317        let width = ((input.len() / 4) as f32).sqrt() as u32;
318        let height = width;
319
320        ops::ColorSpaceConversion::rgb_to_yuv(
321            &self.device,
322            input,
323            output,
324            width,
325            height,
326            ops::ColorSpace::BT601,
327        )
328    }
329
330    /// Convert YUV to RGB (BT.601)
331    ///
332    /// # Arguments
333    ///
334    /// * `input` - Input YUV buffer (packed YUVA format)
335    /// * `output` - Output RGB buffer (packed RGBA format)
336    ///
337    /// # Errors
338    ///
339    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
340    pub fn yuv_to_rgb(&self, input: &[u8], output: &mut [u8]) -> Result<()> {
341        if input.len() != output.len() {
342            return Err(GpuError::InvalidBufferSize {
343                expected: input.len(),
344                actual: output.len(),
345            });
346        }
347
348        if input.len() % 4 != 0 {
349            return Err(GpuError::InvalidBufferSize {
350                expected: (input.len() / 4) * 4,
351                actual: input.len(),
352            });
353        }
354
355        let width = ((input.len() / 4) as f32).sqrt() as u32;
356        let height = width;
357
358        ops::ColorSpaceConversion::yuv_to_rgb(
359            &self.device,
360            input,
361            output,
362            width,
363            height,
364            ops::ColorSpace::BT601,
365        )
366    }
367
368    /// Scale an image using bilinear interpolation
369    ///
370    /// # Arguments
371    ///
372    /// * `input` - Input image buffer (packed RGBA format)
373    /// * `src_width` - Source image width
374    /// * `src_height` - Source image height
375    /// * `output` - Output image buffer (packed RGBA format)
376    /// * `dst_width` - Destination image width
377    /// * `dst_height` - Destination image height
378    ///
379    /// # Errors
380    ///
381    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
382    pub fn scale_bilinear(
383        &self,
384        input: &[u8],
385        src_width: u32,
386        src_height: u32,
387        output: &mut [u8],
388        dst_width: u32,
389        dst_height: u32,
390    ) -> Result<()> {
391        ops::ScaleOperation::scale(
392            &self.device,
393            input,
394            src_width,
395            src_height,
396            output,
397            dst_width,
398            dst_height,
399            ops::ScaleFilter::Bilinear,
400        )
401    }
402
403    /// Scale an image using bicubic interpolation
404    ///
405    /// # Arguments
406    ///
407    /// * `input` - Input image buffer (packed RGBA format)
408    /// * `src_width` - Source image width
409    /// * `src_height` - Source image height
410    /// * `output` - Output image buffer (packed RGBA format)
411    /// * `dst_width` - Destination image width
412    /// * `dst_height` - Destination image height
413    ///
414    /// # Errors
415    ///
416    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
417    pub fn scale_bicubic(
418        &self,
419        input: &[u8],
420        src_width: u32,
421        src_height: u32,
422        output: &mut [u8],
423        dst_width: u32,
424        dst_height: u32,
425    ) -> Result<()> {
426        ops::ScaleOperation::scale(
427            &self.device,
428            input,
429            src_width,
430            src_height,
431            output,
432            dst_width,
433            dst_height,
434            ops::ScaleFilter::Bicubic,
435        )
436    }
437
438    /// Scale an image using Lanczos-3 interpolation (highest quality)
439    ///
440    /// # Arguments
441    ///
442    /// * `input` - Input image buffer (packed RGBA format)
443    /// * `src_width` - Source image width
444    /// * `src_height` - Source image height
445    /// * `output` - Output image buffer (packed RGBA format)
446    /// * `dst_width` - Destination image width
447    /// * `dst_height` - Destination image height
448    ///
449    /// # Errors
450    ///
451    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
452    pub fn scale_lanczos(
453        &self,
454        input: &[u8],
455        src_width: u32,
456        src_height: u32,
457        output: &mut [u8],
458        dst_width: u32,
459        dst_height: u32,
460    ) -> Result<()> {
461        ops::ScaleOperation::scale(
462            &self.device,
463            input,
464            src_width,
465            src_height,
466            output,
467            dst_width,
468            dst_height,
469            ops::ScaleFilter::Lanczos3,
470        )
471    }
472
473    /// Apply Gaussian blur
474    ///
475    /// # Arguments
476    ///
477    /// * `input` - Input image buffer (packed RGBA format)
478    /// * `output` - Output image buffer (packed RGBA format)
479    /// * `width` - Image width
480    /// * `height` - Image height
481    /// * `sigma` - Blur radius (standard deviation)
482    ///
483    /// # Errors
484    ///
485    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
486    #[allow(clippy::too_many_arguments)]
487    pub fn gaussian_blur(
488        &self,
489        input: &[u8],
490        output: &mut [u8],
491        width: u32,
492        height: u32,
493        sigma: f32,
494    ) -> Result<()> {
495        ops::FilterOperation::gaussian_blur(&self.device, input, output, width, height, sigma)
496    }
497
498    /// Apply sharpening filter
499    ///
500    /// # Arguments
501    ///
502    /// * `input` - Input image buffer (packed RGBA format)
503    /// * `output` - Output image buffer (packed RGBA format)
504    /// * `width` - Image width
505    /// * `height` - Image height
506    /// * `amount` - Sharpening strength
507    ///
508    /// # Errors
509    ///
510    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
511    #[allow(clippy::too_many_arguments)]
512    pub fn sharpen(
513        &self,
514        input: &[u8],
515        output: &mut [u8],
516        width: u32,
517        height: u32,
518        amount: f32,
519    ) -> Result<()> {
520        ops::FilterOperation::sharpen(&self.device, input, output, width, height, amount)
521    }
522
523    /// Detect edges using Sobel operator
524    ///
525    /// # Arguments
526    ///
527    /// * `input` - Input image buffer (packed RGBA format)
528    /// * `output` - Output image buffer (packed RGBA format)
529    /// * `width` - Image width
530    /// * `height` - Image height
531    ///
532    /// # Errors
533    ///
534    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
535    pub fn edge_detect(
536        &self,
537        input: &[u8],
538        output: &mut [u8],
539        width: u32,
540        height: u32,
541    ) -> Result<()> {
542        ops::FilterOperation::edge_detect(&self.device, input, output, width, height)
543    }
544
545    /// Compute 2D DCT (Discrete Cosine Transform)
546    ///
547    /// # Arguments
548    ///
549    /// * `input` - Input data (f32 values)
550    /// * `output` - Output DCT coefficients
551    /// * `width` - Data width (must be multiple of 8)
552    /// * `height` - Data height (must be multiple of 8)
553    ///
554    /// # Errors
555    ///
556    /// Returns an error if dimensions are invalid or if the GPU operation fails.
557    pub fn dct_2d(&self, input: &[f32], output: &mut [f32], width: u32, height: u32) -> Result<()> {
558        ops::TransformOperation::dct_2d(&self.device, input, output, width, height)
559    }
560
561    /// Compute 2D IDCT (Inverse Discrete Cosine Transform)
562    ///
563    /// # Arguments
564    ///
565    /// * `input` - Input DCT coefficients
566    /// * `output` - Output reconstructed data
567    /// * `width` - Data width (must be multiple of 8)
568    /// * `height` - Data height (must be multiple of 8)
569    ///
570    /// # Errors
571    ///
572    /// Returns an error if dimensions are invalid or if the GPU operation fails.
573    pub fn idct_2d(
574        &self,
575        input: &[f32],
576        output: &mut [f32],
577        width: u32,
578        height: u32,
579    ) -> Result<()> {
580        ops::TransformOperation::idct_2d(&self.device, input, output, width, height)
581    }
582
    /// Wait for all GPU operations to complete
    ///
    /// This is useful for synchronization and benchmarking.
    ///
    /// Delegates directly to [`GpuDevice::wait`]. The method returns no value,
    /// so callers cannot observe a failure through it.
    pub fn wait(&self) {
        self.device.wait();
    }
589}
590
591// GpuContext intentionally does not implement Default.
592//
593// GPU context creation is inherently fallible (no adapter, driver error, etc.).
594// Callers must use GpuContext::new() or GpuContext::with_device() and handle
595// the returned Result explicitly.  A silent Default impl that can either panic
596// or silently return a non-functional context would be misleading.
597//
598// If a best-effort fallback context is needed, use:
599//   GpuContext::new().or_else(|_| GpuContext::with_device(0))