Skip to main content

oximedia_gpu/
lib.rs

1//! GPU compute pipeline using WGPU for `OxiMedia`
2//!
3//! This crate provides a cross-platform GPU acceleration layer using WGPU,
4//! supporting Vulkan, Metal, DirectX 12, and WebGPU backends.
5//!
6//! # Features
7//!
8//! - Color space conversions (RGB ↔ YUV with BT.601, BT.709, BT.2020)
9//! - Image scaling (nearest, bilinear, bicubic)
10//! - Convolution filters (blur, sharpen, edge detection)
11//! - Transform operations (DCT, FFT)
12//! - Automatic CPU fallback
13//! - Multi-GPU support
14//!
15//! # Example
16//!
17//! ```no_run
18//! use oximedia_gpu::GpuContext;
19//!
20//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
21//! let ctx = GpuContext::new()?;
22//!
23//! let input = vec![0u8; 1920 * 1080 * 4];
24//! let mut output = vec![0u8; 1920 * 1080 * 4];
25//!
26//! ctx.rgb_to_yuv(&input, &mut output)?;
27//! # Ok(())
28//! # }
29//! ```
30
31#![allow(clippy::cast_possible_truncation)]
32#![allow(clippy::cast_sign_loss)]
33#![allow(clippy::cast_precision_loss)]
34#![allow(clippy::cast_possible_wrap)]
35#![allow(clippy::missing_errors_doc)]
36#![allow(clippy::missing_panics_doc)]
37
38// Core modules
39pub mod buffer;
40pub mod device;
41pub mod ops;
42pub mod shader;
43
44// New comprehensive modules
45pub mod accelerator;
46pub mod backend;
47pub mod cache;
48pub mod compiler;
49pub mod compute;
50pub mod kernels;
51pub mod memory;
52pub mod queue;
53pub mod sync;
54
55// GPU compute operation modules
56pub mod histogram;
57pub mod motion_detect;
58pub mod pipeline;
59pub mod texture;
60pub mod video_process;
61
62// New kernel / pass / shader-param modules
63pub mod compute_pass;
64pub mod kernel;
65pub mod shader_params;
66
67// Wave-8 new modules
68pub mod compute_dispatch;
69pub mod memory_pool;
70pub mod shader_cache;
71
72// Wave-9 new modules
73pub mod gpu_buffer;
74pub mod gpu_fence;
75pub mod render_pass;
76
77// Wave-10 new modules
78pub mod command_buffer;
79pub mod resource_manager;
80pub mod sync_primitive;
81
82// Wave-11 new modules
83pub mod descriptor_set;
84pub mod gpu_stats;
85pub mod viewport;
86
87// Wave-12 new modules
88pub mod gpu_profiler;
89pub mod sampler;
90pub mod vertex_buffer;
91
92// Wave-13 new modules
93pub mod fence_pool;
94pub mod gpu_timer;
95pub mod upload_queue;
96
97// Wave-14 new modules
98pub mod buffer_copy;
99pub mod occupancy;
100pub mod workgroup;
101
102// Wave-15 new modules
103pub mod buffer_pool;
104pub mod compute_kernels;
105pub mod pipeline_stages;
106
107// Wave-16 new modules (0.1.2 enhancements)
108pub mod motion_estimation;
109pub mod multi_gpu;
110
111// Wave-17 new modules
112pub mod compute_shader;
113pub mod histogram_equalization;
114
115// Previously undeclared modules (discovered in src/ inventory)
116pub mod async_compute;
117pub mod barrier_manager;
118pub mod blend_kernel;
119pub mod color_convert_kernel;
120pub mod compute_graph;
121pub mod double_buffer;
122pub mod film_grain;
123pub mod gpu_cpu_verify;
124pub mod indirect_dispatch;
125pub mod kernel_scheduler;
126pub mod mipmap_gen;
127pub mod optical_flow;
128pub mod perspective_transform;
129pub mod pipeline_cache;
130pub mod readback;
131pub mod scale_kernel;
132pub mod texture_atlas;
133pub mod texture_cache;
134pub mod tone_curve;
135
136use std::sync::Arc;
137use thiserror::Error;
138
139// Accelerator exports
140pub use accelerator::{AcceleratorBuilder, CpuAccelerator, GpuAccelerator, WgpuAccelerator};
141
142// Core exports
143pub use buffer::{BufferType, GpuBuffer};
144pub use device::{GpuDevice, GpuDeviceInfo};
145pub use ops::quality_metrics::{
146    compute_ms_ssim, compute_psnr, compute_ssim, MsSsimResult, PsnrResult, SsimResult,
147};
148pub use ops::{
149    ChromaOps, ChromaSubsampling, ColorSpaceConversion, FilterOperation, ScaleOperation,
150    TransformOperation, YcbcrCoefficients,
151};
152
153// Backend exports
154pub use backend::{Backend, BackendCapabilities, BackendType, CpuBackend, VulkanBackend};
155
156// Cache exports
157pub use cache::{CacheStats, PipelineCache, ShaderCache};
158
159// Compiler exports
160pub use compiler::{
161    CompilationError, CompilationOptions, OptimizationLevel, ShaderCompiler, ShaderPreprocessor,
162};
163
164// Compute exports
165pub use compute::{
166    ComputeExecutor, ComputePassBuilder, ComputePipelineHandle, ComputePipelineManager,
167    DispatchHelper,
168};
169
170// Kernels exports
171pub use kernels::{
172    ColorConversionKernel, ConvolutionKernel, FilterKernel, ReduceKernel, ReduceOp, ResizeFilter,
173    ResizeKernel, TransformKernel, TransformType,
174};
175
176// Memory exports
177pub use memory::{ManagedBuffer, MemoryAllocator, MemoryPool, MemoryStats};
178
179// Queue exports
180pub use queue::{
181    AsyncSubmission, BatchSubmitter, CommandBufferBuilder, CommandQueue, QueueManager, QueueType,
182};
183
184// Sync exports
185pub use sync::{Barrier, Event, Fence, Semaphore};
186
187// Workgroup auto-tuner exports
188pub use workgroup::{DeviceLimits, WorkgroupAutoTuner};
189
190// Memory pool defragmentation exports
191pub use memory_pool::{CompactionPlan, DefragResult, MigrationEntry};
192
193// Video processing exports
194pub use buffer_pool::SubAllocator;
195pub use compute_pass::{BatchedComputePass, DispatchCommand};
196pub use histogram::{ChannelHistogram, ImageHistogram};
197pub use motion_detect::{MotionAnalysis, MotionDetector, MotionRegion, Sensitivity};
198pub use pipeline::{
199    BarrierBatcher, BarrierKind, BarrierStrategy, BufferBarrier, FlushRecord, GpuPipeline,
200    PipelineMetrics, PipelineNode, PipelineStage,
201};
202pub use texture::{TextureDescriptor, TextureFormat, TexturePool};
203pub use video_process::{FrameProcessConfig, FrameProcessResult, VideoFrameProcessor};
204
205// Wave-17 exports
206pub use compute_shader::{ComputeShaderSimulator, ShaderKernel, ThreadGroupContext};
207pub use histogram_equalization::{ClaheConfig, EqualizationStats, HistogramEqualizer};
208
209/// Error types for GPU operations
210#[derive(Debug, Error)]
211pub enum GpuError {
212    /// Device initialization failed
213    #[error("Failed to initialize GPU device: {0}")]
214    DeviceInit(String),
215
216    /// Adapter selection failed
217    #[error("No suitable GPU adapter found")]
218    NoAdapter,
219
220    /// Device request failed
221    #[error("Failed to request GPU device: {0}")]
222    DeviceRequest(String),
223
224    /// Buffer creation failed
225    #[error("Failed to create GPU buffer: {0}")]
226    BufferCreation(String),
227
228    /// Shader compilation failed
229    #[error("Failed to compile shader: {0}")]
230    ShaderCompilation(String),
231
232    /// Pipeline creation failed
233    #[error("Failed to create compute pipeline: {0}")]
234    PipelineCreation(String),
235
236    /// Command submission failed
237    #[error("Failed to submit GPU commands: {0}")]
238    CommandSubmission(String),
239
240    /// Buffer mapping failed
241    #[error("Failed to map GPU buffer: {0}")]
242    BufferMapping(String),
243
244    /// Invalid dimensions
245    #[error("Invalid image dimensions: {width}x{height}")]
246    InvalidDimensions { width: u32, height: u32 },
247
248    /// Invalid buffer size
249    #[error("Invalid buffer size: expected {expected}, got {actual}")]
250    InvalidBufferSize { expected: usize, actual: usize },
251
252    /// Operation not supported
253    #[error("Operation not supported: {0}")]
254    NotSupported(String),
255
256    /// Internal error
257    #[error("Internal GPU error: {0}")]
258    Internal(String),
259}
260
261pub type Result<T> = std::result::Result<T, GpuError>;
262
263/// GPU context for compute operations
264///
265/// This is the main entry point for GPU-accelerated operations.
266/// It manages device selection, resource allocation, and command submission.
267pub struct GpuContext {
268    device: Arc<GpuDevice>,
269}
270
271impl GpuContext {
272    /// Create a new GPU context with automatic device selection
273    ///
274    /// This will select the most suitable GPU device available on the system.
275    /// If no GPU is available, an error is returned.
276    ///
277    /// # Errors
278    ///
279    /// Returns an error if no suitable GPU device is found or if device
280    /// initialization fails.
281    pub fn new() -> Result<Self> {
282        let device = GpuDevice::new(None)?;
283        Ok(Self {
284            device: Arc::new(device),
285        })
286    }
287
288    /// Create a new GPU context with a specific device
289    ///
290    /// # Arguments
291    ///
292    /// * `device_index` - Index of the device to use (from `list_devices`)
293    ///
294    /// # Errors
295    ///
296    /// Returns an error if the device index is invalid or if device
297    /// initialization fails.
298    pub fn with_device(device_index: usize) -> Result<Self> {
299        let device = GpuDevice::new(Some(device_index))?;
300        Ok(Self {
301            device: Arc::new(device),
302        })
303    }
304
305    /// List available GPU devices
306    ///
307    /// Returns information about all GPU devices available on the system.
308    pub fn list_devices() -> Result<Vec<GpuDeviceInfo>> {
309        GpuDevice::list_devices()
310    }
311
312    /// Get information about the current device
313    #[must_use]
314    pub fn device_info(&self) -> &GpuDeviceInfo {
315        self.device.info()
316    }
317
318    /// Convert RGB to YUV (BT.601)
319    ///
320    /// # Arguments
321    ///
322    /// * `input` - Input RGB buffer (packed RGBA format)
323    /// * `output` - Output YUV buffer (packed YUVA format)
324    ///
325    /// # Errors
326    ///
327    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
328    pub fn rgb_to_yuv(&self, input: &[u8], output: &mut [u8]) -> Result<()> {
329        if input.len() != output.len() {
330            return Err(GpuError::InvalidBufferSize {
331                expected: input.len(),
332                actual: output.len(),
333            });
334        }
335
336        if input.len() % 4 != 0 {
337            return Err(GpuError::InvalidBufferSize {
338                expected: (input.len() / 4) * 4,
339                actual: input.len(),
340            });
341        }
342
343        let width = ((input.len() / 4) as f32).sqrt() as u32;
344        let height = width;
345
346        ops::ColorSpaceConversion::rgb_to_yuv(
347            &self.device,
348            input,
349            output,
350            width,
351            height,
352            ops::ColorSpace::BT601,
353        )
354    }
355
356    /// Convert YUV to RGB (BT.601)
357    ///
358    /// # Arguments
359    ///
360    /// * `input` - Input YUV buffer (packed YUVA format)
361    /// * `output` - Output RGB buffer (packed RGBA format)
362    ///
363    /// # Errors
364    ///
365    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
366    pub fn yuv_to_rgb(&self, input: &[u8], output: &mut [u8]) -> Result<()> {
367        if input.len() != output.len() {
368            return Err(GpuError::InvalidBufferSize {
369                expected: input.len(),
370                actual: output.len(),
371            });
372        }
373
374        if input.len() % 4 != 0 {
375            return Err(GpuError::InvalidBufferSize {
376                expected: (input.len() / 4) * 4,
377                actual: input.len(),
378            });
379        }
380
381        let width = ((input.len() / 4) as f32).sqrt() as u32;
382        let height = width;
383
384        ops::ColorSpaceConversion::yuv_to_rgb(
385            &self.device,
386            input,
387            output,
388            width,
389            height,
390            ops::ColorSpace::BT601,
391        )
392    }
393
394    /// Scale an image using bilinear interpolation
395    ///
396    /// # Arguments
397    ///
398    /// * `input` - Input image buffer (packed RGBA format)
399    /// * `src_width` - Source image width
400    /// * `src_height` - Source image height
401    /// * `output` - Output image buffer (packed RGBA format)
402    /// * `dst_width` - Destination image width
403    /// * `dst_height` - Destination image height
404    ///
405    /// # Errors
406    ///
407    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
408    pub fn scale_bilinear(
409        &self,
410        input: &[u8],
411        src_width: u32,
412        src_height: u32,
413        output: &mut [u8],
414        dst_width: u32,
415        dst_height: u32,
416    ) -> Result<()> {
417        ops::ScaleOperation::scale(
418            &self.device,
419            input,
420            src_width,
421            src_height,
422            output,
423            dst_width,
424            dst_height,
425            ops::ScaleFilter::Bilinear,
426        )
427    }
428
429    /// Scale an image using bicubic interpolation
430    ///
431    /// # Arguments
432    ///
433    /// * `input` - Input image buffer (packed RGBA format)
434    /// * `src_width` - Source image width
435    /// * `src_height` - Source image height
436    /// * `output` - Output image buffer (packed RGBA format)
437    /// * `dst_width` - Destination image width
438    /// * `dst_height` - Destination image height
439    ///
440    /// # Errors
441    ///
442    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
443    pub fn scale_bicubic(
444        &self,
445        input: &[u8],
446        src_width: u32,
447        src_height: u32,
448        output: &mut [u8],
449        dst_width: u32,
450        dst_height: u32,
451    ) -> Result<()> {
452        ops::ScaleOperation::scale(
453            &self.device,
454            input,
455            src_width,
456            src_height,
457            output,
458            dst_width,
459            dst_height,
460            ops::ScaleFilter::Bicubic,
461        )
462    }
463
464    /// Scale an image using Lanczos-3 interpolation (highest quality)
465    ///
466    /// # Arguments
467    ///
468    /// * `input` - Input image buffer (packed RGBA format)
469    /// * `src_width` - Source image width
470    /// * `src_height` - Source image height
471    /// * `output` - Output image buffer (packed RGBA format)
472    /// * `dst_width` - Destination image width
473    /// * `dst_height` - Destination image height
474    ///
475    /// # Errors
476    ///
477    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
478    pub fn scale_lanczos(
479        &self,
480        input: &[u8],
481        src_width: u32,
482        src_height: u32,
483        output: &mut [u8],
484        dst_width: u32,
485        dst_height: u32,
486    ) -> Result<()> {
487        ops::ScaleOperation::scale(
488            &self.device,
489            input,
490            src_width,
491            src_height,
492            output,
493            dst_width,
494            dst_height,
495            ops::ScaleFilter::Lanczos3,
496        )
497    }
498
499    /// Apply Gaussian blur
500    ///
501    /// # Arguments
502    ///
503    /// * `input` - Input image buffer (packed RGBA format)
504    /// * `output` - Output image buffer (packed RGBA format)
505    /// * `width` - Image width
506    /// * `height` - Image height
507    /// * `sigma` - Blur radius (standard deviation)
508    ///
509    /// # Errors
510    ///
511    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
512    #[allow(clippy::too_many_arguments)]
513    pub fn gaussian_blur(
514        &self,
515        input: &[u8],
516        output: &mut [u8],
517        width: u32,
518        height: u32,
519        sigma: f32,
520    ) -> Result<()> {
521        ops::FilterOperation::gaussian_blur(&self.device, input, output, width, height, sigma)
522    }
523
524    /// Apply sharpening filter
525    ///
526    /// # Arguments
527    ///
528    /// * `input` - Input image buffer (packed RGBA format)
529    /// * `output` - Output image buffer (packed RGBA format)
530    /// * `width` - Image width
531    /// * `height` - Image height
532    /// * `amount` - Sharpening strength
533    ///
534    /// # Errors
535    ///
536    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
537    #[allow(clippy::too_many_arguments)]
538    pub fn sharpen(
539        &self,
540        input: &[u8],
541        output: &mut [u8],
542        width: u32,
543        height: u32,
544        amount: f32,
545    ) -> Result<()> {
546        ops::FilterOperation::sharpen(&self.device, input, output, width, height, amount)
547    }
548
549    /// Detect edges using Sobel operator
550    ///
551    /// # Arguments
552    ///
553    /// * `input` - Input image buffer (packed RGBA format)
554    /// * `output` - Output image buffer (packed RGBA format)
555    /// * `width` - Image width
556    /// * `height` - Image height
557    ///
558    /// # Errors
559    ///
560    /// Returns an error if buffer sizes are invalid or if the GPU operation fails.
561    pub fn edge_detect(
562        &self,
563        input: &[u8],
564        output: &mut [u8],
565        width: u32,
566        height: u32,
567    ) -> Result<()> {
568        ops::FilterOperation::edge_detect(&self.device, input, output, width, height)
569    }
570
571    /// Compute 2D DCT (Discrete Cosine Transform)
572    ///
573    /// # Arguments
574    ///
575    /// * `input` - Input data (f32 values)
576    /// * `output` - Output DCT coefficients
577    /// * `width` - Data width (must be multiple of 8)
578    /// * `height` - Data height (must be multiple of 8)
579    ///
580    /// # Errors
581    ///
582    /// Returns an error if dimensions are invalid or if the GPU operation fails.
583    pub fn dct_2d(&self, input: &[f32], output: &mut [f32], width: u32, height: u32) -> Result<()> {
584        ops::TransformOperation::dct_2d(&self.device, input, output, width, height)
585    }
586
587    /// Compute 2D IDCT (Inverse Discrete Cosine Transform)
588    ///
589    /// # Arguments
590    ///
591    /// * `input` - Input DCT coefficients
592    /// * `output` - Output reconstructed data
593    /// * `width` - Data width (must be multiple of 8)
594    /// * `height` - Data height (must be multiple of 8)
595    ///
596    /// # Errors
597    ///
598    /// Returns an error if dimensions are invalid or if the GPU operation fails.
599    pub fn idct_2d(
600        &self,
601        input: &[f32],
602        output: &mut [f32],
603        width: u32,
604        height: u32,
605    ) -> Result<()> {
606        ops::TransformOperation::idct_2d(&self.device, input, output, width, height)
607    }
608
609    /// Wait for all GPU operations to complete
610    ///
611    /// This is useful for synchronization and benchmarking.
612    pub fn wait(&self) {
613        self.device.wait();
614    }
615}
616
617// GpuContext intentionally does not implement Default.
618//
619// GPU context creation is inherently fallible (no adapter, driver error, etc.).
620// Callers must use GpuContext::new() or GpuContext::with_device() and handle
621// the returned Result explicitly.  A silent Default impl that can either panic
622// or silently return a non-functional context would be misleading.
623//
624// If a best-effort fallback context is needed, use:
625//   GpuContext::new().or_else(|_| GpuContext::with_device(0))