oxigdal_gpu/backends/
mod.rs

1//! Backend-specific optimizations for different GPU APIs.
2//!
3//! This module provides platform-specific optimizations for CUDA, Vulkan,
4//! Metal, and DirectML backends.
5
6#[cfg(feature = "cuda")]
7pub mod cuda;
8
9#[cfg(feature = "vulkan")]
10pub mod vulkan;
11
12#[cfg(feature = "metal")]
13pub mod metal;
14
15#[cfg(feature = "directml")]
16pub mod directml;
17
18// Backend capability detection and optimization utilities
19
/// Feature and limit summary describing what a GPU backend can do.
///
/// The boolean fields record whether an optional feature is available; the
/// numeric fields record compute dispatch limits. The [`Default`] value
/// reports every optional feature as unavailable and uses the smallest
/// limits defined in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BackendCapabilities {
    /// `true` when tensor cores are supported.
    pub tensor_cores: bool,
    /// `true` when ray tracing is supported.
    pub ray_tracing: bool,
    /// `true` when mesh shaders are supported.
    pub mesh_shaders: bool,
    /// `true` when variable rate shading is supported.
    pub variable_rate_shading: bool,
    /// `true` when async compute is supported.
    pub async_compute: bool,
    /// `true` when peer-to-peer transfers are supported.
    pub p2p_transfer: bool,
    /// Largest allowed workgroup size, as a three-component tuple
    /// (presumably the per-axis x, y, z limits — confirm against callers).
    pub max_workgroup_size: (u32, u32, u32),
    /// Largest allowed number of compute invocations.
    pub max_compute_invocations: u32,
}

impl Default for BackendCapabilities {
    /// Builds the conservative baseline: no optional features, a
    /// `(256, 256, 64)` workgroup size limit and 256 compute invocations.
    fn default() -> Self {
        // Baseline limits used whenever nothing better is known.
        const BASELINE_WORKGROUP_SIZE: (u32, u32, u32) = (256, 256, 64);
        const BASELINE_INVOCATIONS: u32 = 256;
        // Every optional feature starts out as unavailable.
        const UNSUPPORTED: bool = false;

        Self {
            tensor_cores: UNSUPPORTED,
            ray_tracing: UNSUPPORTED,
            mesh_shaders: UNSUPPORTED,
            variable_rate_shading: UNSUPPORTED,
            async_compute: UNSUPPORTED,
            p2p_transfer: UNSUPPORTED,
            max_workgroup_size: BASELINE_WORKGROUP_SIZE,
            max_compute_invocations: BASELINE_INVOCATIONS,
        }
    }
}
55
/// Backend-specific optimization hints.
///
/// Each variant names one optimization strategy; the backend it is intended
/// for is noted on the variant. The enum carries no data, so it is cheap to
/// copy, can be compared for equality, and can be used as a key in hashed
/// collections (for example to de-duplicate a set of requested hints).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OptimizationHint {
    /// Use shared memory (CUDA/Vulkan).
    UseSharedMemory,
    /// Use warp-level primitives (CUDA).
    UseWarpPrimitives,
    /// Use subgroup operations (Vulkan).
    UseSubgroupOps,
    /// Use threadgroup memory (Metal).
    UseThreadgroupMemory,
    /// Prefer wave operations (DirectML).
    PreferWaveOps,
    /// Enable async execution.
    EnableAsyncExecution,
}
72
73/// Query backend capabilities.
74pub fn query_capabilities(backend: wgpu::Backend) -> BackendCapabilities {
75    match backend {
76        wgpu::Backend::Vulkan => BackendCapabilities {
77            async_compute: true,
78            max_workgroup_size: (1024, 1024, 64),
79            max_compute_invocations: 1024,
80            ..Default::default()
81        },
82        wgpu::Backend::Metal => BackendCapabilities {
83            async_compute: true,
84            max_workgroup_size: (1024, 1024, 64),
85            max_compute_invocations: 1024,
86            ..Default::default()
87        },
88        wgpu::Backend::Dx12 => BackendCapabilities {
89            async_compute: true,
90            max_workgroup_size: (1024, 1024, 64),
91            max_compute_invocations: 1024,
92            ..Default::default()
93        },
94        _ => BackendCapabilities::default(),
95    }
96}
oxigdal_gpu/backends/mod.rs

oxigdal_gpu/backends/
mod.rs