pub trait TileGpuBackend: Send + Sync {
type Buffer: Send + Sync;
// Required methods
fn create_tile_buffers(
&self,
tile_size: u32,
) -> Result<TileGpuBuffers<Self::Buffer>>;
fn upload_initial_state(
&self,
buffers: &TileGpuBuffers<Self::Buffer>,
pressure: &[f32],
pressure_prev: &[f32],
) -> Result<()>;
fn fdtd_step(
&self,
buffers: &TileGpuBuffers<Self::Buffer>,
params: &FdtdParams,
) -> Result<()>;
fn extract_halo(
&self,
buffers: &TileGpuBuffers<Self::Buffer>,
edge: Edge,
) -> Result<Vec<f32>>;
fn inject_halo(
&self,
buffers: &TileGpuBuffers<Self::Buffer>,
edge: Edge,
data: &[f32],
) -> Result<()>;
fn swap_buffers(&self, buffers: &mut TileGpuBuffers<Self::Buffer>);
fn read_interior_pressure(
&self,
buffers: &TileGpuBuffers<Self::Buffer>,
) -> Result<Vec<f32>>;
fn apply_boundary(
&self,
buffers: &TileGpuBuffers<Self::Buffer>,
edge: Edge,
condition: BoundaryCondition,
) -> Result<()>;
fn synchronize(&self) -> Result<()>;
}Expand description
Unified trait for GPU backends (CUDA and WGPU).
Implementations must provide GPU-resident tile buffers with:
- Double-buffered pressure (ping-pong)
- Halo staging buffers for K2K exchange
- Minimal transfer operations
Required Associated Types
type Buffer: Send + Sync
Required Methods
fn create_tile_buffers(
&self,
tile_size: u32,
) -> Result<TileGpuBuffers<Self::Buffer>>
fn create_tile_buffers( &self, tile_size: u32, ) -> Result<TileGpuBuffers<Self::Buffer>>
Create GPU buffers for a tile.
Returns buffers for:
- pressure: 18x18 f32 buffer (current state)
- pressure_prev: 18x18 f32 buffer (previous state)
- halo_staging: 4 × 16 f32 buffers (one per edge)
fn upload_initial_state(
&self,
buffers: &TileGpuBuffers<Self::Buffer>,
pressure: &[f32],
pressure_prev: &[f32],
) -> Result<()>
fn upload_initial_state( &self, buffers: &TileGpuBuffers<Self::Buffer>, pressure: &[f32], pressure_prev: &[f32], ) -> Result<()>
Upload initial pressure data (one-time, at tile creation).
fn fdtd_step(
&self,
buffers: &TileGpuBuffers<Self::Buffer>,
params: &FdtdParams,
) -> Result<()>
fn fdtd_step( &self, buffers: &TileGpuBuffers<Self::Buffer>, params: &FdtdParams, ) -> Result<()>
Execute FDTD step entirely on GPU (no host transfer).
Reads from current buffer, writes to previous buffer (ping-pong).
fn extract_halo(
&self,
buffers: &TileGpuBuffers<Self::Buffer>,
edge: Edge,
) -> Result<Vec<f32>>
fn extract_halo( &self, buffers: &TileGpuBuffers<Self::Buffer>, edge: Edge, ) -> Result<Vec<f32>>
Extract halo from GPU to host (small transfer for K2K).
Returns 16 f32 values for the specified edge.
fn inject_halo(
&self,
buffers: &TileGpuBuffers<Self::Buffer>,
edge: Edge,
data: &[f32],
) -> Result<()>
fn inject_halo( &self, buffers: &TileGpuBuffers<Self::Buffer>, edge: Edge, data: &[f32], ) -> Result<()>
Inject neighbor halo from K2K message into GPU buffer.
Takes 16 f32 values to write to the specified halo region.
fn swap_buffers(&self, buffers: &mut TileGpuBuffers<Self::Buffer>)
fn swap_buffers(&self, buffers: &mut TileGpuBuffers<Self::Buffer>)
Swap ping-pong buffers (pointer swap, no data movement).
fn read_interior_pressure(
&self,
buffers: &TileGpuBuffers<Self::Buffer>,
) -> Result<Vec<f32>>
fn read_interior_pressure( &self, buffers: &TileGpuBuffers<Self::Buffer>, ) -> Result<Vec<f32>>
Read the tile's interior pressure for visualization.
Only called when the GUI needs to render (once per frame).
Returns the 16x16 interior values (not the full 18x18 buffer).
fn apply_boundary(
&self,
buffers: &TileGpuBuffers<Self::Buffer>,
edge: Edge,
condition: BoundaryCondition,
) -> Result<()>
fn apply_boundary( &self, buffers: &TileGpuBuffers<Self::Buffer>, edge: Edge, condition: BoundaryCondition, ) -> Result<()>
Apply boundary condition for a domain edge tile.
For tiles at the domain boundary (without a neighbor on one or more edges), this method applies the specified boundary condition to the halo region:
- Absorbing: Sets halo to zero (waves exit and don’t return)
- Reflecting: Mirrors interior values to halo (hard wall)
- Periodic: Would need opposite edge data (handled separately)
This is a GPU operation - more efficient than CPU-side manipulation.
fn synchronize(&self) -> Result<()>
fn synchronize(&self) -> Result<()>
Synchronize GPU operations.