Skip to main content

oxillama_gpu/
buffer.rs

//! GPU buffer helpers — upload and download `f32`/`u32` arrays, and upload uniforms.
//!
//! These helpers abstract the common pattern of:
//! 1. Uploading a `&[f32]` or `&[u32]` slice to a wgpu storage buffer (for shader reads/writes).
//! 2. Downloading data from the GPU back to a `Vec<f32>` or `Vec<u32>` via a staging
//!    buffer + `map_async`.
//! 3. Uploading a single `bytemuck::Pod` value to a uniform buffer.
//!
//! All functions are gated behind `#[cfg(feature = "gpu")]`.  The module
//! itself is always compiled so that call-sites remain syntactically valid.
10
11#[cfg(feature = "gpu")]
12use crate::error::{GpuError, GpuResult};
13
/// Copy a host-side `&[f32]` slice into a newly created GPU storage buffer.
///
/// The buffer is created with `STORAGE | COPY_SRC` usage and initialised with
/// the raw bytes of `data`, so it can be bound as a shader storage binding and
/// used as the source of a buffer-to-buffer copy. `label` becomes the buffer's
/// debug label.
#[cfg(feature = "gpu")]
pub(crate) fn upload_f32(device: &wgpu::Device, label: &str, data: &[f32]) -> wgpu::Buffer {
    use wgpu::util::{BufferInitDescriptor, DeviceExt};

    // Reinterpret the f32 slice as bytes; no copy happens until wgpu uploads it.
    let bytes: &[u8] = bytemuck::cast_slice(data);
    let descriptor = BufferInitDescriptor {
        label: Some(label),
        contents: bytes,
        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
    };
    device.create_buffer_init(&descriptor)
}
26
/// Allocate a GPU storage buffer sized to hold `len` f32 elements.
///
/// No initial contents are supplied and the buffer is not mapped at creation.
/// It is created with `STORAGE | COPY_SRC` usage, so a shader can write into
/// it and its contents can later be copied out (e.g. into a staging buffer).
#[cfg(feature = "gpu")]
pub(crate) fn create_output_f32(device: &wgpu::Device, label: &str, len: usize) -> wgpu::Buffer {
    let byte_len = (len * std::mem::size_of::<f32>()) as u64;
    let descriptor = wgpu::BufferDescriptor {
        label: Some(label),
        size: byte_len,
        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
        mapped_at_creation: false,
    };
    device.create_buffer(&descriptor)
}
37
/// Build a uniform buffer whose contents are the bytes of a single
/// `bytemuck::Pod` value.
///
/// The buffer is created with `UNIFORM | COPY_DST` usage, so it can be bound
/// as a uniform and be the destination of later copies/writes. `label`
/// becomes the buffer's debug label.
#[cfg(feature = "gpu")]
pub(crate) fn upload_uniform<T: bytemuck::Pod>(
    device: &wgpu::Device,
    label: &str,
    value: &T,
) -> wgpu::Buffer {
    use wgpu::util::{BufferInitDescriptor, DeviceExt};

    let bytes: &[u8] = bytemuck::bytes_of(value);
    let descriptor = BufferInitDescriptor {
        label: Some(label),
        contents: bytes,
        usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
    };
    device.create_buffer_init(&descriptor)
}
52
/// Read back `len` f32 values from the start of `src_buf` on the GPU.
///
/// The data is copied into a temporary `MAP_READ | COPY_DST` staging buffer,
/// which is then mapped and copied into a `Vec<f32>`. For a non-zero `len`
/// the call blocks until the GPU work submitted prior to (and by) this call
/// completes. A `len` of zero returns an empty vector immediately without
/// touching the GPU.
///
/// # Errors
///
/// Returns [`GpuError::BufferMap`] when:
/// - the requested byte size (`len * 4`) does not fit in a `u64`,
/// - polling the device fails,
/// - the map callback's channel closes before a result is delivered, or
/// - the staging buffer cannot be mapped.
#[cfg(feature = "gpu")]
pub(crate) fn download_f32(
    device: &wgpu::Device,
    queue: &wgpu::Queue,
    src_buf: &wgpu::Buffer,
    len: usize,
) -> GpuResult<Vec<f32>> {
    // A zero-length readback has nothing to copy, and recent wgpu versions
    // reject empty buffer slices, so answer directly instead of mapping a
    // zero-sized staging buffer.
    if len == 0 {
        return Ok(Vec::new());
    }

    // Widen before multiplying so the byte size cannot silently wrap (the
    // plain `usize` product can overflow, notably on 32-bit targets).
    let byte_len = (len as u64)
        .checked_mul(std::mem::size_of::<f32>() as u64)
        .ok_or_else(|| GpuError::BufferMap {
            detail: format!("readback size overflows u64: {len} f32 elements"),
        })?;

    let staging = device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("gpu-staging-readback"),
        size: byte_len,
        usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
        mapped_at_creation: false,
    });

    // Record and submit the device-side copy from the source into staging.
    let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
        label: Some("readback"),
    });
    encoder.copy_buffer_to_buffer(src_buf, 0, &staging, 0, byte_len);
    queue.submit([encoder.finish()]);

    // Map the staging buffer and wait for the GPU to finish.
    let slice = staging.slice(..);
    let (tx, rx) = std::sync::mpsc::channel();
    slice.map_async(wgpu::MapMode::Read, move |result| {
        // Ignore send errors — receiver may have already dropped if GPU failed.
        let _ = tx.send(result);
    });
    device
        .poll(wgpu::PollType::Wait {
            submission_index: None,
            timeout: None,
        })
        .map_err(|e| GpuError::BufferMap {
            detail: format!("{e:?}"),
        })?;

    // First `?` handles a closed channel, second `?` the mapping result itself.
    rx.recv()
        .map_err(|_| GpuError::BufferMap {
            detail: "channel closed before GPU mapped buffer".to_owned(),
        })?
        .map_err(|e| GpuError::BufferMap {
            detail: format!("{e:?}"),
        })?;

    // Copy out of the mapped range, then release the view before unmapping.
    let view = slice.get_mapped_range();
    let result: Vec<f32> = bytemuck::cast_slice(&view).to_vec();
    drop(view);
    staging.unmap();

    Ok(result)
}
109
/// Copy a host-side `&[u32]` slice into a newly created GPU storage buffer.
///
/// The buffer is created with `STORAGE | COPY_SRC` usage and initialised with
/// the raw bytes of `data`, making it usable as a shader storage binding of
/// `array<u32>` and as the source of a buffer-to-buffer copy.
#[cfg(feature = "gpu")]
pub(crate) fn upload_u32(device: &wgpu::Device, label: &str, data: &[u32]) -> wgpu::Buffer {
    use wgpu::util::{BufferInitDescriptor, DeviceExt};

    // Reinterpret the u32 slice as bytes for the initial buffer contents.
    let bytes: &[u8] = bytemuck::cast_slice(data);
    let descriptor = BufferInitDescriptor {
        label: Some(label),
        contents: bytes,
        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
    };
    device.create_buffer_init(&descriptor)
}
122
/// Allocate a GPU storage buffer sized to hold `len` u32 elements.
///
/// No initial contents are supplied and the buffer is not mapped at creation.
/// It is created with `STORAGE | COPY_SRC` usage, so a shader can write into
/// it and its contents can later be copied out (e.g. into a staging buffer).
#[cfg(feature = "gpu")]
pub(crate) fn create_output_u32(device: &wgpu::Device, label: &str, len: usize) -> wgpu::Buffer {
    let byte_len = (len * std::mem::size_of::<u32>()) as u64;
    let descriptor = wgpu::BufferDescriptor {
        label: Some(label),
        size: byte_len,
        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
        mapped_at_creation: false,
    };
    device.create_buffer(&descriptor)
}
133
/// Read back `len` u32 values from the start of `src_buf` on the GPU.
///
/// The data is copied into a temporary `MAP_READ | COPY_DST` staging buffer,
/// which is then mapped and copied into a `Vec<u32>`. For a non-zero `len`
/// the call blocks until the GPU work submitted prior to (and by) this call
/// completes. A `len` of zero returns an empty vector immediately without
/// touching the GPU.
///
/// # Errors
///
/// Returns [`GpuError::BufferMap`] when:
/// - the requested byte size (`len * 4`) does not fit in a `u64`,
/// - polling the device fails,
/// - the map callback's channel closes before a result is delivered, or
/// - the staging buffer cannot be mapped.
#[cfg(feature = "gpu")]
pub(crate) fn download_u32(
    device: &wgpu::Device,
    queue: &wgpu::Queue,
    src_buf: &wgpu::Buffer,
    len: usize,
) -> GpuResult<Vec<u32>> {
    // A zero-length readback has nothing to copy, and recent wgpu versions
    // reject empty buffer slices, so answer directly instead of mapping a
    // zero-sized staging buffer.
    if len == 0 {
        return Ok(Vec::new());
    }

    // Widen before multiplying so the byte size cannot silently wrap (the
    // plain `usize` product can overflow, notably on 32-bit targets).
    let byte_len = (len as u64)
        .checked_mul(std::mem::size_of::<u32>() as u64)
        .ok_or_else(|| GpuError::BufferMap {
            detail: format!("readback size overflows u64: {len} u32 elements"),
        })?;

    let staging = device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("gpu-staging-readback-u32"),
        size: byte_len,
        usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
        mapped_at_creation: false,
    });

    // Record and submit the device-side copy from the source into staging.
    let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
        label: Some("readback-u32"),
    });
    encoder.copy_buffer_to_buffer(src_buf, 0, &staging, 0, byte_len);
    queue.submit([encoder.finish()]);

    // Request the mapping; the callback forwards the outcome over a channel.
    let slice = staging.slice(..);
    let (tx, rx) = std::sync::mpsc::channel();
    slice.map_async(wgpu::MapMode::Read, move |result| {
        // A failed send only means the receiver is gone; nothing to do then.
        let _ = tx.send(result);
    });
    // Block until the device has processed the submitted work.
    device
        .poll(wgpu::PollType::Wait {
            submission_index: None,
            timeout: None,
        })
        .map_err(|e| GpuError::BufferMap {
            detail: format!("{e:?}"),
        })?;

    // First `?` handles a closed channel, second `?` the mapping result itself.
    rx.recv()
        .map_err(|_| GpuError::BufferMap {
            detail: "channel closed before GPU mapped u32 buffer".to_owned(),
        })?
        .map_err(|e| GpuError::BufferMap {
            detail: format!("{e:?}"),
        })?;

    // Copy out of the mapped range, then release the view before unmapping.
    let view = slice.get_mapped_range();
    let result: Vec<u32> = bytemuck::cast_slice(&view).to_vec();
    drop(view);
    staging.unmap();

    Ok(result)
}
187
// ─── stub when the gpu feature is absent ─────────────────────────────────────
//
// No stubs are needed here: the `buffer` module functions are only called from
// kernel code that is itself gated with `#[cfg(feature = "gpu")]`.