Skip to main content

oxiui_render_wgpu/gpu/
ring_buffer.rs

1//! GPU upload ring buffer for streaming vertex/index data.
2//!
3//! [`RingBuffer`] maintains a single large `VERTEX | INDEX | COPY_DST` GPU buffer and
4//! a write cursor that advances by `align_up(size, alignment)` on each
5//! allocation.  When the cursor would overflow the buffer capacity the entire
6//! buffer is reset to offset 0 (a "ring" wrap).
7//!
8//! # Design
9//!
10//! This avoids the per-frame `create_buffer_init` / `create_buffer` allocations
11//! that otherwise show up in GPU driver heap statistics.  Instead,
12//! [`RingBuffer::upload`] copies the data with `queue.write_buffer` and returns
13//! a [`RingAllocation`] describing the byte range it occupies.  The GPU reads
14//! from the same buffer in the same frame — wgpu enqueues `write_buffer` writes
15//! and applies them at the start of the next `queue.submit`, so the data is
16//! visible to every draw command in that submission.
17//!
18//! # Safety / correctness contract
19//!
20//! - Allocations are *frame-scoped*: all allocations from a frame must be
21//!   consumed (drawn) within that frame's command encoder before the next call
22//!   to `reset()`.
23//! - `reset()` must be called once per frame *before* any allocations for that
24//!   frame.  It does NOT wait for GPU work to finish — the caller is responsible
25//!   for ensuring the GPU has consumed the previous frame's commands before
26//!   overwriting the buffer (e.g. by submitting and waiting, or by using
27//!   double-buffering at the `RingBuffer` level).
28//!
29//! # Headless / testing
30//!
31//! The ring buffer wraps a real `wgpu::Buffer`, so tests that need it must
32//! acquire a real GPU device.  CPU-only tests can use the `RingBufferStats`
33//! type directly without a device.
34
35use oxiui_core::UiError;
36
37// ── RingBufferStats ───────────────────────────────────────────────────────────
38
/// Counters and gauges describing a [`RingBuffer`] over its whole lifetime.
///
/// This is plain `Copy` data with no GPU handles, so it can be constructed
/// and compared in CPU-only tests.  `Default` yields all-zero values.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct RingBufferStats {
    /// How many allocations have succeeded since the buffer was created.
    pub total_allocations: u64,
    /// How many times the write cursor wrapped back to offset zero because
    /// the remaining tail was too small.
    pub wrap_count: u64,
    /// How many times the GPU buffer was replaced by a larger one.
    pub grow_count: u64,
    /// Size, in bytes, of the GPU buffer currently in use.
    pub capacity_bytes: usize,
    /// Position of the write cursor, in bytes from the start of the buffer.
    pub cursor_bytes: usize,
}
53
54// ── RingAllocation ────────────────────────────────────────────────────────────
55
/// A sub-range allocation within a [`RingBuffer`].
///
/// Values of this type are returned by [`RingBuffer::upload`], which has
/// already copied the caller's bytes into the range via `queue.write_buffer`.
/// To consume the data in a render pass, bind
/// `buf.slice(alloc.offset..alloc.offset + alloc.size)`.
///
/// The type is plain `Copy` data and compares by value, so allocations can be
/// checked with `assert_eq!` in tests without a GPU device.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct RingAllocation {
    /// Byte offset from the start of the ring buffer.
    pub offset: u64,
    /// Byte size of the allocation (equal to the requested size, *not* the
    /// aligned stride that the ring buffer's cursor advanced by).
    pub size: u64,
}
69
70// ── RingBuffer ────────────────────────────────────────────────────────────────
71
72/// A streaming GPU vertex/index ring buffer.
73///
74/// Holds a single `VERTEX | INDEX | COPY_DST` GPU buffer; sub-ranges are
75/// handed out sequentially and the cursor wraps back to zero at the end of
76/// each frame (or when the remaining space is insufficient for an allocation).
77pub struct RingBuffer {
78    /// The underlying GPU buffer.
79    pub buffer: wgpu::Buffer,
80    /// Current write cursor (byte offset from start of buffer).
81    cursor: usize,
82    /// Alignment requirement for each allocation (typically 4 bytes for
83    /// `VERTEX` buffers; use `device.limits().min_uniform_buffer_offset_alignment`
84    /// for uniform buffers).
85    alignment: u64,
86    /// Lifetime statistics.
87    stats: RingBufferStats,
88}
89
90impl RingBuffer {
91    /// Minimum initial buffer capacity in bytes.
92    const MIN_CAPACITY: usize = 64 * 1024; // 64 KiB
93
94    /// Create a new ring buffer with an initial capacity of
95    /// `max(initial_bytes, MIN_CAPACITY)` bytes.
96    ///
97    /// `alignment` is the byte alignment applied to every allocation.
98    /// For vertex buffers 4 is typical; for uniform buffers use
99    /// `device.limits().min_uniform_buffer_offset_alignment`.
100    pub fn new(device: &wgpu::Device, initial_bytes: usize, alignment: u64) -> Self {
101        let capacity = initial_bytes.max(Self::MIN_CAPACITY).next_power_of_two();
102        let buffer = device.create_buffer(&wgpu::BufferDescriptor {
103            label: Some("oxiui-render-wgpu ring buffer"),
104            size: capacity as u64,
105            usage: wgpu::BufferUsages::VERTEX
106                | wgpu::BufferUsages::INDEX
107                | wgpu::BufferUsages::COPY_DST,
108            mapped_at_creation: false,
109        });
110        let stats = RingBufferStats {
111            capacity_bytes: capacity,
112            ..Default::default()
113        };
114        Self {
115            buffer,
116            cursor: 0,
117            alignment: alignment.max(1),
118            stats,
119        }
120    }
121
122    /// Reset the write cursor to zero.
123    ///
124    /// Must be called once per frame **before** any allocations for that frame.
125    /// Does NOT wait for the GPU — the caller must ensure the previous frame's
126    /// GPU work has completed before calling `reset()`.
127    pub fn reset(&mut self) {
128        self.cursor = 0;
129    }
130
131    /// Allocate `size` bytes from the ring buffer and upload `data` into the
132    /// allocation via `queue.write_buffer`.
133    ///
134    /// Returns a [`RingAllocation`] describing the offset and size within
135    /// `self.buffer`.
136    ///
137    /// # Wrapping
138    ///
139    /// If the remaining capacity after the cursor is insufficient, the cursor
140    /// wraps to zero (one wrap per frame is normal; multiple wraps in a single
141    /// frame indicate the buffer is undersized — consider calling `grow`).
142    ///
143    /// # Growing
144    ///
145    /// If even a fresh buffer at offset 0 cannot fit the requested `size` the
146    /// buffer is automatically grown to `max(capacity * 2, align_up(size))`
147    /// and `grow_count` is incremented.
148    ///
149    /// # Errors
150    ///
151    /// Returns [`UiError::Render`] only if the allocation remains impossible
152    /// after an attempted grow (e.g. device OOM).  In practice this should
153    /// not occur for reasonable data sizes.
154    pub fn upload(
155        &mut self,
156        device: &wgpu::Device,
157        queue: &wgpu::Queue,
158        data: &[u8],
159    ) -> Result<RingAllocation, UiError> {
160        let size = data.len();
161        if size == 0 {
162            return Ok(RingAllocation { offset: 0, size: 0 });
163        }
164
165        let aligned_size = align_up(size as u64, self.alignment) as usize;
166
167        // Check whether the remaining tail fits.
168        if self.cursor + aligned_size > self.stats.capacity_bytes {
169            // Wrap back to zero.
170            self.cursor = 0;
171            self.stats.wrap_count += 1;
172        }
173
174        // Grow if even the full buffer is too small.
175        if aligned_size > self.stats.capacity_bytes {
176            self.grow(device, aligned_size)?;
177        }
178
179        let offset = self.cursor as u64;
180        queue.write_buffer(&self.buffer, offset, data);
181        self.cursor += aligned_size;
182        self.stats.total_allocations += 1;
183        self.stats.cursor_bytes = self.cursor;
184
185        Ok(RingAllocation {
186            offset,
187            size: size as u64,
188        })
189    }
190
191    /// Explicitly grow the ring buffer to at least `min_size` bytes.
192    ///
193    /// The new capacity is `max(capacity * 2, next_power_of_two(min_size))`.
194    /// The cursor is reset to zero after a grow.
195    ///
196    /// # Errors
197    ///
198    /// Returns [`UiError::Render`] on failure (typically OOM).
199    pub fn grow(&mut self, device: &wgpu::Device, min_size: usize) -> Result<(), UiError> {
200        let new_cap = (self.stats.capacity_bytes * 2)
201            .max(min_size.next_power_of_two())
202            .max(Self::MIN_CAPACITY);
203        let new_buf = device.create_buffer(&wgpu::BufferDescriptor {
204            label: Some("oxiui-render-wgpu ring buffer (grown)"),
205            size: new_cap as u64,
206            usage: wgpu::BufferUsages::VERTEX
207                | wgpu::BufferUsages::INDEX
208                | wgpu::BufferUsages::COPY_DST,
209            mapped_at_creation: false,
210        });
211        // Replace the buffer and reset the cursor.
212        self.buffer = new_buf;
213        self.cursor = 0;
214        self.stats.capacity_bytes = new_cap;
215        self.stats.grow_count += 1;
216        self.stats.cursor_bytes = 0;
217        Ok(())
218    }
219
220    /// Return a snapshot of the ring buffer's lifetime statistics.
221    pub fn stats(&self) -> RingBufferStats {
222        let mut s = self.stats;
223        s.cursor_bytes = self.cursor;
224        s
225    }
226
227    /// Current byte capacity of the underlying GPU buffer.
228    pub fn capacity(&self) -> usize {
229        self.stats.capacity_bytes
230    }
231
232    /// Current write cursor offset in bytes.
233    pub fn cursor(&self) -> usize {
234        self.cursor
235    }
236}
237
238// ── Private helpers ───────────────────────────────────────────────────────────
239
/// Smallest multiple of `align` that is greater than or equal to `n`.
///
/// An `align` of zero is treated as one, in which case `n` is returned
/// unchanged.
#[inline]
fn align_up(n: u64, align: u64) -> u64 {
    let step = if align == 0 { 1 } else { align };
    match n % step {
        // Already on a boundary.
        0 => n,
        // Advance by whatever is missing to reach the next boundary.
        remainder => n + (step - remainder),
    }
}
246
247// ── Tests ─────────────────────────────────────────────────────────────────────
248
#[cfg(test)]
mod tests {
    use super::*;

    /// `align_up` is pure arithmetic, so it is checked without a GPU device
    /// against a table of `(n, align, expected)` triples.
    #[test]
    fn align_up_rounds_correctly() {
        let cases: [(u64, u64, u64); 6] = [
            (0, 4, 0),
            (1, 4, 4),
            (4, 4, 4),
            (5, 4, 8),
            (256, 256, 256),
            (257, 256, 512),
        ];
        for (n, align, expected) in cases {
            assert_eq!(align_up(n, align), expected);
        }
    }

    /// A default-constructed stats value starts with every counter at zero.
    #[test]
    fn ring_buffer_stats_default() {
        let RingBufferStats {
            total_allocations,
            wrap_count,
            grow_count,
            ..
        } = RingBufferStats::default();
        assert_eq!(total_allocations, 0);
        assert_eq!(wrap_count, 0);
        assert_eq!(grow_count, 0);
    }

    /// The allocation struct stores offset and size exactly as given.
    #[test]
    fn ring_allocation_size_preserved() {
        let RingAllocation { offset, size } = RingAllocation {
            offset: 128,
            size: 56,
        };
        assert_eq!(offset, 128);
        assert_eq!(size, 56);
    }
}