oxiui_render_wgpu/gpu/ring_buffer.rs
//! GPU upload ring buffer for streaming vertex/index data.
//!
//! [`RingBuffer`] maintains a single large `VERTEX | INDEX | COPY_DST` GPU
//! buffer and a write cursor that advances by `align_up(size, alignment)` on
//! each allocation. When an allocation would run past the end of the buffer,
//! the write cursor is reset to offset 0 (a "ring" wrap) and the allocation is
//! placed at the start of the buffer.
//!
//! # Design
//!
//! This avoids the per-frame `create_buffer_init` / `create_buffer` allocations
//! that otherwise show up in GPU driver heap statistics. Instead, the caller
//! passes its bytes to [`RingBuffer::upload`], which writes them into the
//! buffer via `queue.write_buffer` and returns a [`RingAllocation`] describing
//! the byte range that now holds the data. The GPU reads from the same buffer
//! in the same frame — wgpu performs pending `write_buffer` writes at the start
//! of the next `Queue::submit`, so the data is guaranteed to be visible to any
//! draw commands in command buffers submitted after the write.
//!
//! # Safety / correctness contract
//!
//! - Allocations are *frame-scoped*: all allocations from a frame must be
//!   consumed (drawn) within that frame's command encoder before the next call
//!   to `reset()`.
//! - `reset()` must be called once per frame *before* any allocations for that
//!   frame. It does NOT wait for GPU work to finish — the caller is responsible
//!   for ensuring the GPU has consumed the previous frame's commands before
//!   overwriting the buffer (e.g. by submitting and waiting, or by using
//!   double-buffering at the `RingBuffer` level).
//!
//! # Headless / testing
//!
//! The ring buffer wraps a real `wgpu::Buffer`, so tests that exercise it must
//! acquire a real GPU device. CPU-only tests can use the [`RingBufferStats`]
//! type directly, without a device.
34
35use oxiui_core::UiError;
36
37// ── RingBufferStats ───────────────────────────────────────────────────────────
38
/// Counters and gauges describing a [`RingBuffer`] over its whole lifetime.
///
/// The `Default` value has every field set to zero.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct RingBufferStats {
    /// How many allocations have succeeded since the buffer was created.
    pub total_allocations: u64,
    /// How many times the cursor wrapped back to the start of the buffer.
    pub wrap_count: u64,
    /// How many times the buffer was replaced by a larger one.
    pub grow_count: u64,
    /// Size, in bytes, of the GPU buffer currently backing the ring.
    pub capacity_bytes: usize,
    /// Position of the write cursor, in bytes from the start of the buffer.
    pub cursor_bytes: usize,
}
53
54// ── RingAllocation ────────────────────────────────────────────────────────────
55
/// A sub-range allocation within a [`RingBuffer`].
///
/// The range starts at `offset` and covers `size` bytes, i.e.
/// `buf.slice(alloc.offset..alloc.offset + alloc.size)` selects it for use in
/// a render pass.
///
/// The type is a plain value: it can be copied, compared, hashed and
/// default-constructed (the default is the empty allocation at offset 0).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub struct RingAllocation {
    /// Byte offset from the start of the ring buffer.
    pub offset: u64,
    /// Byte size of the allocation (equal to the requested size, *not* the
    /// aligned stride).
    pub size: u64,
}
69
70// ── RingBuffer ────────────────────────────────────────────────────────────────
71
72/// A streaming GPU vertex/index ring buffer.
73///
74/// Holds a single `VERTEX | INDEX | COPY_DST` GPU buffer; sub-ranges are
75/// handed out sequentially and the cursor wraps back to zero at the end of
76/// each frame (or when the remaining space is insufficient for an allocation).
77pub struct RingBuffer {
78 /// The underlying GPU buffer.
79 pub buffer: wgpu::Buffer,
80 /// Current write cursor (byte offset from start of buffer).
81 cursor: usize,
82 /// Alignment requirement for each allocation (typically 4 bytes for
83 /// `VERTEX` buffers; use `device.limits().min_uniform_buffer_offset_alignment`
84 /// for uniform buffers).
85 alignment: u64,
86 /// Lifetime statistics.
87 stats: RingBufferStats,
88}
89
90impl RingBuffer {
91 /// Minimum initial buffer capacity in bytes.
92 const MIN_CAPACITY: usize = 64 * 1024; // 64 KiB
93
94 /// Create a new ring buffer with an initial capacity of
95 /// `max(initial_bytes, MIN_CAPACITY)` bytes.
96 ///
97 /// `alignment` is the byte alignment applied to every allocation.
98 /// For vertex buffers 4 is typical; for uniform buffers use
99 /// `device.limits().min_uniform_buffer_offset_alignment`.
100 pub fn new(device: &wgpu::Device, initial_bytes: usize, alignment: u64) -> Self {
101 let capacity = initial_bytes.max(Self::MIN_CAPACITY).next_power_of_two();
102 let buffer = device.create_buffer(&wgpu::BufferDescriptor {
103 label: Some("oxiui-render-wgpu ring buffer"),
104 size: capacity as u64,
105 usage: wgpu::BufferUsages::VERTEX
106 | wgpu::BufferUsages::INDEX
107 | wgpu::BufferUsages::COPY_DST,
108 mapped_at_creation: false,
109 });
110 let stats = RingBufferStats {
111 capacity_bytes: capacity,
112 ..Default::default()
113 };
114 Self {
115 buffer,
116 cursor: 0,
117 alignment: alignment.max(1),
118 stats,
119 }
120 }
121
122 /// Reset the write cursor to zero.
123 ///
124 /// Must be called once per frame **before** any allocations for that frame.
125 /// Does NOT wait for the GPU — the caller must ensure the previous frame's
126 /// GPU work has completed before calling `reset()`.
127 pub fn reset(&mut self) {
128 self.cursor = 0;
129 }
130
131 /// Allocate `size` bytes from the ring buffer and upload `data` into the
132 /// allocation via `queue.write_buffer`.
133 ///
134 /// Returns a [`RingAllocation`] describing the offset and size within
135 /// `self.buffer`.
136 ///
137 /// # Wrapping
138 ///
139 /// If the remaining capacity after the cursor is insufficient, the cursor
140 /// wraps to zero (one wrap per frame is normal; multiple wraps in a single
141 /// frame indicate the buffer is undersized — consider calling `grow`).
142 ///
143 /// # Growing
144 ///
145 /// If even a fresh buffer at offset 0 cannot fit the requested `size` the
146 /// buffer is automatically grown to `max(capacity * 2, align_up(size))`
147 /// and `grow_count` is incremented.
148 ///
149 /// # Errors
150 ///
151 /// Returns [`UiError::Render`] only if the allocation remains impossible
152 /// after an attempted grow (e.g. device OOM). In practice this should
153 /// not occur for reasonable data sizes.
154 pub fn upload(
155 &mut self,
156 device: &wgpu::Device,
157 queue: &wgpu::Queue,
158 data: &[u8],
159 ) -> Result<RingAllocation, UiError> {
160 let size = data.len();
161 if size == 0 {
162 return Ok(RingAllocation { offset: 0, size: 0 });
163 }
164
165 let aligned_size = align_up(size as u64, self.alignment) as usize;
166
167 // Check whether the remaining tail fits.
168 if self.cursor + aligned_size > self.stats.capacity_bytes {
169 // Wrap back to zero.
170 self.cursor = 0;
171 self.stats.wrap_count += 1;
172 }
173
174 // Grow if even the full buffer is too small.
175 if aligned_size > self.stats.capacity_bytes {
176 self.grow(device, aligned_size)?;
177 }
178
179 let offset = self.cursor as u64;
180 queue.write_buffer(&self.buffer, offset, data);
181 self.cursor += aligned_size;
182 self.stats.total_allocations += 1;
183 self.stats.cursor_bytes = self.cursor;
184
185 Ok(RingAllocation {
186 offset,
187 size: size as u64,
188 })
189 }
190
191 /// Explicitly grow the ring buffer to at least `min_size` bytes.
192 ///
193 /// The new capacity is `max(capacity * 2, next_power_of_two(min_size))`.
194 /// The cursor is reset to zero after a grow.
195 ///
196 /// # Errors
197 ///
198 /// Returns [`UiError::Render`] on failure (typically OOM).
199 pub fn grow(&mut self, device: &wgpu::Device, min_size: usize) -> Result<(), UiError> {
200 let new_cap = (self.stats.capacity_bytes * 2)
201 .max(min_size.next_power_of_two())
202 .max(Self::MIN_CAPACITY);
203 let new_buf = device.create_buffer(&wgpu::BufferDescriptor {
204 label: Some("oxiui-render-wgpu ring buffer (grown)"),
205 size: new_cap as u64,
206 usage: wgpu::BufferUsages::VERTEX
207 | wgpu::BufferUsages::INDEX
208 | wgpu::BufferUsages::COPY_DST,
209 mapped_at_creation: false,
210 });
211 // Replace the buffer and reset the cursor.
212 self.buffer = new_buf;
213 self.cursor = 0;
214 self.stats.capacity_bytes = new_cap;
215 self.stats.grow_count += 1;
216 self.stats.cursor_bytes = 0;
217 Ok(())
218 }
219
220 /// Return a snapshot of the ring buffer's lifetime statistics.
221 pub fn stats(&self) -> RingBufferStats {
222 let mut s = self.stats;
223 s.cursor_bytes = self.cursor;
224 s
225 }
226
227 /// Current byte capacity of the underlying GPU buffer.
228 pub fn capacity(&self) -> usize {
229 self.stats.capacity_bytes
230 }
231
232 /// Current write cursor offset in bytes.
233 pub fn cursor(&self) -> usize {
234 self.cursor
235 }
236}
237
238// ── Private helpers ───────────────────────────────────────────────────────────
239
/// Return the smallest multiple of `align` that is greater than or equal to
/// `n`. An `align` of 0 is treated as 1, so `n` is returned unchanged.
#[inline]
fn align_up(n: u64, align: u64) -> u64 {
    // `next_multiple_of` panics on a zero step, so substitute 1 for 0 first.
    let step = if align == 0 { 1 } else { align };
    n.next_multiple_of(step)
}
246
247// ── Tests ─────────────────────────────────────────────────────────────────────
248
#[cfg(test)]
mod tests {
    use super::*;

    /// `align_up` is pure arithmetic, so it is checked without a GPU device.
    #[test]
    fn align_up_rounds_correctly() {
        // (input, alignment, expected result)
        let cases: [(u64, u64, u64); 6] = [
            (0, 4, 0),
            (1, 4, 4),
            (4, 4, 4),
            (5, 4, 8),
            (256, 256, 256),
            (257, 256, 512),
        ];
        for (n, align, expected) in cases {
            assert_eq!(align_up(n, align), expected, "align_up({n}, {align})");
        }
    }

    /// Freshly defaulted statistics start with all lifetime counters at zero.
    #[test]
    fn ring_buffer_stats_default() {
        let RingBufferStats {
            total_allocations,
            wrap_count,
            grow_count,
            ..
        } = RingBufferStats::default();
        assert_eq!(total_allocations, 0);
        assert_eq!(wrap_count, 0);
        assert_eq!(grow_count, 0);
    }

    /// An allocation reports exactly the offset and size it was built with.
    #[test]
    fn ring_allocation_size_preserved() {
        let (offset, size) = (128_u64, 56_u64);
        let alloc = RingAllocation { offset, size };
        assert_eq!(alloc.offset, 128);
        assert_eq!(alloc.size, 56);
    }
}
281}