// astrelis_ui/instance_buffer.rs
1//! GPU instance buffer management for retained rendering.
2//!
3//! This module implements Phase 5 GPU infrastructure for managing instance buffers
4//! with efficient partial updates. Supports any Pod type and tracks dirty ranges
5//! for minimal GPU uploads.
6
7use crate::dirty::DirtyRanges;
8use astrelis_core::profiling::profile_function;
9use astrelis_render::wgpu;
10use bytemuck::Pod;
11
12/// GPU instance buffer with partial update support.
13///
14/// Maintains a CPU-side buffer and GPU buffer, tracking which ranges
15/// have been modified and need uploading. Supports efficient partial writes
16/// for retained-mode rendering where only dirty instances change.
/// GPU instance buffer with partial update support.
///
/// Maintains a CPU-side buffer and GPU buffer, tracking which ranges
/// have been modified and need uploading. Supports efficient partial writes
/// for retained-mode rendering where only dirty instances change.
///
/// Invariant: `instances.len() <= capacity` — the mutating methods reallocate
/// the GPU buffer before growing past `capacity`.
pub struct InstanceBuffer<T: Pod> {
    /// GPU buffer for instance data (VERTEX | COPY_DST usage).
    buffer: wgpu::Buffer,
    /// CPU-side instance data; the source of truth for uploads.
    instances: Vec<T>,
    /// Current capacity in number of instances (not bytes).
    capacity: usize,
    /// Ranges of `instances` (in instance indices) that need GPU upload.
    dirty_ranges: DirtyRanges,
    /// Total number of `queue.write_buffer` calls performed (for diagnostics).
    write_count: u64,
}
29
30impl<T: Pod> InstanceBuffer<T> {
31    /// Create a new instance buffer with the specified capacity.
32    pub fn new(device: &wgpu::Device, label: Option<&str>, capacity: usize) -> Self {
33        let buffer = device.create_buffer(&wgpu::BufferDescriptor {
34            label,
35            size: (capacity * std::mem::size_of::<T>()) as u64,
36            usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
37            mapped_at_creation: false,
38        });
39
40        Self {
41            buffer,
42            instances: Vec::with_capacity(capacity),
43            capacity,
44            dirty_ranges: DirtyRanges::new(),
45            write_count: 0,
46        }
47    }
48
49    /// Get the GPU buffer.
50    pub fn buffer(&self) -> &wgpu::Buffer {
51        &self.buffer
52    }
53
54    /// Get the current instance data.
55    pub fn instances(&self) -> &[T] {
56        &self.instances
57    }
58
59    /// Get the number of instances.
60    pub fn len(&self) -> usize {
61        self.instances.len()
62    }
63
64    /// Check if the buffer is empty.
65    pub fn is_empty(&self) -> bool {
66        self.instances.is_empty()
67    }
68
69    /// Get the capacity.
70    pub fn capacity(&self) -> usize {
71        self.capacity
72    }
73
74    /// Clear all instances.
75    pub fn clear(&mut self) {
76        self.instances.clear();
77        self.dirty_ranges.clear();
78    }
79
80    /// Set instances, replacing all existing data.
81    ///
82    /// Marks the entire buffer as dirty for GPU upload.
83    /// Reallocates GPU buffer if capacity is exceeded.
84    pub fn set_instances(&mut self, device: &wgpu::Device, instances: Vec<T>) {
85        let new_len = instances.len();
86
87        // Check if we need to reallocate
88        if new_len > self.capacity {
89            self.reallocate(device, new_len.next_power_of_two());
90        }
91
92        self.instances = instances;
93
94        // Mark entire buffer as dirty
95        if !self.instances.is_empty() {
96            self.dirty_ranges.mark_dirty(0, self.instances.len());
97        }
98    }
99
100    /// Update a specific range of instances.
101    ///
102    /// Replaces instances[start..end] with the provided data.
103    /// Marks the range as dirty for GPU upload.
104    pub fn update_range(&mut self, start: usize, new_data: &[T]) {
105        if new_data.is_empty() || start >= self.instances.len() {
106            return;
107        }
108
109        let end = (start + new_data.len()).min(self.instances.len());
110        let actual_len = end - start;
111
112        self.instances[start..end].copy_from_slice(&new_data[..actual_len]);
113        self.dirty_ranges.mark_dirty(start, end);
114    }
115
116    /// Update a single instance.
117    pub fn update_instance(&mut self, index: usize, instance: T) {
118        if index < self.instances.len() {
119            self.instances[index] = instance;
120            self.dirty_ranges.mark_dirty(index, index + 1);
121        }
122    }
123
124    /// Append instances to the buffer.
125    ///
126    /// Reallocates if capacity is exceeded.
127    pub fn append(&mut self, device: &wgpu::Device, new_instances: &[T]) {
128        let start_idx = self.instances.len();
129        let new_len = start_idx + new_instances.len();
130
131        // Check if we need to reallocate
132        if new_len > self.capacity {
133            self.reallocate(device, new_len.next_power_of_two());
134        }
135
136        self.instances.extend_from_slice(new_instances);
137        self.dirty_ranges.mark_dirty(start_idx, new_len);
138    }
139
140    /// Upload all dirty ranges to the GPU.
141    ///
142    /// This performs partial buffer writes for each dirty range,
143    /// minimizing GPU bandwidth usage for retained rendering.
144    pub fn upload_dirty(&mut self, queue: &wgpu::Queue) {
145        profile_function!();
146
147        if self.dirty_ranges.is_empty() {
148            return;
149        }
150
151        let instance_size = std::mem::size_of::<T>() as u64;
152
153        for range in self.dirty_ranges.iter() {
154            let start = range.start;
155            let end = range.end.min(self.instances.len());
156
157            if start >= end {
158                continue;
159            }
160
161            let offset = (start as u64) * instance_size;
162            let data = bytemuck::cast_slice(&self.instances[start..end]);
163
164            queue.write_buffer(&self.buffer, offset, data);
165            self.write_count += 1;
166        }
167
168        self.dirty_ranges.clear();
169    }
170
171    /// Force upload of the entire buffer, ignoring dirty tracking.
172    pub fn upload_all(&mut self, queue: &wgpu::Queue) {
173        if self.instances.is_empty() {
174            return;
175        }
176
177        let data = bytemuck::cast_slice(&self.instances);
178        queue.write_buffer(&self.buffer, 0, data);
179        self.write_count += 1;
180        self.dirty_ranges.clear();
181    }
182
183    /// Get dirty ranges for inspection.
184    pub fn dirty_ranges(&self) -> &DirtyRanges {
185        &self.dirty_ranges
186    }
187
188    /// Get write statistics.
189    pub fn write_count(&self) -> u64 {
190        self.write_count
191    }
192
193    /// Reallocate the GPU buffer with a new capacity.
194    fn reallocate(&mut self, device: &wgpu::Device, new_capacity: usize) {
195        // Note: WGPU buffers don't expose their label after creation
196        self.buffer = device.create_buffer(&wgpu::BufferDescriptor {
197            label: Some("UI Instance Buffer (Reallocated)"),
198            size: (new_capacity * std::mem::size_of::<T>()) as u64,
199            usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
200            mapped_at_creation: false,
201        });
202        self.capacity = new_capacity;
203
204        // Mark entire buffer as dirty after reallocation
205        if !self.instances.is_empty() {
206            self.dirty_ranges.mark_dirty(0, self.instances.len());
207        }
208    }
209
210    /// Get buffer statistics.
211    pub fn stats(&self) -> InstanceBufferStats {
212        InstanceBufferStats {
213            instance_count: self.instances.len(),
214            capacity: self.capacity,
215            utilization: if self.capacity > 0 {
216                (self.instances.len() as f32 / self.capacity as f32) * 100.0
217            } else {
218                0.0
219            },
220            dirty_ranges: self.dirty_ranges.stats().num_ranges,
221            write_count: self.write_count,
222            size_bytes: self.instances.len() * std::mem::size_of::<T>(),
223            capacity_bytes: self.capacity * std::mem::size_of::<T>(),
224        }
225    }
226}
227
/// Statistics about an instance buffer.
#[derive(Debug, Clone, Copy)]
pub struct InstanceBufferStats {
    /// Number of instances currently stored on the CPU side.
    pub instance_count: usize,
    /// Buffer capacity in instances.
    pub capacity: usize,
    /// Occupancy as a percentage in [0, 100]; 0 when capacity is 0.
    pub utilization: f32,
    /// Number of coalesced dirty ranges awaiting upload.
    pub dirty_ranges: usize,
    /// Total `write_buffer` calls performed over the buffer's lifetime.
    pub write_count: u64,
    /// Bytes occupied by the current instances.
    pub size_bytes: usize,
    /// Bytes reserved by the GPU buffer.
    pub capacity_bytes: usize,
}
239
/// Ring buffer strategy for multi-buffered instance data.
///
/// Useful for triple-buffering or managing multiple frames in flight.
/// Each frame gets its own slot in a circular buffer, so the CPU can write
/// one slot while the GPU reads another.
pub struct RingInstanceBuffer<T: Pod> {
    /// Multiple instance buffers, one per frame slot.
    buffers: Vec<InstanceBuffer<T>>,
    /// Current frame index; always in `0..frame_count`.
    current_frame: usize,
    /// Number of frames to buffer (equals `buffers.len()`).
    frame_count: usize,
}
252
253impl<T: Pod> RingInstanceBuffer<T> {
254    /// Create a new ring buffer with the specified number of frame slots.
255    pub fn new(
256        device: &wgpu::Device,
257        label_prefix: &str,
258        frame_count: usize,
259        capacity: usize,
260    ) -> Self {
261        let mut buffers = Vec::with_capacity(frame_count);
262
263        for i in 0..frame_count {
264            let label = format!("{} Frame {}", label_prefix, i);
265            buffers.push(InstanceBuffer::new(device, Some(&label), capacity));
266        }
267
268        Self {
269            buffers,
270            current_frame: 0,
271            frame_count,
272        }
273    }
274
275    /// Get the current frame's buffer.
276    pub fn current(&self) -> &InstanceBuffer<T> {
277        &self.buffers[self.current_frame]
278    }
279
280    /// Get mutable reference to current frame's buffer.
281    pub fn current_mut(&mut self) -> &mut InstanceBuffer<T> {
282        &mut self.buffers[self.current_frame]
283    }
284
285    /// Advance to the next frame.
286    pub fn advance_frame(&mut self) {
287        self.current_frame = (self.current_frame + 1) % self.frame_count;
288    }
289
290    /// Get all buffers.
291    pub fn buffers(&self) -> &[InstanceBuffer<T>] {
292        &self.buffers
293    }
294
295    /// Get current frame index.
296    pub fn frame_index(&self) -> usize {
297        self.current_frame
298    }
299}
300
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal Pod type standing in for a real UI instance layout.
    #[repr(C)]
    #[derive(Copy, Clone, Debug, PartialEq, bytemuck::Pod, bytemuck::Zeroable)]
    struct TestInstance {
        position: [f32; 2],
        color: [f32; 4],
    }

    impl TestInstance {
        fn new(x: f32, y: f32, r: f32, g: f32, b: f32, a: f32) -> Self {
            TestInstance {
                position: [x, y],
                color: [r, g, b, a],
            }
        }
    }

    // These tests cover only the CPU-side logic. Exercising the GPU paths
    // would require a real WGPU device (e.g. created via pollster) which is
    // not available in this test environment.

    #[test]
    fn test_instance_tracking() {
        // CPU-side instance construction and field layout.
        let red = TestInstance::new(0.0, 0.0, 1.0, 0.0, 0.0, 1.0);
        let green = TestInstance::new(10.0, 10.0, 0.0, 1.0, 0.0, 1.0);
        let instances = [red, green];

        assert_eq!(instances.len(), 2);
        assert_eq!(instances[0].position, [0.0, 0.0]);
    }

    #[test]
    fn test_dirty_range_tracking() {
        let mut ranges = DirtyRanges::new();

        for (lo, hi) in [(0, 5), (10, 15)] {
            ranges.mark_dirty(lo, hi);
        }

        assert_eq!(ranges.len(), 2);
        assert_eq!(ranges.total_dirty_count(), 10);
    }

    #[test]
    fn test_capacity_calculation() {
        let capacity = 100;
        let bytes_per_instance = std::mem::size_of::<TestInstance>();

        // [f32; 2] + [f32; 4] = 6 floats = 24 bytes per instance.
        assert_eq!(capacity * bytes_per_instance, capacity * 24);
    }

    #[test]
    fn test_stats_calculation() {
        // Mirrors the utilization formula used by InstanceBuffer::stats.
        let (instance_count, capacity) = (75usize, 100usize);
        let utilization = instance_count as f32 / capacity as f32 * 100.0;

        assert_eq!(utilization, 75.0);
    }
}