Skip to main content

oxiui_render_wgpu/gpu/
instance.rs

1//! Instanced rendering for repeated UI primitives.
2//!
3//! [`InstancedRectPipeline`] renders many axis-aligned rectangles (with
4//! optional uniform corner radius) using a single indexed quad mesh and a
5//! per-instance vertex buffer.  This is much more efficient than emitting
6//! individual `Vertex` quads when rendering large numbers of identical
7//! primitives (buttons, table cells, list items).
8//!
9//! # Pipeline design
10//!
11//! - **Mesh vertex buffer** (step_mode=Vertex): 4 corners of a unit quad as
12//!   `[f32; 2]` UV coordinates in `[0,1]²`.  Reused across all instances.
13//! - **Index buffer**: 6 indices forming 2 triangles for the unit quad.
14//! - **Instance vertex buffer** (step_mode=Instance): one [`InstanceRect`] per
15//!   rectangle, carrying position, size, colour, and corner radius.
16//!
17//! The vertex shader computes the pixel-space position from `inst_pos +
18//! uv * inst_size` and applies the 2-D orthographic projection.  The fragment
19//! shader runs a rounded-rect SDF when `corner_radius > 0`.
20//!
//! # Usage
//!
//! ```rust,ignore
//! let pipeline = InstancedRectPipeline::new(&device, sample_count);
//! let mut renderer = InstancedRectRenderer::new(256);
//! renderer.push(InstanceRect::rounded([10.0, 10.0], [80.0, 30.0],
//!                                     [1.0, 0.0, 0.0, 1.0], 4.0));
//! let draw_calls = renderer.flush(&device, &queue, &mut encoder, &pipeline,
//!                                 &globals_bind_group, &screen_view, None,
//!                                 viewport_w, viewport_h)?;
//! renderer.clear();
//! ```
30
31use bytemuck::{Pod, Zeroable};
32use oxiui_core::UiError;
33use wgpu::util::DeviceExt;
34
35use crate::gpu::device::TARGET_FORMAT;
36
37// ── InstanceRect ──────────────────────────────────────────────────────────────
38
/// Per-instance data for a single instanced rectangle.
///
/// 48 bytes (twelve `f32`s), `#[repr(C)]`, `Pod` + `Zeroable` so a slice of
/// instances can be cast to bytes and uploaded directly.  The field offsets
/// noted below must stay in sync with the per-instance vertex attributes
/// declared in `InstancedRectPipeline::new`.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Pod, Zeroable)]
pub struct InstanceRect {
    /// Top-left corner in pixel space (byte offset 0).
    pub pos: [f32; 2],
    /// Width × height in pixels (byte offset 8).
    pub size: [f32; 2],
    /// Straight-alpha RGBA colour in `[0, 1]` (byte offset 16).
    pub color: [f32; 4],
    /// Uniform corner radius in pixels (0 = sharp) (byte offset 32).
    pub corner_radius: f32,
    /// Padding that rounds the struct up from 36 to 48 bytes; it is covered
    /// by the buffer stride but not bound to any vertex attribute.
    pub _pad: [f32; 3],
}
56
// Compile-time size assert: InstanceRect must be 48 bytes
// (2 + 2 + 4 + 1 + 3 = 12 `f32` values × 4 bytes each = 48).
const _: () = assert!(core::mem::size_of::<InstanceRect>() == 48);
60
61impl InstanceRect {
62    /// Construct an instance with no corner radius.
63    pub fn rect(pos: [f32; 2], size: [f32; 2], color: [f32; 4]) -> Self {
64        Self {
65            pos,
66            size,
67            color,
68            corner_radius: 0.0,
69            _pad: [0.0; 3],
70        }
71    }
72
73    /// Construct an instance with a uniform corner radius.
74    pub fn rounded(pos: [f32; 2], size: [f32; 2], color: [f32; 4], corner_radius: f32) -> Self {
75        Self {
76            pos,
77            size,
78            color,
79            corner_radius,
80            _pad: [0.0; 3],
81        }
82    }
83}
84
85// ── UvVertex ─────────────────────────────────────────────────────────────────
86
/// A single mesh vertex: just a 2-D UV coordinate in `[0,1]²`.
///
/// `#[repr(C)]` + `Pod` so the quad's vertex array can be cast straight to
/// bytes for upload; the struct's size is used as the vertex-buffer stride.
#[repr(C)]
#[derive(Clone, Copy, Debug, Pod, Zeroable)]
struct UvVertex {
    /// UV coordinate; `[0, 0]` is labelled as the quad's top-left corner.
    uv: [f32; 2],
}
93
/// The four corners of the unit quad, listed top-left → top-right →
/// bottom-right → bottom-left.
///
/// With `v` increasing downwards (as the corner labels imply) this order is
/// clockwise on screen.  The pipeline is built with `cull_mode: None`, so the
/// winding does not affect what is drawn.
const QUAD_VERTICES: [UvVertex; 4] = [
    UvVertex { uv: [0.0, 0.0] }, // top-left
    UvVertex { uv: [1.0, 0.0] }, // top-right
    UvVertex { uv: [1.0, 1.0] }, // bottom-right
    UvVertex { uv: [0.0, 1.0] }, // bottom-left
];

/// Two triangles from the quad vertices: [0,1,2] and [0,2,3].
///
/// Stored as `u16` to match the `IndexFormat::Uint16` used when the index
/// buffer is bound for drawing.
const QUAD_INDICES: [u16; 6] = [0, 1, 2, 0, 2, 3];
104
105// ── InstancedRectPipeline ─────────────────────────────────────────────────────
106
/// The compiled instanced-rect render pipeline.
///
/// Bundles the pipeline object with the resources every draw needs: the
/// layout for the globals bind group and the shared unit-quad vertex and
/// index buffers.
pub struct InstancedRectPipeline {
    /// The render pipeline.
    pub pipeline: wgpu::RenderPipeline,
    /// Bind group layout for bind group 0 (the viewport `Globals` uniform).
    /// Callers build their globals bind group against this layout.
    pub globals_layout: wgpu::BindGroupLayout,
    /// The shared unit-quad index buffer (6 `u16` indices).
    pub index_buffer: wgpu::Buffer,
    /// The shared unit-quad vertex buffer (4 UV corners), bound at slot 1.
    pub vertex_buffer: wgpu::Buffer,
}
118
119impl InstancedRectPipeline {
120    /// Build the instanced-rect pipeline for a colour target in [`TARGET_FORMAT`].
121    ///
122    /// `sample_count` controls MSAA (1 = no MSAA, 4 or 8 = MSAA).
123    pub fn new(device: &wgpu::Device, sample_count: u32) -> Self {
124        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
125            label: Some("oxiui-render-wgpu instanced.wgsl"),
126            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/instanced.wgsl").into()),
127        });
128
129        let globals_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
130            label: Some("oxiui-render-wgpu instanced globals layout"),
131            entries: &[wgpu::BindGroupLayoutEntry {
132                binding: 0,
133                visibility: wgpu::ShaderStages::VERTEX,
134                ty: wgpu::BindingType::Buffer {
135                    ty: wgpu::BufferBindingType::Uniform,
136                    has_dynamic_offset: false,
137                    min_binding_size: None,
138                },
139                count: None,
140            }],
141        });
142
143        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
144            label: Some("oxiui-render-wgpu instanced pipeline layout"),
145            bind_group_layouts: &[Some(&globals_layout)],
146            immediate_size: 0,
147        });
148
149        // Per-instance attributes (step_mode = Instance):
150        //   pos           vec2  @0  offset  0
151        //   size          vec2  @1  offset  8
152        //   color         vec4  @2  offset 16
153        //   corner_radius f32   @3  offset 32
154        //   (3 × f32 pad)           offset 36
155        let instance_attrs = [
156            wgpu::VertexAttribute {
157                format: wgpu::VertexFormat::Float32x2,
158                offset: 0,
159                shader_location: 0,
160            },
161            wgpu::VertexAttribute {
162                format: wgpu::VertexFormat::Float32x2,
163                offset: 8,
164                shader_location: 1,
165            },
166            wgpu::VertexAttribute {
167                format: wgpu::VertexFormat::Float32x4,
168                offset: 16,
169                shader_location: 2,
170            },
171            wgpu::VertexAttribute {
172                format: wgpu::VertexFormat::Float32,
173                offset: 32,
174                shader_location: 3,
175            },
176        ];
177        let instance_layout = wgpu::VertexBufferLayout {
178            array_stride: core::mem::size_of::<InstanceRect>() as wgpu::BufferAddress,
179            step_mode: wgpu::VertexStepMode::Instance,
180            attributes: &instance_attrs,
181        };
182
183        // Per-vertex attribute (step_mode = Vertex):
184        //   uv  vec2  @4  offset 0
185        let vertex_attrs = [wgpu::VertexAttribute {
186            format: wgpu::VertexFormat::Float32x2,
187            offset: 0,
188            shader_location: 4,
189        }];
190        let vertex_layout = wgpu::VertexBufferLayout {
191            array_stride: core::mem::size_of::<UvVertex>() as wgpu::BufferAddress,
192            step_mode: wgpu::VertexStepMode::Vertex,
193            attributes: &vertex_attrs,
194        };
195
196        let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
197            label: Some("oxiui-render-wgpu instanced pipeline"),
198            layout: Some(&pipeline_layout),
199            vertex: wgpu::VertexState {
200                module: &shader,
201                entry_point: Some("vs_main"),
202                // Instance buffer first (per-instance), then quad vertex buffer.
203                buffers: &[instance_layout, vertex_layout],
204                compilation_options: wgpu::PipelineCompilationOptions::default(),
205            },
206            fragment: Some(wgpu::FragmentState {
207                module: &shader,
208                entry_point: Some("fs_main"),
209                targets: &[Some(wgpu::ColorTargetState {
210                    format: TARGET_FORMAT,
211                    blend: Some(wgpu::BlendState::ALPHA_BLENDING),
212                    write_mask: wgpu::ColorWrites::ALL,
213                })],
214                compilation_options: wgpu::PipelineCompilationOptions::default(),
215            }),
216            primitive: wgpu::PrimitiveState {
217                topology: wgpu::PrimitiveTopology::TriangleList,
218                strip_index_format: None,
219                front_face: wgpu::FrontFace::Ccw,
220                cull_mode: None,
221                unclipped_depth: false,
222                polygon_mode: wgpu::PolygonMode::Fill,
223                conservative: false,
224            },
225            depth_stencil: None,
226            multisample: wgpu::MultisampleState {
227                count: sample_count,
228                mask: !0,
229                alpha_to_coverage_enabled: false,
230            },
231            multiview_mask: None,
232            cache: None,
233        });
234
235        // Build the shared unit-quad buffers.
236        let vertex_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
237            label: Some("oxiui-render-wgpu instanced quad vertices"),
238            contents: bytemuck::cast_slice(&QUAD_VERTICES),
239            usage: wgpu::BufferUsages::VERTEX,
240        });
241        let index_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
242            label: Some("oxiui-render-wgpu instanced quad indices"),
243            contents: bytemuck::cast_slice(&QUAD_INDICES),
244            usage: wgpu::BufferUsages::INDEX,
245        });
246
247        Self {
248            pipeline,
249            globals_layout,
250            index_buffer,
251            vertex_buffer,
252        }
253    }
254}
255
256// ── InstancedRectRenderer ─────────────────────────────────────────────────────
257
/// A frame-scoped collector of [`InstanceRect`] data and a flusher that
/// issues a single instanced draw call.
///
/// # Usage
///
/// 1. Call `push` once per rectangle to append to the batch.
/// 2. Call `flush` to upload the instance buffer and issue one
///    `draw_indexed(0..6, 0, 0..n)` call.
/// 3. Call `clear` at the start of the next frame (or after flush).
///    `flush` does not clear the pending list itself.
pub struct InstancedRectRenderer {
    /// CPU-side list of instances pending for the next `flush`.
    instances: Vec<InstanceRect>,
    /// Persistent instance buffer, grown on demand (next power of two).
    /// `None` until the first non-empty `flush`.
    instance_buf: Option<wgpu::Buffer>,
    /// Byte capacity of `instance_buf`.
    instance_buf_capacity: usize,
}
274
275impl InstancedRectRenderer {
276    /// Create a renderer pre-allocated for `initial_capacity` instances.
277    pub fn new(initial_capacity: usize) -> Self {
278        Self {
279            instances: Vec::with_capacity(initial_capacity.max(4)),
280            instance_buf: None,
281            instance_buf_capacity: 0,
282        }
283    }
284
285    /// Append one rectangle instance to the pending batch.
286    pub fn push(&mut self, inst: InstanceRect) {
287        self.instances.push(inst);
288    }
289
290    /// Return the number of pending instances.
291    pub fn len(&self) -> usize {
292        self.instances.len()
293    }
294
295    /// Return `true` if there are no pending instances.
296    pub fn is_empty(&self) -> bool {
297        self.instances.is_empty()
298    }
299
300    /// Clear all pending instances (call at the start of each frame).
301    pub fn clear(&mut self) {
302        self.instances.clear();
303    }
304
305    /// Upload the instance buffer and issue a single instanced draw call.
306    ///
307    /// Opens a render pass on `encoder`, uses `LoadOp::Load` so existing frame
308    /// content is preserved.  The scissor is set to the full viewport.
309    ///
310    /// Returns the number of draw calls issued (0 if empty, 1 otherwise).
311    ///
312    /// # Errors
313    ///
314    /// Returns [`UiError::Render`] if buffer creation fails.
315    #[allow(clippy::too_many_arguments)]
316    pub fn flush(
317        &mut self,
318        device: &wgpu::Device,
319        queue: &wgpu::Queue,
320        encoder: &mut wgpu::CommandEncoder,
321        pipeline: &InstancedRectPipeline,
322        globals_bind_group: &wgpu::BindGroup,
323        screen_view: &wgpu::TextureView,
324        screen_resolve: Option<&wgpu::TextureView>,
325        viewport_w: u32,
326        viewport_h: u32,
327    ) -> Result<u32, UiError> {
328        if self.instances.is_empty() {
329            return Ok(0);
330        }
331
332        // Upload instance data to a persistent, reused buffer.
333        let inst_bytes: &[u8] = bytemuck::cast_slice(&self.instances);
334        let needed = inst_bytes.len();
335
336        let needs_grow = self.instance_buf.is_none() || self.instance_buf_capacity < needed;
337        if needs_grow {
338            let min_bytes = core::mem::size_of::<InstanceRect>() * 64;
339            let new_cap = needed.next_power_of_two().max(min_bytes);
340            self.instance_buf = Some(device.create_buffer(&wgpu::BufferDescriptor {
341                label: Some("oxiui-render-wgpu instanced-rects persistent"),
342                size: new_cap as u64,
343                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
344                mapped_at_creation: false,
345            }));
346            self.instance_buf_capacity = new_cap;
347        }
348
349        if let Some(ref buf) = self.instance_buf {
350            queue.write_buffer(buf, 0, inst_bytes);
351        }
352
353        let n_instances = self.instances.len() as u32;
354
355        let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
356            label: Some("oxiui-render-wgpu instanced-rect pass"),
357            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
358                view: screen_view,
359                depth_slice: None,
360                resolve_target: screen_resolve,
361                ops: wgpu::Operations {
362                    load: wgpu::LoadOp::Load,
363                    store: wgpu::StoreOp::Store,
364                },
365            })],
366            depth_stencil_attachment: None,
367            timestamp_writes: None,
368            occlusion_query_set: None,
369            multiview_mask: None,
370        });
371
372        pass.set_pipeline(&pipeline.pipeline);
373        pass.set_bind_group(0, globals_bind_group, &[]);
374        pass.set_scissor_rect(0, 0, viewport_w, viewport_h);
375
376        if let Some(ref inst_buf) = self.instance_buf {
377            pass.set_vertex_buffer(
378                0,
379                inst_buf.slice(..n_instances as u64 * core::mem::size_of::<InstanceRect>() as u64),
380            );
381        }
382        pass.set_vertex_buffer(1, pipeline.vertex_buffer.slice(..));
383        pass.set_index_buffer(pipeline.index_buffer.slice(..), wgpu::IndexFormat::Uint16);
384        pass.draw_indexed(0..6, 0, 0..n_instances);
385
386        Ok(1)
387    }
388}
389
390// ── Tests ─────────────────────────────────────────────────────────────────────
391
// Unit tests.  The GPU-backed tests return early (and therefore pass) when no
// adapter or device can be obtained.
#[cfg(test)]
mod tests {
    use super::*;

    /// Runtime mirror of the compile-time size assertion.
    #[test]
    fn instance_rect_size_is_48() {
        assert_eq!(core::mem::size_of::<InstanceRect>(), 48);
    }

    #[test]
    fn instance_rect_is_pod() {
        // Verify Pod/Zeroable derivation compiles and zeroed is valid.
        let _zero: InstanceRect = bytemuck::Zeroable::zeroed();
    }

    /// `rect` yields a zero radius; `rounded` stores the given radius.
    #[test]
    fn instance_rect_constructors() {
        let r = InstanceRect::rect([10.0, 20.0], [80.0, 30.0], [1.0, 0.0, 0.0, 1.0]);
        assert_eq!(r.corner_radius, 0.0);
        assert_eq!(r.pos, [10.0, 20.0]);

        let rnd = InstanceRect::rounded([0.0, 0.0], [100.0, 100.0], [0.0, 1.0, 0.0, 1.0], 8.0);
        assert_eq!(rnd.corner_radius, 8.0);
    }

    /// CPU-side batching only: push grows the batch, clear empties it.
    #[test]
    fn instanced_renderer_push_and_clear() {
        let mut r = InstancedRectRenderer::new(4);
        assert!(r.is_empty());
        r.push(InstanceRect::rect([0.0, 0.0], [10.0, 10.0], [1.0; 4]));
        assert_eq!(r.len(), 1);
        r.clear();
        assert!(r.is_empty());
    }

    /// Try to obtain a headless device and queue.
    ///
    /// Returns `None` when either the adapter or the device request fails,
    /// which lets GPU tests skip themselves.
    fn try_device() -> Option<(wgpu::Device, wgpu::Queue)> {
        let instance = wgpu::Instance::default();
        let adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions {
            power_preference: wgpu::PowerPreference::default(),
            force_fallback_adapter: false,
            compatible_surface: None,
        }))
        .ok()?;
        pollster::block_on(adapter.request_device(&wgpu::DeviceDescriptor {
            label: Some("instanced test device"),
            required_features: wgpu::Features::empty(),
            required_limits: wgpu::Limits::downlevel_defaults(),
            memory_hints: wgpu::MemoryHints::Performance,
            experimental_features: wgpu::ExperimentalFeatures::disabled(),
            trace: wgpu::Trace::Off,
        }))
        .ok()
    }

    #[test]
    fn instanced_pipeline_compiles() {
        // Verify that the WGSL shader compiles without error on a real device.
        let Some((device, _queue)) = try_device() else {
            return;
        };
        let _pipeline = InstancedRectPipeline::new(&device, 1);
        // Reaching here means no WGSL compile error.
    }

    /// End-to-end check: clear a 64×64 target, draw one full-canvas red rect
    /// through `flush`, read the texture back and inspect the centre pixel.
    #[test]
    fn instanced_renderer_renders_rects() {
        use crate::gpu::buffer::Globals;
        use wgpu::util::DeviceExt;

        let Some((device, queue)) = try_device() else {
            return;
        };

        // Create a small offscreen render target.
        let w = 64u32;
        let h = 64u32;
        let texture = device.create_texture(&wgpu::TextureDescriptor {
            label: Some("instanced test target"),
            size: wgpu::Extent3d {
                width: w,
                height: h,
                depth_or_array_layers: 1,
            },
            mip_level_count: 1,
            sample_count: 1,
            dimension: wgpu::TextureDimension::D2,
            format: crate::gpu::device::TARGET_FORMAT,
            // COPY_SRC is needed for the readback copy further down.
            usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC,
            view_formats: &[],
        });
        let view = texture.create_view(&wgpu::TextureViewDescriptor::default());

        let pipeline = InstancedRectPipeline::new(&device, 1);

        // Globals uniform sized to the render target, bound through the
        // pipeline's own bind group layout.
        let globals = Globals::new(w, h);
        let globals_buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("instanced test globals"),
            contents: bytemuck::bytes_of(&globals),
            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
        });
        let globals_bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("instanced test globals bg"),
            layout: &pipeline.globals_layout,
            entries: &[wgpu::BindGroupEntry {
                binding: 0,
                resource: globals_buf.as_entire_binding(),
            }],
        });

        // Render a red rect filling the entire canvas.
        let mut renderer = InstancedRectRenderer::new(4);
        renderer.push(InstanceRect::rect(
            [0.0, 0.0],
            [w as f32, h as f32],
            [1.0, 0.0, 0.0, 1.0],
        ));

        // Clear pass.
        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
            label: Some("instanced test encoder"),
        });
        {
            // The pass records no draws; it exists only for its clear load op
            // and ends when `_clear` is dropped at the end of this scope.
            let _clear = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
                label: Some("clear"),
                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
                    view: &view,
                    depth_slice: None,
                    resolve_target: None,
                    ops: wgpu::Operations {
                        load: wgpu::LoadOp::Clear(wgpu::Color {
                            r: 0.0,
                            g: 0.0,
                            b: 0.0,
                            a: 0.0,
                        }),
                        store: wgpu::StoreOp::Store,
                    },
                })],
                depth_stencil_attachment: None,
                timestamp_writes: None,
                occlusion_query_set: None,
                multiview_mask: None,
            });
        }

        // `flush` loads the cleared contents and draws on top of them.
        let draws = renderer
            .flush(
                &device,
                &queue,
                &mut encoder,
                &pipeline,
                &globals_bg,
                &view,
                None,
                w,
                h,
            )
            .expect("flush");
        assert_eq!(draws, 1, "should have issued 1 draw call");

        queue.submit(Some(encoder.finish()));

        // Readback a pixel from the centre.
        // Rows in the readback buffer are padded up to the copy row alignment.
        // NOTE(review): `w * 4` assumes TARGET_FORMAT is 4 bytes per texel — confirm.
        let unpadded = w * 4;
        let align = wgpu::COPY_BYTES_PER_ROW_ALIGNMENT;
        let padded = unpadded.div_ceil(align) * align;
        let readback = device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("readback"),
            size: (padded * h) as u64,
            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
            mapped_at_creation: false,
        });
        let mut enc2 =
            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
        enc2.copy_texture_to_buffer(
            wgpu::TexelCopyTextureInfo {
                texture: &texture,
                mip_level: 0,
                origin: wgpu::Origin3d::ZERO,
                aspect: wgpu::TextureAspect::All,
            },
            wgpu::TexelCopyBufferInfo {
                buffer: &readback,
                layout: wgpu::TexelCopyBufferLayout {
                    offset: 0,
                    bytes_per_row: Some(padded),
                    rows_per_image: Some(h),
                },
            },
            wgpu::Extent3d {
                width: w,
                height: h,
                depth_or_array_layers: 1,
            },
        );
        queue.submit(Some(enc2.finish()));
        let slice = readback.slice(..);
        // The callback ignores the mapping result; the blocking poll below
        // waits for the device before the mapped range is read.
        slice.map_async(wgpu::MapMode::Read, |_| {});
        device
            .poll(wgpu::PollType::wait_indefinitely())
            .expect("poll");
        let data = slice.get_mapped_range();
        let row = 32u32;
        let col = 32u32;
        // Index with the padded row pitch, not the tight one.
        let idx = (row * padded + col * 4) as usize;
        // NOTE(review): reads byte 0 as red and byte 3 as alpha, i.e. presumes
        // an RGBA byte order for TARGET_FORMAT — confirm.
        let r = data[idx];
        let a = data[idx + 3];
        // The mapped view must be dropped before the buffer is unmapped.
        drop(data);
        readback.unmap();
        assert!(r > 200, "centre pixel should be reddish (r={r})");
        assert!(a > 200, "centre pixel should be opaque (a={a})");
    }
}