Skip to main content

oxiui_render_wgpu/gpu/
exec.rs

1//! GPU pass execution helpers and per-frame statistics for [`WgpuBackend`].
2//!
3//! This module owns:
4//!
5//! - [`FrameStats`] — per-frame draw-call and render-pass counters.
6//! - `run_solid_pass` — executes the solid vertex-buffer pass using a
7//!   persistent, reused, growable vertex buffer.
8//! - `run_gradient_pass_batched` — executes ALL gradient draws in a single
9//!   render pass using dynamic-offset uniforms.
10//! - `run_textured_pass` — executes one textured render pass.
11//!
12//! [`WgpuBackend`]: super::renderer::WgpuBackend
13
14use oxiui_core::UiError;
15use wgpu::util::DeviceExt;
16
17use crate::gpu::buffer::{GradientUniforms, GradientVertex, Vertex};
18use crate::gpu::geometry::{DrawSegment, GradientDraw};
19use crate::gpu::pipeline::{GradientPipeline, SolidPipeline, TexturedPipeline};
20use crate::gpu::texture::{upload_image, TexturedDraw};
21
22// ── FrameStats ────────────────────────────────────────────────────────────────
23
/// Draw-call and render-pass counters for a single frame.
///
/// The counters are filled in while `WgpuBackend::execute` runs and can be
/// read back afterwards through [`WgpuBackend::frame_stats`].  A
/// default-constructed value has both counters at zero.
///
/// [`WgpuBackend::frame_stats`]: super::renderer::WgpuBackend::frame_stats
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct FrameStats {
    /// How many `pass.draw(...)` calls the last `execute()` call issued.
    ///
    /// Only real GPU draws are counted: skipped/dead segments are left out,
    /// while shadow-pass draws are included.
    pub draw_calls: u32,
    /// How many `begin_render_pass` calls the last `execute()` call issued.
    ///
    /// This covers the clear pass, shadow passes, the solid pass, gradient
    /// pass(es), and textured pass(es).
    pub render_passes: u32,
}
43
44// ── align_up ──────────────────────────────────────────────────────────────────
45
/// Round `n` up to the next multiple of `align`.
///
/// `align` must be a power of two; this is checked with a `debug_assert!`.
/// A value of `n` that is already a multiple of `align` is returned as-is.
///
/// # Panics
///
/// Panics if the rounded value does not fit in a `u32`.  The check is
/// explicit so that release builds fail loudly instead of silently wrapping
/// to a small (and wrong) offset.
#[inline]
pub(crate) fn align_up(n: u32, align: u32) -> u32 {
    debug_assert!(
        align.is_power_of_two(),
        "align_up: `align` must be a power of two, got {align}"
    );
    // For a power-of-two `align`, `align - 1` is a mask of the low bits that
    // must be cleared after bumping `n` past the next boundary.
    let mask = align - 1;
    n.checked_add(mask)
        .expect("align_up: rounded value overflows u32")
        & !mask
}
51
52// ── Solid pass ────────────────────────────────────────────────────────────────
53
54/// Parameters for the solid-geometry render pass.
55pub(crate) struct SolidPassParams<'a> {
56    pub(crate) device: &'a wgpu::Device,
57    pub(crate) queue: &'a wgpu::Queue,
58    pub(crate) encoder: &'a mut wgpu::CommandEncoder,
59    pub(crate) screen_view: &'a wgpu::TextureView,
60    pub(crate) screen_resolve: Option<&'a wgpu::TextureView>,
61    pub(crate) pipeline: &'a SolidPipeline,
62    pub(crate) globals_bind_group: &'a wgpu::BindGroup,
63    pub(crate) verts: &'a [Vertex],
64    pub(crate) segments: &'a [DrawSegment],
65    pub(crate) viewport_w: u32,
66    pub(crate) viewport_h: u32,
67    /// Persistent vertex buffer: mutated in-place if it needs to grow.
68    pub(crate) solid_vertex_buf: &'a mut Option<wgpu::Buffer>,
69    /// Byte capacity of the persistent vertex buffer.
70    pub(crate) solid_vertex_buf_capacity: &'a mut usize,
71}
72
73/// Execute the solid geometry pass, returning the number of draw calls issued.
74///
75/// Uses a persistent, reusable vertex buffer that is grown (next power-of-two)
76/// whenever the current frame requires more capacity, then updated via
77/// `queue.write_buffer` instead of creating a new buffer.
78pub(crate) fn run_solid_pass(p: SolidPassParams<'_>) -> u32 {
79    let mut draw_calls = 0u32;
80
81    // ── Persistent buffer management ─────────────────────────────────────────
82    // If we have vertices to draw, ensure the persistent buffer is large enough
83    // and upload the current frame's geometry.
84    if !p.verts.is_empty() {
85        let verts_bytes: &[u8] = bytemuck::cast_slice(p.verts);
86        let needed = verts_bytes.len();
87
88        let needs_grow = p.solid_vertex_buf.is_none() || *p.solid_vertex_buf_capacity < needed;
89
90        if needs_grow {
91            // Grow to the next power of two, with a minimum of 64 vertices.
92            let min_bytes = core::mem::size_of::<Vertex>() * 64;
93            let new_cap = needed.next_power_of_two().max(min_bytes);
94            *p.solid_vertex_buf = Some(p.device.create_buffer(&wgpu::BufferDescriptor {
95                label: Some("oxiui-render-wgpu solid-verts-persistent"),
96                size: new_cap as u64,
97                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
98                mapped_at_creation: false,
99            }));
100            *p.solid_vertex_buf_capacity = new_cap;
101        }
102
103        // Upload current frame's geometry into the persistent buffer.
104        if let Some(buf) = p.solid_vertex_buf.as_ref() {
105            p.queue.write_buffer(buf, 0, verts_bytes);
106        }
107    }
108
109    let mut pass = p.encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
110        label: Some("oxiui-render-wgpu solid pass"),
111        color_attachments: &[Some(wgpu::RenderPassColorAttachment {
112            view: p.screen_view,
113            depth_slice: None,
114            resolve_target: p.screen_resolve,
115            ops: wgpu::Operations {
116                load: wgpu::LoadOp::Load,
117                store: wgpu::StoreOp::Store,
118            },
119        })],
120        depth_stencil_attachment: None,
121        timestamp_writes: None,
122        occlusion_query_set: None,
123        multiview_mask: None,
124    });
125
126    if let Some(ref vb) = p.solid_vertex_buf {
127        if !p.verts.is_empty() {
128            // ── Pipeline state caching ────────────────────────────────────
129            // Track raw pointers to the last-set pipeline and globals bind
130            // group so that redundant `set_pipeline` / `set_bind_group`
131            // calls can be skipped.  These are frame-local variables;
132            // each RenderPass starts from a clean GPU state so there is
133            // nothing to carry across pass boundaries.
134            let mut last_pipeline_ptr: Option<*const wgpu::RenderPipeline> = None;
135            let mut last_globals_bg_ptr: Option<*const wgpu::BindGroup> = None;
136
137            let cur_pipeline_ptr = &p.pipeline.pipeline as *const wgpu::RenderPipeline;
138            let cur_globals_bg_ptr = p.globals_bind_group as *const wgpu::BindGroup;
139
140            if last_pipeline_ptr != Some(cur_pipeline_ptr) {
141                pass.set_pipeline(&p.pipeline.pipeline);
142                last_pipeline_ptr = Some(cur_pipeline_ptr);
143            }
144            if last_globals_bg_ptr != Some(cur_globals_bg_ptr) {
145                pass.set_bind_group(0, p.globals_bind_group, &[]);
146                last_globals_bg_ptr = Some(cur_globals_bg_ptr);
147            }
148            // Anchor the caching locals so the compiler does not warn
149            // about dead assignments when there is only one pipeline/BG.
150            let _ = last_pipeline_ptr;
151            let _ = last_globals_bg_ptr;
152
153            // Draw only the vertices for this frame (0..verts.len()), not
154            // the whole buffer capacity.
155            pass.set_vertex_buffer(
156                0,
157                vb.slice(..p.verts.len() as u64 * core::mem::size_of::<Vertex>() as u64),
158            );
159
160            for seg in p.segments {
161                match seg.scissor {
162                    Some([_, _, 0, _]) | Some([_, _, _, 0]) => continue,
163                    Some([x, y, w, h]) => pass.set_scissor_rect(x, y, w, h),
164                    None => pass.set_scissor_rect(0, 0, p.viewport_w, p.viewport_h),
165                }
166                pass.draw(seg.start..seg.end, 0..1);
167                draw_calls += 1;
168            }
169        }
170    }
171
172    draw_calls
173}
174
175// ── Gradient pass (batched) ───────────────────────────────────────────────────
176
177/// Parameters for the batched gradient render pass.
178///
179/// All gradient draws are coalesced into a single render pass using a
180/// dynamic-offset uniform buffer.  The pipeline bind group layout must have
181/// binding 1 with `has_dynamic_offset: true`.
182pub(crate) struct GradientPassParams<'a> {
183    pub(crate) device: &'a wgpu::Device,
184    pub(crate) queue: &'a wgpu::Queue,
185    pub(crate) encoder: &'a mut wgpu::CommandEncoder,
186    pub(crate) screen_view: &'a wgpu::TextureView,
187    pub(crate) screen_resolve: Option<&'a wgpu::TextureView>,
188    pub(crate) pipeline: &'a GradientPipeline,
189    pub(crate) globals_buffer: &'a wgpu::Buffer,
190    pub(crate) gradient_draws: &'a [GradientDraw],
191    pub(crate) viewport_w: u32,
192    pub(crate) viewport_h: u32,
193}
194
195/// Execute ALL gradient draws in a single render pass via dynamic-offset uniforms.
196///
197/// Returns `(render_passes_added, draw_calls_added)`.
198/// Returns `(0, 0)` when `gradient_draws` is empty.
199///
200/// # Dynamic-offset batching
201///
202/// All per-gradient [`GradientUniforms`] are packed into a single combined
203/// buffer with a stride of `align_up(sizeof(GradientUniforms),
204/// min_uniform_buffer_offset_alignment)`.  Inside the one render pass each
205/// draw call issues `set_bind_group` with a different byte offset — this is
206/// valid because binding 1 in the gradient bind group layout has
207/// `has_dynamic_offset: true`.
208pub(crate) fn run_gradient_pass_batched(p: GradientPassParams<'_>) -> (u32, u32) {
209    if p.gradient_draws.is_empty() {
210        return (0, 0);
211    }
212
213    // ── Compute per-element stride (device-aligned) ───────────────────────
214    let struct_size = core::mem::size_of::<GradientUniforms>() as u32;
215    let min_align = p.device.limits().min_uniform_buffer_offset_alignment;
216    let grad_stride = align_up(struct_size, min_align) as u64;
217
218    let n_grads = p.gradient_draws.len() as u64;
219
220    // ── Build combined uniforms buffer ────────────────────────────────────
221    let grad_uniform_buf = p.device.create_buffer(&wgpu::BufferDescriptor {
222        label: Some("oxiui-render-wgpu grad-uniforms-combined"),
223        size: n_grads * grad_stride,
224        usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
225        mapped_at_creation: false,
226    });
227    for (i, gd) in p.gradient_draws.iter().enumerate() {
228        p.queue.write_buffer(
229            &grad_uniform_buf,
230            i as u64 * grad_stride,
231            bytemuck::bytes_of(&gd.uniforms),
232        );
233    }
234
235    // ── Build combined vertex buffer (all quads concatenated) ─────────────
236    let all_verts: Vec<GradientVertex> = p
237        .gradient_draws
238        .iter()
239        .flat_map(|gd| gd.verts.iter().copied())
240        .collect();
241
242    if all_verts.is_empty() {
243        return (0, 0);
244    }
245
246    let grad_vert_buf = p
247        .device
248        .create_buffer_init(&wgpu::util::BufferInitDescriptor {
249            label: Some("oxiui-render-wgpu grad-verts-combined"),
250            contents: bytemuck::cast_slice(&all_verts),
251            usage: wgpu::BufferUsages::VERTEX,
252        });
253
254    // ── Create ONE bind group using the combined uniforms buffer ──────────
255    // Binding 1 (dynamic) needs a sized binding so the driver knows which
256    // chunk of the buffer each draw uses.  We provide a BufferBinding with
257    // offset=0 and size=sizeof(GradientUniforms); the actual per-draw offset
258    // is supplied via the dynamic offset argument to set_bind_group.
259    let grad_uniform_size =
260        core::num::NonZeroU64::new(core::mem::size_of::<GradientUniforms>() as u64);
261    let grad_bg = p.device.create_bind_group(&wgpu::BindGroupDescriptor {
262        label: Some("oxiui-render-wgpu gradient-batched bg"),
263        layout: &p.pipeline.bind_group_layout,
264        entries: &[
265            wgpu::BindGroupEntry {
266                binding: 0,
267                resource: p.globals_buffer.as_entire_binding(),
268            },
269            wgpu::BindGroupEntry {
270                binding: 1,
271                resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
272                    buffer: &grad_uniform_buf,
273                    offset: 0,
274                    size: grad_uniform_size,
275                }),
276            },
277        ],
278    });
279
280    // ── ONE render pass for all gradient draws ────────────────────────────
281    let mut pass = p.encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
282        label: Some("oxiui-render-wgpu gradient-batched pass"),
283        color_attachments: &[Some(wgpu::RenderPassColorAttachment {
284            view: p.screen_view,
285            depth_slice: None,
286            resolve_target: p.screen_resolve,
287            ops: wgpu::Operations {
288                load: wgpu::LoadOp::Load,
289                store: wgpu::StoreOp::Store,
290            },
291        })],
292        depth_stencil_attachment: None,
293        timestamp_writes: None,
294        occlusion_query_set: None,
295        multiview_mask: None,
296    });
297
298    pass.set_pipeline(&p.pipeline.pipeline);
299    pass.set_vertex_buffer(0, grad_vert_buf.slice(..));
300
301    let mut draw_calls = 0u32;
302    let mut vertex_offset: u32 = 0;
303
304    for (i, gd) in p.gradient_draws.iter().enumerate() {
305        if gd.verts.is_empty() {
306            continue;
307        }
308
309        // Dynamic offset selects this gradient's uniform slice.
310        let dyn_offset = (i as u64 * grad_stride) as u32;
311        pass.set_bind_group(0, &grad_bg, &[dyn_offset]);
312
313        // Apply scissor for this gradient draw.
314        match gd.scissor {
315            Some([_, _, 0, _]) | Some([_, _, _, 0]) => {
316                vertex_offset += gd.verts.len() as u32;
317                continue;
318            }
319            Some([x, y, w, h]) => pass.set_scissor_rect(x, y, w, h),
320            None => pass.set_scissor_rect(0, 0, p.viewport_w, p.viewport_h),
321        }
322
323        let n_verts = gd.verts.len() as u32;
324        pass.draw(vertex_offset..vertex_offset + n_verts, 0..1);
325        draw_calls += 1;
326        vertex_offset += n_verts;
327    }
328
329    (1, draw_calls)
330}
331
332// ── Textured pass ─────────────────────────────────────────────────────────────
333
334/// Parameters for a single textured render pass.
335pub(crate) struct TexturedPassParams<'a> {
336    pub(crate) device: &'a wgpu::Device,
337    pub(crate) queue: &'a wgpu::Queue,
338    pub(crate) encoder: &'a mut wgpu::CommandEncoder,
339    pub(crate) screen_view: &'a wgpu::TextureView,
340    pub(crate) screen_resolve: Option<&'a wgpu::TextureView>,
341    pub(crate) pipeline: &'a TexturedPipeline,
342    pub(crate) globals_bind_group: &'a wgpu::BindGroup,
343    pub(crate) td: &'a TexturedDraw,
344    pub(crate) viewport_w: u32,
345    pub(crate) viewport_h: u32,
346}
347
348/// Execute one textured pass. Returns `(render_passes_added, draw_calls_added)`.
349///
350/// Returns `(0, 0)` if the textured draw has no vertices.
351///
352/// # Errors
353///
354/// Returns [`UiError::Render`] if texture upload fails.
355pub(crate) fn run_textured_pass(p: TexturedPassParams<'_>) -> Result<(u32, u32), UiError> {
356    if p.td.verts.is_empty() {
357        return Ok((0, 0));
358    }
359
360    let (tex_view, tex_sampler) = upload_image(p.device, p.queue, &p.td.image, p.td.filter)
361        .map_err(|e| UiError::Render(format!("texture upload failed: {e}")))?;
362
363    let tex_vb = p
364        .device
365        .create_buffer_init(&wgpu::util::BufferInitDescriptor {
366            label: Some("oxiui-render-wgpu tex verts"),
367            contents: bytemuck::cast_slice(&p.td.verts),
368            usage: wgpu::BufferUsages::VERTEX,
369        });
370
371    let tex_bg = p.device.create_bind_group(&wgpu::BindGroupDescriptor {
372        label: Some("oxiui-render-wgpu tex bind group"),
373        layout: &p.pipeline.texture_layout,
374        entries: &[
375            wgpu::BindGroupEntry {
376                binding: 0,
377                resource: wgpu::BindingResource::TextureView(&tex_view),
378            },
379            wgpu::BindGroupEntry {
380                binding: 1,
381                resource: wgpu::BindingResource::Sampler(&tex_sampler),
382            },
383        ],
384    });
385
386    let mut pass = p.encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
387        label: Some("oxiui-render-wgpu textured pass"),
388        color_attachments: &[Some(wgpu::RenderPassColorAttachment {
389            view: p.screen_view,
390            depth_slice: None,
391            resolve_target: p.screen_resolve,
392            ops: wgpu::Operations {
393                load: wgpu::LoadOp::Load,
394                store: wgpu::StoreOp::Store,
395            },
396        })],
397        depth_stencil_attachment: None,
398        timestamp_writes: None,
399        occlusion_query_set: None,
400        multiview_mask: None,
401    });
402
403    pass.set_pipeline(&p.pipeline.pipeline);
404    pass.set_bind_group(0, p.globals_bind_group, &[]);
405    pass.set_bind_group(1, &tex_bg, &[]);
406    pass.set_vertex_buffer(0, tex_vb.slice(..));
407
408    match p.td.scissor {
409        Some([_, _, 0, _]) | Some([_, _, _, 0]) => return Ok((1, 0)),
410        Some([x, y, w, h]) => pass.set_scissor_rect(x, y, w, h),
411        None => pass.set_scissor_rect(0, 0, p.viewport_w, p.viewport_h),
412    }
413
414    pass.draw(0..p.td.verts.len() as u32, 0..1);
415    Ok((1, 1))
416}