Skip to main content

cvkg_render_gpu/renderer/
draw.rs

1use super::GpuRenderer;
2use super::context_helpers::create_surface_context;
3use crate::types::{DrawCall, MAX_PARTICLES};
4use crate::vertex::{InstanceData, InstanceData3D, Vertex, Vertex3D};
5use cvkg_core::{Rect, Renderer};
6use std::sync::Arc;
7
8impl GpuRenderer {
9    /// begin_frame_headless -- Strike the flaming sword to begin a new GPU frame for headless rendering.
10    pub fn begin_frame_headless(&mut self) -> wgpu::CommandEncoder {
11        self.current_window = None;
12        self.compositor_index_cursor = self.indices.len() as u32;
13        self.reset_frame_state();
14
15        // Recall staging belt buffers so they can be reused for vertex upload
16        self.staging_belt.recall();
17
18        let ctx = self
19            .headless_context
20            .as_ref()
21            .expect("Headless context not initialized");
22        let time = self.start_time.elapsed().as_secs_f32();
23        let logical_w = ctx.width as f32 / ctx.scale_factor;
24        let logical_h = ctx.height as f32 / ctx.scale_factor;
25        let dt = time - self.current_scene.time;
26        self.current_scene.time = time;
27        self.current_scene.delta_time = dt;
28        self.current_scene.resolution = [logical_w, logical_h];
29        self.current_scene.scale_factor = ctx.scale_factor;
30        self.current_scene.proj =
31            glam::Mat4::orthographic_lh(0.0, logical_w, logical_h, 0.0, -1000.0, 1000.0);
32
33        self.queue.write_buffer(
34            &self.scene_buffer,
35            0,
36            bytemuck::bytes_of(&self.current_scene),
37        );
38
39        self.device
40            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
41                label: Some("Surtr Headless Command Encoder"),
42            })
43    }
44
45    /// Reset per-frame state shared by both `begin_frame` and `begin_frame_headless`.
46    /// Factored out to avoid the copy-paste duplication hazard identified in the audit.
47    fn reset_frame_state(&mut self) {
48        self.vertices.clear();
49        self.indices.clear();
50        self.instance_data.clear();
51        self.draw_calls.clear();
52        self.svg.clear_filter_batches();
53        self.shared_elements.clear();
54        self.current_texture_id = None;
55        self.current_panel_id = None;
56        self.panel_stack.clear();
57        self.world_space_panels.clear();
58        self.opacity_stack.clear();
59        self.opacity_stack.push(1.0);
60        self.clip_stack.clear();
61        self.slice_stack.clear();
62        self.transform_stack.clear();
63        self.portal_regions.clear();
64        self.hologram_instances.clear();
65        self.pending_directional_light = None;
66        self.pending_mesh_instances_3d.clear();
67        self.pending_scene_radius = 100.0;
68        self.current_z = 0.0;
69        self.vnode_stack.clear();
70        self.event_handlers.clear();
71        // P2-13: Always update the volumetric time uniform, even if the
72        // volumetric pass is skipped by the frame budget system. This prevents
73        // a visible time pop when the pass resumes after being skipped.
74        let current_time = self.current_time();
75        let resolution = [self.current_width() as f32, self.current_height() as f32];
76        let time_uniform: [f32; 4] = [
77            current_time,
78            resolution[0],
79            resolution[1],
80            0.0, // _pad
81        ];
82        self.queue.write_buffer(
83            &self.volumetric_uniform_buffer,
84            0,
85            bytemuck::cast_slice(&time_uniform),
86        );
87        // Clear per-frame state but NOT memo_cache -- use generation counter instead
88        self.frame_generation += 1;
89        // Evict memo cache entries that are too old to prevent unbounded growth.
90        const MAX_MEMO_AGE: u64 = 1000;
91        if self.frame_generation > MAX_MEMO_AGE {
92            let cutoff = self.frame_generation - MAX_MEMO_AGE;
93            self.memo_cache.retain(|_, entry| entry.frame_gen >= cutoff);
94        }
95        self.last_frame_start = std::time::Instant::now();
96        self.telemetry.draw_calls = 0;
97        self.telemetry.vertices = 0;
98    }
99
100    /// begin_frame -- Strike the flaming sword to begin a new GPU frame for a specific window.
101    pub fn begin_frame(&mut self, window_id: winit::window::WindowId) -> wgpu::CommandEncoder {
102        self.begin_frame_internal(window_id, true)
103    }
104
105    /// Begin a frame without resetting per-frame state.
106    /// Used when reusing the previous frame's draw calls (view unchanged).
107    pub fn begin_frame_reuse(
108        &mut self,
109        window_id: winit::window::WindowId,
110    ) -> wgpu::CommandEncoder {
111        self.begin_frame_internal(window_id, false)
112    }
113
114    fn begin_frame_internal(
115        &mut self,
116        window_id: winit::window::WindowId,
117        reset_state: bool,
118    ) -> wgpu::CommandEncoder {
119        // Drain AI material channel
120        if let Some(rx) = &self.ai_material_rx {
121            while let Ok(res) = rx.try_recv() {
122                match res {
123                    Ok(_) => tracing::info!("[Surtr] Received AI generated material"),
124                    Err(e) => tracing::warn!("[Surtr] AI material generation error: {:?}", e),
125                }
126            }
127        }
128
129        // Skuld timestamp query removed — was causing GPU sync stalls (10ms/frame)
130        // and buffer mapping errors. GPU time can be profiled externally if needed.
131
132        self.staging_belt.recall();
133        self.current_window = Some(window_id);
134        if reset_state {
135            self.reset_frame_state();
136        }
137
138        let ctx = self
139            .surfaces
140            .get(&window_id)
141            .expect("Window not registered");
142        let time = self.start_time.elapsed().as_secs_f32();
143        let logical_w = ctx.config.width as f32 / ctx.scale_factor;
144        let logical_h = ctx.config.height as f32 / ctx.scale_factor;
145        let dt = time - self.current_scene.time;
146        self.current_scene.time = time;
147        self.current_scene.delta_time = dt;
148        self.current_scene.resolution = [logical_w, logical_h];
149        self.current_scene.scale_factor = ctx.scale_factor;
150        self.current_scene.proj =
151            glam::Mat4::orthographic_lh(0.0, logical_w, logical_h, 0.0, -1000.0, 1000.0);
152
153        self.queue.write_buffer(
154            &self.scene_buffer,
155            0,
156            bytemuck::bytes_of(&self.current_scene),
157        );
158
159        self.device
160            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
161                label: Some("Surtr Command Encoder"),
162            })
163    }
164
165    /// register_window -- Attaches a new OS window to the shared GPU context.
166    pub fn register_window(&mut self, window: Arc<winit::window::Window>) {
167        let size = window.inner_size();
168        let surface = self
169            .instance
170            .create_surface(window.clone())
171            .expect("Failed to create surface");
172        let caps = surface.get_capabilities(&self.adapter);
173        let format = caps.formats[0];
174
175        // Dynamic present mode selection -- Mailbox not available on all platforms (e.g. Wayland)
176        let present_mode = if caps.present_modes.contains(&wgpu::PresentMode::Mailbox) {
177            wgpu::PresentMode::Mailbox
178        } else {
179            tracing::warn!("[GPU] Mailbox not supported, falling back to Fifo (V-Sync)");
180            wgpu::PresentMode::Fifo
181        };
182
183        let alpha_mode = if caps
184            .alpha_modes
185            .contains(&wgpu::CompositeAlphaMode::PostMultiplied)
186        {
187            wgpu::CompositeAlphaMode::PostMultiplied
188        } else if caps
189            .alpha_modes
190            .contains(&wgpu::CompositeAlphaMode::PreMultiplied)
191        {
192            wgpu::CompositeAlphaMode::PreMultiplied
193        } else {
194            caps.alpha_modes[0]
195        };
196
197        tracing::info!(
198            "[GPU] Configuring surface: {}x{} | {:?} | {:?}",
199            size.width,
200            size.height,
201            present_mode,
202            alpha_mode
203        );
204
205        let config = wgpu::SurfaceConfiguration {
206            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
207            format,
208            width: size.width,
209            height: size.height,
210            present_mode,
211            alpha_mode,
212            view_formats: vec![],
213            desired_maximum_frame_latency: 1,
214        };
215        surface.configure(&self.device, &config);
216
217        let ctx = create_surface_context(
218            &self.device,
219            surface,
220            config,
221            &self.env_bind_group_layout,
222            &self.texture_bind_group_layout,
223            window.scale_factor() as f32,
224            self.quality_level.msaa_sample_count(),
225            &mut self.registry,
226        );
227
228        self.surfaces.insert(window.id(), ctx);
229    }
230
231    pub(crate) fn shatter_internal(
232        &mut self,
233        rect: Rect,
234        pieces: u32,
235        force: f32,
236        color: [f32; 4],
237        material_id: u32,
238    ) {
239        // High-Fidelity Variable Particle Density
240        let count = (pieces as f32).sqrt().ceil() as u32;
241        let dw = rect.width / count as f32;
242        let dh = rect.height / count as f32;
243
244        let c = self.apply_opacity(color);
245
246        let cx = rect.x + rect.width * 0.5;
247        let cy = rect.y + rect.height * 0.5;
248
249        for y in 0..count {
250            for x in 0..count {
251                let init_x = rect.x + x as f32 * dw;
252                let init_y = rect.y + y as f32 * dh;
253
254                // Center of the shard relative to the card center
255                let dx = (init_x + dw * 0.5) - cx;
256                let dy = (init_y + dh * 0.5) - cy;
257                let dist = (dx * dx + dy * dy).sqrt().max(1.0);
258
259                // Normal direction outwards
260                let nx = dx / dist;
261                let ny = dy / dist;
262
263                // Hash-based pseudo-random variations for dispersion
264                let hash =
265                    ((x as f32 * 12.9898 + y as f32 * 78.233).sin().fract() * 43_758.547).fract();
266                let hash2 =
267                    ((x as f32 * 37.11 + y as f32 * 149.87).sin().fract() * 23_412.19).fract();
268
269                let speed_var = 0.5 + hash * 1.5;
270                let angle = ny.atan2(nx) + (hash2 - 0.5) * 0.6;
271                let disp_x = angle.cos() * force * 50.0 * speed_var;
272                let disp_y = angle.sin() * force * 50.0 * speed_var;
273
274                // Downward gravity-like drift over time/force
275                let gravity = force * force * 20.0;
276
277                // Shrink shard size as it scatters away
278                // Assuming max force in demo is ~6.0
279                let scale_factor = (1.0 - (force / 6.0).min(1.0)).max(0.0);
280                let shard_w = dw * scale_factor;
281                let shard_h = dh * scale_factor;
282
283                let displaced_x = init_x + disp_x + (dw - shard_w) * 0.5;
284                let displaced_y = init_y + disp_y + gravity + (dh - shard_h) * 0.5;
285
286                let shard_rect = Rect {
287                    x: displaced_x,
288                    y: displaced_y,
289                    width: shard_w,
290                    height: shard_h,
291                };
292
293                let uv = Rect {
294                    x: x as f32 / count as f32,
295                    y: y as f32 / count as f32,
296                    width: 1.0 / count as f32,
297                    height: 1.0 / count as f32,
298                };
299
300                self.fill_rect_with_full_params(shard_rect, c, material_id, None, force, uv);
301            }
302        }
303    }
304
305    pub(crate) fn recursive_bolt(
306        &mut self,
307        from: [f32; 2],
308        to: [f32; 2],
309        depth: u32,
310        color: [f32; 4],
311    ) {
312        if depth == 0 {
313            self.draw_lightning_segment(from, to, color);
314            return;
315        }
316
317        let mid_x = (from[0] + to[0]) * 0.5;
318        let mid_y = (from[1] + to[1]) * 0.5;
319
320        let dx = to[0] - from[0];
321        let dy = to[1] - from[1];
322        let len = (dx * dx + dy * dy).sqrt();
323
324        if len < 1e-4 {
325            return;
326        }
327
328        // Perpendicular offset for jaggedness
329        let offset_scale = len * 0.15;
330        let seed = (from[0] * 12.9898 + from[1] * 78.233 + (depth as f32) * 37.11)
331            .sin()
332            .fract();
333        let offset_x = -dy / len * (seed - 0.5) * offset_scale;
334        let offset_y = dx / len * (seed - 0.5) * offset_scale;
335
336        let mid = [mid_x + offset_x, mid_y + offset_y];
337
338        self.recursive_bolt(from, mid, depth - 1, color);
339        self.recursive_bolt(mid, to, depth - 1, color);
340
341        // 20% chance of a secondary branch
342        if depth > 2 && seed > 0.8 {
343            let branch_to = [
344                mid[0] + offset_x * 2.0 + (seed * 100.0).sin() * 50.0,
345                mid[1] + offset_y * 2.0 + (seed * 100.0).cos() * 50.0,
346            ];
347            self.recursive_bolt(mid, branch_to, depth - 2, color);
348        }
349    }
350
351    pub(crate) fn draw_lightning_segment(&mut self, from: [f32; 2], to: [f32; 2], color: [f32; 4]) {
352        let dx = to[0] - from[0];
353        let dy = to[1] - from[1];
354        let len = (dx * dx + dy * dy).sqrt();
355        if len < 0.001 {
356            return;
357        }
358
359        let glow_width = 32.0;
360        let core_width = 4.0;
361        let c = self.apply_opacity(color);
362
363        // 1. Render Volumetric Glow (Cyan)
364        let gnx = -dy / len * glow_width * 0.5;
365        let gny = dx / len * glow_width * 0.5;
366        let gp1 = [from[0] + gnx, from[1] + gny];
367        let gp2 = [to[0] + gnx, to[1] + gny];
368        let gp3 = [to[0] - gnx, to[1] - gny];
369        let gp4 = [from[0] - gnx, from[1] - gny];
370        self.push_oriented_quad(
371            [gp1, gp2, gp3, gp4],
372            c,
373            9,
374            Rect {
375                x: 0.0,
376                y: 0.0,
377                width: 1.0,
378                height: 1.0,
379            },
380        );
381
382        // 2. Render Blinding Core (White)
383        let cnx = -dy / len * core_width * 0.5;
384        let cny = dx / len * core_width * 0.5;
385        let cp1 = [from[0] + cnx, from[1] + cny];
386        let cp2 = [to[0] + cnx, to[1] + cny];
387        let cp3 = [to[0] - cnx, to[1] - cny];
388        let cp4 = [from[0] - cnx, from[1] - cny];
389        self.push_oriented_quad(
390            [cp1, cp2, cp3, cp4],
391            [1.0, 1.0, 1.0, c[3]],
392            0,
393            Rect {
394                x: 0.0,
395                y: 0.0,
396                width: 1.0,
397                height: 1.0,
398            },
399        );
400    }
401
402    pub(crate) fn push_oriented_quad(
403        &mut self,
404        points: [[f32; 2]; 4],
405        color: [f32; 4],
406        material_id: u32,
407        uv_rect: Rect,
408    ) {
409        let scissor = self.clip_stack.last().copied();
410        let texture_id = None; // Oriented quads like lightning don't use textures yet
411
412        let (translation, scale_transform, rotation, _, _) = self.current_transform();
413        let current_instance_data = InstanceData {
414            translation,
415            scale: scale_transform,
416            rotation,
417            blur_radius: 0.0,
418            ior_override: 0.0,
419            glass_intensity: 1.0,
420        };
421
422        // CRITICAL FIX: Only break batch on material/scissor/texture state changes.
423        // Transform (translation/scale/rotation) is per-instance data.
424        let material =
425            Self::resolve_material_with_context(material_id, &self.current_draw_material);
426        let final_material_id = match material {
427            cvkg_core::DrawMaterial::Opaque => material_id,
428            cvkg_core::DrawMaterial::TopUI => crate::renderer::material_id::TOP_UI,
429            cvkg_core::DrawMaterial::Glass { .. } => crate::renderer::material_id::GLASS,
430            cvkg_core::DrawMaterial::Blend { mode } => 7 + mode,
431        };
432
433        let last_call = self.draw_calls.last();
434        let needs_new_call = self.draw_calls.is_empty()
435            || self.current_texture_id != texture_id
436            || last_call.unwrap().scissor_rect != scissor
437            || last_call.unwrap().panel_id != self.current_panel_id
438            || last_call.unwrap().material != material
439            || {
440                let last_material = last_call.unwrap().material;
441                matches!((material, last_material),
442                    (cvkg_core::DrawMaterial::Glass { blur_radius: a, ior_override: b, glass_intensity: c },
443                     cvkg_core::DrawMaterial::Glass { blur_radius: d, ior_override: e, glass_intensity: f })
444                    if a != d || b != e || c != f)
445            };
446
447        if needs_new_call {
448            self.current_texture_id = texture_id;
449            self.instance_data.push(current_instance_data);
450            self.draw_calls.push(DrawCall {
451                target_id: None,
452                panel_id: self.current_panel_id,
453                texture_id,
454                scissor_rect: scissor,
455                index_start: self.indices.len() as u32,
456                index_count: 0,
457                instance_count: 1,
458                material,
459                instance_start: (self.instance_data.len() - 1) as u32,
460                draw_order: 0,
461            });
462        } else {
463            // Same batch - add instance data and increment instance count
464            self.instance_data.push(current_instance_data);
465            if let Some(call) = self.draw_calls.last_mut() {
466                call.instance_count += 1;
467            }
468        }
469
470        let uvs = [
471            [uv_rect.x, uv_rect.y],
472            [uv_rect.x + uv_rect.width, uv_rect.y],
473            [uv_rect.x + uv_rect.width, uv_rect.y + uv_rect.height],
474            [uv_rect.x, uv_rect.y + uv_rect.height],
475        ];
476
477        let rect = Rect {
478            x: points[0][0],
479            y: points[0][1],
480            width: 1.0,
481            height: 1.0,
482        };
483
484        for i in 0..4 {
485            let px = points[i][0];
486            let py = points[i][1];
487
488            self.vertices.push(Vertex {
489                position: [px, py, 0.0],
490                normal: [0.0, 0.0, 1.0],
491                uv: uvs[i],
492                color,
493                material_id: final_material_id,
494                radius: 0.0,
495                slice: [0.0, 0.0, 0.0, 1.0],
496                logical: [px - rect.x, py - rect.y],
497                size: [rect.width, rect.height],
498                clip: [-f32::INFINITY, -f32::INFINITY, f32::INFINITY, f32::INFINITY],
499                tex_index: 0,
500            });
501        }
502
503        // Push indices for the quad (two triangles: 0-1-2 and 0-2-3)
504        let base = self.vertices.len() as u32 - 4;
505        self.indices
506            .extend_from_slice(&[base, base + 1, base + 2, base, base + 2, base + 3]);
507
508        if let Some(call) = self.draw_calls.last_mut() {
509            call.index_count += 6;
510        }
511    }
512
513    pub(crate) fn get_texture_id(&mut self, name: &str) -> Option<u32> {
514        self.texture_registry.get(name).copied()
515    }
516
517    /// fill_rect_with_mode -- Specialized rectangle drawing with mode-specific shader logic.
518    pub fn fill_rect_with_mode(
519        &mut self,
520        rect: Rect,
521        color: [f32; 4],
522        material_id: u32,
523        texture_id: Option<u32>,
524    ) {
525        self.fill_rect_with_full_params(
526            rect,
527            color,
528            material_id,
529            texture_id,
530            0.0,
531            Rect {
532                x: 0.0,
533                y: 0.0,
534                width: 1.0,
535                height: 1.0,
536            },
537        );
538    }
539
540    pub(crate) fn fill_rect_with_full_params(
541        &mut self,
542        rect: Rect,
543        color: [f32; 4],
544        material_id: u32,
545        texture_id: Option<u32>,
546        radius: f32,
547        uv_rect: Rect,
548    ) {
549        // If a shadow is active, draw it first, offset by shadow._offset
550        if let Some(shadow) = self.shadow_stack.last().copied()
551            && shadow.color[3] > 0.001
552        {
553            let shadow_rect = Rect {
554                x: rect.x + shadow._offset[0],
555                y: rect.y + shadow._offset[1],
556                width: rect.width,
557                height: rect.height,
558            };
559            Renderer::draw_drop_shadow(
560                self,
561                shadow_rect,
562                radius,
563                shadow.color,
564                shadow.radius,
565                0.0, // Spread
566            );
567        }
568
569        let slice = self
570            .slice_stack
571            .last()
572            .copied()
573            .map(|(a, o)| [a, o, 1.0, 1.0])
574            .unwrap_or([0.0, 0.0, 0.0, 1.0]);
575        self.fill_rect_with_full_params_and_slice(
576            rect,
577            color,
578            material_id,
579            texture_id,
580            radius,
581            uv_rect,
582            slice,
583            [0.0, 0.0],
584        );
585    }
586
587    #[allow(clippy::too_many_arguments)]
588    pub(crate) fn fill_rect_with_full_params_and_slice(
589        &mut self,
590        mut rect: Rect,
591        color: [f32; 4],
592        material_id: u32,
593        texture_id: Option<u32>,
594        radius: f32,
595        uv_rect: Rect,
596        slice: [f32; 4],
597        _glyph_time: [f32; 2],
598    ) {
599        // Pixel-snap rect coordinates to prevent sub-pixel blurring on high-DPI displays.
600        // Only snap for non-glass materials where visual crispness matters.
601        if material_id != crate::renderer::material_id::GLASS {
602            let scale = self.current_scale_factor();
603            let snap = |v: f32| (v * scale).round() / scale;
604            rect.x = snap(rect.x);
605            rect.y = snap(rect.y);
606            rect.width = snap(rect.width);
607            rect.height = snap(rect.height);
608        }
609
610        let scissor = self.clip_stack.last().copied();
611
612        let material =
613            Self::resolve_material_with_context(material_id, &self.current_draw_material);
614        let final_material_id = match material {
615            cvkg_core::DrawMaterial::Opaque => material_id,
616            cvkg_core::DrawMaterial::TopUI => crate::renderer::material_id::TOP_UI,
617            cvkg_core::DrawMaterial::Glass { .. } => crate::renderer::material_id::GLASS,
618            cvkg_core::DrawMaterial::Blend { mode } => 7 + mode,
619        };
620
621        let (translation, scale_transform, rotation, _, _) = self.current_transform();
622        let (blur_radius, ior_override, glass_intensity) = if let cvkg_core::DrawMaterial::Glass {
623            blur_radius,
624            ior_override,
625            glass_intensity,
626        } = material
627        {
628            (blur_radius, ior_override, glass_intensity)
629        } else {
630            (0.0, 0.0, 1.0)
631        };
632
633        let current_instance_data = InstanceData {
634            translation,
635            scale: scale_transform,
636            rotation,
637            blur_radius,
638            ior_override,
639            glass_intensity,
640        };
641
642        // Batching: check if we need to start a new DrawCall
643        // With Texture Array, we no longer need to break batches when the texture changes,
644        // as long as they are all part of the same array bind group (Group 0).
645        // CRITICAL FIX: Only break batch on material/scissor/blur/glass state changes.
646        // Transform (translation/scale/rotation) is per-instance data and should NOT
647        // break the batch - multiple instances with different transforms can share a draw call.
648        let last_call = self.draw_calls.last();
649        let needs_new_call = self.draw_calls.is_empty()
650            || last_call.unwrap().scissor_rect != scissor
651            || last_call.unwrap().material != material
652            || last_call.unwrap().texture_id != self.current_texture_id
653            || last_call.unwrap().panel_id != self.current_panel_id
654            || {
655                // Check if glass/blur state changed (these require pipeline changes)
656                let last_material = last_call.unwrap().material;
657                matches!((material, last_material),
658                    (cvkg_core::DrawMaterial::Glass { blur_radius: a, ior_override: b, glass_intensity: c },
659                     cvkg_core::DrawMaterial::Glass { blur_radius: d, ior_override: e, glass_intensity: f })
660                    if a != d || b != e || c != f)
661            };
662
663        if needs_new_call {
664            self.current_texture_id = Some(0); // All textures are now in the binding array at Group 0
665            self.instance_data.push(current_instance_data);
666            self.draw_calls.push(DrawCall {
667                target_id: None,
668                panel_id: self.current_panel_id,
669                texture_id: self.current_texture_id,
670                scissor_rect: scissor,
671                index_start: self.indices.len() as u32,
672                index_count: 0,
673                instance_count: 1,
674                material,
675                instance_start: (self.instance_data.len() - 1) as u32,
676                draw_order: 0,
677            });
678        } else {
679            // Same batch - add instance data and increment instance count
680            self.instance_data.push(current_instance_data);
681            if let Some(call) = self.draw_calls.last_mut() {
682                call.instance_count += 1;
683            }
684        }
685
686        let scale = self.current_scale_factor();
687        let snap = |v: f32| (v * scale).round() / scale;
688
689        let base_idx = self.vertices.len() as u32;
690        let x1 = snap(rect.x);
691        let y1 = snap(rect.y);
692        let x2 = snap(rect.x + rect.width);
693        let y2 = snap(rect.y + rect.height);
694        // Negate z-index: higher z-index should be closer (win under GreaterEqual depth test)
695        let z = -self.current_z;
696        let normal = [0.0, 0.0, 1.0];
697        let clip_rect = self.clip_stack.last().copied().unwrap_or(cvkg_core::Rect {
698            x: -10000.0,
699            y: -10000.0,
700            width: 20000.0,
701            height: 20000.0,
702        });
703        let clip = [clip_rect.x, clip_rect.y, clip_rect.width, clip_rect.height];
704
705        let tex_index = texture_id.unwrap_or(0);
706
707        self.vertices.push(Vertex {
708            position: [x1, y1, z],
709            normal,
710            uv: [uv_rect.x, uv_rect.y],
711            color,
712            material_id: final_material_id,
713            radius,
714            slice,
715            logical: [0.0, 0.0],
716            size: [rect.width, rect.height],
717            clip,
718            tex_index,
719        });
720        self.vertices.push(Vertex {
721            position: [x2, y1, z],
722            normal,
723            uv: [uv_rect.x + uv_rect.width, uv_rect.y],
724            color,
725            material_id: final_material_id,
726            radius,
727            slice,
728            logical: [rect.width, 0.0],
729            size: [rect.width, rect.height],
730            clip,
731            tex_index,
732        });
733        self.vertices.push(Vertex {
734            position: [x2, y2, z],
735            normal,
736            uv: [uv_rect.x + uv_rect.width, uv_rect.y + uv_rect.height],
737            color,
738            material_id: final_material_id,
739            radius,
740            slice,
741            logical: [rect.width, rect.height],
742            size: [rect.width, rect.height],
743            clip,
744            tex_index,
745        });
746        self.vertices.push(Vertex {
747            position: [x1, y2, z],
748            normal,
749            uv: [uv_rect.x, uv_rect.y + uv_rect.height],
750            color,
751            material_id: final_material_id,
752            radius,
753            slice,
754            logical: [0.0, rect.height],
755            size: [rect.width, rect.height],
756            clip,
757            tex_index,
758        });
759
760        self.indices.extend_from_slice(&[
761            base_idx,
762            base_idx + 1,
763            base_idx + 2,
764            base_idx,
765            base_idx + 2,
766            base_idx + 3,
767        ]);
768
769        if let Some(call) = self.draw_calls.last_mut() {
770            call.index_count += 6;
771        }
772    }
773
774    /// Pass 1: Clear scene+depth, draw atmosphere, draw opaque geometry.
775    /// end_frame -- Quench the blade by submitting the full Muspelheim multi-pass effect.
776    ///
777    /// Since the Renderer 3.0 migration, the pass sequence is driven by a Kvasir
778    /// dependency graph rather than hardcoded ordering. The graph is built each
779    /// frame (cheap -- just node/edge allocation), validated (cycle detection,
780    /// input satisfiability), then executed. Conditional passes (glass, bloom,
781    /// accessibility) are automatically eliminated when not needed.
782    pub fn end_frame(&mut self, mut encoder: wgpu::CommandEncoder) {
783        struct ActiveFrameResources {
784            surface_texture: Option<wgpu::SurfaceTexture>,
785            target_view: wgpu::TextureView,
786            scene_texture: wgpu::TextureView,
787            scene_msaa_texture: wgpu::TextureView,
788            depth_texture_view: wgpu::TextureView,
789            blur_env_bind_group_a: wgpu::BindGroup,
790            blur_env_bind_group_b: wgpu::BindGroup,
791            bloom_env_bind_group_a: wgpu::BindGroup,
792            bloom_env_bind_group_b: wgpu::BindGroup,
793        }
794
795        let res = if let Some(window_id) = self.current_window {
796            let Some(ctx) = self.surfaces.get(&window_id) else {
797                tracing::error!("[GPU] Missing surface context for end_frame");
798                return;
799            };
800            let frame = match ctx.surface.get_current_texture() {
801                wgpu::CurrentSurfaceTexture::Success(t) => t,
802                wgpu::CurrentSurfaceTexture::Suboptimal(t) => {
803                    ctx.surface.configure(&self.device, &ctx.config);
804                    t
805                }
806                other => {
807                    tracing::warn!(
808                        "[GPU] Surface texture acquisition failed ({:?}), reconfiguring surface",
809                        other
810                    );
811                    ctx.surface.configure(&self.device, &ctx.config);
812                    // Retry once after reconfiguration; if it fails again, skip the frame.
813                    match ctx.surface.get_current_texture() {
814                        wgpu::CurrentSurfaceTexture::Success(t) => t,
815                        wgpu::CurrentSurfaceTexture::Suboptimal(t) => {
816                            ctx.surface.configure(&self.device, &ctx.config);
817                            t
818                        }
819                        retry_failed => {
820                            tracing::error!(
821                                "[GPU] Surface texture retry also failed ({:?}), skipping frame",
822                                retry_failed
823                            );
824                            self.queue.submit(std::iter::once(encoder.finish()));
825                            return;
826                        }
827                    }
828                }
829            };
830            let view = frame
831                .texture
832                .create_view(&wgpu::TextureViewDescriptor::default());
833
834            ActiveFrameResources {
835                surface_texture: Some(frame),
836                target_view: view,
837                scene_texture: ctx.scene_texture.clone(),
838                scene_msaa_texture: ctx.scene_msaa_texture.clone(),
839                depth_texture_view: ctx.depth_texture_view.clone(),
840                blur_env_bind_group_a: ctx.blur_env_bind_group_a.clone(),
841                blur_env_bind_group_b: ctx.blur_env_bind_group_b.clone(),
842                bloom_env_bind_group_a: ctx.bloom_env_bind_group_a.clone(),
843                bloom_env_bind_group_b: ctx.bloom_env_bind_group_b.clone(),
844            }
845        } else {
846            let Some(ctx) = self.headless_context.as_ref() else {
847                tracing::error!("[GPU] No headless context for end_frame");
848                return;
849            };
850
851            ActiveFrameResources {
852                surface_texture: None,
853                target_view: ctx.output_view.clone(),
854                scene_texture: ctx.scene_texture.clone(),
855                scene_msaa_texture: ctx.scene_msaa_texture.clone(),
856                depth_texture_view: ctx.depth_texture_view.clone(),
857                blur_env_bind_group_a: ctx.blur_env_bind_group_a.clone(),
858                blur_env_bind_group_b: ctx.blur_env_bind_group_b.clone(),
859                bloom_env_bind_group_a: ctx.bloom_env_bind_group_a.clone(),
860                bloom_env_bind_group_b: ctx.bloom_env_bind_group_b.clone(),
861            }
862        };
863
864        // Auto-flush staging belt if render_frame() was not called but geometry was queued.
865        // This ensures apps that forget render_frame() still see their draw calls rendered.
866        if !self.frame_rendered && (!self.vertices.is_empty() || !self.indices.is_empty()) {
867            tracing::debug!(
868                "[GPU] Auto-flushing staging belt in end_frame (render_frame was not called)"
869            );
870            let mut staging_encoder =
871                self.device
872                    .create_command_encoder(&wgpu::CommandEncoderDescriptor {
873                        label: Some("Surtr Auto-Flush Staging Encoder"),
874                    });
875            if !self.vertices.is_empty() {
876                let v_bytes = bytemuck::cast_slice(&self.vertices);
877                self.staging_belt
878                    .write_buffer(
879                        &mut staging_encoder,
880                        &self.geometry_buffers.vertex_buffer,
881                        0,
882                        wgpu::BufferSize::new(v_bytes.len() as u64).unwrap(),
883                    )
884                    .copy_from_slice(v_bytes);
885            }
886            if !self.indices.is_empty() {
887                let i_bytes = bytemuck::cast_slice(&self.indices);
888                self.staging_belt
889                    .write_buffer(
890                        &mut staging_encoder,
891                        &self.geometry_buffers.index_buffer,
892                        0,
893                        wgpu::BufferSize::new(i_bytes.len() as u64).unwrap(),
894                    )
895                    .copy_from_slice(i_bytes);
896            }
897            if !self.instance_data.is_empty() {
898                let inst_bytes = bytemuck::cast_slice(&self.instance_data);
899                self.staging_belt
900                    .write_buffer(
901                        &mut staging_encoder,
902                        &self.geometry_buffers.instance_buffer,
903                        0,
904                        wgpu::BufferSize::new(inst_bytes.len() as u64).unwrap(),
905                    )
906                    .copy_from_slice(inst_bytes);
907            }
908            self.staging_belt.finish();
909            self.staging_command_buffers.push(staging_encoder.finish());
910        }
911
912        // ── Build and execute the Kvasir frame graph ─────────────────────────────
913        let has_glass = self
914            .draw_calls
915            .iter()
916            .any(|c| matches!(c.material, cvkg_core::DrawMaterial::Glass { .. }));
917        let has_bloom = self.bloom_enabled;
918        let has_accessibility =
919            self.color_blind_mode != crate::color_blindness::ColorBlindMode::Normal;
920
921        // Build the frame graph using the Kvasir helper for correct pass ordering.
922        // Conditional passes (glass, bloom, accessibility) are included/excluded based on frame state.
923        // This replaces the hardcoded if/else pass dispatch with a data-driven approach:
924        // the graph declares which passes exist and their ordering, and we execute only enabled ones.
925        //
926        // NOTE: Geometry is uploaded by render_frame() via StagingBelt into staging_command_buffers.
927        // Those staging commands must be submitted before the render pass encoders below, which is
928        // guaranteed by inserting the render encoders after the existing staging entries (see submit block).
929
930        let (blur_id, bloom_id) = if let Some(window_id) = self.current_window {
931            let ctx = self.surfaces.get(&window_id).unwrap();
932            (ctx.blur_tex_a, ctx.bloom_tex_a)
933        } else {
934            let ctx = self.headless_context.as_ref().unwrap();
935            (ctx.blur_tex_a, ctx.bloom_tex_a)
936        };
937        self.registry
938            .alias(crate::kvasir::nodes::RES_BLUR_A, blur_id);
939        self.registry
940            .alias(crate::kvasir::nodes::RES_BLOOM_A, bloom_id);
941        self.registry
942            .alias_view(crate::kvasir::nodes::RES_SCENE, res.scene_texture.clone());
943        self.registry.alias_view(
944            crate::kvasir::nodes::RES_SCENE_MSAA,
945            res.scene_msaa_texture.clone(),
946        );
947
948        let scale = self.current_scale_factor();
949        let scale_bits = scale.to_bits();
950        let active_offscreens_count = self.active_offscreens.len();
951        let portal_regions_count = self.portal_regions.len();
952        let width = self.current_width();
953        let height = self.current_height();
954        let has_volumetric = self.volumetric_enabled;
955
956        // Compute content hashes for cache key (must match construction site)
957        let mut offscreen_hash: u64 = 0;
958        for offscreen in &self.active_offscreens {
959            offscreen_hash = offscreen_hash.wrapping_add(
960                offscreen.target_id.wrapping_mul(31)
961                    ^ (offscreen.blend_mode as u64).wrapping_mul(17),
962            );
963        }
964        let mut portal_hash: u64 = 0;
965        for region in &self.portal_regions {
966            portal_hash = portal_hash.wrapping_add(
967                (region.x.to_bits() as u64)
968                    .wrapping_mul(7)
969                    .wrapping_add((region.y.to_bits() as u64).wrapping_mul(13))
970                    .wrapping_add((region.width.to_bits() as u64).wrapping_mul(19))
971                    .wrapping_add((region.height.to_bits() as u64).wrapping_mul(23)),
972            );
973        }
974
975        let use_cache = if let Some(ref cached) = self.cached_graph_plan {
976            cached.matches(
977                has_glass,
978                has_bloom,
979                has_accessibility,
980                has_volumetric,
981                active_offscreens_count,
982                offscreen_hash,
983                portal_regions_count,
984                portal_hash,
985                width,
986                height,
987                scale_bits,
988                self.material_compilation_hash,
989            )
990        } else {
991            false
992        };
993
994        for (id, panel) in &self.world_space_panels {
995            let width = (panel.world_size.0 * panel.pixels_per_unit).max(1.0) as u32;
996            let height = (panel.world_size.1 * panel.pixels_per_unit).max(1.0) as u32;
997            self.registry
998                .allocate_offscreen(&self.device, *id, [width, height]);
999        }
1000
1001        self.current_scene.ibl_enabled = if has_glass { 1 } else { 0 };
1002        self.queue.write_buffer(
1003            &self.scene_buffer,
1004            0,
1005            bytemuck::bytes_of(&self.current_scene),
1006        );
1007
1008        if !use_cache {
1009            let render_graph = crate::kvasir::nodes::build_render_graph(
1010                &crate::kvasir::nodes::RenderGraphConfig {
1011                    has_glass,
1012                    has_bloom,
1013                    has_accessibility,
1014                    has_ibl: has_glass,
1015                    has_volumetric,
1016                    active_offscreens: &self.active_offscreens,
1017                    portal_regions: &self.portal_regions.iter().cloned().collect::<Vec<_>>(),
1018                    world_space_panels: &self.world_space_panels,
1019                    width,
1020                    height,
1021                    scale,
1022                    directional_light: self.pending_directional_light,
1023                    mesh_instances_3d: std::mem::take(&mut self.pending_mesh_instances_3d),
1024                    transparent_meshes_3d: std::mem::take(
1025                        &mut self.pending_transparent_instances_3d,
1026                    ),
1027                    cascade_splits: [8.0, 25.0, 70.0, 200.0],
1028                    camera_view_proj: self.current_scene.proj * self.current_scene.view,
1029                    camera_pos: glam::Vec3::from(self.current_scene.camera_pos),
1030                },
1031            );
1032            let planner = crate::kvasir::planner::ExecutionPlanner::new(&render_graph);
1033            let compiled_plan = match planner.compile() {
1034                Ok(plan) => plan,
1035                Err(e) => {
1036                    tracing::error!(
1037                        "[Kvasir] Render graph compilation failed ({}), skipping render passes",
1038                        e
1039                    );
1040                    // Present the frame with whatever was rendered (stale scene or blank).
1041                    if let Some(surface_texture) = res.surface_texture {
1042                        surface_texture.present();
1043                        tracing::info!("[Surtr] Frame presented (graph compilation fallback)");
1044                    }
1045                    return;
1046                }
1047            };
1048
1049            // Reuse the already-computed hashes (computed above for cache matching)
1050            self.cached_graph_plan = Some(crate::kvasir::graph_cache::CachedGraphPlan {
1051                has_glass,
1052                has_bloom,
1053                has_accessibility,
1054                has_volumetric,
1055                active_offscreens_count,
1056                offscreen_content_hash: offscreen_hash,
1057                portal_regions_count,
1058                portal_content_hash: portal_hash,
1059                width,
1060                height,
1061                scale_bits,
1062                material_compilation_hash: self.material_compilation_hash,
1063                graph: render_graph,
1064                plan: compiled_plan,
1065            });
1066        }
1067
1068        let cached = self.cached_graph_plan.as_ref().unwrap();
1069        let frame_start = self.last_frame_start;
1070        let budget_ms = self.frame_budget.target_ms;
1071        let allow_degradation = self.frame_budget.allow_degradation;
1072
1073        for &node_key in &cached.plan {
1074            // Frame budget enforcement: if we're already over budget and degradation
1075            // is allowed, skip expensive COSMETIC passes (bloom, volumetric).
1076            //
1077            // P0-2 fix: BackdropBlur, BackdropRegion, and Accessibility are FUNCTIONAL
1078            // passes, not cosmetic effects:
1079            //   * BackdropBlur/BackdropRegion implement glassmorphism (frosted glass
1080            //     panels, modals, sidebars). Skipping them makes glass elements
1081            //     render as opaque solid rectangles, breaking the visual contract
1082            //     for any app using glass materials.
1083            //   * Accessibility is required for screen readers and other AT;
1084            //     skipping it makes the UI unusable for visually-impaired users.
1085            // Only BloomExtract/BloomBlur (post-processing glow) and Volumetric
1086            // (raymarched lighting) are true cosmetics and safe to degrade.
1087            if allow_degradation && budget_ms > 0.0 {
1088                let elapsed_ms = frame_start.elapsed().as_secs_f32() * 1000.0;
1089                if elapsed_ms > budget_ms
1090                    && let Some(node) = cached.graph.node(node_key)
1091                {
1092                    match node.pass_id() {
1093                        crate::kvasir::nodes::PassId::BloomExtract
1094                        | crate::kvasir::nodes::PassId::BloomBlur
1095                        | crate::kvasir::nodes::PassId::Volumetric => {
1096                            tracing::trace!(
1097                                "[Kvasir] Skipping {} (over budget: {:.1}ms > {:.1}ms)",
1098                                node.label(),
1099                                elapsed_ms,
1100                                budget_ms
1101                            );
1102                            continue;
1103                        }
1104                        _ => {} // Always run: Glass, BackdropBlur, BackdropRegion,
1105                                // Accessibility, Geometry, UI, Composite, Present, ...
1106                    }
1107                }
1108            }
1109            if let Some(node) = cached.graph.node(node_key) {
1110                tracing::trace!("[Kvasir] Executing node: {}", node.label());
1111                let mut ctx = crate::kvasir::node::ExecutionContext {
1112                    device: &self.device,
1113                    queue: &self.queue,
1114                    encoder: &mut encoder,
1115                    registry: &self.registry,
1116                    renderer: self,
1117                    target_view: &res.target_view,
1118                    depth_view: &res.depth_texture_view,
1119                    blur_env_bind_group_a: &res.blur_env_bind_group_a,
1120                    blur_env_bind_group_b: &res.blur_env_bind_group_b,
1121                    bloom_env_bind_group_a: &res.bloom_env_bind_group_a,
1122                    bloom_env_bind_group_b: &res.bloom_env_bind_group_b,
1123                    scale_factor: scale,
1124                };
1125                node.execute(&mut ctx);
1126            }
1127        }
1128
1129        // ── Particle Compute Pass ──────────────────────────────────────────
1130        // Flush staged particles to GPU, then run compute integration.
1131        // Must run BEFORE the submit so particle positions are up-to-date.
1132        if !self.particles.staging.is_empty() || self.particles.count > 0 {
1133            // 1. Flush staged particles into the ring buffer
1134            if !self.particles.staging.is_empty() {
1135                let write_start = self.particles.write_head as usize;
1136                let write_count = self.particles.staging.len();
1137                let max = MAX_PARTICLES;
1138
1139                // P1-6 fix: cap the write to max particles to prevent
1140                // wrap-around overlap. If write_count > max, only the
1141                // LAST `max` particles are kept (the most recent ones
1142                // are most relevant for particle effects, and the
1143                // earlier ones are dropped). Without this cap, if
1144                // write_count > max - write_start, the second chunk
1145                // would write past offset 0 and overlap the first
1146                // chunk, corrupting the buffer.
1147                let effective_count = write_count.min(max);
1148                let drop_count = write_count - effective_count;
1149
1150                // Write particles in ring-buffer fashion
1151                let first_chunk = (max - write_start).min(effective_count);
1152                let bytes = bytemuck::cast_slice(
1153                    &self.particles.staging[drop_count..drop_count + first_chunk],
1154                );
1155                self.queue.write_buffer(
1156                    &self.particle_buffer,
1157                    (write_start * std::mem::size_of::<crate::types::GpuParticle>()) as u64,
1158                    bytes,
1159                );
1160                if first_chunk < effective_count {
1161                    let remaining = effective_count - first_chunk;
1162                    let bytes2 = bytemuck::cast_slice(
1163                        &self.particles.staging
1164                            [drop_count + first_chunk..drop_count + first_chunk + remaining],
1165                    );
1166                    self.queue.write_buffer(&self.particle_buffer, 0, bytes2);
1167                    self.particles.write_head = remaining as u32;
1168                } else {
1169                    self.particles.write_head = ((write_start + effective_count) % max) as u32;
1170                }
1171                self.particles.count =
1172                    (self.particles.count as usize + effective_count).min(max) as u32;
1173                self.particles.staging.clear();
1174
1175                // Invalidate render bind group so it's recreated with new data
1176                self.particle_render_bind_group = None;
1177            }
1178
1179            // 2. Run compute pass to integrate particle physics
1180            let dt = self.current_scene.delta_time;
1181            let uniforms = crate::types::ParticleUniforms { dt, _pad: [0.0; 7] };
1182            self.queue.write_buffer(
1183                &self.particle_uniform_buffer,
1184                0,
1185                bytemuck::bytes_of(&uniforms),
1186            );
1187
1188            let compute_bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1189                label: Some("Particle Compute BG"),
1190                layout: &self.particle_compute_bgl,
1191                entries: &[
1192                    wgpu::BindGroupEntry {
1193                        binding: 0,
1194                        resource: self.particle_buffer.as_entire_binding(),
1195                    },
1196                    wgpu::BindGroupEntry {
1197                        binding: 1,
1198                        resource: self.particle_uniform_buffer.as_entire_binding(),
1199                    },
1200                ],
1201            });
1202
1203            let mut compute_encoder =
1204                self.device
1205                    .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1206                        label: Some("Particle Compute Encoder"),
1207                    });
1208            {
1209                let mut cpass = compute_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1210                    label: Some("Particle Integration"),
1211                    ..Default::default()
1212                });
1213                cpass.set_pipeline(&self.particle_compute_pipeline);
1214                cpass.set_bind_group(0, &compute_bind_group, &[]);
1215                let workgroups = self.particles.count.div_ceil(64).max(1);
1216                cpass.dispatch_workgroups(workgroups, 1, 1);
1217            }
1218            self.staging_command_buffers.push(compute_encoder.finish());
1219        }
1220
1221        // 3. Compact dead particles periodically (every 2 seconds)
1222        if self.particles.count > 0 && self.particles.last_compact.elapsed().as_secs_f32() > 2.0 {
1223            self.particles.last_compact = std::time::Instant::now();
1224            // Read back particle data to compact dead particles
1225            let read_size = (self.particles.count as usize
1226                * std::mem::size_of::<crate::types::GpuParticle>())
1227                as u64;
1228            let staging_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1229                label: Some("Particle Compact Staging"),
1230                size: read_size,
1231                usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
1232                mapped_at_creation: false,
1233            });
1234            let mut compact_encoder =
1235                self.device
1236                    .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1237                        label: Some("Particle Compact Copy"),
1238                    });
1239            compact_encoder.copy_buffer_to_buffer(
1240                &self.particle_buffer,
1241                0,
1242                &staging_buf,
1243                0,
1244                read_size,
1245            );
1246            self.staging_command_buffers.push(compact_encoder.finish());
1247            // Note: full GPU readback is expensive; in production we'd use a
1248            // compute compaction pass. For now, dead particles are simply
1249            // overwritten by new ones in the ring buffer (lifetime <= 0 causes
1250            // the vertex shader to output degenerate points behind the camera).
1251        }
1252
1253        // ── Particle Render Pass ────────────────────────────────────────────
1254        // Render live particles as colored points to the swapchain target,
1255        // composited on top of the scene with additive blending.
1256        if self.particles.count > 0 {
1257            // Lazily (re)create the render bind group when staging changed
1258            if self.particle_render_bind_group.is_none() {
1259                self.particle_render_bind_group =
1260                    Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1261                        label: Some("Particle Render BG"),
1262                        layout: &self.particle_render_bgl,
1263                        entries: &[wgpu::BindGroupEntry {
1264                            binding: 0,
1265                            resource: self.particle_buffer.as_entire_binding(),
1266                        }],
1267                    }));
1268            }
1269            if let Some(bg) = &self.particle_render_bind_group {
1270                let mut render_encoder =
1271                    self.device
1272                        .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1273                            label: Some("Particle Render Encoder"),
1274                        });
1275                {
1276                    let mut rpass = render_encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1277                        label: Some("Particle Render"),
1278                        color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1279                            view: &res.target_view,
1280                            resolve_target: None,
1281                            ops: wgpu::Operations {
1282                                load: wgpu::LoadOp::Load,
1283                                store: wgpu::StoreOp::Store,
1284                            },
1285                            depth_slice: None,
1286                        })],
1287                        depth_stencil_attachment: None,
1288                        timestamp_writes: None,
1289                        occlusion_query_set: None,
1290                        multiview_mask: None,
1291                    });
1292                    rpass.set_pipeline(&self.particle_render_pipeline);
1293                    rpass.set_bind_group(0, bg, &[]);
1294                    rpass.draw(0..self.particles.count, 0..1);
1295                }
1296                self.staging_command_buffers.push(render_encoder.finish());
1297            }
1298        }
1299
1300        // ── Submit ─────────────────────────────────────────────────────────────
1301        // staging_command_buffers already contains the geometry upload encoder from
1302        // render_frame() (StagingBelt). The render pass encoders must come AFTER it
1303        // so the GPU sees vertex/index data before the draw calls that reference it.
1304        self.staging_command_buffers.push(encoder.finish());
1305
1306        // Skuld: Resolve timestamps (preserved from original)
1307        if let (Some(q), Some(b), Some(rb)) = (
1308            &self.skuld_queries,
1309            &self.skuld_buffer,
1310            &self.skuld_read_buffer,
1311        ) {
1312            let mut resolve_encoder =
1313                self.device
1314                    .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1315                        label: Some("Skuld Resolve Encoder"),
1316                    });
1317            resolve_encoder.resolve_query_set(q, 0..2, b, 0);
1318            resolve_encoder.copy_buffer_to_buffer(b, 0, rb, 0, 16);
1319            self.staging_command_buffers.push(resolve_encoder.finish());
1320        }
1321
1322        let cmds = std::mem::take(&mut self.staging_command_buffers);
1323        self.queue.submit(cmds);
1324        self.telemetry.frame_time_ms = self.last_frame_start.elapsed().as_secs_f32() * 1000.0;
1325        self.update_vram_telemetry();
1326
1327        // Evict transient frame resources (portal regions, offscreen effects) back into
1328        // the texture pool instead of leaking GPU memory when panels are closed.
1329        self.registry.evict_frame_resources();
1330
1331        if let Some(f) = res.surface_texture {
1332            f.present();
1333            tracing::info!("[Surtr] Frame presented");
1334        }
1335    }
1336
1337    /// Submit pre-routed draw command buckets from the cvkg-compositor.
1338    ///
1339    /// Accepts `CommandBuckets` produced by `CompositorEngine::flatten_and_route()`
1340    /// and submits draw calls in the correct pass order for the Backdrop Capture
1341    /// Architecture:
1342    /// 1. Scene commands (opaque) → Scene Capture pass
1343    /// 2. Glass commands → Material Composite pass (samples blur pyramid)
1344    /// 3. Overlay commands → Top-Level Foreground pass
1345    pub fn submit_buckets(&mut self, buckets: &cvkg_compositor::CommandBuckets) {
1346        // Scene pass -- opaque draw calls, sorted by (z_index, draw_order)
1347        let mut active_offscreens = Vec::new();
1348        let mut current_target_id = None;
1349
1350        // Collect and sort scene commands by (z_index, draw_order) for correct painter's order.
1351        let mut sorted_scene: Vec<_> = buckets.scene_commands.iter().collect();
1352        sorted_scene.sort_by_key(|cmd| match cmd {
1353            cvkg_compositor::engine::RenderCommand::Draw(routed) => {
1354                (routed.z_index as i64, routed.draw_order as i64)
1355            }
1356            _ => (0, 0),
1357        });
1358
1359        for cmd in sorted_scene {
1360            match cmd {
1361                cvkg_compositor::engine::RenderCommand::Draw(routed) => {
1362                    self.set_material(cvkg_core::DrawMaterial::Opaque);
1363                    self.submit_routed(routed, current_target_id);
1364                }
1365                cvkg_compositor::engine::RenderCommand::PushOffscreen {
1366                    source_layer,
1367                    material,
1368                    bounds,
1369                } => {
1370                    current_target_id = Some(source_layer.0);
1371
1372                    // Pre-allocate the texture
1373                    let width = (bounds.width).max(1.0) as u32;
1374                    let height = (bounds.height).max(1.0) as u32;
1375                    self.registry
1376                        .allocate_offscreen(&self.device, source_layer.0, [width, height]);
1377
1378                    if let cvkg_compositor::Material::ShaderEffect {
1379                        effect_name,
1380                        params_json: _,
1381                        ..
1382                    } = material
1383                    {
1384                        active_offscreens.push(crate::types::OffscreenEffectConfig {
1385                            target_id: source_layer.0,
1386                            effect: effect_name.clone(),
1387                            blend_mode: 0,          // Default blend
1388                            effect_args: [0.0; 16], // Need to parse params_json
1389                        });
1390                    }
1391                }
1392                cvkg_compositor::engine::RenderCommand::PopOffscreen => {
1393                    current_target_id = None;
1394                }
1395            }
1396        }
1397        self.active_offscreens = active_offscreens;
1398
1399        // Glass pass -- glassmorphism draw calls sampling blur pyramid
1400        let mut sorted_glass: Vec<_> = buckets.glass_commands.iter().collect();
1401        sorted_glass.sort_by_key(|cmd| match cmd {
1402            cvkg_compositor::engine::RenderCommand::Draw(routed) => {
1403                (routed.z_index as i64, routed.draw_order as i64)
1404            }
1405            _ => (0, 0),
1406        });
1407        for cmd in sorted_glass {
1408            if let cvkg_compositor::engine::RenderCommand::Draw(routed) = cmd {
1409                self.set_material(Self::convert_compositor_material(&routed.material));
1410                self.submit_routed(routed, None);
1411            }
1412        }
1413
1414        // Overlay pass -- foreground UI (crisp text, icons, edge lighting)
1415        let mut sorted_overlay: Vec<_> = buckets.overlay_commands.iter().collect();
1416        sorted_overlay.sort_by_key(|cmd| match cmd {
1417            cvkg_compositor::engine::RenderCommand::Draw(routed) => {
1418                (routed.z_index as i64, routed.draw_order as i64)
1419            }
1420            _ => (0, 0),
1421        });
1422        for cmd in sorted_overlay {
1423            if let cvkg_compositor::engine::RenderCommand::Draw(routed) = cmd {
1424                self.set_material(cvkg_core::DrawMaterial::TopUI);
1425                self.submit_routed(routed, None);
1426            }
1427        }
1428    }
1429
1430    /// Submit a single routed draw command through the internal pipeline.
1431    pub(crate) fn submit_routed(
1432        &mut self,
1433        routed: &cvkg_compositor::RoutedDrawCommand,
1434        target_id: Option<u64>,
1435    ) {
1436        let cmd = &routed.command;
1437        if cmd.index_count == 0 {
1438            return;
1439        }
1440        let material = Self::convert_compositor_material(&routed.material);
1441        self.draw_calls.push(DrawCall {
1442            texture_id: cmd.texture_id,
1443            scissor_rect: cmd.scissor_rect,
1444            index_start: cmd.index_start,
1445            index_count: cmd.index_count,
1446            instance_count: 1,
1447            material,
1448            target_id,
1449            panel_id: self.current_panel_id,
1450            instance_start: cmd.instance_id,
1451            draw_order: 0,
1452        });
1453    }
1454
1455    /// Returns the current effective opacity (product of all stacked values).
1456    pub(crate) fn apply_opacity(&self, mut color: [f32; 4]) -> [f32; 4] {
1457        if let Some(&alpha) = self.opacity_stack.last() {
1458            color[3] *= alpha;
1459        }
1460        color
1461    }
1462
1463    /// Resolve a material_id to DrawMaterial with default parameters.
1464    /// Used by draw_svg which doesn't have a current_draw_material context.
1465    pub(crate) fn resolve_material(material_id: u32) -> cvkg_core::DrawMaterial {
1466        Self::resolve_material_with_context(material_id, &cvkg_core::DrawMaterial::Opaque)
1467    }
1468
1469    /// Resolve a material_id to DrawMaterial, using current_draw_material as context
1470    /// for glass parameters. Centralizes the material routing logic used by both
1471    /// fill_rect_with_full_params_and_slice and emit_draw_call.
1472    pub(crate) fn resolve_material_with_context(
1473        material_id: u32,
1474        current: &cvkg_core::DrawMaterial,
1475    ) -> cvkg_core::DrawMaterial {
1476        use crate::renderer::material_id::*;
1477
1478        // If current context is TopUI, route all non-glass elements to the overlay pass.
1479        // This ensures dropdowns, popovers, and menus render crisp text/shapes on top of other content.
1480        if matches!(current, cvkg_core::DrawMaterial::TopUI) && material_id != GLASS {
1481            return cvkg_core::DrawMaterial::TopUI;
1482        }
1483
1484        // If current context has an active Blend mode, route standard opaque quads to that Blend mode.
1485        if let cvkg_core::DrawMaterial::Blend { mode } = current
1486            && material_id == 0
1487        {
1488            return cvkg_core::DrawMaterial::Blend { mode: *mode };
1489        }
1490
1491        match material_id {
1492            GLASS => {
1493                if let cvkg_core::DrawMaterial::Glass {
1494                    blur_radius,
1495                    ior_override,
1496                    glass_intensity,
1497                } = current
1498                {
1499                    cvkg_core::DrawMaterial::Glass {
1500                        blur_radius: *blur_radius,
1501                        ior_override: *ior_override,
1502                        glass_intensity: *glass_intensity,
1503                    }
1504                } else {
1505                    cvkg_core::DrawMaterial::Glass {
1506                        blur_radius: 20.0,
1507                        ior_override: 0.0,
1508                        glass_intensity: 1.0,
1509                    }
1510                }
1511            }
1512            TOP_UI => cvkg_core::DrawMaterial::TopUI,
1513            BLEND_START..=BLEND_END => cvkg_core::DrawMaterial::Blend {
1514                mode: (material_id - 7),
1515            },
1516            _ => cvkg_core::DrawMaterial::Opaque,
1517        }
1518    }
1519
1520    /// Convert a compositor Material to a core DrawMaterial.
1521    /// Centralizes the mapping used by submit_buckets and submit_routed.
1522    pub(crate) fn convert_compositor_material(
1523        mat: &cvkg_compositor::Material,
1524    ) -> cvkg_core::DrawMaterial {
1525        match mat {
1526            cvkg_compositor::Material::Glass { blur_radius, .. } => {
1527                cvkg_core::DrawMaterial::Glass {
1528                    blur_radius: *blur_radius,
1529                    ior_override: 0.0,
1530                    glass_intensity: 1.0,
1531                }
1532            }
1533            cvkg_compositor::Material::Overlay => cvkg_core::DrawMaterial::TopUI,
1534            cvkg_compositor::Material::Multiply => cvkg_core::DrawMaterial::Blend { mode: 1 },
1535            cvkg_compositor::Material::Screen => cvkg_core::DrawMaterial::Blend { mode: 2 },
1536            cvkg_compositor::Material::BlendOverlay => cvkg_core::DrawMaterial::Blend { mode: 3 },
1537            cvkg_compositor::Material::Darken => cvkg_core::DrawMaterial::Blend { mode: 4 },
1538            cvkg_compositor::Material::Lighten => cvkg_core::DrawMaterial::Blend { mode: 5 },
1539            cvkg_compositor::Material::ColorDodge => cvkg_core::DrawMaterial::Blend { mode: 6 },
1540            cvkg_compositor::Material::ColorBurn => cvkg_core::DrawMaterial::Blend { mode: 7 },
1541            cvkg_compositor::Material::HardLight => cvkg_core::DrawMaterial::Blend { mode: 8 },
1542            cvkg_compositor::Material::SoftLight => cvkg_core::DrawMaterial::Blend { mode: 9 },
1543            cvkg_compositor::Material::Difference => cvkg_core::DrawMaterial::Blend { mode: 10 },
1544            cvkg_compositor::Material::Exclusion => cvkg_core::DrawMaterial::Blend { mode: 11 },
1545            cvkg_compositor::Material::Hue => cvkg_core::DrawMaterial::Blend { mode: 12 },
1546            cvkg_compositor::Material::Saturation => cvkg_core::DrawMaterial::Blend { mode: 13 },
1547            cvkg_compositor::Material::Color => cvkg_core::DrawMaterial::Blend { mode: 14 },
1548            cvkg_compositor::Material::Luminosity => cvkg_core::DrawMaterial::Blend { mode: 15 },
1549            cvkg_compositor::Material::Opaque => cvkg_core::DrawMaterial::Opaque,
1550            _ => cvkg_core::DrawMaterial::Opaque,
1551        }
1552    }
1553
1554    /// Helper: position vertices from SVG view_box into output rect.
1555    pub(crate) fn position_vertices(
1556        vertices: &mut [Vertex],
1557        view_box: Rect,
1558        rect: Rect,
1559        material_id: u32,
1560        clip: [f32; 4],
1561        snap: impl Fn(f32) -> f32,
1562    ) {
1563        for v in vertices.iter_mut() {
1564            let rel_x = (v.position[0] - view_box.x) / view_box.width;
1565            let rel_y = (v.position[1] - view_box.y) / view_box.height;
1566            v.position[0] = snap(rect.x + rel_x * rect.width);
1567            v.position[1] = snap(rect.y + rel_y * rect.height);
1568            v.position[2] = 0.0; // z will be set by transform stack
1569            v.logical = [v.position[0], v.position[1]];
1570            v.clip = clip;
1571            v.material_id = material_id;
1572        }
1573    }
1574
1575    /// Helper: emit a draw call for a batch of vertices.
1576    pub(crate) fn emit_draw_call(
1577        renderer: &mut GpuRenderer,
1578        material: cvkg_core::DrawMaterial,
1579        texture_id: Option<u32>,
1580        scissor_rect: Rect,
1581        index_count: u32,
1582        base_vertex: u32,
1583    ) {
1584        let draw_order = renderer.current_draw_order;
1585        let (translation, scale_transform, rotation, _, _) = renderer.current_transform();
1586        let current_instance_data = InstanceData {
1587            translation,
1588            scale: scale_transform,
1589            rotation,
1590            blur_radius: 0.0,
1591            ior_override: 0.0,
1592            glass_intensity: 1.0,
1593        };
1594        // CRITICAL FIX: Only break batch on material/scissor/texture state changes.
1595        // Transform (translation/scale/rotation) is per-instance data.
1596        let last_call = renderer.draw_calls.last();
1597        let needs_new_call = renderer.draw_calls.is_empty()
1598            || renderer.current_texture_id != texture_id
1599            || last_call.unwrap().scissor_rect != renderer.clip_stack.last().copied()
1600            || last_call.unwrap().panel_id != renderer.current_panel_id
1601            || last_call.unwrap().material != material
1602            || {
1603                let last_material = last_call.unwrap().material;
1604                matches!((material, last_material),
1605                    (cvkg_core::DrawMaterial::Glass { blur_radius: a, ior_override: b, glass_intensity: c },
1606                     cvkg_core::DrawMaterial::Glass { blur_radius: d, ior_override: e, glass_intensity: f })
1607                    if a != d || b != e || c != f)
1608            };
1609
1610        if needs_new_call {
1611            renderer.current_texture_id = texture_id;
1612            renderer.instance_data.push(current_instance_data);
1613            renderer.draw_calls.push(DrawCall {
1614                target_id: None,
1615                panel_id: renderer.current_panel_id,
1616                texture_id,
1617                scissor_rect: renderer.clip_stack.last().copied(),
1618                index_start: (renderer.indices.len() - index_count as usize) as u32,
1619                index_count,
1620                instance_count: 1,
1621                material,
1622                instance_start: (renderer.instance_data.len() - 1) as u32,
1623                draw_order: 0,
1624            });
1625        } else {
1626            // Same batch - add instance data and increment instance count
1627            renderer.instance_data.push(current_instance_data);
1628            if let Some(call) = renderer.draw_calls.last_mut() {
1629                call.instance_count += 1;
1630            }
1631        }
1632    }
1633
1634    /// capture_frame -- Read back the rendered frame as a byte buffer (RGBA8).
1635    pub async fn capture_frame(&self) -> Result<Vec<u8>, String> {
1636        let ctx = self
1637            .headless_context
1638            .as_ref()
1639            .ok_or("Headless context required for capture")?;
1640
1641        let u32_size = std::mem::size_of::<u32>() as u32;
1642        let width = ctx.width;
1643        let height = ctx.height;
1644        let bytes_per_row = width * u32_size;
1645        let padding = (256 - (bytes_per_row % 256)) % 256;
1646        let padded_bytes_per_row = bytes_per_row + padding;
1647
1648        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
1649            label: Some("Capture Buffer"),
1650            size: (padded_bytes_per_row as u64 * height as u64),
1651            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
1652            mapped_at_creation: false,
1653        });
1654
1655        let mut encoder = self
1656            .device
1657            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1658                label: Some("Capture Encoder"),
1659            });
1660
1661        encoder.copy_texture_to_buffer(
1662            wgpu::TexelCopyTextureInfo {
1663                texture: &ctx.output_texture,
1664                mip_level: 0,
1665                origin: wgpu::Origin3d::ZERO,
1666                aspect: wgpu::TextureAspect::All,
1667            },
1668            wgpu::TexelCopyBufferInfo {
1669                buffer: &output_buffer,
1670                layout: wgpu::TexelCopyBufferLayout {
1671                    offset: 0,
1672                    bytes_per_row: Some(padded_bytes_per_row),
1673                    rows_per_image: Some(height),
1674                },
1675            },
1676            wgpu::Extent3d {
1677                width,
1678                height,
1679                depth_or_array_layers: 1,
1680            },
1681        );
1682
1683        self.queue.submit(Some(encoder.finish()));
1684
1685        let buffer_slice = output_buffer.slice(..);
1686        let (sender, receiver) = futures::channel::oneshot::channel();
1687        buffer_slice.map_async(wgpu::MapMode::Read, move |v| {
1688            let _ = sender.send(v);
1689        });
1690
1691        let _ = self.device.poll(wgpu::PollType::Wait {
1692            submission_index: None,
1693            timeout: None,
1694        });
1695
1696        if let Ok(Ok(_)) = receiver.await {
1697            let data = buffer_slice.get_mapped_range();
1698            let mut result = Vec::with_capacity((width * height * 4) as usize);
1699
1700            for y in 0..height {
1701                let start = (y * padded_bytes_per_row) as usize;
1702                let end = start + bytes_per_row as usize;
1703                result.extend_from_slice(&data[start..end]);
1704            }
1705
1706            tracing::trace!(
1707                "[GPU] capture_frame: data len={}, first 4 bytes={:?}",
1708                data.len(),
1709                &data[0..4.min(data.len())]
1710            );
1711
1712            drop(data);
1713            output_buffer.unmap();
1714            Ok(result)
1715        } else {
1716            Err("Failed to capture frame".to_string())
1717        }
1718    }
1719
1720    /// Hash a set of gradient stops for cache lookup.
1721    /// Uses the position and color of each stop to produce a stable hash.
1722    fn hash_gradient_stops(stops: &[[f32; 4]]) -> u64 {
1723        use std::hash::{Hash, Hasher};
1724        let mut hasher = std::collections::hash_map::DefaultHasher::new();
1725        for stop in stops {
1726            for v in stop {
1727                v.to_bits().hash(&mut hasher);
1728            }
1729        }
1730        hasher.finish()
1731    }
1732
1733    /// Upload gradient stops as a 32x1 RGBA8 texture.
1734    /// RGB = stop color (linear-ish sRGB from the component), A = stop position (0-255 mapped to 0-1).
1735    /// The texture is cached by hash; stops are only re-uploaded when the hash changes.
1736    #[allow(clippy::collapsible_if)]
1737    pub(crate) fn upload_gradient_stops(&mut self, stops: &[[f32; 4]]) {
1738        if stops.is_empty() {
1739            return;
1740        }
1741
1742        let hash = Self::hash_gradient_stops(stops);
1743
1744        // Check if the texture is already cached with this hash
1745        if hash == self.gradient_stops_hash {
1746            if let Some((_, _, bg)) = self.gradient_texture_cache.get(&hash) {
1747                self.gradient_bind_group = bg.clone();
1748                return;
1749            }
1750        }
1751
1752        // Check if we have a cached texture for this hash (from a previous frame)
1753        if let Some((_, view, bg)) = self.gradient_texture_cache.get(&hash) {
1754            self.gradient_stop_texture = view.texture().clone();
1755            self.gradient_stop_texture_view = view.clone();
1756            self.gradient_bind_group = bg.clone();
1757            self.gradient_stops_hash = hash;
1758            return;
1759        }
1760
1761        // Upload stops into a 32x1 RGBA8 texture
1762        let max_stops = 32u32;
1763        let num_stops = stops.len().min(max_stops as usize) as u32;
1764
1765        // Build RGBA8 data: pack position into alpha as u8
1766        let mut data = vec![0u8; (max_stops as usize) * 4];
1767        for (i, stop) in stops.iter().enumerate().take(max_stops as usize) {
1768            // Convert linear-ish float color to sRGB u8
1769            let r = (stop[0].clamp(0.0, 1.0) * 255.0).round() as u8;
1770            let g = (stop[1].clamp(0.0, 1.0) * 255.0).round() as u8;
1771            let b = (stop[2].clamp(0.0, 1.0) * 255.0).round() as u8;
1772            let a = (stop[3].clamp(0.0, 1.0) * 255.0).round() as u8;
1773            // Store position in the alpha channel (4th byte)
1774            // The color goes in RGB (bytes 0-2), position in byte 3
1775            #[allow(clippy::identity_op)]
1776            {
1777                data[i * 4 + 0] = r;
1778                data[i * 4 + 1] = g;
1779                data[i * 4 + 2] = b;
1780                data[i * 4 + 3] = a;
1781            }
1782        }
1783
1784        // Create or reuse texture
1785        let texture = self.device.create_texture(&wgpu::TextureDescriptor {
1786            label: Some("Gradient Stops Texture"),
1787            size: wgpu::Extent3d {
1788                width: max_stops,
1789                height: 1,
1790                depth_or_array_layers: 1,
1791            },
1792            mip_level_count: 1,
1793            sample_count: 1,
1794            dimension: wgpu::TextureDimension::D2,
1795            format: wgpu::TextureFormat::Rgba8Unorm,
1796            usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
1797            view_formats: &[],
1798        });
1799
1800        self.queue.write_texture(
1801            wgpu::TexelCopyTextureInfo {
1802                texture: &texture,
1803                mip_level: 0,
1804                origin: wgpu::Origin3d::ZERO,
1805                aspect: wgpu::TextureAspect::All,
1806            },
1807            &data,
1808            wgpu::TexelCopyBufferLayout {
1809                offset: 0,
1810                bytes_per_row: Some(max_stops * 4),
1811                rows_per_image: Some(1),
1812            },
1813            wgpu::Extent3d {
1814                width: max_stops,
1815                height: 1,
1816                depth_or_array_layers: 1,
1817            },
1818        );
1819
1820        let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default());
1821
1822        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1823            layout: &self.gradient_bind_group_layout,
1824            entries: &[
1825                wgpu::BindGroupEntry {
1826                    binding: 0,
1827                    resource: wgpu::BindingResource::TextureView(&texture_view),
1828                },
1829                wgpu::BindGroupEntry {
1830                    binding: 1,
1831                    resource: wgpu::BindingResource::Sampler(&self.dummy_sampler),
1832                },
1833            ],
1834            label: Some("Gradient Bind Group"),
1835        });
1836
1837        // Cache the texture
1838        self.gradient_stops_hash = hash;
1839        self.gradient_stop_texture = texture.clone();
1840        self.gradient_stop_texture_view = texture_view.clone();
1841        self.gradient_bind_group = bind_group.clone();
1842        self.gradient_texture_cache
1843            .insert(hash, (texture, texture_view, bind_group));
1844    }
1845
1846    /// Draw a multi-stop gradient quad using the GPU shader.
1847    /// rect: bounding rectangle in logical pixels
1848    /// stops: array of [R, G, B, A] where A is the position (0.0-1.0)
1849    /// angle: gradient angle in radians (for linear gradients)
1850    /// is_radial: true for radial gradient, false for linear
1851    pub fn draw_gradient_multi(
1852        &mut self,
1853        rect: Rect,
1854        stops: &[[f32; 4]],
1855        angle: f32,
1856        is_radial: bool,
1857    ) {
1858        if stops.is_empty() {
1859            return;
1860        }
1861
1862        // Upload gradient stops (cached by hash)
1863        self.upload_gradient_stops(stops);
1864
1865        let num_stops = stops.len().min(32) as f32;
1866        let material_id = if is_radial { 31u32 } else { 30u32 };
1867
1868        // Use a white base color; the shader reads stops from the texture
1869        let white = [1.0f32, 1.0, 1.0, 1.0];
1870
1871        // slice.x = angle (for linear), slice.y = num_stops
1872        let slice = [angle, num_stops, 0.0, 1.0];
1873
1874        self.fill_rect_with_full_params_and_slice(
1875            rect,
1876            white,
1877            material_id,
1878            None,
1879            0.0,
1880            Rect {
1881                x: 0.0,
1882                y: 0.0,
1883                width: 1.0,
1884                height: 1.0,
1885            },
1886            slice,
1887            [0.0, 0.0],
1888        );
1889    }
1890
1891    /// Submit a 3D mesh instance to the GPU-ready staging buffer.
1892    ///
1893    /// Creates GPU vertex and index buffers for the mesh and stores the
1894    /// instance in `pending_mesh_instances_3d`. The instance will be consumed
1895    /// by the frame graph during `end_frame` to construct the Shadow and Opaque3d
1896    /// pass nodes.
1897    ///
1898    /// WHY: This enables the Kvasir render graph to render true 3D meshes with
1899    /// instanced rendering, separate from the CPU-baked 2D vertex buffer path.
1900    pub fn submit_mesh_3d(
1901        &mut self,
1902        mesh: &cvkg_core::Mesh,
1903        material: &cvkg_core::Material3D,
1904        transform: &cvkg_core::Transform3D,
1905    ) {
1906        let model_matrix = transform.to_matrix();
1907
1908        // Use Vertex3D which matches the WGSL VertexInput3D layout (locations 0-4, 9)
1909        // This provides position, normal, uv, color, and tangent fields directly.
1910        let mut mesh_vertices: Vec<Vertex3D> = Vec::with_capacity(mesh.vertices.len());
1911        for (i, pos) in mesh.vertices.iter().enumerate() {
1912            let raw_uv = mesh.tex_coords.get(i).copied().unwrap_or([0.0, 0.0]);
1913            let uv = [
1914                raw_uv[0] * material.uv_scale[0] + material.uv_offset[0],
1915                raw_uv[1] * material.uv_scale[1] + material.uv_offset[1],
1916            ];
1917            mesh_vertices.push(Vertex3D {
1918                position: *pos,
1919                normal: mesh.normals.get(i).copied().unwrap_or([0.0, 0.0, 1.0]),
1920                uv,
1921                color: material.base_color,
1922                tangent: mesh
1923                    .tangents
1924                    .get(i)
1925                    .copied()
1926                    .unwrap_or([0.0, 0.0, 1.0, 1.0]),
1927            });
1928        }
1929
1930        let vertex_bytes: Vec<u8> = bytemuck::cast_slice(&mesh_vertices).to_vec();
1931        let vertex_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
1932            label: Some("Mesh3D Vertex Buffer"),
1933            size: (mesh_vertices.len() * std::mem::size_of::<Vertex3D>()) as u64,
1934            usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
1935            mapped_at_creation: false,
1936        });
1937
1938        let index_bytes: Vec<u8> = bytemuck::cast_slice(&mesh.indices).to_vec();
1939        let index_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
1940            label: Some("Mesh3D Index Buffer"),
1941            size: (mesh.indices.len() * std::mem::size_of::<u32>()) as u64,
1942            usage: wgpu::BufferUsages::INDEX | wgpu::BufferUsages::COPY_DST,
1943            mapped_at_creation: false,
1944        });
1945
1946        self.queue.write_buffer(&vertex_buffer, 0, &vertex_bytes);
1947        self.queue.write_buffer(&index_buffer, 0, &index_bytes);
1948
1949        let (center, half_extents) = mesh.aabb();
1950        let mesh_radius = half_extents.length().max(1.0);
1951        if mesh_radius > self.pending_scene_radius {
1952            self.pending_scene_radius = mesh_radius;
1953        }
1954
1955        // Compute average view_depth from raw vertices in world space
1956        let view_depth = (0..mesh.vertices.len())
1957            .map(|i| {
1958                let world_pos = model_matrix.transform_point3(glam::Vec3::from(mesh.vertices[i]));
1959                (glam::Vec3::from(self.current_scene.camera_pos) - world_pos).length()
1960            })
1961            .sum::<f32>()
1962            / mesh.vertices.len().max(1) as f32;
1963
1964        let row0 = model_matrix.row(0);
1965        let row1 = model_matrix.row(1);
1966        let row2 = model_matrix.row(2);
1967        let instance_index = self.instance_data_3d.len() as u32;
1968        self.instance_data_3d.push(InstanceData3D {
1969            model_row0: [row0.x, row0.y, row0.z, row0.w],
1970            model_row1: [row1.x, row1.y, row1.z, row1.w],
1971            model_row2: [row2.x, row2.y, row2.z, row2.w],
1972            material_overrides: [material.metallic, material.roughness, 0.0, material.opacity],
1973            uv_scale: material.uv_scale,
1974            uv_offset: material.uv_offset,
1975        });
1976
1977        let gpu_mesh = crate::passes::shadow::GpuMesh3d {
1978            vertex_buffer,
1979            index_buffer,
1980            index_count: mesh.indices.len() as u32,
1981            transform: model_matrix,
1982            view_depth,
1983            instance_index,
1984        };
1985
1986        if material.opacity < 1.0 {
1987            self.pending_transparent_instances_3d.push(gpu_mesh);
1988        } else {
1989            self.pending_mesh_instances_3d.push(gpu_mesh);
1990        }
1991
1992        if self.pending_directional_light.is_none() {
1993            self.pending_directional_light = Some(crate::passes::shadow::DirectionalLight {
1994                direction: glam::Vec3::new(0.5, 0.8, 0.6),
1995                color: glam::Vec3::new(1.0, 0.95, 0.9),
1996                intensity: 1.0,
1997            });
1998        }
1999    }
2000}