Skip to main content

cvkg_render_gpu/renderer/
mod.rs

1//! The main GpuRenderer struct and core frame lifecycle.
2use crate::heim::SkylinePacker;
3use crate::types::*;
4use crate::vertex::*;
5use cvkg_core::Rect;
6use cvkg_core::{ColorTheme, SceneUniforms};
7use lru::LruCache;
8use std::collections::VecDeque;
9use std::num::NonZeroUsize;
10use std::sync::Arc;
11
12// Re-export for test access
13pub use crate::subsystems::RendererConfig;
14
15pub(crate) mod context_helpers;
16pub(crate) mod draw;
17pub(crate) mod init;
18pub(crate) mod pipelines;
19pub(crate) mod svg;
20#[cfg(test)]
21pub(crate) mod tests;
22
/// Material ID constants used in vertex `material_id` and DrawMaterial routing.
/// These map to shader material indices and control per-draw-call pipeline selection.
///
/// IDs inside `BLEND_START..=BLEND_END` encode a blend mode as `id - 7`
/// (ID 8 is blend mode 1, ID 22 is blend mode 15); the named constants in that
/// range below are aliases for specific blend modes.
pub(crate) mod material_id {
    /// Opaque geometry (default, depth-tested, no blending).
    pub const OPAQUE: u32 = 0;
    /// Ellipse shape (SDF circle, no blending).
    pub const ELLIPSE: u32 = 4;
    /// Top UI layer (alpha blended, no blur).
    pub const TOP_UI: u32 = 6;
    /// Glass / frosted blur material.
    pub const GLASS: u32 = 7;
    /// First ID of the blend-mode range: blend modes occupy IDs 8..=22
    /// (mapping to blend mode 1..=15).
    pub const BLEND_START: u32 = 8;
    /// Last ID (inclusive) of the blend-mode range, i.e. blend mode 15.
    pub const BLEND_END: u32 = 22;
    /// Radial gradient (blend mode 9).
    pub const RADIAL_GRADIENT: u32 = 16;
    /// Squircle stroke / circular progress (blend mode 10).
    pub const SQUIRCLE_STROKE: u32 = 17;
    /// Drop shadow / glow SDF (blend mode 11).
    pub const DROP_SHADOW: u32 = 18;
    /// Dashed stroke (blend mode 12).
    pub const DASHED_STROKE: u32 = 19;
    /// 3D cube mesh (blend mode 14).
    pub const MESH_3D: u32 = 21;
}
48
/// P1-10: Rendering quality tier used to adapt MSAA cost to the GPU class.
///
/// Each tier maps to a fixed MSAA sample count, see
/// [`QualityLevel::msaa_sample_count`]. The default tier is `High`, which
/// matches the previously hardcoded behavior (MSAA 4x).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum QualityLevel {
    /// MSAA 4x (the default).
    #[default]
    High,
    /// MSAA 2x, for moderate savings on mobile.
    Medium,
    /// MSAA disabled, for low-end GPUs (Adreno 3xx, etc.).
    Low,
}

impl QualityLevel {
    /// Number of MSAA samples per pixel for this tier: 4, 2 or 1.
    pub fn msaa_sample_count(self) -> u32 {
        match self {
            Self::Low => 1,
            Self::Medium => 2,
            Self::High => 4,
        }
    }
}
72
/// GpuRenderer implements the high-performance GPU backend.
///
/// This struct owns the wgpu handles, the per-window surfaces, the shared
/// atlas/texture state, the CPU-side geometry staging vectors, every render and
/// compute pipeline, and the caches used while recording a frame.
///
/// NOTE(review): fields are dropped in declaration order, so reordering them
/// changes the order in which GPU resources are released.
pub struct GpuRenderer {
    // Core wgpu handles, shared via `Arc`.
    pub(crate) instance: Arc<wgpu::Instance>,
    pub(crate) adapter: Arc<wgpu::Adapter>,
    pub(crate) device: Arc<wgpu::Device>,
    pub(crate) queue: Arc<wgpu::Queue>,

    // Kvasir resource registry -- tracks GPU resource lifetimes
    pub(crate) registry: crate::kvasir::registry::ResourceRegistry,

    // Offscreen effects: active configurations plus pipelines keyed by name.
    pub(crate) active_offscreens: Vec<crate::types::OffscreenEffectConfig>,
    pub(crate) effect_pipelines: std::collections::HashMap<String, wgpu::RenderPipeline>,
    pub(crate) effect_params_buffer: wgpu::Buffer,
    pub(crate) effect_params_bind_group: wgpu::BindGroup,
    pub(crate) linear_sampler: wgpu::Sampler,
    // AI Generator Channel
    /// Receiving end of the channel that delivers compiled materials (or a
    /// generator error). `None` when no channel is attached.
    pub ai_material_rx: Option<
        std::sync::mpsc::Receiver<
            Result<crate::material::CompiledMaterial, crate::ai::GeneratorError>,
        >,
    >,

    // Multi-Window Surface Management
    /// Per-window surface state, keyed by winit window id.
    pub(crate) surfaces: std::collections::HashMap<winit::window::WindowId, SurfaceContext>,
    /// Window currently selected as the render target, if any.
    pub(crate) current_window: Option<winit::window::WindowId>,
    /// Offscreen context used when rendering without a window, if any.
    pub headless_context: Option<HeadlessContext>,

    // Mega-Heim (Shared across all windows)
    pub(crate) text: crate::types::TextSubsystem,
    pub(crate) mega_heim_tex: wgpu::Texture,
    pub(crate) mega_heim_bind_group: wgpu::BindGroup,
    pub(crate) heim_packer: SkylinePacker,
    pub(crate) image_uv_registry: LruCache<String, Rect>,
    pub(crate) texture_registry: LruCache<String, u32>,
    pub(crate) texture_views: Vec<wgpu::TextureView>,
    pub(crate) dummy_sampler: wgpu::Sampler,
    /// Dummy single-sampled depth texture view.
    ///
    /// WHY: Used in the volumetric shader to bind a valid single-sampled depth view
    /// when MSAA is enabled (since the actual scene depth view is multisampled).
    ///
    /// CONTRACT: Always sample_count = 1, format = Depth32Float.
    pub(crate) dummy_depth_view: wgpu::TextureView,
    /// Dummy multisampled depth texture view.
    ///
    /// WHY: Used in the volumetric shader to bind a valid multisampled depth view
    /// when MSAA is disabled (since the actual scene depth view is single-sampled).
    ///
    /// CONTRACT: Always sample_count = 4, format = Depth32Float.
    ///
    /// NOTE(review): `QualityLevel::Medium` selects 2x MSAA; confirm a fixed 4x
    /// dummy view is still valid wherever it is bound.
    pub(crate) dummy_depth_view_msaa: wgpu::TextureView,
    pub(crate) svg: crate::types::SvgSubsystem,

    // Niflheim Resources (Shared)
    pub(crate) dummy_texture_bind_group: wgpu::BindGroup,
    pub(crate) dummy_env_bind_group: wgpu::BindGroup,
    pub(crate) texture_bind_group_layout: wgpu::BindGroupLayout,
    pub(crate) texture_bind_groups: Vec<wgpu::BindGroup>,
    pub(crate) shared_elements: LruCache<String, cvkg_core::Rect>,

    // The Forge's Anvil (GPU Buffers)
    pub(crate) geometry_buffers: crate::types::GeometryBuffers,
    pub(crate) vertices: Vec<Vertex>,
    pub(crate) indices: Vec<u32>,
    pub(crate) instance_data: Vec<InstanceData>,
    pub(crate) staging_belt: wgpu::util::StagingBelt,
    pub(crate) staging_command_buffers: Vec<wgpu::CommandBuffer>,
    pub(crate) draw_calls: Vec<DrawCall>,
    pub(crate) current_texture_id: Option<u32>,

    // Opacity & Clip Stacks
    pub(crate) opacity_stack: Vec<f32>,
    pub(crate) clip_stack: Vec<Rect>,
    pub(crate) slice_stack: Vec<(f32, f32)>,
    pub(crate) shadow_stack: Vec<ShadowState>,

    // SVG Filter Engine Resources
    /// Render pipeline for Gaussian blur (two-pass separable kernel).
    /// Initialized lazily on first use.
    pub blur_pipeline: Option<wgpu::RenderPipeline>,
    /// Uniform buffer for blur parameters (std_deviation, kernel_size, direction).
    /// Initialized lazily on first use.
    pub blur_uniform: Option<wgpu::Buffer>,
    /// Bind group layout for blur shader.
    /// Initialized lazily on first use.
    pub blur_bind_group_layout: Option<wgpu::BindGroupLayout>,
    /// Render pipeline for blend operations (feBlend, feComposite).
    /// Initialized lazily on first use.
    pub blend_pipeline: Option<wgpu::RenderPipeline>,
    /// Bind group layout for blend shader.
    /// Initialized lazily on first use.
    pub blend_bind_group_layout: Option<wgpu::BindGroupLayout>,
    /// Render pipeline for flood fill (feFlood).
    /// Initialized lazily on first use.
    pub flood_pipeline: Option<wgpu::RenderPipeline>,
    /// Bind group layout for copy/offset operations.
    /// Initialized lazily on first use.
    pub copy_bind_group_layout: Option<wgpu::BindGroupLayout>,

    // The Forge's Heart (Shared Berserker State)
    pub(crate) theme_buffer: wgpu::Buffer,
    /// GPU buffer that receives the bytes of `current_scene`.
    pub(crate) scene_buffer: wgpu::Buffer,
    pub(crate) berserker_bind_group: wgpu::BindGroup,
    pub(crate) berserker_bind_group_layout: wgpu::BindGroupLayout,
    pub(crate) start_time: std::time::Instant,
    pub(crate) current_theme: ColorTheme,
    /// CPU-side copy of the scene uniforms.
    pub(crate) current_scene: SceneUniforms,
    pub(crate) current_z: f32,

    /// Default background color for the canvas (RGBA).
    /// Used when the app does not draw its own background.
    /// Defaults to Deep Void [0.02, 0.02, 0.05, 1.0].
    pub(crate) default_background_color: [f32; 4],

    /// Whether the app drew any background geometry this frame.
    /// If false, the renderer clears to default_background_color.
    pub(crate) app_drew_background: bool,

    /// Whether render_frame() was called this frame.
    /// Used by end_frame() to auto-flush staging if render_frame() was skipped.
    pub(crate) frame_rendered: bool,

    /// Current draw order for SVG and other direct draw calls.
    /// Set by draw_svg_with_order(), used by emit_draw_call().
    pub(crate) current_draw_order: i32,

    // Muspelheim Pipelines (Shared)
    pub(crate) pipeline: wgpu::RenderPipeline,
    /// Specialized opaque/2D material pipeline (modes 0-20 excluding 7,13-15,18,21).
    pub(crate) opaque_pipeline: wgpu::RenderPipeline,
    /// Non-multisampled pipeline used specifically to draw UI overlays.
    /// Drawn with sample count 1 and no depth testing/depth stencil attachment.
    pub(crate) ui_pipeline: wgpu::RenderPipeline,
    /// Specialized glass material pipeline (mode 7 only, ~150 lines of complex math).
    pub(crate) glass_pipeline: wgpu::RenderPipeline,
    pub(crate) background_pipeline: wgpu::RenderPipeline,
    pub(crate) bloom_extract_pipeline: wgpu::RenderPipeline,
    /// Identity copy pipeline for Pass 2 backdrop blur (all pixels, no luminance gate).
    pub(crate) copy_pipeline: wgpu::RenderPipeline,
    pub(crate) composite_pipeline: wgpu::RenderPipeline,
    /// Color blindness simulation pipeline (fullscreen triangle).
    pub(crate) color_blind_pipeline: wgpu::RenderPipeline,
    /// Volumetric raymarching pipeline (fullscreen triangle with SDF raymarch).
    pub(crate) volumetric_pipeline: wgpu::RenderPipeline,
    /// Volumetric bind group layout for scene uniforms (time/resolution/light).
    pub(crate) volumetric_bind_group_layout: wgpu::BindGroupLayout,
    /// Persistent uniform buffer for volumetric data (updated each frame).
    pub(crate) volumetric_uniform_buffer: wgpu::Buffer,
    /// Comparison sampler for volumetric depth comparison.
    pub(crate) volumetric_depth_sampler: wgpu::Sampler,
    /// CPU-side list of hologram instances submitted this frame.
    /// Cleared each frame in reset_frame_state; consumed by VolumetricNode::execute.
    pub(crate) hologram_instances: Vec<HologramInstance>,
    /// Kawase blur pyramid downsample pipeline (separate shader module).
    pub(crate) kawase_down_pipeline: wgpu::RenderPipeline,
    /// Kawase blur pyramid upsample pipeline (separate shader module).
    pub(crate) kawase_up_pipeline: wgpu::RenderPipeline,
    /// Kawase blur bind group layout (uniform + texture + sampler).
    pub(crate) kawase_bind_group_layout: wgpu::BindGroupLayout,
    /// Persistent uniform buffer for Kawase blur operations (avoids per-frame allocation).
    pub(crate) kawase_uniform: wgpu::Buffer,
    /// Pool of persistent uniform buffers for Kawase blur operations.
    pub(crate) kawase_uniform_buffers: Vec<wgpu::Buffer>,
    /// Environment bind group layout (texture + sampler).
    pub(crate) env_bind_group_layout: wgpu::BindGroupLayout,

    // Telemetry
    /// Latest telemetry snapshot; `get_telemetry()` returns a clone of it.
    pub telemetry: cvkg_core::TelemetryData,

    /// Pipeline cache for disk-persisted compiled shaders when the adapter exposes PIPELINE_CACHE.
    /// None means pipelines compile normally without a disk cache.
    pub(crate) pipeline_cache: Option<wgpu::PipelineCache>,

    /// Configuration for render-loop frame timing and degradation strategies.
    pub frame_budget: cvkg_core::FrameBudget,
    /// Staging buffer for windowed frame capture.
    pub(crate) capture_staging_buffer: Option<wgpu::Buffer>,
    /// Instant at the start of the last redraw, used for measuring frame timings.
    pub last_redraw_start: std::time::Instant,
    /// Instant at the start of the last frame, used for frame_time_ms calculation.
    pub last_frame_start: std::time::Instant,

    // VRAM Tracking (Bytes)
    // Both totals are written by `update_vram_telemetry()`.
    pub(crate) vram_buffers_bytes: u64,
    pub(crate) vram_textures_bytes: u64,

    // Debugging
    pub(crate) _debug_layout: bool,

    // Transform Stack -- stores full affine matrices for correct SVG transform composition.
    pub(crate) transform_stack: Vec<glam::Mat3>,
    /// Whether a redraw has been requested for the next frame.
    pub redraw_requested: bool,
    /// Cursor for compositor draw call submission tracking.
    pub(crate) compositor_index_cursor: u32,

    /// Bloom post-processing enabled flag.
    pub bloom_enabled: bool,
    /// Dynamic toggle to enable or disable the volumetric raymarching pass, which handles fog and light shaft simulations.
    pub volumetric_enabled: bool,

    // Path Geometry Cache -- avoids re-tessellating static paths every frame.
    pub(crate) path_geometry_cache: lru::LruCache<u64, (Vec<Vertex>, Vec<u32>)>,
    /// Color blindness bind group layout (texture + sampler + uniform).
    pub(crate) color_blind_bind_group_layout: wgpu::BindGroupLayout,
    /// Color blindness uniform buffer (updated each frame when mode changes).
    pub(crate) color_blind_uniform_buffer: wgpu::Buffer,
    /// Color blindness simulation mode (Normal = disabled).
    pub color_blind_mode: crate::color_blindness::ColorBlindMode,
    /// Color blindness effect intensity (0.0-1.0).
    pub color_blind_intensity: f32,
    /// Sampler for the color blindness pass (reused from main pipeline).
    pub(crate) sampler: wgpu::Sampler,

    // Timestamp Queries (Norse: Skuld = future/time/debt)
    // All three GPU objects are optional; presumably `None` when timestamp
    // queries are unavailable -- TODO confirm against the init code.
    pub(crate) skuld_queries: Option<wgpu::QuerySet>,
    pub(crate) skuld_buffer: Option<wgpu::Buffer>,
    pub(crate) skuld_read_buffer: Option<wgpu::Buffer>,
    pub(crate) skuld_period: f32,
    pub last_gpu_time_ns: u64,

    // Particle Compute Pipeline (Muspelheim Compute)
    pub(crate) particle_compute_pipeline: wgpu::ComputePipeline,
    pub(crate) particle_compute_bgl: wgpu::BindGroupLayout,
    pub(crate) particle_buffer: wgpu::Buffer,
    pub(crate) particle_uniform_buffer: wgpu::Buffer,
    pub(crate) particles: crate::types::ParticleSubsystem,
    pub(crate) particle_render_pipeline: wgpu::RenderPipeline,
    pub(crate) particle_render_bgl: wgpu::BindGroupLayout,
    pub(crate) particle_render_bind_group: Option<wgpu::BindGroup>,
    pub(crate) particle_compute_bind_group: Option<wgpu::BindGroup>,

    // VDOM node stack for hierarchy tracking
    pub(crate) vnode_stack: Vec<(Rect, &'static str)>,

    /// Event handlers registered during render passes.
    pub(crate) event_handlers: std::collections::HashMap<
        String,
        Vec<std::sync::Arc<dyn Fn(cvkg_core::Event) + Send + Sync>>,
    >,

    // Error tracking (set via RendererErrorHandler trait)
    pub(crate) render_error_count: u64,
    pub(crate) has_fatal_error: bool,

    /// Bind group layout for reading blur output in glass composite pass.
    pub(crate) glass_output_bind_group_layout: wgpu::BindGroupLayout,
    /// Current material state -- draw calls are tagged with this material.
    pub(crate) current_draw_material: cvkg_core::DrawMaterial,

    /// Portal backdrop blur regions -- collected during portal enter/exit
    pub(crate) portal_regions: std::collections::VecDeque<cvkg_core::Rect>,

    /// Gradient stop texture (32 x 1, RGBA) for multi-stop gradient rendering.
    /// RGB = stop color, A = stop position (0-1). Cached per unique stop set.
    pub(crate) gradient_stop_texture: wgpu::Texture,
    pub(crate) gradient_stop_texture_view: wgpu::TextureView,
    pub(crate) gradient_bind_group: wgpu::BindGroup,
    /// Gradient texture cache: maps stop-hash to (texture, view, bind_group) to avoid re-uploading.
    pub(crate) gradient_texture_cache:
        std::collections::HashMap<u64, (wgpu::Texture, wgpu::TextureView, wgpu::BindGroup)>,
    /// Last uploaded gradient stops hash, to detect when we need to re-upload.
    pub(crate) gradient_stops_hash: u64,
    /// Layout for the gradient bind group (texture + sampler).
    pub(crate) gradient_bind_group_layout: wgpu::BindGroupLayout,

    /// Cache of the compiled Kvasir render graph execution plan.
    /// Reset to `None` by `invalidate_material_cache()`.
    pub(crate) cached_graph_plan: Option<crate::kvasir::graph_cache::CachedGraphPlan>,
    /// Hash of the active material set, used to invalidate the graph plan
    pub(crate) material_compilation_hash: u64,
    /// Memoization cache for frame-level render skipping.
    pub(crate) memo_cache: std::collections::HashMap<u64, crate::types::MemoEntry>,
    /// Current frame generation counter.
    pub(crate) frame_generation: u64,
    /// P1-1: GpuRenderer configuration.
    pub(crate) config: crate::subsystems::RendererConfig,
    /// P1-10: Quality level controlling MSAA sample count.
    pub(crate) quality_level: QualityLevel,
    /// Thread-safe bind group cache to avoid per-frame allocations during render passes.
    /// Keyed by (window id, resource id, `u32`, `bool`); the meaning of the last
    /// two key components is not visible in this file.
    pub(crate) bind_group_cache: std::sync::Mutex<
        std::collections::HashMap<
            (
                Option<winit::window::WindowId>,
                crate::kvasir::resource::ResourceId,
                u32,
                bool,
            ),
            wgpu::BindGroup,
        >,
    >,
    /// Thread-safe texture view cache to avoid per-frame allocations of TextureViews.
    /// Keyed by (window id, resource id, `u32`).
    pub(crate) texture_view_cache: std::sync::Mutex<
        std::collections::HashMap<
            (
                Option<winit::window::WindowId>,
                crate::kvasir::resource::ResourceId,
                u32,
            ),
            wgpu::TextureView,
        >,
    >,
}
374
// SAFETY(review): these impls assert that `GpuRenderer` may be sent to and shared
// between threads, and they apply to wasm32 builds only. Presumably this relies on
// the wasm32 build being single-threaded, so the assertion can never be exercised --
// TODO confirm, and revisit if wasm threads/atomics are ever enabled.
#[cfg(target_arch = "wasm32")]
unsafe impl Send for GpuRenderer {}
#[cfg(target_arch = "wasm32")]
unsafe impl Sync for GpuRenderer {}
379
/// Per-hologram instance data submitted during the frame.
///
/// Plain data record: a placement rectangle, an id-derived hash and a time value.
#[derive(Debug, Clone)]
pub struct HologramInstance {
    /// Bounding rectangle in logical coordinates (x, y, width, height).
    pub rect: cvkg_core::Rect,
    /// Hash of the hologram_id string -- used for per-hologram visual variation.
    pub id_hash: u32,
    /// Application-provided time for this hologram instance.
    /// NOTE(review): unit (seconds vs. milliseconds) is not visible here -- confirm with the caller.
    pub time: f32,
}
390
/// In-place "empty yourself" operation for cache containers.
///
/// Implemented for the collection types used as cache values (`HashMap`, `Vec`),
/// so generic code can reset a cache without knowing its concrete type.
pub trait ClearInto {
    /// Removes every element, leaving the container itself in place.
    fn clear_into(&mut self);
}

impl<K, V, S> ClearInto for std::collections::HashMap<K, V, S>
where
    S: std::hash::BuildHasher,
{
    /// Delegates to `HashMap::clear`.
    fn clear_into(&mut self) {
        std::collections::HashMap::clear(self);
    }
}

impl<T> ClearInto for Vec<T> {
    /// Delegates to `Vec::clear`.
    fn clear_into(&mut self) {
        Vec::clear(self);
    }
}
411
412// =========================================================================
413// P1-11: Pipeline cache integrity check
414// =========================================================================
415
416/// P1-11 fix: load a pipeline cache file from disk with SHA256 integrity check.
417fn load_pipeline_cache_with_integrity_check(
418    cache_path: &std::path::Path,
419) -> Result<Option<Vec<u8>>, String> {
420    let cache_data = match std::fs::read(cache_path) {
421        Ok(d) => d,
422        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
423        Err(e) => return Err(format!("read failed: {e}")),
424    };
425
426    let hash_path = cache_path.with_extension("bin.sha256");
427    let expected_hash = match std::fs::read_to_string(&hash_path) {
428        Ok(s) => s.trim().to_lowercase(),
429        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
430            return Err(format!(
431                "sidecar hash file missing at {}",
432                hash_path.display()
433            ));
434        }
435        Err(e) => return Err(format!("sidecar read failed: {e}")),
436    };
437
438    let actual = compute_sha256(&cache_data);
439    let actual_hex: String = actual.iter().map(|b| format!("{:02x}", b)).collect();
440    if actual_hex != expected_hash {
441        return Err(format!(
442            "hash mismatch: expected {expected_hash}, got {actual_hex}"
443        ));
444    }
445
446    Ok(Some(cache_data))
447}
448
/// Hashes `data` with SHA-256 and returns the 32-byte digest.
///
/// One-shot convenience wrapper around the in-file [`Sha256`] hasher.
fn compute_sha256(data: &[u8]) -> [u8; 32] {
    let mut ctx = Sha256::new();
    ctx.update(data);
    ctx.finalize()
}

/// Self-contained SHA-256 hasher (FIPS 180-4). Exists only so the pipeline
/// cache integrity check does not need a `sha2` dependency.
#[derive(Clone)]
struct Sha256 {
    /// Running hash value (eight 32-bit words).
    state: [u32; 8],
    /// Bytes of the current, not yet compressed 64-byte block.
    buffer: [u8; 64],
    /// Number of valid bytes in `buffer`; always below 64 between calls.
    buffer_len: usize,
    /// Total number of message bytes absorbed so far.
    total_len: u64,
}

impl Sha256 {
    /// Round constants.
    const K: [u32; 64] = [
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4,
        0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe,
        0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f,
        0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
        0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc,
        0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
        0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116,
        0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
        0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7,
        0xc67178f2,
    ];

    /// Creates a hasher holding the initial hash value and an empty block buffer.
    fn new() -> Self {
        Self {
            state: [
                0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab,
                0x5be0cd19,
            ],
            buffer: [0; 64],
            buffer_len: 0,
            total_len: 0,
        }
    }

    /// Absorbs `data`, compressing every time the 64-byte block buffer fills up.
    fn update(&mut self, data: &[u8]) {
        self.total_len = self.total_len.wrapping_add(data.len() as u64);
        let mut rest = data;
        while !rest.is_empty() {
            // `buffer_len < 64` holds here, so there is always room for at least one byte.
            let take = (64 - self.buffer_len).min(rest.len());
            let (head, tail) = rest.split_at(take);
            self.buffer[self.buffer_len..self.buffer_len + take].copy_from_slice(head);
            self.buffer_len += take;
            rest = tail;
            if self.buffer_len == 64 {
                let block = self.buffer;
                self.compress(&block);
                self.buffer_len = 0;
            }
        }
    }

    /// Applies the final padding (0x80, zeros, 64-bit big-endian bit length)
    /// and returns the digest as big-endian bytes.
    fn finalize(mut self) -> [u8; 32] {
        let bit_len = self.total_len.wrapping_mul(8);

        self.buffer[self.buffer_len] = 0x80;
        self.buffer_len += 1;
        // No room left for the 8-byte length field: flush a zero-padded block first.
        if self.buffer_len > 56 {
            self.buffer[self.buffer_len..].fill(0);
            let block = self.buffer;
            self.compress(&block);
            self.buffer_len = 0;
        }
        self.buffer[self.buffer_len..56].fill(0);
        self.buffer[56..].copy_from_slice(&bit_len.to_be_bytes());
        let block = self.buffer;
        self.compress(&block);

        let mut digest = [0u8; 32];
        for (chunk, word) in digest.chunks_exact_mut(4).zip(self.state.iter()) {
            chunk.copy_from_slice(&word.to_be_bytes());
        }
        digest
    }

    /// Runs the 64-round compression function over one 64-byte block and folds
    /// the result into `state`.
    fn compress(&mut self, block: &[u8; 64]) {
        // Message schedule: 16 big-endian words from the block, then 48 derived words.
        let mut schedule = [0u32; 64];
        for (slot, bytes) in schedule.iter_mut().zip(block.chunks_exact(4)) {
            *slot = u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
        }
        for t in 16..64 {
            let x = schedule[t - 15];
            let y = schedule[t - 2];
            let small_sigma0 = x.rotate_right(7) ^ x.rotate_right(18) ^ (x >> 3);
            let small_sigma1 = y.rotate_right(17) ^ y.rotate_right(19) ^ (y >> 10);
            schedule[t] = schedule[t - 16]
                .wrapping_add(small_sigma0)
                .wrapping_add(schedule[t - 7])
                .wrapping_add(small_sigma1);
        }

        // Working variables a..h kept as one array and rotated each round.
        let mut vars = self.state;
        for (k, w) in Self::K.iter().zip(schedule.iter()) {
            let [a, b, c, d, e, f, g, h] = vars;
            let big_sigma1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
            let choose = (e & f) ^ (!e & g);
            let t1 = h
                .wrapping_add(big_sigma1)
                .wrapping_add(choose)
                .wrapping_add(*k)
                .wrapping_add(*w);
            let big_sigma0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22);
            let majority = (a & b) ^ (a & c) ^ (b & c);
            let t2 = big_sigma0.wrapping_add(majority);
            vars = [t1.wrapping_add(t2), a, b, c, d.wrapping_add(t1), e, f, g];
        }

        for (acc, add) in self.state.iter_mut().zip(vars.iter()) {
            *acc = acc.wrapping_add(*add);
        }
    }
}
587
/// Returns the mip level count to use for a `width` x `height` texture.
///
/// Textures whose larger side is 0 or 1 get a single level. Otherwise the
/// result is the bit length of the larger side (`floor(log2(max)) + 1`),
/// clamped to the range `2..=8`.
fn compute_mip_levels(width: u32, height: u32) -> u32 {
    match width.max(height) {
        0 | 1 => 1,
        longest => (u32::BITS - longest.leading_zeros()).clamp(2, 8),
    }
}
595
596impl GpuRenderer {
597    /// Access the hologram instances submitted this frame.
598    pub fn hologram_instances(&self) -> &[HologramInstance] {
599        &self.hologram_instances
600    }
601
    /// Stores `level` as the renderer's quality level.
    ///
    /// This method only updates the field.
    /// NOTE(review): nothing here recreates MSAA targets or pipelines for the new
    /// sample count -- confirm that callers trigger that where needed.
    pub fn set_quality_level(&mut self, level: QualityLevel) {
        self.quality_level = level;
    }
605
    /// Replaces the renderer configuration with `config`.
    ///
    /// Only the stored value changes here; no GPU resources are touched by this method.
    pub fn set_config(&mut self, config: crate::subsystems::RendererConfig) {
        self.config = config;
    }
609
    /// Returns a reference to the current renderer configuration.
    pub fn config(&self) -> &crate::subsystems::RendererConfig {
        &self.config
    }
613
    /// Returns the current quality level (a `Copy` value).
    pub fn quality_level(&self) -> QualityLevel {
        self.quality_level
    }
617
618    pub(crate) fn lock_or_clear_cache<'a, T: ClearInto>(
619        lock: &'a std::sync::Mutex<T>,
620    ) -> std::sync::MutexGuard<'a, T> {
621        match lock.lock() {
622            Ok(guard) => guard,
623            Err(poisoned) => {
624                tracing::warn!("[GPU] lock_or_clear_cache: mutex poisoned, clearing cache...");
625                let mut guard = poisoned.into_inner();
626                guard.clear_into();
627                guard
628            }
629        }
630    }
631
632    pub fn update_mouse(&mut self, mouse: [f32; 2], velocity: [f32; 2]) {
633        self.current_scene.mouse = mouse;
634        self.current_scene.mouse_velocity = velocity;
635        self.queue.write_buffer(
636            &self.scene_buffer,
637            0,
638            bytemuck::bytes_of(&self.current_scene),
639        );
640    }
641
    /// Drops the cached Kvasir graph plan by resetting `cached_graph_plan` to `None`.
    ///
    /// NOTE(review): `material_compilation_hash` is left untouched here -- confirm
    /// that the plan is rebuilt even when the hash has not changed.
    pub fn invalidate_material_cache(&mut self) {
        self.cached_graph_plan = None;
    }
645
646    pub fn invalidate_all_caches(&mut self) -> usize {
647        let mut cleared = 0;
648        {
649            let mut bg_cache = Self::lock_or_clear_cache(&self.bind_group_cache);
650            cleared += bg_cache.len();
651            bg_cache.clear();
652        }
653        {
654            let mut view_cache = Self::lock_or_clear_cache(&self.texture_view_cache);
655            cleared += view_cache.len();
656            view_cache.clear();
657        }
658        cleared += self.text.shaped_cache.len();
659        self.text.shaped_cache.clear();
660        cleared += self.svg.model_cache.len();
661        self.svg.model_cache.clear();
662        cleared += self.svg.tree_cache.len();
663        self.svg.tree_cache.clear();
664        self.svg.clear_filter_batches();
665        cleared
666    }
667
668    pub fn prewarm_text_cache(&mut self, labels: &[(&str, f32)]) {
669        let mut count = 0;
670        for (text, size) in labels {
671            let cache_key = (text.to_string(), (size * 100.0) as u32);
672            if self.text.shaped_cache.contains(&cache_key) {
673                continue;
674            }
675            let style = cvkg_runic_text::TextStyle::new("Inter", *size);
676            let spans = [cvkg_runic_text::TextSpan::new(text, style)];
677            if let Ok(shaped) = self.text.engine.shape_layout(
678                &spans,
679                None,
680                cvkg_runic_text::TextAlign::Start,
681                cvkg_runic_text::TextOverflow::Visible,
682            ) {
683                self.text
684                    .shaped_cache
685                    .put(cache_key, std::sync::Arc::new(shaped));
686                count += 1;
687            }
688        }
689        if count > 0 {
690            tracing::info!("[Surtr] prewarm_text_cache: pre-shaped {} labels", count);
691        }
692    }
693
694    pub(crate) fn select_best_surface_format(
695        formats: &[wgpu::TextureFormat],
696    ) -> wgpu::TextureFormat {
697        if formats.is_empty() {
698            return wgpu::TextureFormat::Rgba8Unorm;
699        }
700        let preferred_formats = [
701            wgpu::TextureFormat::Rgba16Float,
702            wgpu::TextureFormat::Rgba8Unorm,
703            wgpu::TextureFormat::Bgra8UnormSrgb,
704            wgpu::TextureFormat::Rgba8UnormSrgb,
705            wgpu::TextureFormat::Bgra8Unorm,
706            wgpu::TextureFormat::Rgba8Unorm,
707            wgpu::TextureFormat::Rgba8Unorm,
708        ];
709        for preferred in &preferred_formats {
710            if formats.contains(preferred) {
711                return *preferred;
712            }
713        }
714        if formats.contains(&wgpu::TextureFormat::Rgba8Unorm) {
715            return wgpu::TextureFormat::Rgba8Unorm;
716        }
717        formats[0]
718    }
719
720    pub(crate) fn rebuild_texture_array_bind_group(&mut self) {
721        let views: Vec<&wgpu::TextureView> = self.texture_views.iter().collect();
722        self.mega_heim_bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
723            layout: &self.texture_bind_group_layout,
724            entries: &[
725                wgpu::BindGroupEntry {
726                    binding: 0,
727                    resource: wgpu::BindingResource::TextureViewArray(&views),
728                },
729                wgpu::BindGroupEntry {
730                    binding: 1,
731                    resource: wgpu::BindingResource::Sampler(&self.dummy_sampler),
732                },
733            ],
734            label: Some("Mega-Heim Rebuilt Bind Group"),
735        });
736    }
737
738    pub(crate) fn update_vram_telemetry(&mut self) {
739        let buffers = self.geometry_buffers.vertex_buffer.size()
740            + self.geometry_buffers.index_buffer.size()
741            + self.geometry_buffers.instance_buffer.size()
742            + self.scene_buffer.size()
743            + self.theme_buffer.size()
744            + self.particle_buffer.size()
745            + self.particle_uniform_buffer.size();
746
747        let mut textures = self.config.mega_heim_vram_bytes();
748        textures += 4; // Dummy texture
749
750        for surface in self.surfaces.values() {
751            let width = surface.config.width;
752            let height = surface.config.height;
753            let format_bytes = 8; // Rgba16Float
754            textures += (width * height * format_bytes) as u64; // Scene texture
755            textures +=
756                (width * height * format_bytes * self.quality_level.msaa_sample_count()) as u64; // MSAA texture
757            textures += (width * height * 4) as u64; // Depth texture (Depth32Float)
758
759            let blur_width = (width / 2).max(1);
760            let blur_height = (height / 2).max(1);
761            let blur_bytes = (blur_width * blur_height * 4) as u64;
762            textures += blur_bytes * 4; // 2x blur + 2x bloom textures
763        }
764
765        if let Some(ref ctx) = self.headless_context {
766            let format_bytes = 8; // Rgba16Float
767            textures += (ctx.width * ctx.height * format_bytes) as u64; // Scene texture
768            textures +=
769                (ctx.width * ctx.height * format_bytes * self.quality_level.msaa_sample_count())
770                    as u64; // MSAA texture
771            textures += (ctx.width * ctx.height * 4) as u64; // Depth texture
772            textures += (ctx.width * ctx.height * 4) as u64; // Output texture
773        }
774
775        self.vram_buffers_bytes = buffers;
776        self.vram_textures_bytes = textures;
777        self.telemetry.vram_usage_mb = (buffers + textures) as f32 / (1024.0 * 1024.0);
778    }
779
780    pub fn get_telemetry(&self) -> cvkg_core::TelemetryData {
781        self.telemetry.clone()
782    }
783
784    pub fn resize(
785        &mut self,
786        window_id: winit::window::WindowId,
787        width: u32,
788        height: u32,
789        scale_factor: f32,
790    ) {
791        if width > 0
792            && height > 0
793            && let Some(ctx) = self.surfaces.get_mut(&window_id)
794        {
795            if ctx.config.width == width && ctx.config.height == height {
796                return;
797            }
798
799            tracing::info!("[GPU] Reconfiguring surface: {}x{}", width, height);
800            GpuRenderer::lock_or_clear_cache(&self.bind_group_cache).clear();
801            GpuRenderer::lock_or_clear_cache(&self.texture_view_cache).clear();
802            self.text.shaped_cache.clear();
803            ctx.config.width = width;
804            ctx.config.height = height;
805            ctx.scale_factor = scale_factor;
806            ctx.surface.configure(&self.device, &ctx.config);
807
808            let texture_desc = wgpu::TextureDescriptor {
809                label: Some("Surtr Scene Texture"),
810                size: wgpu::Extent3d {
811                    width,
812                    height,
813                    depth_or_array_layers: 1,
814                },
815                mip_level_count: 1,
816                sample_count: 1,
817                dimension: wgpu::TextureDimension::D2,
818                format: wgpu::TextureFormat::Rgba16Float,
819                usage: wgpu::TextureUsages::RENDER_ATTACHMENT
820                    | wgpu::TextureUsages::TEXTURE_BINDING,
821                view_formats: &[],
822            };
823
824            let scene_tex = self.device.create_texture(&texture_desc);
825
826            let msaa_desc = wgpu::TextureDescriptor {
827                label: Some("Scene MSAA"),
828                size: texture_desc.size,
829                mip_level_count: 1,
830                sample_count: self.quality_level.msaa_sample_count(),
831                dimension: wgpu::TextureDimension::D2,
832                format: wgpu::TextureFormat::Rgba16Float,
833                usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
834                view_formats: &[],
835            };
836            let scene_msaa_tex = self.device.create_texture(&msaa_desc);
837            ctx.scene_texture = scene_tex.create_view(&wgpu::TextureViewDescriptor::default());
838            ctx.scene_msaa_texture =
839                scene_msaa_tex.create_view(&wgpu::TextureViewDescriptor::default());
840
841            self.registry.remove_image(ctx.blur_tex_a);
842            self.registry.remove_image(ctx.blur_tex_b);
843            self.registry.remove_image(ctx.bloom_tex_a);
844            self.registry.remove_image(ctx.bloom_tex_b);
845
846            let blur_width = (width / 2).max(1);
847            let blur_height = (height / 2).max(1);
848
849            let blur_desc_a = crate::kvasir::resource::ResourceDescriptor {
850                label: Some("Surtr Blur Texture A".into()),
851                kind: crate::kvasir::resource::ResourceKind::Image {
852                    format: ctx.config.format,
853                    width: blur_width,
854                    height: blur_height,
855                    mip_level_count: compute_mip_levels(blur_width, blur_height),
856                    usage: wgpu::TextureUsages::RENDER_ATTACHMENT
857                        | wgpu::TextureUsages::TEXTURE_BINDING
858                        | wgpu::TextureUsages::COPY_SRC,
859                },
860                lifetime: crate::kvasir::resource::ResourceLifetime::Persistent,
861            };
862            ctx.blur_tex_a = self.registry.allocate_image(&self.device, &blur_desc_a);
863
864            let blur_desc_b = crate::kvasir::resource::ResourceDescriptor {
865                label: Some("Surtr Blur Texture B".into()),
866                kind: crate::kvasir::resource::ResourceKind::Image {
867                    format: ctx.config.format,
868                    width: blur_width,
869                    height: blur_height,
870                    mip_level_count: compute_mip_levels(blur_width, blur_height),
871                    usage: wgpu::TextureUsages::RENDER_ATTACHMENT
872                        | wgpu::TextureUsages::TEXTURE_BINDING
873                        | wgpu::TextureUsages::COPY_SRC,
874                },
875                lifetime: crate::kvasir::resource::ResourceLifetime::Persistent,
876            };
877            ctx.blur_tex_b = self.registry.allocate_image(&self.device, &blur_desc_b);
878
879            let bloom_desc_a = crate::kvasir::resource::ResourceDescriptor {
880                label: Some("Surtr Bloom Texture A".into()),
881                kind: crate::kvasir::resource::ResourceKind::Image {
882                    format: ctx.config.format,
883                    width: blur_width,
884                    height: blur_height,
885                    mip_level_count: compute_mip_levels(blur_width, blur_height),
886                    usage: wgpu::TextureUsages::RENDER_ATTACHMENT
887                        | wgpu::TextureUsages::TEXTURE_BINDING
888                        | wgpu::TextureUsages::COPY_SRC,
889                },
890                lifetime: crate::kvasir::resource::ResourceLifetime::Persistent,
891            };
892            ctx.bloom_tex_a = self.registry.allocate_image(&self.device, &bloom_desc_a);
893
894            let bloom_desc_b = crate::kvasir::resource::ResourceDescriptor {
895                label: Some("Surtr Bloom Texture B".into()),
896                kind: crate::kvasir::resource::ResourceKind::Image {
897                    format: ctx.config.format,
898                    width: blur_width,
899                    height: blur_height,
900                    mip_level_count: compute_mip_levels(blur_width, blur_height),
901                    usage: wgpu::TextureUsages::RENDER_ATTACHMENT
902                        | wgpu::TextureUsages::TEXTURE_BINDING
903                        | wgpu::TextureUsages::COPY_SRC,
904                },
905                lifetime: crate::kvasir::resource::ResourceLifetime::Persistent,
906            };
907            ctx.bloom_tex_b = self.registry.allocate_image(&self.device, &bloom_desc_b);
908
909            ctx.scene_bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
910                layout: &self.env_bind_group_layout,
911                entries: &[
912                    wgpu::BindGroupEntry {
913                        binding: 0,
914                        resource: wgpu::BindingResource::TextureView(&ctx.scene_texture),
915                    },
916                    wgpu::BindGroupEntry {
917                        binding: 1,
918                        resource: wgpu::BindingResource::Sampler(&ctx.sampler),
919                    },
920                ],
921                label: Some("Scene Bind Group Resize"),
922            });
923
924            let scene_views: Vec<&wgpu::TextureView> =
925                (0..32).map(|_| &ctx.scene_texture).collect();
926            ctx.scene_texture_bind_group =
927                self.device.create_bind_group(&wgpu::BindGroupDescriptor {
928                    layout: &self.texture_bind_group_layout,
929                    entries: &[
930                        wgpu::BindGroupEntry {
931                            binding: 0,
932                            resource: wgpu::BindingResource::TextureViewArray(&scene_views),
933                        },
934                        wgpu::BindGroupEntry {
935                            binding: 1,
936                            resource: wgpu::BindingResource::Sampler(&ctx.sampler),
937                        },
938                    ],
939                    label: Some("Scene Texture Bind Group Resize"),
940                });
941
942            let depth_texture = self.device.create_texture(&wgpu::TextureDescriptor {
943                label: Some("Surtr Depth Texture"),
944                size: wgpu::Extent3d {
945                    width,
946                    height,
947                    depth_or_array_layers: 1,
948                },
949                mip_level_count: 1,
950                sample_count: self.quality_level.msaa_sample_count(),
951                dimension: wgpu::TextureDimension::D2,
952                format: wgpu::TextureFormat::Depth32Float,
953                usage: wgpu::TextureUsages::RENDER_ATTACHMENT
954                    | wgpu::TextureUsages::TEXTURE_BINDING,
955                view_formats: &[],
956            });
957            ctx.depth_texture_view =
958                depth_texture.create_view(&wgpu::TextureViewDescriptor::default());
959        }
960    }
961
962    pub fn reset_time(&mut self) {
963        self.start_time = std::time::Instant::now();
964    }
965
966    pub fn reclaim_vram(&mut self) {
967        tracing::warn!("[GPU] Sundr Compaction: Compacting Mega-Heim...");
968
969        let new_mega_heim_tex = self.device.create_texture(&wgpu::TextureDescriptor {
970            label: Some("Sundr Mega-Heim (Compacted)"),
971            size: wgpu::Extent3d {
972                width: 4096,
973                height: 4096,
974                depth_or_array_layers: 1,
975            },
976            mip_level_count: 1,
977            sample_count: 1,
978            dimension: wgpu::TextureDimension::D2,
979            format: wgpu::TextureFormat::Rgba8UnormSrgb,
980            usage: wgpu::TextureUsages::TEXTURE_BINDING
981                | wgpu::TextureUsages::COPY_DST
982                | wgpu::TextureUsages::COPY_SRC,
983            view_formats: &[],
984        });
985
986        let mut new_packer = SkylinePacker::new(4096, 4096);
987        let mut encoder = self
988            .device
989            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
990                label: Some("Heim Compaction Encoder"),
991            });
992
993        let image_entries: Vec<(String, Rect)> = self
994            .image_uv_registry
995            .iter()
996            .map(|(k, v)| (k.clone(), *v))
997            .collect();
998        for (name, old_uv) in image_entries {
999            if let Some(&tex_idx) = self.texture_registry.get(&name)
1000                && tex_idx == 0
1001            {
1002                let w_px = (old_uv.width * 4096.0).round() as u32;
1003                let h_px = (old_uv.height * 4096.0).round() as u32;
1004                let old_x_px = (old_uv.x * 4096.0).round() as u32;
1005                let old_y_px = (old_uv.y * 4096.0).round() as u32;
1006
1007                if let Some((new_x, new_y)) = new_packer.pack(w_px, h_px) {
1008                    encoder.copy_texture_to_texture(
1009                        wgpu::TexelCopyTextureInfo {
1010                            texture: &self.mega_heim_tex,
1011                            mip_level: 0,
1012                            origin: wgpu::Origin3d {
1013                                x: old_x_px,
1014                                y: old_y_px,
1015                                z: 0,
1016                            },
1017                            aspect: wgpu::TextureAspect::All,
1018                        },
1019                        wgpu::TexelCopyTextureInfo {
1020                            texture: &new_mega_heim_tex,
1021                            mip_level: 0,
1022                            origin: wgpu::Origin3d {
1023                                x: new_x,
1024                                y: new_y,
1025                                z: 0,
1026                            },
1027                            aspect: wgpu::TextureAspect::All,
1028                        },
1029                        wgpu::Extent3d {
1030                            width: w_px,
1031                            height: h_px,
1032                            depth_or_array_layers: 1,
1033                        },
1034                    );
1035
1036                    let new_uv = Rect {
1037                        x: new_x as f32 / 4096.0,
1038                        y: new_y as f32 / 4096.0,
1039                        width: old_uv.width,
1040                        height: old_uv.height,
1041                    };
1042                    self.image_uv_registry.put(name.clone(), new_uv);
1043                }
1044            }
1045        }
1046
1047        let text_entries: Vec<(u64, (Rect, f32, f32, f32, f32))> = self
1048            .text
1049            .glyph_cache
1050            .iter()
1051            .map(|(k, v)| (*k, *v))
1052            .collect();
1053        for (hash, (old_uv, w_f, h_f, x_off, y_off)) in text_entries {
1054            let w_px = (old_uv.width * 4096.0).round() as u32;
1055            let h_px = (old_uv.height * 4096.0).round() as u32;
1056            let old_x_px = (old_uv.x * 4096.0).round() as u32;
1057            let old_y_px = (old_uv.y * 4096.0).round() as u32;
1058
1059            if let Some((new_x, new_y)) = new_packer.pack(w_px, h_px) {
1060                encoder.copy_texture_to_texture(
1061                    wgpu::TexelCopyTextureInfo {
1062                        texture: &self.mega_heim_tex,
1063                        mip_level: 0,
1064                        origin: wgpu::Origin3d {
1065                            x: old_x_px,
1066                            y: old_y_px,
1067                            z: 0,
1068                        },
1069                        aspect: wgpu::TextureAspect::All,
1070                    },
1071                    wgpu::TexelCopyTextureInfo {
1072                        texture: &new_mega_heim_tex,
1073                        mip_level: 0,
1074                        origin: wgpu::Origin3d {
1075                            x: new_x,
1076                            y: new_y,
1077                            z: 0,
1078                        },
1079                        aspect: wgpu::TextureAspect::All,
1080                    },
1081                    wgpu::Extent3d {
1082                        width: w_px,
1083                        height: h_px,
1084                        depth_or_array_layers: 1,
1085                    },
1086                );
1087
1088                let new_uv = Rect {
1089                    x: new_x as f32 / 4096.0,
1090                    y: new_y as f32 / 4096.0,
1091                    width: old_uv.width,
1092                    height: old_uv.height,
1093                };
1094                self.text
1095                    .glyph_cache
1096                    .put(hash, (new_uv, w_f, h_f, x_off, y_off));
1097            }
1098        }
1099
1100        self.queue.submit(std::iter::once(encoder.finish()));
1101
1102        self.mega_heim_tex = new_mega_heim_tex;
1103        let mega_heim_view_obj = self
1104            .mega_heim_tex
1105            .create_view(&wgpu::TextureViewDescriptor::default());
1106        self.texture_views[0] = mega_heim_view_obj.clone();
1107
1108        self.rebuild_texture_array_bind_group();
1109
1110        if !self.texture_bind_groups.is_empty() {
1111            self.texture_bind_groups[0] = self.mega_heim_bind_group.clone();
1112        }
1113
1114        self.heim_packer = new_packer;
1115        self.telemetry.vram_exhausted = false;
1116    }
1117}
1118
1119impl Drop for GpuRenderer {
1120    fn drop(&mut self) {
1121        let cache_dir = std::env::current_exe()
1122            .ok()
1123            .and_then(|p| p.parent().map(|d| d.join("pipeline_cache")))
1124            .unwrap_or_else(|| std::env::temp_dir().join("cvkg_pipeline_cache"));
1125        let _ = std::fs::create_dir_all(&cache_dir);
1126        let cache_path = cache_dir.join("cvkg_render_gpu.bin");
1127        if let Some(cache) = &self.pipeline_cache
1128            && let Some(data) = cache.get_data()
1129            && let Err(e) = std::fs::write(&cache_path, data)
1130        {
1131            tracing::warn!("Failed to persist pipeline cache: {}", e);
1132        }
1133
1134        let _ = self.device.poll(wgpu::PollType::Wait {
1135            submission_index: None,
1136            timeout: None,
1137        });
1138    }
1139}
1140
1141impl GpuRenderer {
1142    pub(crate) fn current_width(&self) -> u32 {
1143        if let Some(id) = self.current_window {
1144            self.surfaces.get(&id).map(|s| s.config.width).unwrap_or(1)
1145        } else {
1146            self.headless_context.as_ref().map(|h| h.width).unwrap_or(1)
1147        }
1148    }
1149
1150    pub(crate) fn current_height(&self) -> u32 {
1151        if let Some(id) = self.current_window {
1152            self.surfaces.get(&id).map(|s| s.config.height).unwrap_or(1)
1153        } else {
1154            self.headless_context
1155                .as_ref()
1156                .map(|h| h.height)
1157                .unwrap_or(1)
1158        }
1159    }
1160
1161    pub(crate) fn current_scale_factor(&self) -> f32 {
1162        if let Some(id) = self.current_window {
1163            self.surfaces
1164                .get(&id)
1165                .map(|s| s.scale_factor)
1166                .unwrap_or(1.0)
1167        } else {
1168            self.headless_context
1169                .as_ref()
1170                .map(|h| h.scale_factor)
1171                .unwrap_or(1.0)
1172        }
1173    }
1174
1175    pub(crate) fn current_time(&self) -> f32 {
1176        self.start_time.elapsed().as_secs_f32()
1177    }
1178
1179    /// forge_headless -- Initializes Surtr without a window for visual regression testing.
1180    pub async fn forge_headless(width: u32, height: u32) -> Self {
1181        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
1182            backends: wgpu::Backends::all(),
1183            flags: wgpu::InstanceFlags::default(),
1184            backend_options: wgpu::BackendOptions::default(),
1185            display: None,
1186            memory_budget_thresholds: wgpu::MemoryBudgetThresholds::default(),
1187        });
1188
1189        // Request adapter with robust multi-stage fallback for Bumblebee/Optimus compatibility
1190        tracing::info!("[GPU] Requesting HighPerformance adapter (headless)...");
1191        let mut adapter = instance
1192            .request_adapter(&wgpu::RequestAdapterOptions {
1193                power_preference: wgpu::PowerPreference::HighPerformance,
1194                compatible_surface: None,
1195                force_fallback_adapter: false,
1196            })
1197            .await
1198            .ok();
1199
1200        if adapter.is_none() {
1201            tracing::warn!(
1202                "[GPU] HighPerformance adapter failed (possible Bumblebee/Optimus), trying LowPower..."
1203            );
1204            adapter = instance
1205                .request_adapter(&wgpu::RequestAdapterOptions {
1206                    power_preference: wgpu::PowerPreference::LowPower,
1207                    compatible_surface: None,
1208                    force_fallback_adapter: false,
1209                })
1210                .await
1211                .ok();
1212        }
1213
1214        if adapter.is_none() {
1215            tracing::warn!("[GPU] Hardware adapters failed, trying Software fallback...");
1216            adapter = instance
1217                .request_adapter(&wgpu::RequestAdapterOptions {
1218                    power_preference: wgpu::PowerPreference::LowPower,
1219                    compatible_surface: None,
1220                    force_fallback_adapter: true,
1221                })
1222                .await
1223                .ok();
1224        }
1225
1226        let adapter = adapter.expect("Failed to find a suitable GPU for Surtr");
1227        let info = adapter.get_info();
1228        let caps =
1229            crate::subsystems::GpuCapabilities::detect(&info.name, format!("{:?}", info.backend));
1230        tracing::info!(
1231            "[GPU] Selected adapter: {} ({:?}) on backend: {:?} -- detected as {}",
1232            info.name,
1233            info.device_type,
1234            info.backend,
1235            caps.vendor
1236        );
1237        tracing::info!("[GPU] Driver info: {} - {}", info.driver, info.driver_info);
1238        let required_features = adapter.features()
1239            & (wgpu::Features::TIMESTAMP_QUERY
1240                | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING
1241                | wgpu::Features::TEXTURE_BINDING_ARRAY);
1242
1243        let (device, queue) = adapter
1244            .request_device(&wgpu::DeviceDescriptor {
1245                label: Some("Surtr Headless Forge"),
1246                required_features,
1247                required_limits: wgpu::Limits {
1248                    max_bindings_per_bind_group: adapter
1249                        .limits()
1250                        .max_bindings_per_bind_group
1251                        .min(256),
1252                    max_binding_array_elements_per_shader_stage: adapter
1253                        .limits()
1254                        .max_binding_array_elements_per_shader_stage
1255                        .min(256),
1256                    ..wgpu::Limits::default()
1257                },
1258                memory_hints: wgpu::MemoryHints::default(),
1259                experimental_features: wgpu::ExperimentalFeatures::disabled(),
1260                trace: wgpu::Trace::Off,
1261            })
1262            .await
1263            .expect("Failed to create Surtr device");
1264
1265        let instance = Arc::new(instance);
1266        let adapter = Arc::new(adapter);
1267
1268        device.on_uncaptured_error(Arc::new(|error| {
1269            tracing::error!(
1270                "[GPU] Uncaptured device error (Device Lost or Panic): {:?}",
1271                error
1272            );
1273        }));
1274
1275        let device = Arc::new(device);
1276        let queue = Arc::new(queue);
1277
1278        Self::forge_internal(
1279            instance,
1280            adapter,
1281            device,
1282            queue,
1283            None,
1284            Some((width, height, wgpu::TextureFormat::Rgba8UnormSrgb)),
1285        )
1286        .await
1287    }
1288
1289    /// Read back the headless output texture as RGBA8 pixels.
1290    /// Must be called after `end_frame` on a headless renderer.
1291    pub fn readback_headless_rgba8(&self) -> Vec<u8> {
1292        let ctx = self
1293            .headless_context
1294            .as_ref()
1295            .expect("readback_headless_rgba8 requires a headless renderer");
1296
1297        let width = ctx.width;
1298        let height = ctx.height;
1299        let row_bytes = width * 4;
1300        let padded_row_bytes = ((row_bytes + 255) / 256) * 256;
1301        let buffer_size = (padded_row_bytes * height) as u64;
1302
1303        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
1304            label: Some("headless-readback-buffer"),
1305            size: buffer_size,
1306            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
1307            mapped_at_creation: false,
1308        });
1309
1310        let mut encoder = self
1311            .device
1312            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1313                label: Some("headless-readback-encoder"),
1314            });
1315
1316        encoder.copy_texture_to_buffer(
1317            wgpu::TexelCopyTextureInfo {
1318                texture: &ctx.output_texture,
1319                mip_level: 0,
1320                origin: wgpu::Origin3d::ZERO,
1321                aspect: wgpu::TextureAspect::All,
1322            },
1323            wgpu::TexelCopyBufferInfo {
1324                buffer: &output_buffer,
1325                layout: wgpu::TexelCopyBufferLayout {
1326                    offset: 0,
1327                    bytes_per_row: Some(padded_row_bytes),
1328                    rows_per_image: Some(height),
1329                },
1330            },
1331            wgpu::Extent3d {
1332                width,
1333                height,
1334                depth_or_array_layers: 1,
1335            },
1336        );
1337
1338        self.queue.submit(std::iter::once(encoder.finish()));
1339        let buffer_slice = output_buffer.slice(..);
1340        buffer_slice.map_async(wgpu::MapMode::Read, |_| {});
1341        let _ = self.device.poll(wgpu::PollType::Wait {
1342            submission_index: None,
1343            timeout: None,
1344        });
1345
1346        let data = buffer_slice.get_mapped_range();
1347        let mut result = Vec::with_capacity((width * height * 4) as usize);
1348        for row in 0..height {
1349            let start = (row * padded_row_bytes) as usize;
1350            let end = start + row_bytes as usize;
1351            result.extend_from_slice(&data[start..end]);
1352        }
1353        drop(data);
1354        output_buffer.unmap();
1355        output_buffer.destroy();
1356        result
1357    }
1358
1359    /// Render a headless frame with a draw callback and read back pixels.
1360    pub fn render_headless_frame<F>(&mut self, draw: F) -> Vec<u8>
1361    where
1362        F: FnOnce(&mut Self),
1363    {
1364        let encoder = self.begin_frame_headless();
1365        draw(self);
1366        self.end_frame(encoder);
1367        self.readback_headless_rgba8()
1368    }
1369
1370    /// Create a headless GpuRenderer from an existing device and surface.
1371    ///
1372    /// This constructor does not require an event loop and is suitable for
1373    /// headless rendering (e.g., server-side rendering, tests).
1374    /// It delegates to the existing `forge_internal` which handles all
1375    /// pipeline, buffer, and bind group initialization.
1376    pub async fn from_external(
1377        device: Arc<wgpu::Device>,
1378        queue: Arc<wgpu::Queue>,
1379        surface: wgpu::Surface<'static>,
1380        width: u32,
1381        height: u32,
1382    ) -> Self {
1383        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
1384            backends: wgpu::Backends::all(),
1385            flags: wgpu::InstanceFlags::default(),
1386            backend_options: wgpu::BackendOptions::default(),
1387            display: None,
1388            memory_budget_thresholds: wgpu::MemoryBudgetThresholds::default(),
1389        });
1390
1391        let adapter = instance
1392            .request_adapter(&wgpu::RequestAdapterOptions {
1393                power_preference: wgpu::PowerPreference::default(),
1394                compatible_surface: Some(&surface),
1395                force_fallback_adapter: false,
1396            })
1397            .await
1398            .expect("No compatible adapter found");
1399
1400        Self::forge_internal(
1401            Arc::new(instance),
1402            Arc::new(adapter),
1403            device,
1404            queue,
1405            None,
1406            Some((width, height, wgpu::TextureFormat::Rgba8UnormSrgb)),
1407        )
1408        .await
1409    }
1410}