use crate::heim::SkylinePacker;
use crate::types::*;
use crate::vertex::*;
use cvkg_core::Rect;
use cvkg_core::{ColorTheme, SceneUniforms};
use lru::LruCache;
use std::collections::{HashMap, VecDeque};
use std::num::NonZeroUsize;
use std::sync::Arc;
pub use crate::subsystems::RendererConfig;
pub(crate) mod context_helpers;
pub(crate) mod draw;
pub(crate) mod init;
pub(crate) mod pipelines;
pub(crate) mod svg;
#[cfg(test)]
pub(crate) mod tests;
/// Numeric material identifiers used by the renderer.
///
/// The values are listed in ascending order. `BLEND_START`/`BLEND_END`
/// bracket a range of ids.
// NOTE(review): whether the blend range is inclusive or exclusive of
// BLEND_END is not visible from this module; confirm against the code that
// reads these constants.
pub(crate) mod material_id {
    pub const OPAQUE: u32 = 0;
    pub const ELLIPSE: u32 = 4;
    pub const TOP_UI: u32 = 6;
    pub const GLASS: u32 = 7;

    // Start of the blend id range.
    pub const BLEND_START: u32 = 8;

    // Ids that fall numerically inside the blend range.
    pub const RADIAL_GRADIENT: u32 = 16;
    pub const SQUIRCLE_STROKE: u32 = 17;
    pub const DROP_SHADOW: u32 = 18;
    pub const DASHED_STROKE: u32 = 19;
    pub const MESH_3D: u32 = 21;

    // End of the blend id range.
    pub const BLEND_END: u32 = 22;
}
/// Rendering quality tier; the default is [`QualityLevel::High`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum QualityLevel {
    #[default]
    High,
    Medium,
    Low,
}

impl QualityLevel {
    /// Returns the MSAA sample count associated with this tier
    /// (`Low` = 1, `Medium` = 2, `High` = 4).
    // NOTE(review): WebGPU only guarantees sample counts 1 and 4; a count of
    // 2 may need adapter-specific format features in wgpu. Confirm that the
    // `Medium` tier is validated against the adapter before use.
    pub fn msaa_sample_count(self) -> u32 {
        match self {
            Self::Low => 1,
            Self::Medium => 2,
            Self::High => 4,
        }
    }
}
/// Central renderer state: wgpu handles, pipelines, per-window surface
/// contexts, CPU-side geometry staging, and the caches used while drawing.
///
/// Fields are dropped in declaration order, so reordering them changes the
/// order in which GPU resources are released.
pub struct GpuRenderer {
// Core wgpu objects, shared through `Arc`.
pub(crate) instance: Arc<wgpu::Instance>,
pub(crate) adapter: Arc<wgpu::Adapter>,
pub(crate) device: Arc<wgpu::Device>,
pub(crate) queue: Arc<wgpu::Queue>,
pub(crate) registry: crate::kvasir::registry::ResourceRegistry,
pub(crate) active_offscreens: Vec<crate::types::OffscreenEffectConfig>,
// World-space panels and their virtual DOMs, both keyed by a u64 panel id.
pub(crate) world_space_panels: Vec<(u64, cvkg_vdom::WorldSpacePanel)>,
pub(crate) panel_vdoms: HashMap<u64, cvkg_vdom::VDom>,
// Effect pipelines looked up by name.
pub(crate) effect_pipelines: std::collections::HashMap<String, wgpu::RenderPipeline>,
pub(crate) effect_params_buffer: wgpu::Buffer,
pub(crate) effect_params_bind_group: wgpu::BindGroup,
pub(crate) linear_sampler: wgpu::Sampler,
// Receiving end of a channel that delivers compiled materials or generator
// errors. NOTE(review): the producer is not visible here; presumably a
// background AI material generator. Confirm.
pub ai_material_rx: Option<
std::sync::mpsc::Receiver<
Result<crate::material::CompiledMaterial, crate::ai::GeneratorError>,
>,
>,
// Per-window surface state; `current_window` selects the active entry and
// `headless_context` is consulted when no window is current (see
// `current_width` and friends).
pub(crate) surfaces: std::collections::HashMap<winit::window::WindowId, SurfaceContext>,
pub(crate) current_window: Option<winit::window::WindowId>,
pub headless_context: Option<HeadlessContext>,
pub(crate) text: crate::types::TextSubsystem,
// "Mega-Heim" atlas texture, its bind group and the skyline packer that
// allocates regions in it (compacted by `reclaim_vram`).
pub(crate) mega_heim_tex: wgpu::Texture,
pub(crate) mega_heim_bind_group: wgpu::BindGroup,
pub(crate) heim_packer: SkylinePacker,
// Image name -> normalized UV rect, and image name -> texture index.
// `reclaim_vram` treats index 0 as the Mega-Heim atlas.
pub(crate) image_uv_registry: LruCache<String, Rect>,
pub(crate) texture_registry: LruCache<String, u32>,
// Views bound as a texture array; slot 0 is replaced with the atlas view by
// `reclaim_vram`.
pub(crate) texture_views: Vec<wgpu::TextureView>,
pub(crate) dummy_sampler: wgpu::Sampler,
pub(crate) dummy_view: wgpu::TextureView,
pub(crate) dummy_depth_view: wgpu::TextureView,
pub(crate) dummy_depth_view_msaa: wgpu::TextureView,
// Shadow-mapping resources and parameters.
pub(crate) shadow_map_texture: Option<wgpu::Texture>,
pub(crate) shadow_map_view: Option<wgpu::TextureView>,
pub(crate) shadow_sampler: Option<wgpu::Sampler>,
pub(crate) shadow_light_vp: glam::Mat4,
pub(crate) shadow_map_size: u32,
pub(crate) shadow_bias: f32,
pub(crate) svg: crate::types::SvgSubsystem,
pub(crate) dummy_texture_bind_group: wgpu::BindGroup,
pub(crate) dummy_env_bind_group: wgpu::BindGroup,
pub(crate) texture_bind_group_layout: wgpu::BindGroupLayout,
pub(crate) texture_bind_groups: Vec<wgpu::BindGroup>,
pub(crate) shared_elements: LruCache<String, cvkg_core::Rect>,
// CPU-side geometry accumulated for upload, plus the GPU buffers it goes to.
pub(crate) geometry_buffers: crate::types::GeometryBuffers,
pub(crate) vertices: Vec<Vertex>,
pub(crate) indices: Vec<u32>,
pub(crate) instance_data: Vec<InstanceData>,
pub(crate) instance_data_3d: Vec<InstanceData3D>,
pub(crate) instance_buffer_3d: Option<wgpu::Buffer>,
pub(crate) staging_belt: wgpu::util::StagingBelt,
pub(crate) staging_command_buffers: Vec<wgpu::CommandBuffer>,
pub(crate) draw_calls: Vec<DrawCall>,
// Current draw state and the stacks that scope it.
pub(crate) current_texture_id: Option<u32>,
pub(crate) current_panel_id: Option<u64>,
pub(crate) panel_stack: Vec<u64>,
pub(crate) opacity_stack: Vec<f32>,
pub(crate) clip_stack: Vec<Rect>,
pub(crate) slice_stack: Vec<(f32, f32)>,
pub(crate) shadow_stack: Vec<ShadowState>,
// Optional filter pipelines (blur / blend / flood / copy) and their layouts.
pub blur_pipeline: Option<wgpu::RenderPipeline>,
pub blur_uniform: Option<wgpu::Buffer>,
pub blur_bind_group_layout: Option<wgpu::BindGroupLayout>,
pub blend_pipeline: Option<wgpu::RenderPipeline>,
pub blend_bind_group_layout: Option<wgpu::BindGroupLayout>,
pub flood_pipeline: Option<wgpu::RenderPipeline>,
pub copy_bind_group_layout: Option<wgpu::BindGroupLayout>,
// Theme and scene uniforms; `scene_buffer` is rewritten from `current_scene`
// by `update_mouse`.
pub(crate) theme_buffer: wgpu::Buffer,
pub(crate) scene_buffer: wgpu::Buffer,
pub(crate) theme_stack: Vec<ColorTheme>,
pub(crate) portal_theme_stack: Vec<ColorTheme>,
pub(crate) berserker_bind_group: wgpu::BindGroup,
pub(crate) berserker_bind_group_layout: wgpu::BindGroupLayout,
// Reference instant for `current_time`; reset by `reset_time`.
pub(crate) start_time: std::time::Instant,
pub(crate) current_theme: ColorTheme,
pub(crate) current_scene: SceneUniforms,
pub(crate) current_z: f32,
pub(crate) default_background_color: [f32; 4],
pub(crate) app_drew_background: bool,
pub(crate) frame_rendered: bool,
pub(crate) current_draw_order: i32,
// Render pipelines.
pub(crate) pipeline: wgpu::RenderPipeline,
pub(crate) opaque_pipeline: wgpu::RenderPipeline,
pub(crate) ui_pipeline: wgpu::RenderPipeline,
pub(crate) glass_pipeline: wgpu::RenderPipeline,
pub(crate) pbr_pipeline: wgpu::RenderPipeline,
pub(crate) transparent_pipeline: wgpu::RenderPipeline,
pub(crate) shadow_pipeline: wgpu::RenderPipeline,
pub(crate) background_pipeline: wgpu::RenderPipeline,
pub(crate) bloom_extract_pipeline: wgpu::RenderPipeline,
pub(crate) copy_pipeline: wgpu::RenderPipeline,
pub(crate) composite_pipeline: wgpu::RenderPipeline,
pub(crate) color_blind_pipeline: wgpu::RenderPipeline,
pub(crate) volumetric_pipeline: wgpu::RenderPipeline,
pub(crate) volumetric_bind_group_layout: wgpu::BindGroupLayout,
pub(crate) volumetric_uniform_buffer: wgpu::Buffer,
pub(crate) csm_buffer: wgpu::Buffer,
pub(crate) pbr_material_bind_group_layout: wgpu::BindGroupLayout,
pub(crate) volumetric_depth_sampler: wgpu::Sampler,
pub(crate) hologram_instances: Vec<HologramInstance>,
// Pending 3D scene inputs.
pub(crate) pending_directional_light: Option<crate::passes::shadow::DirectionalLight>,
pub(crate) pending_mesh_instances_3d: Vec<crate::passes::shadow::GpuMesh3d>,
pub(crate) pending_transparent_instances_3d: Vec<crate::passes::shadow::GpuMesh3d>,
pub(crate) pending_scene_radius: f32,
// Kawase blur pipelines and uniforms.
pub(crate) kawase_down_pipeline: wgpu::RenderPipeline,
pub(crate) kawase_up_pipeline: wgpu::RenderPipeline,
pub(crate) kawase_bind_group_layout: wgpu::BindGroupLayout,
pub(crate) kawase_uniform: wgpu::Buffer,
pub(crate) kawase_uniform_buffers: Vec<wgpu::Buffer>,
pub(crate) env_bind_group_layout: wgpu::BindGroupLayout,
pub telemetry: cvkg_core::TelemetryData,
// Persisted to disk when the renderer is dropped.
pub(crate) pipeline_cache: Option<wgpu::PipelineCache>,
pub frame_budget: cvkg_core::FrameBudget,
pub(crate) capture_staging_buffer: Option<wgpu::Buffer>,
pub last_redraw_start: std::time::Instant,
pub last_frame_start: std::time::Instant,
// Estimated VRAM usage, written by `update_vram_telemetry`.
pub(crate) vram_buffers_bytes: u64,
pub(crate) vram_textures_bytes: u64,
pub(crate) _debug_layout: bool,
pub(crate) transform_stack: Vec<glam::Mat3>,
pub(crate) transform_stack_3d: Vec<glam::Mat4>,
pub redraw_requested: bool,
pub(crate) compositor_index_cursor: u32,
pub bloom_enabled: bool,
pub volumetric_enabled: bool,
// Tessellated path geometry keyed by a u64 hash.
pub(crate) path_geometry_cache: lru::LruCache<u64, (Vec<Vertex>, Vec<u32>)>,
pub(crate) color_blind_bind_group_layout: wgpu::BindGroupLayout,
pub(crate) color_blind_uniform_buffer: wgpu::Buffer,
pub color_blind_mode: crate::color_blindness::ColorBlindMode,
pub color_blind_intensity: f32,
pub(crate) sampler: wgpu::Sampler,
// GPU query set with its buffers. NOTE(review): presumably timestamp
// queries feeding `last_gpu_time_ns`, with `skuld_period` as the timestamp
// period; confirm against the code that writes them.
pub(crate) skuld_queries: Option<wgpu::QuerySet>,
pub(crate) skuld_buffer: Option<wgpu::Buffer>,
pub(crate) skuld_read_buffer: Option<wgpu::Buffer>,
pub(crate) skuld_period: f32,
pub last_gpu_time_ns: u64,
// Particle compute and render resources.
pub(crate) particle_compute_pipeline: wgpu::ComputePipeline,
pub(crate) particle_compute_bgl: wgpu::BindGroupLayout,
pub(crate) particle_buffer: wgpu::Buffer,
pub(crate) particle_uniform_buffer: wgpu::Buffer,
pub(crate) particles: crate::types::ParticleSubsystem,
pub(crate) particle_render_pipeline: wgpu::RenderPipeline,
pub(crate) particle_render_bgl: wgpu::BindGroupLayout,
pub(crate) particle_render_bind_group: Option<wgpu::BindGroup>,
pub(crate) particle_compute_bind_group: Option<wgpu::BindGroup>,
pub(crate) vnode_stack: Vec<(Rect, &'static str)>,
// Event callbacks grouped under a string key.
pub(crate) event_handlers: std::collections::HashMap<
String,
Vec<std::sync::Arc<dyn Fn(cvkg_core::Event) + Send + Sync>>,
>,
pub(crate) render_error_count: u64,
pub(crate) has_fatal_error: bool,
pub(crate) glass_output_bind_group_layout: wgpu::BindGroupLayout,
pub(crate) current_draw_material: cvkg_core::DrawMaterial,
pub(crate) portal_regions: std::collections::VecDeque<cvkg_core::Rect>,
// Gradient stop texture plus a cache of gradient textures keyed by a u64 hash.
pub(crate) gradient_stop_texture: wgpu::Texture,
pub(crate) gradient_stop_texture_view: wgpu::TextureView,
pub(crate) gradient_bind_group: wgpu::BindGroup,
pub(crate) gradient_texture_cache:
std::collections::HashMap<u64, (wgpu::Texture, wgpu::TextureView, wgpu::BindGroup)>,
pub(crate) gradient_stops_hash: u64,
pub(crate) gradient_bind_group_layout: wgpu::BindGroupLayout,
// Cleared by `invalidate_material_cache`.
pub(crate) cached_graph_plan: Option<crate::kvasir::graph_cache::CachedGraphPlan>,
pub(crate) material_compilation_hash: u64,
pub(crate) memo_cache: std::collections::HashMap<u64, crate::types::MemoEntry>,
pub(crate) frame_generation: u64,
pub(crate) config: crate::subsystems::RendererConfig,
pub(crate) quality_level: QualityLevel,
// Mutex-guarded caches keyed by (window, resource id, u32[, bool]); both are
// emptied by `resize` and `invalidate_all_caches`.
pub(crate) bind_group_cache: std::sync::Mutex<
std::collections::HashMap<
(
Option<winit::window::WindowId>,
crate::kvasir::resource::ResourceId,
u32,
bool,
),
wgpu::BindGroup,
>,
>,
pub(crate) texture_view_cache: std::sync::Mutex<
std::collections::HashMap<
(
Option<winit::window::WindowId>,
crate::kvasir::resource::ResourceId,
u32,
),
wgpu::TextureView,
>,
>,
}
// Manual `Send`/`Sync` impls, compiled only for wasm32 targets.
// SAFETY: NOTE(review): no invariant is stated in this file. Presumably this
// relies on the wasm32 build running the renderer on a single thread, so the
// value is never actually shared or moved across threads. TODO confirm, and
// revisit if wasm threads (shared memory / atomics) are ever enabled.
#[cfg(target_arch = "wasm32")]
unsafe impl Send for GpuRenderer {}
#[cfg(target_arch = "wasm32")]
unsafe impl Sync for GpuRenderer {}
/// One hologram instance record, exposed read-only through
/// `GpuRenderer::hologram_instances`.
#[derive(Debug, Clone)]
pub struct HologramInstance {
/// Rectangle associated with the instance.
// NOTE(review): coordinate space (logical vs physical pixels) is not visible here; confirm.
pub rect: cvkg_core::Rect,
/// 32-bit hash identifying the instance.
pub id_hash: u32,
/// Time value attached to the instance.
// NOTE(review): presumably seconds, matching `current_time`; confirm.
pub time: f32,
}
/// Containers that can be emptied in place.
///
/// Used by `GpuRenderer::lock_or_clear_cache` to reset a cache whose mutex
/// was poisoned.
pub trait ClearInto {
    /// Removes every element from the container.
    fn clear_into(&mut self);
}

impl<K, V, S: std::hash::BuildHasher> ClearInto for std::collections::HashMap<K, V, S> {
    fn clear_into(&mut self) {
        std::collections::HashMap::clear(self);
    }
}

impl<T> ClearInto for Vec<T> {
    fn clear_into(&mut self) {
        Vec::clear(self);
    }
}
/// Reads a pipeline cache blob and verifies it against its SHA-256 sidecar.
///
/// The sidecar path is `cache_path.with_extension("bin.sha256")`, so for a
/// cache named `<stem>.bin` the hash is expected at `<stem>.bin.sha256`. The
/// sidecar content is trimmed and lower-cased before being compared with the
/// lower-case hex digest of the cache bytes.
///
/// # Returns
/// * `Ok(None)` when the cache file does not exist.
/// * `Ok(Some(bytes))` when the cache exists and its digest matches.
///
/// # Errors
/// Returns a description when the cache or sidecar cannot be read, when the
/// sidecar is missing, or when the digests differ.
fn load_pipeline_cache_with_integrity_check(
    cache_path: &std::path::Path,
) -> Result<Option<Vec<u8>>, String> {
    let cache_data = match std::fs::read(cache_path) {
        Ok(bytes) => bytes,
        Err(err) => {
            // A missing cache is the normal first-run case, not an error.
            return if err.kind() == std::io::ErrorKind::NotFound {
                Ok(None)
            } else {
                Err(format!("read failed: {err}"))
            };
        }
    };

    let hash_path = cache_path.with_extension("bin.sha256");
    let expected_hash = std::fs::read_to_string(&hash_path)
        .map(|text| text.trim().to_lowercase())
        .map_err(|err| {
            if err.kind() == std::io::ErrorKind::NotFound {
                format!("sidecar hash file missing at {}", hash_path.display())
            } else {
                format!("sidecar read failed: {err}")
            }
        })?;

    let actual_hex = compute_sha256(&cache_data)
        .iter()
        .map(|byte| format!("{:02x}", byte))
        .collect::<String>();

    if actual_hex == expected_hash {
        Ok(Some(cache_data))
    } else {
        Err(format!(
            "hash mismatch: expected {expected_hash}, got {actual_hex}"
        ))
    }
}
/// Computes the SHA-256 digest of `data` in one shot.
fn compute_sha256(data: &[u8]) -> [u8; 32] {
    let mut hasher = Sha256::new();
    hasher.update(data);
    hasher.finalize()
}

/// Minimal streaming SHA-256 (FIPS 180-4) used for pipeline-cache integrity
/// checks.
#[derive(Clone)]
struct Sha256 {
    /// Running hash value (eight 32-bit words).
    state: [u32; 8],
    /// Partially filled 64-byte block.
    buffer: [u8; 64],
    /// Number of valid bytes at the start of `buffer` (always < 64 between calls).
    buffer_len: usize,
    /// Total number of message bytes absorbed so far.
    total_len: u64,
}

impl Sha256 {
    /// Round constants.
    const K: [u32; 64] = [
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4,
        0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe,
        0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f,
        0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
        0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc,
        0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
        0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116,
        0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
        0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7,
        0xc67178f2,
    ];

    /// Creates a hasher in the standard initial state.
    fn new() -> Self {
        Self {
            state: [
                0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c,
                0x1f83d9ab, 0x5be0cd19,
            ],
            buffer: [0; 64],
            buffer_len: 0,
            total_len: 0,
        }
    }

    /// Absorbs `data`, compressing every completed 64-byte block.
    fn update(&mut self, data: &[u8]) {
        self.total_len = self.total_len.wrapping_add(data.len() as u64);
        let mut remaining = data;
        while !remaining.is_empty() {
            // Top up the pending block with as much input as fits.
            let take = (64 - self.buffer_len).min(remaining.len());
            let (head, tail) = remaining.split_at(take);
            self.buffer[self.buffer_len..self.buffer_len + take].copy_from_slice(head);
            self.buffer_len += take;
            remaining = tail;
            if self.buffer_len == 64 {
                let block = self.buffer;
                self.compress(&block);
                self.buffer_len = 0;
            }
        }
    }

    /// Applies the final padding and returns the 32-byte big-endian digest.
    fn finalize(mut self) -> [u8; 32] {
        let bit_len = self.total_len.wrapping_mul(8);

        // Mandatory 0x80 terminator directly after the message bytes.
        self.buffer[self.buffer_len] = 0x80;
        self.buffer_len += 1;

        // If the 8-byte length no longer fits, flush a zero-padded block first.
        if self.buffer_len > 56 {
            self.buffer[self.buffer_len..].fill(0);
            let block = self.buffer;
            self.compress(&block);
            self.buffer_len = 0;
        }

        self.buffer[self.buffer_len..56].fill(0);
        self.buffer[56..].copy_from_slice(&bit_len.to_be_bytes());
        let block = self.buffer;
        self.compress(&block);

        let mut digest = [0u8; 32];
        for (chunk, word) in digest.chunks_exact_mut(4).zip(self.state) {
            chunk.copy_from_slice(&word.to_be_bytes());
        }
        digest
    }

    /// Runs the compression function over one 64-byte block.
    ///
    /// Panics if `block` holds fewer than 64 bytes.
    fn compress(&mut self, block: &[u8]) {
        // Message schedule: 16 big-endian words from the block, then 48 derived words.
        let mut schedule = [0u32; 64];
        for (slot, word) in schedule[..16].iter_mut().zip(block[..64].chunks_exact(4)) {
            *slot = u32::from_be_bytes([word[0], word[1], word[2], word[3]]);
        }
        for t in 16..64 {
            let x = schedule[t - 15];
            let y = schedule[t - 2];
            let small_sigma0 = x.rotate_right(7) ^ x.rotate_right(18) ^ (x >> 3);
            let small_sigma1 = y.rotate_right(17) ^ y.rotate_right(19) ^ (y >> 10);
            schedule[t] = schedule[t - 16]
                .wrapping_add(small_sigma0)
                .wrapping_add(schedule[t - 7])
                .wrapping_add(small_sigma1);
        }

        let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h] = self.state;
        for (round_constant, word) in Self::K.iter().zip(schedule.iter()) {
            let big_sigma1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
            let choose = (e & f) ^ (!e & g);
            let temp1 = h
                .wrapping_add(big_sigma1)
                .wrapping_add(choose)
                .wrapping_add(*round_constant)
                .wrapping_add(*word);
            let big_sigma0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22);
            let majority = (a & b) ^ (a & c) ^ (b & c);
            let temp2 = big_sigma0.wrapping_add(majority);

            h = g;
            g = f;
            f = e;
            e = d.wrapping_add(temp1);
            d = c;
            c = b;
            b = a;
            a = temp1.wrapping_add(temp2);
        }

        // Fold the working variables back into the running state.
        for (slot, value) in self.state.iter_mut().zip([a, b, c, d, e, f, g, h]) {
            *slot = slot.wrapping_add(value);
        }
    }
}
/// Returns the mip level count for a `width` x `height` texture.
///
/// Textures whose larger side is 0 or 1 get a single level. Otherwise the
/// count is the bit length of the larger side (a full mip chain), clamped to
/// the range 2..=8.
fn compute_mip_levels(width: u32, height: u32) -> u32 {
    match width.max(height) {
        0 | 1 => 1,
        longest_side => (u32::BITS - longest_side.leading_zeros()).clamp(2, 8),
    }
}
impl GpuRenderer {
/// Returns the hologram instances currently held by the renderer.
pub fn hologram_instances(&self) -> &[HologramInstance] {
&self.hologram_instances
}
/// Stores the new quality level.
///
/// Only the field is updated here; nothing is re-created.
// NOTE(review): `resize` reads the MSAA sample count from this level when it
// builds textures. Confirm that existing targets and pipelines are rebuilt
// elsewhere after the level changes.
pub fn set_quality_level(&mut self, level: QualityLevel) {
self.quality_level = level;
}
/// Replaces the renderer configuration with `config`.
pub fn set_config(&mut self, config: crate::subsystems::RendererConfig) {
self.config = config;
}
/// Returns the current renderer configuration.
pub fn config(&self) -> &crate::subsystems::RendererConfig {
&self.config
}
/// Returns the current quality level.
pub fn quality_level(&self) -> QualityLevel {
self.quality_level
}
/// Locks a cache mutex, recovering from poisoning by emptying the cache.
///
/// If a previous holder panicked, the contents may be inconsistent, so the
/// cache is cleared, a warning is logged, and the poison flag is reset.
/// Resetting the flag matters: without it the mutex stays poisoned forever,
/// and every later call would clear the cache again and log another warning.
pub(crate) fn lock_or_clear_cache<'a, T: ClearInto>(
    lock: &'a std::sync::Mutex<T>,
) -> std::sync::MutexGuard<'a, T> {
    lock.lock().unwrap_or_else(|poisoned| {
        tracing::warn!("[GPU] lock_or_clear_cache: mutex poisoned, clearing cache...");
        let mut guard = poisoned.into_inner();
        guard.clear_into();
        // The data is back in a known-good (empty) state, so later lockers
        // can take the normal path.
        lock.clear_poison();
        guard
    })
}
/// Records the pointer position and velocity in the scene uniforms and
/// uploads the whole uniform block to `scene_buffer` at offset 0.
pub fn update_mouse(&mut self, mouse: [f32; 2], velocity: [f32; 2]) {
    let scene = &mut self.current_scene;
    scene.mouse = mouse;
    scene.mouse_velocity = velocity;

    let payload = bytemuck::bytes_of(&self.current_scene);
    self.queue.write_buffer(&self.scene_buffer, 0, payload);
}
/// Drops the cached graph plan by resetting `cached_graph_plan` to `None`.
pub fn invalidate_material_cache(&mut self) {
self.cached_graph_plan = None;
}
/// Empties the bind-group, texture-view, shaped-text and SVG caches.
///
/// Returns the total number of entries removed from the counted caches. SVG
/// filter batches are cleared as well but do not contribute to the count.
pub fn invalidate_all_caches(&mut self) -> usize {
    // Each guard lives only inside its own block so the locks are released
    // before the next cache is touched.
    let bind_groups = {
        let mut cache = Self::lock_or_clear_cache(&self.bind_group_cache);
        let removed = cache.len();
        cache.clear();
        removed
    };
    let texture_views = {
        let mut cache = Self::lock_or_clear_cache(&self.texture_view_cache);
        let removed = cache.len();
        cache.clear();
        removed
    };

    let shaped_text = self.text.shaped_cache.len();
    self.text.shaped_cache.clear();

    let svg_models = self.svg.model_cache.len();
    self.svg.model_cache.clear();

    let svg_trees = self.svg.tree_cache.len();
    self.svg.tree_cache.clear();

    self.svg.clear_filter_batches();

    bind_groups + texture_views + shaped_text + svg_models + svg_trees
}
/// Shapes the given `(text, size)` labels ahead of time and stores the
/// results in the shaped-text cache.
///
/// Labels already present in the cache are skipped, and labels that fail to
/// shape are ignored. The cache key is the text plus the size multiplied by
/// 100 and truncated to `u32`. All labels are shaped with the "Inter" font,
/// start alignment and visible overflow.
pub fn prewarm_text_cache(&mut self, labels: &[(&str, f32)]) {
    let mut shaped_now = 0;
    for (text, size) in labels {
        let cache_key = (text.to_string(), (size * 100.0) as u32);
        if self.text.shaped_cache.contains(&cache_key) {
            continue;
        }

        let style = cvkg_runic_text::TextStyle::new("Inter", *size);
        let spans = [cvkg_runic_text::TextSpan::new(text, style)];
        let Ok(layout) = self.text.engine.shape_layout(
            &spans,
            None,
            cvkg_runic_text::TextAlign::Start,
            cvkg_runic_text::TextOverflow::Visible,
        ) else {
            continue;
        };

        self.text
            .shaped_cache
            .put(cache_key, std::sync::Arc::new(layout));
        shaped_now += 1;
    }

    if shaped_now > 0 {
        tracing::info!(
            "[Surtr] prewarm_text_cache: pre-shaped {} labels",
            shaped_now
        );
    }
}
/// Picks the surface format to use from the formats a surface supports.
///
/// The first supported entry of the preference list wins. If none of the
/// preferred formats is supported, the first supported format is used. An
/// empty `formats` slice yields `Rgba8Unorm`.
///
/// The preference list previously repeated `Rgba8Unorm` three times and was
/// followed by a separate `Rgba8Unorm` check that could never succeed after
/// the loop; both were dead code and have been removed. The selection order
/// is unchanged.
pub(crate) fn select_best_surface_format(
    formats: &[wgpu::TextureFormat],
) -> wgpu::TextureFormat {
    // Ordered from most to least preferred.
    const PREFERRED: [wgpu::TextureFormat; 5] = [
        wgpu::TextureFormat::Rgba16Float,
        wgpu::TextureFormat::Rgba8Unorm,
        wgpu::TextureFormat::Bgra8UnormSrgb,
        wgpu::TextureFormat::Rgba8UnormSrgb,
        wgpu::TextureFormat::Bgra8Unorm,
    ];

    PREFERRED
        .iter()
        .copied()
        .find(|candidate| formats.contains(candidate))
        .or_else(|| formats.first().copied())
        .unwrap_or(wgpu::TextureFormat::Rgba8Unorm)
}
/// Recreates `mega_heim_bind_group` from the current `texture_views`.
///
/// Binding 0 receives every view as a texture-view array and binding 1
/// receives `dummy_sampler`, using `texture_bind_group_layout`.
pub(crate) fn rebuild_texture_array_bind_group(&mut self) {
    let view_refs = self
        .texture_views
        .iter()
        .collect::<Vec<&wgpu::TextureView>>();

    let entries = [
        wgpu::BindGroupEntry {
            binding: 0,
            resource: wgpu::BindingResource::TextureViewArray(&view_refs),
        },
        wgpu::BindGroupEntry {
            binding: 1,
            resource: wgpu::BindingResource::Sampler(&self.dummy_sampler),
        },
    ];

    let rebuilt = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
        layout: &self.texture_bind_group_layout,
        entries: &entries,
        label: Some("Mega-Heim Rebuilt Bind Group"),
    });
    self.mega_heim_bind_group = rebuilt;
}
/// Recomputes the estimated VRAM usage and publishes it to telemetry.
///
/// Buffer usage is the sum of the sizes of the main geometry, scene, theme
/// and particle buffers. Texture usage is an estimate built from the
/// configured Mega-Heim size plus per-surface and headless render targets.
///
/// All per-pixel arithmetic is done in `u64`. The estimate used to be
/// multiplied in `u32` and only widened afterwards, which overflows (panic
/// in debug builds, silent wrap in release) for large surfaces with MSAA.
pub(crate) fn update_vram_telemetry(&mut self) {
    // Bytes per pixel used for the HDR scene targets.
    const SCENE_BYTES_PER_PIXEL: u64 = 8;
    // Bytes per pixel used for the remaining 32-bit targets.
    const PLAIN_BYTES_PER_PIXEL: u64 = 4;
    // Number of half-resolution blur/bloom targets per surface.
    const HALF_RES_TARGETS: u64 = 4;

    let buffers = self.geometry_buffers.vertex_buffer.size()
        + self.geometry_buffers.index_buffer.size()
        + self.geometry_buffers.instance_buffer.size()
        + self.scene_buffer.size()
        + self.theme_buffer.size()
        + self.particle_buffer.size()
        + self.particle_uniform_buffer.size();

    let msaa_samples = u64::from(self.quality_level.msaa_sample_count());

    let mut textures = self.config.mega_heim_vram_bytes();
    // NOTE(review): presumably the 1x1 dummy texture (4 bytes); confirm.
    textures += 4;

    for surface in self.surfaces.values() {
        let width = surface.config.width;
        let height = surface.config.height;
        let pixels = u64::from(width) * u64::from(height);

        // Resolved scene target, its multisampled twin, and one 32-bit target.
        textures += pixels * SCENE_BYTES_PER_PIXEL;
        textures += pixels * SCENE_BYTES_PER_PIXEL * msaa_samples;
        textures += pixels * PLAIN_BYTES_PER_PIXEL;

        // Half-resolution targets (mip chains are not counted).
        let blur_width = (width / 2).max(1);
        let blur_height = (height / 2).max(1);
        let blur_bytes = u64::from(blur_width) * u64::from(blur_height) * PLAIN_BYTES_PER_PIXEL;
        textures += blur_bytes * HALF_RES_TARGETS;
    }

    if let Some(ref ctx) = self.headless_context {
        let pixels = u64::from(ctx.width) * u64::from(ctx.height);
        textures += pixels * SCENE_BYTES_PER_PIXEL;
        textures += pixels * SCENE_BYTES_PER_PIXEL * msaa_samples;
        // Two 32-bit targets.
        textures += pixels * PLAIN_BYTES_PER_PIXEL;
        textures += pixels * PLAIN_BYTES_PER_PIXEL;
    }

    self.vram_buffers_bytes = buffers;
    self.vram_textures_bytes = textures;
    self.telemetry.vram_usage_mb = (buffers + textures) as f32 / (1024.0 * 1024.0);
}
/// Returns a clone of the current telemetry data.
pub fn get_telemetry(&self) -> cvkg_core::TelemetryData {
self.telemetry.clone()
}
/// Reconfigures the surface of `window_id` for a new size and scale factor
/// and recreates every size-dependent render target.
///
/// Does nothing when `width` or `height` is zero or the window is unknown.
/// When the pixel size is unchanged, only the scale factor is refreshed.
/// Previously that case returned early and silently dropped a changed scale
/// factor (for example after moving the window to a display with a
/// different DPI).
///
/// On a real size change this clears the bind-group, texture-view and
/// shaped-text caches, reconfigures the surface, and rebuilds the scene,
/// MSAA, blur, bloom and depth targets plus the bind groups that reference
/// the scene texture.
pub fn resize(
    &mut self,
    window_id: winit::window::WindowId,
    width: u32,
    height: u32,
    scale_factor: f32,
) {
    if width == 0 || height == 0 {
        return;
    }
    let Some(ctx) = self.surfaces.get_mut(&window_id) else {
        return;
    };

    if ctx.config.width == width && ctx.config.height == height {
        if ctx.scale_factor != scale_factor {
            ctx.scale_factor = scale_factor;
            // Mirror the full-resize path, which drops shaped text whenever
            // the surface parameters change.
            self.text.shaped_cache.clear();
        }
        return;
    }

    tracing::info!("[GPU] Reconfiguring surface: {}x{}", width, height);
    GpuRenderer::lock_or_clear_cache(&self.bind_group_cache).clear();
    GpuRenderer::lock_or_clear_cache(&self.texture_view_cache).clear();
    self.text.shaped_cache.clear();

    ctx.config.width = width;
    ctx.config.height = height;
    ctx.scale_factor = scale_factor;
    ctx.surface.configure(&self.device, &ctx.config);

    let msaa_samples = self.quality_level.msaa_sample_count();
    let full_extent = wgpu::Extent3d {
        width,
        height,
        depth_or_array_layers: 1,
    };

    // Full-resolution HDR scene target and its multisampled counterpart.
    let scene_tex = self.device.create_texture(&wgpu::TextureDescriptor {
        label: Some("Surtr Scene Texture"),
        size: full_extent,
        mip_level_count: 1,
        sample_count: 1,
        dimension: wgpu::TextureDimension::D2,
        format: wgpu::TextureFormat::Rgba16Float,
        usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::TEXTURE_BINDING,
        view_formats: &[],
    });
    let scene_msaa_tex = self.device.create_texture(&wgpu::TextureDescriptor {
        label: Some("Scene MSAA"),
        size: full_extent,
        mip_level_count: 1,
        sample_count: msaa_samples,
        dimension: wgpu::TextureDimension::D2,
        format: wgpu::TextureFormat::Rgba16Float,
        usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
        view_formats: &[],
    });
    ctx.scene_texture = scene_tex.create_view(&wgpu::TextureViewDescriptor::default());
    ctx.scene_msaa_texture = scene_msaa_tex.create_view(&wgpu::TextureViewDescriptor::default());

    // Release the old half-resolution targets before allocating replacements.
    self.registry.remove_image(ctx.blur_tex_a);
    self.registry.remove_image(ctx.blur_tex_b);
    self.registry.remove_image(ctx.bloom_tex_a);
    self.registry.remove_image(ctx.bloom_tex_b);

    let blur_width = (width / 2).max(1);
    let blur_height = (height / 2).max(1);
    let target_format = ctx.config.format;
    // The four blur/bloom targets differ only by label.
    let half_res_desc = |label: &'static str| crate::kvasir::resource::ResourceDescriptor {
        label: Some(label.into()),
        kind: crate::kvasir::resource::ResourceKind::Image {
            format: target_format,
            width: blur_width,
            height: blur_height,
            mip_level_count: compute_mip_levels(blur_width, blur_height),
            usage: wgpu::TextureUsages::RENDER_ATTACHMENT
                | wgpu::TextureUsages::TEXTURE_BINDING
                | wgpu::TextureUsages::COPY_SRC,
        },
        lifetime: crate::kvasir::resource::ResourceLifetime::Persistent,
    };
    ctx.blur_tex_a = self
        .registry
        .allocate_image(&self.device, &half_res_desc("Surtr Blur Texture A"));
    ctx.blur_tex_b = self
        .registry
        .allocate_image(&self.device, &half_res_desc("Surtr Blur Texture B"));
    ctx.bloom_tex_a = self
        .registry
        .allocate_image(&self.device, &half_res_desc("Surtr Bloom Texture A"));
    ctx.bloom_tex_b = self
        .registry
        .allocate_image(&self.device, &half_res_desc("Surtr Bloom Texture B"));

    ctx.scene_bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
        layout: &self.env_bind_group_layout,
        entries: &[
            wgpu::BindGroupEntry {
                binding: 0,
                resource: wgpu::BindingResource::TextureView(&ctx.scene_texture),
            },
            wgpu::BindGroupEntry {
                binding: 1,
                resource: wgpu::BindingResource::Sampler(&ctx.sampler),
            },
        ],
        label: Some("Scene Bind Group Resize"),
    });

    // The texture-array layout is filled with 32 references to the scene view.
    let scene_views: Vec<&wgpu::TextureView> = (0..32).map(|_| &ctx.scene_texture).collect();
    ctx.scene_texture_bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
        layout: &self.texture_bind_group_layout,
        entries: &[
            wgpu::BindGroupEntry {
                binding: 0,
                resource: wgpu::BindingResource::TextureViewArray(&scene_views),
            },
            wgpu::BindGroupEntry {
                binding: 1,
                resource: wgpu::BindingResource::Sampler(&ctx.sampler),
            },
        ],
        label: Some("Scene Texture Bind Group Resize"),
    });

    // Depth target uses the same sample count as the MSAA colour target.
    let depth_texture = self.device.create_texture(&wgpu::TextureDescriptor {
        label: Some("Surtr Depth Texture"),
        size: full_extent,
        mip_level_count: 1,
        sample_count: msaa_samples,
        dimension: wgpu::TextureDimension::D2,
        format: wgpu::TextureFormat::Depth32Float,
        usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::TEXTURE_BINDING,
        view_formats: &[],
    });
    ctx.depth_texture_view = depth_texture.create_view(&wgpu::TextureViewDescriptor::default());
}
/// Restarts the renderer clock by setting `start_time` to now.
pub fn reset_time(&mut self) {
self.start_time = std::time::Instant::now();
}
pub fn reclaim_vram(&mut self) {
tracing::warn!("[GPU] Sundr Compaction: Compacting Mega-Heim...");
let new_mega_heim_tex = self.device.create_texture(&wgpu::TextureDescriptor {
label: Some("Sundr Mega-Heim (Compacted)"),
size: wgpu::Extent3d {
width: 4096,
height: 4096,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: wgpu::TextureFormat::Rgba8UnormSrgb,
usage: wgpu::TextureUsages::TEXTURE_BINDING
| wgpu::TextureUsages::COPY_DST
| wgpu::TextureUsages::COPY_SRC,
view_formats: &[],
});
let mut new_packer = SkylinePacker::new(4096, 4096);
let mut encoder = self
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("Heim Compaction Encoder"),
});
let image_entries: Vec<(String, Rect)> = self
.image_uv_registry
.iter()
.map(|(k, v)| (k.clone(), *v))
.collect();
for (name, old_uv) in image_entries {
if let Some(&tex_idx) = self.texture_registry.get(&name)
&& tex_idx == 0
{
let w_px = (old_uv.width * 4096.0).round() as u32;
let h_px = (old_uv.height * 4096.0).round() as u32;
let old_x_px = (old_uv.x * 4096.0).round() as u32;
let old_y_px = (old_uv.y * 4096.0).round() as u32;
if let Some((new_x, new_y)) = new_packer.pack(w_px, h_px) {
encoder.copy_texture_to_texture(
wgpu::TexelCopyTextureInfo {
texture: &self.mega_heim_tex,
mip_level: 0,
origin: wgpu::Origin3d {
x: old_x_px,
y: old_y_px,
z: 0,
},
aspect: wgpu::TextureAspect::All,
},
wgpu::TexelCopyTextureInfo {
texture: &new_mega_heim_tex,
mip_level: 0,
origin: wgpu::Origin3d {
x: new_x,
y: new_y,
z: 0,
},
aspect: wgpu::TextureAspect::All,
},
wgpu::Extent3d {
width: w_px,
height: h_px,
depth_or_array_layers: 1,
},
);
let new_uv = Rect {
x: new_x as f32 / 4096.0,
y: new_y as f32 / 4096.0,
width: old_uv.width,
height: old_uv.height,
};
self.image_uv_registry.put(name.clone(), new_uv);
}
}
}
let text_entries: Vec<(u64, (Rect, f32, f32, f32, f32))> = self
.text
.glyph_cache
.iter()
.map(|(k, v)| (*k, *v))
.collect();
for (hash, (old_uv, w_f, h_f, x_off, y_off)) in text_entries {
let w_px = (old_uv.width * 4096.0).round() as u32;
let h_px = (old_uv.height * 4096.0).round() as u32;
let old_x_px = (old_uv.x * 4096.0).round() as u32;
let old_y_px = (old_uv.y * 4096.0).round() as u32;
if let Some((new_x, new_y)) = new_packer.pack(w_px, h_px) {
encoder.copy_texture_to_texture(
wgpu::TexelCopyTextureInfo {
texture: &self.mega_heim_tex,
mip_level: 0,
origin: wgpu::Origin3d {
x: old_x_px,
y: old_y_px,
z: 0,
},
aspect: wgpu::TextureAspect::All,
},
wgpu::TexelCopyTextureInfo {
texture: &new_mega_heim_tex,
mip_level: 0,
origin: wgpu::Origin3d {
x: new_x,
y: new_y,
z: 0,
},
aspect: wgpu::TextureAspect::All,
},
wgpu::Extent3d {
width: w_px,
height: h_px,
depth_or_array_layers: 1,
},
);
let new_uv = Rect {
x: new_x as f32 / 4096.0,
y: new_y as f32 / 4096.0,
width: old_uv.width,
height: old_uv.height,
};
self.text
.glyph_cache
.put(hash, (new_uv, w_f, h_f, x_off, y_off));
}
}
self.queue.submit(std::iter::once(encoder.finish()));
self.mega_heim_tex = new_mega_heim_tex;
let mega_heim_view_obj = self
.mega_heim_tex
.create_view(&wgpu::TextureViewDescriptor::default());
self.texture_views[0] = mega_heim_view_obj.clone();
self.rebuild_texture_array_bind_group();
if !self.texture_bind_groups.is_empty() {
self.texture_bind_groups[0] = self.mega_heim_bind_group.clone();
}
self.heim_packer = new_packer;
self.telemetry.vram_exhausted = false;
}
}
impl Drop for GpuRenderer {
fn drop(&mut self) {
let cache_dir = std::env::current_exe()
.ok()
.and_then(|p| p.parent().map(|d| d.join("pipeline_cache")))
.unwrap_or_else(|| std::env::temp_dir().join("cvkg_pipeline_cache"));
let _ = std::fs::create_dir_all(&cache_dir);
let cache_path = cache_dir.join("cvkg_render_gpu.bin");
if let Some(cache) = &self.pipeline_cache
&& let Some(data) = cache.get_data()
&& let Err(e) = std::fs::write(&cache_path, data)
{
tracing::warn!("Failed to persist pipeline cache: {}", e);
}
let _ = self.device.poll(wgpu::PollType::Wait {
submission_index: None,
timeout: None,
});
}
}
impl GpuRenderer {
/// Width of the active render target in pixels.
///
/// Uses the current window's surface when a window is selected, otherwise
/// the headless context. Falls back to 1 when neither is available.
pub(crate) fn current_width(&self) -> u32 {
    let width = match self.current_window {
        Some(id) => self.surfaces.get(&id).map(|surface| surface.config.width),
        None => self
            .headless_context
            .as_ref()
            .map(|headless| headless.width),
    };
    width.unwrap_or(1)
}
/// Height of the active render target in pixels.
///
/// Uses the current window's surface when a window is selected, otherwise
/// the headless context. Falls back to 1 when neither is available.
pub(crate) fn current_height(&self) -> u32 {
    let height = match self.current_window {
        Some(id) => self.surfaces.get(&id).map(|surface| surface.config.height),
        None => self
            .headless_context
            .as_ref()
            .map(|headless| headless.height),
    };
    height.unwrap_or(1)
}
/// Scale factor of the active render target.
///
/// Uses the current window's surface when a window is selected, otherwise
/// the headless context. Falls back to 1.0 when neither is available.
pub(crate) fn current_scale_factor(&self) -> f32 {
    let scale = match self.current_window {
        Some(id) => self.surfaces.get(&id).map(|surface| surface.scale_factor),
        None => self
            .headless_context
            .as_ref()
            .map(|headless| headless.scale_factor),
    };
    scale.unwrap_or(1.0)
}
/// Seconds elapsed since `start_time`, as `f32`.
pub(crate) fn current_time(&self) -> f32 {
self.start_time.elapsed().as_secs_f32()
}
/// Creates a renderer with no window surface, rendering to a
/// `width` x `height` `Rgba8UnormSrgb` target.
///
/// Adapter selection tries, in order: a high-performance adapter, a
/// low-power adapter, then a forced fallback adapter. The device is
/// requested with whichever of the timestamp-query and texture-binding-array
/// features the adapter offers, and with the two binding limits capped at
/// 256.
///
/// # Panics
/// Panics when no adapter can be found or the device request fails.
pub async fn forge_headless(width: u32, height: u32) -> Self {
    let instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
        backends: wgpu::Backends::all(),
        flags: wgpu::InstanceFlags::default(),
        backend_options: wgpu::BackendOptions::default(),
        display: None,
        memory_budget_thresholds: wgpu::MemoryBudgetThresholds::default(),
    });

    tracing::info!("[GPU] Requesting HighPerformance adapter (headless)...");
    // (power preference, force fallback adapter, warning logged before the attempt)
    let attempts: [(wgpu::PowerPreference, bool, Option<&str>); 3] = [
        (wgpu::PowerPreference::HighPerformance, false, None),
        (
            wgpu::PowerPreference::LowPower,
            false,
            Some(
                "[GPU] HighPerformance adapter failed (possible Bumblebee/Optimus), trying LowPower...",
            ),
        ),
        (
            wgpu::PowerPreference::LowPower,
            true,
            Some("[GPU] Hardware adapters failed, trying Software fallback..."),
        ),
    ];
    let mut adapter = None;
    for (power_preference, force_fallback_adapter, warning) in attempts {
        if let Some(message) = warning {
            tracing::warn!("{}", message);
        }
        adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference,
                compatible_surface: None,
                force_fallback_adapter,
            })
            .await
            .ok();
        if adapter.is_some() {
            break;
        }
    }
    let adapter = adapter.expect("Failed to find a suitable GPU for Surtr");

    let info = adapter.get_info();
    let caps =
        crate::subsystems::GpuCapabilities::detect(&info.name, format!("{:?}", info.backend));
    tracing::info!(
        "[GPU] Selected adapter: {} ({:?}) on backend: {:?} -- detected as {}",
        info.name,
        info.device_type,
        info.backend,
        caps.vendor
    );
    tracing::info!("[GPU] Driver info: {} - {}", info.driver, info.driver_info);

    // Request only the optional features this adapter actually supports.
    let wanted_features = wgpu::Features::TIMESTAMP_QUERY
        | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING
        | wgpu::Features::TEXTURE_BINDING_ARRAY;
    let required_features = adapter.features() & wanted_features;

    let adapter_limits = adapter.limits();
    let required_limits = wgpu::Limits {
        max_bindings_per_bind_group: adapter_limits.max_bindings_per_bind_group.min(256),
        max_binding_array_elements_per_shader_stage: adapter_limits
            .max_binding_array_elements_per_shader_stage
            .min(256),
        ..wgpu::Limits::default()
    };

    let (device, queue) = adapter
        .request_device(&wgpu::DeviceDescriptor {
            label: Some("Surtr Headless Forge"),
            required_features,
            required_limits,
            memory_hints: wgpu::MemoryHints::default(),
            experimental_features: wgpu::ExperimentalFeatures::disabled(),
            trace: wgpu::Trace::Off,
        })
        .await
        .expect("Failed to create Surtr device");

    device.on_uncaptured_error(Arc::new(|error| {
        tracing::error!(
            "[GPU] Uncaptured device error (Device Lost or Panic): {:?}",
            error
        );
    }));

    Self::forge_internal(
        Arc::new(instance),
        Arc::new(adapter),
        Arc::new(device),
        Arc::new(queue),
        None,
        Some((width, height, wgpu::TextureFormat::Rgba8UnormSrgb)),
    )
    .await
}
pub fn readback_headless_rgba8(&self) -> Vec<u8> {
let ctx = self
.headless_context
.as_ref()
.expect("readback_headless_rgba8 requires a headless renderer");
let width = ctx.width;
let height = ctx.height;
let row_bytes = width * 4;
let padded_row_bytes = row_bytes.div_ceil(256) * 256;
let buffer_size = (padded_row_bytes * height) as u64;
let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
label: Some("headless-readback-buffer"),
size: buffer_size,
usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
mapped_at_creation: false,
});
let mut encoder = self
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("headless-readback-encoder"),
});
encoder.copy_texture_to_buffer(
wgpu::TexelCopyTextureInfo {
texture: &ctx.output_texture,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
wgpu::TexelCopyBufferInfo {
buffer: &output_buffer,
layout: wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(padded_row_bytes),
rows_per_image: Some(height),
},
},
wgpu::Extent3d {
width,
height,
depth_or_array_layers: 1,
},
);
self.queue.submit(std::iter::once(encoder.finish()));
let buffer_slice = output_buffer.slice(..);
buffer_slice.map_async(wgpu::MapMode::Read, |_| {});
let _ = self.device.poll(wgpu::PollType::Wait {
submission_index: None,
timeout: None,
});
let data = buffer_slice.get_mapped_range();
let mut result = Vec::with_capacity((width * height * 4) as usize);
for row in 0..height {
let start = (row * padded_row_bytes) as usize;
let end = start + row_bytes as usize;
result.extend_from_slice(&data[start..end]);
}
drop(data);
output_buffer.unmap();
output_buffer.destroy();
result
}
/// Renders a single headless frame and returns its pixels.
///
/// The frame is opened with `begin_frame_headless`, `draw` is invoked once
/// with the renderer, the frame is closed with `end_frame`, and the result
/// of `readback_headless_rgba8` is returned.
pub fn render_headless_frame<F>(&mut self, draw: F) -> Vec<u8>
where
    F: FnOnce(&mut Self),
{
    let frame_encoder = self.begin_frame_headless();

    // Let the caller record its draw calls for this frame.
    draw(self);

    self.end_frame(frame_encoder);
    self.readback_headless_rgba8()
}
/// Builds a renderer around a `device` and `queue` supplied by the caller.
///
/// A new `wgpu::Instance` is created and an adapter compatible with
/// `surface` is requested from it. Construction is then delegated to
/// `forge_internal` with the caller's device and queue.
///
/// NOTE(review): `surface` is only used as `compatible_surface` for adapter
/// selection here. `forge_internal` receives `None` as its fifth argument
/// plus `(width, height, Rgba8UnormSrgb)` — confirm that not forwarding the
/// surface is intended. Also verify that an adapter requested from this new
/// instance is acceptable alongside an externally created device.
///
/// # Panics
///
/// Panics if no adapter compatible with `surface` is found.
pub async fn from_external(
    device: Arc<wgpu::Device>,
    queue: Arc<wgpu::Queue>,
    surface: wgpu::Surface<'static>,
    width: u32,
    height: u32,
) -> Self {
    let instance_descriptor = wgpu::InstanceDescriptor {
        backends: wgpu::Backends::all(),
        flags: wgpu::InstanceFlags::default(),
        backend_options: wgpu::BackendOptions::default(),
        display: None,
        memory_budget_thresholds: wgpu::MemoryBudgetThresholds::default(),
    };
    let instance = wgpu::Instance::new(instance_descriptor);

    let adapter = instance
        .request_adapter(&wgpu::RequestAdapterOptions {
            power_preference: wgpu::PowerPreference::default(),
            compatible_surface: Some(&surface),
            force_fallback_adapter: false,
        })
        .await
        .expect("No compatible adapter found");

    let target = (width, height, wgpu::TextureFormat::Rgba8UnormSrgb);
    let shared_instance = Arc::new(instance);
    let shared_adapter = Arc::new(adapter);
    Self::forge_internal(
        shared_instance,
        shared_adapter,
        device,
        queue,
        None,
        Some(target),
    )
    .await
}
/// Replaces the stored list of world-space panels with `panels`.
///
/// Each entry pairs a `u64` id with its `WorldSpacePanel`; the previous list
/// is discarded.
pub fn set_world_space_panels(&mut self, panels: Vec<(u64, cvkg_vdom::WorldSpacePanel)>) {
    let slot = &mut self.world_space_panels;
    *slot = panels;
}
/// Makes `node_id` the current world-space panel.
///
/// If another panel is current, its id is pushed onto `panel_stack` first.
/// When no panel with `node_id` is stored in `world_space_panels` yet, a new
/// one is appended, built from the given arguments with `spring` and
/// `physics` unset. An existing entry with the same id is left unchanged.
pub fn begin_world_space_panel(
    &mut self,
    node_id: u64,
    transform: &cvkg_core::Transform3D,
    glass: Option<cvkg_materials::GlassMaterial>,
    pixels_per_unit: f32,
    world_size: (f32, f32),
) {
    // Swap in the new current id and remember the enclosing panel, if any.
    if let Some(enclosing) = self.current_panel_id.replace(node_id) {
        self.panel_stack.push(enclosing);
    }

    let already_registered = self
        .world_space_panels
        .iter()
        .any(|(id, _)| *id == node_id);
    if already_registered {
        return;
    }

    self.world_space_panels.push((
        node_id,
        cvkg_vdom::WorldSpacePanel {
            transform: *transform,
            glass,
            pixels_per_unit,
            world_size,
            spring: None,
            physics: None,
        },
    ));
}
/// Ends the current world-space panel.
///
/// The most recently pushed id on `panel_stack` becomes the current panel;
/// when the stack is empty, no panel is current afterwards. `_node_id` is
/// not used.
pub fn end_world_space_panel(&mut self, _node_id: u64) {
    let restored = self.panel_stack.pop();
    self.current_panel_id = restored;
}
}