use std::sync::Mutex;
use awsm_renderer_core::{
buffers::{BufferDescriptor, BufferUsage},
error::AwsmCoreError,
renderer::AwsmRendererWebGpu,
};
/// One-shot latch for `note_edge_budget_initialized`: flipped to `true` the first
/// time that function emits its info log, so the message is logged only once.
static BUDGET_LOG_GUARD: Mutex<bool> = Mutex::new(false);
/// One-shot latch for `note_edge_overflow_observed`: flipped to `true` the first
/// time that function emits its warning, so the message is logged only once.
static OVERFLOW_WARN_GUARD: Mutex<bool> = Mutex::new(false);
/// Emits a single `info` event describing the configured edge budget.
///
/// Only the first call logs; every later call returns without logging. If the
/// latch mutex is poisoned the call is a silent no-op.
///
/// * `bucket_count` - recorded as a structured field on the event.
/// * `max_edge_budget` - recorded as a field and used to derive the accumulator
///   size (in MiB) that is reported alongside it.
fn note_edge_budget_initialized(bucket_count: u32, max_edge_budget: u32) {
    // The guard stays alive until the end of the function, so the latch is held
    // while the event is emitted.
    let mut already_logged = match BUDGET_LOG_GUARD.lock() {
        Ok(flag) => flag,
        Err(_) => return,
    };
    if *already_logged {
        return;
    }
    *already_logged = true;

    const BYTES_PER_MIB: f64 = 1024.0 * 1024.0;
    let accumulator_mb = f64::from(accumulator_bytes(max_edge_budget)) / BYTES_PER_MIB;
    tracing::info!(
        target: "awsm_renderer::edge_resolve",
        bucket_count,
        max_edge_budget,
        accumulator_mb,
        "MAX_EDGE_BUDGET initialized — edges beyond this count saturate the counter \
         (edge_overflow_count atomicAdd) and skip edge_resolve; affected pixels render \
         with the primary-sample shading. Full atomic-add overflow fallback is parked \
         (see https://github.com/dakom/awsm-renderer/pull/99)."
    );
}
/// Emits a single `warn` event the first time a non-zero edge overflow is reported.
///
/// Calls with `overflow_count == 0` are ignored and do not consume the one-shot
/// latch. After the first warning every later call returns without logging. If
/// the latch mutex is poisoned the call is a silent no-op.
///
/// * `overflow_count` - number of edges reported past the budget.
/// * `max_edge_budget` - the budget in effect, echoed in the message.
pub fn note_edge_overflow_observed(overflow_count: u32, max_edge_budget: u32) {
    if overflow_count == 0 {
        return;
    }

    // The guard stays alive until the end of the function, so the latch is held
    // while the event is emitted.
    let mut already_warned = match OVERFLOW_WARN_GUARD.lock() {
        Ok(flag) => flag,
        Err(_) => return,
    };
    if *already_warned {
        return;
    }
    *already_warned = true;

    tracing::warn!(
        target: "awsm_renderer::edge_resolve",
        overflow_count,
        max_edge_budget,
        "MAX_EDGE_BUDGET exceeded — edge_overflow_count={overflow_count} edges past \
         budget {max_edge_budget} were dropped this frame; those pixels rendered with \
         primary-sample shading instead of full MSAA resolve. Raise the budget or \
         lower edge density; the atomic-add overflow fallback \
         is not yet wired in.",
    );
}
/// Edge budget (512 Ki edges) used by `MaterialEdgeBuffers::new`.
pub const DEFAULT_MAX_EDGE_BUDGET_DESKTOP: u32 = 512 * 1024;
/// Smaller edge budget (256 Ki edges).
/// NOTE(review): presumably meant for mobile targets; the code that selects it
/// is not visible in this file section.
pub const DEFAULT_MAX_EDGE_BUDGET_MOBILE: u32 = 256 * 1024;
/// Size in bytes of the indirect-args record that follows the counters region
/// in the args buffer (see `args_buffer_bytes`).
pub const INDIRECT_ARGS_STRIDE: u32 = 16;
/// Packs an edge pixel id and a sample mask into one `u32`.
///
/// Layout: bits 0..24 hold `edge_pixel_id` (anything above 24 bits is
/// discarded), bits 24..32 hold `sample_mask`.
#[inline]
pub fn pack_edge_sample_entry(edge_pixel_id: u32, sample_mask: u8) -> u32 {
    const ID_BITS: u32 = 24;
    const ID_MASK: u32 = (1 << ID_BITS) - 1;

    let id_part = edge_pixel_id & ID_MASK;
    let mask_part = u32::from(sample_mask) << ID_BITS;
    id_part | mask_part
}
/// Splits a packed entry into `(edge_pixel_id, sample_mask)`.
///
/// The id is the low 24 bits of `packed`; the mask is its most significant byte.
#[inline]
pub fn unpack_edge_sample_entry(packed: u32) -> (u32, u8) {
    // In big-endian byte order the first byte is the most significant one,
    // i.e. bits 24..32 of the word.
    let [sample_mask, ..] = packed.to_be_bytes();
    (packed & 0x00FF_FFFF, sample_mask)
}
/// Packs two coordinates into one `u32`: `x` in the low 16 bits, `y` in the
/// high 16 bits. Bits above 16 in either input are discarded.
#[inline]
pub fn pack_xy(x: u32, y: u32) -> u32 {
    const HALF_MASK: u32 = 0xFFFF;

    let low_half = x & HALF_MASK;
    let high_half = (y & HALF_MASK) << 16;
    high_half | low_half
}
/// Splits a packed coordinate word into `(x, y)`: `x` is the low 16 bits,
/// `y` is the high 16 bits.
#[inline]
pub fn unpack_xy(packed: u32) -> (u32, u32) {
    // Shifting a u32 right by 16 already leaves only 16 significant bits.
    (packed & 0xFFFF, packed >> 16)
}
/// Number of accumulator slots reserved per edge when sizing the accumulator
/// region (see `accumulator_bytes`).
pub const ACCUMULATOR_SLOTS_PER_EDGE: u32 = 4;
/// Size in bytes of one accumulator slot (see `accumulator_bytes`).
pub const ACCUMULATOR_SLOT_BYTES: u32 = 16;
/// Size in bytes of the counters region at the start of the args buffer; the
/// indirect-args record written by `write_args_header` begins at this offset.
pub const ARGS_COUNTERS_BYTES: u32 = 16;
/// Total size in bytes of the args buffer: the counters region followed by one
/// indirect-args record.
///
/// `_bucket_count` is accepted for signature compatibility but does not affect
/// the result.
pub fn args_buffer_bytes(_bucket_count: u32) -> u32 {
    let counters_region = ARGS_COUNTERS_BYTES;
    let indirect_region = INDIRECT_ARGS_STRIDE;
    counters_region + indirect_region
}
/// Size in bytes of the data-buffer header, rounded up to a multiple of 16.
///
/// The unpadded header is a 16-byte counters block, one 4-byte word per bucket,
/// and one trailing 4-byte word (called `skybox` in the original code).
///
/// All arithmetic saturates: for a pathologically large `bucket_count` the
/// result is the largest 16-byte-aligned `u32` (`0xFFFF_FFF0`) instead of a
/// debug-build panic or a release-build wrap-around to a tiny, wrong size.
pub fn data_header_bytes(bucket_count: u32) -> u32 {
    const COUNTERS_BYTES: u32 = 16;
    const BYTES_PER_BUCKET: u32 = 4;
    const SKYBOX_BYTES: u32 = 4;
    const ALIGN: u32 = 16;

    let unpadded = bucket_count
        .saturating_mul(BYTES_PER_BUCKET)
        .saturating_add(COUNTERS_BYTES)
        .saturating_add(SKYBOX_BYTES);

    // Align up; the saturating add keeps the round-up itself from overflowing.
    unpadded.saturating_add(ALIGN - 1) & !(ALIGN - 1)
}
/// Byte offset of the edge counter within the data buffer: the very start.
pub fn data_edge_count_offset() -> u32 {
    const EDGE_COUNT_OFFSET: u32 = 0;
    EDGE_COUNT_OFFSET
}
/// Byte offset of the edge-to-xy region within the data buffer.
///
/// The region starts immediately after the header, so this equals
/// `data_header_bytes(bucket_count)`.
pub fn edge_to_xy_offset(bucket_count: u32) -> u32 {
data_header_bytes(bucket_count)
}
/// Byte offset of the edge slot map within the data buffer.
///
/// The slot map follows the edge-to-xy region, which holds one 4-byte word per
/// budgeted edge. The sum saturates at `u32::MAX` rather than panicking (debug)
/// or wrapping to a small, wrong offset (release) when the budget is very large.
pub fn edge_slot_map_offset(bucket_count: u32, max_edge_budget: u32) -> u32 {
    let edge_to_xy_region_bytes = max_edge_budget.saturating_mul(4);
    edge_to_xy_offset(bucket_count).saturating_add(edge_to_xy_region_bytes)
}
/// Size in bytes of the edge slot map.
///
/// Each budgeted edge gets `edge_slot_words_per_edge(bucket_count)` 4-byte
/// words; the product saturates at `u32::MAX`.
pub fn edge_slot_map_bytes(bucket_count: u32, max_edge_budget: u32) -> u32 {
    let words_per_edge = crate::dynamic_materials::edge_slot_words_per_edge(bucket_count);
    let bytes_per_word_column = max_edge_budget.saturating_mul(4);
    bytes_per_word_column.saturating_mul(words_per_edge)
}
/// Byte offset of the accumulator region within the data buffer.
///
/// The accumulator follows the edge slot map. Both operands are themselves
/// saturating, so the sum saturates too: a plain `+` here would panic in debug
/// builds and wrap to a small, wrong offset in release builds.
pub fn accumulator_offset(bucket_count: u32, max_edge_budget: u32) -> u32 {
    let slot_map_start = edge_slot_map_offset(bucket_count, max_edge_budget);
    let slot_map_len = edge_slot_map_bytes(bucket_count, max_edge_budget);
    slot_map_start.saturating_add(slot_map_len)
}
/// Size in bytes of the accumulator region:
/// `max_edge_budget * ACCUMULATOR_SLOTS_PER_EDGE * ACCUMULATOR_SLOT_BYTES`,
/// saturating at `u32::MAX`.
pub fn accumulator_bytes(max_edge_budget: u32) -> u32 {
    let slot_count = max_edge_budget.saturating_mul(ACCUMULATOR_SLOTS_PER_EDGE);
    slot_count.saturating_mul(ACCUMULATOR_SLOT_BYTES)
}
/// Total size in bytes of the data buffer: header, edge-to-xy region, edge slot
/// map and accumulator region, laid out in that order.
///
/// `accumulator_bytes` saturates to `u32::MAX` for very large budgets, so the
/// final sum saturates as well. A plain `+` would panic in debug builds and, in
/// release builds, wrap to a size far smaller than the layout actually needs.
pub fn data_buffer_bytes(bucket_count: u32, max_edge_budget: u32) -> u32 {
    let accumulator_start = accumulator_offset(bucket_count, max_edge_budget);
    accumulator_start.saturating_add(accumulator_bytes(max_edge_budget))
}
/// GPU buffers plus CPU-side scratch data for the material edge pass, sized for
/// a given bucket count and edge budget.
pub struct MaterialEdgeBuffers {
/// Args buffer (`args_size_bytes` long), created with storage, indirect and
/// copy-dst usage.
pub args_buffer: web_sys::GpuBuffer,
/// Data buffer (`data_size_bytes` long), created with storage, copy-dst and
/// copy-src usage.
pub data_buffer: web_sys::GpuBuffer,
/// Mappable readback buffer (`EDGE_OVERFLOW_READBACK_BYTES` long), created
/// with map-read and copy-dst usage.
pub overflow_readback_buffer: web_sys::GpuBuffer,
/// Bucket count the buffers were sized for (clamped to at least 1).
pub bucket_count: u32,
/// Edge budget the buffers were sized for (clamped to at least 1).
pub max_edge_budget: u32,
/// Size of `args_buffer` in bytes.
pub args_size_bytes: u32,
/// Size of `data_buffer` in bytes.
pub data_size_bytes: u32,
// CPU-side image of the args buffer (header pre-filled by `write_args_header`);
// uploaded to `args_buffer` by `reset_header`.
args_scratch: Vec<u8>,
// Header-sized block of zero bytes; uploaded to the start of `data_buffer` by
// `reset_header`.
data_header_scratch: Vec<u8>,
}
/// Size in bytes of `MaterialEdgeBuffers::overflow_readback_buffer`.
/// NOTE(review): 8 bytes is presumably two `u32` counters; confirm against the
/// code that copies into and reads this buffer.
pub const EDGE_OVERFLOW_READBACK_BYTES: u32 = 8;
impl MaterialEdgeBuffers {
    /// Allocates the buffers for `bucket_count` buckets using
    /// `DEFAULT_MAX_EDGE_BUDGET_DESKTOP` as the edge budget.
    ///
    /// # Errors
    /// Propagates any error from [`Self::new_with_budget`].
    pub fn new(gpu: &AwsmRendererWebGpu, bucket_count: u32) -> Result<Self, AwsmCoreError> {
        Self::new_with_budget(gpu, bucket_count, DEFAULT_MAX_EDGE_BUDGET_DESKTOP)
    }

    /// Allocates the args, data and overflow-readback buffers for the given
    /// bucket count and edge budget, and prepares the CPU-side scratch data
    /// later uploaded by [`Self::reset_header`].
    ///
    /// Both `bucket_count` and `max_edge_budget` are clamped to at least 1.
    ///
    /// # Errors
    /// Returns the error from `create_buffer` if any of the three allocations fails.
    pub fn new_with_budget(
        gpu: &AwsmRendererWebGpu,
        bucket_count: u32,
        max_edge_budget: u32,
    ) -> Result<Self, AwsmCoreError> {
        let bucket_count = bucket_count.max(1);
        let max_edge_budget = max_edge_budget.max(1);

        let args_size_bytes = args_buffer_bytes(bucket_count);
        let data_size_bytes = data_buffer_bytes(bucket_count, max_edge_budget);
        let data_size_mb = data_size_bytes as f64 / (1024.0 * 1024.0);
        tracing::info!(
            target: "awsm_renderer::edge_buffers",
            "MaterialEdgeBuffers alloc: bucket_count={}, max_edge_budget={}, \
             data_buffer={:.1} MB, args_buffer={} B",
            bucket_count,
            max_edge_budget,
            data_size_mb,
            args_size_bytes,
        );

        // Args buffer: written by the CPU, read as storage and as indirect args.
        let args_usage = BufferUsage::new()
            .with_storage()
            .with_indirect()
            .with_copy_dst();
        let args_descriptor = BufferDescriptor::new(
            Some("MaterialEdgeBuffers::args"),
            args_size_bytes as usize,
            args_usage,
        );
        let args_buffer = gpu.create_buffer(&args_descriptor.into())?;

        // Data buffer: storage that can be both a copy destination and a copy source.
        let data_usage = BufferUsage::new()
            .with_storage()
            .with_copy_dst()
            .with_copy_src();
        let data_descriptor = BufferDescriptor::new(
            Some("MaterialEdgeBuffers::data"),
            data_size_bytes as usize,
            data_usage,
        );
        let data_buffer = gpu.create_buffer(&data_descriptor.into())?;

        // Readback buffer: a copy destination that can be mapped for reading.
        let readback_usage = BufferUsage::new().with_map_read().with_copy_dst();
        let readback_descriptor = BufferDescriptor::new(
            Some("MaterialEdgeBuffers::overflow_readback"),
            EDGE_OVERFLOW_READBACK_BYTES as usize,
            readback_usage,
        );
        let overflow_readback_buffer = gpu.create_buffer(&readback_descriptor.into())?;

        // CPU-side images: args with the header pre-filled, data header all zeros.
        let mut args_scratch = vec![0u8; args_size_bytes as usize];
        write_args_header(&mut args_scratch, bucket_count);
        let data_header_scratch = vec![0u8; data_header_bytes(bucket_count) as usize];

        note_edge_budget_initialized(bucket_count, max_edge_budget);

        Ok(Self {
            args_buffer,
            data_buffer,
            overflow_readback_buffer,
            bucket_count,
            max_edge_budget,
            args_size_bytes,
            data_size_bytes,
            args_scratch,
            data_header_scratch,
        })
    }

    /// Grows the buffers when `needed_bucket_count` exceeds the current bucket
    /// count, keeping the current edge budget.
    ///
    /// Returns `Ok(true)` if everything was reallocated (`*self` is replaced),
    /// `Ok(false)` if the current allocation already suffices.
    ///
    /// # Errors
    /// Propagates any reallocation error; `self` is left untouched in that case.
    pub fn ensure_bucket_count(
        &mut self,
        gpu: &AwsmRendererWebGpu,
        needed_bucket_count: u32,
    ) -> Result<bool, AwsmCoreError> {
        let must_grow = needed_bucket_count > self.bucket_count;
        if must_grow {
            *self = Self::new_with_budget(gpu, needed_bucket_count, self.max_edge_budget)?;
        }
        Ok(must_grow)
    }

    /// Reallocates the buffers for a different edge budget, keeping the current
    /// bucket count. `new_budget` is clamped to at least 1.
    ///
    /// Returns `Ok(true)` if everything was reallocated (`*self` is replaced),
    /// `Ok(false)` if the clamped budget equals the current one.
    ///
    /// # Errors
    /// Propagates any reallocation error; `self` is left untouched in that case.
    pub fn set_max_edge_budget(
        &mut self,
        gpu: &AwsmRendererWebGpu,
        new_budget: u32,
    ) -> Result<bool, AwsmCoreError> {
        let new_budget = new_budget.max(1);
        let budget_changed = new_budget != self.max_edge_budget;
        if budget_changed {
            *self = Self::new_with_budget(gpu, self.bucket_count, new_budget)?;
        }
        Ok(budget_changed)
    }

    /// Uploads the prepared scratch data: first the full args image into
    /// `args_buffer`, then the zeroed header into the start of `data_buffer`.
    ///
    /// # Errors
    /// Returns the first `write_buffer` error encountered; the second upload is
    /// skipped if the first one fails.
    pub fn reset_header(&self, gpu: &AwsmRendererWebGpu) -> Result<(), AwsmCoreError> {
        let uploads = [
            (&self.args_buffer, &self.args_scratch),
            (&self.data_buffer, &self.data_header_scratch),
        ];
        for (buffer, contents) in uploads {
            gpu.write_buffer(buffer, None, contents.as_slice(), None, None)?;
        }
        Ok(())
    }

    /// Byte offset within the args buffer of the indirect-args record that
    /// follows the counters region.
    pub fn final_blend_args_offset() -> u32 {
        ARGS_COUNTERS_BYTES
    }
}
/// Serialises the edge-buffer layout into the byte image of a uniform block.
///
/// The image is a sequence of native-endian `u32` words, zero-padded to a
/// multiple of 16 bytes:
///
/// 0. `max_edge_budget`
/// 1. edge-count offset
/// 2. edge-to-xy offset
/// 3. edge-slot-map offset
/// 4. accumulator offset
///
/// Offsets are expressed in 4-byte words (byte offset divided by 4).
pub fn build_edge_layout_uniform_bytes(bucket_count: u32, max_edge_budget: u32) -> Vec<u8> {
    const WORD_BYTES: u32 = 4;
    const BLOCK_ALIGN_BYTES: usize = 16;

    let fields = [
        max_edge_budget,
        data_edge_count_offset() / WORD_BYTES,
        edge_to_xy_offset(bucket_count) / WORD_BYTES,
        edge_slot_map_offset(bucket_count, max_edge_budget) / WORD_BYTES,
        accumulator_offset(bucket_count, max_edge_budget) / WORD_BYTES,
    ];

    let mut image: Vec<u8> = fields.iter().flat_map(|word| word.to_ne_bytes()).collect();

    // Zero-fill up to the next 16-byte boundary.
    let padded_len = (image.len() + (BLOCK_ALIGN_BYTES - 1)) & !(BLOCK_ALIGN_BYTES - 1);
    image.resize(padded_len, 0);
    image
}
/// Creates a uniform buffer holding the edge-buffer layout and uploads its contents.
///
/// Returns the buffer together with its size in bytes.
///
/// # Errors
/// Returns the error from `create_buffer` or `write_buffer` if either fails.
pub fn build_edge_layout_uniform(
    gpu: &AwsmRendererWebGpu,
    bucket_count: u32,
    max_edge_budget: u32,
) -> Result<(web_sys::GpuBuffer, u32), AwsmCoreError> {
    let contents = build_edge_layout_uniform_bytes(bucket_count, max_edge_budget);
    let byte_len = contents.len();

    let usage = BufferUsage::new().with_uniform().with_copy_dst();
    let descriptor = BufferDescriptor::new(Some("EdgeBufferLayout uniform"), byte_len, usage);
    let buffer = gpu.create_buffer(&descriptor.into())?;

    gpu.write_buffer(&buffer, None, contents.as_slice(), None, None)?;
    Ok((buffer, byte_len as u32))
}
/// Writes the 16-byte indirect-args record into `dst`, starting at byte offset
/// `ARGS_COUNTERS_BYTES`.
///
/// The record is four native-endian `u32` words: `[0, 1, 1, 0]`.
/// NOTE(review): presumably dispatch counts `(x = 0, y = 1, z = 1)` followed by
/// a padding word; confirm against the shader / indirect call site.
///
/// `_bucket_count` is accepted for signature compatibility but is unused.
///
/// # Panics
/// Panics if `dst` is shorter than `ARGS_COUNTERS_BYTES + 16` bytes.
pub fn write_args_header(dst: &mut [u8], _bucket_count: u32) {
    const WORD_BYTES: usize = 4;
    const RECORD_WORDS: [u32; 4] = [0, 1, 1, 0];

    let record_start = ARGS_COUNTERS_BYTES as usize;
    for (index, word) in RECORD_WORDS.iter().enumerate() {
        let begin = record_start + index * WORD_BYTES;
        dst[begin..begin + WORD_BYTES].copy_from_slice(&word.to_ne_bytes());
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `unpack_xy` must invert `pack_xy` for coordinates that fit in 16 bits.
    #[test]
    fn pack_round_trip_xy() {
        let samples = [(0u32, 0u32), (123, 456), (65535, 65535), (1, 2)];
        for &(x, y) in samples.iter() {
            let (rx, ry) = unpack_xy(pack_xy(x, y));
            assert_eq!((rx, ry), (x, y));
        }
    }

    /// `unpack_edge_sample_entry` must invert `pack_edge_sample_entry` for ids
    /// that fit in 24 bits.
    #[test]
    fn pack_round_trip_entry() {
        let samples = [(0u32, 0u8), (12345, 0b1010), (0x00FF_FFFF, 0xFF)];
        for &(id, mask) in samples.iter() {
            let (rid, rmask) = unpack_edge_sample_entry(pack_edge_sample_entry(id, mask));
            assert_eq!((rid, rmask), (id, mask));
        }
    }

    /// The args buffer size is a multiple of 16 bytes for every bucket count.
    #[test]
    fn args_size_is_aligned() {
        [1u32, 4, 5, 17].iter().for_each(|&bucket_count| {
            assert_eq!(args_buffer_bytes(bucket_count) % 16, 0);
        });
    }

    /// The data buffer must stay under the 128 MiB floor and must not scale
    /// with `bucket_count * budget`.
    #[test]
    fn data_buffer_is_o_edge_budget_not_o_buckets_times_budget() {
        const WEBGPU_MIN_BINDING: u32 = 128 * 1024 * 1024;

        let budgets = [
            DEFAULT_MAX_EDGE_BUDGET_MOBILE,
            DEFAULT_MAX_EDGE_BUDGET_DESKTOP,
        ];
        for budget in budgets {
            // Absolute ceiling at several bucket counts.
            for bucket_count in [16u32, 254, 1024] {
                let bytes = data_buffer_bytes(bucket_count, budget);
                assert!(
                    bytes <= WEBGPU_MIN_BINDING,
                    "data_buffer {bytes} B at {bucket_count} buckets / {budget} budget exceeds the 128 MiB floor"
                );
            }

            // Relative growth: 64x more buckets may at most double the buffer.
            let at_16 = u64::from(data_buffer_bytes(16, budget));
            let at_1024 = u64::from(data_buffer_bytes(1024, budget));
            assert!(
                at_1024 <= at_16 * 2,
                "data_buffer grew {at_16}→{at_1024} (>2×) from 16→1024 buckets — sample memory is not O(edge_budget)"
            );
        }
    }
}