use std::sync::Mutex;
use awsm_renderer_core::{
buffers::{BufferDescriptor, BufferUsage},
error::AwsmCoreError,
renderer::AwsmRendererWebGpu,
};
/// One-shot latch for `note_edge_budget_initialized`: flipped to `true` the
/// first time the budget info event is logged so that it is never repeated.
static BUDGET_LOG_GUARD: Mutex<bool> = Mutex::new(false);
/// One-shot latch for `note_edge_overflow_observed`: flipped to `true` the
/// first time the overflow warning is logged so that it is never repeated.
static OVERFLOW_WARN_GUARD: Mutex<bool> = Mutex::new(false);
/// Emits a single `info` event describing the configured edge budget.
///
/// Only the first call logs: it flips `BUDGET_LOG_GUARD` and reports the bucket
/// count, the budget and the accumulator size in MiB. Later calls return
/// immediately, and so does a call that finds the guard mutex poisoned.
fn note_edge_budget_initialized(bucket_count: u32, max_edge_budget: u32) {
    let mut already_logged = match BUDGET_LOG_GUARD.lock() {
        Ok(flag) => flag,
        // Best-effort diagnostics: a poisoned latch simply means no log line.
        Err(_) => return,
    };
    if *already_logged {
        return;
    }
    *already_logged = true;

    let accumulator_mb = f64::from(accumulator_bytes(max_edge_budget)) / (1024.0 * 1024.0);
    tracing::info!(
        target: "awsm_renderer::edge_resolve",
        bucket_count,
        max_edge_budget,
        accumulator_mb,
        "MAX_EDGE_BUDGET initialized — edges beyond this count saturate the counter \
         (edge_overflow_count atomicAdd) and skip edge_resolve; affected pixels render \
         with the primary-sample shading. Full atomic-add overflow fallback is parked \
         (see https://github.com/dakom/awsm-renderer/pull/99)."
    );
}
/// Emits a single `warn` event the first time a non-zero overflow count is seen.
///
/// A zero `overflow_count` is ignored. Otherwise the first call flips
/// `OVERFLOW_WARN_GUARD` and logs the overflow count together with the budget;
/// later calls (and calls that find the guard mutex poisoned) do nothing.
pub fn note_edge_overflow_observed(overflow_count: u32, max_edge_budget: u32) {
    if overflow_count == 0 {
        return;
    }
    let mut already_warned = match OVERFLOW_WARN_GUARD.lock() {
        Ok(flag) => flag,
        // Best-effort diagnostics: a poisoned latch simply means no warning.
        Err(_) => return,
    };
    if *already_warned {
        return;
    }
    *already_warned = true;

    tracing::warn!(
        target: "awsm_renderer::edge_resolve",
        overflow_count,
        max_edge_budget,
        "MAX_EDGE_BUDGET exceeded — edge_overflow_count={overflow_count} edges past \
         budget {max_edge_budget} were dropped this frame; those pixels rendered with \
         primary-sample shading instead of full MSAA resolve. Raise the budget or \
         lower edge density; the atomic-add overflow fallback \
         is not yet wired in.",
    );
}
/// Factor applied to the edge budget to size each per-bucket sample-entry list
/// (see `sample_entries_per_bucket`).
pub const SAMPLE_ENTRIES_PER_BUCKET_MULTIPLIER: u32 = 2;
/// Default edge budget (512 Ki edges); this is the value `MaterialEdgeBuffers::new` uses.
pub const DEFAULT_MAX_EDGE_BUDGET_DESKTOP: u32 = 512 * 1024;
/// Smaller default edge budget (256 Ki edges); judging by the name it is meant
/// for mobile targets — nothing in this file selects it automatically.
pub const DEFAULT_MAX_EDGE_BUDGET_MOBILE: u32 = 256 * 1024;
/// Byte stride of one indirect-args slot in the args buffer: four `u32` words,
/// as written by `write_args_header`.
pub const INDIRECT_ARGS_STRIDE: u32 = 16;
/// Packs an edge pixel id and a sample mask into one `u32`.
///
/// The low 24 bits hold `edge_pixel_id` (higher bits of the id are discarded)
/// and the top 8 bits hold `sample_mask`.
#[inline]
pub fn pack_edge_sample_entry(edge_pixel_id: u32, sample_mask: u8) -> u32 {
    const ID_MASK: u32 = 0x00FF_FFFF;
    const MASK_SHIFT: u32 = 24;

    let id_bits = edge_pixel_id & ID_MASK;
    let mask_bits = u32::from(sample_mask) << MASK_SHIFT;
    mask_bits | id_bits
}
/// Splits a packed sample entry back into `(edge_pixel_id, sample_mask)`.
///
/// Inverse of `pack_edge_sample_entry`: the id is the low 24 bits and the mask
/// is the top 8 bits.
#[inline]
pub fn unpack_edge_sample_entry(packed: u32) -> (u32, u8) {
    const ID_MASK: u32 = 0x00FF_FFFF;
    const MASK_SHIFT: u32 = 24;

    // After shifting out the 24 id bits only the 8 mask bits remain.
    let sample_mask = (packed >> MASK_SHIFT) as u8;
    (packed & ID_MASK, sample_mask)
}
/// Packs pixel coordinates into one `u32`: `x` in the low 16 bits, `y` in the
/// high 16 bits. Bits above the low 16 of either input are discarded.
#[inline]
pub fn pack_xy(x: u32, y: u32) -> u32 {
    const HALF_MASK: u32 = 0xFFFF;

    let low = x & HALF_MASK;
    let high = (y & HALF_MASK) << 16;
    high | low
}
/// Splits a value produced by `pack_xy` back into `(x, y)`: `x` is the low
/// 16 bits and `y` the high 16 bits.
#[inline]
pub fn unpack_xy(packed: u32) -> (u32, u32) {
    const HALF_MASK: u32 = 0xFFFF;

    (packed & HALF_MASK, (packed >> 16) & HALF_MASK)
}
/// Number of accumulator slots reserved per edge (see `accumulator_bytes`).
pub const ACCUMULATOR_SLOTS_PER_EDGE: u32 = 4;
/// Size in bytes of a single accumulator slot.
pub const ACCUMULATOR_SLOT_BYTES: u32 = 16;
/// Size in bytes of the counter region at the start of the args buffer; the
/// indirect-args slots begin right after it.
pub const ARGS_COUNTERS_BYTES: u32 = 16;
/// Total size in bytes of the args buffer for `bucket_count` shader buckets.
///
/// The buffer is the counter region (`ARGS_COUNTERS_BYTES`) followed by
/// `2 + bucket_count` indirect-args slots of `INDIRECT_ARGS_STRIDE` bytes each
/// (the two fixed slots are the final-blend and skybox slots).
///
/// Every step saturates, so an absurdly large `bucket_count` yields `u32::MAX`
/// instead of overflowing.
pub fn args_buffer_bytes(bucket_count: u32) -> u32 {
    let slot_count = bucket_count.saturating_add(2);
    slot_count
        .saturating_mul(INDIRECT_ARGS_STRIDE)
        .saturating_add(ARGS_COUNTERS_BYTES)
}
/// Size in bytes of the header at the start of the data buffer.
///
/// The header is 16 bytes of counters, one 4-byte count per bucket and one
/// 4-byte skybox count, rounded up to a multiple of 16 bytes.
///
/// All arithmetic saturates: for a `bucket_count` too large to represent, the
/// result is the largest 16-byte-aligned `u32` (`0xFFFF_FFF0`) rather than an
/// overflow panic (debug) or a wrapped, too-small size (release).
pub fn data_header_bytes(bucket_count: u32) -> u32 {
    const COUNTERS_BYTES: u32 = 16;
    const SKYBOX_COUNT_BYTES: u32 = 4;
    const ALIGNMENT: u32 = 16;

    let unpadded = bucket_count
        .saturating_mul(4)
        .saturating_add(COUNTERS_BYTES)
        .saturating_add(SKYBOX_COUNT_BYTES);
    // Round up to the next multiple of ALIGNMENT (a power of two).
    unpadded.saturating_add(ALIGNMENT - 1) & !(ALIGNMENT - 1)
}
/// Byte offset of the edge counter inside the data buffer header: it is the
/// very first field, so the offset is zero.
pub fn data_edge_count_offset() -> u32 {
    const EDGE_COUNT_OFFSET: u32 = 0;
    EDGE_COUNT_OFFSET
}
/// Byte offset of the per-shader counter for `bucket_index` in the data header.
///
/// Counters are 4 bytes each and start right after the 16-byte counter region.
/// The computation saturates at `u32::MAX` instead of overflowing for
/// out-of-range indices.
pub fn data_per_shader_count_offset(bucket_index: u32) -> u32 {
    const COUNTERS_BYTES: u32 = 16;
    bucket_index.saturating_mul(4).saturating_add(COUNTERS_BYTES)
}
/// Byte offset of the skybox counter in the data header.
///
/// It sits directly after the `bucket_count` per-shader counters (4 bytes each),
/// which themselves follow the 16-byte counter region. The computation
/// saturates at `u32::MAX` instead of overflowing.
pub fn data_skybox_count_offset(bucket_count: u32) -> u32 {
    const COUNTERS_BYTES: u32 = 16;
    bucket_count.saturating_mul(4).saturating_add(COUNTERS_BYTES)
}
/// Byte offset of the edge-to-xy table in the data buffer.
///
/// The table is the first region after the header, so its offset equals the
/// padded header length.
pub fn edge_to_xy_offset(bucket_count: u32) -> u32 {
    let header_len = data_header_bytes(bucket_count);
    header_len
}
/// Byte offset of the edge slot map in the data buffer.
///
/// The slot map follows the edge-to-xy table, which holds one 4-byte entry per
/// budgeted edge. The sum saturates at `u32::MAX` so that an oversized budget
/// cannot overflow.
pub fn edge_slot_map_offset(bucket_count: u32, max_edge_budget: u32) -> u32 {
    let edge_to_xy_bytes = max_edge_budget.saturating_mul(4);
    edge_to_xy_offset(bucket_count).saturating_add(edge_to_xy_bytes)
}
/// Byte offset of the accumulator region in the data buffer.
///
/// The accumulator follows the edge slot map, which holds one 4-byte entry per
/// budgeted edge. The sum saturates at `u32::MAX` so that an oversized budget
/// cannot overflow.
pub fn accumulator_offset(bucket_count: u32, max_edge_budget: u32) -> u32 {
    let slot_map_bytes = max_edge_budget.saturating_mul(4);
    edge_slot_map_offset(bucket_count, max_edge_budget).saturating_add(slot_map_bytes)
}
/// Size in bytes of the accumulator region: `ACCUMULATOR_SLOTS_PER_EDGE` slots
/// of `ACCUMULATOR_SLOT_BYTES` bytes for every budgeted edge, saturating at
/// `u32::MAX`.
pub fn accumulator_bytes(max_edge_budget: u32) -> u32 {
    let slot_count = max_edge_budget.saturating_mul(ACCUMULATOR_SLOTS_PER_EDGE);
    slot_count.saturating_mul(ACCUMULATOR_SLOT_BYTES)
}
/// Byte offset of the first per-bucket sample-entry list in the data buffer.
///
/// The lists start right after the accumulator region. The sum saturates at
/// `u32::MAX`; with an unchecked `+` a budget large enough to saturate
/// `accumulator_bytes` would overflow here.
pub fn sample_entries_offset(bucket_count: u32, max_edge_budget: u32) -> u32 {
    accumulator_offset(bucket_count, max_edge_budget)
        .saturating_add(accumulator_bytes(max_edge_budget))
}
/// Capacity, in entries, of each per-bucket sample-entry list: the edge budget
/// scaled by `SAMPLE_ENTRIES_PER_BUCKET_MULTIPLIER`, saturating at `u32::MAX`.
pub fn sample_entries_per_bucket(max_edge_budget: u32) -> u32 {
    let multiplier = SAMPLE_ENTRIES_PER_BUCKET_MULTIPLIER;
    max_edge_budget.saturating_mul(multiplier)
}
/// Byte offset of the skybox sample list in the data buffer.
///
/// The skybox list follows the `bucket_count` per-bucket sample-entry lists,
/// each of which holds `sample_entries_per_bucket` 4-byte entries. The sum
/// saturates at `u32::MAX`; with an unchecked `+` a large bucket count or
/// budget would overflow once the product had saturated.
pub fn skybox_sample_list_offset(bucket_count: u32, max_edge_budget: u32) -> u32 {
    let per_bucket_bytes = sample_entries_per_bucket(max_edge_budget).saturating_mul(4);
    let all_buckets_bytes = bucket_count.saturating_mul(per_bucket_bytes);
    sample_entries_offset(bucket_count, max_edge_budget).saturating_add(all_buckets_bytes)
}
/// Total size in bytes of the data buffer.
///
/// The skybox sample list is the last region; it has the same capacity as one
/// per-bucket list, so the total is its offset plus one list's byte size.
///
/// The sum saturates at `u32::MAX`. A wrapped result would describe a buffer
/// far smaller than the layout the other offset helpers report.
pub fn data_buffer_bytes(bucket_count: u32, max_edge_budget: u32) -> u32 {
    let skybox_list_bytes = sample_entries_per_bucket(max_edge_budget).saturating_mul(4);
    skybox_sample_list_offset(bucket_count, max_edge_budget).saturating_add(skybox_list_bytes)
}
/// GPU buffers and CPU-side scratch images created together by
/// `MaterialEdgeBuffers::new_with_budget`.
pub struct MaterialEdgeBuffers {
    /// Args buffer, created with storage + indirect + copy-dst usage and
    /// `args_size_bytes` long.
    pub args_buffer: web_sys::GpuBuffer,
    /// Data buffer, created with storage + copy-dst + copy-src usage and
    /// `data_size_bytes` long.
    pub data_buffer: web_sys::GpuBuffer,
    /// Small map-read + copy-dst buffer of `EDGE_OVERFLOW_READBACK_BYTES` bytes.
    pub overflow_readback_buffer: web_sys::GpuBuffer,
    /// Number of shader buckets the buffers were sized for (always at least 1).
    pub bucket_count: u32,
    /// Edge budget the buffers were sized for (always at least 1).
    pub max_edge_budget: u32,
    /// Result of `args_buffer_bytes(bucket_count)` at creation time.
    pub args_size_bytes: u32,
    /// Result of `data_buffer_bytes(bucket_count, max_edge_budget)` at creation time.
    pub data_size_bytes: u32,
    // CPU image of the whole args buffer (zeroed counters plus the seeded
    // indirect-args slots); uploaded as-is by `reset_header`.
    args_scratch: Vec<u8>,
    // All-zero CPU image of the data buffer header; uploaded as-is by
    // `reset_header`.
    data_header_scratch: Vec<u8>,
}
/// Size in bytes of `MaterialEdgeBuffers::overflow_readback_buffer`.
pub const EDGE_OVERFLOW_READBACK_BYTES: u32 = 8;
impl MaterialEdgeBuffers {
    /// Creates the buffers for `bucket_count` shader buckets using
    /// `DEFAULT_MAX_EDGE_BUDGET_DESKTOP` as the edge budget.
    ///
    /// # Errors
    /// Propagates any error returned while creating the GPU buffers.
    pub fn new(gpu: &AwsmRendererWebGpu, bucket_count: u32) -> Result<Self, AwsmCoreError> {
        Self::new_with_budget(gpu, bucket_count, DEFAULT_MAX_EDGE_BUDGET_DESKTOP)
    }

    /// Creates the args, data and overflow-readback buffers for the given
    /// bucket count and edge budget, along with the CPU scratch images that
    /// `reset_header` uploads.
    ///
    /// Both `bucket_count` and `max_edge_budget` are clamped to at least 1.
    /// The first successful call in the process also logs the budget once.
    ///
    /// # Errors
    /// Propagates any error returned while creating the GPU buffers.
    pub fn new_with_budget(
        gpu: &AwsmRendererWebGpu,
        bucket_count: u32,
        max_edge_budget: u32,
    ) -> Result<Self, AwsmCoreError> {
        let bucket_count = bucket_count.max(1);
        let max_edge_budget = max_edge_budget.max(1);
        let args_size_bytes = args_buffer_bytes(bucket_count);
        let data_size_bytes = data_buffer_bytes(bucket_count, max_edge_budget);

        let args_buffer = gpu.create_buffer(
            &BufferDescriptor::new(
                Some("MaterialEdgeBuffers::args"),
                args_size_bytes as usize,
                BufferUsage::new()
                    .with_storage()
                    .with_indirect()
                    .with_copy_dst(),
            )
            .into(),
        )?;
        let data_buffer = gpu.create_buffer(
            &BufferDescriptor::new(
                Some("MaterialEdgeBuffers::data"),
                data_size_bytes as usize,
                BufferUsage::new()
                    .with_storage()
                    .with_copy_dst()
                    .with_copy_src(),
            )
            .into(),
        )?;
        let overflow_readback_buffer = gpu.create_buffer(
            &BufferDescriptor::new(
                Some("MaterialEdgeBuffers::overflow_readback"),
                EDGE_OVERFLOW_READBACK_BYTES as usize,
                BufferUsage::new().with_map_read().with_copy_dst(),
            )
            .into(),
        )?;

        // The args image keeps zeroed counters and pre-seeded indirect slots;
        // the data header image is all zeros.
        let mut args_scratch = vec![0u8; args_size_bytes as usize];
        write_args_header(&mut args_scratch, bucket_count);
        let data_header_scratch = vec![0u8; data_header_bytes(bucket_count) as usize];

        note_edge_budget_initialized(bucket_count, max_edge_budget);

        Ok(Self {
            args_buffer,
            data_buffer,
            overflow_readback_buffer,
            bucket_count,
            max_edge_budget,
            args_size_bytes,
            data_size_bytes,
            args_scratch,
            data_header_scratch,
        })
    }

    /// Grows the buffers when more buckets are needed than currently allocated.
    ///
    /// Returns `Ok(false)` without touching anything when `needed_bucket_count`
    /// already fits. Otherwise every buffer is recreated for the larger count
    /// (keeping the current edge budget) and `Ok(true)` is returned. The
    /// buffers never shrink.
    ///
    /// # Errors
    /// Propagates buffer-creation errors; `self` is left unchanged in that case.
    pub fn ensure_bucket_count(
        &mut self,
        gpu: &AwsmRendererWebGpu,
        needed_bucket_count: u32,
    ) -> Result<bool, AwsmCoreError> {
        if needed_bucket_count <= self.bucket_count {
            return Ok(false);
        }
        *self = Self::new_with_budget(gpu, needed_bucket_count, self.max_edge_budget)?;
        Ok(true)
    }

    /// Changes the edge budget, recreating every buffer when it differs.
    ///
    /// `new_budget` is clamped to at least 1. Returns `Ok(false)` when the
    /// clamped value equals the current budget, `Ok(true)` after a rebuild.
    ///
    /// # Errors
    /// Propagates buffer-creation errors; `self` is left unchanged in that case.
    pub fn set_max_edge_budget(
        &mut self,
        gpu: &AwsmRendererWebGpu,
        new_budget: u32,
    ) -> Result<bool, AwsmCoreError> {
        let new_budget = new_budget.max(1);
        if new_budget == self.max_edge_budget {
            return Ok(false);
        }
        *self = Self::new_with_budget(gpu, self.bucket_count, new_budget)?;
        Ok(true)
    }

    /// Uploads the CPU scratch images: the full args image into the args buffer
    /// and the zeroed header image into the start of the data buffer. The
    /// optional arguments of `write_buffer` are all left as `None`.
    ///
    /// # Errors
    /// Returns the first error reported by `write_buffer`; if the args upload
    /// fails the data upload is not attempted.
    pub fn reset_header(&self, gpu: &AwsmRendererWebGpu) -> Result<(), AwsmCoreError> {
        gpu.write_buffer(
            &self.args_buffer,
            None,
            self.args_scratch.as_slice(),
            None,
            None,
        )?;
        gpu.write_buffer(
            &self.data_buffer,
            None,
            self.data_header_scratch.as_slice(),
            None,
            None,
        )
    }

    /// Byte offset of the final-blend indirect-args slot: the first slot after
    /// the counter region.
    pub fn final_blend_args_offset() -> u32 {
        ARGS_COUNTERS_BYTES
    }

    /// Byte offset of the skybox-edge indirect-args slot: the second slot after
    /// the counter region.
    pub fn skybox_edge_args_offset() -> u32 {
        ARGS_COUNTERS_BYTES + INDIRECT_ARGS_STRIDE
    }

    /// Byte offset of the indirect-args slot for shader bucket `bucket_index`.
    ///
    /// Per-shader slots follow the two fixed slots. The arithmetic saturates at
    /// `u32::MAX` (matching `args_buffer_bytes`) rather than overflowing for an
    /// out-of-range index.
    pub fn per_shader_args_offset(bucket_index: u32) -> u32 {
        bucket_index
            .saturating_add(2)
            .saturating_mul(INDIRECT_ARGS_STRIDE)
            .saturating_add(ARGS_COUNTERS_BYTES)
    }
}
/// Serialises the data-buffer layout into the byte image of a uniform buffer.
///
/// The image is a sequence of native-endian `u32` words, in this order:
///
/// 1. `max_edge_budget`
/// 2. edge-count offset
/// 3. offset of the first per-shader counter
/// 4. skybox-count offset
/// 5. edge-to-xy table offset
/// 6. edge slot map offset
/// 7. accumulator offset
/// 8. one sample-entry list offset per bucket (`bucket_count` words)
/// 9. skybox sample list offset
/// 10. per-bucket list capacity in entries
///
/// followed by zero words until the length is a multiple of 16 bytes. Every
/// offset is stored as the byte offset divided by 4 (presumably a `u32` index
/// for the shader — confirm against the shader-side layout).
///
/// Per-bucket offsets use saturating arithmetic, the same formula as
/// `skybox_sample_list_offset`, so oversized inputs cannot overflow.
pub fn build_edge_layout_uniform_bytes(bucket_count: u32, max_edge_budget: u32) -> Vec<u8> {
    let to_word_index = |byte_off: u32| -> u32 { byte_off / 4 };

    // 7 leading words + one per bucket + 2 trailing words, plus up to 3 words
    // of padding.
    let mut words: Vec<u32> = Vec::with_capacity((bucket_count as usize).saturating_add(12));

    words.push(max_edge_budget);
    words.push(to_word_index(data_edge_count_offset()));
    words.push(to_word_index(data_per_shader_count_offset(0)));
    words.push(to_word_index(data_skybox_count_offset(bucket_count)));
    words.push(to_word_index(edge_to_xy_offset(bucket_count)));
    words.push(to_word_index(edge_slot_map_offset(
        bucket_count,
        max_edge_budget,
    )));
    words.push(to_word_index(accumulator_offset(
        bucket_count,
        max_edge_budget,
    )));

    let per_bucket = sample_entries_per_bucket(max_edge_budget);
    let per_bucket_bytes = per_bucket.saturating_mul(4);
    let base = sample_entries_offset(bucket_count, max_edge_budget);
    for bucket in 0..bucket_count {
        let list_offset = base.saturating_add(bucket.saturating_mul(per_bucket_bytes));
        words.push(to_word_index(list_offset));
    }

    words.push(to_word_index(skybox_sample_list_offset(
        bucket_count,
        max_edge_budget,
    )));
    words.push(per_bucket);

    // Pad with zero words to a 16-byte multiple.
    while (words.len() * 4) % 16 != 0 {
        words.push(0);
    }

    let mut bytes = Vec::with_capacity(words.len() * 4);
    for word in &words {
        bytes.extend_from_slice(&word.to_ne_bytes());
    }
    bytes
}
/// Creates a uniform buffer holding the layout image produced by
/// `build_edge_layout_uniform_bytes` and uploads that image into it.
///
/// Returns the buffer together with its size in bytes.
///
/// # Errors
/// Propagates any error from buffer creation or from the upload.
pub fn build_edge_layout_uniform(
    gpu: &AwsmRendererWebGpu,
    bucket_count: u32,
    max_edge_budget: u32,
) -> Result<(web_sys::GpuBuffer, u32), AwsmCoreError> {
    let layout_bytes = build_edge_layout_uniform_bytes(bucket_count, max_edge_budget);
    let byte_len = layout_bytes.len();

    let usage = BufferUsage::new().with_uniform().with_copy_dst();
    let descriptor = BufferDescriptor::new(Some("EdgeBufferLayout uniform"), byte_len, usage);
    let uniform = gpu.create_buffer(&descriptor.into())?;

    gpu.write_buffer(&uniform, None, layout_bytes.as_slice(), None, None)?;
    Ok((uniform, byte_len as u32))
}
/// Seeds every indirect-args slot of an args-buffer image.
///
/// The first `ARGS_COUNTERS_BYTES` bytes of `dst` are left untouched. Each of
/// the `2 + bucket_count` slots that follow (final-blend, skybox, then one per
/// bucket) is filled with the native-endian `u32` words `0, 1, 1, 0`
/// (presumably an indirect dispatch of `(0, 1, 1)` plus one padding word —
/// confirm against the consuming pass).
///
/// # Panics
/// Panics if `dst` is too short to hold all slots.
pub fn write_args_header(dst: &mut [u8], bucket_count: u32) {
    /// Writes the four seed words of one slot starting at `slot_base`.
    fn seed_slot(dst: &mut [u8], slot_base: usize) {
        let zero = [0u8; 4];
        let one = 1u32.to_ne_bytes();
        for (word_index, word) in [zero, one, one, zero].iter().enumerate() {
            let start = slot_base + word_index * 4;
            dst[start..start + 4].copy_from_slice(word);
        }
    }

    let stride = INDIRECT_ARGS_STRIDE as usize;

    // The two fixed slots come first, then one slot per shader bucket.
    seed_slot(dst, ARGS_COUNTERS_BYTES as usize);
    seed_slot(dst, (ARGS_COUNTERS_BYTES + INDIRECT_ARGS_STRIDE) as usize);

    let per_shader_base = (ARGS_COUNTERS_BYTES + 2 * INDIRECT_ARGS_STRIDE) as usize;
    for bucket in 0..bucket_count as usize {
        seed_slot(dst, per_shader_base + bucket * stride);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Packing and then unpacking pixel coordinates returns the inputs.
    #[test]
    fn pack_round_trip_xy() {
        let cases = [(0u32, 0u32), (123, 456), (65535, 65535), (1, 2)];
        for &(x, y) in cases.iter() {
            let unpacked = unpack_xy(pack_xy(x, y));
            assert_eq!(unpacked, (x, y));
        }
    }

    /// Packing and then unpacking a sample entry returns the inputs.
    #[test]
    fn pack_round_trip_entry() {
        let cases = [(0u32, 0u8), (12345, 0b1010), (0x00FF_FFFF, 0xFF)];
        for &(id, mask) in cases.iter() {
            let unpacked = unpack_edge_sample_entry(pack_edge_sample_entry(id, mask));
            assert_eq!(unpacked, (id, mask));
        }
    }

    /// The args buffer size is always a multiple of 16 bytes.
    #[test]
    fn args_size_is_aligned() {
        let bucket_counts = [1u32, 4, 5, 17];
        for &bucket_count in bucket_counts.iter() {
            let remainder = args_buffer_bytes(bucket_count) % 16;
            assert_eq!(remainder, 0);
        }
    }

    /// Data-buffer regions appear in strictly increasing order, starting
    /// immediately after the header.
    #[test]
    fn data_buffer_layout_is_monotonic_and_starts_after_header() {
        let bucket_counts = [1u32, 4, 17];
        let budgets = [1024u32, 65536];
        for &bucket_count in bucket_counts.iter() {
            for &max_edge_budget in budgets.iter() {
                let header = data_header_bytes(bucket_count);
                let xy = edge_to_xy_offset(bucket_count);
                assert_eq!(xy, header,
                    "edge_to_xy must start right after the counter-mirror header (bucket_count={bucket_count})");

                let slot_map = edge_slot_map_offset(bucket_count, max_edge_budget);
                let accum = accumulator_offset(bucket_count, max_edge_budget);
                let entries = sample_entries_offset(bucket_count, max_edge_budget);
                assert!(xy < slot_map);
                assert!(slot_map < accum);
                assert!(accum < entries);
            }
        }
    }
}