use crate::encode::layout::LayoutParams;
use crate::error::Result;
use crate::foundation::aligned_alloc::{AlignedVec, try_alloc_zeroed};
use super::autovec::compute_fuzzy_erosion_blocks_autovec;
use super::quant_field_to_aq_strength;
use super::simd::per_block_modulations_row;
#[cfg(not(target_arch = "x86_64"))]
use super::autovec::pre_erosion_row_autovec_iter as pre_erosion_row_padded;
#[cfg(target_arch = "x86_64")]
use super::simd::mage_pre_erosion_row_padded_v4;
#[cfg(target_arch = "x86_64")]
use super::simd::mage_per_block_modulations_row;
#[cfg(target_arch = "x86_64")]
use archmage::{SimdToken, X64V3Token};
/// Streaming adaptive-quantization (AQ) state.
///
/// Y rows are fed in strips; each completed iMCU row (8 * v_samp scanlines)
/// yields one row of per-8x8-block AQ strengths. Emission is deferred one
/// iMCU row behind input (pixels are double-buffered), so the caller drains
/// the last row via `flush`/`flush_into`/`finalize`.
#[derive(Debug)]
pub struct StreamingAQ {
    /// When false, only block accounting runs; strengths stay at zero.
    aq_enabled: bool,
    /// Image width in pixels.
    width: usize,
    /// Image height in pixels.
    height: usize,
    /// Length of each incoming padded Y row (from the layout).
    padded_width: usize,
    /// Row stride of the internal iMCU pixel buffers.
    y_buffer_stride: usize,
    /// Distance in floats between consecutive rows of an input strip.
    strip_stride: usize,
    /// Image width in 8x8 blocks.
    blocks_w: usize,
    /// Image height in 8x8 blocks.
    blocks_h: usize,
    /// Pre-erosion map width (one cell per 4x4 pixel patch).
    pre_erosion_w: usize,
    /// Pre-erosion map height.
    pre_erosion_h: usize,
    /// Ring buffer of pre-erosion rows, `pre_erosion_buffer_rows` deep.
    pre_erosion_buffer: AlignedVec<f32>,
    /// Depth of the pre-erosion ring buffer, in rows.
    pre_erosion_buffer_rows: usize,
    /// Sliding window, row y-2 (`width + 2` samples, edges replicated).
    row_prev_prev: AlignedVec<f32>,
    /// Sliding window, row y-1.
    row_prev: AlignedVec<f32>,
    /// Sliding window, row y (most recently received).
    row_curr: AlignedVec<f32>,
    /// Most recent row still waiting for its below-neighbor.
    pending_pre_erosion_row: Option<usize>,
    /// Per-column accumulator over each group of 4 scanlines.
    pre_erosion_accum: AlignedVec<f32>,
    /// Scratch row for one pre-erosion computation.
    pre_erosion_temp: AlignedVec<f32>,
    /// Double-buffered Y pixels, one full iMCU row each.
    y_imcu_buffers: [AlignedVec<f32>; 2],
    /// Index (0/1) of the buffer currently being filled.
    y_imcu_current: usize,
    /// Scanlines per iMCU row (8 * vertical sampling factor).
    y_imcu_height: usize,
    /// Quant-field output for one iMCU row of blocks.
    fuzzy_erosion_out: AlignedVec<f32>,
    /// AQ strengths for one iMCU row of blocks (the values handed out).
    imcu_aq_strengths: Vec<f32>,
    /// Every strength emitted so far (for `finalize` and debug dumps).
    all_aq_strengths: Vec<f32>,
    /// Luma quant value used to dampen modulation — presumably the DC/first-AC
    /// table entry, hence the `_01` name; TODO confirm against the quant code.
    y_quant_01: f32,
    /// Scanlines received so far.
    rows_received: usize,
    /// Next iMCU row to complete.
    current_imcu_row: usize,
    #[allow(dead_code)]
    total_imcu_rows: usize,
    /// Number of pre-erosion rows flushed into the ring buffer so far.
    pre_erosion_rows_flushed: usize,
    /// Completed iMCU row whose strengths have not been emitted yet.
    pending_imcu_row: Option<usize>,
    /// SIMD capability token; `None` when the CPU lacks the required features
    /// (presumably x86-64-v3 / AVX2-class — see the `archmage` crate).
    #[cfg(target_arch = "x86_64")]
    archmage_token: Option<X64V3Token>,
}
impl StreamingAQ {
/// Builds a streaming AQ context for `layout`.
///
/// Degenerate (zero-area) images get an inert instance; `aq_enabled == false`
/// routes to the bypass variant that keeps bookkeeping but skips analysis.
pub(crate) fn new(layout: &LayoutParams, y_quant_01: u16, aq_enabled: bool) -> Result<Self> {
    let width = layout.width;
    let height = layout.height;
    if width == 0 || height == 0 {
        return Ok(Self::empty(f32::from(y_quant_01)));
    }
    if !aq_enabled {
        return Self::new_bypass(layout, y_quant_01);
    }
    let blocks_w = layout.blocks_w;
    let blocks_h = layout.blocks_h;
    let v_samp = layout.v_samp;
    // One pre-erosion cell per 4x4 pixel patch.
    let pre_erosion_w = width.div_ceil(4);
    let pre_erosion_h = height.div_ceil(4);
    // An iMCU row spans 8 scanlines per vertical sampling factor.
    let imcu_height = 8 * v_samp;
    let blocks_per_imcu = blocks_w * v_samp;
    // Depth of the pre-erosion ring buffer, in rows.
    let ring_rows = 12;
    Ok(Self {
        aq_enabled: true,
        width,
        height,
        padded_width: layout.padded_width,
        y_buffer_stride: layout.y_buffer_stride,
        strip_stride: layout.padded_width,
        blocks_w,
        blocks_h,
        pre_erosion_w,
        pre_erosion_h,
        pre_erosion_buffer: try_alloc_zeroed(pre_erosion_w * ring_rows)?,
        pre_erosion_buffer_rows: ring_rows,
        // +2: one replicated edge sample on each side of the row window.
        row_prev_prev: try_alloc_zeroed(width + 2)?,
        row_prev: try_alloc_zeroed(width + 2)?,
        row_curr: try_alloc_zeroed(width + 2)?,
        pending_pre_erosion_row: None,
        pre_erosion_accum: try_alloc_zeroed(width)?,
        pre_erosion_temp: try_alloc_zeroed(width)?,
        y_imcu_buffers: [
            try_alloc_zeroed(layout.y_buffer_stride * imcu_height)?,
            try_alloc_zeroed(layout.y_buffer_stride * imcu_height)?,
        ],
        y_imcu_current: 0,
        y_imcu_height: imcu_height,
        fuzzy_erosion_out: try_alloc_zeroed(blocks_per_imcu)?,
        imcu_aq_strengths: vec![0.0f32; blocks_per_imcu],
        all_aq_strengths: Vec::with_capacity(blocks_w * blocks_h),
        y_quant_01: f32::from(y_quant_01),
        rows_received: 0,
        current_imcu_row: 0,
        total_imcu_rows: height.div_ceil(imcu_height),
        pre_erosion_rows_flushed: 0,
        pending_imcu_row: None,
        #[cfg(target_arch = "x86_64")]
        archmage_token: X64V3Token::summon(),
    })
}
/// Constructor for the AQ-disabled path.
///
/// Block bookkeeping is kept so the streaming API still releases per-iMCU
/// counts at the same boundaries, but all analysis buffers are empty and
/// every strength remains at its initial 0.0.
fn new_bypass(layout: &LayoutParams, y_quant_01: u16) -> Result<Self> {
    let v_samp = layout.v_samp;
    let imcu_height = 8 * v_samp;
    let blocks_per_imcu = layout.blocks_w * v_samp;
    Ok(Self {
        aq_enabled: false,
        width: layout.width,
        height: layout.height,
        padded_width: layout.padded_width,
        y_buffer_stride: 0,
        strip_stride: layout.padded_width,
        blocks_w: layout.blocks_w,
        blocks_h: layout.blocks_h,
        pre_erosion_w: 0,
        pre_erosion_h: 0,
        pre_erosion_buffer: AlignedVec::new(0),
        pre_erosion_buffer_rows: 0,
        row_prev_prev: AlignedVec::new(0),
        row_prev: AlignedVec::new(0),
        row_curr: AlignedVec::new(0),
        pending_pre_erosion_row: None,
        pre_erosion_accum: AlignedVec::new(0),
        pre_erosion_temp: AlignedVec::new(0),
        y_imcu_buffers: [AlignedVec::new(0), AlignedVec::new(0)],
        y_imcu_current: 0,
        y_imcu_height: imcu_height,
        fuzzy_erosion_out: AlignedVec::new(0),
        imcu_aq_strengths: vec![0.0f32; blocks_per_imcu],
        all_aq_strengths: Vec::with_capacity(layout.blocks_w * layout.blocks_h),
        y_quant_01: f32::from(y_quant_01),
        rows_received: 0,
        current_imcu_row: 0,
        total_imcu_rows: layout.height.div_ceil(imcu_height),
        pre_erosion_rows_flushed: 0,
        pending_imcu_row: None,
        #[cfg(target_arch = "x86_64")]
        archmage_token: None,
    })
}
/// Inert instance for a zero-area image: all dimensions and buffers are
/// empty, AQ is off, and every streaming call becomes a no-op.
fn empty(y_quant_01: f32) -> Self {
    Self {
        aq_enabled: false,
        width: 0,
        height: 0,
        padded_width: 0,
        y_buffer_stride: 0,
        strip_stride: 0,
        blocks_w: 0,
        blocks_h: 0,
        pre_erosion_w: 0,
        pre_erosion_h: 0,
        pre_erosion_buffer: AlignedVec::new(0),
        pre_erosion_buffer_rows: 0,
        row_prev_prev: AlignedVec::new(0),
        row_prev: AlignedVec::new(0),
        row_curr: AlignedVec::new(0),
        pending_pre_erosion_row: None,
        pre_erosion_accum: AlignedVec::new(0),
        pre_erosion_temp: AlignedVec::new(0),
        y_imcu_buffers: [AlignedVec::new(0), AlignedVec::new(0)],
        y_imcu_current: 0,
        y_imcu_height: 0,
        fuzzy_erosion_out: AlignedVec::new(0),
        imcu_aq_strengths: Vec::new(),
        all_aq_strengths: Vec::new(),
        y_quant_01,
        rows_received: 0,
        current_imcu_row: 0,
        total_imcu_rows: 0,
        pre_erosion_rows_flushed: 0,
        pending_imcu_row: None,
        #[cfg(target_arch = "x86_64")]
        archmage_token: None,
    }
}
/// Number of in-image blocks in iMCU row `imcu_row`.
///
/// The final iMCU row may be vertically partial; horizontally a valid block
/// row always spans `blocks_w` blocks. Replaces the original counting loop
/// with equivalent arithmetic: valid rows = min(v_samp, rows remaining).
fn compute_imcu_block_count(&self, imcu_row: usize) -> usize {
    let v_samp = self.y_imcu_height / 8;
    // Block rows left in the image at this iMCU row (0 when past the end).
    let remaining = self.blocks_h.saturating_sub(imcu_row * v_samp);
    remaining.min(v_samp) * self.blocks_w
}
/// Row accounting for the AQ-disabled path.
///
/// Mirrors the boundary crossing and one-row deferral of the real path but
/// performs no analysis. Returns the block count of a newly released iMCU
/// row, if crossing a boundary released one.
fn process_strip_bypass(&mut self, strip_y: usize, strip_height: usize) -> Option<usize> {
    // Advance the receive counter by however many strip rows are in-image.
    let usable = strip_height.min(self.height.saturating_sub(strip_y));
    if usable > 0 {
        self.rows_received = strip_y + usable;
    }
    let boundary = ((self.current_imcu_row + 1) * self.y_imcu_height).min(self.height);
    if self.rows_received < boundary {
        return None;
    }
    // Release the previously deferred iMCU row (if any), then defer this one.
    let released = self.pending_imcu_row.take().map(|row| {
        let count = self.compute_imcu_block_count(row);
        self.all_aq_strengths
            .extend_from_slice(&self.imcu_aq_strengths[..count]);
        count
    });
    self.pending_imcu_row = Some(self.current_imcu_row);
    self.current_imcu_row += 1;
    released
}
/// Feed a strip of padded Y rows covering image rows
/// `strip_y..strip_y + strip_height`.
///
/// Rows in `y_strip` are `strip_stride` floats apart and at least
/// `padded_width` long. Returns one f32 per 8x8 block when crossing an iMCU
/// boundary releases the previously deferred iMCU row; emission lags input
/// by one iMCU row, so the final row must be drained with `flush`.
pub fn process_y_strip(
    &mut self,
    y_strip: &[f32],
    strip_y: usize,
    strip_height: usize,
) -> Option<&[f32]> {
    if self.width == 0 || self.height == 0 {
        return None;
    }
    if !self.aq_enabled {
        // Bypass: boundary bookkeeping only; strengths remain all zero.
        let count = self.process_strip_bypass(strip_y, strip_height)?;
        return Some(&self.imcu_aq_strengths[..count]);
    }
    let stride = self.strip_stride;
    let padded_width = self.padded_width;
    let y_buffer_stride = self.y_buffer_stride;
    for local_y in 0..strip_height {
        let global_y = strip_y + local_y;
        if global_y >= self.height {
            break;
        }
        let row_start = local_y * stride;
        // Stash the padded row in the current iMCU buffer, duplicating one
        // extra edge sample past `padded_width` for the modulation kernels.
        let imcu_local_y = global_y % self.y_imcu_height;
        let dest_start = imcu_local_y * y_buffer_stride;
        let padded_row = &y_strip[row_start..row_start + padded_width];
        self.y_imcu_buffers[self.y_imcu_current][dest_start..dest_start + padded_width]
            .copy_from_slice(padded_row);
        let edge_val = padded_row[padded_width - 1];
        self.y_imcu_buffers[self.y_imcu_current][dest_start + padded_width] = edge_val;
        // Feed the unpadded row into the pre-erosion pipeline.
        let row = &y_strip[row_start..row_start + self.width];
        self.process_pre_erosion_row(row, global_y);
        self.rows_received = global_y + 1;
    }
    let imcu_height = self.y_imcu_height;
    let next_imcu_boundary = (self.current_imcu_row + 1) * imcu_height;
    if self.rows_received >= next_imcu_boundary.min(self.height) {
        // Boundary crossed: pad a vertically partial final iMCU by
        // replicating its last valid scanline.
        let valid_rows_in_imcu = self.rows_received - self.current_imcu_row * imcu_height;
        let y_buffer_stride = self.y_buffer_stride;
        if valid_rows_in_imcu < imcu_height && valid_rows_in_imcu > 0 {
            let last_valid_row = valid_rows_in_imcu - 1;
            let src_start = last_valid_row * y_buffer_stride;
            let src_end = src_start + y_buffer_stride;
            for fill_row in valid_rows_in_imcu..imcu_height {
                let dest_start = fill_row * y_buffer_stride;
                self.y_imcu_buffers[self.y_imcu_current]
                    .copy_within(src_start..src_end, dest_start);
            }
        }
        // Finalize the previously deferred iMCU row from the *other* buffer,
        // which still holds its pixels.
        let valid_count = if let Some(pending) = self.pending_imcu_row.take() {
            let prev_buffer = 1 - self.y_imcu_current;
            let count = self.finalize_imcu_aq_with_buffer(pending, prev_buffer);
            self.all_aq_strengths
                .extend_from_slice(&self.imcu_aq_strengths[..count]);
            Some(count)
        } else {
            None
        };
        // Defer the just-completed row and swap the double buffer.
        self.pending_imcu_row = Some(self.current_imcu_row);
        self.current_imcu_row += 1;
        self.y_imcu_current = 1 - self.y_imcu_current;
        if let Some(count) = valid_count {
            return Some(&self.imcu_aq_strengths[..count]);
        }
    }
    None
}
/// Like [`Self::process_y_strip`], but copies any released strengths into
/// `out_buffer` and returns their count instead of borrowing `self`.
///
/// `out_buffer` must be at least one iMCU row of blocks long
/// (`blocks_w * v_samp`); shorter buffers panic on the copy.
pub fn process_y_strip_into(
    &mut self,
    y_strip: &[f32],
    strip_y: usize,
    strip_height: usize,
    out_buffer: &mut [f32],
) -> Option<usize> {
    if self.width == 0 || self.height == 0 {
        return None;
    }
    if !self.aq_enabled {
        // Bypass: boundary bookkeeping only; the copied strengths are zeros.
        let count = self.process_strip_bypass(strip_y, strip_height)?;
        out_buffer[..count].copy_from_slice(&self.imcu_aq_strengths[..count]);
        return Some(count);
    }
    let stride = self.strip_stride;
    let padded_width = self.padded_width;
    let y_buffer_stride = self.y_buffer_stride;
    for local_y in 0..strip_height {
        let global_y = strip_y + local_y;
        if global_y >= self.height {
            break;
        }
        let row_start = local_y * stride;
        // Stash the padded row in the current iMCU buffer, duplicating one
        // extra edge sample past `padded_width` for the modulation kernels.
        let imcu_local_y = global_y % self.y_imcu_height;
        let dest_start = imcu_local_y * y_buffer_stride;
        let padded_row = &y_strip[row_start..row_start + padded_width];
        self.y_imcu_buffers[self.y_imcu_current][dest_start..dest_start + padded_width]
            .copy_from_slice(padded_row);
        let edge_val = padded_row[padded_width - 1];
        self.y_imcu_buffers[self.y_imcu_current][dest_start + padded_width] = edge_val;
        // Feed the unpadded row into the pre-erosion pipeline.
        let row = &y_strip[row_start..row_start + self.width];
        self.process_pre_erosion_row(row, global_y);
        self.rows_received = global_y + 1;
    }
    let imcu_height = self.y_imcu_height;
    let next_imcu_boundary = (self.current_imcu_row + 1) * imcu_height;
    if self.rows_received >= next_imcu_boundary.min(self.height) {
        // Boundary crossed: pad a vertically partial final iMCU by
        // replicating its last valid scanline.
        let valid_rows_in_imcu = self.rows_received - self.current_imcu_row * imcu_height;
        let y_buffer_stride = self.y_buffer_stride;
        if valid_rows_in_imcu < imcu_height && valid_rows_in_imcu > 0 {
            let last_valid_row = valid_rows_in_imcu - 1;
            let src_start = last_valid_row * y_buffer_stride;
            let src_end = src_start + y_buffer_stride;
            for fill_row in valid_rows_in_imcu..imcu_height {
                let dest_start = fill_row * y_buffer_stride;
                self.y_imcu_buffers[self.y_imcu_current]
                    .copy_within(src_start..src_end, dest_start);
            }
        }
        // Finalize the previously deferred iMCU row from the *other* buffer
        // and hand its strengths out through `out_buffer`.
        let valid_count = if let Some(pending) = self.pending_imcu_row.take() {
            let prev_buffer = 1 - self.y_imcu_current;
            let count = self.finalize_imcu_aq_with_buffer(pending, prev_buffer);
            out_buffer[..count].copy_from_slice(&self.imcu_aq_strengths[..count]);
            self.all_aq_strengths
                .extend_from_slice(&self.imcu_aq_strengths[..count]);
            Some(count)
        } else {
            None
        };
        // Defer the just-completed row and swap the double buffer.
        self.pending_imcu_row = Some(self.current_imcu_row);
        self.current_imcu_row += 1;
        self.y_imcu_current = 1 - self.y_imcu_current;
        return valid_count;
    }
    None
}
/// Releases the final deferred iMCU row, if one is pending.
///
/// Must be called (directly or via `finalize`) after the last strip, since
/// emission lags input by one iMCU row.
pub fn flush(&mut self) -> Option<&[f32]> {
    let pending = self.pending_imcu_row.take()?;
    let count = if self.aq_enabled {
        self.finalize_imcu_aq_with_buffer(pending, 1 - self.y_imcu_current)
    } else {
        self.compute_imcu_block_count(pending)
    };
    self.all_aq_strengths
        .extend_from_slice(&self.imcu_aq_strengths[..count]);
    Some(&self.imcu_aq_strengths[..count])
}
/// Like [`Self::flush`], but copies the released strengths into `out_buffer`
/// and returns their count. Also triggers the debug AQ-map dump once every
/// image row has been received.
pub fn flush_into(&mut self, out_buffer: &mut [f32]) -> Option<usize> {
    if let Some(pending) = self.pending_imcu_row.take() {
        let count = if self.aq_enabled {
            let prev_buffer = 1 - self.y_imcu_current;
            self.finalize_imcu_aq_with_buffer(pending, prev_buffer)
        } else {
            self.compute_imcu_block_count(pending)
        };
        out_buffer[..count].copy_from_slice(&self.imcu_aq_strengths[..count]);
        self.all_aq_strengths
            .extend_from_slice(&self.imcu_aq_strengths[..count]);
        // `pending_imcu_row` was just taken and nothing above re-arms it, so
        // the old `pending_imcu_row.is_none()` guard was always true here;
        // the dump only depends on having received every image row.
        if self.rows_received >= self.height {
            self.dump_aq_map_if_requested();
        }
        return Some(count);
    }
    None
}
fn dump_aq_map_if_requested(&self) {
if let Ok(path) = std::env::var("DUMP_AQ_MAP")
&& let Ok(mut file) = std::fs::File::create(&path)
{
use std::io::Write;
let w = self.blocks_w as u32;
let h = self.blocks_h as u32;
let _ = file.write_all(&w.to_le_bytes());
let _ = file.write_all(&h.to_le_bytes());
for val in &self.all_aq_strengths {
let _ = file.write_all(&val.to_le_bytes());
}
eprintln!(
"AQ map dumped to {} ({}x{} blocks, {} values)",
path,
w,
h,
self.all_aq_strengths.len()
);
}
}
/// Consumes the context, flushing the deferred iMCU row, and returns the
/// full per-block AQ strength map (empty for a zero-area image).
pub fn finalize(mut self) -> Result<Vec<f32>> {
    if self.width.min(self.height) == 0 {
        return Ok(Vec::new());
    }
    let _ = self.flush();
    self.dump_aq_map_if_requested();
    Ok(self.all_aq_strengths)
}
/// Copies `src` into `dst[1..=src.len()]` and replicates the first/last
/// samples into the two pad cells, so 3-tap horizontal filters need no
/// bounds handling. `dst` must be at least `src.len() + 2` long.
#[inline(always)]
fn copy_row_with_edge_replication(dst: &mut [f32], src: &[f32]) {
    let n = src.len();
    dst[0] = src[0];
    dst[1..=n].copy_from_slice(src);
    dst[n + 1] = src[n - 1];
}
/// Pushes `row` (image row `global_y`, `width` samples) into the 3-row
/// sliding window and runs pre-erosion for the previous row, which now has
/// both vertical neighbors. Edges use replication: the top rows are primed
/// with row 0, and the final row is processed immediately with itself as
/// the "below" neighbor.
fn process_pre_erosion_row(&mut self, row: &[f32], global_y: usize) {
    // Rotate the window: prev_prev <- prev <- curr, then load the new row.
    core::mem::swap(&mut self.row_prev_prev, &mut self.row_prev);
    core::mem::swap(&mut self.row_prev, &mut self.row_curr);
    Self::copy_row_with_edge_replication(&mut self.row_curr, row);
    if global_y == 0 {
        // Top edge: replicate row 0 into both history slots.
        Self::copy_row_with_edge_replication(&mut self.row_prev, row);
        Self::copy_row_with_edge_replication(&mut self.row_prev_prev, row);
    } else if global_y == 1 {
        // Re-sync prev_prev to row 0 after the rotation. NOTE(review): both
        // slots already hold row 0's samples here, so this looks redundant —
        // kept defensively; confirm before removing.
        self.row_prev_prev.copy_from_slice(&self.row_prev);
    }
    if let Some(pending_y) = self.pending_pre_erosion_row {
        // The previous row now has its below-neighbor available.
        self.compute_and_accumulate_pre_erosion(pending_y);
    }
    self.pending_pre_erosion_row = Some(global_y);
    if global_y + 1 == self.height {
        // Last image row: process it now (below := itself) and flush the
        // final, possibly partial, 4-scanline block row.
        self.compute_last_row_pre_erosion();
        self.pending_pre_erosion_row = None;
        let last_block_y = global_y / 4;
        self.flush_pre_erosion_block(last_block_y);
    }
}
/// Pre-erosion for the final image row: the row below the image is
/// replicated from the current (last) row. Result is added to the
/// per-column accumulator; the caller flushes the block row afterwards.
fn compute_last_row_pre_erosion(&mut self) {
    let row_above = &self.row_prev;
    let row_curr = &self.row_curr;
    // Bottom-edge replication: "below" aliases the current row.
    let row_below = &self.row_curr;
    self.pre_erosion_temp.fill(0.0);
    // SIMD kernel on x86_64; autovectorized fallback elsewhere.
    #[cfg(target_arch = "x86_64")]
    {
        mage_pre_erosion_row_padded_v4(
            row_curr,
            row_above,
            row_below,
            self.width,
            &mut self.pre_erosion_temp,
        );
    }
    #[cfg(not(target_arch = "x86_64"))]
    pre_erosion_row_padded(
        row_curr,
        row_above,
        row_below,
        self.width,
        &mut self.pre_erosion_temp,
    );
    for x in 0..self.width {
        self.pre_erosion_accum[x] += self.pre_erosion_temp[x];
    }
}
/// Runs pre-erosion for row `row_y` — held in `row_prev`, with
/// `row_prev_prev` above and `row_curr` below — and adds the result into
/// the per-column accumulator. Flushes a pre-erosion block row after every
/// complete group of 4 scanlines.
fn compute_and_accumulate_pre_erosion(&mut self, row_y: usize) {
    let row_above = &self.row_prev_prev;
    let row_curr = &self.row_prev;
    let row_below = &self.row_curr;
    self.pre_erosion_temp.fill(0.0);
    // SIMD kernel on x86_64; autovectorized fallback elsewhere.
    #[cfg(target_arch = "x86_64")]
    {
        mage_pre_erosion_row_padded_v4(
            row_curr,
            row_above,
            row_below,
            self.width,
            &mut self.pre_erosion_temp,
        );
    }
    #[cfg(not(target_arch = "x86_64"))]
    pre_erosion_row_padded(
        row_curr,
        row_above,
        row_below,
        self.width,
        &mut self.pre_erosion_temp,
    );
    for x in 0..self.width {
        self.pre_erosion_accum[x] += self.pre_erosion_temp[x];
    }
    // A 4-scanline group is complete; the `< height` guard leaves the final
    // group to the last-row path in `process_pre_erosion_row`.
    if (row_y + 1) % 4 == 0 && row_y + 1 < self.height {
        self.flush_pre_erosion_block(row_y / 4);
    }
}
/// Collapses the 4-scanline column accumulator into pre-erosion row
/// `block_y` of the ring buffer (each cell: sum of up to 4 columns, scaled
/// by 0.25), then clears the accumulator for the next group.
fn flush_pre_erosion_block(&mut self, block_y: usize) {
    if block_y >= self.pre_erosion_h {
        return;
    }
    let dst_base = (block_y % self.pre_erosion_buffer_rows) * self.pre_erosion_w;
    for bx in 0..self.pre_erosion_w {
        let x0 = bx * 4;
        // Clamp the 4-column span at the right image edge; edge cells still
        // scale by 0.25, matching the original per-element guard.
        let x1 = (x0 + 4).min(self.width);
        let mut total = 0.0f32;
        for x in x0..x1 {
            total += self.pre_erosion_accum[x];
        }
        self.pre_erosion_buffer[dst_base + bx] = total * 0.25;
    }
    self.pre_erosion_accum.fill(0.0);
    self.pre_erosion_rows_flushed = block_y + 1;
}
/// Computes AQ strengths for one iMCU row of blocks.
///
/// Uses the Y pixels saved in `y_imcu_buffers[y_buffer_idx]` plus the
/// pre-erosion ring buffer, writing per-block values into
/// `imcu_aq_strengths` and returning the number of valid (in-image) blocks.
fn finalize_imcu_aq_with_buffer(&mut self, imcu_row: usize, y_buffer_idx: usize) -> usize {
    let v_samp = self.y_imcu_height / 8;
    let blocks_w = self.blocks_w;
    // Tuning constants — presumably derived from jpegli-style adaptive
    // quantization; TODO confirm provenance.
    const K_AC_QUANT: f32 = 0.841;
    const K_DAMPEN_RAMP_START: f32 = 9.0;
    const K_DAMPEN_RAMP_END: f32 = 65.0;
    let base_level = 0.48 * K_AC_QUANT;
    // Fade modulation out linearly as the luma quant value rises through the
    // ramp (heavier quantization -> flatter AQ field), clamped at 0.
    let dampen = if self.y_quant_01 >= K_DAMPEN_RAMP_START {
        let d = 1.0
            - (self.y_quant_01 - K_DAMPEN_RAMP_START)
                / (K_DAMPEN_RAMP_END - K_DAMPEN_RAMP_START);
        d.max(0.0)
    } else {
        1.0
    };
    let mul = K_AC_QUANT * dampen;
    let add = (1.0 - dampen) * base_level;
    let mut valid_rows = 0;
    for by_offset in 0..v_samp {
        let global_by = imcu_row * v_samp + by_offset;
        if global_by >= self.blocks_h {
            break; // vertically partial final iMCU row
        }
        valid_rows += 1;
        let row_start = by_offset * blocks_w;
        let row_end = row_start + blocks_w;
        // Each 8-px block row maps to two 4-px pre-erosion rows.
        let pe_y = global_by * 2;
        self.compute_fuzzy_erosion_row_into(pe_y, row_start, row_end);
        // Per-block modulation: SIMD kernel when a capability token was
        // summoned at construction, otherwise the portable row kernel.
        #[cfg(target_arch = "x86_64")]
        if let Some(token) = self.archmage_token {
            mage_per_block_modulations_row(
                token,
                &self.y_imcu_buffers[y_buffer_idx],
                self.y_buffer_stride,
                by_offset,
                blocks_w,
                &mut self.fuzzy_erosion_out[row_start..row_end],
                mul,
                add,
            );
        } else {
            per_block_modulations_row(
                &self.y_imcu_buffers[y_buffer_idx],
                self.y_buffer_stride,
                self.width,
                self.height,
                by_offset,
                blocks_w,
                &mut self.fuzzy_erosion_out[row_start..row_end],
                mul,
                add,
            );
        }
        #[cfg(not(target_arch = "x86_64"))]
        per_block_modulations_row(
            &self.y_imcu_buffers[y_buffer_idx],
            self.y_buffer_stride,
            self.width,
            self.height,
            by_offset,
            blocks_w,
            &mut self.fuzzy_erosion_out[row_start..row_end],
            mul,
            add,
        );
        // Map each block's quant-field value to its AQ strength.
        let qf_slice = &self.fuzzy_erosion_out[row_start..row_end];
        let aq_slice = &mut self.imcu_aq_strengths[row_start..row_end];
        for (qf, aq) in qf_slice.iter().zip(aq_slice.iter_mut()) {
            *aq = quant_field_to_aq_strength(*qf);
        }
    }
    valid_rows * blocks_w
}
/// Fills `fuzzy_erosion_out[start..end]` (one row of blocks) from the
/// pre-erosion ring buffer.
///
/// Each 8x8 block covers a 2x2 patch of pre-erosion cells starting at ring
/// row `pe_y_base`. For each cell, the four smallest values of its 3x3
/// neighborhood are blended with fixed weights and the four cell results
/// are summed. A vectorized kernel handles a prefix of the blocks; the
/// scalar tail below must reproduce its result exactly.
fn compute_fuzzy_erosion_row_into(&mut self, pe_y_base: usize, start: usize, end: usize) {
    let pe_w = self.pre_erosion_w;
    let buffer_rows = self.pre_erosion_buffer_rows;
    // Highest ring row holding real data; vertical reads clamp to it.
    let max_filled_row = self.pre_erosion_rows_flushed.saturating_sub(1) as isize;
    let simd_processed = compute_fuzzy_erosion_blocks_autovec(
        &self.pre_erosion_buffer,
        pe_w,
        buffer_rows,
        pe_y_base as isize,
        max_filled_row,
        start,
        end,
        &mut self.fuzzy_erosion_out,
    );
    let scalar_start = start + simd_processed;
    if scalar_start >= end {
        return;
    }
    // Blend weights for the four smallest neighborhood values.
    const MUL0: f32 = 0.125;
    const MUL1: f32 = 0.075;
    const MUL2: f32 = 0.06;
    const MUL3: f32 = 0.05;
    for bx in scalar_start..end {
        // Block x within this row; two pre-erosion cells per block.
        let pe_x_base = (bx - start) * 2;
        let pe_y = pe_y_base as isize;
        let mut sum = 0.0f32;
        for dy in 0..2 {
            for dx in 0..2 {
                let cx = (pe_x_base + dx) as isize;
                let cy = pe_y + dy as isize;
                // Gather the 3x3 neighborhood with edge clamping: x into
                // [0, pe_w - 1], y into [0, max_filled_row].
                let mut vals = [0.0f32; 9];
                for (i, (ny, nx)) in [
                    (-1, -1),
                    (-1, 0),
                    (-1, 1),
                    (0, -1),
                    (0, 0),
                    (0, 1),
                    (1, -1),
                    (1, 0),
                    (1, 1),
                ]
                .iter()
                .enumerate()
                {
                    let px = (cx + nx).clamp(0, pe_w as isize - 1) as usize;
                    let py = (cy + ny).clamp(0, max_filled_row.max(0)) as usize;
                    let buffer_row = py % buffer_rows;
                    let buf_idx = buffer_row * pe_w + px;
                    // Defensive bounds check; out-of-range reads yield 0.
                    vals[i] = if buf_idx < self.pre_erosion_buffer.len() {
                        self.pre_erosion_buffer[buf_idx]
                    } else {
                        0.0
                    };
                }
                // Partial selection sort: place the four smallest of the
                // nine samples in vals[0..4].
                for i in 0..4 {
                    for j in (i + 1)..9 {
                        if vals[j] < vals[i] {
                            vals.swap(i, j);
                        }
                    }
                }
                sum += MUL0 * vals[0] + MUL1 * vals[1] + MUL2 * vals[2] + MUL3 * vals[3];
            }
        }
        self.fuzzy_erosion_out[bx] = sum;
    }
}
/// True once every image scanline has been fed in.
pub fn is_complete(&self) -> bool {
    self.rows_received >= self.height
}
/// Number of image scanlines received so far.
pub fn rows_received(&self) -> usize {
    self.rows_received
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::quant::aq::compute_aq_strength_map;
    use crate::types::Subsampling;
    // 4:2:0 layout helper shared by all tests below.
    fn test_layout(width: usize, height: usize) -> LayoutParams {
        LayoutParams::new(width, height, Subsampling::S420, false)
    }
    // 256x256 @ 4:2:0 -> 32x32 blocks, 16-scanline iMCU rows.
    #[test]
    fn test_streaming_aq_creation() {
        let layout = test_layout(256, 256);
        let aq = StreamingAQ::new(&layout, 3, true).unwrap();
        assert_eq!(aq.blocks_w, 32);
        assert_eq!(aq.blocks_h, 32);
        assert_eq!(aq.y_imcu_height, 16);
    }
    // Flat input: streaming output must match the one-shot full-plane
    // implementation almost exactly.
    #[test]
    fn test_streaming_matches_full_plane_uniform() {
        let width = 64;
        let height = 64;
        let y_quant_01 = 2u16;
        let y_plane = vec![128.0f32; width * height];
        let full_result = compute_aq_strength_map(&y_plane, width, height, y_quant_01).unwrap();
        let layout = test_layout(width, height);
        let mut streaming = StreamingAQ::new(&layout, y_quant_01, true).unwrap();
        let strip_height = 16;
        for strip_y in (0..height).step_by(strip_height) {
            let actual_height = strip_height.min(height - strip_y);
            let strip_start = strip_y * width;
            let strip_end = strip_start + actual_height * width;
            streaming.process_y_strip(&y_plane[strip_start..strip_end], strip_y, actual_height);
        }
        let streaming_result = streaming.finalize().unwrap();
        assert_eq!(full_result.strengths.len(), streaming_result.len());
        let max_diff: f32 = full_result
            .strengths
            .iter()
            .zip(streaming_result.iter())
            .map(|(a, b)| (a - b).abs())
            .fold(0.0, f32::max);
        assert!(max_diff < 0.01, "Max diff {} exceeds threshold", max_diff);
    }
    // Diagonal gradient: looser tolerance, exercises non-uniform modulation.
    #[test]
    fn test_streaming_matches_full_plane_gradient() {
        let width = 128;
        let height = 128;
        let y_quant_01 = 3u16;
        let y_plane: Vec<f32> = (0..width * height)
            .map(|i| {
                let x = i % width;
                let y = i / width;
                ((x + y) as f32 / 2.0).min(255.0)
            })
            .collect();
        let full_result = compute_aq_strength_map(&y_plane, width, height, y_quant_01).unwrap();
        let layout = test_layout(width, height);
        let mut streaming = StreamingAQ::new(&layout, y_quant_01, true).unwrap();
        let strip_height = 16;
        for strip_y in (0..height).step_by(strip_height) {
            let actual_height = strip_height.min(height - strip_y);
            let strip_start = strip_y * width;
            let strip_end = strip_start + actual_height * width;
            streaming.process_y_strip(&y_plane[strip_start..strip_end], strip_y, actual_height);
        }
        let streaming_result = streaming.finalize().unwrap();
        let max_diff: f32 = full_result
            .strengths
            .iter()
            .zip(streaming_result.iter())
            .map(|(a, b)| (a - b).abs())
            .fold(0.0, f32::max);
        assert!(max_diff < 0.1, "Max diff {} exceeds threshold", max_diff);
    }
    // Incremental API: per-strip outputs plus the final flush must cover all
    // 8x4 = 32 ... here 64 blocks (8x8 blocks for 64x64), each in [0, 1).
    #[test]
    fn test_streaming_incremental_api() {
        let width = 64;
        let height = 64;
        let y_quant_01 = 3u16;
        let y_plane: Vec<f32> = (0..width * height)
            .map(|i| ((i % width + i / width) as f32 / 2.0).min(255.0))
            .collect();
        let layout = test_layout(width, height);
        let mut streaming = StreamingAQ::new(&layout, y_quant_01, true).unwrap();
        let mut collected = Vec::new();
        let strip_height = 16;
        for strip_y in (0..height).step_by(strip_height) {
            let actual_height = strip_height.min(height - strip_y);
            let strip_start = strip_y * width;
            let strip_end = strip_start + actual_height * width;
            if let Some(aq) =
                streaming.process_y_strip(&y_plane[strip_start..strip_end], strip_y, actual_height)
            {
                collected.extend_from_slice(aq);
            }
        }
        // Emission lags input by one iMCU row; flush releases the last one.
        if let Some(aq) = streaming.flush() {
            collected.extend_from_slice(aq);
        }
        assert_eq!(collected.len(), 64);
        for &v in &collected {
            assert!((0.0..1.0).contains(&v), "Invalid AQ value: {}", v);
        }
    }
}