#![allow(dead_code)]
use std::f32::consts::PI;
use crate::foundation::aligned_alloc::{AlignedVec, AllocError, try_alloc_zeroed};
pub mod simd;
pub mod autovec;
pub mod streaming;
use simd::{compute_pre_erosion_simd, fuzzy_erosion_simd, per_block_modulations_simd};
// --- Adaptive-quantization tuning constants ---
// These values appear to mirror the butteraugli-derived constants used by the
// libjxl/jpegli adaptive-quantization heuristics -- TODO confirm provenance
// against the reference C++ encoder.

// Offset added to a pixel before the gamma-derivative ratio (0..1 domain;
// rescaled by 1/K_INPUT_SCALING where the 0..255 domain is used).
const MATCH_GAMMA_OFFSET: f32 = 0.019;
// Cap on squared pixel differences in the pre-erosion pass
// (the scalar path redefines this locally).
#[allow(dead_code)] const LIMIT: f32 = 0.2;
// AC-quantization scale for the per-block modulation
// (the scalar path redefines this locally).
#[allow(dead_code)] const K_AC_QUANT: f32 = 0.841;
// Input pixels are expected in 0..255; this maps them to 0..1.
const K_INPUT_SCALING: f32 = 1.0 / 255.0;
// Bias and scale applied to a sample before the log in gamma modulation.
const K_GAMMA_MOD_BIAS: f32 = 0.16 * K_INPUT_SCALING;
const K_GAMMA_MOD_SCALE: f32 = 1.0 / 64.0;
// ln(2) == 1/log2(e); converts base-2 logs/exponents to natural ones.
const K_INV_LOG2E: f32 = 0.6931471805599453;
const K_GAMMA_MOD_GAMMA: f32 = -0.15526878023684174 * K_INV_LOG2E;
// Coefficient of the high-frequency (neighbour-difference) modulation.
const K_HF_MOD_COEFF: f32 = -2.0052193233688884 / 112.0;
// Parameters of the rational masking curve (see compute_mask_scalar).
const K_MASK_BASE: f32 = -0.74174993;
#[allow(dead_code)] const K_MUL4: f32 = 0.03879999369382858;
#[allow(dead_code)] const K_MUL2: f32 = 0.17580001056194305;
const K_MASK_MUL4: f32 = 3.2353257320940401;
const K_MASK_MUL2: f32 = 12.906028311180409;
const K_MASK_OFFSET2: f32 = 305.04035728311436;
const K_MASK_MUL3: f32 = 5.0220313103171232;
#[allow(dead_code)] const K_MUL3: f32 = 0.30230000615119934;
const K_MASK_OFFSET3: f32 = 2.1925739705298404;
const K_MASK_OFFSET4: f32 = 0.25 * K_MASK_OFFSET3;
const K_MASK_MUL0: f32 = 0.74760422233706747;
// Parameters of ratio_of_derivatives ("simple gamma" derivative ratio).
const K_EPSILON_RATIO: f32 = 1e-2;
const K_NUM_OFFSET_RATIO: f32 = K_EPSILON_RATIO / K_INPUT_SCALING / K_INPUT_SCALING;
const K_SG_MUL: f32 = 226.0480446705883;
const K_SG_MUL2: f32 = 1.0 / 73.377132366608819;
const K_SG_RET_MUL: f32 = K_SG_MUL2 * 18.6580932135 * K_INV_LOG2E;
const K_NUM_MUL_RATIO: f32 = K_SG_RET_MUL * 3.0 * K_SG_MUL;
const K_SG_VOFFSET: f32 = 7.14672470003;
const K_VOFFSET_RATIO: f32 = (K_SG_VOFFSET * K_INV_LOG2E + K_EPSILON_RATIO) / K_INPUT_SCALING;
const K_DEN_MUL_RATIO: f32 = K_INV_LOG2E * K_SG_MUL * K_INPUT_SCALING * K_INPUT_SCALING;
/// Per-8x8-block adaptive-quantization strengths for one image plane.
#[derive(Debug, Clone)]
pub struct AQStrengthMap {
    /// Number of 8x8 blocks per row.
    pub width_blocks: usize,
    /// Number of 8x8 block rows.
    pub height_blocks: usize,
    /// Row-major strengths, one per block (`width_blocks * height_blocks`).
    pub strengths: Vec<f32>,
}

impl AQStrengthMap {
    /// Fallback strength for out-of-range lookups; also the default used by
    /// `with_cpp_mean` (chosen to match the C++ implementation's mean).
    const DEFAULT_STRENGTH: f32 = 0.08;

    /// Creates a map where every block has the same `strength`.
    #[must_use]
    pub fn uniform(width_blocks: usize, height_blocks: usize, strength: f32) -> Self {
        Self {
            width_blocks,
            height_blocks,
            strengths: vec![strength; width_blocks * height_blocks],
        }
    }

    /// Creates a uniform map at the default strength (0.08).
    #[must_use]
    pub fn with_cpp_mean(width_blocks: usize, height_blocks: usize) -> Self {
        Self::uniform(width_blocks, height_blocks, Self::DEFAULT_STRENGTH)
    }

    /// Returns the strength for block (`bx`, `by`), or the default strength
    /// when the coordinates are out of range.
    ///
    /// Fix: previously a `bx >= width_blocks` lookup silently read a block
    /// from the following row; it now falls back to the default instead.
    #[inline]
    #[must_use]
    pub fn get(&self, bx: usize, by: usize) -> f32 {
        if bx >= self.width_blocks {
            return Self::DEFAULT_STRENGTH;
        }
        self.strengths
            .get(by * self.width_blocks + bx)
            .copied()
            .unwrap_or(Self::DEFAULT_STRENGTH)
    }

    /// Arithmetic mean of all strengths (0.0 for an empty map).
    #[must_use]
    pub fn mean(&self) -> f32 {
        if self.strengths.is_empty() {
            return 0.0;
        }
        self.strengths.iter().sum::<f32>() / self.strengths.len() as f32
    }

    /// Population standard deviation of the strengths (0.0 for an empty map).
    #[must_use]
    pub fn std(&self) -> f32 {
        if self.strengths.is_empty() {
            return 0.0;
        }
        let mean = self.mean();
        let variance = self
            .strengths
            .iter()
            .map(|&x| {
                let d = x - mean;
                d * d
            })
            .sum::<f32>()
            / self.strengths.len() as f32;
        variance.sqrt()
    }

    /// Returns `(min, max, mean, std)`; all zeros for an empty map.
    #[must_use]
    pub fn stats(&self) -> (f32, f32, f32, f32) {
        if self.strengths.is_empty() {
            return (0.0, 0.0, 0.0, 0.0);
        }
        let mut min = f32::INFINITY;
        let mut max = f32::NEG_INFINITY;
        let mut sum = 0.0f32;
        for &x in &self.strengths {
            // f32::min/max ignore a NaN operand, matching the original
            // comparison-based updates.
            min = min.min(x);
            max = max.max(x);
            sum += x;
        }
        let n = self.strengths.len() as f32;
        let mean = sum / n;
        let var_sum: f32 = self
            .strengths
            .iter()
            .map(|&x| {
                let d = x - mean;
                d * d
            })
            .sum();
        (min, max, mean, (var_sum / n).sqrt())
    }

    /// Multiplies every block strength by `factor` in place.
    pub fn scale(&mut self, factor: f32) {
        for s in &mut self.strengths {
            *s *= factor;
        }
    }

    /// Rescales the map so its mean equals `target_mean`. No-op when the
    /// current mean is not positive (empty or all-zero map).
    pub fn scale_to_mean(&mut self, target_mean: f32) {
        let current_mean = self.mean();
        if current_mean > 0.0 {
            self.scale(target_mean / current_mean);
        }
    }

    /// Returns the factor by which to scale the map to target roughly
    /// `size_reduction_pct` percent smaller output.
    ///
    /// The 200.0 divisor is an empirical mapping from size reduction to mean
    /// strength decrease -- TODO confirm against encoder calibration. The
    /// target mean is floored at 0.01 so the map never collapses to zero.
    #[must_use]
    pub fn scale_for_size_reduction(&self, size_reduction_pct: f32) -> f32 {
        let current_mean = self.mean();
        if current_mean <= 0.0 {
            return 1.0;
        }
        let target_mean = (current_mean - size_reduction_pct / 200.0).max(0.01);
        target_mean / current_mean
    }
}
/// Computes the adaptive-quantization strength map for a luma plane.
///
/// Thin wrapper over [`compute_aq_strength_map_impl`] that widens the
/// integer quantizer value to `f32`.
pub fn compute_aq_strength_map(
    y_plane: &[f32],
    width: usize,
    height: usize,
    y_quant_01: u16,
) -> Result<AQStrengthMap, AllocError> {
    let quant = f32::from(y_quant_01);
    compute_aq_strength_map_impl(y_plane, width, height, quant)
}
/// Maps one quant-field value to an AQ strength: `0.6 / q - 1`, floored at 0.
#[inline]
#[must_use]
pub fn quant_field_to_aq_strength(quant_field: f32) -> f32 {
    let raw = 0.6 / quant_field - 1.0;
    // Equivalent to `raw.max(0.0)`, including the NaN-to-0.0 behaviour.
    if raw > 0.0 { raw } else { 0.0 }
}
/// Converts a whole quant field into AQ strengths (see
/// [`quant_field_to_aq_strength`]), allocating an aligned output buffer.
pub fn quant_field_to_aq_strength_simd(quant_field: &[f32]) -> Result<AlignedVec<f32>, AllocError> {
    let mut strengths = try_alloc_zeroed(quant_field.len())?;
    for (i, &q) in quant_field.iter().enumerate() {
        strengths[i] = quant_field_to_aq_strength(q);
    }
    Ok(strengths)
}
/// Full AQ pipeline: pre-erosion, fuzzy erosion, per-block modulation, and
/// finally the quant-field -> strength conversion.
///
/// Block geometry is ceil(width/8) x ceil(height/8); the pre-erosion grid is
/// ceil(width/4) x ceil(height/4). A zero-sized image yields an empty map.
pub fn compute_aq_strength_map_impl(
    y_plane: &[f32],
    width: usize,
    height: usize,
    y_quant_01: f32,
) -> Result<AQStrengthMap, AllocError> {
    // Degenerate image: nothing to analyse.
    if width == 0 || height == 0 {
        return Ok(AQStrengthMap::uniform(0, 0, 0.08));
    }
    let width_blocks = (width + 7) / 8;
    let height_blocks = (height + 7) / 8;
    let pre_w = (width + 3) / 4;
    let pre_h = (height + 3) / 4;
    let pre_erosion = compute_pre_erosion_simd(y_plane, width, height)?;
    let mut quant_field = try_alloc_zeroed(width_blocks * height_blocks)?;
    fuzzy_erosion_simd(
        &pre_erosion,
        pre_w,
        pre_h,
        width_blocks,
        height_blocks,
        &mut quant_field,
    )?;
    per_block_modulations_simd(
        y_quant_01,
        y_plane,
        width,
        height,
        width_blocks,
        height_blocks,
        &mut quant_field,
    );
    let strengths = quant_field_to_aq_strength_simd(&quant_field)?;
    Ok(AQStrengthMap {
        width_blocks,
        height_blocks,
        strengths: strengths.to_vec(),
    })
}
/// Builds a normalized 1-D Gaussian kernel of length `2 * radius + 1`.
///
/// Fix: a non-positive or non-finite `sigma` previously produced a NaN-filled
/// kernel (division by zero in the exponent); it now returns a delta kernel
/// (1.0 at the center), the limit of a Gaussian as sigma -> 0.
#[allow(dead_code)]
fn gaussian_kernel(sigma: f32, radius: usize) -> Vec<f32> {
    let mut kernel = vec![0.0; 2 * radius + 1];
    if sigma <= 0.0 || !sigma.is_finite() {
        kernel[radius] = 1.0;
        return kernel;
    }
    let sigma_sq = sigma * sigma;
    // The 1/sqrt(2*pi*sigma^2) factor cancels in the normalization below,
    // but is kept so intermediate values match the textbook Gaussian.
    let norm_factor = 1.0 / (2.0 * PI * sigma_sq).sqrt();
    let mut sum = 0.0;
    for i in 0..=radius {
        let dist_sq = (i * i) as f32;
        let val = norm_factor * (-dist_sq / (2.0 * sigma_sq)).exp();
        // Fill symmetrically around the center tap.
        kernel[radius + i] = val;
        kernel[radius - i] = val;
        sum += if i == 0 { val } else { 2.0 * val };
    }
    // Normalize so the taps sum to 1 (guard against underflow to ~0).
    if sum > 1e-6 {
        for val in &mut kernel {
            *val /= sum;
        }
    }
    kernel
}
/// Ratio of the "simple gamma" curve derivatives at `val` (clamped to >= 0).
///
/// With `invert == true` returns numerator/denominator, otherwise the
/// reciprocal. The denominator is nudged away from zero to stay finite.
#[allow(dead_code)]
fn ratio_of_derivatives(val: f32, invert: bool) -> f32 {
    let clamped = val.max(0.0);
    let sq = clamped * clamped;
    let numerator = sq.mul_add(K_NUM_MUL_RATIO, K_NUM_OFFSET_RATIO);
    let raw_den = (clamped * K_DEN_MUL_RATIO).mul_add(sq, K_VOFFSET_RATIO);
    let denominator = if raw_den == 0.0 { 1e-9 } else { raw_den };
    if invert {
        numerator / denominator
    } else {
        denominator / numerator
    }
}
/// Inserts `val` into `mins` (kept sorted ascending), keeping only the four
/// smallest values seen so far. Values >= `mins[3]` (and NaN) are ignored.
#[inline]
#[allow(dead_code)]
fn update_min4(val: f32, mins: &mut [f32; 4]) {
    // Guarded positive comparison so a NaN `val` is a no-op, exactly like
    // the unrolled nested-if version this replaces.
    if val < mins[3] {
        let mut pos = 3;
        // Shift larger entries up until the insertion slot is found.
        while pos > 0 && val < mins[pos - 1] {
            mins[pos] = mins[pos - 1];
            pos -= 1;
        }
        mins[pos] = val;
    }
}
/// Masking transfer curve: `0.25 * sqrt(v * sqrt(K_MUL * 1e8) + 28)`.
#[inline]
fn masking_sqrt(v: f32) -> f32 {
    const K_LOG_OFFSET: f32 = 28.0;
    const K_MUL: f32 = 211.50759899638012;
    let scaled_mul = (K_MUL * 1e8_f32).sqrt();
    let inner = v.mul_add(scaled_mul, K_LOG_OFFSET);
    0.25 * inner.sqrt()
}
/// Fast approximate `2^x`.
///
/// Splits `x` into integer and fractional parts: the fractional part is
/// evaluated with a degree-5 polynomial approximating `2^f` on [0, 1), and
/// the integer part is applied by adding it directly to the f32 exponent
/// bits. Input is clamped to [-126, 126] so the exponent arithmetic stays
/// within the normal-float range.
#[allow(dead_code)]
#[inline]
fn fast_pow2f(x: f32) -> f32 {
    // ln(2): leading coefficient of the 2^x expansion.
    const LN2: f32 = 0.6931471805599453;
    let x = x.clamp(-126.0, 126.0);
    let xi = x.floor();
    let xf = x - xi;
    // Horner-form polynomial approximation of 2^xf, xf in [0, 1).
    let p = 1.0
        + xf * (LN2
            + xf * (0.240226507 + xf * (0.0558325133 + xf * (0.00898934009 + xf * 0.00187757667))));
    let bits = p.to_bits();
    // Adding (xi << 23) to the bit pattern multiplies by 2^xi (the f32
    // exponent field occupies bits 23..31); wrapping_add handles negative xi.
    let exp_offset = ((xi as i32) << 23) as u32;
    f32::from_bits(bits.wrapping_add(exp_offset))
}
/// Scalar reference implementation of the pre-erosion pass.
///
/// For every pixel, measures the gamma-weighted, clamped deviation from its
/// 4-neighbour average, applies the masking curve, and box-downsamples the
/// per-pixel scores by 4x4. Output is `ceil(width/4) x ceil(height/4)`,
/// row-major; image edges are handled by clamping coordinates.
#[allow(dead_code)]
fn compute_pre_erosion_scalar(input: &[f32], width: usize, height: usize) -> Vec<f32> {
    let pre_erosion_w = (width + 3) / 4;
    let pre_erosion_h = (height + 3) / 4;
    let mut pre_erosion = vec![0.0f32; pre_erosion_w * pre_erosion_h];
    // Cap on the squared difference; limits the influence of strong edges.
    const LIMIT: f32 = 0.2;
    // MATCH_GAMMA_OFFSET is defined for the 0..1 domain; dividing by
    // K_INPUT_SCALING rescales it to the 0..255 domain used here.
    let gamma_offset = MATCH_GAMMA_OFFSET / K_INPUT_SCALING;
    // Edge-clamped pixel fetch (replicates border pixels).
    let get = |x: isize, y: isize| -> f32 {
        let x = x.clamp(0, width as isize - 1) as usize;
        let y = y.clamp(0, height as isize - 1) as usize;
        input[y * width + x]
    };
    // Accumulates masked per-pixel scores over the 4 image rows that feed
    // one pre-erosion row.
    let mut diff_buffer = vec![0.0f32; width];
    for y_block in 0..pre_erosion_h {
        diff_buffer.fill(0.0);
        for iy in 0..4 {
            let y = y_block * 4 + iy;
            if y >= height {
                continue;
            }
            for x in 0..width {
                let ix = x as isize;
                let iy_s = y as isize;
                let pixel = get(ix, iy_s);
                // Average of the 4-neighbourhood.
                let base = 0.25
                    * (get(ix - 1, iy_s)
                        + get(ix + 1, iy_s)
                        + get(ix, iy_s - 1)
                        + get(ix, iy_s + 1));
                // Weight the deviation by the local gamma-derivative ratio.
                let gammacv = ratio_of_derivatives(pixel + gamma_offset, false);
                let diff = gammacv * (pixel - base);
                let diff_sq = (diff * diff).min(LIMIT);
                let masked = masking_sqrt(diff_sq);
                diff_buffer[x] += masked;
            }
        }
        // Horizontal 4x reduction; the 0.25 factor appears to average over
        // the 4 columns (rows were summed above) -- matches the SIMD path,
        // TODO confirm intended normalization.
        for x_block in 0..pre_erosion_w {
            let x_start = x_block * 4;
            let mut sum = 0.0f32;
            for ix in 0..4 {
                let x = x_start + ix;
                if x < width {
                    sum += diff_buffer[x];
                }
            }
            pre_erosion[y_block * pre_erosion_w + x_block] = sum * 0.25;
        }
    }
    pre_erosion
}
/// Scalar reference for the fuzzy-erosion pass.
///
/// For every pre-erosion cell, takes the four smallest values of its 3x3
/// edge-clamped neighbourhood, combines them with fixed weights, then sums
/// each 2x2 group of weighted cells into one entry of `aq_map` (one per
/// 8x8 image block).
#[allow(dead_code)]
fn fuzzy_erosion_scalar(
    pre_erosion: &[f32],
    pre_erosion_w: usize,
    pre_erosion_h: usize,
    block_w: usize,
    block_h: usize,
    aq_map: &mut [f32],
) {
    assert_eq!(aq_map.len(), block_w * block_h);
    // Weights applied to the 1st..4th smallest neighbourhood values.
    const MUL0: f32 = 0.125;
    const MUL1: f32 = 0.075;
    const MUL2: f32 = 0.06;
    const MUL3: f32 = 0.05;
    let mut eroded = vec![0.0f32; pre_erosion_w * pre_erosion_h];
    // Edge-clamped fetch from the pre-erosion grid.
    let sample = |x: isize, y: isize| -> f32 {
        let sx = x.clamp(0, pre_erosion_w as isize - 1) as usize;
        let sy = y.clamp(0, pre_erosion_h as isize - 1) as usize;
        pre_erosion[sy * pre_erosion_w + sx]
    };
    for row in 0..pre_erosion_h {
        let ry = row as isize;
        for col in 0..pre_erosion_w {
            let rx = col as isize;
            let mut hood = [
                sample(rx - 1, ry - 1),
                sample(rx, ry - 1),
                sample(rx + 1, ry - 1),
                sample(rx - 1, ry),
                sample(rx, ry),
                sample(rx + 1, ry),
                sample(rx - 1, ry + 1),
                sample(rx, ry + 1),
                sample(rx + 1, ry + 1),
            ];
            // Partial selection sort: move the four smallest values, in
            // ascending order, to the front; the other five are ignored.
            for slot in 0..4 {
                for cand in (slot + 1)..9 {
                    if hood[cand] < hood[slot] {
                        hood.swap(slot, cand);
                    }
                }
            }
            eroded[row * pre_erosion_w + col] = MUL0.mul_add(
                hood[0],
                MUL1.mul_add(hood[1], MUL2.mul_add(hood[2], MUL3 * hood[3])),
            );
        }
    }
    // Each 8x8 block corresponds to a 2x2 group of (4x4) pre-erosion cells.
    let fetch = |x: usize, y: usize| -> f32 {
        let fx = x.min(pre_erosion_w.saturating_sub(1));
        let fy = y.min(pre_erosion_h.saturating_sub(1));
        eroded[fy * pre_erosion_w + fx]
    };
    for by in 0..block_h {
        for bx in 0..block_w {
            let px = bx * 2;
            let py = by * 2;
            aq_map[by * block_w + bx] = fetch(px, py)
                + fetch(px + 1, py)
                + fetch(px, py + 1)
                + fetch(px + 1, py + 1);
        }
    }
}
/// Rational approximation of the masking curve; the clamp keeps all
/// denominators strictly positive.
#[allow(dead_code)]
fn compute_mask_scalar(out_val: f32) -> f32 {
    let scaled = (out_val * K_MASK_MUL0).max(1e-3);
    let scaled_sq = scaled * scaled;
    let term2 = 1.0 / (scaled + K_MASK_OFFSET2);
    let term3 = 1.0 / (scaled_sq + K_MASK_OFFSET3);
    let term4 = 1.0 / (scaled_sq + K_MASK_OFFSET4);
    term4.mul_add(
        K_MASK_MUL4,
        term2.mul_add(K_MASK_MUL2, term3.mul_add(K_MASK_MUL3, K_MASK_BASE)),
    )
}
/// High-frequency modulation: folds the sum of absolute differences between
/// pixel (`x`, `y`) and its 4-neighbours (edges clamped) into `current_val`.
#[allow(dead_code)]
fn hf_modulation_scalar(
    x: usize,
    y: usize,
    input_scaled: &[f32],
    width: usize,
    height: usize,
    current_val: f32,
) -> f32 {
    let center = input_scaled[y * width + x];
    let left = input_scaled[y * width + x.saturating_sub(1)];
    let right = input_scaled[y * width + (x + 1).min(width - 1)];
    let top = input_scaled[y.saturating_sub(1) * width + x];
    let bottom = input_scaled[(y + 1).min(height - 1) * width + x];
    // Grouping (horizontal + vertical) preserved for bit-exact results.
    let horiz = (left - center).abs() + (right - center).abs();
    let vert = (top - center).abs() + (bottom - center).abs();
    (horiz + vert).mul_add(K_HF_MOD_COEFF, current_val)
}
/// Gamma modulation: biases and scales the sample at (`x`, `y`), takes its
/// (polynomial-approximated) log via `super::ln_poly`, and adds the weighted
/// result to `current_val`.
#[allow(dead_code)]
fn gamma_modulation_scalar(
    x: usize,
    y: usize,
    input_scaled: &[f32],
    width: usize,
    _height: usize,
    current_val: f32,
) -> f32 {
    let sample = input_scaled[y * width + x];
    // Floor at 1e-9 keeps the log argument strictly positive.
    let log_arg = sample.mul_add(K_GAMMA_MOD_SCALE, K_GAMMA_MOD_BIAS).max(1e-9);
    let modulation = K_GAMMA_MOD_GAMMA * super::ln_poly(log_arg);
    current_val + modulation
}
/// Scalar reference for the per-block modulation pass: turns each block's
/// fuzzy-erosion value into a final quant-field entry by applying the masking
/// curve, a high-frequency modulation, and a gamma modulation, then
/// exponentiating and blending with a quality-dependent dampening.
///
/// NOTE(review): the parameter is named `input_scaled`, but `K_BIAS` below is
/// `0.16 / K_INPUT_SCALING` (0..255 domain) while the SIMD-path constant
/// `K_GAMMA_MOD_BIAS` is `0.16 * K_INPUT_SCALING` (0..1 domain) -- confirm
/// which input domain this path actually expects.
#[allow(dead_code)]
fn per_block_modulations_scalar(
    y_quant_01: f32,
    input_scaled: &[f32],
    width: usize,
    height: usize,
    block_w: usize,
    block_h: usize,
    aq_map: &mut [f32],
) {
    const K_AC_QUANT: f32 = 0.841;
    // Dampening ramps from 1 down to 0 as y_quant_01 goes from 9 to 65.
    const K_DAMPEN_RAMP_START: f32 = 9.0;
    const K_DAMPEN_RAMP_END: f32 = 65.0;
    let base_level = 0.48 * K_AC_QUANT;
    let dampen = if y_quant_01 >= K_DAMPEN_RAMP_START {
        let d =
            1.0 - (y_quant_01 - K_DAMPEN_RAMP_START) / (K_DAMPEN_RAMP_END - K_DAMPEN_RAMP_START);
        d.max(0.0)
    } else {
        1.0
    };
    // Blend between the adaptive field (mul) and a flat base level (add).
    let mul = K_AC_QUANT * dampen;
    let add = (1.0 - dampen) * base_level;
    // Edge-clamped pixel fetch.
    let get = |x: usize, y: usize| -> f32 {
        let x = x.min(width.saturating_sub(1));
        let y = y.min(height.saturating_sub(1));
        input_scaled[y * width + x]
    };
    for by in 0..block_h {
        let y_start = by * 8;
        for bx in 0..block_w {
            let x_start = bx * 8;
            let block_idx = by * block_w + bx;
            let fuzzy_val = aq_map[block_idx];
            let mut out_val = compute_mask_scalar(fuzzy_val);
            // High-frequency modulation: sum of horizontal and vertical
            // absolute neighbour differences inside the 8x8 block.
            const K_SUM_COEFF: f32 = -2.0052193233688884 * K_INPUT_SCALING / 112.0;
            let mut sum = 0.0f32;
            for dy in 0..8 {
                let y = y_start + dy;
                for dx in 0..8 {
                    let x = x_start + dx;
                    let p = get(x, y);
                    if dx < 7 {
                        let pr = get(x + 1, y);
                        sum += (p - pr).abs();
                    }
                    if dy < 7 {
                        let pd = get(x, y + 1);
                        sum += (p - pd).abs();
                    }
                }
            }
            out_val += sum * K_SUM_COEFF;
            // Gamma modulation: average gamma-derivative ratio over the
            // block, folded in through log2.
            const K_GAMMA: f32 = -0.15526878023684174 * K_INV_LOG2E;
            const K_BIAS: f32 = 0.16 / K_INPUT_SCALING;
            const K_SCALE: f32 = K_INPUT_SCALING / 64.0;
            let mut overall_ratio = 0.0f32;
            for dy in 0..8 {
                let y = y_start + dy;
                for dx in 0..8 {
                    let x = x_start + dx;
                    let iny = get(x, y) + K_BIAS;
                    let ratio_g = ratio_of_derivatives(iny, true);
                    overall_ratio += ratio_g;
                }
            }
            overall_ratio *= K_SCALE;
            let log_ratio = if overall_ratio > 0.0 {
                overall_ratio.log2()
            } else {
                0.0
            };
            out_val += K_GAMMA * log_ratio;
            // exp2(x * log2(e)) == exp(x); then blend with the dampened base.
            const LOG2_E: f32 = 1.442695041;
            let quant_field = (out_val * LOG2_E).exp2().mul_add(mul, add);
            aq_map[block_idx] = quant_field;
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Uniform construction fills every block with the given strength.
    #[test]
    fn test_aq_strength_map_uniform() {
        let map = AQStrengthMap::uniform(10, 10, 0.08);
        assert_eq!(map.strengths.len(), 100);
        assert!((map.get(5, 5) - 0.08).abs() < 1e-6);
    }

    // with_cpp_mean is a uniform map at the 0.08 default.
    #[test]
    fn test_aq_strength_map_cpp_mean() {
        let map = AQStrengthMap::with_cpp_mean(10, 10);
        assert!((map.get(0, 0) - 0.08).abs() < 1e-6);
    }

    // Spot-checks of the 0.6/q - 1 mapping and its floor at zero.
    #[test]
    fn test_quant_field_to_aq_strength() {
        assert!((quant_field_to_aq_strength(0.6) - 0.0).abs() < 1e-6);
        assert!((quant_field_to_aq_strength(0.3) - 1.0).abs() < 1e-6);
        assert!(quant_field_to_aq_strength(6.0) >= 0.0);
    }

    // A flat plane must produce a flat, positive, sub-1.0 strength map.
    #[test]
    fn test_compute_returns_uniform() {
        let plane = vec![128.0f32; 64 * 64];
        let map = compute_aq_strength_map(&plane, 64, 64, 1).unwrap();
        assert_eq!(map.width_blocks, 8);
        assert_eq!(map.height_blocks, 8);
        let first = map.strengths[0];
        for &s in &map.strengths {
            assert!(
                (s - first).abs() < 1e-6,
                "Uniform input should produce uniform output, got {} vs {}",
                s,
                first
            );
        }
        assert!(first > 0.0, "aq_strength should be positive, got {}", first);
        assert!(first < 1.0, "aq_strength should be < 1, got {}", first);
    }

    // Both orientations of the ratio must be finite and positive.
    #[test]
    fn test_ratio_of_derivatives() {
        let r1 = ratio_of_derivatives(0.5, false);
        let r2 = ratio_of_derivatives(0.5, true);
        assert!(r1.is_finite());
        assert!(r2.is_finite());
        assert!(r1 > 0.0);
        assert!(r2 > 0.0);
    }

    // Insertion keeps the four smallest values sorted ascending.
    #[test]
    fn test_update_min4() {
        let mut mins = [f32::INFINITY; 4];
        update_min4(5.0, &mut mins);
        assert!((mins[0] - 5.0).abs() < 1e-6);
        update_min4(3.0, &mut mins);
        assert!((mins[0] - 3.0).abs() < 1e-6);
        assert!((mins[1] - 5.0).abs() < 1e-6);
        update_min4(7.0, &mut mins);
        assert!((mins[0] - 3.0).abs() < 1e-6);
        assert!((mins[1] - 5.0).abs() < 1e-6);
        assert!((mins[2] - 7.0).abs() < 1e-6);
    }

    // The masking curve stays finite and positive at a representative input.
    #[test]
    fn test_compute_mask_scalar() {
        let mask = compute_mask_scalar(1.0);
        assert!(mask.is_finite());
        assert!(mask > 0.0);
    }

    // Smoke test: the full pipeline runs and yields finite values.
    #[test]
    fn test_impl_runs_without_panic() {
        let plane = vec![128.0f32; 64 * 64];
        let map = compute_aq_strength_map_impl(&plane, 64, 64, 1.0).unwrap();
        assert_eq!(map.width_blocks, 8);
        assert_eq!(map.height_blocks, 8);
        assert_eq!(map.strengths.len(), 64);
        for &s in &map.strengths {
            assert!(s.is_finite(), "aq_strength should be finite");
        }
    }

    // A vertical gradient image should keep strengths non-negative, finite,
    // and with a plausible mean.
    #[test]
    fn test_impl_output_range() {
        let plane: Vec<f32> = (0..64 * 64)
            .map(|i| {
                let row = i / 64;
                (row * 4) as f32
            })
            .collect();
        let distance = 1.0 / 3.0;
        let map = compute_aq_strength_map_impl(&plane, 64, 64, distance).unwrap();
        for &s in &map.strengths {
            assert!(s >= 0.0, "aq_strength {} should be non-negative", s);
            assert!(s.is_finite(), "aq_strength {} should be finite", s);
        }
        let mean: f32 = map.strengths.iter().sum::<f32>() / map.strengths.len() as f32;
        println!("aq_strength mean for gradient: {}", mean);
        assert!(
            mean <= 2.0,
            "aq_strength mean {} is unexpectedly high even for synthetic image",
            mean
        );
    }
}