#![allow(clippy::too_many_arguments)]
use crate::weights::{F32WeightTable, I16_PRECISION, I16WeightTable};
use archmage::ScalarToken;
/// Horizontally resamples one interleaved f32 row.
///
/// For each output pixel `out_x`, accumulates `weights.weights(out_x)` taps
/// starting at input pixel `weights.left[out_x]`, independently per channel.
pub(crate) fn filter_h_row_f32_scalar(
    _token: ScalarToken,
    input: &[f32],
    output: &mut [f32],
    weights: &F32WeightTable,
    channels: usize,
) {
    for out_x in 0..weights.len() {
        let left = weights.left[out_x] as usize;
        let taps = weights.weights(out_x);
        let base = out_x * channels;
        let dst = &mut output[base..base + channels];
        for (c, sample) in dst.iter_mut().enumerate() {
            // Accumulate taps in tap order (same float summation order as before).
            let mut acc = 0.0f32;
            for (t, &weight) in taps.iter().enumerate() {
                acc += input[(left + t) * channels + c] * weight;
            }
            *sample = acc;
        }
    }
}
/// Vertically filters one output row from a window of source rows.
///
/// `output[x]` becomes the weighted sum of `rows[i][x]` over all rows, using
/// one weight per source row. `rows` and `weights` must be the same length.
pub(crate) fn filter_v_row_f32_scalar(
    _token: ScalarToken,
    rows: &[&[f32]],
    output: &mut [f32],
    weights: &[f32],
) {
    let width = output.len();
    debug_assert_eq!(rows.len(), weights.len());
    output.fill(0.0);
    for (row, &weight) in rows.iter().zip(weights) {
        debug_assert!(row.len() >= width);
        // Accumulate this row's weighted contribution into every column.
        for (acc, &v) in output.iter_mut().zip(&row[..width]) {
            *acc += v * weight;
        }
    }
}
/// Converts a row of u8 samples to f32 in [0, 1] (divides by 255).
pub(crate) fn u8_to_f32_row_scalar(_token: ScalarToken, input: &[u8], output: &mut [f32]) {
    debug_assert_eq!(input.len(), output.len());
    const SCALE: f32 = 1.0 / 255.0;
    for (dst, &src) in output.iter_mut().zip(input) {
        *dst = f32::from(src) * SCALE;
    }
}
/// Converts a row of f32 samples in [0, 1] back to u8, rounding to nearest
/// (+0.5 before truncation) and clamping out-of-range values.
pub(crate) fn f32_to_u8_row_scalar(_token: ScalarToken, input: &[f32], output: &mut [u8]) {
    debug_assert_eq!(input.len(), output.len());
    for (&src, dst) in input.iter().zip(output.iter_mut()) {
        let scaled = src * 255.0 + 0.5;
        *dst = scaled.clamp(0.0, 255.0) as u8;
    }
}
/// Premultiplies RGB by alpha in place for a row of RGBA f32 pixels.
/// Any trailing samples that do not form a full 4-channel pixel are untouched.
pub(crate) fn premultiply_alpha_row_scalar(_token: ScalarToken, row: &mut [f32]) {
    for px in row.chunks_exact_mut(4) {
        let alpha = px[3];
        for color in &mut px[..3] {
            *color *= alpha;
        }
    }
}
/// Undoes alpha premultiplication in place for a row of RGBA f32 pixels.
/// Pixels with alpha at or below 1/1024 are left as-is to avoid blowing up
/// color values when dividing by a near-zero alpha.
pub(crate) fn unpremultiply_alpha_row_scalar(_token: ScalarToken, row: &mut [f32]) {
    const MIN_ALPHA: f32 = 1.0 / 1024.0;
    for px in row.chunks_exact_mut(4) {
        let alpha = px[3];
        if alpha > MIN_ALPHA {
            let scale = 1.0 / alpha;
            for color in &mut px[..3] {
                *color *= scale;
            }
        }
    }
}
/// Horizontally resamples one interleaved u8 row using i16 fixed-point
/// weights, producing u8 output.
///
/// Each sample is an i32 dot product, rounded by adding half an ulp
/// (`1 << (I16_PRECISION - 1)`) before the descaling shift, then clamped
/// to the u8 range.
pub(crate) fn filter_h_u8_i16_scalar(
    _token: ScalarToken,
    input: &[u8],
    output: &mut [u8],
    weights: &I16WeightTable,
    channels: usize,
) {
    for out_x in 0..weights.len() {
        let left = weights.left[out_x] as usize;
        let taps = weights.weights(out_x);
        let base = out_x * channels;
        for c in 0..channels {
            let acc: i32 = taps
                .iter()
                .enumerate()
                .map(|(t, &w)| i32::from(input[(left + t) * channels + c]) * i32::from(w))
                .sum();
            let rounded = (acc + (1 << (I16_PRECISION - 1))) >> I16_PRECISION;
            output[base + c] = rounded.clamp(0, 255) as u8;
        }
    }
}
/// Horizontally resamples one interleaved u8 row using i16 fixed-point
/// weights, keeping the result in i16 (no clamping) for a later vertical pass.
pub(crate) fn filter_h_u8_to_i16_scalar(
    _token: ScalarToken,
    input: &[u8],
    output: &mut [i16],
    weights: &I16WeightTable,
    channels: usize,
) {
    for out_x in 0..weights.len() {
        let left = weights.left[out_x] as usize;
        let taps = weights.weights(out_x);
        let base = out_x * channels;
        for c in 0..channels {
            let acc: i32 = taps
                .iter()
                .enumerate()
                .map(|(t, &w)| i32::from(input[(left + t) * channels + c]) * i32::from(w))
                .sum();
            // Round with a half-ulp bias, then descale; intermediate stays i16.
            output[base + c] = ((acc + (1 << (I16_PRECISION - 1))) >> I16_PRECISION) as i16;
        }
    }
}
#[allow(dead_code)]
/// Horizontally filters four independent RGBA (4-channel) u8 rows into i16
/// intermediates, all sharing the same weight table.
pub(crate) fn filter_h_u8_to_i16_4rows_scalar(
    _token: ScalarToken,
    in0: &[u8],
    in1: &[u8],
    in2: &[u8],
    in3: &[u8],
    out0: &mut [i16],
    out1: &mut [i16],
    out2: &mut [i16],
    out3: &mut [i16],
    weights: &I16WeightTable,
) {
    const CHANNELS: usize = 4;
    let pairs = [(in0, out0), (in1, out1), (in2, out2), (in3, out3)];
    for (input, output) in pairs {
        filter_h_u8_to_i16_scalar(_token, input, output, weights, CHANNELS);
    }
}
/// Horizontally filters four independent RGBA (4-channel) u8 rows to u8
/// output, all sharing the same weight table.
pub(crate) fn filter_h_u8_i16_4rows_scalar(
    _token: ScalarToken,
    in0: &[u8],
    in1: &[u8],
    in2: &[u8],
    in3: &[u8],
    out0: &mut [u8],
    out1: &mut [u8],
    out2: &mut [u8],
    out3: &mut [u8],
    weights: &I16WeightTable,
) {
    const CHANNELS: usize = 4;
    let pairs = [(in0, out0), (in1, out1), (in2, out2), (in3, out3)];
    for (input, output) in pairs {
        filter_h_u8_i16_scalar(_token, input, output, weights, CHANNELS);
    }
}
/// Vertically resamples an entire u8 plane with i16 fixed-point weights.
///
/// `intermediate` is treated as `in_h` rows of `h_row_len` bytes each (the
/// horizontally filtered plane); `output` receives `out_h` rows of the same
/// width. Each output sample is an i32 dot product of a column's samples with
/// the output row's taps, rounded with a half-ulp bias
/// (`1 << (I16_PRECISION - 1)`) before the `I16_PRECISION` shift, then
/// clamped to 0..=255.
///
/// Tap row indices `left + t` that fall outside the image are clamped to
/// `[0, in_h - 1]`, replicating the edge rows.
pub(crate) fn filter_v_all_u8_i16_scalar(
    _token: ScalarToken,
    intermediate: &[u8],
    output: &mut [u8],
    h_row_len: usize,
    in_h: usize,
    out_h: usize,
    weights: &crate::weights::I16WeightTable,
) {
    // Highest valid input row index, used for edge clamping.
    let in_h_i32 = in_h as i32 - 1;
    let mut out_y = 0;
    while out_y < out_h {
        let left = weights.left[out_y];
        let tap_count = weights.tap_count(out_y);
        let w_a = weights.weights(out_y);
        // Two consecutive output rows with identical left/tap_count can share
        // each input load between both accumulators.
        let batch2 = out_y + 1 < out_h
            && weights.left[out_y + 1] == left
            && weights.tap_count(out_y + 1) == tap_count;
        if batch2 {
            let w_b = weights.weights(out_y + 1);
            let out_start_a = out_y * h_row_len;
            let out_start_b = (out_y + 1) * h_row_len;
            for x in 0..h_row_len {
                let mut acc_a: i32 = 0;
                let mut acc_b: i32 = 0;
                for t in 0..tap_count {
                    let in_y = (left + t as i32).clamp(0, in_h_i32) as usize;
                    // One load feeds the accumulators of both output rows.
                    let v = intermediate[in_y * h_row_len + x] as i32;
                    acc_a += v * w_a[t] as i32;
                    acc_b += v * w_b[t] as i32;
                }
                // Round, descale, and clamp back into the u8 range.
                output[out_start_a + x] =
                    ((acc_a + (1 << (I16_PRECISION - 1))) >> I16_PRECISION).clamp(0, 255) as u8;
                output[out_start_b + x] =
                    ((acc_b + (1 << (I16_PRECISION - 1))) >> I16_PRECISION).clamp(0, 255) as u8;
            }
            out_y += 2;
        } else {
            let out_start = out_y * h_row_len;
            for x in 0..h_row_len {
                let mut acc: i32 = 0;
                for (t, &weight) in w_a[..tap_count].iter().enumerate() {
                    let in_y = (left + t as i32).clamp(0, in_h_i32) as usize;
                    acc += intermediate[in_y * h_row_len + x] as i32 * weight as i32;
                }
                output[out_start + x] =
                    ((acc + (1 << (I16_PRECISION - 1))) >> I16_PRECISION).clamp(0, 255) as u8;
            }
            out_y += 1;
        }
    }
}
#[allow(dead_code)]
/// Column-tiled variant of [`filter_v_all_u8_i16_scalar`]: identical math and
/// rounding, but the plane is processed in vertical strips of `tile_width`
/// columns — presumably to keep the working set cache-resident; confirm with
/// benchmarks before preferring it.
///
/// See `filter_v_all_u8_i16_scalar` for the semantics of the other
/// parameters, edge clamping, and the paired-row fast path.
pub(crate) fn filter_v_all_u8_i16_tiled_scalar(
    _token: ScalarToken,
    intermediate: &[u8],
    output: &mut [u8],
    h_row_len: usize,
    in_h: usize,
    out_h: usize,
    weights: &crate::weights::I16WeightTable,
    tile_width: usize,
) {
    // Highest valid input row index, used for edge clamping.
    let in_h_i32 = in_h as i32 - 1;
    // Outer loop over column strips; each strip runs the full vertical pass.
    for tile_start in (0..h_row_len).step_by(tile_width) {
        let tile_end = (tile_start + tile_width).min(h_row_len);
        let mut out_y = 0;
        while out_y < out_h {
            let left = weights.left[out_y];
            let tap_count = weights.tap_count(out_y);
            let w_a = weights.weights(out_y);
            // Pair up adjacent output rows that share the same tap window.
            let batch2 = out_y + 1 < out_h
                && weights.left[out_y + 1] == left
                && weights.tap_count(out_y + 1) == tap_count;
            if batch2 {
                let w_b = weights.weights(out_y + 1);
                let out_start_a = out_y * h_row_len;
                let out_start_b = (out_y + 1) * h_row_len;
                for x in tile_start..tile_end {
                    let mut acc_a: i32 = 0;
                    let mut acc_b: i32 = 0;
                    for t in 0..tap_count {
                        let in_y = (left + t as i32).clamp(0, in_h_i32) as usize;
                        // One load feeds both rows' accumulators.
                        let v = intermediate[in_y * h_row_len + x] as i32;
                        acc_a += v * w_a[t] as i32;
                        acc_b += v * w_b[t] as i32;
                    }
                    output[out_start_a + x] =
                        ((acc_a + (1 << (I16_PRECISION - 1))) >> I16_PRECISION).clamp(0, 255) as u8;
                    output[out_start_b + x] =
                        ((acc_b + (1 << (I16_PRECISION - 1))) >> I16_PRECISION).clamp(0, 255) as u8;
                }
                out_y += 2;
            } else {
                let out_start = out_y * h_row_len;
                for x in tile_start..tile_end {
                    let mut acc: i32 = 0;
                    for (t, &weight) in w_a[..tap_count].iter().enumerate() {
                        let in_y = (left + t as i32).clamp(0, in_h_i32) as usize;
                        acc += intermediate[in_y * h_row_len + x] as i32 * weight as i32;
                    }
                    output[out_start + x] =
                        ((acc + (1 << (I16_PRECISION - 1))) >> I16_PRECISION).clamp(0, 255) as u8;
                }
                out_y += 1;
            }
        }
    }
}
/// Horizontally resamples one interleaved i16 row with i16 fixed-point
/// weights, keeping the result in i16 (no clamping).
pub(crate) fn filter_h_i16_i16_scalar(
    _token: ScalarToken,
    input: &[i16],
    output: &mut [i16],
    weights: &I16WeightTable,
    channels: usize,
) {
    for out_x in 0..weights.len() {
        let left = weights.left[out_x] as usize;
        let taps = weights.weights(out_x);
        let base = out_x * channels;
        for c in 0..channels {
            let acc: i32 = taps
                .iter()
                .enumerate()
                .map(|(t, &w)| i32::from(input[(left + t) * channels + c]) * i32::from(w))
                .sum();
            // Half-ulp rounding bias, then descale back into the i16 domain.
            output[base + c] = ((acc + (1 << (I16_PRECISION - 1))) >> I16_PRECISION) as i16;
        }
    }
}
/// Vertically resamples an entire i16 intermediate plane with i16 fixed-point
/// weights, producing an i16 plane.
///
/// Same structure as [`filter_v_all_u8_i16_scalar`] (row pairing, half-ulp
/// rounding, edge clamping of `left + t` into `[0, in_h - 1]`), but the
/// result is cast straight to i16 without clamping to a byte range.
pub(crate) fn filter_v_all_i16_i16_scalar(
    _token: ScalarToken,
    intermediate: &[i16],
    output: &mut [i16],
    h_row_len: usize,
    in_h: usize,
    out_h: usize,
    weights: &crate::weights::I16WeightTable,
) {
    // Highest valid input row index, used for edge clamping.
    let in_h_i32 = in_h as i32 - 1;
    let mut out_y = 0;
    while out_y < out_h {
        let left = weights.left[out_y];
        let tap_count = weights.tap_count(out_y);
        let w_a = weights.weights(out_y);
        // Adjacent rows sharing a tap window are processed together so each
        // input sample is loaded once for two accumulators.
        let batch2 = out_y + 1 < out_h
            && weights.left[out_y + 1] == left
            && weights.tap_count(out_y + 1) == tap_count;
        if batch2 {
            let w_b = weights.weights(out_y + 1);
            let out_start_a = out_y * h_row_len;
            let out_start_b = (out_y + 1) * h_row_len;
            for x in 0..h_row_len {
                let mut acc_a: i32 = 0;
                let mut acc_b: i32 = 0;
                for t in 0..tap_count {
                    let in_y = (left + t as i32).clamp(0, in_h_i32) as usize;
                    let v = intermediate[in_y * h_row_len + x] as i32;
                    acc_a += v * w_a[t] as i32;
                    acc_b += v * w_b[t] as i32;
                }
                output[out_start_a + x] =
                    ((acc_a + (1 << (I16_PRECISION - 1))) >> I16_PRECISION) as i16;
                output[out_start_b + x] =
                    ((acc_b + (1 << (I16_PRECISION - 1))) >> I16_PRECISION) as i16;
            }
            out_y += 2;
        } else {
            let out_start = out_y * h_row_len;
            for x in 0..h_row_len {
                let mut acc: i32 = 0;
                for (t, &weight) in w_a[..tap_count].iter().enumerate() {
                    let in_y = (left + t as i32).clamp(0, in_h_i32) as usize;
                    acc += intermediate[in_y * h_row_len + x] as i32 * weight as i32;
                }
                output[out_start + x] =
                    ((acc + (1 << (I16_PRECISION - 1))) >> I16_PRECISION) as i16;
            }
            out_y += 1;
        }
    }
}
#[allow(dead_code)]
/// Doubly-tiled variant of [`filter_v_all_u8_i16_scalar`]: output rows are
/// processed in horizontal bands of `band_size` rows, and within each band the
/// columns are walked in strips of `tile_width` — presumably a 2-D cache
/// blocking scheme; confirm with benchmarks. The arithmetic (half-ulp
/// rounding, u8 clamp, edge-row clamping) is identical to the untiled version.
///
/// Note: the paired-row fast path only pairs rows inside the same band
/// (`out_y + 1 < band_end`).
pub(crate) fn filter_v_all_u8_i16_2d_tiled_scalar(
    _token: ScalarToken,
    intermediate: &[u8],
    output: &mut [u8],
    h_row_len: usize,
    in_h: usize,
    out_h: usize,
    weights: &crate::weights::I16WeightTable,
    tile_width: usize,
    band_size: usize,
) {
    // Highest valid input row index, used for edge clamping.
    let in_h_i32 = in_h as i32 - 1;
    let mut band_start = 0;
    while band_start < out_h {
        let band_end = (band_start + band_size).min(out_h);
        // Column strips within the current band of output rows.
        for tile_start in (0..h_row_len).step_by(tile_width) {
            let tile_end = (tile_start + tile_width).min(h_row_len);
            let mut out_y = band_start;
            while out_y < band_end {
                let left = weights.left[out_y];
                let tap_count = weights.tap_count(out_y);
                let w_a = weights.weights(out_y);
                // Pair rows that share a tap window, but never across bands.
                let batch2 = out_y + 1 < band_end
                    && weights.left[out_y + 1] == left
                    && weights.tap_count(out_y + 1) == tap_count;
                if batch2 {
                    let w_b = weights.weights(out_y + 1);
                    let out_start_a = out_y * h_row_len;
                    let out_start_b = (out_y + 1) * h_row_len;
                    for x in tile_start..tile_end {
                        let mut acc_a: i32 = 0;
                        let mut acc_b: i32 = 0;
                        for t in 0..tap_count {
                            let in_y = (left + t as i32).clamp(0, in_h_i32) as usize;
                            // One load feeds both rows' accumulators.
                            let v = intermediate[in_y * h_row_len + x] as i32;
                            acc_a += v * w_a[t] as i32;
                            acc_b += v * w_b[t] as i32;
                        }
                        output[out_start_a + x] = ((acc_a + (1 << (I16_PRECISION - 1)))
                            >> I16_PRECISION)
                            .clamp(0, 255) as u8;
                        output[out_start_b + x] = ((acc_b + (1 << (I16_PRECISION - 1)))
                            >> I16_PRECISION)
                            .clamp(0, 255) as u8;
                    }
                    out_y += 2;
                } else {
                    let out_start = out_y * h_row_len;
                    for x in tile_start..tile_end {
                        let mut acc: i32 = 0;
                        for (t, &weight) in w_a[..tap_count].iter().enumerate() {
                            let in_y = (left + t as i32).clamp(0, in_h_i32) as usize;
                            acc += intermediate[in_y * h_row_len + x] as i32 * weight as i32;
                        }
                        output[out_start + x] = ((acc + (1 << (I16_PRECISION - 1)))
                            >> I16_PRECISION)
                            .clamp(0, 255) as u8;
                    }
                    out_y += 1;
                }
            }
        }
        band_start = band_end;
    }
}
#[allow(dead_code)]
/// Column-tiled variant of [`filter_v_all_i16_i16_scalar`]: identical
/// fixed-point math (i16 in, i16 out, no byte clamp), but columns are walked
/// in strips of `tile_width` — presumably for cache locality; confirm with
/// benchmarks.
pub(crate) fn filter_v_all_i16_i16_tiled_scalar(
    _token: ScalarToken,
    intermediate: &[i16],
    output: &mut [i16],
    h_row_len: usize,
    in_h: usize,
    out_h: usize,
    weights: &crate::weights::I16WeightTable,
    tile_width: usize,
) {
    // Highest valid input row index, used for edge clamping.
    let in_h_i32 = in_h as i32 - 1;
    for tile_start in (0..h_row_len).step_by(tile_width) {
        let tile_end = (tile_start + tile_width).min(h_row_len);
        let mut out_y = 0;
        while out_y < out_h {
            let left = weights.left[out_y];
            let tap_count = weights.tap_count(out_y);
            let w_a = weights.weights(out_y);
            // Pair adjacent rows that share the same tap window.
            let batch2 = out_y + 1 < out_h
                && weights.left[out_y + 1] == left
                && weights.tap_count(out_y + 1) == tap_count;
            if batch2 {
                let w_b = weights.weights(out_y + 1);
                let out_start_a = out_y * h_row_len;
                let out_start_b = (out_y + 1) * h_row_len;
                for x in tile_start..tile_end {
                    let mut acc_a: i32 = 0;
                    let mut acc_b: i32 = 0;
                    for t in 0..tap_count {
                        let in_y = (left + t as i32).clamp(0, in_h_i32) as usize;
                        // One load feeds both rows' accumulators.
                        let v = intermediate[in_y * h_row_len + x] as i32;
                        acc_a += v * w_a[t] as i32;
                        acc_b += v * w_b[t] as i32;
                    }
                    output[out_start_a + x] =
                        ((acc_a + (1 << (I16_PRECISION - 1))) >> I16_PRECISION) as i16;
                    output[out_start_b + x] =
                        ((acc_b + (1 << (I16_PRECISION - 1))) >> I16_PRECISION) as i16;
                }
                out_y += 2;
            } else {
                let out_start = out_y * h_row_len;
                for x in tile_start..tile_end {
                    let mut acc: i32 = 0;
                    for (t, &weight) in w_a[..tap_count].iter().enumerate() {
                        let in_y = (left + t as i32).clamp(0, in_h_i32) as usize;
                        acc += intermediate[in_y * h_row_len + x] as i32 * weight as i32;
                    }
                    output[out_start + x] =
                        ((acc + (1 << (I16_PRECISION - 1))) >> I16_PRECISION) as i16;
                }
                out_y += 1;
            }
        }
    }
}
/// Vertically filters one output row of u8 samples using i16 fixed-point
/// weights, one weight per source row.
///
/// Each column's dot product is accumulated in i32, rounded with a half-ulp
/// bias before the `I16_PRECISION` shift, and clamped to 0..=255.
pub(crate) fn filter_v_row_u8_i16_scalar(
    _token: ScalarToken,
    rows: &[&[u8]],
    output: &mut [u8],
    weights: &[i16],
) {
    let width = output.len();
    debug_assert_eq!(rows.len(), weights.len());
    // Consistency with filter_v_row_f32_scalar: catch short source rows in
    // debug builds instead of panicking mid-loop on an index.
    debug_assert!(rows.iter().all(|row| row.len() >= width));
    for x in 0..width {
        let mut acc: i32 = 0;
        for (row, &weight) in rows.iter().zip(weights.iter()) {
            acc += row[x] as i32 * weight as i32;
        }
        let rounded = (acc + (1 << (I16_PRECISION - 1))) >> I16_PRECISION;
        output[x] = rounded.clamp(0, 255) as u8;
    }
}
/// Vertically filters one output row of i16 intermediates using i16
/// fixed-point weights, one weight per source row; result stays i16
/// (no byte clamp).
pub(crate) fn filter_v_row_i16_scalar(
    _token: ScalarToken,
    rows: &[&[i16]],
    output: &mut [i16],
    weights: &[i16],
) {
    let width = output.len();
    debug_assert_eq!(rows.len(), weights.len());
    // Consistency with filter_v_row_f32_scalar: catch short source rows in
    // debug builds instead of panicking mid-loop on an index.
    debug_assert!(rows.iter().all(|row| row.len() >= width));
    for x in 0..width {
        let mut acc: i32 = 0;
        for (row, &weight) in rows.iter().zip(weights.iter()) {
            acc += row[x] as i32 * weight as i32;
        }
        let rounded = (acc + (1 << (I16_PRECISION - 1))) >> I16_PRECISION;
        output[x] = rounded as i16;
    }
}
/// Premultiplies RGB by alpha for a row of RGBA u8 pixels
/// (`c * a / 255`, rounded via the +127 bias); alpha is copied through.
pub(crate) fn premultiply_u8_row_scalar(_token: ScalarToken, input: &[u8], output: &mut [u8]) {
    debug_assert_eq!(input.len(), output.len());
    for (src, dst) in input.chunks_exact(4).zip(output.chunks_exact_mut(4)) {
        let alpha = src[3] as u16;
        // Max product is 255 * 255 + 127, which still fits in u16.
        for c in 0..3 {
            dst[c] = ((src[c] as u16 * alpha + 127) / 255) as u8;
        }
        dst[3] = src[3];
    }
}
/// Undoes alpha premultiplication in place for a row of RGBA u8 pixels:
/// zero alpha forces the color channels to zero, full alpha is a no-op, and
/// anything in between divides with rounding (`c * 255 / a`, half-a bias).
pub(crate) fn unpremultiply_u8_row_scalar(_token: ScalarToken, row: &mut [u8]) {
    for px in row.chunks_exact_mut(4) {
        match px[3] {
            0 => {
                px[0] = 0;
                px[1] = 0;
                px[2] = 0;
            }
            255 => {}
            alpha => {
                let a = alpha as u16;
                let half = a / 2;
                for c in 0..3 {
                    px[c] = ((px[c] as u16 * 255 + half) / a).min(255) as u8;
                }
            }
        }
    }
}
#[inline]
/// Software conversion from f32 to IEEE binary16 bits, with
/// round-to-nearest-even, subnormal generation, and overflow to infinity.
/// f32 values below the smallest half subnormal (including all f32
/// subnormals) flush to signed zero; NaN payloads keep their top mantissa
/// bits (forced non-zero).
pub(super) fn f32_to_f16_soft(x: f32) -> u16 {
    let bits = x.to_bits();
    let sign = (bits >> 16) & 0x8000;
    let exp = ((bits >> 23) & 0xFF) as i32;
    let mantissa = bits & 0x007F_FFFF;

    // Inf / NaN: keep the widened exponent; force a non-zero NaN payload.
    if exp == 0xFF {
        let payload = if mantissa != 0 { (mantissa >> 13).max(1) } else { 0 };
        return (sign | 0x7C00 | payload) as u16;
    }
    // f32 zero or subnormal: far below half range, flush to signed zero.
    if exp == 0 {
        return sign as u16;
    }
    let half_exp = exp - 112; // rebias: 127 (f32) -> 15 (f16)
    if half_exp >= 31 {
        // Exponent overflows half's range -> infinity.
        return (sign | 0x7C00) as u16;
    }
    if half_exp > 0 {
        // Normal result: drop 13 mantissa bits with round-to-nearest-even.
        let kept = mantissa >> 13;
        let dropped = mantissa & 0x1FFF;
        let round = u32::from(dropped > 0x1000 || (dropped == 0x1000 && (kept & 1) != 0));
        // A mantissa carry may ripple into the exponent; that is the
        // correctly rounded result (and can produce infinity at the top).
        return ((sign | ((half_exp as u32) << 10) | kept) + round) as u16;
    }
    // Subnormal half result.
    if half_exp < -10 {
        // Too small even for a half subnormal -> signed zero.
        return sign as u16;
    }
    let full = mantissa | 0x0080_0000; // restore the implicit leading one
    let shift = (1 - half_exp) as u32 + 13;
    let kept = full >> shift;
    let half_bit = 1u32 << (shift - 1);
    let dropped = full & ((1u32 << shift) - 1);
    let round = u32::from(dropped > half_bit || (dropped == half_bit && (kept & 1) != 0));
    (sign | (kept + round)) as u16
}
#[inline]
/// Software conversion from IEEE binary16 bits to f32. Exact: every half
/// value (including subnormals, infinities, and NaN payloads) is
/// representable in f32.
pub(super) fn f16_to_f32_soft(h: u16) -> f32 {
    let bits = h as u32;
    let sign = (bits & 0x8000) << 16;
    let exp = (bits >> 10) & 0x1F;
    let mantissa = bits & 0x03FF;
    match exp {
        // Signed zero.
        0 if mantissa == 0 => f32::from_bits(sign),
        // Subnormal half: shift the mantissa up until the implicit leading
        // bit appears, decrementing the exponent as we go.
        0 => {
            let mut e = 1i32;
            let mut m = mantissa;
            while m & 0x0400 == 0 {
                m <<= 1;
                e -= 1;
            }
            let f32_exp = ((127 - 15 + e) as u32) << 23;
            f32::from_bits(sign | f32_exp | ((m & 0x03FF) << 13))
        }
        // Inf / NaN: widen the exponent, keep the payload.
        31 => f32::from_bits(sign | 0x7F80_0000 | (mantissa << 13)),
        // Normal: rebias 15 -> 127 and widen the mantissa.
        _ => f32::from_bits(sign | ((exp + 112) << 23) | (mantissa << 13)),
    }
}
/// Converts a row of f32 samples to half-precision bit patterns.
pub(crate) fn f32_to_f16_row_scalar(_token: ScalarToken, input: &[f32], output: &mut [u16]) {
    debug_assert_eq!(input.len(), output.len());
    for (dst, &src) in output.iter_mut().zip(input) {
        *dst = f32_to_f16_soft(src);
    }
}
/// Converts a row of half-precision bit patterns to f32 samples.
pub(crate) fn f16_to_f32_row_scalar(_token: ScalarToken, input: &[u16], output: &mut [f32]) {
    debug_assert_eq!(input.len(), output.len());
    for (dst, &src) in output.iter_mut().zip(input) {
        *dst = f16_to_f32_soft(src);
    }
}
/// Horizontally resamples one interleaved f32 row, storing the result as
/// half-precision bit patterns. Accumulation happens in f32; only the final
/// value is narrowed to f16.
pub(crate) fn filter_h_row_f32_to_f16_scalar(
    _token: ScalarToken,
    input: &[f32],
    output: &mut [u16],
    weights: &F32WeightTable,
    channels: usize,
) {
    for out_x in 0..weights.len() {
        let left = weights.left[out_x] as usize;
        let taps = weights.weights(out_x);
        let base = out_x * channels;
        for c in 0..channels {
            // Left fold keeps the same float summation order as a plain loop.
            let acc = taps
                .iter()
                .enumerate()
                .fold(0.0f32, |acc, (t, &weight)| {
                    acc + input[(left + t) * channels + c] * weight
                });
            output[base + c] = f32_to_f16_soft(acc);
        }
    }
}
/// Vertically filters one output row from a window of half-precision source
/// rows, accumulating in f32. One weight per source row.
pub(crate) fn filter_v_row_f16_scalar(
    _token: ScalarToken,
    rows: &[&[u16]],
    output: &mut [f32],
    weights: &[f32],
) {
    let width = output.len();
    debug_assert_eq!(rows.len(), weights.len());
    output.fill(0.0);
    for (row, &weight) in rows.iter().zip(weights) {
        debug_assert!(row.len() >= width);
        // Widen each half sample to f32 before the weighted accumulation.
        for (acc, &h) in output.iter_mut().zip(&row[..width]) {
            *acc += f16_to_f32_soft(h) * weight;
        }
    }
}
/// Vertically resamples a whole half-precision plane into f32 output.
/// `intermediate` holds `in_h` rows of `h_row_len` samples; tap indices
/// outside `[0, in_h)` are clamped so edge rows repeat.
pub(crate) fn filter_v_all_f16_scalar(
    _token: ScalarToken,
    intermediate: &[u16],
    output: &mut [f32],
    h_row_len: usize,
    in_h: usize,
    out_h: usize,
    weights: &F32WeightTable,
) {
    // Loop-invariant clamp bound for source row indices.
    let max_y = in_h as i32 - 1;
    for out_y in 0..out_h {
        let left = weights.left[out_y];
        let taps = &weights.weights(out_y)[..weights.tap_count(out_y)];
        let dst = &mut output[out_y * h_row_len..(out_y + 1) * h_row_len];
        for (x, sample) in dst.iter_mut().enumerate() {
            let mut acc = 0.0f32;
            for (t, &weight) in taps.iter().enumerate() {
                let in_y = (left + t as i32).clamp(0, max_y) as usize;
                acc += f16_to_f32_soft(intermediate[in_y * h_row_len + x]) * weight;
            }
            *sample = acc;
        }
    }
}
/// Converts a row of sRGB-encoded u8 samples to linear-light f32 by
/// forwarding `channels` and `has_alpha` unchanged to the shared
/// implementation in `crate::color::srgb_u8_to_linear_f32_impl`.
pub(crate) fn srgb_u8_to_linear_f32_scalar(
    _token: ScalarToken,
    input: &[u8],
    output: &mut [f32],
    channels: usize,
    has_alpha: bool,
) {
    crate::color::srgb_u8_to_linear_f32_impl(input, output, channels, has_alpha);
}
/// Converts a row of linear-light f32 samples to sRGB-encoded u8 by
/// forwarding `channels` and `has_alpha` unchanged to the shared
/// implementation in `crate::color::linear_f32_to_srgb_u8_impl`.
pub(crate) fn linear_f32_to_srgb_u8_scalar(
    _token: ScalarToken,
    input: &[f32],
    output: &mut [u8],
    channels: usize,
    has_alpha: bool,
) {
    crate::color::linear_f32_to_srgb_u8_impl(input, output, channels, has_alpha);
}
/// Applies a transfer function `tf` in place to one row of f32 samples.
///
/// When `has_alpha` is set (and there are at least 2 channels), only the
/// first `channels - 1` samples of each pixel are transformed; the trailing
/// alpha sample passes through. Any tail shorter than one full pixel is left
/// untouched. Otherwise every sample in the row is transformed.
fn tf_row_inplace_scalar(row: &mut [f32], channels: usize, has_alpha: bool, tf: fn(f32) -> f32) {
    let skip_alpha = has_alpha && channels >= 2;
    if !skip_alpha {
        for value in row.iter_mut() {
            *value = tf(*value);
        }
        return;
    }
    for pixel in row.chunks_exact_mut(channels) {
        let (colors, _alpha) = pixel.split_at_mut(channels - 1);
        for value in colors {
            *value = tf(*value);
        }
    }
}
/// Applies `crate::fastmath::srgb_to_linear` in place to a row of f32
/// samples; the last channel (alpha) of each pixel is skipped when
/// `has_alpha` is set and `channels >= 2`.
pub(crate) fn srgb_to_linear_row_scalar(
    _token: ScalarToken,
    row: &mut [f32],
    channels: usize,
    has_alpha: bool,
) {
    tf_row_inplace_scalar(row, channels, has_alpha, crate::fastmath::srgb_to_linear);
}
/// Applies `crate::fastmath::srgb_from_linear` in place to a row of f32
/// samples; the last channel (alpha) of each pixel is skipped when
/// `has_alpha` is set and `channels >= 2`.
pub(crate) fn srgb_from_linear_row_scalar(
    _token: ScalarToken,
    row: &mut [f32],
    channels: usize,
    has_alpha: bool,
) {
    tf_row_inplace_scalar(row, channels, has_alpha, crate::fastmath::srgb_from_linear);
}
/// Applies `crate::fastmath::bt709_to_linear` in place to a row of f32
/// samples; the last channel (alpha) of each pixel is skipped when
/// `has_alpha` is set and `channels >= 2`.
pub(crate) fn bt709_to_linear_row_scalar(
    _token: ScalarToken,
    row: &mut [f32],
    channels: usize,
    has_alpha: bool,
) {
    tf_row_inplace_scalar(row, channels, has_alpha, crate::fastmath::bt709_to_linear);
}
/// Applies `crate::fastmath::bt709_from_linear` in place to a row of f32
/// samples; the last channel (alpha) of each pixel is skipped when
/// `has_alpha` is set and `channels >= 2`.
pub(crate) fn bt709_from_linear_row_scalar(
    _token: ScalarToken,
    row: &mut [f32],
    channels: usize,
    has_alpha: bool,
) {
    tf_row_inplace_scalar(row, channels, has_alpha, crate::fastmath::bt709_from_linear);
}
/// Applies `crate::fastmath::pq_to_linear` in place to a row of f32
/// samples; the last channel (alpha) of each pixel is skipped when
/// `has_alpha` is set and `channels >= 2`.
pub(crate) fn pq_to_linear_row_scalar(
    _token: ScalarToken,
    row: &mut [f32],
    channels: usize,
    has_alpha: bool,
) {
    tf_row_inplace_scalar(row, channels, has_alpha, crate::fastmath::pq_to_linear);
}
/// Applies `crate::fastmath::pq_from_linear` in place to a row of f32
/// samples; the last channel (alpha) of each pixel is skipped when
/// `has_alpha` is set and `channels >= 2`.
pub(crate) fn pq_from_linear_row_scalar(
    _token: ScalarToken,
    row: &mut [f32],
    channels: usize,
    has_alpha: bool,
) {
    tf_row_inplace_scalar(row, channels, has_alpha, crate::fastmath::pq_from_linear);
}
/// Applies `crate::fastmath::hlg_to_linear` in place to a row of f32
/// samples; the last channel (alpha) of each pixel is skipped when
/// `has_alpha` is set and `channels >= 2`.
pub(crate) fn hlg_to_linear_row_scalar(
    _token: ScalarToken,
    row: &mut [f32],
    channels: usize,
    has_alpha: bool,
) {
    tf_row_inplace_scalar(row, channels, has_alpha, crate::fastmath::hlg_to_linear);
}
/// Applies `crate::fastmath::hlg_from_linear` in place to a row of f32
/// samples; the last channel (alpha) of each pixel is skipped when
/// `has_alpha` is set and `channels >= 2`.
pub(crate) fn hlg_from_linear_row_scalar(
    _token: ScalarToken,
    row: &mut [f32],
    channels: usize,
    has_alpha: bool,
) {
    tf_row_inplace_scalar(row, channels, has_alpha, crate::fastmath::hlg_from_linear);
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::filter::{Filter, InterpolationDetails};
    #[cfg(not(feature = "std"))]
    use alloc::{vec, vec::Vec};

    /// A box filter at 1:1 scale should reproduce the input values.
    #[test]
    fn test_horizontal_convolution_identity() {
        let filter = InterpolationDetails::create(Filter::Box);
        let width = 10;
        let channels = 4;
        let weights = F32WeightTable::new(width, width, &filter);
        // Ramp input so any index mix-up shows as a value mismatch.
        let input: Vec<f32> = (0..width as usize * channels)
            .map(|i| (i as f32) / (width as f32 * channels as f32))
            .collect();
        let mut output = vec![0.0f32; width as usize * channels];
        filter_h_row_f32_scalar(ScalarToken, &input, &mut output, &weights, channels);
        for i in 0..input.len() {
            assert!(
                (input[i] - output[i]).abs() < 0.01,
                "Mismatch at {}: {} vs {}",
                i,
                input[i],
                output[i]
            );
        }
    }

    /// A single source row with weight 1.0 must pass through unchanged.
    #[test]
    fn test_vertical_convolution_single_row() {
        let width = 40;
        let row: Vec<f32> = (0..width).map(|i| i as f32).collect();
        let mut output = vec![0.0f32; width];
        filter_v_row_f32_scalar(ScalarToken, &[&row], &mut output, &[1.0]);
        for i in 0..width {
            assert!(
                (row[i] - output[i]).abs() < 1e-6,
                "Mismatch at {}: {} vs {}",
                i,
                row[i],
                output[i]
            );
        }
    }

    /// Two rows with equal 0.5 weights must average to the midpoint.
    #[test]
    fn test_vertical_convolution_average() {
        let width = 8;
        let row_a: Vec<f32> = vec![1.0; width];
        let row_b: Vec<f32> = vec![3.0; width];
        let mut output = vec![0.0f32; width];
        filter_v_row_f32_scalar(ScalarToken, &[&row_a, &row_b], &mut output, &[0.5, 0.5]);
        for v in &output {
            assert!((*v - 2.0).abs() < 1e-6);
        }
    }

    /// Downscaling a constant image must preserve the constant (weights
    /// are normalized, so any filter kernel should sum to ~1).
    #[test]
    fn test_horizontal_downscale() {
        let filter = InterpolationDetails::create(Filter::Lanczos);
        let in_width = 100u32;
        let out_width = 50u32;
        let channels = 4;
        let weights = F32WeightTable::new(in_width, out_width, &filter);
        let input = vec![0.5f32; in_width as usize * channels];
        let mut output = vec![0.0f32; out_width as usize * channels];
        filter_h_row_f32_scalar(ScalarToken, &input, &mut output, &weights, channels);
        for (i, &v) in output.iter().enumerate() {
            assert!(
                (v - 0.5).abs() < 0.01,
                "Constant input should produce constant output at {}: {}",
                i,
                v
            );
        }
    }
}