use crate::compat::*;
use crate::convolution::Coefficients;
const fn get_clip_table() -> [u8; 1280] {
let mut table = [0u8; 1280];
let mut i: usize = 640;
while i < 640 + 255 {
table[i] = (i - 640) as u8;
i += 1;
}
while i < 1280 {
table[i] = 255;
i += 1;
}
table
}
static CLIP8_LOOKUPS: [u8; 1280] = get_clip_table();
const PRECISION_BITS: u8 = 32 - 8 - 2;
const MAX_COEFFS_PRECISION: u8 = 16 - 1;
#[derive(Debug, Clone)]
pub(crate) struct CoefficientsI16Chunk {
pub start: u32,
values: Vec<i16>,
}
impl CoefficientsI16Chunk {
#[inline(always)]
pub fn values(&self) -> &[i16] {
&self.values
}
}
pub(crate) struct Normalizer16 {
precision: u8,
chunks: Vec<CoefficientsI16Chunk>,
}
impl Normalizer16 {
#[inline]
pub fn new(coefficients: Coefficients) -> Self {
let max_weight = coefficients
.values
.iter()
.max_by(|&x, &y| x.partial_cmp(y).unwrap())
.unwrap_or(&0.0)
.to_owned();
let mut precision = 0u8;
for cur_precision in 0..PRECISION_BITS {
precision = cur_precision;
let next_value: i32 = (max_weight * (1 << (precision + 1)) as f64).round() as i32;
if next_value >= (1 << MAX_COEFFS_PRECISION) {
break;
}
}
debug_assert!(precision >= 4);
let mut chunks = Vec::with_capacity(coefficients.bounds.len());
if coefficients.window_size > 0 {
let scale = (1 << precision) as f64;
let coef_chunks = coefficients.values.chunks_exact(coefficients.window_size);
for (chunk, bound) in coef_chunks.zip(&coefficients.bounds) {
let chunk_i16: Vec<i16> = chunk
.iter()
.take(bound.size as usize)
.map(|&v| (v * scale).round() as i16)
.collect();
chunks.push(CoefficientsI16Chunk {
start: bound.start,
values: chunk_i16,
});
}
}
Self { precision, chunks }
}
#[inline(always)]
pub fn precision(&self) -> u8 {
self.precision
}
#[inline(always)]
pub fn chunks(&self) -> &[CoefficientsI16Chunk] {
&self.chunks
}
pub fn chunks_len(&self) -> usize {
self.chunks.len()
}
#[inline(always)]
pub unsafe fn clip(&self, v: i32) -> u8 {
let index = (640 + (v >> self.precision)) as usize;
debug_assert!((128..=1151).contains(&index));
*CLIP8_LOOKUPS.get_unchecked(index)
}
}
const PRECISION16_BITS: u8 = 64 - 16 - 2;
const MAX_COEFFS_PRECISION16: u8 = 32 - 1;
#[derive(Debug, Clone)]
pub(crate) struct CoefficientsI32Chunk {
pub start: u32,
values: Vec<i32>,
}
impl CoefficientsI32Chunk {
#[inline(always)]
pub fn values(&self) -> &[i32] {
&self.values
}
}
pub(crate) struct Normalizer32 {
precision: u8,
chunks: Vec<CoefficientsI32Chunk>,
}
impl Normalizer32 {
#[inline]
pub fn new(coefficients: Coefficients) -> Self {
let max_weight = coefficients
.values
.iter()
.max_by(|&x, &y| x.partial_cmp(y).unwrap())
.unwrap_or(&0.0)
.to_owned();
let mut precision = 0u8;
for cur_precision in 0..PRECISION16_BITS {
precision = cur_precision;
let next_value: i64 = (max_weight * (1i64 << (precision + 1)) as f64).round() as i64;
if next_value >= (1i64 << MAX_COEFFS_PRECISION16) {
break;
}
}
debug_assert!(precision >= 4);
let mut chunks = Vec::with_capacity(coefficients.bounds.len());
if coefficients.window_size > 0 {
let scale = (1i64 << precision) as f64;
let coef_chunks = coefficients.values.chunks_exact(coefficients.window_size);
for (chunk, bound) in coef_chunks.zip(&coefficients.bounds) {
let chunk_i32: Vec<i32> = chunk
.iter()
.take(bound.size as usize)
.map(|&v| (v * scale).round() as i32)
.collect();
chunks.push(CoefficientsI32Chunk {
start: bound.start,
values: chunk_i32,
});
}
}
Self { precision, chunks }
}
#[inline]
pub fn precision(&self) -> u8 {
self.precision
}
#[inline(always)]
pub fn chunks(&self) -> &[CoefficientsI32Chunk] {
&self.chunks
}
#[inline(always)]
pub fn chunks_len(&self) -> usize {
self.chunks.len()
}
#[inline(always)]
pub fn clip(&self, v: i64) -> u16 {
(v >> self.precision).min(u16::MAX as i64).max(0) as u16
}
}
macro_rules! try_process_in_threads_h {
{$op: ident($src_view: ident, $dst_view: ident, $offset: ident, $($arg: ident),+$(,)?);} => {
#[allow(unused_labels)]
'block: {
#[cfg(feature = "rayon")]
{
use crate::threading::split_h_two_images_for_threading;
use rayon::prelude::*;
if let Some(iter) = split_h_two_images_for_threading($src_view, $dst_view, $offset) {
iter.for_each(|(src, mut dst)| {
$op(&src, &mut dst, 0, $($arg),+);
});
break 'block;
}
}
$op($src_view, $dst_view, $offset, $($arg),+);
}
};
}
macro_rules! try_process_in_threads_v {
{$op: ident($src_view: ident, $dst_view: ident, $offset: ident, $($arg: ident),+$(,)?);} => {
#[allow(unused_labels)]
'block: {
#[cfg(feature = "rayon")]
{
use crate::threading::split_v_two_images_for_threading;
use rayon::prelude::*;
if let Some(iter) = split_v_two_images_for_threading($src_view, $dst_view, $offset) {
iter.for_each(|(src, mut dst)| {
$op(&src, &mut dst, 0, $($arg),+);
});
break 'block;
}
}
$op($src_view, $dst_view, $offset, $($arg),+);
}
};
}
#[cfg(test)]
mod tests {
use super::*;
use crate::convolution::Bound;
fn get_coefficients(value: f64) -> Coefficients {
Coefficients {
values: vec![value],
window_size: 1,
bounds: vec![Bound { start: 0, size: 1 }],
}
}
#[test]
fn test_minimal_precision() {
assert!(Normalizer16::new(get_coefficients(0.0)).precision() >= 4);
assert!(Normalizer16::new(get_coefficients(2.0)).precision() >= 4);
assert!(Normalizer32::new(get_coefficients(0.0)).precision() >= 4);
assert!(Normalizer32::new(get_coefficients(2.0)).precision() >= 4);
}
}