#![forbid(unsafe_code)]
use num_traits::{AsPrimitive, Bounded};
/// Table of filter coefficients plus the bookkeeping needed to walk it row by row.
///
/// `T` is the coefficient type: the conversions in this file read an `f32`
/// table and produce integer (or `f16`) tables with the same row structure.
#[derive(Debug, Clone)]
pub(crate) struct FilterWeights<T> {
/// Coefficient rows stored back to back in one flat vector.
///
/// The `f32` conversions in this file read their source with a row stride of
/// `kernel_size` and write their output with a row stride of `aligned_size`.
pub weights: Vec<T>,
/// One `FilterBounds` per entry; copied unchanged by every conversion in this file.
/// NOTE(review): presumably the source window each row applies to — confirm
/// against the code that builds the table.
pub bounds: Vec<FilterBounds>,
/// Number of coefficients in a row before any padding is added.
pub kernel_size: usize,
/// Row stride after padding. Conversions set it to `kernel_size` rounded up to
/// the requested alignment, or to `kernel_size` when no alignment is requested.
pub aligned_size: usize,
/// Number of rows; conversions allocate `distinct_elements * aligned_size`
/// output coefficients.
pub distinct_elements: usize,
/// Carried through every conversion unchanged.
/// NOTE(review): not interpreted anywhere in this file — confirm its meaning
/// at the producer.
pub coeffs_size: i32,
}
/// A `start`/`size` pair attached to each entry of [`FilterWeights::bounds`].
///
/// Ordering and equality are derived, so values compare by `start` first and
/// by `size` second.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub(crate) struct FilterBounds {
    /// First index of the span.
    pub start: usize,
    /// Length of the span.
    pub size: usize,
}

impl FilterBounds {
    /// Creates a span beginning at `start` and covering `size` elements.
    pub(crate) fn new(start: usize, size: usize) -> Self {
        Self { start, size }
    }
}
impl<T> FilterWeights<T> {
pub(crate) fn new(
slice_ref: Vec<T>,
kernel_size: usize,
aligned_size: usize,
distinct_elements: usize,
coeffs_size: i32,
bounds: Vec<FilterBounds>,
) -> FilterWeights<T> {
FilterWeights::<T> {
weights: slice_ref,
bounds,
kernel_size,
aligned_size,
distinct_elements,
coeffs_size,
}
}
}
impl FilterWeights<f32> {
    /// Row stride used for converted tables: `kernel_size` rounded up to the
    /// next multiple of `alignment`, or `kernel_size` itself when `alignment`
    /// is zero (no padding requested).
    fn padded_kernel_size(&self, alignment: usize) -> usize {
        if alignment == 0 {
            self.kernel_size
        } else {
            self.kernel_size.div_ceil(alignment) * alignment
        }
    }

    /// Converts the table to `i16` fixed point with `PRECISION` fractional bits.
    ///
    /// Shorthand for [`Self::numerical_approximation`] with `J = i16`; see that
    /// method for the padding, rounding and panic behaviour.
    pub(crate) fn numerical_approximation_i16<const PRECISION: i32>(
        &self,
        alignment: usize,
    ) -> FilterWeights<i16> {
        self.numerical_approximation::<i16, PRECISION>(alignment)
    }

    /// Converts the table to fixed point of type `J` with `PRECISION`
    /// fractional bits.
    ///
    /// Each weight is multiplied by `2^PRECISION`, rounded to the nearest
    /// integer and clamped to the range of `J`. Rows are read from `weights`
    /// with a stride of `kernel_size` and written with a stride of the padded
    /// row length; padding entries keep `J::default()`.
    ///
    /// `alignment` is the multiple the row length is rounded up to; `0` means
    /// no padding. `bounds`, `kernel_size`, `distinct_elements` and
    /// `coeffs_size` are carried over unchanged, and `aligned_size` is set to
    /// the padded row length.
    ///
    /// # Panics
    ///
    /// Panics if `kernel_size` is zero, because the rows are split with
    /// `chunks_exact`.
    pub(crate) fn numerical_approximation<
        J: Clone + Default + Copy + 'static + Bounded + AsPrimitive<f64>,
        const PRECISION: i32,
    >(
        &self,
        alignment: usize,
    ) -> FilterWeights<J>
    where
        f64: AsPrimitive<J>,
    {
        let align = self.padded_kernel_size(alignment);
        let precision_scale: f64 = (1i64 << PRECISION) as f64;
        let mut output_kernel = vec![J::default(); self.distinct_elements * align];
        let lower_bound = J::min_value().as_();
        let upper_bound = J::max_value().as_();

        for (chunk, kernel_chunk) in self
            .weights
            .chunks_exact(self.kernel_size)
            .zip(output_kernel.chunks_exact_mut(align))
        {
            for (&weight, kernel) in chunk.iter().zip(kernel_chunk) {
                // Round to nearest before narrowing: the float-to-integer cast
                // truncates toward zero, which would shrink every coefficient
                // and leave each row short of its intended sum.
                *kernel = (weight as f64 * precision_scale)
                    .round()
                    .min(upper_bound)
                    .max(lower_bound)
                    .as_();
            }
        }

        FilterWeights::new(
            output_kernel,
            self.kernel_size,
            align,
            self.distinct_elements,
            self.coeffs_size,
            self.bounds.clone(),
        )
    }

    /// Converts the table to signed Q0.7 (`i8`, 7 fractional bits).
    ///
    /// Each weight is multiplied by 128, rounded to the nearest integer and
    /// clamped to the `i8` range. Every row is then nudged so that its
    /// coefficients add up to 128: the difference is added to the middle tap
    /// of the row, saturating at the `i8` limits. The middle tap is chosen
    /// among the first `kernel_size` entries, so padding added for
    /// `alignment` always stays zero.
    ///
    /// `alignment` is the multiple the row length is rounded up to; `0` means
    /// no padding. All other fields are carried over as in
    /// [`Self::numerical_approximation`].
    ///
    /// # Panics
    ///
    /// Panics if `kernel_size` is zero, because the rows are split with
    /// `chunks_exact`.
    #[allow(dead_code)]
    pub(crate) fn numerical_approximation_q0_7(&self, alignment: usize) -> FilterWeights<i8> {
        // Target row sum: 1 << 7, the scale factor of the Q0.7 format.
        const Q0_7_ONE: i32 = 1 << 7;

        let align = self.padded_kernel_size(alignment);
        let precision_scale = Q0_7_ONE as f64;
        let mut output_kernel = vec![0i8; self.distinct_elements * align];

        for (chunk, kernel_chunk) in self
            .weights
            .chunks_exact(self.kernel_size)
            .zip(output_kernel.chunks_exact_mut(align))
        {
            // Only the leading `kernel_size` entries are real taps; everything
            // after them is alignment padding that must remain zero.
            let taps = &mut kernel_chunk[..self.kernel_size];

            let mut local_sum = 0i32;
            for (&weight, tap) in chunk.iter().zip(taps.iter_mut()) {
                let quantized = (weight as f64 * precision_scale)
                    .round()
                    .min(i8::MAX as f64)
                    .max(i8::MIN as f64) as i8;
                *tap = quantized;
                local_sum += i32::from(quantized);
            }

            let residual = Q0_7_ONE - local_sum;
            if residual != 0 {
                let centre = taps.len() / 2;
                // Same result as stepping the tap by one `residual` times with
                // saturating arithmetic, without the loop.
                let corrected = (i32::from(taps[centre]) + residual)
                    .clamp(i32::from(i8::MIN), i32::from(i8::MAX));
                // `corrected` was clamped to the `i8` range just above.
                taps[centre] = corrected as i8;
            }
        }

        FilterWeights::new(
            output_kernel,
            self.kernel_size,
            align,
            self.distinct_elements,
            self.coeffs_size,
            self.bounds.clone(),
        )
    }
}
/// Strategy for turning an `f32` weight table into a table whose coefficients
/// have type `V`.
pub(crate) trait WeightsConverter<V> {
/// Builds a `FilterWeights<V>` from the `f32` table `weights`.
///
/// The input is only borrowed; implementations return a newly built table.
fn prepare_weights(&self, weights: &FilterWeights<f32>) -> FilterWeights<V>;
}
/// Stateless converter that quantizes weights with the crate-wide
/// `crate::support::PRECISION` and adds no row padding.
#[derive(Default)]
pub(crate) struct DefaultWeightsConverter {}

impl<V> WeightsConverter<V> for DefaultWeightsConverter
where
    V: Default + Copy + 'static + Clone + Bounded + AsPrimitive<f64>,
    f64: AsPrimitive<V>,
{
    /// Quantizes `weights` to `V` through
    /// `FilterWeights::numerical_approximation`, passing an alignment of zero
    /// so each output row keeps its `kernel_size` length.
    fn prepare_weights(&self, weights: &FilterWeights<f32>) -> FilterWeights<V> {
        use crate::support::PRECISION;

        // Zero means "do not pad rows".
        let no_padding: usize = 0;
        weights.numerical_approximation::<V, PRECISION>(no_padding)
    }
}
/// Stateless weight converter that targets `f16` coefficients.
///
/// Only compiled for `aarch64` targets with both the `neon` and `nightly_f16`
/// features enabled.
#[derive(Default)]
#[cfg(all(target_arch = "aarch64", feature = "neon"))]
#[cfg(feature = "nightly_f16")]
pub(crate) struct WeightFloat16Converter {}
#[cfg(feature = "nightly_f16")]
#[allow(unused)]
use core::f16;
#[cfg(feature = "nightly_f16")]
#[cfg(all(target_arch = "aarch64", feature = "neon"))]
impl WeightsConverter<f16> for WeightFloat16Converter {
    /// Builds an `f16` table by handing the `f32` coefficients to
    /// `crate::neon::convert_weights_to_f16_fhm`.
    ///
    /// Rows are not padded: the result's `aligned_size` equals the input's
    /// `kernel_size`. `bounds`, `distinct_elements` and `coeffs_size` are
    /// copied over unchanged.
    fn prepare_weights(&self, weights: &FilterWeights<f32>) -> FilterWeights<f16> {
        use crate::neon::convert_weights_to_f16_fhm;

        // No padding is added, so the kernel length doubles as the row stride.
        let row_len = weights.kernel_size;

        FilterWeights::new(
            convert_weights_to_f16_fhm(&weights.weights),
            row_len,
            row_len,
            weights.distinct_elements,
            weights.coeffs_size,
            weights.bounds.clone(),
        )
    }
}