#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::avx2::convolve_vertical_avx_row;
use crate::convolution::{HorizontalConvolutionPass, VerticalConvolutionPass};
use crate::convolve_naive_u8::convolve_horizontal_rgba_native_row;
use crate::dispatch_group_u8::{convolve_horizontal_dispatch_u8, convolve_vertical_dispatch_u8};
use crate::filter_weights::{FilterBounds, FilterWeights};
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
use crate::neon::convolve_vertical_neon_row;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
use crate::neon::{convolve_horizontal_plane_neon_row, convolve_horizontal_plane_neon_rows_4_u8};
use crate::rgb_u8::convolve_vertical_rgb_native_row_u8;
#[cfg(all(
any(target_arch = "riscv64", target_arch = "riscv32"),
feature = "riscv"
))]
use crate::risc::convolve_vertical_risc_row;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use crate::sse::{
convolve_horizontal_plane_sse_row, convolve_horizontal_plane_sse_rows_4_u8,
convolve_vertical_sse_row,
};
use crate::ImageStore;
use rayon::ThreadPool;
impl<'a> HorizontalConvolutionPass<u8, 1> for ImageStore<'a, u8, 1> {
    /// Horizontal convolution pass for a single-channel `u8` image.
    ///
    /// This method only chooses the row kernels; the convolution itself is
    /// delegated to `convolve_horizontal_dispatch_u8`, which receives `self`,
    /// `filter_weights`, `destination` and `_pool` unchanged.
    ///
    /// Kernel selection:
    /// - default: `convolve_horizontal_rgba_native_row::<u8, i32, 1>` as the
    ///   single-row kernel and no 4-row kernel (`None`);
    /// - `aarch64` builds compiled with NEON: the NEON plane kernels;
    /// - `x86` / `x86_64` builds: the SSE plane kernels, but only when SSE4.1
    ///   is detected at runtime.
    fn convolve_horizontal(
        &self,
        filter_weights: FilterWeights<f32>,
        destination: &mut ImageStore<u8, 1>,
        _pool: &Option<ThreadPool>,
    ) {
        // Signatures of the two kernel flavours accepted by the dispatcher.
        type FourRowKernel =
            fn(usize, usize, &FilterWeights<i16>, *const u8, usize, *mut u8, usize);
        type SingleRowKernel = fn(usize, usize, &FilterWeights<i16>, *const u8, *mut u8);

        // Portable defaults. The leading underscore keeps targets without any
        // SIMD branch below free of "unused mut" warnings.
        let mut _single_row_kernel: SingleRowKernel =
            convolve_horizontal_rgba_native_row::<u8, i32, 1>;
        let mut _four_row_kernel: Option<FourRowKernel> = None;

        // NEON availability is decided at compile time.
        #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
        {
            _single_row_kernel = convolve_horizontal_plane_neon_row;
            _four_row_kernel = Some(convolve_horizontal_plane_neon_rows_4_u8);
        }

        // SSE4.1 availability is probed at runtime.
        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
        {
            if is_x86_feature_detected!("sse4.1") {
                _single_row_kernel = convolve_horizontal_plane_sse_row;
                _four_row_kernel = Some(convolve_horizontal_plane_sse_rows_4_u8);
            }
        }

        convolve_horizontal_dispatch_u8(
            self,
            filter_weights,
            destination,
            _pool,
            _four_row_kernel,
            _single_row_kernel,
        );
    }
}
impl<'a> VerticalConvolutionPass<u8, 1> for ImageStore<'a, u8, 1> {
    /// Vertical convolution pass for a single-channel `u8` image.
    ///
    /// This method only chooses the per-row kernel; the convolution itself is
    /// delegated to `convolve_vertical_dispatch_u8`, which receives `self`,
    /// `filter_weights`, `destination` and `pool` unchanged.
    ///
    /// Kernel selection:
    /// - default: `convolve_vertical_rgb_native_row_u8::<u8, i32, 1>`;
    /// - `aarch64` builds compiled with NEON: `convolve_vertical_neon_row::<1>`;
    /// - `x86` / `x86_64` builds: the AVX2 kernel when AVX2 is detected at
    ///   runtime, otherwise the SSE kernel when SSE4.1 is detected;
    /// - `riscv32` / `riscv64` builds with the `riscv` crate feature: the RISC-V
    ///   kernel when the `v` extension is detected at runtime.
    fn convolve_vertical(
        &self,
        filter_weights: FilterWeights<f32>,
        destination: &mut ImageStore<u8, 1>,
        pool: &Option<ThreadPool>,
    ) {
        // Signature shared by every vertical row kernel handed to the dispatcher.
        type VerticalRowKernel = fn(
            dst_width: usize,
            bounds: &FilterBounds,
            unsafe_source_ptr_0: *const u8,
            unsafe_destination_ptr_0: *mut u8,
            src_stride: usize,
            weight_ptr: *const i16,
        );

        // Portable default. The leading underscore keeps targets without any
        // SIMD branch below free of "unused mut" warnings.
        let mut _row_kernel: VerticalRowKernel = convolve_vertical_rgb_native_row_u8::<u8, i32, 1>;

        // NEON availability is decided at compile time.
        #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
        {
            _row_kernel = convolve_vertical_neon_row::<1>;
        }

        // On x86 the choice is made at runtime; AVX2 takes precedence over
        // SSE4.1 when both are detected.
        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
        {
            if is_x86_feature_detected!("avx2") {
                _row_kernel = convolve_vertical_avx_row::<1>;
            } else if is_x86_feature_detected!("sse4.1") {
                _row_kernel = convolve_vertical_sse_row::<1>;
            }
        }

        // RISC-V support is opt-in through the `riscv` crate feature and
        // additionally requires the `v` extension at runtime.
        #[cfg(all(
            any(target_arch = "riscv64", target_arch = "riscv32"),
            feature = "riscv"
        ))]
        {
            if std::arch::is_riscv_feature_detected!("v") {
                _row_kernel = convolve_vertical_risc_row::<1>;
            }
        }

        convolve_vertical_dispatch_u8(self, filter_weights, destination, pool, _row_kernel);
    }
}