1use crate::ImageStore;
30#[cfg(all(target_arch = "x86_64", feature = "avx"))]
31use crate::avx2::{
32 convolve_horizontal_rgba_avx_row_one_f32, convolve_horizontal_rgba_avx_rows_4_f32,
33 convolve_vertical_avx_row_f32,
34};
35use crate::convolution::{ConvolutionOptions, HorizontalConvolutionPass, VerticalConvolutionPass};
36use crate::convolve_naive_f32::{
37 convolve_horizontal_4_row_f32_f64, convolve_horizontal_native_row_f32,
38 convolve_horizontal_native_row_f32_f64, convolve_horizontal_rgba_4_row_f32,
39};
40use crate::dispatch_group_f32::{convolve_horizontal_dispatch_f32, convolve_vertical_dispatch_f32};
41use crate::filter_weights::*;
42use crate::image_store::ImageStoreMut;
43#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
44use crate::neon::*;
45use crate::rgb_f32::{convolve_vertical_rgb_native_row_f32, convolve_vertical_rgb_native_row_f64};
46#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
47use crate::sse::*;
48
49impl HorizontalConvolutionPass<f32, f32, 4> for ImageStore<'_, f32, 4> {
50 #[allow(clippy::type_complexity)]
51 fn convolve_horizontal(
52 &self,
53 filter_weights: FilterWeights<f32>,
54 destination: &mut ImageStoreMut<f32, 4>,
55 pool: &novtb::ThreadPool,
56 _: ConvolutionOptions,
57 ) {
58 let mut _dispatcher_4_rows: Option<
59 fn(usize, usize, &FilterWeights<f32>, &[f32], usize, &mut [f32], usize),
60 > = Some(convolve_horizontal_rgba_4_row_f32::<4>);
61 let mut _dispatcher_row: fn(usize, usize, &FilterWeights<f32>, &[f32], &mut [f32]) =
62 convolve_horizontal_native_row_f32::<4>;
63 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
64 {
65 _dispatcher_4_rows = Some(convolve_horizontal_rgba_neon_rows_4);
66 _dispatcher_row = convolve_horizontal_rgba_neon_row_one;
67 }
68 #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
69 {
70 if std::arch::is_x86_feature_detected!("sse4.1") {
71 _dispatcher_4_rows = Some(convolve_horizontal_rgba_sse_rows_4_f32::<false>);
72 _dispatcher_row = convolve_horizontal_rgba_sse_row_one_f32::<false>;
73 }
74 }
75 #[cfg(all(target_arch = "x86_64", feature = "avx"))]
76 {
77 if std::arch::is_x86_feature_detected!("avx2") {
78 _dispatcher_4_rows = Some(convolve_horizontal_rgba_avx_rows_4_f32::<false>);
79 _dispatcher_row = convolve_horizontal_rgba_avx_row_one_f32::<false>;
80 if std::arch::is_x86_feature_detected!("fma") {
81 _dispatcher_4_rows = Some(convolve_horizontal_rgba_avx_rows_4_f32::<true>);
82 _dispatcher_row = convolve_horizontal_rgba_avx_row_one_f32::<true>;
83 }
84 }
85 }
86 convolve_horizontal_dispatch_f32(
87 self,
88 filter_weights,
89 destination,
90 pool,
91 _dispatcher_4_rows,
92 _dispatcher_row,
93 );
94 }
95}
96
97impl HorizontalConvolutionPass<f32, f64, 4> for ImageStore<'_, f32, 4> {
98 #[allow(clippy::type_complexity)]
99 fn convolve_horizontal(
100 &self,
101 filter_weights: FilterWeights<f64>,
102 destination: &mut ImageStoreMut<f32, 4>,
103 pool: &novtb::ThreadPool,
104 _: ConvolutionOptions,
105 ) {
106 let mut _dispatcher_4_rows: Option<
107 fn(usize, usize, &FilterWeights<f64>, &[f32], usize, &mut [f32], usize),
108 > = Some(convolve_horizontal_4_row_f32_f64::<4>);
109 let mut _dispatcher_row: fn(usize, usize, &FilterWeights<f64>, &[f32], &mut [f32]) =
110 convolve_horizontal_native_row_f32_f64::<4>;
111 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
112 {
113 use crate::neon::{
114 convolve_horizontal_rgba_neon_row_one_f32_f64,
115 convolve_horizontal_rgba_neon_rows_4_f32_f64,
116 };
117 _dispatcher_4_rows = Some(convolve_horizontal_rgba_neon_rows_4_f32_f64);
118 _dispatcher_row = convolve_horizontal_rgba_neon_row_one_f32_f64;
119 }
120 #[cfg(all(target_arch = "x86_64", feature = "avx"))]
121 {
122 use crate::avx2::{
123 convolve_horizontal_rgba_avx_row_one_f32_f64,
124 convolve_horizontal_rgba_avx_rows_4_f32_f64,
125 };
126 if std::arch::is_x86_feature_detected!("avx2") {
127 _dispatcher_4_rows = Some(convolve_horizontal_rgba_avx_rows_4_f32_f64::<false>);
128 _dispatcher_row = convolve_horizontal_rgba_avx_row_one_f32_f64::<false>;
129 if std::arch::is_x86_feature_detected!("fma") {
130 _dispatcher_4_rows = Some(convolve_horizontal_rgba_avx_rows_4_f32_f64::<true>);
131 _dispatcher_row = convolve_horizontal_rgba_avx_row_one_f32_f64::<true>;
132 }
133 }
134 }
135 convolve_horizontal_dispatch_f32(
136 self,
137 filter_weights,
138 destination,
139 pool,
140 _dispatcher_4_rows,
141 _dispatcher_row,
142 );
143 }
144}
145
146impl VerticalConvolutionPass<f32, f32, 4> for ImageStore<'_, f32, 4> {
147 fn convolve_vertical(
148 &self,
149 filter_weights: FilterWeights<f32>,
150 destination: &mut ImageStoreMut<f32, 4>,
151 pool: &novtb::ThreadPool,
152 _: ConvolutionOptions,
153 ) {
154 #[allow(clippy::type_complexity)]
155 let mut _dispatcher: fn(usize, &FilterBounds, &[f32], &mut [f32], usize, &[f32]) =
156 convolve_vertical_rgb_native_row_f32;
157 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
158 {
159 _dispatcher = convolve_vertical_rgb_neon_row_f32;
160 }
161 #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
162 {
163 if std::arch::is_x86_feature_detected!("sse4.1") {
164 _dispatcher = convolve_vertical_rgb_sse_row_f32::<false>;
165 }
166 }
167 #[cfg(all(target_arch = "x86_64", feature = "avx"))]
168 {
169 let has_fma = std::arch::is_x86_feature_detected!("fma");
170 if std::is_x86_feature_detected!("avx2") {
171 _dispatcher = convolve_vertical_avx_row_f32::<false>;
172 if has_fma {
173 _dispatcher = convolve_vertical_avx_row_f32::<true>;
174 }
175 }
176 }
177 convolve_vertical_dispatch_f32(self, filter_weights, destination, pool, _dispatcher);
178 }
179}
180
181impl VerticalConvolutionPass<f32, f64, 4> for ImageStore<'_, f32, 4> {
182 fn convolve_vertical(
183 &self,
184 filter_weights: FilterWeights<f64>,
185 destination: &mut ImageStoreMut<f32, 4>,
186 pool: &novtb::ThreadPool,
187 _: ConvolutionOptions,
188 ) {
189 #[allow(clippy::type_complexity)]
190 let mut _dispatcher: fn(usize, &FilterBounds, &[f32], &mut [f32], usize, &[f64]) =
191 convolve_vertical_rgb_native_row_f64;
192 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
193 {
194 use crate::neon::convolve_vertical_neon_row_f32_f64;
195 _dispatcher = convolve_vertical_neon_row_f32_f64;
196 }
197 #[cfg(all(target_arch = "x86_64", feature = "avx"))]
198 {
199 if std::arch::is_x86_feature_detected!("avx2") {
200 use crate::avx2::convolve_vertical_avx_row_f32_f64;
201 if std::arch::is_x86_feature_detected!("fma") {
202 _dispatcher = convolve_vertical_avx_row_f32_f64::<true>;
203 } else {
204 _dispatcher = convolve_vertical_avx_row_f32_f64::<false>;
205 }
206 }
207 }
208 convolve_vertical_dispatch_f32(self, filter_weights, destination, pool, _dispatcher);
209 }
210}