pic_scale/
rgba_f32.rs

1/*
2 * Copyright (c) Radzivon Bartoshyk. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1.  Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 *
14 * 3.  Neither the name of the copyright holder nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::ImageStore;
30#[cfg(all(target_arch = "x86_64", feature = "avx"))]
31use crate::avx2::{
32    convolve_horizontal_rgba_avx_row_one_f32, convolve_horizontal_rgba_avx_rows_4_f32,
33    convolve_vertical_avx_row_f32,
34};
35use crate::convolution::{ConvolutionOptions, HorizontalConvolutionPass, VerticalConvolutionPass};
36use crate::convolve_naive_f32::{
37    convolve_horizontal_4_row_f32_f64, convolve_horizontal_native_row_f32,
38    convolve_horizontal_native_row_f32_f64, convolve_horizontal_rgba_4_row_f32,
39};
40use crate::dispatch_group_f32::{convolve_horizontal_dispatch_f32, convolve_vertical_dispatch_f32};
41use crate::filter_weights::*;
42use crate::image_store::ImageStoreMut;
43#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
44use crate::neon::*;
45use crate::rgb_f32::{convolve_vertical_rgb_native_row_f32, convolve_vertical_rgb_native_row_f64};
46#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
47use crate::sse::*;
48
49impl HorizontalConvolutionPass<f32, f32, 4> for ImageStore<'_, f32, 4> {
50    #[allow(clippy::type_complexity)]
51    fn convolve_horizontal(
52        &self,
53        filter_weights: FilterWeights<f32>,
54        destination: &mut ImageStoreMut<f32, 4>,
55        pool: &novtb::ThreadPool,
56        _: ConvolutionOptions,
57    ) {
58        let mut _dispatcher_4_rows: Option<
59            fn(usize, usize, &FilterWeights<f32>, &[f32], usize, &mut [f32], usize),
60        > = Some(convolve_horizontal_rgba_4_row_f32::<4>);
61        let mut _dispatcher_row: fn(usize, usize, &FilterWeights<f32>, &[f32], &mut [f32]) =
62            convolve_horizontal_native_row_f32::<4>;
63        #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
64        {
65            _dispatcher_4_rows = Some(convolve_horizontal_rgba_neon_rows_4);
66            _dispatcher_row = convolve_horizontal_rgba_neon_row_one;
67        }
68        #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
69        {
70            if std::arch::is_x86_feature_detected!("sse4.1") {
71                _dispatcher_4_rows = Some(convolve_horizontal_rgba_sse_rows_4_f32::<false>);
72                _dispatcher_row = convolve_horizontal_rgba_sse_row_one_f32::<false>;
73            }
74        }
75        #[cfg(all(target_arch = "x86_64", feature = "avx"))]
76        {
77            if std::arch::is_x86_feature_detected!("avx2") {
78                _dispatcher_4_rows = Some(convolve_horizontal_rgba_avx_rows_4_f32::<false>);
79                _dispatcher_row = convolve_horizontal_rgba_avx_row_one_f32::<false>;
80                if std::arch::is_x86_feature_detected!("fma") {
81                    _dispatcher_4_rows = Some(convolve_horizontal_rgba_avx_rows_4_f32::<true>);
82                    _dispatcher_row = convolve_horizontal_rgba_avx_row_one_f32::<true>;
83                }
84            }
85        }
86        convolve_horizontal_dispatch_f32(
87            self,
88            filter_weights,
89            destination,
90            pool,
91            _dispatcher_4_rows,
92            _dispatcher_row,
93        );
94    }
95}
96
97impl HorizontalConvolutionPass<f32, f64, 4> for ImageStore<'_, f32, 4> {
98    #[allow(clippy::type_complexity)]
99    fn convolve_horizontal(
100        &self,
101        filter_weights: FilterWeights<f64>,
102        destination: &mut ImageStoreMut<f32, 4>,
103        pool: &novtb::ThreadPool,
104        _: ConvolutionOptions,
105    ) {
106        let mut _dispatcher_4_rows: Option<
107            fn(usize, usize, &FilterWeights<f64>, &[f32], usize, &mut [f32], usize),
108        > = Some(convolve_horizontal_4_row_f32_f64::<4>);
109        let mut _dispatcher_row: fn(usize, usize, &FilterWeights<f64>, &[f32], &mut [f32]) =
110            convolve_horizontal_native_row_f32_f64::<4>;
111        #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
112        {
113            use crate::neon::{
114                convolve_horizontal_rgba_neon_row_one_f32_f64,
115                convolve_horizontal_rgba_neon_rows_4_f32_f64,
116            };
117            _dispatcher_4_rows = Some(convolve_horizontal_rgba_neon_rows_4_f32_f64);
118            _dispatcher_row = convolve_horizontal_rgba_neon_row_one_f32_f64;
119        }
120        #[cfg(all(target_arch = "x86_64", feature = "avx"))]
121        {
122            use crate::avx2::{
123                convolve_horizontal_rgba_avx_row_one_f32_f64,
124                convolve_horizontal_rgba_avx_rows_4_f32_f64,
125            };
126            if std::arch::is_x86_feature_detected!("avx2") {
127                _dispatcher_4_rows = Some(convolve_horizontal_rgba_avx_rows_4_f32_f64::<false>);
128                _dispatcher_row = convolve_horizontal_rgba_avx_row_one_f32_f64::<false>;
129                if std::arch::is_x86_feature_detected!("fma") {
130                    _dispatcher_4_rows = Some(convolve_horizontal_rgba_avx_rows_4_f32_f64::<true>);
131                    _dispatcher_row = convolve_horizontal_rgba_avx_row_one_f32_f64::<true>;
132                }
133            }
134        }
135        convolve_horizontal_dispatch_f32(
136            self,
137            filter_weights,
138            destination,
139            pool,
140            _dispatcher_4_rows,
141            _dispatcher_row,
142        );
143    }
144}
145
146impl VerticalConvolutionPass<f32, f32, 4> for ImageStore<'_, f32, 4> {
147    fn convolve_vertical(
148        &self,
149        filter_weights: FilterWeights<f32>,
150        destination: &mut ImageStoreMut<f32, 4>,
151        pool: &novtb::ThreadPool,
152        _: ConvolutionOptions,
153    ) {
154        #[allow(clippy::type_complexity)]
155        let mut _dispatcher: fn(usize, &FilterBounds, &[f32], &mut [f32], usize, &[f32]) =
156            convolve_vertical_rgb_native_row_f32;
157        #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
158        {
159            _dispatcher = convolve_vertical_rgb_neon_row_f32;
160        }
161        #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
162        {
163            if std::arch::is_x86_feature_detected!("sse4.1") {
164                _dispatcher = convolve_vertical_rgb_sse_row_f32::<false>;
165            }
166        }
167        #[cfg(all(target_arch = "x86_64", feature = "avx"))]
168        {
169            let has_fma = std::arch::is_x86_feature_detected!("fma");
170            if std::is_x86_feature_detected!("avx2") {
171                _dispatcher = convolve_vertical_avx_row_f32::<false>;
172                if has_fma {
173                    _dispatcher = convolve_vertical_avx_row_f32::<true>;
174                }
175            }
176        }
177        convolve_vertical_dispatch_f32(self, filter_weights, destination, pool, _dispatcher);
178    }
179}
180
181impl VerticalConvolutionPass<f32, f64, 4> for ImageStore<'_, f32, 4> {
182    fn convolve_vertical(
183        &self,
184        filter_weights: FilterWeights<f64>,
185        destination: &mut ImageStoreMut<f32, 4>,
186        pool: &novtb::ThreadPool,
187        _: ConvolutionOptions,
188    ) {
189        #[allow(clippy::type_complexity)]
190        let mut _dispatcher: fn(usize, &FilterBounds, &[f32], &mut [f32], usize, &[f64]) =
191            convolve_vertical_rgb_native_row_f64;
192        #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
193        {
194            use crate::neon::convolve_vertical_neon_row_f32_f64;
195            _dispatcher = convolve_vertical_neon_row_f32_f64;
196        }
197        #[cfg(all(target_arch = "x86_64", feature = "avx"))]
198        {
199            if std::arch::is_x86_feature_detected!("avx2") {
200                use crate::avx2::convolve_vertical_avx_row_f32_f64;
201                if std::arch::is_x86_feature_detected!("fma") {
202                    _dispatcher = convolve_vertical_avx_row_f32_f64::<true>;
203                } else {
204                    _dispatcher = convolve_vertical_avx_row_f32_f64::<false>;
205                }
206            }
207        }
208        convolve_vertical_dispatch_f32(self, filter_weights, destination, pool, _dispatcher);
209    }
210}