Skip to main content

trueno/vector/
dispatch.rs

1//! Backend dispatch macros for vector operations
2//!
3//! These macros handle routing operations to the appropriate SIMD backend
4//! based on CPU feature detection and the selected Backend variant.
5
/// Dispatches a binary vector operation to the implementation for `$backend`.
///
/// Expands to `<backend type>::$op($a, $b, $result)`, routed as follows:
///
/// - `Scalar`, `GPU` and `Auto` always use `ScalarBackend`.
/// - On `x86_64`, `SSE2` and `AVX` both use `Sse2Backend`, `AVX2` uses
///   `Avx2Backend` and `AVX512` uses `Avx512Backend`.
/// - On `aarch64`/`arm`, `NEON` uses `NeonBackend`.
/// - On `wasm32`, `WasmSIMD` uses `WasmBackend`.
/// - A SIMD variant whose backend module is not compiled for the current
///   target falls back to `ScalarBackend`.
///
/// # Arguments
///
/// - `$backend`: expression yielding a `Backend`; it is only borrowed.
/// - `$op`: name of the associated function to call on the chosen backend type.
/// - `$a`, `$b`: the two inputs, forwarded unchanged.
/// - `$result`: mutable reference to the output. It is reborrowed, so a
///   caller-side `&mut` binding stays usable after the macro.
///
/// Every argument expression is evaluated exactly once, left to right, and
/// outside of any `unsafe` block: unsafe operations inside a caller's
/// expression still need their own `unsafe`.
///
/// # Safety
///
/// The backend call is wrapped in `unsafe` on the caller's behalf. The macro
/// itself performs no CPU feature detection and no length checks.
/// NOTE(review): it relies on `$backend` having been chosen by runtime feature
/// detection and on the operands having compatible lengths - confirm at the
/// call sites.
#[macro_export]
macro_rules! dispatch_binary_op {
    ($backend:expr, $op:ident, $a:expr, $b:expr, $result:expr) => {
        // The operands are bound through a `match` scrutinee rather than `let`
        // so that temporaries created by the caller's expressions live until
        // the dispatched call has returned.
        match (&$backend, $a, $b, &mut *$result) {
            // SAFETY (applies to every `unsafe` block below): the selected
            // backend is expected to be supported by the running CPU and the
            // operands to be valid for `$op`; see the macro-level `# Safety`.
            (backend, lhs, rhs, out) => match *backend {
                $crate::Backend::Scalar => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(lhs, rhs, out)
                },
                // `AVX` has no dedicated backend here; it shares the SSE2 one.
                #[cfg(target_arch = "x86_64")]
                $crate::Backend::SSE2 | $crate::Backend::AVX => unsafe {
                    $crate::backends::sse2::Sse2Backend::$op(lhs, rhs, out)
                },
                #[cfg(target_arch = "x86_64")]
                $crate::Backend::AVX2 => unsafe {
                    $crate::backends::avx2::Avx2Backend::$op(lhs, rhs, out)
                },
                #[cfg(target_arch = "x86_64")]
                $crate::Backend::AVX512 => unsafe {
                    $crate::backends::avx512::Avx512Backend::$op(lhs, rhs, out)
                },
                // x86 variants requested on a non-x86_64 target.
                #[cfg(not(target_arch = "x86_64"))]
                $crate::Backend::SSE2
                | $crate::Backend::AVX
                | $crate::Backend::AVX2
                | $crate::Backend::AVX512 => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(lhs, rhs, out)
                },
                #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
                $crate::Backend::NEON => unsafe {
                    $crate::backends::neon::NeonBackend::$op(lhs, rhs, out)
                },
                #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
                $crate::Backend::NEON => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(lhs, rhs, out)
                },
                #[cfg(target_arch = "wasm32")]
                $crate::Backend::WasmSIMD => unsafe {
                    $crate::backends::wasm::WasmBackend::$op(lhs, rhs, out)
                },
                #[cfg(not(target_arch = "wasm32"))]
                $crate::Backend::WasmSIMD => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(lhs, rhs, out)
                },
                // No GPU path and no auto-selection happen at this level.
                $crate::Backend::GPU | $crate::Backend::Auto => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(lhs, rhs, out)
                },
            },
        }
    };
}
57
/// Dispatches a reduction over `$data` to the implementation for `$backend`.
///
/// Expands to `<backend type>::$op($data)` and evaluates to whatever that call
/// returns. Routing:
///
/// - `Scalar`, `GPU` and `Auto` always use `ScalarBackend`.
/// - On `x86_64`, `SSE2` and `AVX` both use `Sse2Backend`, `AVX2` uses
///   `Avx2Backend` and `AVX512` uses `Avx512Backend`.
/// - On `aarch64`/`arm`, `NEON` uses `NeonBackend`.
/// - On `wasm32`, `WasmSIMD` uses `WasmBackend`.
/// - A SIMD variant whose backend module is not compiled for the current
///   target falls back to `ScalarBackend`.
///
/// # Arguments
///
/// - `$backend`: expression yielding a `Backend`; it is only borrowed.
/// - `$op`: name of the associated function to call on the chosen backend type.
/// - `$data`: the input, forwarded unchanged.
///
/// Both expressions are evaluated exactly once, left to right, and outside of
/// any `unsafe` block: unsafe operations inside a caller's expression still
/// need their own `unsafe`.
///
/// # Safety
///
/// The backend call is wrapped in `unsafe` on the caller's behalf. The macro
/// itself performs no CPU feature detection.
/// NOTE(review): it relies on `$backend` having been chosen by runtime feature
/// detection - confirm at the call sites.
#[macro_export]
macro_rules! dispatch_reduction {
    ($backend:expr, $op:ident, $data:expr) => {
        // Bound through a `match` scrutinee rather than `let` so temporaries
        // in the caller's expressions outlive the dispatched call.
        match (&$backend, $data) {
            // SAFETY (applies to every `unsafe` block below): the selected
            // backend is expected to be supported by the running CPU; see the
            // macro-level `# Safety`.
            (backend, input) => match *backend {
                $crate::Backend::Scalar => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(input)
                },
                // `AVX` has no dedicated backend here; it shares the SSE2 one.
                #[cfg(target_arch = "x86_64")]
                $crate::Backend::SSE2 | $crate::Backend::AVX => unsafe {
                    $crate::backends::sse2::Sse2Backend::$op(input)
                },
                #[cfg(target_arch = "x86_64")]
                $crate::Backend::AVX2 => unsafe {
                    $crate::backends::avx2::Avx2Backend::$op(input)
                },
                #[cfg(target_arch = "x86_64")]
                $crate::Backend::AVX512 => unsafe {
                    $crate::backends::avx512::Avx512Backend::$op(input)
                },
                // x86 variants requested on a non-x86_64 target.
                #[cfg(not(target_arch = "x86_64"))]
                $crate::Backend::SSE2
                | $crate::Backend::AVX
                | $crate::Backend::AVX2
                | $crate::Backend::AVX512 => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(input)
                },
                #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
                $crate::Backend::NEON => unsafe {
                    $crate::backends::neon::NeonBackend::$op(input)
                },
                #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
                $crate::Backend::NEON => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(input)
                },
                #[cfg(target_arch = "wasm32")]
                $crate::Backend::WasmSIMD => unsafe {
                    $crate::backends::wasm::WasmBackend::$op(input)
                },
                #[cfg(not(target_arch = "wasm32"))]
                $crate::Backend::WasmSIMD => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(input)
                },
                // No GPU path and no auto-selection happen at this level.
                $crate::Backend::GPU | $crate::Backend::Auto => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(input)
                },
            },
        }
    };
}
97
/// Dispatches a unary vector operation to the implementation for `$backend`.
///
/// Expands to `<backend type>::$op($a, $result)`, routed as follows:
///
/// - `Scalar`, `GPU` and `Auto` always use `ScalarBackend`.
/// - On `x86_64`, `SSE2` and `AVX` both use `Sse2Backend`, `AVX2` uses
///   `Avx2Backend` and `AVX512` uses `Avx512Backend`.
/// - On `aarch64`/`arm`, `NEON` uses `NeonBackend`.
/// - On `wasm32`, `WasmSIMD` uses `WasmBackend`.
/// - A SIMD variant whose backend module is not compiled for the current
///   target falls back to `ScalarBackend`.
///
/// # Arguments
///
/// - `$backend`: expression yielding a `Backend`; it is only borrowed.
/// - `$op`: name of the associated function to call on the chosen backend type.
/// - `$a`: the input, forwarded unchanged.
/// - `$result`: mutable reference to the output. It is reborrowed, so a
///   caller-side `&mut` binding stays usable after the macro.
///
/// Every argument expression is evaluated exactly once, left to right, and
/// outside of any `unsafe` block: unsafe operations inside a caller's
/// expression still need their own `unsafe`.
///
/// # Safety
///
/// The backend call is wrapped in `unsafe` on the caller's behalf. The macro
/// itself performs no CPU feature detection and no length checks.
/// NOTE(review): it relies on `$backend` having been chosen by runtime feature
/// detection and on the operands having compatible lengths - confirm at the
/// call sites.
#[macro_export]
macro_rules! dispatch_unary_op {
    ($backend:expr, $op:ident, $a:expr, $result:expr) => {
        // Bound through a `match` scrutinee rather than `let` so temporaries
        // in the caller's expressions outlive the dispatched call.
        match (&$backend, $a, &mut *$result) {
            // SAFETY (applies to every `unsafe` block below): the selected
            // backend is expected to be supported by the running CPU and the
            // operands to be valid for `$op`; see the macro-level `# Safety`.
            (backend, input, out) => match *backend {
                $crate::Backend::Scalar => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(input, out)
                },
                // `AVX` has no dedicated backend here; it shares the SSE2 one.
                #[cfg(target_arch = "x86_64")]
                $crate::Backend::SSE2 | $crate::Backend::AVX => unsafe {
                    $crate::backends::sse2::Sse2Backend::$op(input, out)
                },
                #[cfg(target_arch = "x86_64")]
                $crate::Backend::AVX2 => unsafe {
                    $crate::backends::avx2::Avx2Backend::$op(input, out)
                },
                #[cfg(target_arch = "x86_64")]
                $crate::Backend::AVX512 => unsafe {
                    $crate::backends::avx512::Avx512Backend::$op(input, out)
                },
                // x86 variants requested on a non-x86_64 target.
                #[cfg(not(target_arch = "x86_64"))]
                $crate::Backend::SSE2
                | $crate::Backend::AVX
                | $crate::Backend::AVX2
                | $crate::Backend::AVX512 => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(input, out)
                },
                #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
                $crate::Backend::NEON => unsafe {
                    $crate::backends::neon::NeonBackend::$op(input, out)
                },
                #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
                $crate::Backend::NEON => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(input, out)
                },
                #[cfg(target_arch = "wasm32")]
                $crate::Backend::WasmSIMD => unsafe {
                    $crate::backends::wasm::WasmBackend::$op(input, out)
                },
                #[cfg(not(target_arch = "wasm32"))]
                $crate::Backend::WasmSIMD => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(input, out)
                },
                // No GPU path and no auto-selection happen at this level.
                $crate::Backend::GPU | $crate::Backend::Auto => unsafe {
                    $crate::backends::scalar::ScalarBackend::$op(input, out)
                },
            },
        }
    };
}
145
146pub use dispatch_binary_op;
147pub use dispatch_reduction;
148pub use dispatch_unary_op;