1#![allow(clippy::module_name_repetitions)]
59#![allow(unsafe_code)]
60
61pub mod colorspace;
62pub mod cost_distance_simd;
63pub mod filters;
64pub mod focal_simd;
65pub mod histogram;
66pub mod hydrology_simd;
67pub mod math;
68pub mod morphology;
69pub mod projection;
70pub mod raster;
71pub mod resampling;
72pub mod statistics;
73pub mod terrain_simd;
74pub mod texture_simd;
75pub mod threshold;
76
77pub mod detect {
83 use std::sync::OnceLock;
84
85 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
87 pub enum SimdLevel {
88 Scalar,
90 Simd128,
92 Simd256,
94 Simd512,
96 }
97
98 impl SimdLevel {
99 #[must_use]
101 pub const fn lanes_f32(self) -> usize {
102 match self {
103 Self::Scalar => 1,
104 Self::Simd128 => 4, Self::Simd256 => 8, Self::Simd512 => 16, }
108 }
109
110 #[must_use]
112 pub const fn lanes_f64(self) -> usize {
113 match self {
114 Self::Scalar => 1,
115 Self::Simd128 => 2, Self::Simd256 => 4, Self::Simd512 => 8, }
119 }
120
121 #[must_use]
123 pub const fn lanes_u8(self) -> usize {
124 match self {
125 Self::Scalar => 1,
126 Self::Simd128 => 16, Self::Simd256 => 32, Self::Simd512 => 64, }
130 }
131
132 #[must_use]
134 pub const fn preferred_alignment(self) -> usize {
135 match self {
136 Self::Scalar => 8,
137 Self::Simd128 => 16,
138 Self::Simd256 => 32,
139 Self::Simd512 => 64,
140 }
141 }
142
143 #[must_use]
145 pub const fn width_bits(self) -> usize {
146 match self {
147 Self::Scalar => 64,
148 Self::Simd128 => 128,
149 Self::Simd256 => 256,
150 Self::Simd512 => 512,
151 }
152 }
153 }
154
155 impl core::fmt::Display for SimdLevel {
156 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
157 match self {
158 Self::Scalar => write!(f, "Scalar"),
159 Self::Simd128 => {
160 #[cfg(target_arch = "x86_64")]
161 {
162 write!(f, "SSE2 (128-bit)")
163 }
164 #[cfg(target_arch = "aarch64")]
165 {
166 write!(f, "NEON (128-bit)")
167 }
168 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
169 {
170 write!(f, "SIMD128")
171 }
172 }
173 Self::Simd256 => write!(f, "AVX2 (256-bit)"),
174 Self::Simd512 => write!(f, "AVX-512 (512-bit)"),
175 }
176 }
177 }
178
179 #[derive(Debug, Clone)]
181 pub struct SimdCapabilities {
182 pub max_level: SimdLevel,
184 pub has_sse2: bool,
186 pub has_sse41: bool,
188 pub has_avx2: bool,
190 pub has_fma: bool,
192 pub has_avx512f: bool,
194 pub has_neon: bool,
196 }
197
198 impl SimdCapabilities {
199 #[must_use]
201 pub fn detect() -> Self {
202 #[cfg(target_arch = "x86_64")]
203 {
204 let has_sse2 = is_x86_feature_detected!("sse2");
205 let has_sse41 = is_x86_feature_detected!("sse4.1");
206 let has_avx2 = is_x86_feature_detected!("avx2");
207 let has_fma = is_x86_feature_detected!("fma");
208 let has_avx512f = is_x86_feature_detected!("avx512f");
209
210 let max_level = if has_avx512f {
211 SimdLevel::Simd512
212 } else if has_avx2 {
213 SimdLevel::Simd256
214 } else if has_sse2 {
215 SimdLevel::Simd128
216 } else {
217 SimdLevel::Scalar
218 };
219
220 Self {
221 max_level,
222 has_sse2,
223 has_sse41,
224 has_avx2,
225 has_fma,
226 has_avx512f,
227 has_neon: false,
228 }
229 }
230
231 #[cfg(target_arch = "aarch64")]
232 {
233 Self {
235 max_level: SimdLevel::Simd128,
236 has_sse2: false,
237 has_sse41: false,
238 has_avx2: false,
239 has_fma: false,
240 has_avx512f: false,
241 has_neon: true,
242 }
243 }
244
245 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
246 {
247 Self {
248 max_level: SimdLevel::Scalar,
249 has_sse2: false,
250 has_sse41: false,
251 has_avx2: false,
252 has_fma: false,
253 has_avx512f: false,
254 has_neon: false,
255 }
256 }
257 }
258 }
259
260 #[must_use]
264 pub fn capabilities() -> &'static SimdCapabilities {
265 static CAPS: OnceLock<SimdCapabilities> = OnceLock::new();
266 CAPS.get_or_init(SimdCapabilities::detect)
267 }
268
269 #[must_use]
271 pub fn best_level() -> SimdLevel {
272 capabilities().max_level
273 }
274
275 #[must_use]
277 pub fn is_available(level: SimdLevel) -> bool {
278 best_level() >= level
279 }
280}
281
282pub mod platform {
284 #[cfg(target_feature = "avx2")]
286 pub const HAS_AVX2: bool = true;
287
288 #[cfg(not(target_feature = "avx2"))]
290 pub const HAS_AVX2: bool = false;
291
292 #[cfg(target_feature = "avx512f")]
294 pub const HAS_AVX512: bool = true;
295
296 #[cfg(not(target_feature = "avx512f"))]
298 pub const HAS_AVX512: bool = false;
299
300 #[cfg(target_feature = "neon")]
302 pub const HAS_NEON: bool = true;
303
304 #[cfg(not(target_feature = "neon"))]
306 pub const HAS_NEON: bool = false;
307
308 #[must_use]
310 pub const fn lane_width_f32() -> usize {
311 #[cfg(target_feature = "avx512f")]
312 {
313 16
314 }
315 #[cfg(all(target_feature = "avx2", not(target_feature = "avx512f")))]
316 {
317 8
318 }
319 #[cfg(all(
320 target_feature = "neon",
321 not(target_feature = "avx2"),
322 not(target_feature = "avx512f")
323 ))]
324 {
325 4
326 }
327 #[cfg(not(any(
328 target_feature = "avx512f",
329 target_feature = "avx2",
330 target_feature = "neon"
331 )))]
332 {
333 4
334 }
335 }
336
337 #[must_use]
339 pub const fn lane_width_f64() -> usize {
340 #[cfg(target_feature = "avx512f")]
341 {
342 8
343 }
344 #[cfg(all(target_feature = "avx2", not(target_feature = "avx512f")))]
345 {
346 4
347 }
348 #[cfg(not(any(target_feature = "avx512f", target_feature = "avx2")))]
349 {
350 2
351 }
352 }
353
354 #[must_use]
356 pub const fn lane_width_u8() -> usize {
357 #[cfg(target_feature = "avx512f")]
358 {
359 64
360 }
361 #[cfg(all(target_feature = "avx2", not(target_feature = "avx512f")))]
362 {
363 32
364 }
365 #[cfg(not(any(target_feature = "avx512f", target_feature = "avx2")))]
366 {
367 16
368 }
369 }
370
371 #[must_use]
373 pub const fn preferred_alignment() -> usize {
374 #[cfg(target_feature = "avx512f")]
375 {
376 64
377 }
378 #[cfg(all(target_feature = "avx2", not(target_feature = "avx512f")))]
379 {
380 32
381 }
382 #[cfg(not(any(target_feature = "avx512f", target_feature = "avx2")))]
383 {
384 16
385 }
386 }
387}
388
389pub mod util {
391 use std::alloc::Layout;
392
393 #[must_use]
395 pub const fn chunks(len: usize, lane_width: usize) -> usize {
396 len / lane_width
397 }
398
399 #[must_use]
401 pub const fn remainder(len: usize, lane_width: usize) -> usize {
402 len % lane_width
403 }
404
405 #[must_use]
407 pub fn is_aligned<T>(ptr: *const T, align: usize) -> bool {
408 (ptr as usize) % align == 0
409 }
410
411 #[must_use]
413 pub const fn align_up(value: usize, align: usize) -> usize {
414 (value + align - 1) & !(align - 1)
415 }
416
417 #[must_use]
419 pub const fn align_down(value: usize, align: usize) -> usize {
420 value & !(align - 1)
421 }
422
423 pub fn aligned_vec_f32(len: usize, align: usize) -> Option<Vec<f32>> {
432 if len == 0 {
433 return Some(Vec::new());
434 }
435 let layout = Layout::from_size_align(len * std::mem::size_of::<f32>(), align).ok()?;
436 unsafe {
438 let ptr = std::alloc::alloc_zeroed(layout);
439 if ptr.is_null() {
440 return None;
441 }
442 let slice = std::slice::from_raw_parts_mut(ptr.cast::<f32>(), len);
443 Some(Vec::from_raw_parts(slice.as_mut_ptr(), len, len))
444 }
445 }
446
447 pub fn aligned_vec_f64(len: usize, align: usize) -> Option<Vec<f64>> {
455 if len == 0 {
456 return Some(Vec::new());
457 }
458 let layout = Layout::from_size_align(len * std::mem::size_of::<f64>(), align).ok()?;
459 unsafe {
461 let ptr = std::alloc::alloc_zeroed(layout);
462 if ptr.is_null() {
463 return None;
464 }
465 let slice = std::slice::from_raw_parts_mut(ptr.cast::<f64>(), len);
466 Some(Vec::from_raw_parts(slice.as_mut_ptr(), len, len))
467 }
468 }
469
470 #[inline]
475 pub fn process_chunks(len: usize, lanes: usize) -> (usize, usize) {
476 let n_chunks = len / lanes;
477 let remainder_start = n_chunks * lanes;
478 (n_chunks, remainder_start)
479 }
480}
481
482#[macro_export]
486#[doc(hidden)]
487macro_rules! simd_validate_binary {
488 ($a:expr, $b:expr, $out:expr) => {
489 if $a.len() != $b.len() || $a.len() != $out.len() {
490 return Err($crate::error::AlgorithmError::InvalidParameter {
491 parameter: "input",
492 message: format!(
493 "Slice length mismatch: a={}, b={}, out={}",
494 $a.len(),
495 $b.len(),
496 $out.len()
497 ),
498 });
499 }
500 };
501}
502
503#[macro_export]
505#[doc(hidden)]
506macro_rules! simd_validate_unary {
507 ($data:expr, $out:expr) => {
508 if $data.len() != $out.len() {
509 return Err($crate::error::AlgorithmError::InvalidParameter {
510 parameter: "input",
511 message: format!(
512 "Slice length mismatch: data={}, out={}",
513 $data.len(),
514 $out.len()
515 ),
516 });
517 }
518 };
519}
520
521#[cfg(test)]
522mod tests {
523 use super::*;
524
525 #[test]
526 fn test_platform_detection() {
527 let _avx2 = platform::HAS_AVX2;
528 let _avx512 = platform::HAS_AVX512;
529 let _neon = platform::HAS_NEON;
530
531 assert!(platform::lane_width_f32() >= 2);
532 assert!(platform::lane_width_f64() >= 2);
533 assert!(platform::lane_width_u8() >= 8);
534
535 let align = platform::preferred_alignment();
536 assert!(align.is_power_of_two());
537 assert!(align >= 16);
538 }
539
540 #[test]
541 fn test_runtime_detection() {
542 let caps = detect::SimdCapabilities::detect();
543
544 #[cfg(target_arch = "aarch64")]
546 {
547 assert!(caps.has_neon);
548 assert_eq!(caps.max_level, detect::SimdLevel::Simd128);
549 }
550
551 #[cfg(target_arch = "x86_64")]
553 {
554 assert!(caps.has_sse2);
555 assert!(caps.max_level >= detect::SimdLevel::Simd128);
556 }
557
558 assert!(caps.max_level >= detect::SimdLevel::Scalar);
560 }
561
562 #[test]
563 fn test_cached_capabilities() {
564 let caps1 = detect::capabilities();
565 let caps2 = detect::capabilities();
566
567 assert_eq!(caps1.max_level, caps2.max_level);
569 assert_eq!(caps1.has_neon, caps2.has_neon);
570 }
571
572 #[test]
573 fn test_simd_level_properties() {
574 assert_eq!(detect::SimdLevel::Scalar.lanes_f32(), 1);
575 assert_eq!(detect::SimdLevel::Simd128.lanes_f32(), 4);
576 assert_eq!(detect::SimdLevel::Simd256.lanes_f32(), 8);
577 assert_eq!(detect::SimdLevel::Simd512.lanes_f32(), 16);
578
579 assert_eq!(detect::SimdLevel::Simd128.lanes_f64(), 2);
580 assert_eq!(detect::SimdLevel::Simd256.lanes_f64(), 4);
581
582 assert_eq!(detect::SimdLevel::Simd128.lanes_u8(), 16);
583 assert_eq!(detect::SimdLevel::Simd256.lanes_u8(), 32);
584
585 assert_eq!(detect::SimdLevel::Simd128.preferred_alignment(), 16);
586 assert_eq!(detect::SimdLevel::Simd256.preferred_alignment(), 32);
587 assert_eq!(detect::SimdLevel::Simd512.preferred_alignment(), 64);
588 }
589
590 #[test]
591 fn test_simd_level_display() {
592 let level = detect::best_level();
593 let display = format!("{level}");
594 assert!(!display.is_empty());
595 }
596
597 #[test]
598 fn test_simd_level_ordering() {
599 assert!(detect::SimdLevel::Scalar < detect::SimdLevel::Simd128);
600 assert!(detect::SimdLevel::Simd128 < detect::SimdLevel::Simd256);
601 assert!(detect::SimdLevel::Simd256 < detect::SimdLevel::Simd512);
602 }
603
604 #[test]
605 fn test_is_available() {
606 assert!(detect::is_available(detect::SimdLevel::Scalar));
608
609 let best = detect::best_level();
611 assert!(detect::is_available(best));
612 }
613
614 #[test]
615 fn test_util_functions() {
616 assert_eq!(util::chunks(100, 8), 12);
617 assert_eq!(util::remainder(100, 8), 4);
618
619 assert_eq!(util::align_up(15, 16), 16);
620 assert_eq!(util::align_up(16, 16), 16);
621 assert_eq!(util::align_up(17, 16), 32);
622
623 assert_eq!(util::align_down(15, 16), 0);
624 assert_eq!(util::align_down(16, 16), 16);
625 assert_eq!(util::align_down(31, 16), 16);
626 }
627
628 #[test]
629 fn test_process_chunks() {
630 let (n_chunks, remainder_start) = util::process_chunks(100, 8);
631 assert_eq!(n_chunks, 12);
632 assert_eq!(remainder_start, 96);
633
634 let (n_chunks, remainder_start) = util::process_chunks(7, 4);
635 assert_eq!(n_chunks, 1);
636 assert_eq!(remainder_start, 4);
637 }
638
639 #[test]
640 fn test_pointer_alignment() {
641 let data = vec![1.0_f32; 100];
642 let ptr = data.as_ptr();
643 assert!(util::is_aligned(ptr, std::mem::align_of::<f32>()));
644 }
645
646 #[test]
647 fn test_aligned_vec_f32() {
648 if let Some(v) = util::aligned_vec_f32(100, 64) {
649 assert_eq!(v.len(), 100);
650 assert!(util::is_aligned(v.as_ptr(), 64));
651 for &val in &v {
652 assert_eq!(val, 0.0);
653 }
654 }
655 }
656
657 #[test]
658 fn test_aligned_vec_f64() {
659 if let Some(v) = util::aligned_vec_f64(100, 64) {
660 assert_eq!(v.len(), 100);
661 assert!(util::is_aligned(v.as_ptr(), 64));
662 for &val in &v {
663 assert_eq!(val, 0.0);
664 }
665 }
666 }
667
668 #[test]
669 fn test_aligned_vec_empty() {
670 let v = util::aligned_vec_f32(0, 64);
671 assert!(v.is_some());
672 if let Some(v) = v {
673 assert!(v.is_empty());
674 }
675 }
676}