scirs2_core/
numeric.rs

1//! Numeric traits and utilities for ``SciRS2``
2//!
3//! This module provides traits and utilities for working with numeric types
4//! in scientific computing contexts.
5
6use crate::error::{CoreError, CoreResult, ErrorContext};
7use num_traits::{Float, Num, NumCast, One, Zero};
8use std::fmt::Debug;
9use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign};
10
11/// A trait for numeric types that can be used in scientific calculations
12///
13/// This trait combines common numeric traits required for scientific computing
14/// operations, providing a unified trait bound for generic code.
15pub trait ScientificNumber:
16    Num
17    + Clone
18    + Copy
19    + PartialOrd
20    + Debug
21    + Zero
22    + One
23    + Add<Output = Self>
24    + Sub<Output = Self>
25    + Mul<Output = Self>
26    + Div<Output = Self>
27    + AddAssign
28    + SubAssign
29    + MulAssign
30    + DivAssign
31    + NumCast
32{
33    /// Absolute value
34    #[must_use]
35    fn abs(self) -> Self;
36
37    /// Square root
38    #[must_use]
39    fn sqrt(self) -> Self;
40
41    /// Square
42    #[must_use]
43    fn square(self) -> Self {
44        self * self
45    }
46
47    /// Maximum of two values
48    #[must_use]
49    fn max(self, other: Self) -> Self;
50
51    /// Minimum of two values
52    #[must_use]
53    fn min(self, other: Self) -> Self;
54
55    /// Check if the value is finite
56    #[must_use]
57    fn is_finite(self) -> bool;
58
59    /// Convert to f64
60    #[must_use]
61    fn to_f64(self) -> Option<f64>;
62
63    /// Convert from f64
64    #[must_use]
65    fn from_f64(value: f64) -> Option<Self>;
66
67    /// Convert from little-endian bytes
68    #[must_use]
69    fn from_le_bytes(bytes: &[u8]) -> Self;
70
71    /// Convert from big-endian bytes
72    #[must_use]
73    fn from_be_bytes(bytes: &[u8]) -> Self;
74
75    /// Convert to little-endian bytes
76    #[must_use]
77    fn to_le_bytes(self) -> Vec<u8>;
78
79    /// Convert to big-endian bytes
80    #[must_use]
81    fn to_be_bytes(self) -> Vec<u8>;
82}
83
84/// A trait for real-valued floating point types
85pub trait RealNumber: ScientificNumber + Float {
86    /// Returns the machine epsilon (the difference between 1.0 and the least value greater than 1.0)
87    #[must_use]
88    fn epsilon() -> Self;
89
90    /// Exponential function (e^x)
91    #[must_use]
92    fn exp(self) -> Self;
93
94    /// Natural logarithm (ln(x))
95    #[must_use]
96    fn ln(self) -> Self;
97
98    /// Base-10 logarithm
99    #[must_use]
100    fn log10(self) -> Self;
101
102    /// Base-2 logarithm
103    #[must_use]
104    fn log2(self) -> Self;
105
106    /// Sine function
107    #[must_use]
108    fn sin(self) -> Self;
109
110    /// Cosine function
111    #[must_use]
112    fn cos(self) -> Self;
113
114    /// Tangent function
115    #[must_use]
116    fn tan(self) -> Self;
117
118    /// Hyperbolic sine
119    #[must_use]
120    fn sinh(self) -> Self;
121
122    /// Hyperbolic cosine
123    #[must_use]
124    fn cosh(self) -> Self;
125
126    /// Hyperbolic tangent
127    #[must_use]
128    fn tanh(self) -> Self;
129
130    /// Inverse sine
131    #[must_use]
132    fn asin(self) -> Self;
133
134    /// Inverse cosine
135    #[must_use]
136    fn acos(self) -> Self;
137
138    /// Inverse tangent
139    #[must_use]
140    fn atan(self) -> Self;
141
142    /// Inverse tangent of y/x with correct quadrant
143    #[must_use]
144    fn atan2(self, other: Self) -> Self;
145
146    /// Power function
147    #[must_use]
148    fn powf(self, n: Self) -> Self;
149
150    /// Integer power function
151    #[must_use]
152    fn powi(self, n: i32) -> Self;
153
154    /// Factorial function (approximation for non-integers)
155    #[must_use]
156    fn factorial(self) -> Self;
157}
158
159/// A trait for complex number types
160pub trait ComplexNumber: ScientificNumber {
161    /// The real part of the complex number
162    type RealPart: RealNumber;
163
164    /// Returns the real part of the complex number
165    #[must_use]
166    fn re(&self) -> Self::RealPart;
167
168    /// Returns the imaginary part of the complex number
169    #[must_use]
170    fn im(&self) -> Self::RealPart;
171
172    /// Create a new complex number from real and imaginary parts
173    #[must_use]
174    fn from_parts(re: Self::RealPart, im: Self::RealPart) -> Self;
175
176    /// Returns the complex conjugate
177    #[must_use]
178    fn conj(self) -> Self;
179
180    /// Returns the magnitude (absolute value)
181    #[must_use]
182    fn abs(self) -> Self::RealPart;
183
184    /// Returns the argument (phase)
185    #[must_use]
186    fn arg(self) -> Self::RealPart;
187
188    /// Returns the complex number in exponential form (r, theta)
189    #[must_use]
190    fn to_polar(self) -> (Self::RealPart, Self::RealPart);
191
192    /// Creates a complex number from polar coordinates
193    #[must_use]
194    fn from_polar(r: Self::RealPart, theta: Self::RealPart) -> Self;
195
196    /// Exponential function
197    #[must_use]
198    fn exp(self) -> Self;
199
200    /// Natural logarithm
201    #[must_use]
202    fn ln(self) -> Self;
203
204    /// Power function with complex exponent
205    #[must_use]
206    fn powc(self, exp: Self) -> Self;
207
208    /// Power function with real exponent
209    #[must_use]
210    fn powf(self, exp: Self::RealPart) -> Self;
211
212    /// Square root
213    #[must_use]
214    fn sqrt(self) -> Self;
215}
216
217/// A trait for integers that can be used in scientific calculations
218pub trait ScientificInteger: ScientificNumber + Eq {
219    /// Greatest common divisor
220    #[must_use]
221    fn gcd(self, other: Self) -> Self;
222
223    /// Least common multiple
224    #[must_use]
225    fn lcm(self, other: Self) -> Self;
226
227    /// Check if the number is prime
228    #[must_use]
229    fn is_prime(self) -> bool;
230
231    /// Check if the number is even
232    #[must_use]
233    fn is_even(self) -> bool;
234
235    /// Check if the number is odd
236    #[must_use]
237    fn is_odd(self) -> bool;
238
239    /// Modular exponentiation (self^exp mod modulus)
240    #[must_use]
241    fn mod_pow(self, exp: Self, modulus: Self) -> Self;
242
243    /// Factorial
244    fn factorial(self) -> CoreResult<Self>;
245
246    /// Binomial coefficient (n choose k)
247    #[must_use]
248    fn binomial(self, k: Self) -> Self;
249}
250
251// Implement ScientificNumber for f32
252impl ScientificNumber for f32 {
253    fn abs(self) -> Self {
254        self.abs()
255    }
256
257    fn sqrt(self) -> Self {
258        self.sqrt()
259    }
260
261    fn max(self, other: Self) -> Self {
262        self.max(other)
263    }
264
265    fn min(self, other: Self) -> Self {
266        self.min(other)
267    }
268
269    fn is_finite(self) -> bool {
270        self.is_finite()
271    }
272
273    fn to_f64(self) -> Option<f64> {
274        Some(self as f64)
275    }
276
277    fn from_f64(value: f64) -> Option<Self> {
278        if value.is_finite() && value <= Self::MAX as f64 && value >= Self::MIN as f64 {
279            Some(value as f32)
280        } else {
281            None
282        }
283    }
284
285    fn from_le_bytes(bytes: &[u8]) -> Self {
286        let mut array = [0u8; 4];
287        array.copy_from_slice(&bytes[..4]);
288        f32::from_le_bytes(array)
289    }
290
291    fn from_be_bytes(bytes: &[u8]) -> Self {
292        let mut array = [0u8; 4];
293        array.copy_from_slice(&bytes[..4]);
294        f32::from_be_bytes(array)
295    }
296
297    fn to_le_bytes(self) -> Vec<u8> {
298        self.to_le_bytes().to_vec()
299    }
300
301    fn to_be_bytes(self) -> Vec<u8> {
302        self.to_be_bytes().to_vec()
303    }
304}
305
306// Implement RealNumber for f32
307impl RealNumber for f32 {
308    fn epsilon() -> Self {
309        Self::EPSILON
310    }
311
312    fn exp(self) -> Self {
313        self.exp()
314    }
315
316    fn ln(self) -> Self {
317        self.ln()
318    }
319
320    fn log10(self) -> Self {
321        self.log10()
322    }
323
324    fn log2(self) -> Self {
325        self.log2()
326    }
327
328    fn sin(self) -> Self {
329        self.sin()
330    }
331
332    fn cos(self) -> Self {
333        self.cos()
334    }
335
336    fn tan(self) -> Self {
337        self.tan()
338    }
339
340    fn sinh(self) -> Self {
341        self.sinh()
342    }
343
344    fn cosh(self) -> Self {
345        self.cosh()
346    }
347
348    fn tanh(self) -> Self {
349        self.tanh()
350    }
351
352    fn asin(self) -> Self {
353        self.asin()
354    }
355
356    fn acos(self) -> Self {
357        self.acos()
358    }
359
360    fn atan(self) -> Self {
361        self.atan()
362    }
363
364    fn atan2(self, other: Self) -> Self {
365        self.atan2(other)
366    }
367
368    fn powf(self, n: Self) -> Self {
369        self.powf(n)
370    }
371
372    fn powi(self, n: i32) -> Self {
373        self.powi(n)
374    }
375
376    fn factorial(self) -> Self {
377        if self < 0.0 {
378            return Self::NAN;
379        }
380
381        // Use Stirling's approximation for non-integers or large values
382        if self != self.trunc() || self > 100.0 {
383            const SQRT_TWO_PI: f32 = 2.506_628_3;
384            return SQRT_TWO_PI * self.powf(self + 0.5) * (-self).exp();
385        }
386
387        let mut result = 1.0;
388        let n = self as u32;
389        for i in 2..=n {
390            result *= i as f32;
391        }
392
393        result
394    }
395}
396
397// Implement ScientificNumber for f64
398impl ScientificNumber for f64 {
399    fn abs(self) -> Self {
400        self.abs()
401    }
402
403    fn sqrt(self) -> Self {
404        self.sqrt()
405    }
406
407    fn max(self, other: Self) -> Self {
408        self.max(other)
409    }
410
411    fn min(self, other: Self) -> Self {
412        self.min(other)
413    }
414
415    fn is_finite(self) -> bool {
416        self.is_finite()
417    }
418
419    fn to_f64(self) -> Option<f64> {
420        Some(self)
421    }
422
423    fn from_f64(value: f64) -> Option<Self> {
424        Some(value)
425    }
426
427    fn from_le_bytes(bytes: &[u8]) -> Self {
428        let mut array = [0u8; 8];
429        array.copy_from_slice(&bytes[..8]);
430        f64::from_le_bytes(array)
431    }
432
433    fn from_be_bytes(bytes: &[u8]) -> Self {
434        let mut array = [0u8; 8];
435        array.copy_from_slice(&bytes[..8]);
436        f64::from_be_bytes(array)
437    }
438
439    fn to_le_bytes(self) -> Vec<u8> {
440        self.to_le_bytes().to_vec()
441    }
442
443    fn to_be_bytes(self) -> Vec<u8> {
444        self.to_be_bytes().to_vec()
445    }
446}
447
448// Implement RealNumber for f64
449impl RealNumber for f64 {
450    fn epsilon() -> Self {
451        Self::EPSILON
452    }
453
454    fn exp(self) -> Self {
455        self.exp()
456    }
457
458    fn ln(self) -> Self {
459        self.ln()
460    }
461
462    fn log10(self) -> Self {
463        self.log10()
464    }
465
466    fn log2(self) -> Self {
467        self.log2()
468    }
469
470    fn sin(self) -> Self {
471        self.sin()
472    }
473
474    fn cos(self) -> Self {
475        self.cos()
476    }
477
478    fn tan(self) -> Self {
479        self.tan()
480    }
481
482    fn sinh(self) -> Self {
483        self.sinh()
484    }
485
486    fn cosh(self) -> Self {
487        self.cosh()
488    }
489
490    fn tanh(self) -> Self {
491        self.tanh()
492    }
493
494    fn asin(self) -> Self {
495        self.asin()
496    }
497
498    fn acos(self) -> Self {
499        self.acos()
500    }
501
502    fn atan(self) -> Self {
503        self.atan()
504    }
505
506    fn atan2(self, other: Self) -> Self {
507        self.atan2(other)
508    }
509
510    fn powf(self, n: Self) -> Self {
511        self.powf(n)
512    }
513
514    fn powi(self, n: i32) -> Self {
515        self.powi(n)
516    }
517
518    fn factorial(self) -> Self {
519        if self < 0.0 {
520            return Self::NAN;
521        }
522
523        // Use Stirling's approximation for non-integers or large values
524        if self != self.trunc() || self > 170.0 {
525            const SQRT_TWO_PI: f64 = 2.506_628_274_631_000_2;
526            return SQRT_TWO_PI * self.powf(self + 0.5) * (-self).exp();
527        }
528
529        let mut result = 1.0;
530        let n = self as u32;
531        for i in 2..=n {
532            result *= i as f64;
533        }
534
535        result
536    }
537}
538
539// Implement ScientificNumber for i32
540impl ScientificNumber for i32 {
541    fn abs(self) -> Self {
542        self.abs()
543    }
544
545    fn sqrt(self) -> Self {
546        (self as f64).sqrt() as i32
547    }
548
549    fn max(self, other: Self) -> Self {
550        std::cmp::max(self, other)
551    }
552
553    fn min(self, other: Self) -> Self {
554        std::cmp::min(self, other)
555    }
556
557    fn is_finite(self) -> bool {
558        true // integers are always finite
559    }
560
561    fn to_f64(self) -> Option<f64> {
562        Some(self as f64)
563    }
564
565    fn from_f64(value: f64) -> Option<Self> {
566        if value.is_finite() && value <= i32::MAX as f64 && value >= i32::MIN as f64 {
567            Some(value as i32)
568        } else {
569            None
570        }
571    }
572
573    fn from_le_bytes(bytes: &[u8]) -> Self {
574        let mut array = [0u8; 4];
575        array.copy_from_slice(&bytes[..4]);
576        i32::from_le_bytes(array)
577    }
578
579    fn from_be_bytes(bytes: &[u8]) -> Self {
580        let mut array = [0u8; 4];
581        array.copy_from_slice(&bytes[..4]);
582        i32::from_be_bytes(array)
583    }
584
585    fn to_le_bytes(self) -> Vec<u8> {
586        self.to_le_bytes().to_vec()
587    }
588
589    fn to_be_bytes(self) -> Vec<u8> {
590        self.to_be_bytes().to_vec()
591    }
592}
593
594// Implement ScientificInteger for i32
595impl ScientificInteger for i32 {
596    fn gcd(self, other: Self) -> Self {
597        let mut a = self.abs();
598        let mut b = other.abs();
599
600        if a == 0 {
601            return b;
602        }
603        if b == 0 {
604            return a;
605        }
606
607        while b != 0 {
608            let temp = b;
609            b = a % b;
610            a = temp;
611        }
612
613        a
614    }
615
616    fn lcm(self, other: Self) -> Self {
617        if self == 0 || other == 0 {
618            return 0;
619        }
620
621        let gcd = self.gcd(other);
622
623        (self / gcd) * other
624    }
625
626    fn is_prime(self) -> bool {
627        if self <= 1 {
628            return false;
629        }
630        if self <= 3 {
631            return true;
632        }
633        if self % 2 == 0 || self % 3 == 0 {
634            return false;
635        }
636
637        let mut i = 5;
638        while i * i <= self {
639            if self % i == 0 || self % (i + 2) == 0 {
640                return false;
641            }
642            i += 6;
643        }
644
645        true
646    }
647
648    fn is_even(self) -> bool {
649        self % 2 == 0
650    }
651
652    fn is_odd(self) -> bool {
653        self % 2 != 0
654    }
655
656    fn mod_pow(self, exp: Self, modulus: Self) -> Self {
657        if modulus == 1 {
658            return 0;
659        }
660
661        let mut base = self % modulus;
662        let mut result = 1;
663        let mut exponent = exp;
664
665        while exponent > 0 {
666            if exponent % 2 == 1 {
667                result = (result * base) % modulus;
668            }
669            exponent >>= 1;
670            base = (base * base) % modulus;
671        }
672
673        result
674    }
675
676    fn factorial(self) -> CoreResult<Self> {
677        if self < 0 {
678            return Err(CoreError::ValueError(ErrorContext::new(
679                "Factorial not defined for negative numbers".to_string(),
680            )));
681        }
682
683        let mut result = 1;
684        for i in 2..=self {
685            result *= i;
686        }
687
688        Ok(result)
689    }
690
691    fn binomial(self, k: Self) -> Self {
692        if k < 0 || k > self {
693            return 0;
694        }
695
696        let k = std::cmp::min(k, self - k);
697        let mut result = 1;
698
699        for i in 0..k {
700            result = result * (self - i) / (i + 1);
701        }
702
703        result
704    }
705}
706
707/// Conversion between different numeric types
708pub trait NumericConversion<T> {
709    /// Try to convert to the target type
710    fn try_convert(self) -> Option<T>;
711
712    /// Convert to the target type, or a default value if conversion fails
713    fn convert_or(self, default: T) -> T;
714
715    /// Convert to the target type, or panic if conversion fails
716    fn convert(self) -> T;
717}
718
719/// Implement NumericConversion for all types that implement NumCast
720impl<F, T> NumericConversion<T> for F
721where
722    F: NumCast,
723    T: NumCast,
724{
725    fn try_convert(self) -> Option<T> {
726        num_traits::cast(self)
727    }
728
729    fn convert_or(self, default: T) -> T {
730        self.try_convert().unwrap_or(default)
731    }
732
733    fn convert(self) -> T {
734        self.try_convert().expect("Numeric conversion failed")
735    }
736}
737
738/// Trait for converting between degrees and radians
739pub trait AngleConversion: Sized {
740    /// Convert from degrees to radians
741    fn to_radians(&self) -> CoreResult<Self>
742    where
743        Self: std::marker::Sized;
744
745    /// Convert from radians to degrees
746    fn to_degrees(&self) -> CoreResult<Self>
747    where
748        Self: std::marker::Sized;
749}
750
751/// Implement AngleConversion for all RealNumber types
752impl<T: RealNumber> AngleConversion for T {
753    fn to_radians(&self) -> CoreResult<Self> {
754        let pi = T::from_f64(std::f64::consts::PI).ok_or_else(|| {
755            CoreError::ValueError(ErrorContext::new(
756                "Failed to convert PI constant to target type".to_string(),
757            ))
758        })?;
759        let one_eighty = T::from_f64(180.0).ok_or_else(|| {
760            CoreError::ValueError(ErrorContext::new(
761                "Failed to convert 180.0 to target type".to_string(),
762            ))
763        })?;
764        Ok(*self * pi / one_eighty)
765    }
766
767    fn to_degrees(&self) -> CoreResult<Self> {
768        let pi = T::from_f64(std::f64::consts::PI).ok_or_else(|| {
769            CoreError::ValueError(ErrorContext::new(
770                "Failed to convert PI constant to target type".to_string(),
771            ))
772        })?;
773        let one_eighty = T::from_f64(180.0).ok_or_else(|| {
774            CoreError::ValueError(ErrorContext::new(
775                "Failed to convert 180.0 to target type".to_string(),
776            ))
777        })?;
778        Ok(*self * one_eighty / pi)
779    }
780}
781
782/// Type-safe representation of a unitless quantity
783#[derive(Clone, Copy, PartialEq, Debug)]
784pub struct Scalar<T: ScientificNumber>(pub T);
785
786impl<T: ScientificNumber> Scalar<T> {
787    /// Create a new scalar
788    pub fn new(value: T) -> Self {
789        Scalar(value)
790    }
791
792    /// Get the underlying value
793    pub fn value(&self) -> T {
794        self.0
795    }
796}
797
798impl<T: ScientificNumber> From<T> for Scalar<T> {
799    fn from(value: T) -> Self {
800        Scalar(value)
801    }
802}
803
804impl<T: ScientificNumber> Add for Scalar<T> {
805    type Output = Self;
806
807    fn add(self, other: Self) -> Self::Output {
808        Scalar(self.0 + other.0)
809    }
810}
811
812impl<T: ScientificNumber> Sub for Scalar<T> {
813    type Output = Self;
814
815    fn sub(self, other: Self) -> Self::Output {
816        Scalar(self.0 - other.0)
817    }
818}
819
820impl<T: ScientificNumber> Mul for Scalar<T> {
821    type Output = Self;
822
823    fn mul(self, other: Self) -> Self::Output {
824        Scalar(self.0 * other.0)
825    }
826}
827
828impl<T: ScientificNumber> Div for Scalar<T> {
829    type Output = Self;
830
831    fn div(self, other: Self) -> Self::Output {
832        Scalar(self.0 / other.0)
833    }
834}
835
836impl<T: ScientificNumber + Neg<Output = T>> Neg for Scalar<T> {
837    type Output = Self;
838
839    fn neg(self) -> Self::Output {
840        Scalar(self.0.neg())
841    }
842}
843
844/// Automated precision tracking for numerical computations
845pub mod precision_tracking;
846
847/// Specialized numeric types for scientific domains
848pub mod scientific_types;
849
850/// Arbitrary precision numerical computation support
851#[cfg(feature = "arbitrary-precision")]
852pub mod arbitrary_precision;
853
854/// Numerical stability improvements
855pub mod stability;
856
857/// Stable numerical algorithms
858pub mod stable_algorithms;
859
860/// Advanced-optimized SIMD operations for numerical computations
861///
862/// This module provides vectorized implementations of common mathematical operations
863/// using the highest available SIMD instruction sets for maximum performance.
864pub mod advanced_simd {
865    #[allow(unused_imports)]
866    use super::*;
867
868    #[cfg(target_arch = "aarch64")]
869    use std::arch::aarch64::*;
870    #[cfg(target_arch = "x86_64")]
871    use std::arch::x86_64::*;
872
873    /// Optimized vectorized addition for f32 arrays
874    #[inline]
875    pub fn add_f32_advanced(a: &[f32], b: &[f32], result: &mut [f32]) {
876        debug_assert_eq!(a.len(), b.len());
877        debug_assert_eq!(a.len(), result.len());
878
879        let len = a.len();
880        let mut i = 0;
881
882        // AVX2 path - process 8 elements at a time
883        #[cfg(target_arch = "x86_64")]
884        {
885            if is_x86_feature_detected!("avx2") {
886                unsafe {
887                    while i + 8 <= len {
888                        let va = _mm256_loadu_ps(a.as_ptr().add(i));
889                        let vb = _mm256_loadu_ps(b.as_ptr().add(i));
890                        let vr = _mm256_add_ps(va, vb);
891                        _mm256_storeu_ps(result.as_mut_ptr().add(i), vr);
892                        i += 8;
893                    }
894                }
895            }
896            // SSE path - process 4 elements at a time
897            else if is_x86_feature_detected!("sse") {
898                unsafe {
899                    while i + 4 <= len {
900                        let va = _mm_loadu_ps(a.as_ptr().add(i));
901                        let vb = _mm_loadu_ps(b.as_ptr().add(i));
902                        let vr = _mm_add_ps(va, vb);
903                        _mm_storeu_ps(result.as_mut_ptr().add(i), vr);
904                        i += 4;
905                    }
906                }
907            }
908        }
909
910        // ARM NEON path - process 4 elements at a time
911        #[cfg(target_arch = "aarch64")]
912        {
913            if std::arch::is_aarch64_feature_detected!("neon") {
914                unsafe {
915                    while i + 4 <= len {
916                        let va = vld1q_f32(a.as_ptr().add(i));
917                        let vb = vld1q_f32(b.as_ptr().add(i));
918                        let vr = vaddq_f32(va, vb);
919                        vst1q_f32(result.as_mut_ptr().add(i), vr);
920                        i += 4;
921                    }
922                }
923            }
924        }
925
926        // Scalar fallback for remaining elements
927        while i < len {
928            result[i] = a[i] + b[i];
929            i += 1;
930        }
931    }
932
933    /// Optimized vectorized multiplication for f32 arrays
934    #[inline]
935    pub fn mul_f32_advanced(a: &[f32], b: &[f32], result: &mut [f32]) {
936        debug_assert_eq!(a.len(), b.len());
937        debug_assert_eq!(a.len(), result.len());
938
939        let len = a.len();
940        let mut i = 0;
941
942        // AVX2 + FMA path for maximum performance
943        #[cfg(target_arch = "x86_64")]
944        {
945            if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") {
946                unsafe {
947                    while i + 8 <= len {
948                        let va = _mm256_loadu_ps(a.as_ptr().add(i));
949                        let vb = _mm256_loadu_ps(b.as_ptr().add(i));
950                        let vr = _mm256_mul_ps(va, vb);
951                        _mm256_storeu_ps(result.as_mut_ptr().add(i), vr);
952                        i += 8;
953                    }
954                }
955            } else if is_x86_feature_detected!("sse") {
956                unsafe {
957                    while i + 4 <= len {
958                        let va = _mm_loadu_ps(a.as_ptr().add(i));
959                        let vb = _mm_loadu_ps(b.as_ptr().add(i));
960                        let vr = _mm_mul_ps(va, vb);
961                        _mm_storeu_ps(result.as_mut_ptr().add(i), vr);
962                        i += 4;
963                    }
964                }
965            }
966        }
967
968        // ARM NEON path
969        #[cfg(target_arch = "aarch64")]
970        {
971            if std::arch::is_aarch64_feature_detected!("neon") {
972                unsafe {
973                    while i + 4 <= len {
974                        let va = vld1q_f32(a.as_ptr().add(i));
975                        let vb = vld1q_f32(b.as_ptr().add(i));
976                        let vr = vmulq_f32(va, vb);
977                        vst1q_f32(result.as_mut_ptr().add(i), vr);
978                        i += 4;
979                    }
980                }
981            }
982        }
983
984        // Scalar fallback
985        while i < len {
986            result[i] = a[i] * b[i];
987            i += 1;
988        }
989    }
990
991    /// Optimized fused multiply-add (a * b + c) for f32 arrays
992    #[inline]
993    pub fn fma_f32_advanced(a: &[f32], b: &[f32], c: &[f32], result: &mut [f32]) {
994        debug_assert_eq!(a.len(), b.len());
995        debug_assert_eq!(a.len(), c.len());
996        debug_assert_eq!(a.len(), result.len());
997
998        let len = a.len();
999        let mut i = 0;
1000
1001        // AVX2 + FMA path for optimal performance
1002        #[cfg(target_arch = "x86_64")]
1003        {
1004            if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") {
1005                unsafe {
1006                    while i + 8 <= len {
1007                        let va = _mm256_loadu_ps(a.as_ptr().add(i));
1008                        let vb = _mm256_loadu_ps(b.as_ptr().add(i));
1009                        let vc = _mm256_loadu_ps(c.as_ptr().add(i));
1010                        let vr = _mm256_fmadd_ps(va, vb, vc);
1011                        _mm256_storeu_ps(result.as_mut_ptr().add(i), vr);
1012                        i += 8;
1013                    }
1014                }
1015            } else if is_x86_feature_detected!("sse") {
1016                unsafe {
1017                    while i + 4 <= len {
1018                        let va = _mm_loadu_ps(a.as_ptr().add(i));
1019                        let vb = _mm_loadu_ps(b.as_ptr().add(i));
1020                        let vc = _mm_loadu_ps(c.as_ptr().add(i));
1021                        let vr = _mm_add_ps(_mm_mul_ps(va, vb), vc);
1022                        _mm_storeu_ps(result.as_mut_ptr().add(i), vr);
1023                        i += 4;
1024                    }
1025                }
1026            }
1027        }
1028
1029        // ARM NEON path with FMA
1030        #[cfg(target_arch = "aarch64")]
1031        {
1032            if std::arch::is_aarch64_feature_detected!("neon") {
1033                unsafe {
1034                    while i + 4 <= len {
1035                        let va = vld1q_f32(a.as_ptr().add(i));
1036                        let vb = vld1q_f32(b.as_ptr().add(i));
1037                        let vc = vld1q_f32(c.as_ptr().add(i));
1038                        let vr = vfmaq_f32(vc, va, vb);
1039                        vst1q_f32(result.as_mut_ptr().add(i), vr);
1040                        i += 4;
1041                    }
1042                }
1043            }
1044        }
1045
1046        // Scalar fallback
1047        while i < len {
1048            result[i] = a[i] * b[i] + c[0];
1049            i += 1;
1050        }
1051    }
1052
1053    /// Optimized vectorized dot product for f32 arrays
1054    #[inline]
1055    pub fn dot_product_f32_advanced(a: &[f32], b: &[f32]) -> f32 {
1056        debug_assert_eq!(a.len(), b.len());
1057
1058        let len = a.len();
1059        let mut i = 0;
1060        let mut sum = 0.0f32;
1061
1062        // AVX2 + FMA path for maximum throughput
1063        #[cfg(target_arch = "x86_64")]
1064        {
1065            if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") {
1066                unsafe {
1067                    let mut acc = _mm256_setzero_ps();
1068                    while i + 8 <= len {
1069                        let va = _mm256_loadu_ps(a.as_ptr().add(i));
1070                        let vb = _mm256_loadu_ps(b.as_ptr().add(i));
1071                        acc = _mm256_fmadd_ps(va, vb, acc);
1072                        i += 8;
1073                    }
1074                    // Horizontal sum of 8 floats
1075                    let hi = _mm256_extractf128_ps(acc, 1);
1076                    let lo = _mm256_castps256_ps128(acc);
1077                    let sum4 = _mm_add_ps(hi, lo);
1078                    let sum2 = _mm_add_ps(sum4, _mm_movehl_ps(sum4, sum4));
1079                    let sum1 = _mm_add_ss(sum2, _mm_shuffle_ps(sum2, sum2, 1));
1080                    sum = _mm_cvtss_f32(sum1);
1081                }
1082            } else if is_x86_feature_detected!("sse") {
1083                unsafe {
1084                    let mut acc = _mm_setzero_ps();
1085                    while i + 4 <= len {
1086                        let va = _mm_loadu_ps(a.as_ptr().add(i));
1087                        let vb = _mm_loadu_ps(b.as_ptr().add(i));
1088                        acc = _mm_add_ps(acc, _mm_mul_ps(va, vb));
1089                        i += 4;
1090                    }
1091                    // Horizontal sum of 4 floats
1092                    let sum2 = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
1093                    let sum1 = _mm_add_ss(sum2, _mm_shuffle_ps(sum2, sum2, 1));
1094                    sum = _mm_cvtss_f32(sum1);
1095                }
1096            }
1097        }
1098
1099        // ARM NEON path with accumulation
1100        #[cfg(target_arch = "aarch64")]
1101        {
1102            if std::arch::is_aarch64_feature_detected!("neon") {
1103                unsafe {
1104                    let mut acc = vdupq_n_f32(0.0);
1105                    while i + 4 <= len {
1106                        let va = vld1q_f32(a.as_ptr().add(i));
1107                        let vb = vld1q_f32(b.as_ptr().add(i));
1108                        acc = vfmaq_f32(acc, va, vb);
1109                        i += 4;
1110                    }
1111                    // Horizontal sum
1112                    sum = vaddvq_f32(acc);
1113                }
1114            }
1115        }
1116
1117        // Scalar accumulation for remaining elements
1118        while i < len {
1119            sum += a[i] * b[i];
1120            i += 1;
1121        }
1122
1123        sum
1124    }
1125
1126    /// Optimized vectorized sum reduction for f32 arrays
1127    #[inline]
1128    pub fn sum_f32_advanced(data: &[f32]) -> f32 {
1129        let len = data.len();
1130        let mut i = 0;
1131        let mut sum = 0.0f32;
1132
1133        // AVX2 path
1134        #[cfg(target_arch = "x86_64")]
1135        {
1136            if is_x86_feature_detected!("avx2") {
1137                unsafe {
1138                    let mut acc = _mm256_setzero_ps();
1139                    while i + 8 <= len {
1140                        let v = _mm256_loadu_ps(data.as_ptr().add(i));
1141                        acc = _mm256_add_ps(acc, v);
1142                        i += 8;
1143                    }
1144                    // Horizontal sum
1145                    let hi = _mm256_extractf128_ps(acc, 1);
1146                    let lo = _mm256_castps256_ps128(acc);
1147                    let sum4 = _mm_add_ps(hi, lo);
1148                    let sum2 = _mm_add_ps(sum4, _mm_movehl_ps(sum4, sum4));
1149                    let sum1 = _mm_add_ss(sum2, _mm_shuffle_ps(sum2, sum2, 1));
1150                    sum = _mm_cvtss_f32(sum1);
1151                }
1152            } else if is_x86_feature_detected!("sse") {
1153                unsafe {
1154                    let mut acc = _mm_setzero_ps();
1155                    while i + 4 <= len {
1156                        let v = _mm_loadu_ps(data.as_ptr().add(i));
1157                        acc = _mm_add_ps(acc, v);
1158                        i += 4;
1159                    }
1160                    let sum2 = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
1161                    let sum1 = _mm_add_ss(sum2, _mm_shuffle_ps(sum2, sum2, 1));
1162                    sum = _mm_cvtss_f32(sum1);
1163                }
1164            }
1165        }
1166
1167        // ARM NEON path
1168        #[cfg(target_arch = "aarch64")]
1169        {
1170            if std::arch::is_aarch64_feature_detected!("neon") {
1171                unsafe {
1172                    let mut acc = vdupq_n_f32(0.0);
1173                    while i + 4 <= len {
1174                        let v = vld1q_f32(data.as_ptr().add(i));
1175                        acc = vaddq_f32(acc, v);
1176                        i += 4;
1177                    }
1178                    sum = vaddvq_f32(acc);
1179                }
1180            }
1181        }
1182
1183        // Scalar accumulation
1184        while i < len {
1185            sum += data[i];
1186            i += 1;
1187        }
1188
1189        sum
1190    }
1191}
1192
1193#[cfg(test)]
1194mod tests {
1195    use super::*;
1196
1197    #[test]
1198    fn test_scientific_number_f32() {
1199        let a: f32 = 3.0;
1200        let b: f32 = 4.0;
1201
1202        assert_eq!(a.abs(), 3.0);
1203        assert_eq!(a.sqrt(), (3.0_f32).sqrt());
1204        assert_eq!(a.square(), 9.0);
1205        assert_eq!(a.max(b), 4.0);
1206        assert_eq!(a.min(b), 3.0);
1207        assert!(a.is_finite());
1208        assert_eq!(a.to_f64(), Some(3.0));
1209        assert_eq!(f32::from_f64(3.5), Some(3.5));
1210    }
1211
1212    #[test]
1213    fn test_scientific_number_f64() {
1214        let a: f64 = 3.0;
1215        let b: f64 = 4.0;
1216
1217        assert_eq!(a.abs(), 3.0);
1218        assert_eq!(a.sqrt(), (3.0_f64).sqrt());
1219        assert_eq!(a.square(), 9.0);
1220        assert_eq!(a.max(b), 4.0);
1221        assert_eq!(a.min(b), 3.0);
1222        assert!(a.is_finite());
1223        assert_eq!(a.to_f64(), Some(3.0));
1224        assert_eq!(f64::from_f64(3.5), Some(3.5));
1225    }
1226
1227    #[test]
1228    fn test_real_number_f32() {
1229        let a: f32 = 3.0;
1230
1231        assert_eq!(<f32 as RealNumber>::epsilon(), f32::EPSILON);
1232        assert_eq!(a.exp(), (3.0_f32).exp());
1233        assert_eq!(a.ln(), (3.0_f32).ln());
1234        assert_eq!(a.sin(), (3.0_f32).sin());
1235        assert_eq!(a.cos(), (3.0_f32).cos());
1236        assert_eq!(a.powf(2.0), 9.0);
1237    }
1238
1239    #[test]
1240    fn test_real_number_f64() {
1241        let a: f64 = 3.0;
1242
1243        assert_eq!(<f64 as RealNumber>::epsilon(), f64::EPSILON);
1244        assert_eq!(a.exp(), (3.0_f64).exp());
1245        assert_eq!(a.ln(), (3.0_f64).ln());
1246        assert_eq!(a.sin(), (3.0_f64).sin());
1247        assert_eq!(a.cos(), (3.0_f64).cos());
1248        assert_eq!(a.powf(2.0), 9.0);
1249    }
1250
1251    #[test]
1252    fn test_scientific_integer_i32() {
1253        let a: i32 = 12;
1254        let b: i32 = 8;
1255
1256        assert_eq!(a.gcd(b), 4);
1257        assert_eq!(a.lcm(b), 24);
1258        assert!(!a.is_prime());
1259        assert!(11_i32.is_prime());
1260        assert!(a.is_even());
1261        assert!(!a.is_odd());
1262        assert_eq!(a.mod_pow(2, 10), 4); // 12^2 mod 10 = 4
1263        assert_eq!(5_i32.factorial().unwrap(), 120);
1264        assert_eq!(5_i32.binomial(2), 10); // 5 choose 2 = 10
1265    }
1266
1267    #[test]
1268    fn test_numeric_conversion() {
1269        let a: f64 = 3.5;
1270        let b: i32 = a.try_convert().expect("3.5 should convert to i32 as 3");
1271        assert_eq!(b, 3);
1272
1273        let c: f32 = 100.5;
1274        let d: f64 = c.convert();
1275        assert_eq!(d, 100.5);
1276
1277        let e: i32 = 100;
1278        let f: f32 = e.convert();
1279        assert_eq!(f, 100.0);
1280    }
1281
1282    #[test]
1283    fn test_angle_conversion() {
1284        let degrees: f64 = 180.0;
1285        let radians = <f64 as AngleConversion>::to_radians(&degrees).unwrap();
1286        assert!((radians - std::f64::consts::PI).abs() < 1e-10);
1287
1288        let radians: f64 = std::f64::consts::PI / 2.0;
1289        let degrees = <f64 as AngleConversion>::to_degrees(&radians).unwrap();
1290        assert!((degrees - 90.0).abs() < 1e-10);
1291    }
1292
1293    #[test]
1294    fn test_scalar() {
1295        let a = Scalar::new(3.0_f64);
1296        let b = Scalar::new(4.0_f64);
1297
1298        assert_eq!((a + b).value(), 7.0);
1299        assert_eq!((a - b).value(), -1.0);
1300        assert_eq!((a * b).value(), 12.0);
1301        assert_eq!((a / b).value(), 0.75);
1302        assert_eq!((-a).value(), -3.0);
1303
1304        let c: Scalar<f64> = 5.0.into();
1305        assert_eq!(c.value(), 5.0);
1306    }
1307}
scirs2_core/numeric.rs

scirs2_core/
numeric.rs