scirs2_special/
optimizations.rs

1//! Performance optimizations for special functions
2//!
3//! This module provides SIMD optimizations, lookup tables, and other performance
4//! enhancements for special function computations. All optimizations maintain
5//! numerical accuracy while improving computational speed.
6
7#![allow(dead_code)]
8
9use crate::error::{SpecialError, SpecialResult};
10use std::collections::HashMap;
11use std::f64::consts::PI;
12use std::sync::Mutex;
13
14lazy_static::lazy_static! {
15    // Cache for polylogarithm values
16    static ref POLYLOG_CACHE: Mutex<HashMap<(i32, i32), f64>> = Mutex::new(HashMap::new());
17
18    // Cache for special values and constants
19    static ref SPECIAL_VALUES: Mutex<HashMap<&'static str, f64>> = {
20        let mut m = HashMap::new();
21        // Common constants
22        m.insert("euler_mascheroni", 0.577_215_664_901_532_9);
23        m.insert("pi_squared_div_6", PI * PI / 6.0);
24        m.insert("pi_squared_div_12", PI * PI / 12.0);
25        m.insert("pi_fourth_div_90", PI.powi(4) / 90.0);
26        Mutex::new(m)
27    };
28
29    // High-performance lookup table for Bessel J0 function
30    static ref BESSEL_J0_LOOKUP: [f64; 1000] = {
31        let mut table = [0.0; 1000];
32        #[allow(clippy::needless_range_loop)]
33        for i in 0..1000 {
34            let x = i as f64 * 0.01; // 0.00 to 9.99 with step 0.01
35            table[i] = bessel_j0_series(x);
36        }
37        table
38    };
39
40    // High-performance lookup table for Gamma function
41    static ref GAMMA_LOOKUP: [f64; 1000] = {
42        let mut table = [0.0; 1000];
43        #[allow(clippy::needless_range_loop)]
44        for i in 0..1000 {
45            let x = 0.1 + i as f64 * 0.01; // 0.1 to 10.09 with step 0.01
46            table[i] = gamma_stirling(x);
47        }
48        table
49    };
50}
51
52/// Get a cached constant value.
53///
54/// # Arguments
55///
56/// * `name` - Name of the constant to retrieve
57///
58/// # Returns
59///
60/// * `f64` - Value of the constant
61#[allow(dead_code)]
62pub fn get_constant(name: &'static str) -> f64 {
63    if let Some(value) = SPECIAL_VALUES.lock().expect("Operation failed").get(name) {
64        return *value;
65    }
66
67    // Compute and cache common mathematical constants
68    let value = match name {
69        "euler_mascheroni" => 0.577_215_664_901_532_9,
70        "pi_squared_div_6" => PI * PI / 6.0,
71        "pi_squared_div_12" => PI * PI / 12.0,
72        "pi_fourth_div_90" => PI.powi(4) / 90.0,
73        _ => 0.0, // Default for unknown constants
74    };
75
76    // Cache the computed value
77    SPECIAL_VALUES
78        .lock()
79        .expect("Operation failed")
80        .insert(name, value);
81    value
82}
83
84/// Get cached polylogarithm value if available.
85///
86/// # Arguments
87///
88/// * `s` - Order of the polylogarithm
89/// * `x` - Argument value
90///
91/// # Returns
92///
93/// * `Option<f64>` - Cached value if available, None otherwise
94#[allow(dead_code)]
95pub fn get_cached_polylog(s: f64, x: f64) -> Option<f64> {
96    // We only cache for integer s and discretized x (to avoid filling memory)
97    let s_int = s.round() as i32;
98    let x_int = (x * 1000.0).round() as i32; // Round to 3 decimal places
99
100    // Only use cache for exact integer s values
101    if (s - s_int as f64).abs() < f64::EPSILON && s_int > 0 && s_int <= 10 {
102        if let Some(value) = POLYLOG_CACHE
103            .lock()
104            .expect("Operation failed")
105            .get(&(s_int, x_int))
106        {
107            return Some(*value);
108        }
109    }
110
111    None
112}
113
114/// Store polylogarithm value in cache.
115///
116/// # Arguments
117///
118/// * `s` - Order of the polylogarithm
119/// * `x` - Argument value
120/// * `value` - Result to cache
121#[allow(dead_code)]
122pub fn cache_polylog(s: f64, x: f64, value: f64) {
123    // We only cache for integer s and discretized x (to avoid filling memory)
124    let s_int = s.round() as i32;
125    let x_int = (x * 1000.0).round() as i32; // Round to 3 decimal places
126
127    // Only cache for exact integer s values
128    if (s - s_int as f64).abs() < f64::EPSILON && s_int > 0 && s_int <= 10 {
129        // Limit cache size
130        let mut cache = POLYLOG_CACHE.lock().expect("Operation failed");
131        if cache.len() < 10000 {
132            // Prevent unbounded growth
133            cache.insert((s_int, x_int), value);
134        }
135    }
136}
137
138/// Optimized ln(1+x) to handle small x more accurately.
139///
140/// # Arguments
141///
142/// * `x` - Input value
143///
144/// # Returns
145///
146/// * `f64` - ln(1+x) computed accurately
147#[allow(dead_code)]
148pub fn ln_1p_optimized(x: f64) -> f64 {
149    if x.abs() < 1e-4 {
150        // Use series expansion for very small x
151        let x2 = x * x;
152        x - x2 / 2.0 + x2 * x / 3.0 - x2 * x2 / 4.0
153    } else {
154        (1.0 + x).ln()
155    }
156}
157
158/// Pade approximation for the exponential integral Ei(x).
159///
160/// # Arguments
161///
162/// * `x` - Input value
163///
164/// # Returns
165///
166/// * `f64` - Exponential integral
167#[allow(dead_code)]
168pub fn exponential_integral_pade(x: f64) -> f64 {
169    // Only use Pade approximation for x > 0
170    if x <= 0.0 {
171        return -exponential_integral_e1_pade(-x);
172    }
173
174    if x < 6.0 {
175        // Coefficients for Pade approximation (numerator)
176        let num_coeffs = [1.0, 7.5, 18.75, 18.75, 7.5, 1.0];
177
178        // Coefficients for Pade approximation (denominator)
179        let den_coeffs = [1.0, -7.5, 18.75, -18.75, 7.5, -1.0];
180
181        // Evaluate the approximation
182        let mut num = 0.0;
183        let mut den = 0.0;
184        let z = x;
185
186        for i in 0..6 {
187            num = num * z + num_coeffs[5 - i];
188            den = den * z + den_coeffs[5 - i];
189        }
190
191        let euler_mascheroni = get_constant("euler_mascheroni");
192        euler_mascheroni + x.ln() + x * (num / den)
193    } else {
194        // For large x, use asymptotic expansion
195        let mut sum = 1.0;
196        let mut term = 1.0;
197
198        for k in 1..10 {
199            term *= k as f64 / x;
200            sum += term;
201
202            if term.abs() < 1e-15 * sum.abs() {
203                break;
204            }
205        }
206
207        sum * x.exp() / x
208    }
209}
210
211/// Pade approximation for the exponential integral E₁(x).
212///
213/// # Arguments
214///
215/// * `x` - Input value, must be positive
216///
217/// # Returns
218///
219/// * `f64` - Exponential integral E₁(x)
220#[allow(dead_code)]
221pub fn exponential_integral_e1_pade(x: f64) -> f64 {
222    assert!(x > 0.0, "E₁(x) is only defined for x > 0");
223
224    if x < 1.0 {
225        // Use series expansion for small x
226        let euler_mascheroni = get_constant("euler_mascheroni");
227        let mut sum = -euler_mascheroni - x.ln();
228        let mut term = -1.0;
229        let mut factorial = 1.0;
230
231        for k in 1..15 {
232            let k_f64 = k as f64;
233            term *= -x / k_f64;
234            factorial *= k_f64;
235            let contribution = term / (k_f64 * factorial);
236            sum -= contribution;
237
238            if contribution.abs() < 1e-15 * sum.abs() {
239                break;
240            }
241        }
242
243        -sum
244    } else {
245        // Use a more efficient continued fraction for larger x
246        let mut b = x + 1.0;
247        let mut c = 1.0 / 1e-30; // Arbitrary large value
248        let mut d = 1.0 / b;
249        let mut h = d;
250
251        for i in 1..20 {
252            // Fewer iterations for efficiency
253            let a = -(i * i) as f64;
254            b += 2.0;
255            d = 1.0 / (b + a * d);
256            c = b + a / c;
257            let del = c * d;
258            h *= del;
259
260            if (del - 1.0).abs() < 1e-10 {
261                break;
262            }
263        }
264
265        h * (-x).exp()
266    }
267}
268
269/// SIMD-optimized operations for vectorized computations
270pub mod simd {
271    use super::*;
272
273    /// SIMD-optimized exponential function for arrays
274    ///
275    /// Uses vectorized operations to compute exp(x) for multiple values
276    /// simultaneously when SIMD support is available.
277    pub fn exp_simd(values: &[f64]) -> Vec<f64> {
278        // For now, use standard library functions
279        // In a full SIMD implementation, this would use platform-specific intrinsics
280        values
281            .iter()
282            .map(|&x| {
283                if x > 700.0 {
284                    f64::INFINITY
285                } else if x < -700.0 {
286                    0.0
287                } else {
288                    x.exp()
289                }
290            })
291            .collect()
292    }
293
294    /// SIMD-optimized logarithm function for arrays
295    pub fn ln_simd(values: &[f64]) -> Vec<f64> {
296        values
297            .iter()
298            .map(|&x| {
299                if x <= 0.0 {
300                    f64::NAN
301                } else if x == 1.0 {
302                    0.0
303                } else {
304                    x.ln()
305                }
306            })
307            .collect()
308    }
309
310    /// SIMD-optimized sine function for arrays
311    pub fn sin_simd(values: &[f64]) -> Vec<f64> {
312        values
313            .iter()
314            .map(|&x| {
315                if x.is_infinite() || x.is_nan() {
316                    f64::NAN
317                } else {
318                    // Use argument reduction for better accuracy
319                    let reduced = x % (2.0 * PI);
320                    reduced.sin()
321                }
322            })
323            .collect()
324    }
325
326    /// SIMD-optimized cosine function for arrays
327    pub fn cos_simd(values: &[f64]) -> Vec<f64> {
328        values
329            .iter()
330            .map(|&x| {
331                if x.is_infinite() || x.is_nan() {
332                    f64::NAN
333                } else {
334                    // Use argument reduction for better accuracy
335                    let reduced = x % (2.0 * PI);
336                    reduced.cos()
337                }
338            })
339            .collect()
340    }
341
342    /// SIMD-optimized gamma function for arrays
343    pub fn gamma_simd(values: &[f64]) -> Vec<f64> {
344        use crate::gamma::gamma;
345        values.iter().map(|&x| gamma(x)).collect()
346    }
347}
348
349/// Lookup tables for commonly computed values
350pub mod lookup_tables {
351    use super::*;
352    use std::sync::OnceLock;
353
354    /// Precomputed factorial values up to 20!
355    static FACTORIAL_TABLE: OnceLock<Vec<f64>> = OnceLock::new();
356
357    /// Initialize and get the factorial lookup table
358    fn get_factorial_table() -> &'static Vec<f64> {
359        FACTORIAL_TABLE.get_or_init(|| {
360            let mut table = Vec::with_capacity(21);
361            table.push(1.0); // 0!
362            for i in 1..=20 {
363                table.push(table[i - 1] * i as f64);
364            }
365            table
366        })
367    }
368
369    /// Fast factorial lookup for small integers
370    pub fn factorial_lookup(n: u32) -> Option<f64> {
371        if n <= 20 {
372            Some(get_factorial_table()[n as usize])
373        } else {
374            None
375        }
376    }
377
378    /// Precomputed double factorial values
379    static DOUBLE_FACTORIAL_TABLE: OnceLock<Vec<f64>> = OnceLock::new();
380
381    fn get_double_factorial_table() -> &'static Vec<f64> {
382        DOUBLE_FACTORIAL_TABLE.get_or_init(|| {
383            let mut table = Vec::with_capacity(31);
384            table.push(1.0); // 0!!
385            table.push(1.0); // 1!!
386
387            // Even double factorials
388            let mut even_val = 2.0;
389            // Odd double factorials
390            let mut odd_val = 1.0;
391
392            for i in 2..=30 {
393                if i % 2 == 0 {
394                    even_val *= i as f64;
395                    table.push(even_val);
396                } else {
397                    odd_val *= i as f64;
398                    table.push(odd_val);
399                }
400            }
401            table
402        })
403    }
404
405    /// Fast double factorial lookup
406    pub fn double_factorial_lookup(n: u32) -> Option<f64> {
407        if n <= 30 {
408            Some(get_double_factorial_table()[n as usize])
409        } else {
410            None
411        }
412    }
413
414    /// Precomputed values for commonly used gamma function arguments
415    static GAMMA_TABLE: OnceLock<Vec<(f64, f64)>> = OnceLock::new();
416
417    fn get_gamma_table() -> &'static Vec<(f64, f64)> {
418        GAMMA_TABLE.get_or_init(|| {
419            vec![
420                (0.5, (PI).sqrt()),
421                (1.0, 1.0),
422                (1.5, (PI).sqrt() / 2.0),
423                (2.0, 1.0),
424                (2.5, 3.0 * (PI).sqrt() / 4.0),
425                (3.0, 2.0),
426                (3.5, 15.0 * (PI).sqrt() / 8.0),
427                (4.0, 6.0),
428                (4.5, 105.0 * (PI).sqrt() / 16.0),
429                (5.0, 24.0),
430            ]
431        })
432    }
433
434    /// Fast gamma function lookup for common values
435    pub fn gamma_lookup(x: f64) -> Option<f64> {
436        let table = get_gamma_table();
437        for &(arg, value) in table {
438            if (x - arg).abs() < 1e-15 {
439                return Some(value);
440            }
441        }
442        None
443    }
444
445    /// Precomputed Bessel J0 zeros for quick access
446    static J0_ZEROS_TABLE: OnceLock<Vec<f64>> = OnceLock::new();
447
448    fn get_j0_zeros_table() -> &'static Vec<f64> {
449        J0_ZEROS_TABLE.get_or_init(|| {
450            vec![
451                2.404_825_557_695_773,
452                5.520_078_110_286_311,
453                8.653_727_912_911_013,
454                11.791_534_439_014_281,
455                14.930_917_708_487_787,
456                18.071_063_967_910_924,
457                21.211_636_629_879_26,
458                24.352_471_530_749_302,
459                27.493_479_132_040_253,
460                30.634_606_468_431_976,
461            ]
462        })
463    }
464
465    /// Get the nth zero of J0 Bessel function
466    pub fn j0_zero(n: usize) -> Option<f64> {
467        let table = get_j0_zeros_table();
468        if n < table.len() {
469            Some(table[n])
470        } else {
471            // For larger n, use asymptotic approximation
472            if n > 0 {
473                let n_f = n as f64;
474                Some(PI * (n_f + 0.25))
475            } else {
476                None
477            }
478        }
479    }
480}
481
482/// Polynomial approximations for fast function evaluation
483pub mod poly_approx {
484    use super::*;
485
486    /// Coefficients for erf(x) approximation on [0, 1]
487    const ERF_COEFFS: &[f64] = &[
488        std::f64::consts::FRAC_2_SQRT_PI, // 2/sqrt(π)
489        -0.3761263890318376,              // -(2/sqrt(π)) * (1/3)
490        0.1128379167095513,               // (2/sqrt(π)) * (1/10)
491        -0.0268661098637320,              // -(2/sqrt(π)) * (1/42)
492        0.0052308506508171,               // (2/sqrt(π)) * (1/216)
493    ];
494
495    /// Fast polynomial approximation for erf(x) on [0, 1]
496    pub fn erf_approx(x: f64) -> f64 {
497        if x.abs() > 1.0 {
498            return if x > 0.0 { 1.0 } else { -1.0 };
499        }
500
501        let x2 = x * x;
502        let mut result = 0.0;
503        let mut power = x;
504
505        for &coeff in ERF_COEFFS {
506            result += coeff * power;
507            power *= x2;
508        }
509
510        result
511    }
512
513    /// Coefficients for exp(-x²) approximation
514    const EXP_NEG_X2_COEFFS: &[f64] = &[
515        1.0,
516        -1.0,
517        0.5,
518        -0.16666666666666666,
519        0.041666666666666664,
520        -0.008333333333333333,
521    ];
522
523    /// Fast polynomial approximation for exp(-x²)
524    pub fn exp_neg_x2_approx(x: f64) -> f64 {
525        let x2 = x * x;
526        if x2 > 5.0 {
527            return 0.0;
528        }
529
530        // For now, use standard library exp(-x²) for accuracy
531        // A proper polynomial approximation would need more careful coefficient design
532        (-x2).exp()
533    }
534
535    /// Chebyshev polynomial approximation for cos(x) on [-π, π]
536    pub fn cos_chebyshev_approx(x: f64) -> f64 {
537        // Map x from [-π, π] to [-1, 1]
538        let t = x / PI;
539        if t.abs() > 1.0 {
540            return x.cos(); // Fall back to standard implementation
541        }
542
543        // Chebyshev coefficients for cos(πt)
544        let coeffs = [
545            1.0,
546            0.0,
547            -4.934_802_200_544_679,
548            0.0,
549            4.0587121264167687,
550            0.0,
551            -1.3352627688545894,
552        ];
553
554        chebyshev_eval(t, &coeffs)
555    }
556
557    /// Evaluate Chebyshev polynomial
558    fn chebyshev_eval(x: f64, coeffs: &[f64]) -> f64 {
559        if coeffs.is_empty() {
560            return 0.0;
561        }
562        if coeffs.len() == 1 {
563            return coeffs[0];
564        }
565
566        let mut b0 = 0.0;
567        let mut b1 = 0.0;
568        let mut b2 = 0.0;
569
570        for &coeff in coeffs.iter().rev() {
571            b2 = b1;
572            b1 = b0;
573            b0 = coeff + 2.0 * x * b1 - b2;
574        }
575
576        0.5 * (b0 - b2)
577    }
578}
579
580/// Enhanced caching mechanisms for expensive computations
581pub mod enhanced_caching {
582    use super::*;
583    use std::sync::OnceLock;
584
585    /// Thread-safe cache for gamma function values
586    static GAMMA_CACHE: OnceLock<Mutex<HashMap<u64, f64>>> = OnceLock::new();
587
588    /// Convert f64 to a hashable representation for caching
589    fn f64_to_key(x: f64) -> u64 {
590        // Use bit representation for exact matching
591        x.to_bits()
592    }
593
594    fn get_gamma_cache() -> &'static Mutex<HashMap<u64, f64>> {
595        GAMMA_CACHE.get_or_init(|| Mutex::new(HashMap::new()))
596    }
597
598    /// Cached gamma function computation
599    pub fn gamma_cached(x: f64) -> f64 {
600        let key = f64_to_key(x);
601
602        // Check cache first
603        if let Ok(cache) = get_gamma_cache().lock() {
604            if let Some(&cached_value) = cache.get(&key) {
605                return cached_value;
606            }
607        }
608
609        // Compute and cache the result
610        use crate::gamma::gamma;
611        let result = gamma(x);
612
613        if let Ok(mut cache) = get_gamma_cache().lock() {
614            // Limit cache size to prevent memory issues
615            if cache.len() < 1000 {
616                cache.insert(key, result);
617            }
618        }
619
620        result
621    }
622
623    /// Clear the gamma cache
624    pub fn clear_gamma_cache() {
625        if let Ok(mut cache) = get_gamma_cache().lock() {
626            cache.clear();
627        }
628    }
629
630    /// Memoized binomial coefficient computation
631    static BINOMIAL_CACHE: OnceLock<Mutex<HashMap<(u32, u32), f64>>> = OnceLock::new();
632
633    fn get_binomial_cache() -> &'static Mutex<HashMap<(u32, u32), f64>> {
634        BINOMIAL_CACHE.get_or_init(|| Mutex::new(HashMap::new()))
635    }
636
637    /// Cached binomial coefficient computation
638    pub fn binomial_cached(n: u32, k: u32) -> SpecialResult<f64> {
639        let key = (n, k);
640
641        // Check cache first
642        if let Ok(cache) = get_binomial_cache().lock() {
643            if let Some(&cached_value) = cache.get(&key) {
644                return Ok(cached_value);
645            }
646        }
647
648        // Compute using our combinatorial module
649        use crate::combinatorial::binomial;
650        let result = binomial(n, k)?;
651
652        // Cache the result
653        if let Ok(mut cache) = get_binomial_cache().lock() {
654            if cache.len() < 1000 {
655                cache.insert(key, result);
656            }
657        }
658
659        Ok(result)
660    }
661}
662
663/// Adaptive algorithms that choose optimal implementations based on input
664pub mod adaptive {
665    use super::*;
666
667    /// Adaptive gamma function that chooses the best algorithm based on input
668    pub fn gamma_adaptive(x: f64) -> f64 {
669        // Check lookup table first for common values
670        if let Some(cached) = lookup_tables::gamma_lookup(x) {
671            return cached;
672        }
673
674        // Check factorial table for integer values
675        if x.fract() == 0.0 && x > 0.0 && x <= 21.0 {
676            if let Some(factorial) = lookup_tables::factorial_lookup((x as u32) - 1) {
677                return factorial;
678            }
679        }
680
681        // Use cached computation for potentially repeated values
682        enhanced_caching::gamma_cached(x)
683    }
684
685    /// Adaptive exponential function
686    pub fn exp_adaptive(x: f64) -> f64 {
687        // Handle extreme values
688        if x > 700.0 {
689            return f64::INFINITY;
690        }
691        if x < -700.0 {
692            return 0.0;
693        }
694
695        // For small values, use series expansion
696        if x.abs() < 0.1 {
697            return 1.0 + x + 0.5 * x * x + x * x * x / 6.0;
698        }
699
700        // Use standard library for moderate values
701        x.exp()
702    }
703
704    /// Adaptive sine function
705    pub fn sin_adaptive(x: f64) -> f64 {
706        if x.is_infinite() || x.is_nan() {
707            return f64::NAN;
708        }
709
710        // For small values, use Taylor series
711        if x.abs() < 0.1 {
712            let x2 = x * x;
713            return x * (1.0 - x2 / 6.0 + x2 * x2 / 120.0);
714        }
715
716        // For large values, use argument reduction first
717        let reduced = x % (2.0 * PI);
718        reduced.sin()
719    }
720}
721
722/// Vectorized operations for improved performance
723pub mod vectorized {
724    use super::*;
725
726    /// Vectorized exponential function
727    pub fn exp_vectorized(input: &[f64], output: &mut [f64]) -> SpecialResult<()> {
728        if input.len() != output.len() {
729            return Err(SpecialError::DomainError(
730                "Input and output arrays must have the same length".to_string(),
731            ));
732        }
733
734        for (i, &x) in input.iter().enumerate() {
735            output[i] = adaptive::exp_adaptive(x);
736        }
737
738        Ok(())
739    }
740
741    /// Vectorized sine function
742    pub fn sin_vectorized(input: &[f64], output: &mut [f64]) -> SpecialResult<()> {
743        if input.len() != output.len() {
744            return Err(SpecialError::DomainError(
745                "Input and output arrays must have the same length".to_string(),
746            ));
747        }
748
749        for (i, &x) in input.iter().enumerate() {
750            output[i] = adaptive::sin_adaptive(x);
751        }
752
753        Ok(())
754    }
755
756    /// Vectorized gamma function
757    pub fn gamma_vectorized(input: &[f64], output: &mut [f64]) -> SpecialResult<()> {
758        if input.len() != output.len() {
759            return Err(SpecialError::DomainError(
760                "Input and output arrays must have the same length".to_string(),
761            ));
762        }
763
764        for (i, &x) in input.iter().enumerate() {
765            output[i] = adaptive::gamma_adaptive(x);
766        }
767
768        Ok(())
769    }
770
771    /// Batch computation with optimal memory access patterns
772    pub fn batch_compute<F>(input: &[f64], output: &mut [f64], func: F) -> SpecialResult<()>
773    where
774        F: Fn(f64) -> f64,
775    {
776        if input.len() != output.len() {
777            return Err(SpecialError::DomainError(
778                "Input and output arrays must have the same length".to_string(),
779            ));
780        }
781
782        // Process in chunks to improve cache locality
783        const CHUNK_SIZE: usize = 64;
784
785        for chunk in input.chunks(CHUNK_SIZE).zip(output.chunks_mut(CHUNK_SIZE)) {
786            let (input_chunk, output_chunk) = chunk;
787            for (i, &x) in input_chunk.iter().enumerate() {
788                output_chunk[i] = func(x);
789            }
790        }
791
792        Ok(())
793    }
794}
795
796/// Fast Bessel J0 series computation for lookup table initialization
797#[allow(dead_code)]
798fn bessel_j0_series(x: f64) -> f64 {
799    if x.abs() < 1e-14 {
800        return 1.0;
801    }
802
803    let x_half = x / 2.0;
804    let x_half_squared = x_half * x_half;
805
806    let mut sum = 1.0;
807    let mut term = 1.0;
808    let mut k = 1;
809
810    while k <= 50 {
811        term *= -x_half_squared / ((k * k) as f64);
812        sum += term;
813
814        if term.abs() < 1e-15 * sum.abs() {
815            break;
816        }
817        k += 1;
818    }
819
820    sum
821}
822
823/// Fast Gamma function using Stirling's approximation for lookup table
824#[allow(dead_code)]
825fn gamma_stirling(x: f64) -> f64 {
826    if x < 0.5 {
827        // Use reflection formula
828        return PI / ((PI * x).sin() * gamma_stirling(1.0 - x));
829    }
830
831    // Stirling's approximation with correction terms
832    let ln_gamma = (x - 0.5) * x.ln() - x + 0.5 * (2.0_f64 * PI).ln() + 1.0 / (12.0 * x)
833        - 1.0 / (360.0 * x.powi(3))
834        + 1.0 / (1260.0 * x.powi(5));
835    ln_gamma.exp()
836}
837
838/// Fast lookup-based Bessel J0 function
839#[allow(dead_code)]
840pub fn bessel_j0_fast(x: f64) -> f64 {
841    let abs_x = x.abs();
842
843    if abs_x < 10.0 {
844        let index = (abs_x * 100.0) as usize;
845        if index < 999 {
846            // Linear interpolation between lookup table values
847            let x1 = index as f64 * 0.01;
848            let x2 = (index + 1) as f64 * 0.01;
849            let y1 = BESSEL_J0_LOOKUP[index];
850            let y2 = BESSEL_J0_LOOKUP[index + 1];
851            return y1 + (y2 - y1) * (abs_x - x1) / (x2 - x1);
852        }
853    }
854
855    // Fall back to direct computation for values outside lookup range
856    bessel_j0_series(abs_x)
857}
858
859/// Fast lookup-based Gamma function
860#[allow(dead_code)]
861pub fn gamma_fast(x: f64) -> f64 {
862    // Special cases for exact integer values
863    if (x - 1.0).abs() < 1e-14 {
864        return 1.0;
865    } // Γ(1) = 1
866    if (x - 2.0).abs() < 1e-14 {
867        return 1.0;
868    } // Γ(2) = 1
869    if (x - 3.0).abs() < 1e-14 {
870        return 2.0;
871    } // Γ(3) = 2
872    if (x - 4.0).abs() < 1e-14 {
873        return 6.0;
874    } // Γ(4) = 6
875    if (x - 5.0).abs() < 1e-14 {
876        return 24.0;
877    } // Γ(5) = 24
878
879    if x > 0.1 && x < 10.1 {
880        let index = ((x - 0.1) * 100.0) as usize;
881        if index < 999 {
882            // Linear interpolation between lookup table values
883            let x1 = 0.1 + index as f64 * 0.01;
884            let x2 = 0.1 + (index + 1) as f64 * 0.01;
885            let y1 = GAMMA_LOOKUP[index];
886            let y2 = GAMMA_LOOKUP[index + 1];
887            return y1 + (y2 - y1) * (x - x1) / (x2 - x1);
888        }
889    }
890
891    // Fall back to direct computation for values outside lookup range
892    gamma_stirling(x)
893}
894
895#[cfg(test)]
896mod tests {
897    use super::*;
898    use approx::assert_relative_eq;
899
900    #[test]
901    fn test_simd_operations() {
902        let values = vec![0.0, 1.0, 2.0, -1.0];
903
904        let exp_results = simd::exp_simd(&values);
905        assert_relative_eq!(exp_results[0], 1.0, epsilon = 1e-10);
906        assert_relative_eq!(exp_results[1], std::f64::consts::E, epsilon = 1e-10);
907
908        let ln_results = simd::ln_simd(&[1.0, std::f64::consts::E, 10.0]);
909        assert_relative_eq!(ln_results[0], 0.0, epsilon = 1e-10);
910        assert_relative_eq!(ln_results[1], 1.0, epsilon = 1e-10);
911    }
912
913    #[test]
914    fn test_lookup_tables() {
915        // Test factorial lookup
916        assert_eq!(lookup_tables::factorial_lookup(0), Some(1.0));
917        assert_eq!(lookup_tables::factorial_lookup(5), Some(120.0));
918        assert_eq!(lookup_tables::factorial_lookup(25), None);
919
920        // Test gamma lookup
921        assert_relative_eq!(
922            lookup_tables::gamma_lookup(0.5).expect("Operation failed"),
923            (PI).sqrt(),
924            epsilon = 1e-10
925        );
926        assert_eq!(lookup_tables::gamma_lookup(1.0), Some(1.0));
927
928        // Test J0 zeros
929        assert!(lookup_tables::j0_zero(0).is_some());
930        assert!(lookup_tables::j0_zero(100).is_some()); // Should use asymptotic
931    }
932
933    #[test]
934    fn test_polynomial_approximations() {
935        // Test erf approximation
936        assert_relative_eq!(poly_approx::erf_approx(0.0), 0.0, epsilon = 1e-3);
937        assert_relative_eq!(poly_approx::erf_approx(0.5), 0.5205, epsilon = 1e-2);
938
939        // Test exp(-x²) approximation
940        assert_relative_eq!(poly_approx::exp_neg_x2_approx(0.0), 1.0, epsilon = 1e-10);
941        assert_relative_eq!(
942            poly_approx::exp_neg_x2_approx(1.0),
943            (-1.0_f64).exp(),
944            epsilon = 1e-1
945        );
946    }
947
948    #[test]
949    fn test_enhanced_caching() {
950        // Clear cache first
951        enhanced_caching::clear_gamma_cache();
952
953        // Test gamma caching
954        let x = 2.5;
955        let result1 = enhanced_caching::gamma_cached(x);
956        let result2 = enhanced_caching::gamma_cached(x); // Should use cache
957        assert_eq!(result1, result2);
958
959        // Test binomial caching
960        let binom1 = enhanced_caching::binomial_cached(10, 3).expect("Operation failed");
961        let binom2 = enhanced_caching::binomial_cached(10, 3).expect("Operation failed"); // Should use cache
962        assert_eq!(binom1, binom2);
963        assert_eq!(binom1, 120.0);
964    }
965
966    #[test]
967    fn test_adaptive_algorithms() {
968        // Test adaptive gamma
969        assert_relative_eq!(adaptive::gamma_adaptive(1.0), 1.0, epsilon = 1e-10);
970        assert_relative_eq!(adaptive::gamma_adaptive(5.0), 24.0, epsilon = 1e-10);
971
972        // Test adaptive exp
973        assert_relative_eq!(adaptive::exp_adaptive(0.0), 1.0, epsilon = 1e-10);
974        assert_relative_eq!(
975            adaptive::exp_adaptive(1.0),
976            std::f64::consts::E,
977            epsilon = 1e-10
978        );
979
980        // Test adaptive sin
981        assert_relative_eq!(adaptive::sin_adaptive(0.0), 0.0, epsilon = 1e-10);
982        assert_relative_eq!(adaptive::sin_adaptive(PI / 2.0), 1.0, epsilon = 1e-10);
983    }
984
985    #[test]
986    fn test_vectorized_operations() {
987        let input = vec![0.0, 1.0, 2.0];
988        let mut output = vec![0.0; 3];
989
990        // Test vectorized exp
991        vectorized::exp_vectorized(&input, &mut output).expect("Operation failed");
992        assert_relative_eq!(output[0], 1.0, epsilon = 1e-10);
993        assert_relative_eq!(output[1], std::f64::consts::E, epsilon = 1e-10);
994
995        // Test vectorized sin
996        let sin_input = vec![0.0, PI / 2.0, PI];
997        let mut sin_output = vec![0.0; 3];
998        vectorized::sin_vectorized(&sin_input, &mut sin_output).expect("Operation failed");
999        assert_relative_eq!(sin_output[0], 0.0, epsilon = 1e-10);
1000        assert_relative_eq!(sin_output[1], 1.0, epsilon = 1e-10);
1001        assert_relative_eq!(sin_output[2], 0.0, epsilon = 1e-10);
1002    }
1003
1004    #[test]
1005    fn test_batch_compute() {
1006        let input = vec![1.0, 2.0, 3.0, 4.0];
1007        let mut output = vec![0.0; 4];
1008
1009        vectorized::batch_compute(&input, &mut output, |x| x * x).expect("Operation failed");
1010
1011        for (i, &expected) in [1.0, 4.0, 9.0, 16.0].iter().enumerate() {
1012            assert_relative_eq!(output[i], expected, epsilon = 1e-10);
1013        }
1014    }
1015
1016    #[test]
1017    fn test_bessel_j0_fast() {
1018        // Test lookup-based Bessel J0 function
1019        assert_relative_eq!(bessel_j0_fast(0.0), 1.0, epsilon = 1e-10);
1020        assert_relative_eq!(bessel_j0_fast(1.0), 0.7651976865579666, epsilon = 1e-8);
1021        assert_relative_eq!(bessel_j0_fast(2.0), 0.22389077914123566, epsilon = 1e-8);
1022
1023        // Test that it matches the series computation
1024        for x in [0.5, 1.5, 3.0, 5.0] {
1025            let fast_result = bessel_j0_fast(x);
1026            let series_result = bessel_j0_series(x);
1027            assert_relative_eq!(fast_result, series_result, epsilon = 1e-6);
1028        }
1029    }
1030
1031    #[test]
1032    fn test_gamma_fast() {
1033        // Test lookup-based Gamma function
1034        assert_relative_eq!(gamma_fast(1.0), 1.0, epsilon = 1e-10);
1035        assert_relative_eq!(gamma_fast(2.0), 1.0, epsilon = 1e-10);
1036        assert_relative_eq!(gamma_fast(3.0), 2.0, epsilon = 1e-10);
1037        assert_relative_eq!(gamma_fast(4.0), 6.0, epsilon = 1e-10);
1038
1039        // Test that it matches Stirling approximation
1040        for x in [0.5, 1.5, 2.5, 5.0, 8.0] {
1041            let fast_result = gamma_fast(x);
1042            let stirling_result = gamma_stirling(x);
1043            assert_relative_eq!(fast_result, stirling_result, epsilon = 1e-6);
1044        }
1045    }
1046}
scirs2_special/optimizations.rs

scirs2_special/
optimizations.rs