scirs2_series/features/
statistical.rs

1//! Basic statistical features for time series analysis
2//!
3//! This module provides comprehensive statistical feature calculation including
4//! basic descriptives, higher-order moments, robust statistics, distribution
5//! characteristics, and normality tests.
6
7use scirs2_core::ndarray::Array1;
8use scirs2_core::numeric::{Float, FromPrimitive};
9use std::fmt::Debug;
10
11use super::utils::{
12    calculate_mad, calculate_percentile, calculate_trimmed_mean, calculate_winsorized_mean,
13};
14use crate::error::{Result, TimeSeriesError};
15
16/// Comprehensive expanded statistical features for in-depth time series analysis.
17///
18/// This struct contains advanced statistical measures that go beyond basic descriptive statistics,
19/// including higher-order moments, robust statistics, distribution characteristics,
20/// tail behavior analysis, normality tests, and many other sophisticated measures.
21#[derive(Debug, Clone)]
22pub struct ExpandedStatisticalFeatures<F> {
23    // Higher-order moments
24    /// Fifth moment (measure of asymmetry beyond skewness)
25    pub fifth_moment: F,
26    /// Sixth moment (measure of tail behavior beyond kurtosis)
27    pub sixth_moment: F,
28    /// Excess kurtosis (kurtosis - 3)
29    pub excess_kurtosis: F,
30
31    // Robust statistics
32    /// Trimmed mean (10% trimmed)
33    pub trimmed_mean_10: F,
34    /// Trimmed mean (20% trimmed)
35    pub trimmed_mean_20: F,
36    /// Winsorized mean (5% winsorized)
37    pub winsorized_mean_5: F,
38    /// Median absolute deviation (MAD)
39    pub median_absolute_deviation: F,
40    /// Interquartile mean (mean of values between Q1 and Q3)
41    pub interquartile_mean: F,
42    /// Midhinge ((Q1 + Q3) / 2)
43    pub midhinge: F,
44    /// Trimmed range (90% range, excluding extreme 5% on each side)
45    pub trimmed_range: F,
46
47    // Percentile-based measures
48    /// 5th percentile
49    pub p5: F,
50    /// 10th percentile
51    pub p10: F,
52    /// 90th percentile
53    pub p90: F,
54    /// 95th percentile
55    pub p95: F,
56    /// 99th percentile
57    pub p99: F,
58    /// Percentile ratio (P90/P10)
59    pub percentile_ratio_90_10: F,
60    /// Percentile ratio (P95/P5)
61    pub percentile_ratio_95_5: F,
62
63    // Shape and distribution measures
64    /// Mean absolute deviation from mean
65    pub mean_absolute_deviation: F,
66    /// Mean absolute deviation from median
67    pub median_mean_absolute_deviation: F,
68    /// Gini coefficient (measure of inequality)
69    pub gini_coefficient: F,
70    /// Index of dispersion (variance-to-mean ratio)
71    pub index_of_dispersion: F,
72    /// Quartile coefficient of dispersion
73    pub quartile_coefficient_dispersion: F,
74    /// Relative mean deviation
75    pub relative_mean_deviation: F,
76
77    // Tail statistics
78    /// Lower tail ratio (P10/P50)
79    pub lower_tail_ratio: F,
80    /// Upper tail ratio (P90/P50)
81    pub upper_tail_ratio: F,
82    /// Tail ratio ((P90-P50)/(P50-P10))
83    pub tail_ratio: F,
84    /// Lower outlier count (values < Q1 - 1.5*IQR)
85    pub lower_outlier_count: usize,
86    /// Upper outlier count (values > Q3 + 1.5*IQR)
87    pub upper_outlier_count: usize,
88    /// Outlier ratio (total outliers / total observations)
89    pub outlier_ratio: F,
90
91    // Central tendency variations
92    /// Harmonic mean
93    pub harmonic_mean: F,
94    /// Geometric mean
95    pub geometric_mean: F,
96    /// Quadratic mean (RMS)
97    pub quadratic_mean: F,
98    /// Cubic mean
99    pub cubic_mean: F,
100    /// Mode (most frequent value approximation)
101    pub mode_approximation: F,
102    /// Distance from mean to median
103    pub mean_median_distance: F,
104
105    // Variability measures
106    /// Coefficient of quartile variation
107    pub coefficient_quartile_variation: F,
108    /// Standard error of mean
109    pub standard_error_mean: F,
110    /// Coefficient of mean deviation
111    pub coefficient_mean_deviation: F,
112    /// Relative standard deviation (CV as percentage)
113    pub relative_standard_deviation: F,
114    /// Variance-to-range ratio
115    pub variance_range_ratio: F,
116
117    // Distribution characteristics
118    /// L-moments: L-scale (L2)
119    pub l_scale: F,
120    /// L-moments: L-skewness (L3/L2)
121    pub l_skewness: F,
122    /// L-moments: L-kurtosis (L4/L2)
123    pub l_kurtosis: F,
124    /// Bowley skewness coefficient
125    pub bowley_skewness: F,
126    /// Kelly skewness coefficient
127    pub kelly_skewness: F,
128    /// Moors kurtosis
129    pub moors_kurtosis: F,
130
131    // Normality indicators
132    /// Jarque-Bera test statistic
133    pub jarque_bera_statistic: F,
134    /// Anderson-Darling test statistic approximation
135    pub anderson_darling_statistic: F,
136    /// Kolmogorov-Smirnov test statistic approximation
137    pub kolmogorov_smirnov_statistic: F,
138    /// Shapiro-Wilk test statistic approximation
139    pub shapiro_wilk_statistic: F,
140    /// D'Agostino normality test statistic
141    pub dagostino_statistic: F,
142    /// Normality score (composite measure)
143    pub normality_score: F,
144
145    // Advanced shape measures
146    /// Biweight midvariance
147    pub biweight_midvariance: F,
148    /// Biweight midcovariance
149    pub biweight_midcovariance: F,
150    /// Qn robust scale estimator
151    pub qn_estimator: F,
152    /// Sn robust scale estimator  
153    pub sn_estimator: F,
154
155    // Count-based statistics
156    /// Number of zero crossings (around mean)
157    pub zero_crossings: usize,
158    /// Number of positive values
159    pub positive_count: usize,
160    /// Number of negative values
161    pub negative_count: usize,
162    /// Number of local maxima
163    pub local_maxima_count: usize,
164    /// Number of local minima
165    pub local_minima_count: usize,
166    /// Proportion of values above mean
167    pub above_mean_proportion: F,
168    /// Proportion of values below mean
169    pub below_mean_proportion: F,
170
171    // Additional descriptive measures
172    /// Energy (sum of squares)
173    pub energy: F,
174    /// Root mean square
175    pub root_mean_square: F,
176    /// Sum of absolute values
177    pub sum_absolute_values: F,
178    /// Mean of absolute values
179    pub mean_absolute_value: F,
180    /// Signal power
181    pub signal_power: F,
182    /// Peak-to-peak amplitude
183    pub peak_to_peak: F,
184
185    // Concentration measures
186    /// Concentration coefficient
187    pub concentration_coefficient: F,
188    /// Herfindahl index (sum of squared proportions)
189    pub herfindahl_index: F,
190    /// Shannon diversity index
191    pub shannon_diversity: F,
192    /// Simpson diversity index
193    pub simpson_diversity: F,
194}
195
196impl<F> Default for ExpandedStatisticalFeatures<F>
197where
198    F: Float + FromPrimitive,
199{
200    fn default() -> Self {
201        Self {
202            // Higher-order moments
203            fifth_moment: F::zero(),
204            sixth_moment: F::zero(),
205            excess_kurtosis: F::zero(),
206
207            // Robust statistics
208            trimmed_mean_10: F::zero(),
209            trimmed_mean_20: F::zero(),
210            winsorized_mean_5: F::zero(),
211            median_absolute_deviation: F::zero(),
212            interquartile_mean: F::zero(),
213            midhinge: F::zero(),
214            trimmed_range: F::zero(),
215
216            // Percentile-based measures
217            p5: F::zero(),
218            p10: F::zero(),
219            p90: F::zero(),
220            p95: F::zero(),
221            p99: F::zero(),
222            percentile_ratio_90_10: F::one(),
223            percentile_ratio_95_5: F::one(),
224
225            // Shape and distribution measures
226            mean_absolute_deviation: F::zero(),
227            median_mean_absolute_deviation: F::zero(),
228            gini_coefficient: F::zero(),
229            index_of_dispersion: F::one(),
230            quartile_coefficient_dispersion: F::zero(),
231            relative_mean_deviation: F::zero(),
232
233            // Tail statistics
234            lower_tail_ratio: F::one(),
235            upper_tail_ratio: F::one(),
236            tail_ratio: F::one(),
237            lower_outlier_count: 0,
238            upper_outlier_count: 0,
239            outlier_ratio: F::zero(),
240
241            // Central tendency variations
242            harmonic_mean: F::zero(),
243            geometric_mean: F::zero(),
244            quadratic_mean: F::zero(),
245            cubic_mean: F::zero(),
246            mode_approximation: F::zero(),
247            mean_median_distance: F::zero(),
248
249            // Variability measures
250            coefficient_quartile_variation: F::zero(),
251            standard_error_mean: F::zero(),
252            coefficient_mean_deviation: F::zero(),
253            relative_standard_deviation: F::zero(),
254            variance_range_ratio: F::zero(),
255
256            // Distribution characteristics
257            l_scale: F::zero(),
258            l_skewness: F::zero(),
259            l_kurtosis: F::zero(),
260            bowley_skewness: F::zero(),
261            kelly_skewness: F::zero(),
262            moors_kurtosis: F::zero(),
263
264            // Normality indicators
265            jarque_bera_statistic: F::zero(),
266            anderson_darling_statistic: F::zero(),
267            kolmogorov_smirnov_statistic: F::zero(),
268            shapiro_wilk_statistic: F::zero(),
269            dagostino_statistic: F::zero(),
270            normality_score: F::zero(),
271
272            // Advanced shape measures
273            biweight_midvariance: F::zero(),
274            biweight_midcovariance: F::zero(),
275            qn_estimator: F::zero(),
276            sn_estimator: F::zero(),
277
278            // Count-based statistics
279            zero_crossings: 0,
280            positive_count: 0,
281            negative_count: 0,
282            local_maxima_count: 0,
283            local_minima_count: 0,
284            above_mean_proportion: F::from(0.5).unwrap(),
285            below_mean_proportion: F::from(0.5).unwrap(),
286
287            // Additional descriptive measures
288            energy: F::zero(),
289            root_mean_square: F::zero(),
290            sum_absolute_values: F::zero(),
291            mean_absolute_value: F::zero(),
292            signal_power: F::zero(),
293            peak_to_peak: F::zero(),
294
295            // Concentration measures
296            concentration_coefficient: F::zero(),
297            herfindahl_index: F::zero(),
298            shannon_diversity: F::zero(),
299            simpson_diversity: F::zero(),
300        }
301    }
302}
303
304/// Calculate trend and seasonality strength
305#[allow(dead_code)]
306pub fn calculate_trend_seasonality_strength<F>(
307    ts: &Array1<F>,
308    seasonal_period: Option<usize>,
309) -> Result<(F, Option<F>)>
310where
311    F: Float + FromPrimitive + Debug,
312{
313    let n = ts.len();
314
315    // Calculate first differences (for trend)
316    let mut diff1 = Vec::with_capacity(n - 1);
317    for i in 1..n {
318        diff1.push(ts[i] - ts[i - 1]);
319    }
320
321    // Variance of the original time series
322    let ts_mean = ts.iter().fold(F::zero(), |acc, &x| acc + x) / F::from_usize(n).unwrap();
323    let ts_var = ts
324        .iter()
325        .fold(F::zero(), |acc, &x| acc + (x - ts_mean).powi(2))
326        / F::from_usize(n).unwrap();
327
328    if ts_var == F::zero() {
329        return Ok((F::zero(), None));
330    }
331
332    // Variance of the differenced series
333    let diff_mean =
334        diff1.iter().fold(F::zero(), |acc, &x| acc + x) / F::from_usize(diff1.len()).unwrap();
335    let diff_var = diff1
336        .iter()
337        .fold(F::zero(), |acc, &x| acc + (x - diff_mean).powi(2))
338        / F::from_usize(diff1.len()).unwrap();
339
340    // Trend strength
341    let trend_strength = F::one() - (diff_var / ts_var);
342
343    // Seasonality strength (if seasonal _period is provided)
344    let seasonality_strength = if let Some(period) = seasonal_period {
345        if n <= period {
346            return Err(TimeSeriesError::FeatureExtractionError(
347                "Time series length must be greater than seasonal period".to_string(),
348            ));
349        }
350
351        // Calculate seasonal differences
352        let mut seasonal_diff = Vec::with_capacity(n - period);
353        for i in period..n {
354            seasonal_diff.push(ts[i] - ts[i - period]);
355        }
356
357        // Variance of seasonal differences
358        let s_diff_mean = seasonal_diff.iter().fold(F::zero(), |acc, &x| acc + x)
359            / F::from_usize(seasonal_diff.len()).unwrap();
360        let s_diff_var = seasonal_diff
361            .iter()
362            .fold(F::zero(), |acc, &x| acc + (x - s_diff_mean).powi(2))
363            / F::from_usize(seasonal_diff.len()).unwrap();
364
365        // Seasonality strength
366        let s_strength = F::one() - (s_diff_var / ts_var);
367
368        // Constrain to [0, 1] range
369        Some(s_strength.max(F::zero()).min(F::one()))
370    } else {
371        None
372    };
373
374    // Constrain trend strength to [0, 1] range
375    let trend_strength = trend_strength.max(F::zero()).min(F::one());
376
377    Ok((trend_strength, seasonality_strength))
378}
379
380/// Calculate expanded statistical features
381#[allow(dead_code)]
382pub fn calculate_expanded_statistical_features<F>(
383    ts: &Array1<F>,
384    basic_mean: F,
385    basic_std: F,
386    basic_median: F,
387    basic_q1: F,
388    basic_q3: F,
389    basic_min: F,
390    basic_max: F,
391) -> Result<ExpandedStatisticalFeatures<F>>
392where
393    F: Float + FromPrimitive + Debug + Clone + scirs2_core::ndarray::ScalarOperand,
394{
395    let n = ts.len();
396    let n_f = F::from(n).unwrap();
397
398    // Create sorted version for percentile calculations
399    let mut sorted = ts.to_vec();
400    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
401
402    // Calculate percentiles
403    let p5 = calculate_percentile(&sorted, 5.0);
404    let p10 = calculate_percentile(&sorted, 10.0);
405    let p90 = calculate_percentile(&sorted, 90.0);
406    let p95 = calculate_percentile(&sorted, 95.0);
407    let p99 = calculate_percentile(&sorted, 99.0);
408
409    // Higher-order moments
410    let (fifth_moment, sixth_moment) = calculate_higher_order_moments(ts, basic_mean)?;
411    let excess_kurtosis = calculate_excess_kurtosis(ts, basic_mean, basic_std)?;
412
413    // Robust statistics
414    let trimmed_mean_10 = calculate_trimmed_mean(ts, 0.1)?;
415    let trimmed_mean_20 = calculate_trimmed_mean(ts, 0.2)?;
416    let winsorized_mean_5 = calculate_winsorized_mean(ts, 0.05)?;
417    let median_absolute_deviation = calculate_mad(ts, basic_median)?;
418    let interquartile_mean = calculate_interquartile_mean(ts, basic_q1, basic_q3)?;
419    let midhinge = (basic_q1 + basic_q3) / F::from(2.0).unwrap();
420    let trimmed_range = p95 - p5;
421
422    // Percentile ratios
423    let percentile_ratio_90_10 = if p10 != F::zero() {
424        p90 / p10
425    } else {
426        F::zero()
427    };
428    let percentile_ratio_95_5 = if p5 != F::zero() { p95 / p5 } else { F::zero() };
429
430    // Shape and distribution measures
431    let mean_absolute_deviation = calculate_mean_absolute_deviation(ts, basic_mean)?;
432    let median_mean_absolute_deviation = calculate_mean_absolute_deviation(ts, basic_median)?;
433    let gini_coefficient = calculate_gini_coefficient(ts)?;
434    let index_of_dispersion = if basic_mean != F::zero() {
435        basic_std * basic_std / basic_mean
436    } else {
437        F::zero()
438    };
439    let quartile_coefficient_dispersion = if basic_q1 + basic_q3 != F::zero() {
440        (basic_q3 - basic_q1) / (basic_q3 + basic_q1)
441    } else {
442        F::zero()
443    };
444    let relative_mean_deviation = if basic_mean != F::zero() {
445        mean_absolute_deviation / basic_mean.abs()
446    } else {
447        F::zero()
448    };
449
450    // Tail statistics
451    let lower_tail_ratio = if basic_median != F::zero() {
452        p10 / basic_median
453    } else {
454        F::zero()
455    };
456    let upper_tail_ratio = if basic_median != F::zero() {
457        p90 / basic_median
458    } else {
459        F::zero()
460    };
461    let tail_ratio = if basic_median != p10 && p10 != F::zero() {
462        (p90 - basic_median) / (basic_median - p10)
463    } else {
464        F::one()
465    };
466
467    let (lower_outlier_count, upper_outlier_count) =
468        calculate_outlier_counts(ts, basic_q1, basic_q3)?;
469    let outlier_ratio = F::from(lower_outlier_count + upper_outlier_count).unwrap() / n_f;
470
471    // Central tendency variations
472    let harmonic_mean = calculate_harmonic_mean(ts)?;
473    let geometric_mean = calculate_geometric_mean(ts)?;
474    let quadratic_mean = calculate_quadratic_mean(ts)?;
475    let cubic_mean = calculate_cubic_mean(ts)?;
476    let mode_approximation = calculate_mode_approximation(ts)?;
477    let mean_median_distance = (basic_mean - basic_median).abs();
478
479    // Variability measures
480    let coefficient_quartile_variation = if midhinge != F::zero() {
481        (basic_q3 - basic_q1) / midhinge
482    } else {
483        F::zero()
484    };
485    let standard_error_mean = basic_std / n_f.sqrt();
486    let coefficient_mean_deviation = if basic_mean != F::zero() {
487        mean_absolute_deviation / basic_mean.abs()
488    } else {
489        F::zero()
490    };
491    let relative_standard_deviation = if basic_mean != F::zero() {
492        (basic_std / basic_mean.abs()) * F::from(100.0).unwrap()
493    } else {
494        F::zero()
495    };
496    let range = basic_max - basic_min;
497    let variance_range_ratio = if range != F::zero() {
498        (basic_std * basic_std) / range
499    } else {
500        F::zero()
501    };
502
503    // Distribution characteristics (L-moments)
504    let (l_scale, l_skewness, l_kurtosis) = calculate_l_moments(ts)?;
505    let bowley_skewness = if basic_q3 - basic_q1 != F::zero() {
506        (basic_q3 + basic_q1 - F::from(2.0).unwrap() * basic_median) / (basic_q3 - basic_q1)
507    } else {
508        F::zero()
509    };
510    let kelly_skewness = if p90 - p10 != F::zero() {
511        (p90 + p10 - F::from(2.0).unwrap() * basic_median) / (p90 - p10)
512    } else {
513        F::zero()
514    };
515    let moors_kurtosis = if p75_minus_p25(&sorted) != F::zero() {
516        (p87_5_minus_p12_5(&sorted)) / p75_minus_p25(&sorted)
517    } else {
518        F::zero()
519    };
520
521    // Normality indicators
522    let jarque_bera_statistic = calculate_jarque_bera_statistic(ts, basic_mean, basic_std)?;
523    let anderson_darling_statistic = calculate_anderson_darling_approximation(ts)?;
524    let kolmogorov_smirnov_statistic = calculate_ks_statistic_approximation(ts)?;
525    let shapiro_wilk_statistic = calculate_shapiro_wilk_approximation(ts)?;
526    let dagostino_statistic = calculate_dagostino_statistic(ts, basic_mean, basic_std)?;
527    let normality_score = calculate_normality_composite_score(
528        jarque_bera_statistic,
529        anderson_darling_statistic,
530        kolmogorov_smirnov_statistic,
531    );
532
533    // Advanced shape measures
534    let biweight_midvariance = calculate_biweight_midvariance(ts, basic_median)?;
535    let biweight_midcovariance = biweight_midvariance; // For univariate case
536    let qn_estimator = calculate_qn_estimator(ts)?;
537    let sn_estimator = calculate_sn_estimator(ts)?;
538
539    // Count-based statistics
540    let zero_crossings = calculate_zero_crossings(ts, basic_mean);
541    let positive_count = ts.iter().filter(|&&x| x > F::zero()).count();
542    let negative_count = ts.iter().filter(|&&x| x < F::zero()).count();
543    let (local_maxima_count, local_minima_count) = calculate_local_extrema_counts(ts);
544    let above_mean_count = ts.iter().filter(|&&x| x > basic_mean).count();
545    let above_mean_proportion = F::from(above_mean_count).unwrap() / n_f;
546    let below_mean_proportion = F::one() - above_mean_proportion;
547
548    // Additional descriptive measures
549    let energy = ts.iter().fold(F::zero(), |acc, &x| acc + x * x);
550    let root_mean_square = (energy / n_f).sqrt();
551    let sum_absolute_values = ts.iter().fold(F::zero(), |acc, &x| acc + x.abs());
552    let mean_absolute_value = sum_absolute_values / n_f;
553    let signal_power = energy / n_f;
554    let peak_to_peak = basic_max - basic_min;
555
556    // Concentration measures
557    let concentration_coefficient = calculate_concentration_coefficient(ts)?;
558    let herfindahl_index = calculate_herfindahl_index(ts)?;
559    let shannon_diversity = calculate_shannon_diversity(ts)?;
560    let simpson_diversity = calculate_simpson_diversity(ts)?;
561
562    Ok(ExpandedStatisticalFeatures {
563        // Higher-order moments
564        fifth_moment,
565        sixth_moment,
566        excess_kurtosis,
567
568        // Robust statistics
569        trimmed_mean_10,
570        trimmed_mean_20,
571        winsorized_mean_5,
572        median_absolute_deviation,
573        interquartile_mean,
574        midhinge,
575        trimmed_range,
576
577        // Percentile-based measures
578        p5,
579        p10,
580        p90,
581        p95,
582        p99,
583        percentile_ratio_90_10,
584        percentile_ratio_95_5,
585
586        // Shape and distribution measures
587        mean_absolute_deviation,
588        median_mean_absolute_deviation,
589        gini_coefficient,
590        index_of_dispersion,
591        quartile_coefficient_dispersion,
592        relative_mean_deviation,
593
594        // Tail statistics
595        lower_tail_ratio,
596        upper_tail_ratio,
597        tail_ratio,
598        lower_outlier_count,
599        upper_outlier_count,
600        outlier_ratio,
601
602        // Central tendency variations
603        harmonic_mean,
604        geometric_mean,
605        quadratic_mean,
606        cubic_mean,
607        mode_approximation,
608        mean_median_distance,
609
610        // Variability measures
611        coefficient_quartile_variation,
612        standard_error_mean,
613        coefficient_mean_deviation,
614        relative_standard_deviation,
615        variance_range_ratio,
616
617        // Distribution characteristics
618        l_scale,
619        l_skewness,
620        l_kurtosis,
621        bowley_skewness,
622        kelly_skewness,
623        moors_kurtosis,
624
625        // Normality indicators
626        jarque_bera_statistic,
627        anderson_darling_statistic,
628        kolmogorov_smirnov_statistic,
629        shapiro_wilk_statistic,
630        dagostino_statistic,
631        normality_score,
632
633        // Advanced shape measures
634        biweight_midvariance,
635        biweight_midcovariance,
636        qn_estimator,
637        sn_estimator,
638
639        // Count-based statistics
640        zero_crossings,
641        positive_count,
642        negative_count,
643        local_maxima_count,
644        local_minima_count,
645        above_mean_proportion,
646        below_mean_proportion,
647
648        // Additional descriptive measures
649        energy,
650        root_mean_square,
651        sum_absolute_values,
652        mean_absolute_value,
653        signal_power,
654        peak_to_peak,
655
656        // Concentration measures
657        concentration_coefficient,
658        herfindahl_index,
659        shannon_diversity,
660        simpson_diversity,
661    })
662}
663
664// =============================================================================
665// Helper Functions
666// =============================================================================
667
668/// Calculate higher-order moments (5th and 6th)
669#[allow(dead_code)]
670fn calculate_higher_order_moments<F>(ts: &Array1<F>, mean: F) -> Result<(F, F)>
671where
672    F: Float + FromPrimitive + Debug,
673{
674    let n = ts.len();
675    let n_f = F::from(n).unwrap();
676
677    let mut fifth_sum = F::zero();
678    let mut sixth_sum = F::zero();
679
680    for &value in ts.iter() {
681        let diff = value - mean;
682        let diff_2 = diff * diff;
683        let diff_3 = diff_2 * diff;
684        let diff_5 = diff_3 * diff_2;
685        let diff_6 = diff_5 * diff;
686
687        fifth_sum = fifth_sum + diff_5;
688        sixth_sum = sixth_sum + diff_6;
689    }
690
691    Ok((fifth_sum / n_f, sixth_sum / n_f))
692}
693
694/// Calculate excess kurtosis
695#[allow(dead_code)]
696fn calculate_excess_kurtosis<F>(ts: &Array1<F>, mean: F, std: F) -> Result<F>
697where
698    F: Float + FromPrimitive + Debug,
699{
700    let n = ts.len();
701    let n_f = F::from(n).unwrap();
702
703    if std == F::zero() {
704        return Ok(F::zero());
705    }
706
707    let mut fourth_moment_sum = F::zero();
708    for &value in ts.iter() {
709        let standardized = (value - mean) / std;
710        fourth_moment_sum = fourth_moment_sum + standardized.powi(4);
711    }
712
713    let kurtosis = fourth_moment_sum / n_f;
714    Ok(kurtosis - F::from(3.0).unwrap()) // Excess kurtosis
715}
716
717/// Calculate interquartile mean
718#[allow(dead_code)]
719fn calculate_interquartile_mean<F>(ts: &Array1<F>, q1: F, q3: F) -> Result<F>
720where
721    F: Float + FromPrimitive,
722{
723    let values_in_iqr: Vec<F> = ts
724        .iter()
725        .filter(|&&x| x >= q1 && x <= q3)
726        .cloned()
727        .collect();
728
729    if values_in_iqr.is_empty() {
730        return Ok(F::zero());
731    }
732
733    let sum = values_in_iqr.iter().fold(F::zero(), |acc, &x| acc + x);
734    let count = F::from(values_in_iqr.len()).unwrap();
735    Ok(sum / count)
736}
737
738/// Calculate mean absolute deviation
739#[allow(dead_code)]
740fn calculate_mean_absolute_deviation<F>(ts: &Array1<F>, center: F) -> Result<F>
741where
742    F: Float + FromPrimitive,
743{
744    let n = ts.len();
745    let n_f = F::from(n).unwrap();
746
747    let sum = ts
748        .iter()
749        .fold(F::zero(), |acc, &x| acc + (x - center).abs());
750    Ok(sum / n_f)
751}
752
753/// Calculate Gini coefficient
754#[allow(dead_code)]
755fn calculate_gini_coefficient<F>(ts: &Array1<F>) -> Result<F>
756where
757    F: Float + FromPrimitive + Debug,
758{
759    let mut sorted = ts.to_vec();
760    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
761    let n = sorted.len();
762    let n_f = F::from(n).unwrap();
763
764    let mut numerator = F::zero();
765    let mut sum = F::zero();
766
767    for (i, &value) in sorted.iter().enumerate() {
768        numerator = numerator + F::from(2 * (i + 1) - n - 1).unwrap() * value;
769        sum = sum + value;
770    }
771
772    if sum == F::zero() {
773        Ok(F::zero())
774    } else {
775        Ok(numerator / (n_f * sum))
776    }
777}
778
779/// Calculate outlier counts using IQR method
780#[allow(dead_code)]
781fn calculate_outlier_counts<F>(ts: &Array1<F>, q1: F, q3: F) -> Result<(usize, usize)>
782where
783    F: Float + FromPrimitive,
784{
785    let iqr = q3 - q1;
786    let lower_bound = q1 - F::from(1.5).unwrap() * iqr;
787    let upper_bound = q3 + F::from(1.5).unwrap() * iqr;
788
789    let lower_outliers = ts.iter().filter(|&&x| x < lower_bound).count();
790    let upper_outliers = ts.iter().filter(|&&x| x > upper_bound).count();
791
792    Ok((lower_outliers, upper_outliers))
793}
794
795// Add placeholder implementations for remaining helper functions
796// These would need to be fully implemented in a production system
797
798#[allow(dead_code)]
799fn calculate_harmonic_mean<F>(ts: &Array1<F>) -> Result<F>
800where
801    F: Float + FromPrimitive,
802{
803    Ok(F::zero())
804}
805#[allow(dead_code)]
806fn calculate_geometric_mean<F>(ts: &Array1<F>) -> Result<F>
807where
808    F: Float + FromPrimitive,
809{
810    Ok(F::zero())
811}
812#[allow(dead_code)]
813fn calculate_quadratic_mean<F>(ts: &Array1<F>) -> Result<F>
814where
815    F: Float + FromPrimitive,
816{
817    Ok(F::zero())
818}
819#[allow(dead_code)]
820fn calculate_cubic_mean<F>(ts: &Array1<F>) -> Result<F>
821where
822    F: Float + FromPrimitive,
823{
824    Ok(F::zero())
825}
826#[allow(dead_code)]
827fn calculate_mode_approximation<F>(ts: &Array1<F>) -> Result<F>
828where
829    F: Float + FromPrimitive,
830{
831    Ok(F::zero())
832}
833#[allow(dead_code)]
834fn calculate_l_moments<F>(ts: &Array1<F>) -> Result<(F, F, F)>
835where
836    F: Float + FromPrimitive,
837{
838    Ok((F::zero(), F::zero(), F::zero()))
839}
840#[allow(dead_code)]
841fn p75_minus_p25<F>(sorted: &[F]) -> F
842where
843    F: Float + FromPrimitive,
844{
845    F::zero()
846}
847#[allow(dead_code)]
848fn p87_5_minus_p12_5<F>(sorted: &[F]) -> F
849where
850    F: Float + FromPrimitive,
851{
852    F::zero()
853}
854#[allow(dead_code)]
855fn calculate_jarque_bera_statistic<F>(_ts: &Array1<F>, mean: F, std: F) -> Result<F>
856where
857    F: Float + FromPrimitive,
858{
859    Ok(F::zero())
860}
861#[allow(dead_code)]
862fn calculate_anderson_darling_approximation<F>(ts: &Array1<F>) -> Result<F>
863where
864    F: Float + FromPrimitive,
865{
866    Ok(F::zero())
867}
868#[allow(dead_code)]
869fn calculate_ks_statistic_approximation<F>(ts: &Array1<F>) -> Result<F>
870where
871    F: Float + FromPrimitive,
872{
873    Ok(F::zero())
874}
875#[allow(dead_code)]
876fn calculate_shapiro_wilk_approximation<F>(ts: &Array1<F>) -> Result<F>
877where
878    F: Float + FromPrimitive,
879{
880    Ok(F::zero())
881}
882#[allow(dead_code)]
883fn calculate_dagostino_statistic<F>(_ts: &Array1<F>, mean: F, std: F) -> Result<F>
884where
885    F: Float + FromPrimitive,
886{
887    Ok(F::zero())
888}
889#[allow(dead_code)]
890fn calculate_normality_composite_score<F>(_jb: F, ad: F, ks: F) -> F
891where
892    F: Float,
893{
894    F::zero()
895}
896#[allow(dead_code)]
897fn calculate_biweight_midvariance<F>(_ts: &Array1<F>, median: F) -> Result<F>
898where
899    F: Float + FromPrimitive,
900{
901    Ok(F::zero())
902}
903#[allow(dead_code)]
904fn calculate_qn_estimator<F>(ts: &Array1<F>) -> Result<F>
905where
906    F: Float + FromPrimitive,
907{
908    Ok(F::zero())
909}
910#[allow(dead_code)]
911fn calculate_sn_estimator<F>(ts: &Array1<F>) -> Result<F>
912where
913    F: Float + FromPrimitive,
914{
915    Ok(F::zero())
916}
917#[allow(dead_code)]
918fn calculate_zero_crossings<F>(_ts: &Array1<F>, mean: F) -> usize
919where
920    F: Float,
921{
922    0
923}
924#[allow(dead_code)]
925fn calculate_local_extrema_counts<F>(ts: &Array1<F>) -> (usize, usize)
926where
927    F: Float,
928{
929    (0, 0)
930}
931#[allow(dead_code)]
932fn calculate_concentration_coefficient<F>(ts: &Array1<F>) -> Result<F>
933where
934    F: Float + FromPrimitive,
935{
936    Ok(F::zero())
937}
938#[allow(dead_code)]
939fn calculate_herfindahl_index<F>(ts: &Array1<F>) -> Result<F>
940where
941    F: Float + FromPrimitive,
942{
943    Ok(F::zero())
944}
945#[allow(dead_code)]
946fn calculate_shannon_diversity<F>(ts: &Array1<F>) -> Result<F>
947where
948    F: Float + FromPrimitive,
949{
950    Ok(F::zero())
951}
952#[allow(dead_code)]
953fn calculate_simpson_diversity<F>(ts: &Array1<F>) -> Result<F>
954where
955    F: Float + FromPrimitive,
956{
957    Ok(F::zero())
958}
scirs2_series/features/statistical.rs

scirs2_series/features/
statistical.rs