// fdars_core/seasonal/mod.rs
1//! Seasonal time series analysis for functional data.
2//!
3//! This module provides functions for analyzing seasonal patterns in functional data:
4//! - Period estimation (FFT, autocorrelation, regression-based)
5//! - Peak detection with prominence calculation
6//! - Seasonal strength measurement (variance and spectral methods)
7//! - Seasonality change detection (onset/cessation)
8//! - Instantaneous period estimation for drifting seasonality
9//! - Peak timing variability analysis for short series
10//! - Seasonality classification
11
12mod autoperiod;
13mod change;
14mod hilbert;
15mod lomb_scargle;
16mod matrix_profile;
17mod peak;
18mod period;
19mod sazed;
20mod ssa;
21mod strength;
22
23#[cfg(test)]
24mod tests;
25
26use crate::iter_maybe_parallel;
27use crate::matrix::FdMatrix;
28use num_complex::Complex;
29#[cfg(feature = "parallel")]
30use rayon::iter::ParallelIterator;
31use rustfft::FftPlanner;
32use std::f64::consts::PI;
33
34// Re-export all public items so `seasonal::X` still works
35pub use autoperiod::{
36    autoperiod, autoperiod_fdata, cfd_autoperiod, cfd_autoperiod_fdata, AutoperiodCandidate,
37    AutoperiodResult, CfdAutoperiodResult,
38};
39pub use change::{
40    analyze_peak_timing, classify_seasonality, detect_amplitude_modulation,
41    detect_amplitude_modulation_wavelet, detect_seasonality_changes,
42    detect_seasonality_changes_auto, AmplitudeModulationResult, ModulationType, SeasonalType,
43    SeasonalityClassification, ThresholdMethod, WaveletAmplitudeResult,
44};
45pub use hilbert::{hilbert_transform, instantaneous_period};
46pub use lomb_scargle::{lomb_scargle, lomb_scargle_fdata, LombScargleResult};
47pub use matrix_profile::{
48    matrix_profile, matrix_profile_fdata, matrix_profile_seasonality, MatrixProfileResult,
49};
50pub use peak::detect_peaks;
51pub use period::{
52    detect_multiple_periods, estimate_period_acf, estimate_period_fft, estimate_period_regression,
53};
54pub use sazed::{sazed, sazed_fdata, SazedComponents, SazedResult};
55pub use ssa::{ssa, ssa_fdata, ssa_seasonality, SsaResult};
56pub use strength::{
57    seasonal_strength_spectral, seasonal_strength_variance, seasonal_strength_wavelet,
58    seasonal_strength_windowed,
59};
60
61// Re-export types that are used by multiple submodules and by external consumers
62pub use self::types::*;
63
/// Shared result types used by multiple seasonal submodules and re-exported
/// to external consumers via `pub use self::types::*;`.
mod types {
    /// Result of period estimation.
    #[derive(Debug, Clone)]
    pub struct PeriodEstimate {
        /// Estimated period
        pub period: f64,
        /// Dominant frequency (1/period)
        pub frequency: f64,
        /// Power at the dominant frequency
        pub power: f64,
        /// Confidence measure (ratio of peak power to mean power)
        pub confidence: f64,
    }

    /// A detected peak in functional data.
    #[derive(Debug, Clone)]
    pub struct Peak {
        /// Time at which the peak occurs
        pub time: f64,
        /// Value at the peak
        pub value: f64,
        /// Prominence of the peak (height relative to surrounding valleys)
        pub prominence: f64,
    }

    /// Result of peak detection.
    #[derive(Debug, Clone)]
    pub struct PeakDetectionResult {
        /// Peaks for each sample: `peaks[sample_idx]` contains peaks for that sample
        pub peaks: Vec<Vec<Peak>>,
        /// Inter-peak distances for each sample
        pub inter_peak_distances: Vec<Vec<f64>>,
        /// Mean period estimated from inter-peak distances across all samples
        pub mean_period: f64,
    }

    /// A detected period from multiple period detection.
    #[derive(Debug, Clone)]
    pub struct DetectedPeriod {
        /// Estimated period
        pub period: f64,
        /// FFT confidence (ratio of peak power to mean power)
        pub confidence: f64,
        /// Seasonal strength at this period (variance explained)
        pub strength: f64,
        /// Amplitude of the sinusoidal component
        pub amplitude: f64,
        /// Phase of the sinusoidal component (radians)
        pub phase: f64,
        /// Iteration number (1-indexed)
        pub iteration: usize,
    }

    /// Method for computing seasonal strength.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum StrengthMethod {
        /// Variance decomposition: Var(seasonal) / Var(total)
        Variance,
        /// Spectral: power at seasonal frequencies / total power
        Spectral,
    }

    /// A detected change point in seasonality.
    #[derive(Debug, Clone)]
    pub struct ChangePoint {
        /// Time at which the change occurs
        pub time: f64,
        /// Type of change
        pub change_type: ChangeType,
        /// Seasonal strength before the change
        pub strength_before: f64,
        /// Seasonal strength after the change
        pub strength_after: f64,
    }

    /// Type of seasonality change.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum ChangeType {
        /// Series becomes seasonal
        Onset,
        /// Series stops being seasonal
        Cessation,
    }

    /// Result of seasonality change detection.
    #[derive(Debug, Clone)]
    pub struct ChangeDetectionResult {
        /// Detected change points
        pub change_points: Vec<ChangePoint>,
        /// Time-varying seasonal strength curve used for detection
        pub strength_curve: Vec<f64>,
    }

    /// Result of instantaneous period estimation.
    #[derive(Debug, Clone)]
    pub struct InstantaneousPeriod {
        /// Instantaneous period at each time point
        pub period: Vec<f64>,
        /// Instantaneous frequency at each time point
        pub frequency: Vec<f64>,
        /// Instantaneous amplitude (envelope) at each time point
        pub amplitude: Vec<f64>,
    }

    /// Result of peak timing variability analysis.
    #[derive(Debug, Clone, PartialEq)]
    pub struct PeakTimingResult {
        /// Peak times for each cycle
        pub peak_times: Vec<f64>,
        /// Peak values
        pub peak_values: Vec<f64>,
        /// Within-period timing (0-1 scale, e.g., day-of-year / 365)
        pub normalized_timing: Vec<f64>,
        /// Mean normalized timing
        pub mean_timing: f64,
        /// Standard deviation of normalized timing
        pub std_timing: f64,
        /// Range of normalized timing (max - min)
        pub range_timing: f64,
        /// Variability score (0 = stable, 1 = highly variable)
        pub variability_score: f64,
        /// Trend in timing (positive = peaks getting later)
        pub timing_trend: f64,
        /// Cycle indices (1-indexed)
        pub cycle_indices: Vec<usize>,
    }
}
191
192// ============================================================================
193// Internal helper functions (pub(super) so submodules can use them)
194// ============================================================================
195
196/// Compute mean curve from functional data matrix.
197///
198/// # Arguments
199/// * `data` - Functional data matrix (n x m)
200/// * `parallel` - Use parallel iteration (default: true)
201///
202/// # Returns
203/// Mean curve of length m
204#[inline]
205pub(super) fn compute_mean_curve_impl(data: &FdMatrix, parallel: bool) -> Vec<f64> {
206    let (n, m) = data.shape();
207    if parallel && m >= 100 {
208        // Use parallel iteration for larger datasets
209        iter_maybe_parallel!(0..m)
210            .map(|j| {
211                let mut sum = 0.0;
212                for i in 0..n {
213                    sum += data[(i, j)];
214                }
215                sum / n as f64
216            })
217            .collect()
218    } else {
219        // Sequential for small datasets or when disabled
220        (0..m)
221            .map(|j| {
222                let mut sum = 0.0;
223                for i in 0..n {
224                    sum += data[(i, j)];
225                }
226                sum / n as f64
227            })
228            .collect()
229    }
230}
231
/// Compute mean curve (parallel by default for m >= 100).
///
/// Thin convenience wrapper over `compute_mean_curve_impl` with
/// `parallel = true`; the implementation still runs sequentially for
/// matrices with fewer than 100 columns.
#[inline]
pub(super) fn compute_mean_curve(data: &FdMatrix) -> Vec<f64> {
    compute_mean_curve_impl(data, true)
}
237
238/// Compute interior bounds for edge-skipping (10% on each side).
239///
240/// Used to avoid edge effects in wavelet and other analyses.
241///
242/// # Arguments
243/// * `m` - Total number of points
244///
245/// # Returns
246/// `(interior_start, interior_end)` indices, or `None` if range is invalid
247#[inline]
248pub(super) fn interior_bounds(m: usize) -> Option<(usize, usize)> {
249    let edge_skip = (m as f64 * 0.1) as usize;
250    let interior_start = edge_skip.min(m / 4);
251    let interior_end = m.saturating_sub(edge_skip).max(m * 3 / 4);
252
253    if interior_end <= interior_start {
254        None
255    } else {
256        Some((interior_start, interior_end))
257    }
258}
259
260/// Validate interior bounds with minimum span requirement.
261pub(super) fn valid_interior_bounds(m: usize, min_span: usize) -> Option<(usize, usize)> {
262    interior_bounds(m).filter(|&(s, e)| e > s + min_span)
263}
264
265/// Compute periodogram from data using FFT.
266/// Returns (frequencies, power) where frequencies are in cycles per unit time.
267pub(super) fn periodogram(data: &[f64], argvals: &[f64]) -> (Vec<f64>, Vec<f64>) {
268    let n = data.len();
269    if n < 2 || argvals.len() != n {
270        return (Vec::new(), Vec::new());
271    }
272
273    let dt = (argvals[n - 1] - argvals[0]) / (n - 1) as f64;
274    let fs = 1.0 / dt; // Sampling frequency
275
276    let mut planner = FftPlanner::<f64>::new();
277    let fft = planner.plan_fft_forward(n);
278
279    let mut buffer: Vec<Complex<f64>> = data.iter().map(|&x| Complex::new(x, 0.0)).collect();
280    fft.process(&mut buffer);
281
282    // Compute power spectrum (one-sided)
283    let n_freq = n / 2 + 1;
284    let mut power = Vec::with_capacity(n_freq);
285    let mut frequencies = Vec::with_capacity(n_freq);
286
287    for k in 0..n_freq {
288        let freq = k as f64 * fs / n as f64;
289        frequencies.push(freq);
290
291        let p = buffer[k].norm_sqr() / (n as f64 * n as f64);
292        // Double power for non-DC and non-Nyquist frequencies (one-sided spectrum)
293        let p = if k > 0 && k < n / 2 { 2.0 * p } else { p };
294        power.push(p);
295    }
296
297    (frequencies, power)
298}
299
300/// Naive O(n*max_lag) autocorrelation for small inputs.
301pub(super) fn autocorrelation_naive(data: &[f64], max_lag: usize, mean: f64, var: f64) -> Vec<f64> {
302    let n = data.len();
303    let max_lag = max_lag.min(n - 1);
304    let mut acf = Vec::with_capacity(max_lag + 1);
305    for lag in 0..=max_lag {
306        let mut sum = 0.0;
307        for i in 0..(n - lag) {
308            sum += (data[i] - mean) * (data[i + lag] - mean);
309        }
310        acf.push(sum / (n as f64 * var));
311    }
312    acf
313}
314
315/// Compute autocorrelation function up to max_lag.
316///
317/// Uses FFT-based Wiener-Khinchin theorem for n > 64, falling back to
318/// direct computation for small inputs where FFT overhead isn't worthwhile.
319pub(super) fn autocorrelation(data: &[f64], max_lag: usize) -> Vec<f64> {
320    let n = data.len();
321    if n == 0 {
322        return Vec::new();
323    }
324
325    let mean: f64 = data.iter().sum::<f64>() / n as f64;
326    // ACF convention: divide by n (matches R's acf(); divisor cancels in normalized ratio)
327    let var: f64 = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n as f64;
328
329    if var < 1e-15 {
330        return vec![1.0; max_lag.min(n) + 1];
331    }
332
333    let max_lag = max_lag.min(n - 1);
334
335    if n <= 64 {
336        return autocorrelation_naive(data, max_lag, mean, var);
337    }
338
339    // FFT-based ACF via Wiener-Khinchin theorem
340    let fft_len = (2 * n).next_power_of_two();
341
342    let mut planner = FftPlanner::<f64>::new();
343    let fft_forward = planner.plan_fft_forward(fft_len);
344    let fft_inverse = planner.plan_fft_inverse(fft_len);
345
346    // Zero-padded, mean-centered signal
347    let mut buffer: Vec<Complex<f64>> = Vec::with_capacity(fft_len);
348    for &x in data {
349        buffer.push(Complex::new(x - mean, 0.0));
350    }
351    buffer.resize(fft_len, Complex::new(0.0, 0.0));
352
353    // Forward FFT
354    fft_forward.process(&mut buffer);
355
356    // Power spectral density: |F(k)|^2
357    for c in buffer.iter_mut() {
358        *c = Complex::new(c.norm_sqr(), 0.0);
359    }
360
361    // Inverse FFT
362    fft_inverse.process(&mut buffer);
363
364    // Normalize and extract lags 0..=max_lag
365    let norm = fft_len as f64 * n as f64 * var;
366    (0..=max_lag).map(|lag| buffer[lag].re / norm).collect()
367}
368
369/// Try to add a peak, respecting minimum distance. Replaces previous peak if closer but higher.
370pub(super) fn try_add_peak(
371    peaks: &mut Vec<usize>,
372    candidate: usize,
373    signal: &[f64],
374    min_distance: usize,
375) {
376    if let Some(&last) = peaks.last() {
377        if candidate - last >= min_distance {
378            peaks.push(candidate);
379        } else if signal[candidate] > signal[last] {
380            // Safe: peaks is non-empty since last() succeeded
381            *peaks.last_mut().unwrap_or(&mut 0) = candidate;
382        }
383    } else {
384        peaks.push(candidate);
385    }
386}
387
388/// Find peaks in a 1D signal, returning indices.
389pub(crate) fn find_peaks_1d(signal: &[f64], min_distance: usize) -> Vec<usize> {
390    let n = signal.len();
391    if n < 3 {
392        return Vec::new();
393    }
394
395    let mut peaks = Vec::new();
396
397    for i in 1..(n - 1) {
398        if signal[i] > signal[i - 1] && signal[i] > signal[i + 1] {
399            try_add_peak(&mut peaks, i, signal, min_distance);
400        }
401    }
402
403    peaks
404}
405
/// Compute prominence for a peak (height above surrounding valleys).
///
/// Scans outward from the peak in both directions, stopping at the boundary
/// or the first sample at least as high as the peak, and takes the lowest
/// value seen on each side. Prominence is the peak height above the higher
/// of the two valley minima.
pub(crate) fn compute_prominence(signal: &[f64], peak_idx: usize) -> f64 {
    let peak_val = signal[peak_idx];

    // Lowest point to the left, within the region strictly below the peak.
    let left_min = signal[..peak_idx]
        .iter()
        .rev()
        .take_while(|&&v| v < peak_val)
        .fold(peak_val, |lo, &v| lo.min(v));

    // Lowest point to the right, within the region strictly below the peak.
    let right_min = signal[peak_idx + 1..]
        .iter()
        .take_while(|&&v| v < peak_val)
        .fold(peak_val, |lo, &v| lo.min(v));

    peak_val - left_min.max(right_min)
}
430
431/// Unwrap phase to remove 2pi discontinuities.
432pub(super) fn unwrap_phase(phase: &[f64]) -> Vec<f64> {
433    if phase.is_empty() {
434        return Vec::new();
435    }
436
437    let mut unwrapped = vec![phase[0]];
438    let mut cumulative_correction = 0.0;
439
440    for i in 1..phase.len() {
441        let diff = phase[i] - phase[i - 1];
442
443        // Check for wraparound
444        if diff > PI {
445            cumulative_correction -= 2.0 * PI;
446        } else if diff < -PI {
447            cumulative_correction += 2.0 * PI;
448        }
449
450        unwrapped.push(phase[i] + cumulative_correction);
451    }
452
453    unwrapped
454}
455
456/// Morlet wavelet function.
457///
458/// The Morlet wavelet is a complex exponential modulated by a Gaussian:
459/// psi(t) = exp(i * omega0 * t) * exp(-t^2 / 2)
460///
461/// where omega0 is the central frequency (typically 6 for good time-frequency trade-off).
462pub(super) fn morlet_wavelet(t: f64, omega0: f64) -> Complex<f64> {
463    let gaussian = (-t * t / 2.0).exp();
464    let oscillation = Complex::new((omega0 * t).cos(), (omega0 * t).sin());
465    oscillation * gaussian
466}
467
468/// Continuous Wavelet Transform at a single scale using Morlet wavelet.
469///
470/// Computes the wavelet coefficients at the specified scale (period) for all time points.
471/// Uses convolution in the time domain.
472///
473/// # Arguments
474/// * `signal` - Input signal
475/// * `argvals` - Time points
476/// * `scale` - Scale parameter (related to period)
477/// * `omega0` - Central frequency of Morlet wavelet (default: 6.0)
478///
479/// # Returns
480/// Complex wavelet coefficients at each time point
481#[allow(dead_code)]
482pub(super) fn cwt_morlet(
483    signal: &[f64],
484    argvals: &[f64],
485    scale: f64,
486    omega0: f64,
487) -> Vec<Complex<f64>> {
488    let n = signal.len();
489    if n == 0 || scale <= 0.0 {
490        return Vec::new();
491    }
492
493    let dt = (argvals[n - 1] - argvals[0]) / (n - 1) as f64;
494
495    // Compute wavelet coefficients via convolution
496    // W(a, b) = (1/sqrt(a)) * sum x[k] * psi*((t[k] - b) / a) * dt
497    let norm = 1.0 / scale.sqrt();
498
499    (0..n)
500        .map(|b| {
501            let mut sum = Complex::new(0.0, 0.0);
502            for k in 0..n {
503                let t_normalized = (argvals[k] - argvals[b]) / scale;
504                // Only compute within reasonable range (Gaussian decays quickly)
505                if t_normalized.abs() < 6.0 {
506                    let wavelet = morlet_wavelet(t_normalized, omega0);
507                    sum += signal[k] * wavelet.conj();
508                }
509            }
510            sum * norm * dt
511        })
512        .collect()
513}
514
/// Continuous Wavelet Transform at a single scale using FFT (faster for large signals).
///
/// Uses the convolution theorem: CWT = IFFT(FFT(signal) * FFT(wavelet)*)
///
/// # Arguments
/// * `signal` - Input signal
/// * `argvals` - Time points (only the overall span is used; assumes an
///   approximately uniform grid)
/// * `scale` - Scale parameter (> 0)
/// * `omega0` - Central frequency of the Morlet wavelet
///
/// # Returns
/// Complex wavelet coefficients, one per time point; empty for empty input
/// or a non-positive scale.
pub(super) fn cwt_morlet_fft(
    signal: &[f64],
    argvals: &[f64],
    scale: f64,
    omega0: f64,
) -> Vec<Complex<f64>> {
    let n = signal.len();
    if n == 0 || scale <= 0.0 {
        return Vec::new();
    }

    // NOTE(review): for n == 1 this is 0/0 = NaN and the single output
    // coefficient is NaN — confirm callers guarantee n >= 2.
    let dt = (argvals[n - 1] - argvals[0]) / (n - 1) as f64;
    // L2 normalization of the scaled wavelet (1/sqrt(scale)).
    let norm = 1.0 / scale.sqrt();

    // Compute wavelet in time domain centered at t=0, stored in FFT
    // wrap-around order: indices 0..=n/2 hold the non-negative-time half,
    // indices above n/2 hold the negative-time tail, so the kernel is
    // effectively centered at index 0 for circular convolution.
    let wavelet_time: Vec<Complex<f64>> = (0..n)
        .map(|k| {
            // Center the wavelet
            let t = if k <= n / 2 {
                k as f64 * dt / scale
            } else {
                (k as f64 - n as f64) * dt / scale
            };
            morlet_wavelet(t, omega0) * norm
        })
        .collect();

    let mut planner = FftPlanner::<f64>::new();
    let fft_forward = planner.plan_fft_forward(n);
    let fft_inverse = planner.plan_fft_inverse(n);

    // FFT of signal
    let mut signal_fft: Vec<Complex<f64>> = signal.iter().map(|&x| Complex::new(x, 0.0)).collect();
    fft_forward.process(&mut signal_fft);

    // FFT of wavelet
    let mut wavelet_fft = wavelet_time;
    fft_forward.process(&mut wavelet_fft);

    // Multiply in frequency domain (use conjugate of wavelet FFT for correlation)
    let mut result: Vec<Complex<f64>> = signal_fft
        .iter()
        .zip(wavelet_fft.iter())
        .map(|(s, w)| *s * w.conj())
        .collect();

    // Inverse FFT
    fft_inverse.process(&mut result);

    // Normalize: dt approximates the integral measure, and 1/n undoes
    // rustfft's unnormalized inverse transform.
    for c in result.iter_mut() {
        *c *= dt / n as f64;
    }

    result
}
574
575/// Compute Otsu's threshold for bimodal separation.
576///
577/// Finds the threshold that minimizes within-class variance (or equivalently
578/// maximizes between-class variance).
579pub(super) fn otsu_threshold(values: &[f64]) -> f64 {
580    if values.is_empty() {
581        return 0.5;
582    }
583
584    // Filter NaN values
585    let valid: Vec<f64> = values.iter().copied().filter(|x| x.is_finite()).collect();
586    if valid.is_empty() {
587        return 0.5;
588    }
589
590    let vmin = valid.iter().copied().fold(f64::INFINITY, f64::min);
591    let vmax = valid.iter().copied().fold(f64::NEG_INFINITY, f64::max);
592
593    if (vmax - vmin).abs() < 1e-10 {
594        return (vmin + vmax) / 2.0;
595    }
596
597    let n_bins = 256;
598    let (histogram, hist_min, bin_width) = build_histogram(&valid, n_bins);
599    let (best_bin, _) = find_optimal_threshold_bin(&histogram, valid.len() as f64);
600
601    hist_min + (best_bin as f64 + 0.5) * bin_width
602}
603
604/// Compute linear regression slope (simple OLS).
605pub(super) fn linear_slope(x: &[f64], y: &[f64]) -> f64 {
606    if x.len() != y.len() || x.len() < 2 {
607        return 0.0;
608    }
609
610    let n = x.len() as f64;
611    let mean_x: f64 = x.iter().sum::<f64>() / n;
612    let mean_y: f64 = y.iter().sum::<f64>() / n;
613
614    let mut num = 0.0;
615    let mut den = 0.0;
616
617    for (&xi, &yi) in x.iter().zip(y.iter()) {
618        num += (xi - mean_x) * (yi - mean_y);
619        den += (xi - mean_x).powi(2);
620    }
621
622    if den.abs() < 1e-15 {
623        0.0
624    } else {
625        num / den
626    }
627}
628
/// Statistics from amplitude envelope analysis (shared by Hilbert and wavelet methods).
pub(super) struct AmplitudeEnvelopeStats {
    /// Coefficient of variation of the envelope (std / mean; 0 when mean ~ 0).
    pub(super) modulation_score: f64,
    /// Relative amplitude change over the analyzed window, clamped to [-1, 1].
    pub(super) amplitude_trend: f64,
    /// True when `modulation_score` exceeds the caller-supplied threshold.
    pub(super) has_modulation: bool,
    /// Qualitative classification (Stable / Emerging / Fading / Oscillating).
    pub(super) modulation_type: change::ModulationType,
    /// Mean envelope amplitude (underscore suggests it is currently unread by
    /// consumers — verify before removing).
    pub(super) _mean_amp: f64,
    /// Minimum envelope amplitude over the window.
    pub(super) min_amp: f64,
    /// Maximum envelope amplitude over the window.
    pub(super) max_amp: f64,
}
639
640/// Analyze an amplitude envelope slice to compute modulation statistics.
641pub(super) fn analyze_amplitude_envelope(
642    interior_envelope: &[f64],
643    interior_times: &[f64],
644    modulation_threshold: f64,
645) -> AmplitudeEnvelopeStats {
646    let n_interior = interior_envelope.len() as f64;
647
648    let mean_amp = interior_envelope.iter().sum::<f64>() / n_interior;
649    let min_amp = interior_envelope
650        .iter()
651        .copied()
652        .fold(f64::INFINITY, f64::min);
653    let max_amp = interior_envelope
654        .iter()
655        .copied()
656        .fold(f64::NEG_INFINITY, f64::max);
657
658    let variance = interior_envelope
659        .iter()
660        .map(|&a| (a - mean_amp).powi(2))
661        .sum::<f64>()
662        / n_interior;
663    let std_amp = variance.sqrt();
664    let modulation_score = if mean_amp > 1e-10 {
665        std_amp / mean_amp
666    } else {
667        0.0
668    };
669
670    let t_mean = interior_times.iter().sum::<f64>() / n_interior;
671    let mut cov_ta = 0.0;
672    let mut var_t = 0.0;
673    for (&t, &a) in interior_times.iter().zip(interior_envelope.iter()) {
674        cov_ta += (t - t_mean) * (a - mean_amp);
675        var_t += (t - t_mean).powi(2);
676    }
677    let slope = if var_t > 1e-10 { cov_ta / var_t } else { 0.0 };
678
679    let time_span = interior_times.last().unwrap_or(&1.0) - interior_times.first().unwrap_or(&0.0);
680    let amplitude_trend = if mean_amp > 1e-10 && time_span > 1e-10 {
681        (slope * time_span / mean_amp).clamp(-1.0, 1.0)
682    } else {
683        0.0
684    };
685
686    let has_modulation = modulation_score > modulation_threshold;
687    let modulation_type = if !has_modulation {
688        change::ModulationType::Stable
689    } else if amplitude_trend > 0.3 {
690        change::ModulationType::Emerging
691    } else if amplitude_trend < -0.3 {
692        change::ModulationType::Fading
693    } else {
694        change::ModulationType::Oscillating
695    };
696
697    AmplitudeEnvelopeStats {
698        modulation_score,
699        amplitude_trend,
700        has_modulation,
701        modulation_type,
702        _mean_amp: mean_amp,
703        min_amp,
704        max_amp,
705    }
706}
707
708/// Fit a sinusoid at the given period, subtract it from residual, and return (a, b, amplitude, phase).
709pub(super) fn fit_and_subtract_sinusoid(
710    residual: &mut [f64],
711    argvals: &[f64],
712    period: f64,
713) -> (f64, f64, f64, f64) {
714    let m = residual.len();
715    let omega = 2.0 * PI / period;
716    let mut cos_sum = 0.0;
717    let mut sin_sum = 0.0;
718
719    for (j, &t) in argvals.iter().enumerate() {
720        cos_sum += residual[j] * (omega * t).cos();
721        sin_sum += residual[j] * (omega * t).sin();
722    }
723
724    let a = 2.0 * cos_sum / m as f64;
725    let b = 2.0 * sin_sum / m as f64;
726    let amplitude = (a * a + b * b).sqrt();
727    let phase = b.atan2(a);
728
729    for (j, &t) in argvals.iter().enumerate() {
730        residual[j] -= a * (omega * t).cos() + b * (omega * t).sin();
731    }
732
733    (a, b, amplitude, phase)
734}
735
736/// Validate a single SAZED component: returns Some(period) if it passes range and confidence checks.
737pub(super) fn validate_sazed_component(
738    period: f64,
739    confidence: f64,
740    min_period: f64,
741    max_period: f64,
742    threshold: f64,
743) -> Option<f64> {
744    if period.is_finite() && period > min_period && period < max_period && confidence > threshold {
745        Some(period)
746    } else {
747        None
748    }
749}
750
751/// Count how many periods agree with a reference within tolerance, returning (count, sum).
752pub(super) fn count_agreeing_periods(
753    periods: &[f64],
754    reference: f64,
755    tolerance: f64,
756) -> (usize, f64) {
757    let mut count = 0;
758    let mut sum = 0.0;
759    for &p in periods {
760        let rel_diff = (reference - p).abs() / reference.max(p);
761        if rel_diff <= tolerance {
762            count += 1;
763            sum += p;
764        }
765    }
766    (count, sum)
767}
768
769/// Find the end of the initial ACF descent (first negative or first uptick).
770pub(super) fn find_acf_descent_end(acf: &[f64]) -> usize {
771    for i in 1..acf.len() {
772        if acf[i] < 0.0 {
773            return i;
774        }
775        if i > 1 && acf[i] > acf[i - 1] {
776            return i - 1;
777        }
778    }
779    1
780}
781
782/// Find the first ACF peak after initial descent. Returns Some((lag, acf_value)).
783pub(super) fn find_first_acf_peak(acf: &[f64]) -> Option<(usize, f64)> {
784    if acf.len() < 4 {
785        return None;
786    }
787
788    let min_search_start = find_acf_descent_end(acf);
789    let peaks = find_peaks_1d(&acf[min_search_start..], 1);
790    if peaks.is_empty() {
791        return None;
792    }
793
794    let peak_lag = peaks[0] + min_search_start;
795    Some((peak_lag, acf[peak_lag].max(0.0)))
796}
797
/// Compute per-cycle seasonal strengths and identify weak seasons.
///
/// Splits the time span into consecutive full windows of length `period` and
/// computes the variance-based seasonal strength within each window.
///
/// # Arguments
/// * `data` - Functional data matrix (n samples x m time points)
/// * `argvals` - Time points (assumed ascending — TODO confirm; used to locate cycle windows)
/// * `period` - Cycle length in the units of `argvals`
/// * `strength_thresh` - Cycles with strength below this are flagged as weak
///
/// # Returns
/// `(cycle_strengths, weak_seasons)`: one strength value per full cycle (NaN
/// when a cycle covers fewer than 4 points) and the 0-based indices of cycles
/// whose strength falls below `strength_thresh`.
pub(super) fn compute_cycle_strengths(
    data: &FdMatrix,
    argvals: &[f64],
    period: f64,
    strength_thresh: f64,
) -> (Vec<f64>, Vec<usize>) {
    let (n, m) = data.shape();
    let t_start = argvals[0];
    let t_end = argvals[m - 1];
    // Only complete cycles are analyzed; any trailing partial cycle is ignored.
    let n_cycles = ((t_end - t_start) / period).floor() as usize;

    let mut cycle_strengths = Vec::with_capacity(n_cycles);
    let mut weak_seasons = Vec::new();

    for cycle in 0..n_cycles {
        let cycle_start = t_start + cycle as f64 * period;
        let cycle_end = cycle_start + period;

        // First index inside the cycle, and one past the last index inside it.
        let start_idx = argvals.iter().position(|&t| t >= cycle_start).unwrap_or(0);
        let end_idx = argvals.iter().position(|&t| t > cycle_end).unwrap_or(m);

        let cycle_m = end_idx - start_idx;
        if cycle_m < 4 {
            // Too few points for a meaningful strength estimate.
            cycle_strengths.push(f64::NAN);
            continue;
        }

        // Extract the cycle's columns in column-major order (each column's n
        // row values are contiguous), matching `FdMatrix::from_column_major`.
        let cycle_data: Vec<f64> = (start_idx..end_idx)
            .flat_map(|j| (0..n).map(move |i| data[(i, j)]))
            .collect();
        let cycle_mat = FdMatrix::from_column_major(cycle_data, n, cycle_m).unwrap();
        let cycle_argvals: Vec<f64> = argvals[start_idx..end_idx].to_vec();

        let strength_val =
            strength::seasonal_strength_variance(&cycle_mat, &cycle_argvals, period, 3);

        cycle_strengths.push(strength_val);
        if strength_val < strength_thresh {
            weak_seasons.push(cycle);
        }
    }

    (cycle_strengths, weak_seasons)
}
843
844/// Build a histogram from valid values. Returns (histogram, min_val, bin_width).
845pub(super) fn build_histogram(valid: &[f64], n_bins: usize) -> (Vec<usize>, f64, f64) {
846    let min_val = valid.iter().copied().fold(f64::INFINITY, f64::min);
847    let max_val = valid.iter().copied().fold(f64::NEG_INFINITY, f64::max);
848    let bin_width = (max_val - min_val) / n_bins as f64;
849    let mut histogram = vec![0usize; n_bins];
850    for &v in valid {
851        let bin = ((v - min_val) / bin_width).min(n_bins as f64 - 1.0) as usize;
852        histogram[bin] += 1;
853    }
854    (histogram, min_val, bin_width)
855}
856
857/// Find the optimal threshold bin using Otsu's between-class variance. Returns (best_bin, best_variance).
858pub(super) fn find_optimal_threshold_bin(histogram: &[usize], total: f64) -> (usize, f64) {
859    let n_bins = histogram.len();
860    let mut sum_total = 0.0;
861    for (i, &count) in histogram.iter().enumerate() {
862        sum_total += i as f64 * count as f64;
863    }
864
865    let mut best_bin = 0;
866    let mut best_variance = 0.0;
867    let mut sum_b = 0.0;
868    let mut weight_b = 0.0;
869
870    for t in 0..n_bins {
871        weight_b += histogram[t] as f64;
872        if weight_b == 0.0 {
873            continue;
874        }
875        let weight_f = total - weight_b;
876        if weight_f == 0.0 {
877            break;
878        }
879        sum_b += t as f64 * histogram[t] as f64;
880        let mean_b = sum_b / weight_b;
881        let mean_f = (sum_total - sum_b) / weight_f;
882        let variance = weight_b * weight_f * (mean_b - mean_f).powi(2);
883        if variance > best_variance {
884            best_variance = variance;
885            best_bin = t;
886        }
887    }
888
889    (best_bin, best_variance)
890}
891
892/// Sum power at harmonics of a fundamental frequency within tolerance.
893pub(super) fn sum_harmonic_power(
894    frequencies: &[f64],
895    power: &[f64],
896    fundamental_freq: f64,
897    tolerance: f64,
898) -> (f64, f64) {
899    let mut seasonal_power = 0.0;
900    let mut total_power = 0.0;
901
902    for (i, (&freq, &p)) in frequencies.iter().zip(power.iter()).enumerate() {
903        if i == 0 {
904            continue;
905        }
906        total_power += p;
907        let ratio = freq / fundamental_freq;
908        let nearest_harmonic = ratio.round();
909        if (ratio - nearest_harmonic).abs() < tolerance && nearest_harmonic >= 1.0 {
910            seasonal_power += p;
911        }
912    }
913
914    (seasonal_power, total_power)
915}
916
917/// Return the new seasonal state if `ss` represents a valid threshold crossing,
918/// or `None` if the index should be skipped (NaN, no change, or too close to the
919/// previous change point).
920pub(super) fn crossing_direction(
921    ss: f64,
922    threshold: f64,
923    in_seasonal: bool,
924    i: usize,
925    last_change_idx: Option<usize>,
926    min_dur_points: usize,
927) -> Option<bool> {
928    if ss.is_nan() {
929        return None;
930    }
931    let now_seasonal = ss > threshold;
932    if now_seasonal == in_seasonal {
933        return None;
934    }
935    if last_change_idx.is_some_and(|last_idx| i - last_idx < min_dur_points) {
936        return None;
937    }
938    Some(now_seasonal)
939}
940
941/// Build a `ChangePoint` for a threshold crossing at index `i`.
942pub(super) fn build_change_point(
943    i: usize,
944    ss: f64,
945    now_seasonal: bool,
946    strength_curve: &[f64],
947    argvals: &[f64],
948) -> ChangePoint {
949    let change_type = if now_seasonal {
950        ChangeType::Onset
951    } else {
952        ChangeType::Cessation
953    };
954    let strength_before = if i > 0 && !strength_curve[i - 1].is_nan() {
955        strength_curve[i - 1]
956    } else {
957        ss
958    };
959    ChangePoint {
960        time: argvals[i],
961        change_type,
962        strength_before,
963        strength_after: ss,
964    }
965}
966
967/// Detect threshold crossings in a strength curve, returning change points.
968pub(super) fn detect_threshold_crossings(
969    strength_curve: &[f64],
970    argvals: &[f64],
971    threshold: f64,
972    min_dur_points: usize,
973) -> Vec<ChangePoint> {
974    let mut change_points = Vec::new();
975    let mut in_seasonal = strength_curve[0] > threshold;
976    let mut last_change_idx: Option<usize> = None;
977
978    for (i, &ss) in strength_curve.iter().enumerate().skip(1) {
979        let Some(now_seasonal) = crossing_direction(
980            ss,
981            threshold,
982            in_seasonal,
983            i,
984            last_change_idx,
985            min_dur_points,
986        ) else {
987            continue;
988        };
989
990        change_points.push(build_change_point(
991            i,
992            ss,
993            now_seasonal,
994            strength_curve,
995            argvals,
996        ));
997
998        in_seasonal = now_seasonal;
999        last_change_idx = Some(i);
1000    }
1001
1002    change_points
1003}
1004
1005/// Find peaks in power spectrum above noise floor
1006pub(super) fn find_spectral_peaks(power: &[f64]) -> Vec<usize> {
1007    if power.len() < 3 {
1008        return Vec::new();
1009    }
1010
1011    // Estimate noise floor as median power
1012    let mut sorted_power: Vec<f64> = power.to_vec();
1013    crate::helpers::sort_nan_safe(&mut sorted_power);
1014    let noise_floor = sorted_power[sorted_power.len() / 2];
1015    let threshold = noise_floor * 2.0; // Peaks must be at least 2x median
1016
1017    // Find all local maxima above threshold
1018    let mut peaks: Vec<(usize, f64)> = Vec::new();
1019    for i in 1..(power.len() - 1) {
1020        if power[i] > power[i - 1] && power[i] > power[i + 1] && power[i] > threshold {
1021            peaks.push((i, power[i]));
1022        }
1023    }
1024
1025    // Sort by power (descending)
1026    peaks.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1027
1028    peaks.into_iter().map(|(idx, _)| idx).collect()
1029}
1030
1031/// Find consensus period from multiple estimates using tolerance-based voting
1032pub(super) fn find_consensus_period(periods: &[f64], tolerance: f64) -> (f64, usize) {
1033    if periods.is_empty() {
1034        return (f64::NAN, 0);
1035    }
1036    if periods.len() == 1 {
1037        return (periods[0], 1);
1038    }
1039
1040    let mut best_period = periods[0];
1041    let mut best_count = 0;
1042    let mut best_sum = 0.0;
1043
1044    for &p1 in periods {
1045        let (count, sum) = count_agreeing_periods(periods, p1, tolerance);
1046
1047        if count > best_count
1048            || (count == best_count && sum / count as f64 > best_sum / best_count.max(1) as f64)
1049        {
1050            best_count = count;
1051            best_period = sum / count as f64;
1052            best_sum = sum;
1053        }
1054    }
1055
1056    (best_period, best_count)
1057}
1058
1059/// Validate a candidate period using ACF
1060pub(super) fn validate_period_acf(acf: &[f64], period: f64, dt: f64) -> f64 {
1061    let lag = (period / dt).round() as usize;
1062
1063    if lag == 0 || lag >= acf.len() {
1064        return 0.0;
1065    }
1066
1067    // Score based on ACF value at the period lag
1068    // Positive ACF values indicate valid periodicity
1069    let acf_at_lag = acf[lag];
1070
1071    // Also check harmonics (period/2, period*2) for consistency
1072    let half_lag = lag / 2;
1073    let double_lag = lag * 2;
1074
1075    let mut score = acf_at_lag.max(0.0);
1076
1077    // For a true period, ACF at half-period should be low/negative
1078    // and ACF at double-period should also be high
1079    if half_lag > 0 && half_lag < acf.len() {
1080        let half_acf = acf[half_lag];
1081        // Penalize if half-period has high ACF (suggests half-period is real)
1082        if half_acf > acf_at_lag * 0.7 {
1083            score *= 0.5;
1084        }
1085    }
1086
1087    if double_lag < acf.len() {
1088        let double_acf = acf[double_lag];
1089        // Bonus if double-period also shows periodicity
1090        if double_acf > 0.3 {
1091            score *= 1.2;
1092        }
1093    }
1094
1095    score.min(1.0)
1096}
1097
1098/// Refine period estimate using gradient ascent on ACF
1099pub(super) fn refine_period_gradient(
1100    acf: &[f64],
1101    initial_period: f64,
1102    dt: f64,
1103    steps: usize,
1104) -> f64 {
1105    let mut period = initial_period;
1106    let step_size = dt * 0.5; // Search step size
1107
1108    for _ in 0..steps {
1109        let current_score = validate_period_acf(acf, period, dt);
1110        let left_score = validate_period_acf(acf, period - step_size, dt);
1111        let right_score = validate_period_acf(acf, period + step_size, dt);
1112
1113        if left_score > current_score && left_score > right_score {
1114            period -= step_size;
1115        } else if right_score > current_score {
1116            period += step_size;
1117        }
1118        // If current is best, we've converged
1119    }
1120
1121    period.max(dt) // Ensure period is at least one time step
1122}
1123
1124/// Cluster periods using a simple density-based approach
1125pub(super) fn cluster_periods(
1126    candidates: &[(f64, f64)],
1127    tolerance: f64,
1128    min_size: usize,
1129) -> Vec<(f64, f64)> {
1130    if candidates.is_empty() {
1131        return Vec::new();
1132    }
1133
1134    // Sort candidates by period
1135    let mut sorted: Vec<(f64, f64)> = candidates.to_vec();
1136    sorted.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
1137
1138    let mut clusters: Vec<(f64, f64)> = Vec::new(); // (center, total_power)
1139    let mut current_cluster: Vec<(f64, f64)> = vec![sorted[0]];
1140
1141    for &(period, power) in sorted.iter().skip(1) {
1142        let cluster_center =
1143            current_cluster.iter().map(|(p, _)| p).sum::<f64>() / current_cluster.len() as f64;
1144
1145        let rel_diff = (period - cluster_center).abs() / cluster_center.max(period);
1146
1147        if rel_diff <= tolerance {
1148            // Add to current cluster
1149            current_cluster.push((period, power));
1150        } else {
1151            // Finish current cluster and start new one
1152            if current_cluster.len() >= min_size {
1153                let center = current_cluster.iter().map(|(p, pw)| p * pw).sum::<f64>()
1154                    / current_cluster
1155                        .iter()
1156                        .map(|(_, pw)| pw)
1157                        .sum::<f64>()
1158                        .max(1e-15);
1159                let total_power: f64 = current_cluster.iter().map(|(_, pw)| pw).sum();
1160                clusters.push((center, total_power));
1161            }
1162            current_cluster = vec![(period, power)];
1163        }
1164    }
1165
1166    // Don't forget the last cluster
1167    if current_cluster.len() >= min_size {
1168        let center = current_cluster.iter().map(|(p, pw)| p * pw).sum::<f64>()
1169            / current_cluster
1170                .iter()
1171                .map(|(_, pw)| pw)
1172                .sum::<f64>()
1173                .max(1e-15);
1174        let total_power: f64 = current_cluster.iter().map(|(_, pw)| pw).sum();
1175        clusters.push((center, total_power));
1176    }
1177
1178    clusters
1179}