Skip to main content

fdars_core/seasonal/
sazed.rs

1use crate::matrix::FdMatrix;
2
3use super::{
4    autocorrelation, compute_mean_curve, find_consensus_period, find_first_acf_peak, find_peaks_1d,
5    periodogram, validate_sazed_component,
6};
7
8/// Result of SAZED ensemble period detection.
9#[derive(Debug, Clone, PartialEq)]
10#[non_exhaustive]
11pub struct SazedResult {
12    /// Primary detected period (consensus from ensemble)
13    pub period: f64,
14    /// Confidence score (0-1, based on component agreement)
15    pub confidence: f64,
16    /// Periods detected by each component (may be NaN if not detected)
17    pub component_periods: SazedComponents,
18    /// Number of components that agreed on the final period
19    pub agreeing_components: usize,
20}
21
22/// Individual period estimates from each SAZED component.
23#[derive(Debug, Clone, PartialEq)]
24#[non_exhaustive]
25pub struct SazedComponents {
26    /// Period from spectral (FFT) detection
27    pub spectral: f64,
28    /// Period from ACF peak detection
29    pub acf_peak: f64,
30    /// Period from weighted ACF average
31    pub acf_average: f64,
32    /// Period from ACF zero-crossing analysis
33    pub zero_crossing: f64,
34    /// Period from spectral differencing
35    pub spectral_diff: f64,
36}
37
38/// SAZED: Spectral-ACF Zero-crossing Ensemble Detection
39///
40/// A parameter-free ensemble method for robust period detection.
41/// Combines 5 detection components:
42/// 1. Spectral (FFT) - peaks in periodogram
43/// 2. ACF peak - first significant peak in autocorrelation
44/// 3. ACF average - weighted mean of ACF peaks
45/// 4. Zero-crossing - period from ACF zero crossings
46/// 5. Spectral differencing - FFT on first-differenced signal
47///
48/// Each component provides both a period estimate and a confidence score.
49/// Only components with sufficient confidence participate in voting.
50/// The final period is chosen by majority voting with tolerance.
51///
52/// # Arguments
53/// * `data` - Input signal (1D time series or mean curve from fdata)
54/// * `argvals` - Time points corresponding to data
55/// * `tolerance` - Relative tolerance for considering periods equal (default: 0.05 = 5%)
56///
57/// # Returns
58/// * `SazedResult` containing the consensus period and component details
59pub fn sazed(data: &[f64], argvals: &[f64], tolerance: Option<f64>) -> SazedResult {
60    let n = data.len();
61    let tol = tolerance.unwrap_or(0.05); // Tighter default tolerance
62
63    if n < 8 || argvals.len() != n {
64        return SazedResult {
65            period: f64::NAN,
66            confidence: 0.0,
67            component_periods: SazedComponents {
68                spectral: f64::NAN,
69                acf_peak: f64::NAN,
70                acf_average: f64::NAN,
71                zero_crossing: f64::NAN,
72                spectral_diff: f64::NAN,
73            },
74            agreeing_components: 0,
75        };
76    }
77
78    let dt = (argvals[n - 1] - argvals[0]) / (n - 1) as f64;
79    let max_lag = (n / 2).max(4);
80    let signal_range = argvals[n - 1] - argvals[0];
81
82    // Minimum detectable period (at least 3 cycles)
83    let min_period = signal_range / (n as f64 / 3.0);
84    // Maximum detectable period (at most 2 complete cycles)
85    let max_period = signal_range / 2.0;
86
87    // Component 1: Spectral (FFT) detection with confidence
88    let (spectral_period, spectral_conf) = sazed_spectral_with_confidence(data, argvals);
89
90    // Component 2: ACF peak detection with confidence
91    let (acf_peak_period, acf_peak_conf) = sazed_acf_peak_with_confidence(data, dt, max_lag);
92
93    // Component 3: ACF weighted average (uses ACF peak confidence)
94    let acf_average_period = sazed_acf_average(data, dt, max_lag);
95
96    // Component 4: Zero-crossing analysis with confidence
97    let (zero_crossing_period, zero_crossing_conf) =
98        sazed_zero_crossing_with_confidence(data, dt, max_lag);
99
100    // Component 5: Spectral on differenced signal with confidence
101    let (spectral_diff_period, spectral_diff_conf) =
102        sazed_spectral_diff_with_confidence(data, argvals);
103
104    let components = SazedComponents {
105        spectral: spectral_period,
106        acf_peak: acf_peak_period,
107        acf_average: acf_average_period,
108        zero_crossing: zero_crossing_period,
109        spectral_diff: spectral_diff_period,
110    };
111
112    // Confidence thresholds for each component (tuned to minimize FPR on noise)
113    // For Gaussian noise: spectral peaks rarely exceed 6x median, ACF ~1/sqrt(n)
114    let spectral_thresh = 8.0; // Power ratio must be > 8x median (noise rarely exceeds 6x)
115    let acf_thresh = 0.3; // ACF correlation must be > 0.3 (noise ~0.1 for n=100)
116    let zero_crossing_thresh = 0.9; // Zero-crossing consistency > 90%
117    let spectral_diff_thresh = 6.0; // Diff spectral power ratio > 6x
118
119    // Minimum number of confident components required to report a period
120    let min_confident_components = 2;
121
122    // Collect valid periods (only from components with sufficient confidence)
123    let confident_periods: Vec<f64> = [
124        validate_sazed_component(
125            spectral_period,
126            spectral_conf,
127            min_period,
128            max_period,
129            spectral_thresh,
130        ),
131        validate_sazed_component(
132            acf_peak_period,
133            acf_peak_conf,
134            min_period,
135            max_period,
136            acf_thresh,
137        ),
138        validate_sazed_component(
139            acf_average_period,
140            acf_peak_conf,
141            min_period,
142            max_period,
143            acf_thresh,
144        ),
145        validate_sazed_component(
146            zero_crossing_period,
147            zero_crossing_conf,
148            min_period,
149            max_period,
150            zero_crossing_thresh,
151        ),
152        validate_sazed_component(
153            spectral_diff_period,
154            spectral_diff_conf,
155            min_period,
156            max_period,
157            spectral_diff_thresh,
158        ),
159    ]
160    .into_iter()
161    .flatten()
162    .collect();
163
164    // Require minimum number of confident components before reporting a period
165    if confident_periods.len() < min_confident_components {
166        return SazedResult {
167            period: f64::NAN,
168            confidence: 0.0,
169            component_periods: components,
170            agreeing_components: confident_periods.len(),
171        };
172    }
173
174    // Ensemble voting: find the mode with tolerance
175    let (consensus_period, agreeing_count) = find_consensus_period(&confident_periods, tol);
176    let confidence = agreeing_count as f64 / 5.0;
177
178    SazedResult {
179        period: consensus_period,
180        confidence,
181        component_periods: components,
182        agreeing_components: agreeing_count,
183    }
184}
185
186/// SAZED for functional data (matrix format)
187///
188/// Computes mean curve first, then applies SAZED.
189///
190/// # Arguments
191/// * `data` - Column-major matrix (n x m)
192/// * `argvals` - Evaluation points
193/// * `tolerance` - Relative tolerance for period matching
194pub fn sazed_fdata(data: &FdMatrix, argvals: &[f64], tolerance: Option<f64>) -> SazedResult {
195    let (n, m) = data.shape();
196    if n == 0 || m < 8 || argvals.len() != m {
197        return SazedResult {
198            period: f64::NAN,
199            confidence: 0.0,
200            component_periods: SazedComponents {
201                spectral: f64::NAN,
202                acf_peak: f64::NAN,
203                acf_average: f64::NAN,
204                zero_crossing: f64::NAN,
205                spectral_diff: f64::NAN,
206            },
207            agreeing_components: 0,
208        };
209    }
210
211    let mean_curve = compute_mean_curve(data);
212    sazed(&mean_curve, argvals, tolerance)
213}
214
215/// Spectral component with confidence: returns (period, power_ratio)
216fn sazed_spectral_with_confidence(data: &[f64], argvals: &[f64]) -> (f64, f64) {
217    let (frequencies, power) = periodogram(data, argvals);
218
219    if frequencies.len() < 3 {
220        return (f64::NAN, 0.0);
221    }
222
223    // Find peaks in power spectrum (skip DC)
224    let power_no_dc: Vec<f64> = power.iter().skip(1).copied().collect();
225
226    if power_no_dc.is_empty() {
227        return (f64::NAN, 0.0);
228    }
229
230    // Calculate noise floor as median
231    let mut sorted_power = power_no_dc.clone();
232    crate::helpers::sort_nan_safe(&mut sorted_power);
233    let noise_floor = sorted_power[sorted_power.len() / 2].max(1e-15);
234
235    // Find global maximum
236    let mut max_idx = 0;
237    let mut max_val = 0.0;
238    for (i, &p) in power_no_dc.iter().enumerate() {
239        if p > max_val {
240            max_val = p;
241            max_idx = i;
242        }
243    }
244
245    let power_ratio = max_val / noise_floor;
246    let freq = frequencies[max_idx + 1];
247
248    if freq > 1e-15 {
249        (1.0 / freq, power_ratio)
250    } else {
251        (f64::NAN, 0.0)
252    }
253}
254
255/// ACF peak component with confidence: returns (period, acf_value_at_peak)
256fn sazed_acf_peak_with_confidence(data: &[f64], dt: f64, max_lag: usize) -> (f64, f64) {
257    let acf = autocorrelation(data, max_lag);
258
259    match find_first_acf_peak(&acf) {
260        Some((peak_lag, acf_value)) => (peak_lag as f64 * dt, acf_value),
261        None => (f64::NAN, 0.0),
262    }
263}
264
265/// ACF average component: weighted mean of ACF peak locations
266fn sazed_acf_average(data: &[f64], dt: f64, max_lag: usize) -> f64 {
267    let acf = autocorrelation(data, max_lag);
268
269    if acf.len() < 4 {
270        return f64::NAN;
271    }
272
273    // Find all peaks in ACF
274    let peaks = find_peaks_1d(&acf[1..], 1);
275
276    if peaks.is_empty() {
277        return f64::NAN;
278    }
279
280    // Weight peaks by their ACF value
281    let mut weighted_sum = 0.0;
282    let mut weight_sum = 0.0;
283
284    for (i, &peak_idx) in peaks.iter().enumerate() {
285        let lag = peak_idx + 1;
286        let weight = acf[lag].max(0.0);
287
288        if i == 0 {
289            // First peak is the fundamental period
290            weighted_sum += lag as f64 * weight;
291            weight_sum += weight;
292        } else {
293            // Later peaks: estimate fundamental by dividing by harmonic number
294            let expected_fundamental = peaks[0] + 1;
295            let harmonic = ((lag as f64 / expected_fundamental as f64) + 0.5) as usize;
296            if harmonic > 0 {
297                let fundamental_est = lag as f64 / harmonic as f64;
298                weighted_sum += fundamental_est * weight;
299                weight_sum += weight;
300            }
301        }
302    }
303
304    if weight_sum > 1e-15 {
305        weighted_sum / weight_sum * dt
306    } else {
307        f64::NAN
308    }
309}
310
311/// Zero-crossing component with confidence: returns (period, consistency)
312/// Consistency is how regular the zero crossings are (std/mean of half-periods)
313fn sazed_zero_crossing_with_confidence(data: &[f64], dt: f64, max_lag: usize) -> (f64, f64) {
314    let acf = autocorrelation(data, max_lag);
315
316    if acf.len() < 4 {
317        return (f64::NAN, 0.0);
318    }
319
320    // Find zero crossings (sign changes)
321    let mut crossings = Vec::new();
322    for i in 1..acf.len() {
323        if acf[i - 1] * acf[i] < 0.0 {
324            // Linear interpolation for more precise crossing
325            let frac = acf[i - 1].abs() / (acf[i - 1].abs() + acf[i].abs());
326            crossings.push((i - 1) as f64 + frac);
327        }
328    }
329
330    if crossings.len() < 2 {
331        return (f64::NAN, 0.0);
332    }
333
334    // Period is twice the distance between consecutive zero crossings
335    // (ACF goes through two zero crossings per period)
336    let mut half_periods = Vec::new();
337    for i in 1..crossings.len() {
338        half_periods.push(crossings[i] - crossings[i - 1]);
339    }
340
341    if half_periods.is_empty() {
342        return (f64::NAN, 0.0);
343    }
344
345    // Calculate consistency: 1 - (std/mean) of half-periods
346    // High consistency means regular zero crossings
347    let mean: f64 = half_periods.iter().sum::<f64>() / half_periods.len() as f64;
348    let variance: f64 =
349        half_periods.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / half_periods.len() as f64;
350    let std = variance.sqrt();
351    let consistency = (1.0 - std / mean.max(1e-15)).clamp(0.0, 1.0);
352
353    // Median half-period
354    crate::helpers::sort_nan_safe(&mut half_periods);
355    let median_half = half_periods[half_periods.len() / 2];
356
357    (2.0 * median_half * dt, consistency)
358}
359
360/// Spectral differencing with confidence: returns (period, power_ratio)
361fn sazed_spectral_diff_with_confidence(data: &[f64], argvals: &[f64]) -> (f64, f64) {
362    if data.len() < 4 {
363        return (f64::NAN, 0.0);
364    }
365
366    // First difference to remove trend
367    let diff: Vec<f64> = data.windows(2).map(|w| w[1] - w[0]).collect();
368    let diff_argvals: Vec<f64> = argvals.windows(2).map(|w| (w[0] + w[1]) / 2.0).collect();
369
370    sazed_spectral_with_confidence(&diff, &diff_argvals)
371}