Skip to main content

fdars_core/seasonal/
matrix_profile.rs

1use crate::matrix::FdMatrix;
2
3use super::compute_mean_curve;
4
5/// Result of Matrix Profile computation.
6#[derive(Debug, Clone, PartialEq)]
7#[non_exhaustive]
8pub struct MatrixProfileResult {
9    /// The matrix profile (minimum z-normalized distance for each position)
10    pub profile: Vec<f64>,
11    /// Index of the nearest neighbor for each position
12    pub profile_index: Vec<usize>,
13    /// Subsequence length used
14    pub subsequence_length: usize,
15    /// Detected periods from arc analysis
16    pub detected_periods: Vec<f64>,
17    /// Arc counts at each index distance (for period detection)
18    pub arc_counts: Vec<usize>,
19    /// Most prominent detected period
20    pub primary_period: f64,
21    /// Confidence score for primary period (based on arc prominence)
22    pub confidence: f64,
23}
24
25/// Compute Matrix Profile using STOMP algorithm (Scalable Time series Ordered-search Matrix Profile).
26///
27/// The Matrix Profile is a data structure that stores the z-normalized Euclidean distance
28/// between each subsequence of a time series and its nearest neighbor. It enables efficient
29/// motif discovery and anomaly detection.
30///
31/// # Algorithm (STOMP - Zhu et al. 2016)
32/// 1. Pre-compute sliding mean and standard deviation using cumulative sums
33/// 2. Use FFT to compute first row of distance matrix
34/// 3. Update subsequent rows incrementally using the dot product update rule
35/// 4. Track minimum distance and index at each position
36///
37/// # Arguments
38/// * `values` - Time series values
39/// * `subsequence_length` - Length of subsequences to compare (window size)
40/// * `exclusion_zone` - Fraction of subsequence length to exclude around each position
41///   to prevent trivial self-matches. Default: 0.5
42///
43/// # Returns
44/// `MatrixProfileResult` with profile, indices, and detected periods.
45///
46/// # Example
47/// ```rust
48/// use fdars_core::seasonal::matrix_profile;
49/// use std::f64::consts::PI;
50///
51/// // Periodic signal
52/// let period = 20.0;
53/// let values: Vec<f64> = (0..100)
54///     .map(|i| (2.0 * PI * i as f64 / period).sin())
55///     .collect();
56///
57/// let result = matrix_profile(&values, Some(15), None);
58/// assert!((result.primary_period - period).abs() < 5.0);
59/// ```
60pub fn matrix_profile(
61    values: &[f64],
62    subsequence_length: Option<usize>,
63    exclusion_zone: Option<f64>,
64) -> MatrixProfileResult {
65    let n = values.len();
66
67    // Default subsequence length: ~ 1/4 of series length, capped at reasonable range
68    let m = subsequence_length.unwrap_or_else(|| {
69        let default_m = n / 4;
70        default_m.max(4).min(n / 2)
71    });
72
73    if m < 3 || m > n / 2 {
74        return MatrixProfileResult {
75            profile: vec![],
76            profile_index: vec![],
77            subsequence_length: m,
78            detected_periods: vec![],
79            arc_counts: vec![],
80            primary_period: f64::NAN,
81            confidence: 0.0,
82        };
83    }
84
85    let exclusion_zone = exclusion_zone.unwrap_or(0.5);
86    let exclusion_radius = (m as f64 * exclusion_zone).ceil() as usize;
87
88    // Number of subsequences
89    let profile_len = n - m + 1;
90
91    // Compute sliding statistics
92    let (means, stds) = compute_sliding_stats(values, m);
93
94    // Compute the matrix profile using STOMP
95    let (profile, profile_index) = stomp_core(values, m, &means, &stds, exclusion_radius);
96
97    // Perform arc analysis to detect periods
98    let (arc_counts, detected_periods, primary_period, confidence) =
99        analyze_arcs(&profile_index, profile_len, m);
100
101    MatrixProfileResult {
102        profile,
103        profile_index,
104        subsequence_length: m,
105        detected_periods,
106        arc_counts,
107        primary_period,
108        confidence,
109    }
110}
111
112/// Compute sliding mean and standard deviation using cumulative sums.
113///
114/// This is O(n) and avoids numerical issues with naive implementations.
115fn compute_sliding_stats(values: &[f64], m: usize) -> (Vec<f64>, Vec<f64>) {
116    let n = values.len();
117    let profile_len = n - m + 1;
118
119    // Compute cumulative sums
120    let mut cumsum = vec![0.0; n + 1];
121    let mut cumsum_sq = vec![0.0; n + 1];
122
123    for i in 0..n {
124        cumsum[i + 1] = cumsum[i] + values[i];
125        cumsum_sq[i + 1] = cumsum_sq[i] + values[i] * values[i];
126    }
127
128    // Compute means and stds
129    let mut means = Vec::with_capacity(profile_len);
130    let mut stds = Vec::with_capacity(profile_len);
131
132    let m_f64 = m as f64;
133
134    for i in 0..profile_len {
135        let sum = cumsum[i + m] - cumsum[i];
136        let sum_sq = cumsum_sq[i + m] - cumsum_sq[i];
137
138        let mean = sum / m_f64;
139        let variance = (sum_sq / m_f64) - mean * mean;
140        let std = variance.max(0.0).sqrt();
141
142        means.push(mean);
143        stds.push(std.max(1e-10)); // Prevent division by zero
144    }
145
146    (means, stds)
147}
148
149/// Core STOMP algorithm implementation.
150///
151/// Uses FFT for the first row and incremental updates for subsequent rows.
152fn stomp_core(
153    values: &[f64],
154    m: usize,
155    means: &[f64],
156    stds: &[f64],
157    exclusion_radius: usize,
158) -> (Vec<f64>, Vec<usize>) {
159    let n = values.len();
160    let profile_len = n - m + 1;
161
162    // Initialize profile with infinity and index with 0
163    let mut profile = vec![f64::INFINITY; profile_len];
164    let mut profile_index = vec![0usize; profile_len];
165
166    // Compute first row using direct computation (could use FFT for large n)
167    // QT[0,j] = sum(T[0:m] * T[j:j+m]) for each j
168    let mut qt = vec![0.0; profile_len];
169
170    // First query subsequence
171    for j in 0..profile_len {
172        let mut dot = 0.0;
173        for k in 0..m {
174            dot += values[k] * values[j + k];
175        }
176        qt[j] = dot;
177    }
178
179    // Process first row
180    update_profile_row(
181        0,
182        &qt,
183        means,
184        stds,
185        m,
186        exclusion_radius,
187        &mut profile,
188        &mut profile_index,
189    );
190
191    // Process subsequent rows using incremental updates
192    for i in 1..profile_len {
193        // Update QT using the sliding dot product update
194        // QT[i,j] = QT[i-1,j-1] - T[i-1]*T[j-1] + T[i+m-1]*T[j+m-1]
195        let mut qt_new = vec![0.0; profile_len];
196
197        // First element needs direct computation
198        let mut dot = 0.0;
199        for k in 0..m {
200            dot += values[i + k] * values[k];
201        }
202        qt_new[0] = dot;
203
204        // Update rest using incremental formula
205        for j in 1..profile_len {
206            qt_new[j] =
207                qt[j - 1] - values[i - 1] * values[j - 1] + values[i + m - 1] * values[j + m - 1];
208        }
209
210        qt = qt_new;
211
212        // Update profile with this row
213        update_profile_row(
214            i,
215            &qt,
216            means,
217            stds,
218            m,
219            exclusion_radius,
220            &mut profile,
221            &mut profile_index,
222        );
223    }
224
225    (profile, profile_index)
226}
227
228/// Update profile with distances from row i.
229fn update_profile_row(
230    i: usize,
231    qt: &[f64],
232    means: &[f64],
233    stds: &[f64],
234    m: usize,
235    exclusion_radius: usize,
236    profile: &mut [f64],
237    profile_index: &mut [usize],
238) {
239    let profile_len = profile.len();
240    let m_f64 = m as f64;
241
242    for j in 0..profile_len {
243        // Skip exclusion zone
244        if i.abs_diff(j) <= exclusion_radius {
245            continue;
246        }
247
248        // Compute z-normalized distance
249        // d = sqrt(2*m * (1 - (QT - m*mu_i*mu_j) / (m * sigma_i * sigma_j)))
250        let numerator = qt[j] - m_f64 * means[i] * means[j];
251        let denominator = m_f64 * stds[i] * stds[j];
252
253        let pearson = if denominator > 0.0 {
254            (numerator / denominator).clamp(-1.0, 1.0)
255        } else {
256            0.0
257        };
258
259        let dist_sq = 2.0 * m_f64 * (1.0 - pearson);
260        let dist = dist_sq.max(0.0).sqrt();
261
262        // Update profile for position i
263        if dist < profile[i] {
264            profile[i] = dist;
265            profile_index[i] = j;
266        }
267
268        // Update profile for position j (symmetric)
269        if dist < profile[j] {
270            profile[j] = dist;
271            profile_index[j] = i;
272        }
273    }
274}
275
276/// Analyze profile index to detect periods using arc counting.
277///
278/// Arcs connect each position to its nearest neighbor. The distance between
279/// connected positions reveals repeating patterns (periods).
280fn analyze_arcs(
281    profile_index: &[usize],
282    profile_len: usize,
283    m: usize,
284) -> (Vec<usize>, Vec<f64>, f64, f64) {
285    // Count arcs at each index distance
286    let max_distance = profile_len;
287    let mut arc_counts = vec![0usize; max_distance];
288
289    for (i, &j) in profile_index.iter().enumerate() {
290        let distance = i.abs_diff(j);
291        if distance < max_distance {
292            arc_counts[distance] += 1;
293        }
294    }
295
296    // Find peaks in arc counts (candidate periods)
297    let min_period = m / 2; // Minimum meaningful period
298    let mut peaks: Vec<(usize, usize)> = Vec::new();
299
300    // Simple peak detection with minimum spacing
301    for i in min_period..arc_counts.len().saturating_sub(1) {
302        if arc_counts[i] > arc_counts[i.saturating_sub(1)]
303            && arc_counts[i] > arc_counts[(i + 1).min(arc_counts.len() - 1)]
304            && arc_counts[i] >= 3
305        // Minimum count threshold
306        {
307            peaks.push((i, arc_counts[i]));
308        }
309    }
310
311    // Sort by count descending
312    peaks.sort_by(|a, b| b.1.cmp(&a.1));
313
314    // Extract top periods
315    let detected_periods: Vec<f64> = peaks.iter().take(5).map(|(p, _)| *p as f64).collect();
316
317    // Primary period and confidence
318    let (primary_period, confidence) = if let Some(&(period, count)) = peaks.first() {
319        // Confidence based on relative peak prominence
320        let total_arcs: usize = arc_counts[min_period..].iter().sum();
321        let conf = if total_arcs > 0 {
322            count as f64 / total_arcs as f64
323        } else {
324            0.0
325        };
326        (period as f64, conf.min(1.0))
327    } else {
328        (0.0, 0.0)
329    };
330
331    (arc_counts, detected_periods, primary_period, confidence)
332}
333
334/// Compute Matrix Profile for functional data (multiple curves).
335///
336/// Computes the matrix profile for each curve and returns aggregated results.
337///
338/// # Arguments
339/// * `data` - Column-major matrix (n x m) of functional data
340/// * `subsequence_length` - Length of subsequences. If None, automatically determined.
341/// * `exclusion_zone` - Exclusion zone fraction. Default: 0.5
342///
343/// # Returns
344/// `MatrixProfileResult` computed from the mean curve.
345pub fn matrix_profile_fdata(
346    data: &FdMatrix,
347    subsequence_length: Option<usize>,
348    exclusion_zone: Option<f64>,
349) -> MatrixProfileResult {
350    // Compute mean curve
351    let mean_curve = compute_mean_curve(data);
352
353    // Run matrix profile on mean curve
354    matrix_profile(&mean_curve, subsequence_length, exclusion_zone)
355}
356
357/// Detect seasonality using Matrix Profile analysis.
358///
359/// Returns true if significant periodicity is detected based on matrix profile analysis.
360///
361/// # Arguments
362/// * `values` - Time series values
363/// * `subsequence_length` - Length of subsequences to compare
364/// * `confidence_threshold` - Minimum confidence for positive detection. Default: 0.1
365///
366/// # Returns
367/// Tuple of (is_seasonal, detected_period, confidence)
368pub fn matrix_profile_seasonality(
369    values: &[f64],
370    subsequence_length: Option<usize>,
371    confidence_threshold: Option<f64>,
372) -> (bool, f64, f64) {
373    let result = matrix_profile(values, subsequence_length, None);
374
375    let threshold = confidence_threshold.unwrap_or(0.1);
376    let is_seasonal = result.confidence >= threshold && result.primary_period > 0.0;
377
378    (is_seasonal, result.primary_period, result.confidence)
379}