hrv_algos/analysis/
dfa.rs

1//! This module provides functionality for Detrended Fluctuation Analysis (DFA) and Unbiased Detrended Fluctuation Analysis (UDFA).
2//!
3//! DFA is a method for determining the statistical self-affinity of a signal. It is useful for analyzing time series data to detect long-range correlations.
4//!
5//! The module includes:
6//!
7//! - `DetrendAlgorithm` trait: Defines the interface for detrending algorithms.
8//! - `DetrendStrategy` enum: Provides different strategies for detrending, including linear detrending and custom detrending algorithms.
9//! - `LinearDetrend` struct: Implements linear detrending.
10//! - `DFAnalysis` struct: Represents the result of DFA or UDFA, including the scaling exponent, intercept, R-squared value, and log-transformed window sizes and fluctuation amplitudes.
11//! - `linear_fit` function: Performs linear regression on log-log data to compute the scaling exponent and other statistical properties.
12//!
13//! # Example
14//!
15//! ```rust
16//! use hrv_algos::analysis::dfa::{DFAnalysis, DetrendStrategy, LinearDetrend};
17//! use rand;
18//! // your data series
19//! let data = (0..128).map(|_| rand::random::<f64>()).collect::<Vec<f64>>();
20//! let windows = vec![4, 8, 16, 32];
21//! let detrender = DetrendStrategy::Linear;
22//!
23//! let analysis = DFAnalysis::dfa(&data, &windows, detrender).unwrap();
24//! println!("Alpha: {}", analysis.alpha);
25//! println!("Intercept: {}", analysis.intercept);
26//! println!("R-squared: {}", analysis.r_squared);
27//! ```
28//!
29//! # References
30//!
31//! - Yuan, Q., Gu, C., Weng, T., Yang, H. (2018). *Unbiased detrended fluctuation analysis: Long-range correlations in very short time series*. Physica A, 505, 179-189.
32//! - Bianchi, S. (2020). fathon: A Python package for a fast computation of detrended fluctuation analysis and related algorithms. Journal of Open Source Software, 5(45), 1828. https://doi.org/10.21105/joss.01828
33//! - fathon on github: https://github.com/stfbnc/fathon
34
35use anyhow::anyhow;
36use anyhow::Result;
37use core::f64;
38use nalgebra::DMatrix;
39use nalgebra::DVector;
40use nalgebra::DVectorView;
41use rayon::iter::IntoParallelRefIterator;
42use rayon::iter::ParallelIterator;
43use rayon::slice::ParallelSlice;
44
45/// A trait representing a detrending algorithm for time series data.
46///
47/// Implementors of this trait should provide a method to remove trends from
48/// the given data.
49///
50/// # Example
51///
52/// ```
53/// use hrv_algos::analysis::dfa::DetrendAlgorithm;
54/// use anyhow::Result;
55///
56/// struct MyDetrendAlgorithm;
57///
58/// impl DetrendAlgorithm for MyDetrendAlgorithm {
59///     fn detrend(&self, data: &[f64]) -> Result<Vec<f64>> {
60///         // Implementation goes here
61///         Ok(data.to_vec())
62///     }
63/// }
64/// ```
65#[cfg_attr(test, mockall::automock)]
66pub trait DetrendAlgorithm {
67    /// Removes trends from the provided data and returns the detrended data.
68    ///
69    /// # Parameters
70    ///
71    /// - `data`: A slice of f64 values representing the time series data to be detrended.
72    ///
73    /// # Returns
74    ///
75    /// A `Result` containing a vector of f64 values representing the detrended data on success,
76    /// or an error on failure.
77    fn detrend(&self, data: &[f64]) -> Result<Vec<f64>>;
78}
79
80/// Available detrend strategies for the DFA algorithm.
81/// user provided algorithms can be passed via the `Custom` variant.
82pub enum DetrendStrategy {
83    /// Linear detrending algorithm using least squares regression.
84    Linear,
85    /// A custom detrending algorithm that implements the `DetrendAlgorithm` trait.
86    /// The algorithm is wrapped in a `Box` to allow for dynamic dispatch and must
87    /// also implement the `Sync` and `Send` traits to ensure thread safety.
88    Custom(Box<dyn DetrendAlgorithm + Sync + Send>),
89}
90
91// implement the trait for the enum to dispatch calculation
92impl DetrendAlgorithm for DetrendStrategy {
93    fn detrend(&self, data: &[f64]) -> Result<Vec<f64>> {
94        match self {
95            // dispatch to the linear detrend algorithm
96            DetrendStrategy::Linear => LinearDetrend.detrend(data),
97            // dispatch to the custom detrend algorithm
98            DetrendStrategy::Custom(detrender) => detrender.detrend(data),
99        }
100    }
101}
102
103/// A linear detrending algorithm using least squares regression.
104pub struct LinearDetrend;
105
106impl DetrendAlgorithm for LinearDetrend {
107    fn detrend(&self, data: &[f64]) -> Result<Vec<f64>> {
108        if data.len() < 2 {
109            return Err(anyhow!(
110                "Data must contain at least two elements for detrending."
111            ));
112        }
113        let x: Vec<f64> = (0..data.len()).map(|i| i as f64).collect();
114        let ((a, b), _) = linear_fit(&x, data)?;
115        let detrended: Vec<f64> = data
116            .iter()
117            .zip(x.iter())
118            .map(|(&y, &i)| y - (a * i + b))
119            .collect();
120
121        Ok(detrended)
122    }
123}
124
125/// A struct representing Detrended Fluctuation Analysis (DFA) results.
126///
127/// DFA is a method used to find long-term statistical dependencies in time series data.
128///
129/// # Fields
130///
131/// * `alpha` - The scaling exponent, which indicates the presence of long-range correlations.
132/// * `intercept` - The intercept of the linear fit in the log-log plot.
133/// * `r_squared` - The coefficient of determination, which indicates the goodness of fit.
134/// * `log_n` - A vector containing the logarithm of the box sizes used in the analysis.
135/// * `log_f` - A vector containing the logarithm of the fluctuation function values corresponding to the box sizes.
136#[derive(Debug, Clone)]
137pub struct DFAnalysis {
138    pub alpha: f64,
139    pub intercept: f64,
140    pub r_squared: f64,
141    pub log_n: Vec<f64>,
142    pub log_f: Vec<f64>,
143}
144
145impl DFAnalysis {
146    /// Performs Detrended Fluctuation Analysis (DFA) on the provided data.
147    ///
148    /// This method computes the scaling exponent (Hurst exponent) and other statistical
149    /// properties of a time series using the DFA algorithm. DFA is used to detect long-range
150    /// correlations in time series data.
151    ///
152    /// # Arguments
153    ///
154    /// - `data`: A slice of `f64` representing the input time series data.
155    /// - `windows`: A slice of `usize` specifying the window sizes to use for the fluctuation analysis.
156    /// - `detrender`: A `DetrendStrategy` implementation used to remove trends from each segment.
157    ///
158    /// # Returns
159    ///
160    /// If successful, this method returns a `DFAnalysis` instance containing:
161    /// - `alpha`: The estimated scaling exponent.
162    /// - `intercept`: The intercept of the log-log regression.
163    /// - `r_squared`: The coefficient of determination for the log-log fit.
164    /// - `log_n`: A vector of log-transformed window sizes.
165    /// - `log_f`: A vector of log-transformed fluctuation amplitudes.
166    ///
167    /// # Errors
168    ///
169    /// This method returns an error if:
170    /// - `windows` is empty.
171    /// - The length of the input data is less than four times the largest window size.
172    /// - The smallest window size is less than 4.
173    /// - The `detrender` fails to detrend a segment.
174    ///
175    /// # Algorithm Overview
176    ///
177    /// 1. **Integration**:
178    ///    - The input data is transformed into a cumulative deviation profile (mean-centered).
179    ///
180    /// 2. **Segment Detrending**:
181    ///    - The profile is divided into overlapping segments, and each segment is detrended using
182    ///      the provided `DetrendStrategy`.
183    ///
184    /// 3. **Variance Calculation**:
185    ///    - Variances of the detrended segments are computed.
186    ///
187    /// 4. **Log-Log Regression**:
188    ///    - The log-transformed window sizes (`log_n`) and fluctuations (`log_f`) are
189    ///      used to perform a linear regression, yielding the scaling exponent `alpha`.
190    pub fn dfa(data: &[f64], windows: &[usize], detrender: DetrendStrategy) -> Result<Self> {
191        if windows.is_empty() {
192            return Err(anyhow!("Windows must not be empty"));
193        }
194        let windows = {
195            let mut _w = windows.to_owned();
196            _w.sort();
197            _w
198        };
199        if data.len() < 4 * *windows.last().unwrap() {
200            return Err(anyhow!(
201                "Data length must be at least 4x the size of the largest window"
202            ));
203        }
204        if *windows.first().unwrap() < 4 {
205            return Err(anyhow!("Minimum window size must be at least 4"));
206        }
207        let data = DVectorView::from(data);
208        let mean = data.mean();
209        let integrated: Vec<f64> = data
210            .iter()
211            .scan(0.0, |state, &x| {
212                *state += x - mean;
213                Some(*state)
214            })
215            .collect();
216
217        // Calculate fluctuations for each window size
218        let mut log_n: Vec<f64> = Vec::with_capacity(windows.len());
219        let mut log_f: Vec<f64> = Vec::with_capacity(windows.len());
220
221        for window in windows {
222            let (fluctuation, n_segments) = integrated
223                .par_chunks(window)
224                .filter_map(|slice| -> Option<Result<f64>> {
225                    if slice.len() != window {
226                        None
227                    } else {
228                        match detrender.detrend(slice) {
229                            Ok(data) => {
230                                let detrended = DVector::from(data);
231                                let var = detrended.variance();
232                                Some(Ok(var))
233                            }
234                            Err(e) => Some(Err(e)),
235                        }
236                    }
237                })
238                .collect::<Result<Vec<f64>, _>>()?
239                .iter()
240                .fold((0f64, 0usize), |(fluctuation, n_segments), f| {
241                    (fluctuation + f, n_segments + 1)
242                });
243            if n_segments > 0 {
244                let f_n = (fluctuation / n_segments as f64).sqrt();
245                log_n.push((window as f64).ln());
246                log_f.push(f_n.ln());
247            }
248        }
249
250        // Linear regression on log-log data
251        let ((alpha, intercept), r_squared) = linear_fit(&log_n, &log_f)?;
252
253        Ok(DFAnalysis {
254            alpha,
255            intercept,
256            r_squared,
257            log_n,
258            log_f,
259        })
260    }
261
262    /// Performs an unbiased Detrended Fluctuation Analysis (DFA) on the provided data.
263    /// this algorithm uses overlapping windows.
264    ///
265    /// This method computes the scaling exponent (Hurst exponent) and other statistical
266    /// properties of a time series using the DFA algorithm. DFA is used to detect long-range
267    /// correlations in time series data.
268    ///
269    /// # Arguments
270    ///
271    /// - `data`: A slice of `f64` representing the input time series data.
272    /// - `windows`: A slice of `usize` specifying the window sizes to use for the fluctuation analysis.
273    /// - `detrender`: A `DetrendStrategy` implementation used to remove trends from each segment.
274    ///
275    /// # Returns
276    ///
277    /// If successful, this method returns a `DFAnalysis` instance containing:
278    /// - `alpha`: The estimated scaling exponent.
279    /// - `intercept`: The intercept of the log-log regression.
280    /// - `r_squared`: The coefficient of determination for the log-log fit.
281    /// - `log_n`: A vector of log-transformed window sizes.
282    /// - `log_f`: A vector of log-transformed fluctuation amplitudes.
283    ///
284    /// # Errors
285    ///
286    /// This method returns an error if:
287    /// - `windows` is empty.
288    /// - The length of the input data is less than four times the largest window size.
289    /// - The smallest window size is less than 4.
290    /// - The `detrender` fails to detrend a segment.
291    ///
292    /// # Algorithm Overview
293    ///
294    /// 1. **Integration**:
295    ///    - The input data is transformed into a cumulative deviation profile (mean-centered).
296    ///
297    /// 2. **Segment Detrending**:
298    ///    - The profile is divided into overlapping segments, and each segment is detrended using
299    ///      the provided `DetrendStrategy`.
300    ///
301    /// 3. **Variance Calculation**:
302    ///    - Variances of the detrended segments are computed.
303    ///
304    /// 4. **Log-Log Regression**:
305    ///    - The log-transformed window sizes (`log_n`) and fluctuations (`log_f`) are
306    ///      used to perform a linear regression, yielding the scaling exponent `alpha`.
307    pub fn udfa(data: &[f64], windows: &[usize], detrender: DetrendStrategy) -> Result<Self> {
308        if windows.is_empty() {
309            return Err(anyhow!("Windows must not be empty"));
310        }
311        let windows = {
312            let mut _w = windows.to_owned();
313            _w.sort();
314            _w
315        };
316        if data.len() < 4 * *windows.last().unwrap() {
317            return Err(anyhow!(
318                "Data length must be at least 4x the size of the largest window"
319            ));
320        }
321        if *windows.first().unwrap() < 4 {
322            return Err(anyhow!("Minimum window size must be at least 4"));
323        }
324
325        let data = DVectorView::from(data);
326        let mean = data.mean();
327        let integrated: Vec<f64> = data
328            .iter()
329            .scan(0.0, |state, &x| {
330                *state += x - mean;
331                Some(*state)
332            })
333            .collect();
334
335        let results: Vec<_> = windows
336            .par_iter()
337            .map(|&window| -> Result<_> {
338                let n_segments = integrated.len() - window + 1;
339                if n_segments < 1 {
340                    return Ok(None);
341                }
342
343                let f: f64 = integrated
344                    .as_slice()
345                    .windows(window)
346                    .map(|slice| -> Result<f64> {
347                        let detrended = DVector::from(detrender.detrend(slice)?);
348
349                        let df_sum: f64 = detrended.sum();
350                        let df_2_sum: f64 = detrended.iter().map(|&x| x * x).sum();
351                        let df_odd_sum: f64 = detrended.iter().step_by(2).sum();
352                        let df_even_sum: f64 = detrended.iter().skip(1).step_by(2).sum();
353                        let df_shift_sum: f64 =
354                            detrended.as_slice().windows(2).map(|w| w[0] * w[1]).sum();
355
356                        let df_neg_mean = (df_odd_sum - df_even_sum) / window as f64;
357                        let df_neg_var = df_2_sum / window as f64 - df_neg_mean.powi(2);
358                        let df_pos_mean = df_sum / window as f64;
359                        let df_pos_var = df_2_sum / window as f64 - df_pos_mean.powi(2);
360
361                        let df_pos_shift = (df_shift_sum
362                            + df_pos_mean
363                                * (detrended[0] + detrended[window - 1]
364                                    - df_pos_mean * (window as f64 + 1.0)))
365                            / df_pos_var;
366
367                        let df_neg_shift = (-df_shift_sum
368                            + df_neg_mean
369                                * (detrended[0]
370                                    + detrended[window - 1] * (-1.0_f64).powi(window as i32 + 1)
371                                    - df_neg_mean * (window as f64 + 1.0)))
372                            / df_neg_var;
373
374                        let rho_a = (window as f64 + df_pos_shift) / (2.0 * window as f64 - 1.0);
375                        let rho_b = (window as f64 + df_neg_shift) / (2.0 * window as f64 - 1.0);
376
377                        let rho_a_star = rho_a + (1.0 + 3.0 * rho_a) / (2.0 * window as f64);
378                        let rho_b_star = rho_b + (1.0 + 3.0 * rho_b) / (2.0 * window as f64);
379
380                        Ok((rho_a_star + rho_b_star)
381                            * (1.0 - 1.0 / (2.0 * window as f64))
382                            * df_pos_var)
383                    })
384                    .collect::<Result<Vec<f64>>>()?
385                    .iter()
386                    .sum();
387
388                let f_n =
389                    (f * ((window - 1) as f64 / window as f64).sqrt() / n_segments as f64).sqrt();
390                Ok(Some(((window as f64).ln(), f_n.ln())))
391            })
392            .collect::<Result<Vec<Option<_>>>>()?
393            .into_iter()
394            .flatten()
395            .collect();
396
397        let (log_n, log_f): (Vec<_>, Vec<_>) =
398            results.into_iter().filter(|(_, f)| f.is_finite()).unzip();
399
400        let ((alpha, intercept), r_squared) = linear_fit(&log_n, &log_f)?;
401
402        Ok(DFAnalysis {
403            alpha,
404            intercept,
405            r_squared,
406            log_n,
407            log_f,
408        })
409    }
410}
411
412/// Performs linear regression on the provided data.
413///
414/// This function takes two slices of `f64` values representing the x and y coordinates
415/// of data points and performs a linear regression to find the best-fit line. It returns
416/// the slope and intercept of the line, as well as the coefficient of determination (R-squared value).
417///
418/// # Arguments
419///
420/// - `x`: A slice of `f64` values representing the x coordinates of the data points.
421/// - `y`: A slice of `f64` values representing the y coordinates of the data points.
422///
423/// # Returns
424///
425/// A `Result` containing a tuple with:
426/// - A tuple of two `f64` values representing the slope and intercept of the best-fit line.
427/// - An `f64` value representing the coefficient of determination (R-squared value).
428///
429/// # Errors
430///
431/// This function returns an error if:
432/// - The length of `x` is less than 2.
433/// - The lengths of `x` and `y` do not match.
434///
435/// # Example
436///
437/// ```ignore
438/// let x = [1.0, 2.0, 3.0, 4.0, 5.0];
439/// let y = [2.0, 4.0, 6.0, 8.0, 10.0];
440/// let ((slope, intercept), r_sqr) = linear_fit(&x, &y).unwrap();
441/// assert!((slope - 2.0).abs() < 1e-6, "Slope should be approximately 2.0");
442/// assert!((intercept - 0.0).abs() < 1e-6, "Intercept should be approximately 0.0");
443/// assert!(r_sqr > 0.999, "R-squared should be close to 1.0 for perfect fit.");
444/// ```
445fn linear_fit(x: &[f64], y: &[f64]) -> Result<((f64, f64), f64)> {
446    if x.len() < 2 {
447        return Err(anyhow!(
448            "Data must contain at least two elements for linear fit."
449        ));
450    }
451    if x.len() != y.len() {
452        return Err(anyhow!("X and Y data must have the same length."));
453    }
454    let prob_matrix = DMatrix::from_columns(&[
455        DVector::from_column_slice(x),
456        DVector::from_element(x.len(), 1.0),
457    ]);
458    let y = DVectorView::from(y);
459    let result = lstsq::lstsq(&prob_matrix, &y.into(), f64::EPSILON).map_err(|e| anyhow!(e))?;
460
461    let y_mean = y.mean();
462    let tss: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
463    let r_squared = 1.0 - (result.residuals / tss);
464
465    Ok(((result.solution[0], result.solution[1]), r_squared))
466}
467
468#[cfg(test)]
469mod tests {
470    use rand::{Rng, SeedableRng};
471
472    use super::*;
473
474    fn get_test_data(size: usize) -> Vec<f64> {
475        let mut rng = rand::rngs::StdRng::seed_from_u64(42);
476        (0..size)
477            .map(|_| 1000.0 + rng.gen_range(-10.0..10.0))
478            .collect()
479    }
480
481    #[test]
482    fn invalid_window() {
483        let data = get_test_data(4);
484        let windows = vec![];
485        let detrender = DetrendStrategy::Linear;
486        let result = DFAnalysis::dfa(&data, &windows, detrender);
487        assert!(result.is_err(), "DFA should fail with empty windows.");
488    }
489
490    #[test]
491    fn invalid_data_length() {
492        let data = get_test_data(3);
493        let windows = vec![4];
494        let detrender = DetrendStrategy::Linear;
495        let result = DFAnalysis::dfa(&data, &windows, detrender);
496        assert!(
497            result.is_err(),
498            "DFA should fail with less than 4x window size data."
499        );
500    }
501
502    #[test]
503    fn detrend_invalid_data() {
504        let data = get_test_data(1);
505        let result = LinearDetrend.detrend(&data);
506        assert!(
507            result.is_err(),
508            "Detrend should fail with less than 2 elements."
509        );
510    }
511
512    #[test]
513    fn custom_detrend() {
514        let mut detrender = MockDetrendAlgorithm::new();
515        detrender
516            .expect_detrend()
517            .times(1..)
518            .returning(|data| Ok(data.to_vec()));
519        let detrend_strategy = DetrendStrategy::Custom(Box::new(detrender));
520        let data = get_test_data(128);
521        let windows = vec![4, 5, 6];
522        let result = DFAnalysis::dfa(&data, &windows, detrend_strategy);
523        assert!(result.is_ok(), "DFA should succeed with custom detrender.");
524    }
525
526    #[test]
527    fn test_linear_fit() {
528        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
529        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
530        let ((slope, intercept), r_sqr) = linear_fit(&x, &y).unwrap();
531        assert!(
532            (slope - 2.0).abs() < 1e-6,
533            "Slope should be approximately 2.0"
534        );
535        assert!(
536            (intercept - 0.0).abs() < 1e-6,
537            "Intercept should be approximately 0.0"
538        );
539        assert!(
540            r_sqr > 0.999,
541            "R-squared should be close to 1.0 for perfect fit."
542        );
543    }
544
545    #[test]
546    fn test_linear_fit_with_noise() {
547        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
548        let y = [2.1, 3.9, 6.2, 7.8, 10.1];
549        let ((slope, intercept), _) = linear_fit(&x, &y).unwrap();
550        assert!(
551            (slope - 2.0).abs() < 0.2,
552            "Slope should be approximately 2.0"
553        );
554        assert!(
555            (intercept - 0.0).abs() < 0.2,
556            "Intercept should be approximately 0.0"
557        );
558    }
559
560    #[test]
561    fn test_linear_fit_error() {
562        let x = [1.0];
563        let y = [2.0];
564        let result = linear_fit(&x, &y);
565        assert!(
566            result.is_err(),
567            "Linear fit should fail with less than 2 elements."
568        );
569    }
570
571    #[test]
572    fn test_linear_fit_error_mismatch() {
573        let x = [1.0, 2.0];
574        let y = [2.0, 3.0, 4.0];
575        let result = linear_fit(&x, &y);
576        assert!(
577            result.is_err(),
578            "Linear fit should fail with mismatch between x and y data."
579        );
580    }
581}