traquer/
correlation.rs

1//! Correlation Indicators
2//!
3//! Provides technical indicators that measures how two or more series relate to one another.
4//! These indicators can capture trend or performance and is often used to track relation
5//! between an asset and a benchmark.
6use std::collections::HashMap;
7use std::iter;
8
9use itertools::izip;
10use num_traits::cast::ToPrimitive;
11
12use crate::smooth;
13use crate::statistic::distribution::{cov_stdev, rank, RankMode};
14
15/// Pearson Correlation Coefficient
16///
17/// The ratio between the covariance of two variables and the product of their standard
18/// deviations; thus, it is essentially a normalized measurement of the covariance,
19///
20/// ```math
21/// r_{xy} = \frac{n\sum x_i y_i - \sum x_i\sum y_i}{\sqrt{n\sum x_i^2-\left(\sum x_i\right)^2}~\sqrt{n\sum y_i^2-\left(\sum y_i\right)^2}}
22/// ```
23///
24/// ## Usage
25///
26/// A value between 0 and 1 implies a positive correlation; 0, no correlation; and between 0 and
27/// -1, a negative correlation.
28///
29/// ## Sources
30///
31/// [[1]](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient)
32///
33/// # Examples
34///
35/// ```
36/// use traquer::correlation;
37///
38/// correlation::pcc(
39///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
40///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
41///     6).collect::<Vec<f64>>();
42///
43/// ```
44pub fn pcc<'a, T: ToPrimitive>(
45    series1: &'a [T],
46    series2: &'a [T],
47    window: usize,
48) -> impl Iterator<Item = f64> + 'a {
49    iter::repeat(f64::NAN).take(window - 1).chain(
50        series1
51            .windows(window)
52            .zip(series2.windows(window))
53            .map(move |(x_win, y_win)| {
54                let mut sum_xy = 0.0;
55                let mut sum_x = 0.0;
56                let mut sum_y = 0.0;
57                let mut sum_x2 = 0.0;
58                let mut sum_y2 = 0.0;
59                for (x, y) in x_win.iter().zip(y_win) {
60                    let (x, y) = (x.to_f64().unwrap(), y.to_f64().unwrap());
61                    sum_xy += x * y;
62                    sum_x += x;
63                    sum_y += y;
64                    sum_x2 += x * x;
65                    sum_y2 += y * y;
66                }
67                (window as f64 * sum_xy - sum_x * sum_y)
68                    / ((window as f64 * sum_x2 - sum_x * sum_x).sqrt()
69                        * (window as f64 * sum_y2 - sum_y * sum_y).sqrt())
70            }),
71    )
72}
73
74/// Coefficient of Determination (r-squared)
75///
76/// The square of the correlation coefficient. Examines how differences in one variable
77/// can be explained by the difference in a second variable when predicting the outcome
78/// of a given event.
79///
80/// ## Usage
81///
82/// A value of 1 indicates 100% correlation; 0, no correlation.
83///
84/// ## Sources
85///
86/// [[1]](https://en.wikipedia.org/wiki/Coefficient_of_determination)
87/// [[2]](https://danshiebler.com/2017-06-25-metrics/)
88///
89/// # Examples
90///
91/// ```
92/// use traquer::correlation;
93///
94/// correlation::rsq(
95///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
96///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
97///     6).collect::<Vec<f64>>();
98///
99/// ```
100pub fn rsq<'a, T: ToPrimitive>(
101    series1: &'a [T],
102    series2: &'a [T],
103    window: usize,
104) -> impl Iterator<Item = f64> + 'a {
105    pcc(series1, series2, window).map(|x| x * x)
106}
107
108/// Beta Coefficient
109///
110/// Measure that compares the volatility of returns of an instrument to those of the market
111/// as a whole.
112///
113/// ## Usage
114///
115/// Value of 1 suggests correlation with market. A value above 1 suggests the instrument is more
116/// volatile compared to market and a value below 1 suggests, less volatility.
117///
118/// ## Sources
119///
120/// [[1]](https://seekingalpha.com/article/4493310-beta-for-stocks)
121///
122/// # Examples
123///
124/// ```
125/// use traquer::correlation;
126///
127/// correlation::beta(
128///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
129///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
130///     2).collect::<Vec<f64>>();
131///
132/// ```
133pub fn beta<'a, T: ToPrimitive>(
134    series1: &'a [T],
135    series2: &'a [T],
136    window: usize,
137) -> impl Iterator<Item = f64> + 'a {
138    iter::repeat(f64::NAN).take((window - 1) * 2 + 1).chain(
139        series1
140            .windows(2)
141            .map(|w| w[1].to_f64().unwrap() / w[0].to_f64().unwrap() - 1.0)
142            .collect::<Vec<f64>>()
143            .windows(window)
144            .zip(
145                series2
146                    .windows(2)
147                    .map(|w| w[1].to_f64().unwrap() / w[0].to_f64().unwrap() - 1.0)
148                    .collect::<Vec<f64>>()
149                    .windows(window),
150            )
151            .map(|(x_win, y_win)| {
152                let sum_x = x_win.iter().sum::<f64>();
153                let sum_y = y_win.iter().sum::<f64>();
154                (
155                    (x_win[window - 1] - sum_x / window as f64)
156                        * (y_win[window - 1] - sum_y / window as f64),
157                    (y_win[window - 1] - sum_y / window as f64).powi(2),
158                )
159            })
160            .collect::<Vec<(f64, f64)>>()
161            .windows(window)
162            .map(|w| {
163                let mut sum_covar = 0.0;
164                let mut sum_var = 0.0;
165                for (covar, var) in w {
166                    sum_covar += covar;
167                    sum_var += var;
168                }
169                (sum_covar / window as f64) / (sum_var / window as f64)
170            })
171            .collect::<Vec<f64>>(),
172    )
173}
174
175/// Performance Index
176///
177/// Used to compare a asset's price trend to the general trend of a benchmark index.
178///
179/// ## Usage
180///
181/// Value above 1 would state that base series outperforms benchmark.
182///
183/// ## Sources
184///
185/// [[1]](https://www.marketvolume.com/technicalanalysis/performanceindex.asp)
186///
187/// # Examples
188///
189/// ```
190/// use traquer::correlation;
191///
192/// correlation::perf(
193///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
194///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
195///     2).collect::<Vec<f64>>();
196///
197/// ```
198pub fn perf<'a, T: ToPrimitive>(
199    series1: &'a [T],
200    series2: &'a [T],
201    window: usize,
202) -> impl Iterator<Item = f64> + 'a {
203    izip!(
204        series1,
205        series2,
206        smooth::sma(series1, window),
207        smooth::sma(series2, window)
208    )
209    .map(|(x, y, ma_x, ma_y)| x.to_f64().unwrap() / y.to_f64().unwrap() * (ma_y / ma_x))
210}
211
212/// Relative Strength Comparison (Price Relative)
213///
214/// Compares performance by computing a ratio simply dividing the base security by a benchmark.
215///
216/// ## Usage
217///
218/// An increasing value suggests base security is outperforming the benchmark.
219///
220/// ## Sources
221///
222/// [[1]](https://www.fidelity.com/learning-center/trading-investing/technical-analysis/technical-indicator-guide/relative-strength-comparison)
223/// [[2]](https://chartschool.stockcharts.com/table-of-contents/technical-indicators-and-overlays/technical-indicators/price-relative-relative-strength)
224///
225/// # Examples
226///
227/// ```
228/// use traquer::correlation;
229///
230/// correlation::rsc(
231///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
232///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
233///     ).collect::<Vec<f64>>();
234///
235/// ```
236pub fn rsc<'a, T: ToPrimitive>(
237    series1: &'a [T],
238    series2: &'a [T],
239) -> impl Iterator<Item = f64> + 'a {
240    series1
241        .iter()
242        .zip(series2)
243        .map(|(x, y)| x.to_f64().unwrap() / y.to_f64().unwrap())
244}
245
246/// Spearman's Rank Correlation Coefficient
247///
248/// Assesses how well the relationship between two variables can be described using a monotonic
249/// function. Similar to Pearson correlation except uses rank value.
250///
251/// ## Usage
252///
253/// A value between 0 and 1 implies a positive correlation; 0, no correlation; and between 0 and
254/// -1, a negative correlation.
255///
256/// ## Sources
257///
258/// [[1]](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient)
259///
260/// # Examples
261///
262/// ```
263/// use traquer::correlation;
264///
265/// correlation::srcc(
266///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
267///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
268///     6).collect::<Vec<f64>>();
269///
270/// ```
271pub fn srcc<'a, T: ToPrimitive + PartialOrd + Clone>(
272    series1: &'a [T],
273    series2: &'a [T],
274    window: usize,
275) -> impl Iterator<Item = f64> + 'a {
276    iter::repeat(f64::NAN).take(window - 1).chain(
277        series1
278            .windows(window)
279            .zip(series2.windows(window))
280            .map(|(x_win, y_win)| {
281                let (cov_xy, std_x, std_y) = cov_stdev(
282                    &rank(x_win, None).collect::<Vec<f64>>(),
283                    &rank(y_win, None).collect::<Vec<f64>>(),
284                );
285                cov_xy / (std_x * std_y)
286            }),
287    )
288}
289
290/// Kendall's Rank Correlation Coefficient
291///
292/// Meausres the similarity of the orderings of the data when ranked by each of the quantities by
293/// computing the number of concordant and disconcordant pairs. This computes Tau-b matching
294/// logic in scipy.
295///
296/// ## Usage
297///
298/// A value between 0 and 1 implies a positive correlation; 0, no correlation; and between 0 and
299/// -1, a negative correlation.
300///
301/// ## Sources
302///
303/// [[1]](https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient)
304/// [[2]](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kendalltau.html)
305///
306/// # Examples
307///
308/// ```
309/// use traquer::correlation;
310///
311/// correlation::krcc(
312///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
313///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
314///     6).collect::<Vec<f64>>();
315///
316/// ```
317pub fn krcc<'a, T: ToPrimitive>(
318    series1: &'a [T],
319    series2: &'a [T],
320    window: usize,
321) -> impl Iterator<Item = f64> + 'a {
322    iter::repeat(f64::NAN).take(window - 1).chain(
323        series1
324            .windows(window)
325            .zip(series2.windows(window))
326            .map(move |(x_win, y_win)| {
327                let mut nc = 0.0;
328                let mut x_tie = 0.0;
329                let mut y_tie = 0.0;
330                let mut xy_tie = 0.0;
331
332                for i in 0..window - 1 {
333                    for j in i + 1..window {
334                        let (xi, xj, yi, yj) = (
335                            x_win[i].to_f64().unwrap(),
336                            x_win[j].to_f64().unwrap(),
337                            y_win[i].to_f64().unwrap(),
338                            y_win[j].to_f64().unwrap(),
339                        );
340                        nc += ((xi - xj).signum() == (yi - yj).signum() && xi != xj) as u8 as f64;
341                        xy_tie += (xi == xj && yi == yj) as u8 as f64;
342                        x_tie += (xi == xj) as u8 as f64;
343                        y_tie += (yi == yj) as u8 as f64;
344                    }
345                }
346                let tot = (window * (window - 1)) as f64 * 0.5;
347                let nd = tot - nc - x_tie - y_tie + xy_tie;
348                (nc - nd) / ((tot - x_tie) * (tot - y_tie)).sqrt()
349            }),
350    )
351}
352
353/// Hoeffding's D
354///
355/// A test based on the population measure of deviation from independence. More
356/// resource-intensive compared to other correlation functions but may handle non-monotonic
357/// relationships better.
358///
359/// ## Usage
360///
361/// Generates a measure that ranges from -0.5 to 1, where the higher the number is, the
362/// more strongly dependent the two sequences are on each other.
363///
364/// ## Sources
365///
366/// [[1]](https://github.com/Dicklesworthstone/hoeffdings_d_explainer)
367///
368/// # Examples
369///
370/// ```
371/// use traquer::correlation;
372///
373/// correlation::hoeffd(
374///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
375///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
376///     6).collect::<Vec<f64>>();
377///
378/// ```
379pub fn hoeffd<'a, T: ToPrimitive + PartialOrd + Clone>(
380    series1: &'a [T],
381    series2: &'a [T],
382    window: usize,
383) -> impl Iterator<Item = f64> + 'a {
384    iter::repeat(f64::NAN).take(window - 1).chain(
385        series1
386            .windows(window)
387            .zip(series2.windows(window))
388            .map(move |(x_win, y_win)| {
389                let rank_x = rank(x_win, Some(RankMode::Average)).collect::<Vec<f64>>();
390                let rank_y = rank(y_win, Some(RankMode::Average)).collect::<Vec<f64>>();
391                let mut q = vec![1.0; window];
392                for i in 0..window {
393                    for j in 0..window {
394                        q[i] += (rank_x[j] < rank_x[i] && rank_y[j] < rank_y[i]) as u8 as f64;
395                        q[i] +=
396                            0.25 * (rank_x[j] == rank_x[i] && rank_y[j] == rank_y[i]) as u8 as f64;
397                        q[i] += 0.5
398                            * ((rank_x[j] == rank_x[i] && rank_y[j] < rank_y[i])
399                                || (rank_x[j] < rank_x[i] && rank_y[j] == rank_y[i]))
400                                as u8 as f64;
401                    }
402                    q[i] -= 0.25; // accounts for when comparing to itself
403                }
404                let d1 = q.iter().fold(0.0, |acc, x| acc + (x - 1.0) * (x - 2.0));
405                let d2 = rank_x.iter().zip(&rank_y).fold(0.0, |acc, (x, y)| {
406                    acc + (x - 1.0) * (x - 2.0) * (y - 1.0) * (y - 2.0)
407                });
408                let d3 = izip!(q, rank_x, rank_y).fold(0.0, |acc, (q, x, y)| {
409                    acc + (x - 2.0) * (y - 2.0) * (q - 1.0)
410                });
411                30.0 * (((window - 2) * (window - 3)) as f64 * d1 + d2
412                    - 2. * (window - 2) as f64 * d3)
413                    / (window * (window - 1) * (window - 2) * (window - 3) * (window - 4)) as f64
414            }),
415    )
416}
417
418/// Distance Correlation
419///
420/// Measures both linear and nonlinear association between two random variables or random vectors.
421/// Not to be confused with correlation distance which is related to Pearson Coefficient[3].
422///
423/// ## Usage
424///
425/// Generates a value between 0 and 1 where 0 implies series are independent and 1 implies they are
426/// surely equal.
427///
428/// ## Sources
429///
430/// [[1]](https://en.wikipedia.org/wiki/Distance_correlation)
431/// [[2]](https://www.freecodecamp.org/news/how-machines-make-predictions-finding-correlations-in-complex-data-dfd9f0d87889/)
432/// [[3]](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#Pearson's_distance)
433///
434/// # Examples
435///
436/// ```
437/// use traquer::correlation;
438///
439/// correlation::dcor(
440///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
441///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
442///     6).collect::<Vec<f64>>();
443///
444/// ```
445pub fn dcor<'a, T: ToPrimitive>(
446    series1: &'a [T],
447    series2: &'a [T],
448    window: usize,
449) -> impl Iterator<Item = f64> + 'a {
450    fn centred_matrix<T: ToPrimitive>(x: &[T]) -> Vec<f64> {
451        let n = x.len();
452        // flattened NxN distance matrix, where [x_00..x0j, ... ,x_i0..x_ij]
453        let mut matrix = vec![0.0; n * n];
454        let mut matrix_sum = 0_f64;
455        for i in 0..n {
456            for j in i..n {
457                let idx = (i * n) + j;
458                let mirror_idx = idx / n + (idx % n) * n;
459                matrix[idx] = (x[i].to_f64().unwrap() - x[j].to_f64().unwrap()).abs();
460                matrix[mirror_idx] = matrix[idx];
461                matrix_sum += matrix[idx] * 2.;
462            }
463        }
464
465        // "double-centre" the matrix
466        let row_means: Vec<f64> = (0..matrix.len())
467            .step_by(n)
468            .map(|i| matrix[i..i + n].iter().sum::<f64>() / n as f64)
469            .collect();
470        let col_means = &row_means;
471        // undo the double count of mirror line rather than add if clause above
472        matrix_sum -= (0..matrix.len())
473            .step_by(n + 1)
474            .fold(0.0, |acc, x| acc + matrix[x]);
475        let matrix_mean: f64 = matrix_sum / (n * n) as f64;
476        for (i, row_mean) in row_means.iter().enumerate() {
477            for (j, col_mean) in col_means.iter().enumerate().skip(i) {
478                let idx = (i * n) + j;
479                let mirror_idx = idx / n + (idx % n) * n;
480                matrix[idx] += -row_mean - col_mean + matrix_mean;
481                matrix[mirror_idx] = matrix[idx];
482            }
483        }
484        matrix
485    }
486
487    iter::repeat(f64::NAN).take(window - 1).chain(
488        series1
489            .windows(window)
490            .zip(series2.windows(window))
491            .map(move |(x_win, y_win)| {
492                let centred_x = centred_matrix(x_win);
493                let centred_y = centred_matrix(y_win);
494                let dcov = (centred_x
495                    .iter()
496                    .zip(&centred_y)
497                    .map(|(a, b)| a * b)
498                    .sum::<f64>()
499                    / window.pow(2) as f64)
500                    .sqrt();
501                let dvar_x =
502                    (centred_x.iter().map(|a| a * a).sum::<f64>() / window.pow(2) as f64).sqrt();
503                let dvar_y =
504                    (centred_y.iter().map(|a| a * a).sum::<f64>() / window.pow(2) as f64).sqrt();
505
506                dcov / (dvar_x * dvar_y).sqrt()
507            }),
508    )
509}
510
511/// Mutual Information Coefficient
512///
513/// Measures the mutual dependence between the two variables by leveraging the entropy of each
514/// variable and computing the Kullback–Leibler divergence.
515///
516/// ## Usage
517///
518/// Generates a value between 0 and 1 where 0 implies series are independent and 1 implies they are
519/// surely equal.
520///
521/// ## Sources
522///
523/// [[1]](https://en.wikipedia.org/wiki/Mutual_information)
524/// [[2]](https://www.freecodecamp.org/news/how-machines-make-predictions-finding-correlations-in-complex-data-dfd9f0d87889/)
525///
526/// # Examples
527///
528/// ```
529/// use traquer::correlation;
530///
531/// correlation::mic(
532///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
533///     &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
534///     6).collect::<Vec<f64>>();
535///
536/// ```
537pub fn mic<'a, T: ToPrimitive>(
538    series1: &'a [T],
539    series2: &'a [T],
540    window: usize,
541) -> impl Iterator<Item = f64> + 'a {
542    fn mutual_info(x: &[usize], y: &[usize]) -> f64 {
543        let mut joint_counts = HashMap::new();
544        let mut x_counts = HashMap::new();
545        let mut y_counts = HashMap::new();
546        let n = x.len() as f64;
547
548        for (&xi, &yi) in x.iter().zip(y) {
549            *joint_counts.entry((xi, yi)).or_insert(0) += 1;
550            *x_counts.entry(xi).or_insert(0) += 1;
551            *y_counts.entry(yi).or_insert(0) += 1;
552        }
553
554        joint_counts.iter().fold(0.0, |acc, (&(xi, yi), &count)| {
555            let p_xy = count as f64 / n;
556            let p_x = x_counts[&xi] as f64 / n;
557            let p_y = y_counts[&yi] as f64 / n;
558            acc + p_xy * (p_xy / (p_x * p_y)).log2()
559        })
560    }
561
562    fn bin<T: ToPrimitive>(x: &[T], bins: usize) -> Vec<usize> {
563        let mut min = f64::MAX;
564        let mut max = f64::MIN;
565        for val in x {
566            let val = val.to_f64().unwrap();
567            min = min.min(val);
568            max = max.max(val);
569        }
570        let bin_width = (max - min) / bins as f64;
571        x.iter()
572            .map(|val| {
573                (((val.to_f64().unwrap() - min) / bin_width).floor() as usize).clamp(0, bins - 1)
574            })
575            .collect::<Vec<usize>>()
576    }
577
578    let max_bins = (window as f64).powf(0.6).ceil() as usize;
579
580    iter::repeat(f64::NAN).take(window - 1).chain(
581        series1
582            .windows(window)
583            .zip(series2.windows(window))
584            .map(move |(x_win, y_win)| {
585                let mut max_mi = f64::MIN;
586                for i in 2..=(max_bins / 2) {
587                    let binned1: Vec<usize> = bin(x_win, i);
588                    let mut j = 2;
589                    while i * j <= max_bins {
590                        let binned2: Vec<usize> = bin(y_win, j);
591                        let mi_estimate = mutual_info(&binned1, &binned2);
592                        let mi_normalized = mi_estimate / (i.min(j) as f64).log2();
593                        max_mi = mi_normalized.max(max_mi);
594                        j += 1;
595                    }
596                }
597                max_mi
598            }),
599    )
600}