traquer/correlation.rs
1//! Correlation Indicators
2//!
3//! Provides technical indicators that measures how two or more series relate to one another.
4//! These indicators can capture trend or performance and is often used to track relation
5//! between an asset and a benchmark.
6use std::collections::HashMap;
7use std::iter;
8
9use itertools::izip;
10use num_traits::cast::ToPrimitive;
11
12use crate::smooth;
13use crate::statistic::distribution::{cov_stdev, rank, RankMode};
14
15/// Pearson Correlation Coefficient
16///
17/// The ratio between the covariance of two variables and the product of their standard
18/// deviations; thus, it is essentially a normalized measurement of the covariance,
19///
20/// ```math
21/// r_{xy} = \frac{n\sum x_i y_i - \sum x_i\sum y_i}{\sqrt{n\sum x_i^2-\left(\sum x_i\right)^2}~\sqrt{n\sum y_i^2-\left(\sum y_i\right)^2}}
22/// ```
23///
24/// ## Usage
25///
26/// A value between 0 and 1 implies a positive correlation; 0, no correlation; and between 0 and
27/// -1, a negative correlation.
28///
29/// ## Sources
30///
31/// [[1]](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient)
32///
33/// # Examples
34///
35/// ```
36/// use traquer::correlation;
37///
38/// correlation::pcc(
39/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
40/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
41/// 6).collect::<Vec<f64>>();
42///
43/// ```
44pub fn pcc<'a, T: ToPrimitive>(
45 series1: &'a [T],
46 series2: &'a [T],
47 window: usize,
48) -> impl Iterator<Item = f64> + 'a {
49 iter::repeat(f64::NAN).take(window - 1).chain(
50 series1
51 .windows(window)
52 .zip(series2.windows(window))
53 .map(move |(x_win, y_win)| {
54 let mut sum_xy = 0.0;
55 let mut sum_x = 0.0;
56 let mut sum_y = 0.0;
57 let mut sum_x2 = 0.0;
58 let mut sum_y2 = 0.0;
59 for (x, y) in x_win.iter().zip(y_win) {
60 let (x, y) = (x.to_f64().unwrap(), y.to_f64().unwrap());
61 sum_xy += x * y;
62 sum_x += x;
63 sum_y += y;
64 sum_x2 += x * x;
65 sum_y2 += y * y;
66 }
67 (window as f64 * sum_xy - sum_x * sum_y)
68 / ((window as f64 * sum_x2 - sum_x * sum_x).sqrt()
69 * (window as f64 * sum_y2 - sum_y * sum_y).sqrt())
70 }),
71 )
72}
73
74/// Coefficient of Determination (r-squared)
75///
76/// The square of the correlation coefficient. Examines how differences in one variable
77/// can be explained by the difference in a second variable when predicting the outcome
78/// of a given event.
79///
80/// ## Usage
81///
82/// A value of 1 indicates 100% correlation; 0, no correlation.
83///
84/// ## Sources
85///
86/// [[1]](https://en.wikipedia.org/wiki/Coefficient_of_determination)
87/// [[2]](https://danshiebler.com/2017-06-25-metrics/)
88///
89/// # Examples
90///
91/// ```
92/// use traquer::correlation;
93///
94/// correlation::rsq(
95/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
96/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
97/// 6).collect::<Vec<f64>>();
98///
99/// ```
100pub fn rsq<'a, T: ToPrimitive>(
101 series1: &'a [T],
102 series2: &'a [T],
103 window: usize,
104) -> impl Iterator<Item = f64> + 'a {
105 pcc(series1, series2, window).map(|x| x * x)
106}
107
108/// Beta Coefficient
109///
110/// Measure that compares the volatility of returns of an instrument to those of the market
111/// as a whole.
112///
113/// ## Usage
114///
115/// Value of 1 suggests correlation with market. A value above 1 suggests the instrument is more
116/// volatile compared to market and a value below 1 suggests, less volatility.
117///
118/// ## Sources
119///
120/// [[1]](https://seekingalpha.com/article/4493310-beta-for-stocks)
121///
122/// # Examples
123///
124/// ```
125/// use traquer::correlation;
126///
127/// correlation::beta(
128/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
129/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
130/// 2).collect::<Vec<f64>>();
131///
132/// ```
133pub fn beta<'a, T: ToPrimitive>(
134 series1: &'a [T],
135 series2: &'a [T],
136 window: usize,
137) -> impl Iterator<Item = f64> + 'a {
138 iter::repeat(f64::NAN).take((window - 1) * 2 + 1).chain(
139 series1
140 .windows(2)
141 .map(|w| w[1].to_f64().unwrap() / w[0].to_f64().unwrap() - 1.0)
142 .collect::<Vec<f64>>()
143 .windows(window)
144 .zip(
145 series2
146 .windows(2)
147 .map(|w| w[1].to_f64().unwrap() / w[0].to_f64().unwrap() - 1.0)
148 .collect::<Vec<f64>>()
149 .windows(window),
150 )
151 .map(|(x_win, y_win)| {
152 let sum_x = x_win.iter().sum::<f64>();
153 let sum_y = y_win.iter().sum::<f64>();
154 (
155 (x_win[window - 1] - sum_x / window as f64)
156 * (y_win[window - 1] - sum_y / window as f64),
157 (y_win[window - 1] - sum_y / window as f64).powi(2),
158 )
159 })
160 .collect::<Vec<(f64, f64)>>()
161 .windows(window)
162 .map(|w| {
163 let mut sum_covar = 0.0;
164 let mut sum_var = 0.0;
165 for (covar, var) in w {
166 sum_covar += covar;
167 sum_var += var;
168 }
169 (sum_covar / window as f64) / (sum_var / window as f64)
170 })
171 .collect::<Vec<f64>>(),
172 )
173}
174
175/// Performance Index
176///
177/// Used to compare a asset's price trend to the general trend of a benchmark index.
178///
179/// ## Usage
180///
181/// Value above 1 would state that base series outperforms benchmark.
182///
183/// ## Sources
184///
185/// [[1]](https://www.marketvolume.com/technicalanalysis/performanceindex.asp)
186///
187/// # Examples
188///
189/// ```
190/// use traquer::correlation;
191///
192/// correlation::perf(
193/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
194/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
195/// 2).collect::<Vec<f64>>();
196///
197/// ```
198pub fn perf<'a, T: ToPrimitive>(
199 series1: &'a [T],
200 series2: &'a [T],
201 window: usize,
202) -> impl Iterator<Item = f64> + 'a {
203 izip!(
204 series1,
205 series2,
206 smooth::sma(series1, window),
207 smooth::sma(series2, window)
208 )
209 .map(|(x, y, ma_x, ma_y)| x.to_f64().unwrap() / y.to_f64().unwrap() * (ma_y / ma_x))
210}
211
212/// Relative Strength Comparison (Price Relative)
213///
214/// Compares performance by computing a ratio simply dividing the base security by a benchmark.
215///
216/// ## Usage
217///
218/// An increasing value suggests base security is outperforming the benchmark.
219///
220/// ## Sources
221///
222/// [[1]](https://www.fidelity.com/learning-center/trading-investing/technical-analysis/technical-indicator-guide/relative-strength-comparison)
223/// [[2]](https://chartschool.stockcharts.com/table-of-contents/technical-indicators-and-overlays/technical-indicators/price-relative-relative-strength)
224///
225/// # Examples
226///
227/// ```
228/// use traquer::correlation;
229///
230/// correlation::rsc(
231/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
232/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
233/// ).collect::<Vec<f64>>();
234///
235/// ```
236pub fn rsc<'a, T: ToPrimitive>(
237 series1: &'a [T],
238 series2: &'a [T],
239) -> impl Iterator<Item = f64> + 'a {
240 series1
241 .iter()
242 .zip(series2)
243 .map(|(x, y)| x.to_f64().unwrap() / y.to_f64().unwrap())
244}
245
246/// Spearman's Rank Correlation Coefficient
247///
248/// Assesses how well the relationship between two variables can be described using a monotonic
249/// function. Similar to Pearson correlation except uses rank value.
250///
251/// ## Usage
252///
253/// A value between 0 and 1 implies a positive correlation; 0, no correlation; and between 0 and
254/// -1, a negative correlation.
255///
256/// ## Sources
257///
258/// [[1]](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient)
259///
260/// # Examples
261///
262/// ```
263/// use traquer::correlation;
264///
265/// correlation::srcc(
266/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
267/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
268/// 6).collect::<Vec<f64>>();
269///
270/// ```
271pub fn srcc<'a, T: ToPrimitive + PartialOrd + Clone>(
272 series1: &'a [T],
273 series2: &'a [T],
274 window: usize,
275) -> impl Iterator<Item = f64> + 'a {
276 iter::repeat(f64::NAN).take(window - 1).chain(
277 series1
278 .windows(window)
279 .zip(series2.windows(window))
280 .map(|(x_win, y_win)| {
281 let (cov_xy, std_x, std_y) = cov_stdev(
282 &rank(x_win, None).collect::<Vec<f64>>(),
283 &rank(y_win, None).collect::<Vec<f64>>(),
284 );
285 cov_xy / (std_x * std_y)
286 }),
287 )
288}
289
290/// Kendall's Rank Correlation Coefficient
291///
292/// Meausres the similarity of the orderings of the data when ranked by each of the quantities by
293/// computing the number of concordant and disconcordant pairs. This computes Tau-b matching
294/// logic in scipy.
295///
296/// ## Usage
297///
298/// A value between 0 and 1 implies a positive correlation; 0, no correlation; and between 0 and
299/// -1, a negative correlation.
300///
301/// ## Sources
302///
303/// [[1]](https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient)
304/// [[2]](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kendalltau.html)
305///
306/// # Examples
307///
308/// ```
309/// use traquer::correlation;
310///
311/// correlation::krcc(
312/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
313/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
314/// 6).collect::<Vec<f64>>();
315///
316/// ```
317pub fn krcc<'a, T: ToPrimitive>(
318 series1: &'a [T],
319 series2: &'a [T],
320 window: usize,
321) -> impl Iterator<Item = f64> + 'a {
322 iter::repeat(f64::NAN).take(window - 1).chain(
323 series1
324 .windows(window)
325 .zip(series2.windows(window))
326 .map(move |(x_win, y_win)| {
327 let mut nc = 0.0;
328 let mut x_tie = 0.0;
329 let mut y_tie = 0.0;
330 let mut xy_tie = 0.0;
331
332 for i in 0..window - 1 {
333 for j in i + 1..window {
334 let (xi, xj, yi, yj) = (
335 x_win[i].to_f64().unwrap(),
336 x_win[j].to_f64().unwrap(),
337 y_win[i].to_f64().unwrap(),
338 y_win[j].to_f64().unwrap(),
339 );
340 nc += ((xi - xj).signum() == (yi - yj).signum() && xi != xj) as u8 as f64;
341 xy_tie += (xi == xj && yi == yj) as u8 as f64;
342 x_tie += (xi == xj) as u8 as f64;
343 y_tie += (yi == yj) as u8 as f64;
344 }
345 }
346 let tot = (window * (window - 1)) as f64 * 0.5;
347 let nd = tot - nc - x_tie - y_tie + xy_tie;
348 (nc - nd) / ((tot - x_tie) * (tot - y_tie)).sqrt()
349 }),
350 )
351}
352
353/// Hoeffding's D
354///
355/// A test based on the population measure of deviation from independence. More
356/// resource-intensive compared to other correlation functions but may handle non-monotonic
357/// relationships better.
358///
359/// ## Usage
360///
361/// Generates a measure that ranges from -0.5 to 1, where the higher the number is, the
362/// more strongly dependent the two sequences are on each other.
363///
364/// ## Sources
365///
366/// [[1]](https://github.com/Dicklesworthstone/hoeffdings_d_explainer)
367///
368/// # Examples
369///
370/// ```
371/// use traquer::correlation;
372///
373/// correlation::hoeffd(
374/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
375/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
376/// 6).collect::<Vec<f64>>();
377///
378/// ```
379pub fn hoeffd<'a, T: ToPrimitive + PartialOrd + Clone>(
380 series1: &'a [T],
381 series2: &'a [T],
382 window: usize,
383) -> impl Iterator<Item = f64> + 'a {
384 iter::repeat(f64::NAN).take(window - 1).chain(
385 series1
386 .windows(window)
387 .zip(series2.windows(window))
388 .map(move |(x_win, y_win)| {
389 let rank_x = rank(x_win, Some(RankMode::Average)).collect::<Vec<f64>>();
390 let rank_y = rank(y_win, Some(RankMode::Average)).collect::<Vec<f64>>();
391 let mut q = vec![1.0; window];
392 for i in 0..window {
393 for j in 0..window {
394 q[i] += (rank_x[j] < rank_x[i] && rank_y[j] < rank_y[i]) as u8 as f64;
395 q[i] +=
396 0.25 * (rank_x[j] == rank_x[i] && rank_y[j] == rank_y[i]) as u8 as f64;
397 q[i] += 0.5
398 * ((rank_x[j] == rank_x[i] && rank_y[j] < rank_y[i])
399 || (rank_x[j] < rank_x[i] && rank_y[j] == rank_y[i]))
400 as u8 as f64;
401 }
402 q[i] -= 0.25; // accounts for when comparing to itself
403 }
404 let d1 = q.iter().fold(0.0, |acc, x| acc + (x - 1.0) * (x - 2.0));
405 let d2 = rank_x.iter().zip(&rank_y).fold(0.0, |acc, (x, y)| {
406 acc + (x - 1.0) * (x - 2.0) * (y - 1.0) * (y - 2.0)
407 });
408 let d3 = izip!(q, rank_x, rank_y).fold(0.0, |acc, (q, x, y)| {
409 acc + (x - 2.0) * (y - 2.0) * (q - 1.0)
410 });
411 30.0 * (((window - 2) * (window - 3)) as f64 * d1 + d2
412 - 2. * (window - 2) as f64 * d3)
413 / (window * (window - 1) * (window - 2) * (window - 3) * (window - 4)) as f64
414 }),
415 )
416}
417
418/// Distance Correlation
419///
420/// Measures both linear and nonlinear association between two random variables or random vectors.
421/// Not to be confused with correlation distance which is related to Pearson Coefficient[3].
422///
423/// ## Usage
424///
425/// Generates a value between 0 and 1 where 0 implies series are independent and 1 implies they are
426/// surely equal.
427///
428/// ## Sources
429///
430/// [[1]](https://en.wikipedia.org/wiki/Distance_correlation)
431/// [[2]](https://www.freecodecamp.org/news/how-machines-make-predictions-finding-correlations-in-complex-data-dfd9f0d87889/)
432/// [[3]](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#Pearson's_distance)
433///
434/// # Examples
435///
436/// ```
437/// use traquer::correlation;
438///
439/// correlation::dcor(
440/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
441/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
442/// 6).collect::<Vec<f64>>();
443///
444/// ```
445pub fn dcor<'a, T: ToPrimitive>(
446 series1: &'a [T],
447 series2: &'a [T],
448 window: usize,
449) -> impl Iterator<Item = f64> + 'a {
450 fn centred_matrix<T: ToPrimitive>(x: &[T]) -> Vec<f64> {
451 let n = x.len();
452 // flattened NxN distance matrix, where [x_00..x0j, ... ,x_i0..x_ij]
453 let mut matrix = vec![0.0; n * n];
454 let mut matrix_sum = 0_f64;
455 for i in 0..n {
456 for j in i..n {
457 let idx = (i * n) + j;
458 let mirror_idx = idx / n + (idx % n) * n;
459 matrix[idx] = (x[i].to_f64().unwrap() - x[j].to_f64().unwrap()).abs();
460 matrix[mirror_idx] = matrix[idx];
461 matrix_sum += matrix[idx] * 2.;
462 }
463 }
464
465 // "double-centre" the matrix
466 let row_means: Vec<f64> = (0..matrix.len())
467 .step_by(n)
468 .map(|i| matrix[i..i + n].iter().sum::<f64>() / n as f64)
469 .collect();
470 let col_means = &row_means;
471 // undo the double count of mirror line rather than add if clause above
472 matrix_sum -= (0..matrix.len())
473 .step_by(n + 1)
474 .fold(0.0, |acc, x| acc + matrix[x]);
475 let matrix_mean: f64 = matrix_sum / (n * n) as f64;
476 for (i, row_mean) in row_means.iter().enumerate() {
477 for (j, col_mean) in col_means.iter().enumerate().skip(i) {
478 let idx = (i * n) + j;
479 let mirror_idx = idx / n + (idx % n) * n;
480 matrix[idx] += -row_mean - col_mean + matrix_mean;
481 matrix[mirror_idx] = matrix[idx];
482 }
483 }
484 matrix
485 }
486
487 iter::repeat(f64::NAN).take(window - 1).chain(
488 series1
489 .windows(window)
490 .zip(series2.windows(window))
491 .map(move |(x_win, y_win)| {
492 let centred_x = centred_matrix(x_win);
493 let centred_y = centred_matrix(y_win);
494 let dcov = (centred_x
495 .iter()
496 .zip(¢red_y)
497 .map(|(a, b)| a * b)
498 .sum::<f64>()
499 / window.pow(2) as f64)
500 .sqrt();
501 let dvar_x =
502 (centred_x.iter().map(|a| a * a).sum::<f64>() / window.pow(2) as f64).sqrt();
503 let dvar_y =
504 (centred_y.iter().map(|a| a * a).sum::<f64>() / window.pow(2) as f64).sqrt();
505
506 dcov / (dvar_x * dvar_y).sqrt()
507 }),
508 )
509}
510
511/// Mutual Information Coefficient
512///
513/// Measures the mutual dependence between the two variables by leveraging the entropy of each
514/// variable and computing the Kullback–Leibler divergence.
515///
516/// ## Usage
517///
518/// Generates a value between 0 and 1 where 0 implies series are independent and 1 implies they are
519/// surely equal.
520///
521/// ## Sources
522///
523/// [[1]](https://en.wikipedia.org/wiki/Mutual_information)
524/// [[2]](https://www.freecodecamp.org/news/how-machines-make-predictions-finding-correlations-in-complex-data-dfd9f0d87889/)
525///
526/// # Examples
527///
528/// ```
529/// use traquer::correlation;
530///
531/// correlation::mic(
532/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
533/// &[1.0,2.0,3.0,4.0,5.0,6.0,4.0,5.0],
534/// 6).collect::<Vec<f64>>();
535///
536/// ```
537pub fn mic<'a, T: ToPrimitive>(
538 series1: &'a [T],
539 series2: &'a [T],
540 window: usize,
541) -> impl Iterator<Item = f64> + 'a {
542 fn mutual_info(x: &[usize], y: &[usize]) -> f64 {
543 let mut joint_counts = HashMap::new();
544 let mut x_counts = HashMap::new();
545 let mut y_counts = HashMap::new();
546 let n = x.len() as f64;
547
548 for (&xi, &yi) in x.iter().zip(y) {
549 *joint_counts.entry((xi, yi)).or_insert(0) += 1;
550 *x_counts.entry(xi).or_insert(0) += 1;
551 *y_counts.entry(yi).or_insert(0) += 1;
552 }
553
554 joint_counts.iter().fold(0.0, |acc, (&(xi, yi), &count)| {
555 let p_xy = count as f64 / n;
556 let p_x = x_counts[&xi] as f64 / n;
557 let p_y = y_counts[&yi] as f64 / n;
558 acc + p_xy * (p_xy / (p_x * p_y)).log2()
559 })
560 }
561
562 fn bin<T: ToPrimitive>(x: &[T], bins: usize) -> Vec<usize> {
563 let mut min = f64::MAX;
564 let mut max = f64::MIN;
565 for val in x {
566 let val = val.to_f64().unwrap();
567 min = min.min(val);
568 max = max.max(val);
569 }
570 let bin_width = (max - min) / bins as f64;
571 x.iter()
572 .map(|val| {
573 (((val.to_f64().unwrap() - min) / bin_width).floor() as usize).clamp(0, bins - 1)
574 })
575 .collect::<Vec<usize>>()
576 }
577
578 let max_bins = (window as f64).powf(0.6).ceil() as usize;
579
580 iter::repeat(f64::NAN).take(window - 1).chain(
581 series1
582 .windows(window)
583 .zip(series2.windows(window))
584 .map(move |(x_win, y_win)| {
585 let mut max_mi = f64::MIN;
586 for i in 2..=(max_bins / 2) {
587 let binned1: Vec<usize> = bin(x_win, i);
588 let mut j = 2;
589 while i * j <= max_bins {
590 let binned2: Vec<usize> = bin(y_win, j);
591 let mi_estimate = mutual_info(&binned1, &binned2);
592 let mi_normalized = mi_estimate / (i.min(j) as f64).log2();
593 max_mi = mi_normalized.max(max_mi);
594 j += 1;
595 }
596 }
597 max_mi
598 }),
599 )
600}