fn rankdata(values: &[f64]) -> Vec<f64> {
let n = values.len();
if n == 0 {
return vec![];
}
let mut indices: Vec<usize> = (0..n).collect();
indices.sort_by(|&a, &b| {
values[a]
.partial_cmp(&values[b])
.unwrap_or(std::cmp::Ordering::Equal)
});
let mut ranks = vec![0.0_f64; n];
let mut i = 0;
while i < n {
let mut j = i + 1;
while j < n && values[indices[j]] == values[indices[i]] {
j += 1;
}
let avg_rank = (i + 1 + j) as f64 / 2.0;
for &idx in &indices[i..j] {
ranks[idx] = avg_rank;
}
i = j;
}
ranks
}
fn pearson(x: &[f64], y: &[f64]) -> f64 {
let n = x.len() as f64;
if n < 2.0 {
return f64::NAN;
}
let mean_x = x.iter().sum::<f64>() / n;
let mean_y = y.iter().sum::<f64>() / n;
let mut cov = 0.0_f64;
let mut var_x = 0.0_f64;
let mut var_y = 0.0_f64;
for i in 0..x.len() {
let dx = x[i] - mean_x;
let dy = y[i] - mean_y;
cov += dx * dy;
var_x += dx * dx;
var_y += dy * dy;
}
if var_x == 0.0 || var_y == 0.0 {
return f64::NAN;
}
cov / (var_x * var_y).sqrt()
}
fn regularized_incomplete_beta(x: f64, a: f64, b: f64) -> f64 {
if x <= 0.0 {
return 0.0;
}
if x >= 1.0 {
return 1.0;
}
if x > (a + 1.0) / (a + b + 2.0) {
return 1.0 - regularized_incomplete_beta_cf(1.0 - x, b, a);
}
regularized_incomplete_beta_cf(x, a, b)
}
fn regularized_incomplete_beta_cf(x: f64, a: f64, b: f64) -> f64 {
let ln_beta = ln_gamma(a) + ln_gamma(b) - ln_gamma(a + b);
let front = (x.ln() * a + (1.0 - x).ln() * b - ln_beta).exp() / a;
front * betacf(a, b, x)
}
fn betacf(a: f64, b: f64, x: f64) -> f64 {
let tiny = 1e-30_f64;
let eps = 3e-14;
let max_iter = 200;
let qab = a + b;
let qap = a + 1.0;
let qam = a - 1.0;
let mut c = 1.0_f64;
let mut d = 1.0 - qab * x / qap;
if d.abs() < tiny {
d = tiny;
}
d = 1.0 / d;
let mut h = d;
for m in 1..=max_iter {
let em = m as f64;
let m2 = 2.0 * em;
let aa = em * (b - em) * x / ((qam + m2) * (a + m2));
d = 1.0 + aa * d;
if d.abs() < tiny {
d = tiny;
}
c = 1.0 + aa / c;
if c.abs() < tiny {
c = tiny;
}
d = 1.0 / d;
h *= d * c;
let aa = -(a + em) * (qab + em) * x / ((a + m2) * (qap + m2));
d = 1.0 + aa * d;
if d.abs() < tiny {
d = tiny;
}
c = 1.0 + aa / c;
if c.abs() < tiny {
c = tiny;
}
d = 1.0 / d;
let del = d * c;
h *= del;
if (del - 1.0).abs() <= eps {
return h;
}
}
h }
fn ln_gamma(x: f64) -> f64 {
#[allow(clippy::excessive_precision)]
let coefs = [
0.999_999_999_999_809_93,
676.520_368_121_885_1,
-1259.139_216_722_402_8,
771.323_428_777_653_08,
-176.615_029_162_140_6,
12.507_343_278_686_905,
-0.138_571_095_265_720_12,
9.984_369_578_019_572e-6,
1.505_632_735_149_311_6e-7,
];
if x < 0.5 {
let pi = std::f64::consts::PI;
return pi.ln() - (pi * x).sin().ln() - ln_gamma(1.0 - x);
}
let x = x - 1.0;
let mut sum = coefs[0];
for (i, &c) in coefs[1..].iter().enumerate() {
sum += c / (x + i as f64 + 1.0);
}
let t = x + 7.5; 0.5 * (2.0 * std::f64::consts::PI).ln() + (t.ln() * (x + 0.5)) - t + sum.ln()
}
fn t_distribution_two_tailed_p(t_stat: f64, df: f64) -> f64 {
if df <= 0.0 {
return f64::NAN;
}
let x = df / (df + t_stat * t_stat);
let p_one_tail = 0.5 * regularized_incomplete_beta(x, df / 2.0, 0.5);
2.0 * p_one_tail
}
pub fn spearman(x: &[f64], y: &[f64]) -> (f64, f64) {
let n = x.len();
if n != y.len() || n < 3 {
return (f64::NAN, f64::NAN);
}
let rank_x = rankdata(x);
let rank_y = rankdata(y);
let r = pearson(&rank_x, &rank_y);
if r.is_nan() {
return (f64::NAN, f64::NAN);
}
let r_clamped = r.clamp(-1.0, 1.0);
if (r_clamped.abs() - 1.0).abs() < 1e-15 {
return (r_clamped, 0.0);
}
let df = n as f64 - 2.0;
let t_stat = r_clamped * (df / (1.0 - r_clamped * r_clamped)).sqrt();
let p_value = t_distribution_two_tailed_p(t_stat, df);
(r_clamped, p_value)
}
pub fn quintile_spread(scores: &[f64], returns: &[f64], n_quantiles: usize) -> f64 {
let n = scores.len();
if n != returns.len() || n < n_quantiles || n_quantiles == 0 {
return f64::NAN;
}
let mut indices: Vec<usize> = (0..n).collect();
indices.sort_by(|&a, &b| {
scores[a]
.partial_cmp(&scores[b])
.unwrap_or(std::cmp::Ordering::Equal)
});
let group_size = n / n_quantiles;
if group_size == 0 {
return f64::NAN;
}
let bottom_mean: f64 = indices[..group_size]
.iter()
.map(|&i| returns[i])
.sum::<f64>()
/ group_size as f64;
let top_start = n - group_size;
let top_mean: f64 = indices[top_start..]
.iter()
.map(|&i| returns[i])
.sum::<f64>()
/ group_size as f64;
top_mean - bottom_mean
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn rankdata_no_ties() {
let values = [3.0, 1.0, 2.0];
let ranks = rankdata(&values);
assert!((ranks[0] - 3.0).abs() < 1e-10);
assert!((ranks[1] - 1.0).abs() < 1e-10);
assert!((ranks[2] - 2.0).abs() < 1e-10);
}
#[test]
fn rankdata_with_ties() {
let values = [1.0, 2.0, 2.0, 4.0];
let ranks = rankdata(&values);
assert!((ranks[0] - 1.0).abs() < 1e-10);
assert!((ranks[1] - 2.5).abs() < 1e-10); assert!((ranks[2] - 2.5).abs() < 1e-10);
assert!((ranks[3] - 4.0).abs() < 1e-10);
}
#[test]
fn rankdata_empty() {
let ranks = rankdata(&[]);
assert!(ranks.is_empty());
}
#[test]
fn spearman_perfect_positive() {
let x: Vec<f64> = (0..50).map(|i| i as f64).collect();
let y: Vec<f64> = (0..50).map(|i| i as f64).collect();
let (r, p) = spearman(&x, &y);
assert!((r - 1.0).abs() < 1e-10, "expected r=1.0, got {r}");
assert!(p < 1e-10, "expected p≈0, got {p}");
}
#[test]
fn spearman_perfect_negative() {
let x: Vec<f64> = (0..50).map(|i| i as f64).collect();
let y: Vec<f64> = (0..50).rev().map(|i| i as f64).collect();
let (r, p) = spearman(&x, &y);
assert!((r - (-1.0)).abs() < 1e-10, "expected r=-1.0, got {r}");
assert!(p < 1e-10, "expected p≈0, got {p}");
}
#[test]
fn spearman_too_few() {
let x = [1.0, 2.0];
let y = [3.0, 4.0];
let (r, p) = spearman(&x, &y);
assert!(r.is_nan());
assert!(p.is_nan());
}
#[test]
fn spearman_unequal_length() {
let x = [1.0, 2.0, 3.0];
let y = [1.0, 2.0];
let (r, _) = spearman(&x, &y);
assert!(r.is_nan());
}
#[test]
fn quintile_spread_basic() {
let scores: Vec<f64> = (1..=10).map(|i| i as f64).collect();
let returns: Vec<f64> = (1..=10).map(|i| i as f64 * 0.01).collect();
let spread = quintile_spread(&scores, &returns, 5);
assert!(spread > 0.0, "expected positive spread, got {spread}");
}
#[test]
fn quintile_spread_zero_for_random() {
let scores: Vec<f64> = (1..=10).map(|i| i as f64).collect();
let returns: Vec<f64> = (1..=10).rev().map(|i| i as f64 * 0.01).collect();
let spread = quintile_spread(&scores, &returns, 5);
assert!(spread < 0.0, "expected negative spread, got {spread}");
}
#[test]
fn quintile_spread_invalid() {
let scores = [1.0, 2.0];
let returns = [0.01, 0.02];
let spread = quintile_spread(&scores, &returns, 5);
assert!(spread.is_nan());
}
#[test]
fn ln_gamma_known_values() {
assert!(ln_gamma(1.0).abs() < 1e-10);
assert!(ln_gamma(2.0).abs() < 1e-10);
assert!((ln_gamma(5.0) - 24.0_f64.ln()).abs() < 1e-8);
}
}