use scirs2_core::ndarray::{ArrayBase, Data, Ix1};
use scirs2_core::numeric::Float;
use std::cmp::{Ord, Ordering};
use crate::error::{MetricsError, Result};
pub mod label;
/// Computes the mean reciprocal rank (MRR) across a set of queries.
///
/// For each query the items are ordered by descending score (stable sort;
/// incomparable scores are treated as equal). The query contributes
/// `1 / position` of the first item whose relevance is greater than zero
/// (positions are 1-based), or `0.0` when no item is relevant. The returned
/// value is the arithmetic mean of the per-query contributions.
///
/// # Arguments
///
/// * `y_true` - one relevance array per query.
/// * `y_score` - one score array per query, aligned with `y_true`.
///
/// # Errors
///
/// Returns `MetricsError::InvalidInput` when the two slices differ in length,
/// when they are empty, or when a relevance/score pair differs in shape.
#[allow(dead_code)]
pub fn mean_reciprocal_rank<T, S, R>(
    y_true: &[ArrayBase<S, Ix1>],
    y_score: &[ArrayBase<R, Ix1>],
) -> Result<f64>
where
    T: Float + PartialOrd + Clone,
    S: Data<Elem = T>,
    R: Data<Elem = T>,
{
    if y_true.len() != y_score.len() {
        return Err(MetricsError::InvalidInput(format!(
            "y_true and y_score have different lengths: {} vs {}",
            y_true.len(),
            y_score.len()
        )));
    }
    if y_true.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty arrays provided".to_string(),
        ));
    }

    let zero = T::zero();
    let per_query = y_true
        .iter()
        .zip(y_score.iter())
        .map(|(labels, preds)| -> Result<f64> {
            if labels.shape() != preds.shape() {
                return Err(MetricsError::InvalidInput(format!(
                    "Relevance and _score arrays have different shapes: {:?} vs {:?}",
                    labels.shape(),
                    preds.shape()
                )));
            }

            // Pair every score with its relevance and rank by descending score.
            let mut ranked: Vec<(T, T)> = preds
                .iter()
                .zip(labels.iter())
                .map(|(&score, &rel)| (score, rel))
                .collect();
            ranked.sort_by(|lhs, rhs| rhs.0.partial_cmp(&lhs.0).unwrap_or(Ordering::Equal));

            // Reciprocal of the 1-based position of the first relevant item.
            let reciprocal = ranked
                .iter()
                .position(|(_, rel)| *rel > zero)
                .map_or(0.0, |idx| 1.0 / (idx as f64 + 1.0));
            Ok(reciprocal)
        })
        .collect::<Result<Vec<f64>>>()?;

    Ok(per_query.iter().sum::<f64>() / per_query.len() as f64)
}
/// Discounted cumulative gain of an already-ordered relevance list.
///
/// The item at zero-based position `i` contributes `rel / log2(i + 2)`.
/// Only the first `k` items are considered; `None`, or a `k` larger than the
/// list, means the whole list. A relevance value that cannot be converted to
/// `f64` is counted as `0.0`.
#[allow(dead_code)]
fn dcg<T>(_relevancescores: &[T], k: Option<usize>) -> f64
where
    T: Float + Clone,
{
    let gains = _relevancescores;
    let cutoff = match k {
        Some(requested) => requested.min(gains.len()),
        None => gains.len(),
    };
    gains[..cutoff]
        .iter()
        .enumerate()
        .map(|(position, gain)| {
            let discount = ((position + 2) as f64).log2();
            gain.to_f64().unwrap_or(0.0) / discount
        })
        .sum()
}
/// Computes the mean normalized discounted cumulative gain (NDCG) over queries.
///
/// For each query, the DCG of the relevances ordered by descending score is
/// divided by the DCG of the relevances ordered by descending relevance (the
/// ideal ordering). Both are truncated at `k` when given. A query whose ideal
/// DCG is not positive contributes `0.0`. The result is the mean over queries.
///
/// # Arguments
///
/// * `y_true` - one relevance array per query.
/// * `y_score` - one score array per query, aligned with `y_true`.
/// * `k` - optional cut-off passed through to the DCG computation.
///
/// # Errors
///
/// Returns `MetricsError::InvalidInput` when the two slices differ in length,
/// when they are empty, or when a relevance/score pair differs in shape.
#[allow(dead_code)]
pub fn ndcg_score<T, S, R>(
    y_true: &[ArrayBase<S, Ix1>],
    y_score: &[ArrayBase<R, Ix1>],
    k: Option<usize>,
) -> Result<f64>
where
    T: Float + PartialOrd + Clone,
    S: Data<Elem = T>,
    R: Data<Elem = T>,
{
    if y_true.len() != y_score.len() {
        return Err(MetricsError::InvalidInput(format!(
            "y_true and y_score have different lengths: {} vs {}",
            y_true.len(),
            y_score.len()
        )));
    }
    if y_true.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty arrays provided".to_string(),
        ));
    }

    let mut per_query = Vec::with_capacity(y_true.len());
    for (labels, preds) in y_true.iter().zip(y_score.iter()) {
        if labels.shape() != preds.shape() {
            return Err(MetricsError::InvalidInput(format!(
                "Relevance and _score arrays have different shapes: {:?} vs {:?}",
                labels.shape(),
                preds.shape()
            )));
        }

        // Relevances in the order induced by descending predicted score.
        let mut ranked: Vec<(T, T)> = preds
            .iter()
            .zip(labels.iter())
            .map(|(&score, &rel)| (score, rel))
            .collect();
        ranked.sort_by(|lhs, rhs| rhs.0.partial_cmp(&lhs.0).unwrap_or(Ordering::Equal));
        let predicted_order: Vec<T> = ranked.into_iter().map(|(_, rel)| rel).collect();

        // Relevances in the best possible order (descending relevance).
        let mut ideal_order: Vec<T> = labels.iter().copied().collect();
        ideal_order.sort_by(|lhs, rhs| rhs.partial_cmp(lhs).unwrap_or(Ordering::Equal));

        let achieved = dcg(&predicted_order, k);
        let best = dcg(&ideal_order, k);
        per_query.push(if best > 0.0 { achieved / best } else { 0.0 });
    }

    Ok(per_query.iter().sum::<f64>() / per_query.len() as f64)
}
/// Average precision of a relevance list that is already in ranked order.
///
/// An item is relevant when its value is greater than zero. Precision is
/// accumulated at every relevant position among the first `k` items (all
/// items when `k` is `None` or exceeds the list length). The sum is divided
/// by the number of relevant items in the *whole* list, not just the first
/// `k`. Returns `0.0` when the list contains no relevant item.
#[allow(dead_code)]
fn average_precision<T>(y_truesorted: &[T], k: Option<usize>) -> f64
where
    T: Float + Clone,
{
    let zero = T::zero();
    let relevant_total = y_truesorted.iter().filter(|rel| **rel > zero).count();
    if relevant_total == 0 {
        return 0.0;
    }

    let cutoff = k.map_or(y_truesorted.len(), |requested| {
        requested.min(y_truesorted.len())
    });

    let mut hits = 0.0;
    let mut precision_sum = 0.0;
    for (idx, rel) in y_truesorted.iter().take(cutoff).enumerate() {
        if *rel > zero {
            hits += 1.0;
            // Precision at this (1-based) position.
            precision_sum += hits / (idx as f64 + 1.0);
        }
    }
    precision_sum / relevant_total as f64
}
/// Computes the mean average precision (MAP) over a set of queries.
///
/// Each query's relevances are ordered by descending score (stable sort;
/// incomparable scores are treated as equal) and handed to
/// `average_precision` together with `k`. The per-query values are averaged.
///
/// # Arguments
///
/// * `y_true` - one relevance array per query.
/// * `y_score` - one score array per query, aligned with `y_true`.
/// * `k` - optional cut-off forwarded to `average_precision`.
///
/// # Errors
///
/// Returns `MetricsError::InvalidInput` when the two slices differ in length,
/// when they are empty, or when a relevance/score pair differs in shape.
#[allow(dead_code)]
pub fn mean_average_precision<T, S, R>(
    y_true: &[ArrayBase<S, Ix1>],
    y_score: &[ArrayBase<R, Ix1>],
    k: Option<usize>,
) -> Result<f64>
where
    T: Float + PartialOrd + Clone,
    S: Data<Elem = T>,
    R: Data<Elem = T>,
{
    if y_true.len() != y_score.len() {
        return Err(MetricsError::InvalidInput(format!(
            "y_true and y_score have different lengths: {} vs {}",
            y_true.len(),
            y_score.len()
        )));
    }
    if y_true.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty arrays provided".to_string(),
        ));
    }

    let per_query = y_true
        .iter()
        .zip(y_score.iter())
        .map(|(labels, preds)| -> Result<f64> {
            if labels.shape() != preds.shape() {
                return Err(MetricsError::InvalidInput(format!(
                    "Relevance and _score arrays have different shapes: {:?} vs {:?}",
                    labels.shape(),
                    preds.shape()
                )));
            }

            let mut ranked: Vec<(T, T)> = preds
                .iter()
                .zip(labels.iter())
                .map(|(&score, &rel)| (score, rel))
                .collect();
            ranked.sort_by(|lhs, rhs| rhs.0.partial_cmp(&lhs.0).unwrap_or(Ordering::Equal));

            let ordered_relevance: Vec<T> = ranked.into_iter().map(|(_, rel)| rel).collect();
            Ok(average_precision(&ordered_relevance, k))
        })
        .collect::<Result<Vec<f64>>>()?;

    Ok(per_query.iter().sum::<f64>() / per_query.len() as f64)
}
/// Computes precision@k averaged over a set of queries.
///
/// For each query the items are ordered by descending score (stable sort;
/// incomparable scores are treated as equal). The precision is the fraction
/// of the top `min(k, n_items)` items whose relevance is greater than zero.
/// A query with no items contributes `0.0` (instead of the `0 / 0 = NaN`
/// that would otherwise contaminate the mean).
///
/// # Arguments
///
/// * `y_true` - one relevance array per query.
/// * `y_score` - one score array per query, aligned with `y_true`.
/// * `k` - number of top-ranked items to inspect; must be greater than zero.
///
/// # Errors
///
/// Returns `MetricsError::InvalidInput` when the two slices differ in length,
/// when they are empty, when `k` is zero, or when a relevance/score pair
/// differs in shape.
#[allow(dead_code)]
pub fn precision_at_k<T, S, R>(
    y_true: &[ArrayBase<S, Ix1>],
    y_score: &[ArrayBase<R, Ix1>],
    k: usize,
) -> Result<f64>
where
    T: Float + PartialOrd + Clone,
    S: Data<Elem = T>,
    R: Data<Elem = T>,
{
    if y_true.len() != y_score.len() {
        return Err(MetricsError::InvalidInput(format!(
            "y_true and y_score have different lengths: {} vs {}",
            y_true.len(),
            y_score.len()
        )));
    }
    if y_true.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty arrays provided".to_string(),
        ));
    }
    if k == 0 {
        return Err(MetricsError::InvalidInput(
            "k must be greater than 0".to_string(),
        ));
    }

    let mut precision_scores = Vec::with_capacity(y_true.len());
    let zero = T::zero();
    for (true_relevance, scores) in y_true.iter().zip(y_score.iter()) {
        if true_relevance.shape() != scores.shape() {
            return Err(MetricsError::InvalidInput(format!(
                "Relevance and _score arrays have different shapes: {:?} vs {:?}",
                true_relevance.shape(),
                scores.shape()
            )));
        }

        let limit = k.min(scores.len());
        if limit == 0 {
            // An empty query has nothing retrieved; dividing by `limit` below
            // would produce NaN and poison the average over all queries.
            precision_scores.push(0.0);
            continue;
        }

        let mut score_relevance: Vec<(T, T)> = scores
            .iter()
            .zip(true_relevance.iter())
            .map(|(&s, &r)| (s, r))
            .collect();
        score_relevance.sort_by(|(a, _), (b, _)| b.partial_cmp(a).unwrap_or(Ordering::Equal));

        let relevant_count = score_relevance[..limit]
            .iter()
            .filter(|(_, r)| *r > zero)
            .count();
        precision_scores.push(relevant_count as f64 / limit as f64);
    }

    let avg_precision = precision_scores.iter().sum::<f64>() / precision_scores.len() as f64;
    Ok(avg_precision)
}
/// Computes recall@k averaged over a set of queries.
///
/// For each query, the number of relevant items (relevance greater than zero)
/// found among the top `min(k, n_items)` items by descending score is divided
/// by the total number of relevant items in the query. A query without any
/// relevant item contributes `0.0`.
///
/// # Arguments
///
/// * `y_true` - one relevance array per query.
/// * `y_score` - one score array per query, aligned with `y_true`.
/// * `k` - number of top-ranked items to inspect; must be greater than zero.
///
/// # Errors
///
/// Returns `MetricsError::InvalidInput` when the two slices differ in length,
/// when they are empty, when `k` is zero, or when a relevance/score pair
/// differs in shape.
#[allow(dead_code)]
pub fn recall_at_k<T, S, R>(
    y_true: &[ArrayBase<S, Ix1>],
    y_score: &[ArrayBase<R, Ix1>],
    k: usize,
) -> Result<f64>
where
    T: Float + PartialOrd + Clone,
    S: Data<Elem = T>,
    R: Data<Elem = T>,
{
    if y_true.len() != y_score.len() {
        return Err(MetricsError::InvalidInput(format!(
            "y_true and y_score have different lengths: {} vs {}",
            y_true.len(),
            y_score.len()
        )));
    }
    if y_true.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty arrays provided".to_string(),
        ));
    }
    if k == 0 {
        return Err(MetricsError::InvalidInput(
            "k must be greater than 0".to_string(),
        ));
    }

    let zero = T::zero();
    let per_query = y_true
        .iter()
        .zip(y_score.iter())
        .map(|(labels, preds)| -> Result<f64> {
            if labels.shape() != preds.shape() {
                return Err(MetricsError::InvalidInput(format!(
                    "Relevance and _score arrays have different shapes: {:?} vs {:?}",
                    labels.shape(),
                    preds.shape()
                )));
            }

            // Nothing to recall when the query has no relevant item at all.
            let relevant_total = labels.iter().filter(|rel| **rel > zero).count();
            if relevant_total == 0 {
                return Ok(0.0);
            }

            let mut ranked: Vec<(T, T)> = preds
                .iter()
                .zip(labels.iter())
                .map(|(&score, &rel)| (score, rel))
                .collect();
            ranked.sort_by(|lhs, rhs| rhs.0.partial_cmp(&lhs.0).unwrap_or(Ordering::Equal));

            let cutoff = k.min(ranked.len());
            let hits = ranked
                .iter()
                .take(cutoff)
                .filter(|(_, rel)| *rel > zero)
                .count();
            Ok(hits as f64 / relevant_total as f64)
        })
        .collect::<Result<Vec<f64>>>()?;

    Ok(per_query.iter().sum::<f64>() / per_query.len() as f64)
}
/// Computes Kendall's tau rank correlation between two equally long arrays.
///
/// Every unordered pair `(i, j)` is classified by the sign of
/// `(x[j] - x[i]) * (y[j] - y[i])`: positive is concordant, negative is
/// discordant, and anything else (ties, NaN) counts as neither. The result is
/// `(concordant - discordant) / (n * (n - 1) / 2)`, i.e. ties are not removed
/// from the denominator. A single-element input has no pairs and yields `0.0`.
///
/// The comparison is quadratic in the input length.
///
/// # Errors
///
/// Returns `MetricsError::InvalidInput` when the arrays differ in length, are
/// empty, or contain a value that cannot be converted to `f64`.
#[allow(dead_code)]
pub fn kendalls_tau<T, S, R>(x: &ArrayBase<S, Ix1>, y: &ArrayBase<R, Ix1>) -> Result<f64>
where
    T: Float + PartialOrd + Clone,
    S: Data<Elem = T>,
    R: Data<Elem = T>,
{
    if x.len() != y.len() {
        return Err(MetricsError::InvalidInput(format!(
            "Arrays have different lengths: {} vs {}",
            x.len(),
            y.len()
        )));
    }
    if x.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty arrays provided".to_string(),
        ));
    }

    // Convert once up front instead of four times per pair inside the
    // quadratic loop, and surface a conversion failure as an error rather
    // than a panic.
    let xs = x
        .iter()
        .map(|v| {
            v.to_f64().ok_or_else(|| {
                MetricsError::InvalidInput("Failed to convert value to f64".to_string())
            })
        })
        .collect::<Result<Vec<f64>>>()?;
    let ys = y
        .iter()
        .map(|v| {
            v.to_f64().ok_or_else(|| {
                MetricsError::InvalidInput("Failed to convert value to f64".to_string())
            })
        })
        .collect::<Result<Vec<f64>>>()?;

    let n = xs.len();
    // 64-bit counters: the number of pairs exceeds `i32::MAX` once n > 65536.
    let mut concordant: u64 = 0;
    let mut discordant: u64 = 0;
    for (i, (&x_i, &y_i)) in xs.iter().zip(ys.iter()).enumerate() {
        for (&x_j, &y_j) in xs[i + 1..].iter().zip(ys[i + 1..].iter()) {
            let product = (x_j - x_i) * (y_j - y_i);
            if product > 0.0 {
                concordant += 1;
            } else if product < 0.0 {
                discordant += 1;
            }
        }
    }

    let n_pairs = n * (n - 1) / 2;
    if n_pairs == 0 {
        return Ok(0.0);
    }
    Ok((concordant as f64 - discordant as f64) / n_pairs as f64)
}
/// Computes Spearman's rank correlation coefficient between two arrays.
///
/// Both inputs are converted to ranks with `rank_data`, and the coefficient
/// is evaluated as `1 - 6 * sum(d^2) / (n * (n^2 - 1))`, where `d` is the
/// per-element rank difference. That closed form is exact only when neither
/// input contains ties; with ties it is an approximation.
///
/// A single-element input carries no ordering information and yields `0.0`
/// (the formula's denominator would be zero and produce NaN).
///
/// # Errors
///
/// Returns `MetricsError::InvalidInput` when the arrays differ in length or
/// are empty, and propagates any error from `rank_data`.
#[allow(dead_code)]
pub fn spearmans_rho<T, S, R>(x: &ArrayBase<S, Ix1>, y: &ArrayBase<R, Ix1>) -> Result<f64>
where
    T: Float + PartialOrd + Clone,
    S: Data<Elem = T>,
    R: Data<Elem = T>,
{
    if x.len() != y.len() {
        return Err(MetricsError::InvalidInput(format!(
            "Arrays have different lengths: {} vs {}",
            x.len(),
            y.len()
        )));
    }
    if x.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty arrays provided".to_string(),
        ));
    }

    let n = x.len();
    if n < 2 {
        // n * (n^2 - 1) is zero for n == 1; avoid returning NaN.
        return Ok(0.0);
    }

    let x_ranks = rank_data(x)?;
    let y_ranks = rank_data(y)?;

    let mut d_squared_sum = 0.0;
    for (rx, ry) in x_ranks.iter().zip(y_ranks.iter()) {
        let d = rx - ry;
        d_squared_sum += d * d;
    }

    // Evaluate the denominator in floating point: `n * n` in `usize` can
    // overflow on 32-bit targets once n reaches 65536.
    let n_f = n as f64;
    let rho = 1.0 - (6.0 * d_squared_sum) / (n_f * (n_f * n_f - 1.0));
    Ok(rho)
}
/// Assigns 1-based ranks to the elements of `x`, averaging ranks over ties.
///
/// Values are converted to `f64` (a failed conversion is treated as `0.0`)
/// and sorted ascending. Consecutive sorted values that lie within `1e-10`
/// of the first value of their run are considered tied, and every member of
/// the run receives the mean of the ranks the run occupies. The returned
/// vector is in the original element order.
///
/// For example, `[1, 2, 2]` yields `[1.0, 2.5, 2.5]`.
#[allow(dead_code)]
fn rank_data<T, S>(x: &ArrayBase<S, Ix1>) -> Result<Vec<f64>>
where
    T: Float + PartialOrd + Clone,
    S: Data<Elem = T>,
{
    let n = x.len();
    let mut value_index: Vec<(f64, usize)> = x
        .iter()
        .enumerate()
        .map(|(i, val)| (val.to_f64().unwrap_or(0.0), i))
        .collect();
    value_index.sort_by(|(a, _), (b, _)| a.partial_cmp(b).unwrap_or(Ordering::Equal));

    let mut ranks = vec![0.0; n];
    let mut i = 0;
    while i < n {
        // Extend the run [i, j) over every value tied with value_index[i].
        let val = value_index[i].0;
        let mut j = i + 1;
        while j < n && (value_index[j].0 - val).abs() < 1e-10 {
            j += 1;
        }

        // The run occupies zero-based sorted positions i..=j-1, i.e. 1-based
        // ranks i+1..=j, whose mean is (i + j - 1) / 2 + 1. For a run of one
        // element this reduces to i + 1.
        let rank_avg = (i + j - 1) as f64 / 2.0 + 1.0;
        for &(_, idx) in &value_index[i..j] {
            ranks[idx] = rank_avg;
        }
        i = j;
    }
    Ok(ranks)
}
/// Computes mean average precision truncated at `k` (MAP@k).
///
/// This is `mean_average_precision` called with `Some(k)`, after rejecting a
/// zero cut-off.
///
/// # Errors
///
/// Returns `MetricsError::InvalidInput` when `k` is zero, and otherwise
/// whatever `mean_average_precision` returns for the given inputs.
#[allow(dead_code)]
pub fn map_at_k<T, S, R>(
    y_true: &[ArrayBase<S, Ix1>],
    y_score: &[ArrayBase<R, Ix1>],
    k: usize,
) -> Result<f64>
where
    T: Float + PartialOrd + Clone,
    S: Data<Elem = T>,
    R: Data<Elem = T>,
{
    match k {
        0 => Err(MetricsError::InvalidInput(
            "k must be greater than 0".to_string(),
        )),
        cutoff => mean_average_precision(y_true, y_score, Some(cutoff)),
    }
}
/// Computes a position-weighted hit rate over the top `k` items, averaged
/// across queries.
///
/// For each query the items are ordered by descending score. The item at
/// 1-based position `p` within the top `min(k, n_items)` carries weight
/// `1 / p`; the weights of items whose relevance is greater than zero are
/// summed and divided by the sum of all weights in that window. When the
/// weight sum is not positive (an empty query) the unnormalized value is
/// used as is.
///
/// # Arguments
///
/// * `y_true` - one relevance array per query.
/// * `y_score` - one score array per query, aligned with `y_true`.
/// * `k` - number of top-ranked items to inspect; must be greater than zero.
///
/// # Errors
///
/// Returns `MetricsError::InvalidInput` when the two slices differ in length,
/// when they are empty, when `k` is zero, or when a relevance/score pair
/// differs in shape.
#[allow(dead_code)]
pub fn click_through_rate<T, S, R>(
    y_true: &[ArrayBase<S, Ix1>],
    y_score: &[ArrayBase<R, Ix1>],
    k: usize,
) -> Result<f64>
where
    T: Float + PartialOrd + Clone,
    S: Data<Elem = T>,
    R: Data<Elem = T>,
{
    if y_true.len() != y_score.len() {
        return Err(MetricsError::InvalidInput(format!(
            "y_true and y_score have different lengths: {} vs {}",
            y_true.len(),
            y_score.len()
        )));
    }
    if y_true.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty arrays provided".to_string(),
        ));
    }
    if k == 0 {
        return Err(MetricsError::InvalidInput(
            "k must be greater than 0".to_string(),
        ));
    }

    let zero = T::zero();
    let mut per_query = Vec::with_capacity(y_true.len());
    for (labels, preds) in y_true.iter().zip(y_score.iter()) {
        if labels.shape() != preds.shape() {
            return Err(MetricsError::InvalidInput(format!(
                "Relevance and _score arrays have different shapes: {:?} vs {:?}",
                labels.shape(),
                preds.shape()
            )));
        }

        let mut ranked: Vec<(T, T)> = preds
            .iter()
            .zip(labels.iter())
            .map(|(&score, &rel)| (score, rel))
            .collect();
        ranked.sort_by(|lhs, rhs| rhs.0.partial_cmp(&lhs.0).unwrap_or(Ordering::Equal));

        let cutoff = k.min(ranked.len());

        // Sum of 1/position weights over the relevant items in the window.
        let weighted_hits = ranked.iter().take(cutoff).enumerate().fold(
            0.0_f64,
            |acc, (pos, (_, rel))| {
                let weight = 1.0 / (pos as f64 + 1.0);
                let hit = if *rel > zero { 1.0 } else { 0.0 };
                acc + weight * hit
            },
        );

        // Sum of all weights in the window, used to scale into [0, 1].
        let weight_total = (1..=cutoff).map(|rank| 1.0 / rank as f64).sum::<f64>();
        let query_ctr = if weight_total > 0.0 {
            weighted_hits / weight_total
        } else {
            weighted_hits
        };
        per_query.push(query_ctr);
    }

    Ok(per_query.iter().sum::<f64>() / per_query.len() as f64)
}