use scirs2_core::ndarray::{Array1, ArrayBase, Data, Ix1};
use scirs2_core::numeric::Float;
use std::cmp::Ordering;
use std::collections::HashSet;
use crate::error::{MetricsError, Result};
#[allow(dead_code)]
pub fn detection_accuracy<T, S, R>(
y_true: &ArrayBase<S, Ix1>,
y_pred: &ArrayBase<R, Ix1>,
) -> Result<f64>
where
T: Float + PartialOrd + Clone,
S: Data<Elem = T>,
R: Data<Elem = T>,
{
if y_true.len() != y_pred.len() {
return Err(MetricsError::InvalidInput(format!(
"Arrays have different lengths: {} vs {}",
y_true.len(),
y_pred.len()
)));
}
if y_true.is_empty() {
return Err(MetricsError::InvalidInput(
"Empty arrays provided".to_string(),
));
}
let zero = T::zero();
let total = y_true.len();
let mut correct = 0;
for (truth, pred) in y_true.iter().zip(y_pred.iter()) {
if (truth > &zero && pred > &zero) || (truth == &zero && pred == &zero) {
correct += 1;
}
}
Ok(correct as f64 / total as f64)
}
#[allow(dead_code)]
pub fn false_alarm_rate<T, S, R>(
y_true: &ArrayBase<S, Ix1>,
y_pred: &ArrayBase<R, Ix1>,
) -> Result<f64>
where
T: Float + PartialOrd + Clone,
S: Data<Elem = T>,
R: Data<Elem = T>,
{
if y_true.len() != y_pred.len() {
return Err(MetricsError::InvalidInput(format!(
"Arrays have different lengths: {} vs {}",
y_true.len(),
y_pred.len()
)));
}
if y_true.is_empty() {
return Err(MetricsError::InvalidInput(
"Empty arrays provided".to_string(),
));
}
let zero = T::zero();
let mut false_alarms = 0;
let mut total_normal = 0;
for (truth, pred) in y_true.iter().zip(y_pred.iter()) {
if truth == &zero {
total_normal += 1;
if pred > &zero {
false_alarms += 1;
}
}
}
if total_normal == 0 {
return Err(MetricsError::InvalidInput(
"No normal instances in ground truth".to_string(),
));
}
Ok(false_alarms as f64 / total_normal as f64)
}
#[allow(dead_code)]
pub fn miss_detection_rate<T, S, R>(
y_true: &ArrayBase<S, Ix1>,
y_pred: &ArrayBase<R, Ix1>,
) -> Result<f64>
where
T: Float + PartialOrd + Clone,
S: Data<Elem = T>,
R: Data<Elem = T>,
{
if y_true.len() != y_pred.len() {
return Err(MetricsError::InvalidInput(format!(
"Arrays have different lengths: {} vs {}",
y_true.len(),
y_pred.len()
)));
}
if y_true.is_empty() {
return Err(MetricsError::InvalidInput(
"Empty arrays provided".to_string(),
));
}
let zero = T::zero();
let mut missed_anomalies = 0;
let mut total_anomalies = 0;
for (truth, pred) in y_true.iter().zip(y_pred.iter()) {
if truth > &zero {
total_anomalies += 1;
if pred == &zero {
missed_anomalies += 1;
}
}
}
if total_anomalies == 0 {
return Err(MetricsError::InvalidInput(
"No anomalies in ground truth".to_string(),
));
}
Ok(missed_anomalies as f64 / total_anomalies as f64)
}
#[allow(dead_code)]
pub fn anomaly_auc_score<T, S, R>(
y_true: &ArrayBase<S, Ix1>,
y_score: &ArrayBase<R, Ix1>,
) -> Result<f64>
where
T: Float + PartialOrd + Clone,
S: Data<Elem = T>,
R: Data<Elem = T>,
{
if y_true.len() != y_score.len() {
return Err(MetricsError::InvalidInput(format!(
"Arrays have different lengths: {} vs {}",
y_true.len(),
y_score.len()
)));
}
if y_true.is_empty() {
return Err(MetricsError::InvalidInput(
"Empty arrays provided".to_string(),
));
}
let mut score_label: Vec<_> = y_score
.iter()
.zip(y_true.iter())
.map(|(s, l)| (s.to_f64().unwrap_or(0.0), l.to_f64().unwrap_or(0.0)))
.collect();
score_label.sort_by(|(a, _), (b, _)| b.partial_cmp(a).unwrap_or(Ordering::Equal));
let positives: usize = y_true.iter().filter(|&&x| x > T::zero()).count();
let negatives = y_true.len() - positives;
if positives == 0 || negatives == 0 {
return Err(MetricsError::InvalidInput(
"AUC is not defined when only one class is present".to_string(),
));
}
let mut positive_rank_sum = 0.0;
let mut current_rank = 1.0;
let mut prev_score = score_label[0].0;
let mut count_at_current_score = 1;
let mut pos_at_current_score = if score_label[0].1 > 0.0 { 1 } else { 0 };
for i in 1..score_label.len() {
let (_score, label) = score_label[i];
if (_score - prev_score).abs() < 1e-10 {
count_at_current_score += 1;
if label > 0.0 {
pos_at_current_score += 1;
}
} else {
let avg_rank = current_rank + (count_at_current_score - 1) as f64 / 2.0;
positive_rank_sum += avg_rank * pos_at_current_score as f64;
current_rank += count_at_current_score as f64;
count_at_current_score = 1;
pos_at_current_score = if label > 0.0 { 1 } else { 0 };
prev_score = _score;
}
}
let avg_rank = current_rank + (count_at_current_score - 1) as f64 / 2.0;
positive_rank_sum += avg_rank * pos_at_current_score as f64;
let u = positive_rank_sum - (positives * (positives + 1)) as f64 / 2.0;
let auc = u / (positives * negatives) as f64;
Ok(auc)
}
#[allow(dead_code)]
pub fn anomaly_average_precision_score<T, S, R>(
y_true: &ArrayBase<S, Ix1>,
y_score: &ArrayBase<R, Ix1>,
) -> Result<f64>
where
T: Float + PartialOrd + Clone,
S: Data<Elem = T>,
R: Data<Elem = T>,
{
if y_true.len() != y_score.len() {
return Err(MetricsError::InvalidInput(format!(
"Arrays have different lengths: {} vs {}",
y_true.len(),
y_score.len()
)));
}
if y_true.is_empty() {
return Err(MetricsError::InvalidInput(
"Empty arrays provided".to_string(),
));
}
let mut score_label: Vec<_> = y_score
.iter()
.zip(y_true.iter())
.map(|(s, l)| (s.to_f64().unwrap_or(0.0), l.to_f64().unwrap_or(0.0)))
.collect();
score_label.sort_by(|(a, _), (b, _)| b.partial_cmp(a).unwrap_or(Ordering::Equal));
let total_positives: usize = y_true.iter().filter(|&&x| x > T::zero()).count();
if total_positives == 0 {
return Err(MetricsError::InvalidInput(
"Average precision is not defined when there are no positive samples".to_string(),
));
}
let mut true_positives = 0;
let mut false_positives = 0;
let mut sum_precision = 0.0;
let mut prev_recall = 0.0;
for (_, label) in score_label.iter() {
if *label > 0.0 {
true_positives += 1;
} else {
false_positives += 1;
}
let precision = true_positives as f64 / (true_positives + false_positives) as f64;
let recall = true_positives as f64 / total_positives as f64;
if *label > 0.0 {
sum_precision += precision * (recall - prev_recall);
prev_recall = recall;
}
}
Ok(sum_precision)
}
#[allow(dead_code)]
pub fn kl_divergence<T, S, R>(p: &ArrayBase<S, Ix1>, q: &ArrayBase<R, Ix1>) -> Result<f64>
where
T: Float + PartialOrd + Clone,
S: Data<Elem = T>,
R: Data<Elem = T>,
{
if p.len() != q.len() {
return Err(MetricsError::InvalidInput(format!(
"Arrays have different lengths: {} vs {}",
p.len(),
q.len()
)));
}
if p.is_empty() {
return Err(MetricsError::InvalidInput(
"Empty arrays provided".to_string(),
));
}
let p_sum: f64 = p.iter().map(|x| x.to_f64().unwrap_or(0.0)).sum();
let q_sum: f64 = q.iter().map(|x| x.to_f64().unwrap_or(0.0)).sum();
if (p_sum - 1.0).abs() > 1e-5 || (q_sum - 1.0).abs() > 1e-5 {
return Err(MetricsError::InvalidInput(
"Inputs must be probability distributions that sum to approximately 1".to_string(),
));
}
let zero = T::zero();
let epsilon = 1e-10;
let mut kl = 0.0;
for (p_i, q_i) in p.iter().zip(q.iter()) {
let p_val = p_i.to_f64().unwrap_or(0.0);
let q_val = q_i.to_f64().unwrap_or(0.0).max(epsilon);
if p_i > &zero {
kl += p_val * (p_val / q_val).ln();
}
}
Ok(kl)
}
#[allow(dead_code)]
pub fn js_divergence<T, S, R>(p: &ArrayBase<S, Ix1>, q: &ArrayBase<R, Ix1>) -> Result<f64>
where
T: Float + PartialOrd + Clone,
S: Data<Elem = T>,
R: Data<Elem = T>,
{
if p.len() != q.len() {
return Err(MetricsError::InvalidInput(format!(
"Arrays have different lengths: {} vs {}",
p.len(),
q.len()
)));
}
if p.is_empty() {
return Err(MetricsError::InvalidInput(
"Empty arrays provided".to_string(),
));
}
let p_sum: f64 = p.iter().map(|x| x.to_f64().unwrap_or(0.0)).sum();
let q_sum: f64 = q.iter().map(|x| x.to_f64().unwrap_or(0.0)).sum();
if (p_sum - 1.0).abs() > 1e-5 || (q_sum - 1.0).abs() > 1e-5 {
return Err(MetricsError::InvalidInput(
"Inputs must be probability distributions that sum to approximately 1".to_string(),
));
}
let p_f64: Array1<f64> = p.iter().map(|x| x.to_f64().unwrap_or(0.0)).collect();
let q_f64: Array1<f64> = q.iter().map(|x| x.to_f64().unwrap_or(0.0)).collect();
let m: Array1<f64> = p_f64
.iter()
.zip(q_f64.iter())
.map(|(p_i, q_i)| (p_i + q_i) / 2.0)
.collect();
let mut js = 0.0;
let epsilon = 1e-10;
for i in 0..p_f64.len() {
let p_i = p_f64[i];
let q_i = q_f64[i];
let m_i = m[i];
if p_i > epsilon {
js += 0.5 * p_i * (p_i / m_i).ln();
}
if q_i > epsilon {
js += 0.5 * q_i * (q_i / m_i).ln();
}
}
Ok(js)
}
#[allow(dead_code)]
pub fn wasserstein_distance<T, S, R>(
u_values: &ArrayBase<S, Ix1>,
v_values: &ArrayBase<R, Ix1>,
) -> Result<f64>
where
T: Float + PartialOrd + Clone,
S: Data<Elem = T>,
R: Data<Elem = T>,
{
if u_values.is_empty() || v_values.is_empty() {
return Err(MetricsError::InvalidInput(
"Empty arrays provided".to_string(),
));
}
let mut u_sorted: Vec<f64> = u_values.iter().map(|x| x.to_f64().unwrap_or(0.0)).collect();
let mut v_sorted: Vec<f64> = v_values.iter().map(|x| x.to_f64().unwrap_or(0.0)).collect();
u_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
v_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
let n = u_sorted.len();
let m = v_sorted.len();
let mut i = 0;
let mut j = 0;
let mut u_cdf = 0.0;
let mut v_cdf = 0.0;
let mut emd = 0.0;
let mut prev_value = f64::NEG_INFINITY;
while i < n || j < m {
let u_value = if i < n { u_sorted[i] } else { f64::INFINITY };
let v_value = if j < m { v_sorted[j] } else { f64::INFINITY };
let current_value = u_value.min(v_value);
if current_value > prev_value {
emd += (current_value - prev_value) * (u_cdf - v_cdf).abs();
prev_value = current_value;
}
if u_value <= v_value {
u_cdf += 1.0 / n as f64;
i += 1;
}
if v_value <= u_value {
v_cdf += 1.0 / m as f64;
j += 1;
}
}
Ok(emd)
}
#[allow(dead_code)]
pub fn maximum_mean_discrepancy<T, S, R>(
x: &ArrayBase<S, Ix1>,
y: &ArrayBase<R, Ix1>,
bandwidth: Option<f64>,
) -> Result<f64>
where
T: Float + PartialOrd + Clone,
S: Data<Elem = T>,
R: Data<Elem = T>,
{
if x.is_empty() || y.is_empty() {
return Err(MetricsError::InvalidInput(
"Empty arrays provided".to_string(),
));
}
let x_f64: Vec<f64> = x.iter().map(|v| v.to_f64().unwrap_or(0.0)).collect();
let y_f64: Vec<f64> = y.iter().map(|v| v.to_f64().unwrap_or(0.0)).collect();
let sigma = match bandwidth {
Some(bw) => bw,
None => {
let mut distances = Vec::new();
for i in 0..x_f64.len() {
for j in (i + 1)..x_f64.len() {
let dist = (x_f64[i] - x_f64[j]).powi(2);
distances.push(dist);
}
}
for i in 0..y_f64.len() {
for j in (i + 1)..y_f64.len() {
let dist = (y_f64[i] - y_f64[j]).powi(2);
distances.push(dist);
}
}
for &x_val in &x_f64 {
for &y_val in &y_f64 {
let dist = (x_val - y_val).powi(2);
distances.push(dist);
}
}
if distances.is_empty() {
return Err(MetricsError::InvalidInput(
"Not enough points to compute distances".to_string(),
));
}
distances.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
let median = if distances.len() % 2 == 0 {
(distances[distances.len() / 2 - 1] + distances[distances.len() / 2]) / 2.0
} else {
distances[distances.len() / 2]
};
median.sqrt()
}
};
let rbf_kernel = |x1: f64, x2: f64| -> f64 {
let squared_distance = (x1 - x2).powi(2);
(-squared_distance / (2.0 * sigma.powi(2))).exp()
};
let n_x = x_f64.len() as f64;
let n_y = y_f64.len() as f64;
let mut xx_sum = 0.0;
let mut yy_sum = 0.0;
let mut xy_sum = 0.0;
for i in 0..x_f64.len() {
for j in 0..x_f64.len() {
if i != j {
xx_sum += rbf_kernel(x_f64[i], x_f64[j]);
}
}
}
for i in 0..y_f64.len() {
for j in 0..y_f64.len() {
if i != j {
yy_sum += rbf_kernel(y_f64[i], y_f64[j]);
}
}
}
for &x_val in &x_f64 {
for &y_val in &y_f64 {
xy_sum += rbf_kernel(x_val, y_val);
}
}
xx_sum /= n_x * (n_x - 1.0);
yy_sum /= n_y * (n_y - 1.0);
xy_sum /= n_x * n_y;
let mmd = xx_sum + yy_sum - 2.0 * xy_sum;
Ok(mmd.max(0.0))
}
#[allow(dead_code)]
pub fn precision_recall_with_tolerance<T, S, R>(
y_true: &ArrayBase<S, Ix1>,
y_pred: &ArrayBase<R, Ix1>,
tolerance: usize,
) -> Result<(f64, f64, f64)>
where
T: Float + PartialOrd + Clone,
S: Data<Elem = T>,
R: Data<Elem = T>,
{
if y_true.len() != y_pred.len() {
return Err(MetricsError::InvalidInput(format!(
"Arrays have different lengths: {} vs {}",
y_true.len(),
y_pred.len()
)));
}
if y_true.is_empty() {
return Err(MetricsError::InvalidInput(
"Empty arrays provided".to_string(),
));
}
let zero = T::zero();
let n = y_true.len();
let mut true_anomaly_indices = Vec::new();
for (i, val) in y_true.iter().enumerate() {
if val > &zero {
true_anomaly_indices.push(i);
}
}
let mut pred_anomaly_indices = Vec::new();
for (i, val) in y_pred.iter().enumerate() {
if val > &zero {
pred_anomaly_indices.push(i);
}
}
if true_anomaly_indices.is_empty() {
if pred_anomaly_indices.is_empty() {
return Ok((1.0, 1.0, 1.0));
} else {
return Ok((0.0, 1.0, 0.0));
}
}
if pred_anomaly_indices.is_empty() {
return Ok((1.0, 0.0, 0.0));
}
let mut true_anomaly_regions: HashSet<usize> = HashSet::new();
for idx in &true_anomaly_indices {
let start = idx.saturating_sub(tolerance);
let end = (*idx + tolerance).min(n - 1);
for i in start..=end {
true_anomaly_regions.insert(i);
}
}
let mut true_positives = 0;
for idx in &pred_anomaly_indices {
if true_anomaly_regions.contains(idx) {
true_positives += 1;
}
}
let precision = true_positives as f64 / pred_anomaly_indices.len() as f64;
let recall = if true_anomaly_indices.is_empty() {
1.0 } else {
let mut detected_anomalies = 0;
for idx in &true_anomaly_indices {
let start = idx.saturating_sub(tolerance);
let end = (*idx + tolerance).min(n - 1);
let region_detected = pred_anomaly_indices
.iter()
.any(|&p_idx| p_idx >= start && p_idx <= end);
if region_detected {
detected_anomalies += 1;
}
}
detected_anomalies as f64 / true_anomaly_indices.len() as f64
};
let f1 = if precision + recall > 0.0 {
2.0 * precision * recall / (precision + recall)
} else {
0.0
};
Ok((precision, recall, f1))
}
#[allow(dead_code)]
pub fn point_adjusted_precision_recall<T, S, R>(
y_true: &ArrayBase<S, Ix1>,
y_pred: &ArrayBase<R, Ix1>,
) -> Result<(f64, f64, f64)>
where
T: Float + PartialOrd + Clone,
S: Data<Elem = T>,
R: Data<Elem = T>,
{
if y_true.len() != y_pred.len() {
return Err(MetricsError::InvalidInput(format!(
"Arrays have different lengths: {} vs {}",
y_true.len(),
y_pred.len()
)));
}
if y_true.is_empty() {
return Err(MetricsError::InvalidInput(
"Empty arrays provided".to_string(),
));
}
let zero = T::zero();
let mut true_segments = Vec::new();
let mut current_segment = Vec::new();
let mut in_segment = false;
for (i, val) in y_true.iter().enumerate() {
if val > &zero {
if !in_segment {
in_segment = true;
current_segment = vec![i];
} else {
current_segment.push(i);
}
} else if in_segment {
in_segment = false;
true_segments.push(current_segment.clone());
current_segment = Vec::new();
}
}
if in_segment {
true_segments.push(current_segment);
}
let mut pred_anomaly_indices = Vec::new();
for (i, val) in y_pred.iter().enumerate() {
if val > &zero {
pred_anomaly_indices.push(i);
}
}
if true_segments.is_empty() {
if pred_anomaly_indices.is_empty() {
return Ok((1.0, 1.0, 1.0));
} else {
return Ok((0.0, 1.0, 0.0));
}
}
if pred_anomaly_indices.is_empty() {
return Ok((1.0, 0.0, 0.0));
}
let mut detected_segments = 0;
for segment in &true_segments {
for &idx in segment {
if pred_anomaly_indices.contains(&idx) {
detected_segments += 1;
break;
}
}
}
let recall = detected_segments as f64 / true_segments.len() as f64;
let mut detected_segment_points = HashSet::new();
for segment in &true_segments {
for &pred_idx in &pred_anomaly_indices {
if segment.contains(&pred_idx) {
for &true_idx in segment {
detected_segment_points.insert(true_idx);
}
break;
}
}
}
let mut true_positives = 0;
for &idx in &pred_anomaly_indices {
if detected_segment_points.contains(&idx) {
true_positives += 1;
}
}
let precision = true_positives as f64 / pred_anomaly_indices.len() as f64;
let f1 = if precision + recall > 0.0 {
2.0 * precision * recall / (precision + recall)
} else {
0.0
};
Ok((precision, recall, f1))
}
#[allow(dead_code)]
pub fn nab_score<T, S, R>(
y_true: &ArrayBase<S, Ix1>,
y_pred: &ArrayBase<R, Ix1>,
anomaly_window: Option<usize>,
tp_weight: Option<f64>,
fp_weight: Option<f64>,
) -> Result<f64>
where
T: Float + PartialOrd + Clone,
S: Data<Elem = T>,
R: Data<Elem = T>,
{
if y_true.len() != y_pred.len() {
return Err(MetricsError::InvalidInput(format!(
"Arrays have different lengths: {} vs {}",
y_true.len(),
y_pred.len()
)));
}
if y_true.is_empty() {
return Err(MetricsError::InvalidInput(
"Empty arrays provided".to_string(),
));
}
let zero = T::zero();
let n = y_true.len();
let window_size = anomaly_window.unwrap_or(10);
let weight_tp = tp_weight.unwrap_or(1.0);
let weight_fp = fp_weight.unwrap_or(-0.5);
let mut anomaly_windows = Vec::new();
for (i, val) in y_true.iter().enumerate() {
if val > &zero {
let start = i.saturating_sub(window_size / 2);
let end = (i + window_size / 2).min(n - 1);
anomaly_windows.push((start, end, i)); }
}
if anomaly_windows.is_empty() {
let all_zeros = y_pred.iter().all(|&val| val == T::zero());
if all_zeros {
return Ok(100.0); } else {
return Ok(0.0); }
}
let mut prediction_indices = Vec::new();
for (i, val) in y_pred.iter().enumerate() {
if val > &zero {
prediction_indices.push(i);
}
}
let mut score = 0.0;
let mut detected_windows = HashSet::new();
for &pred_idx in &prediction_indices {
let mut is_tp = false;
for (i, &(start, end, center)) in anomaly_windows.iter().enumerate() {
if pred_idx >= start && pred_idx <= end && !detected_windows.contains(&i) {
is_tp = true;
detected_windows.insert(i);
let relative_position = (pred_idx as isize - center as isize) as f64;
let sigmoid_scale = 1.0 / (1.0 + (0.5 * relative_position).exp());
score += weight_tp * sigmoid_scale;
break;
}
}
if !is_tp {
score += weight_fp;
}
}
let fn_count = anomaly_windows.len() - detected_windows.len();
score -= fn_count as f64 * weight_tp;
let max_score = anomaly_windows.len() as f64 * weight_tp;
let min_score = -max_score;
let normalized_score = 100.0 * (score - min_score) / (max_score - min_score);
Ok(normalized_score.clamp(0.0, 100.0))
}