use scirs2_core::ndarray::{Array1, Array2};
use std::collections::HashMap;
use crate::error::Result;
use super::config::*;
/// Sample standard deviation of `scores`, using the `n - 1` denominator.
///
/// Returns `0.0` when fewer than two scores are supplied.
pub fn calculate_std_dev(scores: &[f64]) -> f64 {
    let count = scores.len();
    if count < 2 {
        return 0.0;
    }

    let mean = scores.iter().sum::<f64>() / count as f64;
    let squared_deviations: f64 = scores
        .iter()
        .map(|&score| (score - mean).powi(2))
        .sum();

    (squared_deviations / (count - 1) as f64).sqrt()
}
/// Decides whether `new_score` beats `best_score` under `metric`.
///
/// For `DaviesBouldinIndex` and `Inertia` a smaller score wins, and any score
/// is accepted while `best_score` is still `f64::NEG_INFINITY` (presumably the
/// "nothing recorded yet" value; confirm against callers). Every other metric
/// treats a strictly larger score as better.
pub fn is_score_better(new_score: f64, best_score: f64, metric: &EvaluationMetric) -> bool {
    let lower_is_better = matches!(
        metric,
        EvaluationMetric::DaviesBouldinIndex | EvaluationMetric::Inertia
    );

    if lower_is_better {
        best_score == f64::NEG_INFINITY || new_score < best_score
    } else {
        new_score > best_score
    }
}
/// Reports whether the search should stop because recent evaluations stopped
/// improving.
///
/// The last `patience` evaluations form the "recent" window; everything before
/// it is the baseline. The function returns `true` when the best recent score
/// exceeds the best baseline score by less than `min_improvement`.
///
/// Returns `false` when `patience` is zero or when there are not yet more than
/// `patience` evaluations, because no baseline exists to compare against.
///
/// Scores are compared with `f64::max`, so larger scores are treated as better.
/// NOTE(review): confirm that callers store lower-is-better metrics in a form
/// where larger is better before relying on this for them.
pub fn should_stop_early(
    evaluation_history: &[EvaluationResult],
    early_stop_config: &EarlyStoppingConfig,
) -> bool {
    let patience = early_stop_config.patience;
    if patience == 0 || evaluation_history.len() <= patience {
        return false;
    }

    let (baseline, recent) = evaluation_history.split_at(evaluation_history.len() - patience);
    let best_of = |results: &[EvaluationResult]| {
        results
            .iter()
            .map(|r| r.score)
            .fold(f64::NEG_INFINITY, f64::max)
    };

    // Comparing the window against the overall maximum would always yield a
    // non-positive gain, because the window is part of the history. The
    // baseline therefore has to exclude the window.
    let recent_gain = best_of(recent) - best_of(baseline);
    recent_gain < early_stop_config.min_improvement
}
/// Builds the [`ConvergenceInfo`] summary for a finished search.
///
/// The search is reported as converged with `StoppingReason::MaxEvaluations`
/// when the history holds at least `max_evaluations` entries. Otherwise it is
/// reported as not converged with `StoppingReason::EarlyStopping`.
/// `convergence_iteration` is always `None`.
pub fn create_convergence_info(
    evaluation_history: &[EvaluationResult],
    max_evaluations: usize,
) -> ConvergenceInfo {
    let budget_exhausted = evaluation_history.len() >= max_evaluations;
    let stopping_reason = match budget_exhausted {
        true => StoppingReason::MaxEvaluations,
        false => StoppingReason::EarlyStopping,
    };

    ConvergenceInfo {
        converged: budget_exhausted,
        convergence_iteration: None,
        stopping_reason,
    }
}
/// Summarises how the search explored the parameter space.
///
/// For every parameter name seen in `evaluation_history` this collects:
/// * `parameter_distributions`: the sampled values, in evaluation order;
/// * `parameter_importance`: the absolute Pearson correlation (via
///   [`calculate_correlation`]) between the parameter's values and the scores
///   of the evaluations in which it appeared;
/// * `coverage`: the result of `calculate_coverage` over the distributions.
pub fn calculate_exploration_stats(evaluation_history: &[EvaluationResult]) -> ExplorationStats {
    // Each value is stored next to the score of its own evaluation, so the two
    // series stay aligned even when a parameter is missing from some results.
    let mut samples: HashMap<String, (Vec<f64>, Vec<f64>)> = HashMap::new();
    for result in evaluation_history {
        for (param_name, &value) in &result.parameters {
            let (values, scores) = samples.entry(param_name.clone()).or_default();
            values.push(value);
            scores.push(result.score);
        }
    }

    let parameter_importance: HashMap<String, f64> = samples
        .iter()
        .map(|(param_name, (values, scores))| {
            (
                param_name.clone(),
                calculate_correlation(values, scores).abs(),
            )
        })
        .collect();

    let parameter_distributions: HashMap<String, Vec<f64>> = samples
        .into_iter()
        .map(|(param_name, (values, _scores))| (param_name, values))
        .collect();

    ExplorationStats {
        coverage: calculate_coverage(&parameter_distributions),
        parameter_distributions,
        parameter_importance,
    }
}
/// Pearson correlation coefficient between `x` and `y`.
///
/// Returns `0.0` when the slices differ in length, hold fewer than two
/// elements, or when the coefficient is undefined (a constant series, or a
/// non-finite intermediate result such as one caused by NaN input).
/// Otherwise the result lies in `[-1.0, 1.0]`.
pub fn calculate_correlation(x: &[f64], y: &[f64]) -> f64 {
    if x.len() != y.len() || x.len() < 2 {
        return 0.0;
    }

    let n = x.len() as f64;
    let mean_x = x.iter().sum::<f64>() / n;
    let mean_y = y.iter().sum::<f64>() / n;

    // Centring on the means first avoids the catastrophic cancellation of the
    // `n * sum(x^2) - sum(x)^2` form when values are large relative to their
    // spread.
    let (mut covariance, mut var_x, mut var_y) = (0.0_f64, 0.0_f64, 0.0_f64);
    for (&a, &b) in x.iter().zip(y.iter()) {
        let dx = a - mean_x;
        let dy = b - mean_y;
        covariance += dx * dy;
        var_x += dx * dx;
        var_y += dy * dy;
    }

    // Taking the square roots separately keeps the product from overflowing.
    let denominator = var_x.sqrt() * var_y.sqrt();
    if denominator.is_finite() && denominator > 0.0 {
        // Rounding can push the ratio a hair outside the valid range.
        (covariance / denominator).clamp(-1.0, 1.0)
    } else {
        0.0
    }
}
/// Average, over all parameters, of the fraction of sampled values that are
/// distinct (values closer than `1e-10` to their sorted predecessor count as
/// duplicates).
///
/// Parameters with at most one sample, or whose samples span a zero range,
/// contribute `0.0` but still count in the average. Returns `0.0` for an
/// empty map. NaN samples never compare as near-duplicates, so each counts as
/// a distinct value; they do not cause a panic.
fn calculate_coverage(parameter_distributions: &HashMap<String, Vec<f64>>) -> f64 {
    if parameter_distributions.is_empty() {
        return 0.0;
    }

    let mut total_coverage = 0.0;
    for values in parameter_distributions.values() {
        if values.len() <= 1 {
            continue;
        }

        let min_val = values.iter().copied().fold(f64::INFINITY, f64::min);
        let max_val = values.iter().copied().fold(f64::NEG_INFINITY, f64::max);
        if max_val - min_val > 0.0 {
            let mut sorted_values = values.clone();
            // `total_cmp` gives NaN a defined position instead of panicking.
            sorted_values.sort_unstable_by(f64::total_cmp);
            sorted_values.dedup_by(|a, b| (*a - *b).abs() < 1e-10);
            total_coverage += sorted_values.len() as f64 / values.len() as f64;
        }
    }

    total_coverage / parameter_distributions.len() as f64
}
/// Sums the squared Euclidean distance from each sample to its assigned
/// centroid.
///
/// `labels[i]` selects the centroid row for `data` row `i`. Samples whose
/// label is not a valid row of `centroids` are skipped and contribute nothing.
/// This function always returns `Ok`.
///
/// # Panics
///
/// The element indexing panics if `labels` has more entries than `data` has
/// rows, or if `centroids` has fewer columns than `data`.
pub fn calculate_inertia(
    data: &Array2<f64>,
    labels: &Array1<usize>,
    centroids: &Array2<f64>,
) -> Result<f64> {
    let n_features = data.ncols();
    let n_centroids = centroids.nrows();

    let total_inertia = labels
        .iter()
        .enumerate()
        .filter(|&(_, &label)| label < n_centroids)
        .fold(0.0, |inertia, (row, &label)| {
            let distance_sq = (0..n_features).fold(0.0, |acc, col| {
                let diff = data[[row, col]] - centroids[[label, col]];
                acc + diff * diff
            });
            inertia + distance_sq
        });

    Ok(total_inertia)
}
pub fn calculate_sample_silhouette(
data: &Array2<f64>,
labels: &Array1<i32>,
sample_idx: usize,
) -> f64 {
let sample_label = labels[sample_idx];
let n_samples = data.nrows();
if n_samples <= 1 {
return 0.0;
}
let mut intra_cluster_distances = Vec::new();
for j in 0..n_samples {
if j != sample_idx && labels[j] == sample_label {
let distance = euclidean_distance(&data.row(sample_idx), &data.row(j));
intra_cluster_distances.push(distance);
}
}
let a_i = if intra_cluster_distances.is_empty() {
0.0
} else {
intra_cluster_distances.iter().sum::<f64>() / intra_cluster_distances.len() as f64
};
let unique_labels: Vec<i32> = {
let mut labels_vec: Vec<i32> = labels.iter().copied().collect();
labels_vec.sort();
labels_vec.dedup();
labels_vec
};
let mut min_inter_cluster_distance = f64::INFINITY;
for &other_label in &unique_labels {
if other_label == sample_label {
continue;
}
let mut inter_cluster_distances = Vec::new();
for j in 0..n_samples {
if labels[j] == other_label {
let distance = euclidean_distance(&data.row(sample_idx), &data.row(j));
inter_cluster_distances.push(distance);
}
}
if !inter_cluster_distances.is_empty() {
let mean_distance =
inter_cluster_distances.iter().sum::<f64>() / inter_cluster_distances.len() as f64;
min_inter_cluster_distance = min_inter_cluster_distance.min(mean_distance);
}
}
let b_i = if min_inter_cluster_distance == f64::INFINITY {
0.0
} else {
min_inter_cluster_distance
};
if a_i == 0.0 && b_i == 0.0 {
0.0
} else {
(b_i - a_i) / a_i.max(b_i)
}
}
/// Euclidean (L2) distance between two vectors.
///
/// Elements are paired with `zip`, so if the views differ in length only the
/// common prefix is compared.
pub fn euclidean_distance(
    a: &scirs2_core::ndarray::ArrayView1<f64>,
    b: &scirs2_core::ndarray::ArrayView1<f64>,
) -> f64 {
    let sum_of_squares: f64 = a
        .iter()
        .zip(b.iter())
        .map(|(lhs, rhs)| (lhs - rhs).powi(2))
        .sum();

    sum_of_squares.sqrt()
}
/// Manhattan (L1) distance between two vectors.
///
/// Elements are paired with `zip`, so if the views differ in length only the
/// common prefix is compared.
pub fn manhattan_distance(
    a: &scirs2_core::ndarray::ArrayView1<f64>,
    b: &scirs2_core::ndarray::ArrayView1<f64>,
) -> f64 {
    let absolute_differences = a
        .iter()
        .zip(b.iter())
        .map(|(lhs, rhs)| f64::abs(lhs - rhs));

    absolute_differences.sum()
}
/// Cosine similarity between two vectors.
///
/// Returns `0.0` when either vector has zero norm. The dot product pairs
/// elements with `zip` (common prefix only), while each norm is computed over
/// the whole of its own vector.
pub fn cosine_similarity(
    a: &scirs2_core::ndarray::ArrayView1<f64>,
    b: &scirs2_core::ndarray::ArrayView1<f64>,
) -> f64 {
    /// L2 norm of a single vector.
    fn l2_norm(v: &scirs2_core::ndarray::ArrayView1<f64>) -> f64 {
        v.iter().map(|x| x * x).sum::<f64>().sqrt()
    }

    let norm_a = l2_norm(a);
    let norm_b = l2_norm(b);
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    let dot_product: f64 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    dot_product / (norm_a * norm_b)
}
/// Scales `vector` in place to unit L2 norm.
///
/// The vector is left untouched unless its norm is greater than `1e-10`,
/// which avoids dividing by a (near-)zero norm.
pub fn normalize_vector(vector: &mut [f64]) {
    let squared_norm: f64 = vector.iter().map(|component| component * component).sum();
    let norm = squared_norm.sqrt();

    if norm > 1e-10 {
        vector
            .iter_mut()
            .for_each(|component| *component /= norm);
    }
}
/// Returns `num` evenly spaced values from `start` to `stop`, both inclusive.
///
/// * `num == 0` yields an empty vector.
/// * `num == 1` yields `[start]`.
/// * Otherwise the first element is `start` and the last element is exactly
///   `stop`; it is not recomputed from the step, so rounding cannot shift it.
pub fn linspace(start: f64, stop: f64, num: usize) -> Vec<f64> {
    match num {
        0 => Vec::new(),
        1 => vec![start],
        _ => {
            let intervals = num - 1;
            let step = (stop - start) / intervals as f64;

            let mut values = Vec::with_capacity(num);
            values.extend((0..intervals).map(|i| start + i as f64 * step));
            values.push(stop);
            values
        }
    }
}
/// Returns `base` raised to each of the exponents produced by
/// `linspace(start, stop, num)`.
pub fn logspace(start: f64, stop: f64, num: usize, base: f64) -> Vec<f64> {
    linspace(start, stop, num)
        .into_iter()
        .map(|exponent| base.powf(exponent))
        .collect()
}
/// Linearly interpolated percentile of data that is already sorted ascending.
///
/// `p` is a percentage. Values outside `[0, 100]` are clamped to that range,
/// so they return the first or last element instead of indexing out of
/// bounds. Returns `0.0` for an empty slice.
///
/// The slice is not checked for sortedness; the caller must sort it.
pub fn percentile(sorted_data: &[f64], p: f64) -> f64 {
    if sorted_data.is_empty() {
        return 0.0;
    }

    let fraction = p.clamp(0.0, 100.0) / 100.0;
    // Fractional position within the slice; at most `len - 1` after clamping.
    let index = fraction * (sorted_data.len() - 1) as f64;
    let lower = index.floor() as usize;
    let upper = index.ceil() as usize;

    if lower == upper {
        sorted_data[lower]
    } else {
        let weight = index - lower as f64;
        sorted_data[lower] * (1.0 - weight) + sorted_data[upper] * weight
    }
}
/// Median of `data`, computed as the 50th [`percentile`] of a sorted copy.
///
/// Returns `0.0` for empty input. The input does not need to be sorted. NaN
/// values are ordered with `f64::total_cmp` rather than causing a panic; they
/// stay in the data and may therefore appear in the result.
pub fn median(data: &[f64]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }

    let mut sorted_data = data.to_vec();
    sorted_data.sort_unstable_by(f64::total_cmp);
    percentile(&sorted_data, 50.0)
}
/// Interquartile range of `data`: the 75th minus the 25th [`percentile`] of a
/// sorted copy.
///
/// Returns `0.0` for empty input. The input does not need to be sorted. NaN
/// values are ordered with `f64::total_cmp` rather than causing a panic; they
/// stay in the data and may therefore appear in the result.
pub fn iqr(data: &[f64]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }

    let mut sorted_data = data.to_vec();
    sorted_data.sort_unstable_by(f64::total_cmp);

    let q75 = percentile(&sorted_data, 75.0);
    let q25 = percentile(&sorted_data, 25.0);
    q75 - q25
}
/// Tests `value` against IQR fences built from `data`.
///
/// `value` is an outlier when it lies below `Q1 - multiplier * IQR` or above
/// `Q3 + multiplier * IQR`, where the quartiles come from [`percentile`] on a
/// sorted copy of `data`. Always returns `false` when `data` has fewer than
/// four elements. NaN values in `data` are ordered with `f64::total_cmp`
/// rather than causing a panic.
pub fn is_outlier(value: f64, data: &[f64], multiplier: f64) -> bool {
    if data.len() < 4 {
        return false;
    }

    let mut sorted_data = data.to_vec();
    sorted_data.sort_unstable_by(f64::total_cmp);

    let q25 = percentile(&sorted_data, 25.0);
    let q75 = percentile(&sorted_data, 75.0);
    let iqr_value = q75 - q25;

    let lower_bound = q25 - multiplier * iqr_value;
    let upper_bound = q75 + multiplier * iqr_value;
    value < lower_bound || value > upper_bound
}
/// Hashes a parameter set so that the result does not depend on the map's
/// iteration order.
///
/// Entries are visited in ascending key order. Each key is hashed, followed
/// by the raw bit pattern of its value, so values with different bit patterns
/// (for example `0.0` and `-0.0`) hash differently.
pub fn hash_parameters(params: &HashMap<String, f64>) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    // Map keys are unique, so an unstable sort gives the same order as a
    // stable one.
    let mut entries: Vec<(&String, &f64)> = params.iter().collect();
    entries.sort_unstable_by(|left, right| left.0.cmp(right.0));

    let mut state = DefaultHasher::new();
    entries.into_iter().for_each(|(name, value)| {
        name.hash(&mut state);
        value.to_bits().hash(&mut state);
    });
    state.finish()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance shared by the floating-point comparisons below.
    const TOLERANCE: f64 = 1e-10;

    /// Sample standard deviation of 1..=5 is sqrt(2.5).
    #[test]
    fn test_calculate_std_dev() {
        let std_dev = calculate_std_dev(&[1.0, 2.0, 3.0, 4.0, 5.0]);
        assert!((std_dev - 1.5811388300841898).abs() < TOLERANCE);
    }

    /// A series and its exact double are perfectly correlated.
    #[test]
    fn test_calculate_correlation() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
        let correlation = calculate_correlation(&x, &y);
        assert!((correlation - 1.0).abs() < TOLERANCE);
    }

    /// The 3-4-5 right triangle gives a distance of 5 from the origin.
    #[test]
    fn test_euclidean_distance() {
        use scirs2_core::ndarray::Array1;
        let origin = Array1::from_vec(vec![0.0, 0.0]);
        let point = Array1::from_vec(vec![3.0, 4.0]);
        let distance = euclidean_distance(&origin.view(), &point.view());
        assert!((distance - 5.0).abs() < TOLERANCE);
    }

    /// The 50th percentile of an odd-length sorted slice is its middle element.
    #[test]
    fn test_percentile() {
        let sorted = [1.0, 2.0, 3.0, 4.0, 5.0];
        let p50 = percentile(&sorted, 50.0);
        assert!((p50 - 3.0).abs() < TOLERANCE);
    }
}